/usr/share/augustus/scripts/wigchoose.pl is in augustus 3.3+dfsg-2build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | #!/usr/bin/perl
#
# choose an interval with good coverage
#
# Mario Stanke, March 2010
#
# Sebastian Adler, November 25 2010
#
use strict;
use Getopt::Long;
my $usage = "$0 -- choose an interval from each target with good coverage\n";
$usage .= "\n";
$usage .= "Usage: cat in.wig | [perl] $0 > out.bed\n";
$usage .= "Options:\n";
$usage .= " --mincov=n minimal coverage for each position of the interval (default: 1)\n";
$usage .= " --minrelcov=f minimal relative coverage (0 <= f <= 1, default: 0)\n";
$usage .= " --maxgap=n coverage gaps of maximal this length each are allowed (default: 0)\n";
my $help = 0;
my $mincov = 1; # as given on the command line
my $mincoverage; # adjusted
my $minrelcov = 0; # per default no constraint
my $maxgap = 0;
GetOptions(
'mincov:n'=>\$mincov,
'minrelcov:f'=>\$minrelcov, #optional floating point value
'maxgap:n'=>\$maxgap,
'help!'=>\$help);
if ($help) {
print "$usage";
exit(0);
}
my $tname;
my ($pos, $cov);
my @ctglines = (); # all lines of the input file belonging to one contig
while (<>) {
if (/variableStep chrom=(\S+)/){
if (defined($tname)){
processCtg(\@ctglines);
}
$tname = $1;
@ctglines = ();
next;
}
push @ctglines, $_;
}
if (defined($tname)) {
processCtg(\@ctglines);
}
sub processCtg {
my $ctglinesref = shift;
my $r = getBestRange($ctglinesref, $mincov);
#print "ohne Filter: $tname\t" . $r->{"a"} . "\t" . $r->{"b"} . "\t" . $r->{"score"} . "\n";
if ($r->{"score"}>=0){
my $relcov = $r->{"score"} / ($r->{"b"} - $r->{"a"} + 1);
$mincoverage = $mincov;
if ($mincoverage < $relcov * $minrelcov){
#print "increasing mincov from $mincov to " . ($relcov * $minrelcov) . " in $tname\n";
$mincoverage = $relcov * $minrelcov;
$r = getBestRange(\@ctglines, $mincoverage);
}
print "$tname\t" . $r->{"a"} . "\t" . $r->{"b"} . "\t" . $r->{"score"} . "\n"; # print best interval of previous target
}
}
sub getBestRange {
my $lines = shift;
my $mc = shift;
my $ma; # left and right boundaries of the
my $mb; # best interval seen so far
my $ms; # score of the best interval seen so far
my $a; # boundaries of the current/last interval
my $b;
my $s;
$ma = $mb = $ms = $a = $b = -1;
$s = 0;
foreach my $line (@$lines){
if ( $line =~ /(\d+)\s(\d+)/){
$pos = $1;
$cov = $2;
# update intervals and scores
next if ($cov < $mc);
if ($pos <= $b + 1 + $maxgap) {
$b = $pos;
$s += $cov; # problem, if $s still undefined?
} else {
$a = $b = $pos;
$s = $cov; # fixed bug here on June 8th 2010: was s=pos
}
if ($ms < 0 || $ms < $s) {# current interval is best interval
#print "new best: $a, $b, $s\n";
($ma, $mb, $ms) = ($a, $b, $s);
}
}
}
my %h= ("a" => $ma, "b" => $mb, "score" => $ms);
return \%h;
}
|