/usr/share/ssake/nLength.pl is in ssake 3.8.4-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | #!/usr/bin/perl
#Rene Warren 2002-2008
my $min = 200;
if ($#ARGV<1){
die "$0 <fasta file> <minimum length>\n";
}
if($ARGV[1]){
$min = $ARGV[1];
}
if(! -e $ARGV[0]){
die "$ARGV[0] doesn't exist -- fatal.\n";
}
open (OUT, $ARGV[0]) || die "can't open $ARGV[0] for reading -- fatal.\n";
my ($seq,$prev) = ("","");
my $bins;
while (<OUT>){
chomp;
if (/\>(\S+)/){
my $tig = $1;
if ($prev ne $tig && $prev ne ""){
$seq =~ s/N//i; ### remove Ns
$bins->{$prev}=length($seq) if(length($seq) >= $min);
}
$seq='';
$prev=$tig;
}elsif(/^([ACGNTX]*)$/){
$seq .= $_;
}
}
$seq =~ s/N//i; ### remove Ns
$bins->{$prev}=length($seq) if(length($seq) >= $min);
close OUT;
for(my $cn;$cn<=90;$cn+=10){
my $n = &calculateN($bins,$cn,$min);
print "N$cn = $n bp ($cn% of the bases in your assembly are in sequences of length $n bp or larger.)\n";
}
exit;
#------------------------------------------------------------------------
sub calculateN{
# Ny length = the length L such that y% of all base-pairs are contained in contigs of this length or larger
my ($bins,$portion,$min) = @_; # reference to an array of contig sizes
my ($n,$total_size, $contig_number, $tig_size); # total_size = total bases in assembly
# get total size
foreach my $tig (sort {$bins->{$a}<=>$bins->{$b}} keys %$bins){
$total_size += $bins->{$tig};
}
my $size_subset = $total_size * ($portion / 100);
my $running_count=0;
# look for largest size contig containing size_subset% of bases
foreach my $tig (sort {$bins->{$b}<=>$bins->{$a}} keys %$bins){
$tig_size=$bins->{$tig};
$running_count+=$tig_size;
if ($running_count >= $size_subset){
$n=$tig_size;
last;
}
}
return $n;
}#end sub
|