/usr/lib/cd-hit/clstr_rev.pl is in cd-hit 4.6.8-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | #!/usr/bin/perl
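# Suppose nr90 was clustered from nr100, and nr80 was then clustered from
# nr90, so you have nr90.clstr and nr80.clstr.
# nr80.clstr refers only to nr90 sequences, so the nr100 members behind each
# nr90 representative do not appear in it.
# This script combines the two files and prints a new cluster listing
# (e.g. nr80new.clstr) as if nr80 had been clustered directly from nr100.
# Usage: clstr_rev.pl nr90.clstr nr80.clstr > nr80new.clstr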
use strict;
use warnings;

# Command line: first the cluster file of the earlier (finer) clustering,
# then the cluster file of the later (coarser) clustering that was run on
# the representatives of the first one. The merged listing goes to STDOUT.
my $file90 = shift;
my $file80 = shift;
die "Usage: $0 nr90.clstr nr80.clstr > nr80new.clstr\n"
    unless defined $file90 and defined $file80;

# Representative sequence name => all member lines of its cluster in
# $file90, stored as one newline-joined string. Only clusters with more
# than one member are kept; singletons need no expansion later.
my %gi2clstr = ();

# ---- Pass 1: index the multi-member clusters of $file90 -----------------
open(my $fh90, '<', $file90) or die "Can not open file $file90: $!\n";

my $rep_id  = "";    # name of the representative of the current cluster
my $members = "";    # raw member lines of the current cluster
my $count   = 0;     # number of member lines seen in the current cluster

while (my $line = <$fh90>) {
    if ($line =~ /^>/) {
        # A ">Cluster N" header closes the previous cluster.
        $gi2clstr{$rep_id} = $members if $rep_id ne "" and $count > 1;
        ($rep_id, $members, $count) = ("", "", 0);
        next;
    }

    $members .= $line;
    $count++;

    # The representative is the member line that ENDS with '*'. Anchoring
    # the test keeps a '*' inside a sequence name from being mistaken for
    # the marker.
    if ($line =~ /\*\s*$/ and $line =~ />(.+)\.\.\./) {
        $rep_id = $1;
    }
}
close($fh90);

# The last cluster has no following header line, so flush it here.
$gi2clstr{$rep_id} = $members if $rep_id ne "" and $count > 1;

# ---- Pass 2: rewrite $file80, expanding each member ---------------------
open(my $fh80, '<', $file80) or die "Can not open file $file80: $!\n";

my $no = 0;    # running member index inside the current output cluster

while (my $line = <$fh80>) {
    if ($line =~ /^>/) {
        # Cluster header: copy through and restart the numbering.
        print $line;
        $no = 0;
    }
    elsif ($line =~ />(.+)\.\.\./) {
        my $id = $1;

        # chomp, not chop: chop would eat the trailing '*' or '%' of a
        # final line that has no newline.
        chomp($line);

        my $is_rep = ($line =~ /\*$/) ? 1 : 0;

        # Identity of this member to its representative in $file80; a line
        # without an " at ..." suffix is treated as 100%.
        my $iden = ($line =~ / at (.+)$/) ? $1 : "100%";

        if (exists $gi2clstr{$id}) {
            # This sequence represented a multi-member cluster in $file90:
            # emit all of those members in its place.
            my @sub_members = split(/\n/, $gi2clstr{$id});
            for my $member (@sub_members) {
                $member =~ s/^\d+/$no/;
                if (not $is_rep) {
                    if ($member =~ /\*$/) {
                        # The $file90 representative is only an ordinary
                        # member here: swap its trailing '*' for the
                        # identity taken from $file80.
                        $member =~ s/\*$/at $iden/;
                    }
                    else {
                        # Prefix the $file80 identity to the identity the
                        # member already carries from $file90.
                        $member =~ s/ at (.+)$/ at $iden,$1/;
                    }
                }
                print "$member\n";
                $no++;
            }
        }
        else {
            # No multi-member cluster recorded for it: renumber and copy.
            $line =~ s/^\d+/$no/;
            print "$line\n";
            $no++;
        }
    }
    else {
        # Anything that is neither a header nor a member line is copied
        # through unchanged.
        print $line;
    }
}
close($fh80);
|