This file is indexed.

/usr/share/hhsuite/scripts/splitfasta.pl is in hhsuite 3.0~beta2+dfsg-3.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/perl
# splitfasta.pl 
# Split a file with multiple, FASTA formatted sequences into many single-sequence FASTA files
#
# (C) Johannes Soeding, 2012
#
#     HHsuite version 3.0.0 (15-03-2015)
#
#     Reference: 
#     Remmert M., Biegert A., Hauser A., and Soding J.
#     HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
#     Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).

#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.

#     We are very grateful for bug reports! Please contact us at soeding@mpibpc.mpg.de

use lib ( $ENV{"HHLIB"} || '/usr/share/hhsuite' )."/scripts";
use HHPaths;   # config file with path variables for nr, blast, psipred, pdb, dssp etc.
use strict;
use warnings;

my $ext="seq";         # extension of the generated sequence files (changed by -ext)

# Help text, printed when no input file is given on the command line.
# NOTE(review): $VERSION is not declared in this file; presumably it is exported by HHPaths.
my $usage="
splitfasta.pl from HHsuite $VERSION  
Split a file with multiple, FASTA formatted sequences into multiple single-sequence FASTA files.
Write files into current directory and name each file by the first word after \">\" in the name line. 

Usage: splitfasta.pl infile [option]
Option:
-fam       : use family-based name (for SCOP/ASTRAL sequences)
-name      : use sequence name as file name (default)
-ext <ext> : extension for sequence files (default=$ext)
\n";

if (@ARGV<1) {die $usage;}

my $line;              # input line currently being processed
my $infile=$ARGV[0];   # multi-sequence FASTA file to split
my $outfile;           # name of the file the current record will be written to
my $sequence="";       # current record: name line plus all lines following it
my $options="";        # all arguments after the input file, joined by single blanks
my $fam=0;             # option -fam?
my $famid="";          # second word of the current name line (set in -fam mode)
my %numfams=();        # counters used to number the output files in -fam mode
my $n=0;               # number of name lines read in so far

if (@ARGV>1) {
    $options.=join(" ",@ARGV[1..$#ARGV]);
}

# Parse the options. Every recognised option is cut out of $options.
# The look-arounds (?<!\S) and (?!\S) make sure that only whole, blank-delimited
# words are recognised, so that e.g. "-ext my-fam" neither switches on family
# mode nor loses part of the extension.
# -name is evaluated after -fam and therefore wins if both are given.
if ($options=~s/(?<!\S)-fam(?!\S)//g)       {$fam=1;}
if ($options=~s/(?<!\S)-name(?!\S)//g)      {$fam=0;}
if ($options=~s/(?<!\S)-ext\s+(\S+)//g)     {$ext=$1;}

# Write one FASTA record (name line plus sequence lines) to the file $path,
# replacing an existing file of that name. Dies if the file cannot be opened
# or if the data cannot be written completely.
# The three-argument open keeps characters such as '>' or '&' at the start of
# an id from being interpreted as part of the open mode.
sub write_seqfile {
    my ($path, $record) = @_;
    open(my $out, ">", $path) || die("ERROR: Can't open $path: $!\n");
    print {$out} $record;
    # Buffered write errors (e.g. disk full) only surface when closing
    close($out) || die("ERROR: Can't write $path: $!\n");
    return;
}

# Open the input. The former two-argument open read from STDIN when the
# input file was given as "-"; keep that behaviour explicitly.
my $in;
if ($infile eq "-") {
    $in = \*STDIN;
} else {
    open($in, "<", $infile) || die("ERROR: Can't open $infile: $!\n");
}

my %seen=();           # ids already encountered (default mode only)

while ($line=<$in>) {

    # Is this a name line that starts a new record?
    # -fam mode needs two words (id and family); a line that does not match
    # is treated like a sequence line and appended to the current record.
    my ($id, $family);
    if ($fam) {
        ($id, $family) = $line=~/^>(\S+)\s+(\S+)/;
    } else {
        ($id) = $line=~/^>(\S+)/;
    }

    if (!defined $id) {
        # Lines in front of the first name line are collected here as well,
        # but they are discarded when the first record starts
        $sequence.=$line;
        next;
    }

    # A new record starts: write out the record collected so far
    if ($n) {write_seqfile($outfile, $sequence);}

    if ($fam) {
        # Files are named <family>.<running number within family>.<ext>
        $famid=$family;
        $numfams{$famid}++;
        $outfile="$famid.".$numfams{$famid}.".$ext";
    } else {
        if ($seen{$id}) {print("\nWarning: id $id appears more than once in $infile\n");}
        $seen{$id}=1;
        # Files are named <id>.<ext>, with '|' and '.' in the id replaced by '_'
        (my $tmp = $id) =~ tr/|./__/;
        $outfile="$tmp.$ext";
    }
    $sequence=$line;
    $n++;
}

# Write out the last record
if ($n) {write_seqfile($outfile, $sequence);}

close($in);
printf("Created %i sequence files\n",$n);