/usr/bin/bp_sreformat is in bioperl 1.6.924-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
# Author: Jason Stajich <jason-at-bioperl-dot-org>
# Purpose: Bioperl implementation of Sean Eddy's sreformat
# We're not as clever as Sean's squid library though so
# you have to specify the input format rather than letting
# the application guess.
use strict;
use warnings;
use Bio::SeqIO;
use Bio::AlignIO;
use Getopt::Long;
# Help text shown for -h/--help and when the command line cannot be parsed.
my $USAGE = "bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
-h/--help Print this help
-if/--informat Specify the input format
-of/--outformat Specify the output format
-i/--input Specify the input file name
(to pass in data on STDIN use minus sign as filename)
-o/--output Specify the output file name
(to pass data out on STDOUT use minus sign as filename)
--msa Specify this is multiple sequence alignment data
--special=specialparams Specify special params supported by some formats
Comma or space separated please.
These include:
nointerleaved -- for phylip,non-interleaved format
idlinebreak -- for phylip, makes it molphy format
percentages -- for clustalw, show % id per line
flat -- don't show start-end in seqid
linelength -- line length for clustalw
mrbayes -- for MrBayes proper NEXUS output
";

# Settings filled in from the command line; each one stays undef unless its
# option is given.
my ($input,$output,$informat,$outformat,$msa,$special);

# GetOptions returns false after it has reported a malformed command line
# (for example an unknown option).  Stop there instead of running a
# conversion with settings the user did not intend.
GetOptions(
    'h|help'        => sub { print STDERR ($USAGE); exit(0) },
    'i|input:s'     => \$input,
    'o|output:s'    => \$output,
    'if|informat:s' => \$informat,
    'of|outformat:s' => \$outformat,
    'msa'           => \$msa,
    's|special:s'   => \$special,
) or die($USAGE);

# Both format names are mandatory: this script does not guess formats.
unless( defined $informat && defined $outformat ) {
    die(sprintf("Cannot proceed without a defined input and output format; you gave (%s,%s)\n",
                defined $informat  ? $informat  : "''",
                defined $outformat ? $outformat : "''"));
}
# Reader and writer objects; they are created further down, once the script
# knows whether it is converting alignments or plain sequences.
my ($in,$out);

# Extra constructor arguments derived from --special.
my @extra = _special_to_args($special);

# Turn the --special string into a flat list of "-name => value" pairs.
#
#   $spec  comma and/or whitespace separated tokens; may be undef or empty
#
# Returns an empty list when nothing was requested.  Tokens are split on
# runs of separators and empty tokens are dropped, so "a, b" or a leading
# comma no longer produces a meaningless "-" => 1 pair.
#
#   nointerleaved  -> -interleaved => 0
#   mrbayes        -> -show_symbols => 0, -show_endblock => 0
#   name=value     -> -name => value  (split at the first "=")
#   anything else  -> -token => 1
sub _special_to_args {
    my ($spec) = @_;
    return unless $spec;

    my @args;
    for my $token ( grep { length } split /[\s,]+/, $spec ) {
        if( $token =~ /nointerleaved/ ) {
            push @args, '-interleaved' => '0';
        } elsif( $token =~ /mrbayes/ ) {
            push @args, '-show_symbols' => 0,
                        '-show_endblock' => 0;
        } elsif( $token =~ /\A([^=]+)=(.+)\z/ ) {
            # Split at the FIRST "=" so a value may itself contain "=".
            push @args, "-$1" => $2;
        } else {
            push @args, "-$token" => 1;
        }
    }
    return @args;
}
# Guess that we are talking about MSA data if any of the standard MSA format
# names is used on either side of the conversion.
my $msa_format_re = qr/nexus|phylip|clustal|maf|stockholm|bl2seq|msf/;
if( $informat =~ $msa_format_re || $outformat =~ $msa_format_re ) {
    $msa = 1;
}

if( $msa ) {
    # The reader is opened first so that a bad input never truncates an
    # existing output file.
    $in  = _open_stream('Bio::AlignIO', 'MSA', 'input',  $informat,  $input);
    $out = _open_stream('Bio::AlignIO', 'MSA', 'output', $outformat, $output, @extra);

    # --special is optional, so guard against undef before matching; the
    # answer does not change per alignment, so compute it once.
    my $flat_names = defined $special && $special =~ /flat/;
    while( my $aln = $in->next_aln ) {
        $aln->set_displayname_flat(1) if $flat_names;
        $out->write_aln($aln);
    }
} else {
    $in  = _open_stream('Bio::SeqIO', 'sequence', 'input',  $informat,  $input);
    $out = _open_stream('Bio::SeqIO', 'sequence', 'output', $outformat, $output);
    while( my $seq = $in->next_seq ) {
        $out->write_seq($seq);
    }
}

# Build one reader or writer object and die with a readable message when the
# constructor throws.
#
#   $class       'Bio::AlignIO' or 'Bio::SeqIO'
#   $kind        word used in the error message ('MSA' or 'sequence')
#   $direction   'input' or 'output'
#   $format      format name handed to the constructor as -format
#   $file        file name; may be undef, and '-' means STDIN / STDOUT as the
#                usage text promises
#   @ctor_extra  further constructor arguments, appended unchanged
#
# Input without a file name reads from ARGV; output without a usable file
# name passes no -file at all, which leaves the writer on its default
# (STDOUT).
sub _open_stream {
    my ($class, $kind, $direction, $format, $file, @ctor_extra) = @_;

    my @target;
    if( $direction eq 'input' ) {
        if( !defined $file ) {
            @target = (-fh => \*ARGV);
        } elsif( $file eq '-' ) {
            @target = (-fh => \*STDIN);
        } else {
            @target = (-file => $file);
        }
    } elsif( $file && $file ne '-' ) {
        @target = (-file => ">$file");
    }

    my $stream = eval { $class->new(-format => $format, @target, @ctor_extra) };
    my $err = $@;
    if( $err ) {
        # NOTE(review): the open-failure text is produced inside BioPerl's
        # I/O layer, not here.  "open" is what this script always matched;
        # the "read"/"write" alternatives are an assumption about the wording
        # used by other releases -- confirm against the installed
        # Bio::Root::IO.
        if( $err =~ /Could not (?:open|read|write)/ ) {
            die(sprintf("Could not open %s file: %s\n",
                        $direction, defined $file ? $file : '-'));
        }
        # Keep the underlying exception so the real cause is not hidden
        # behind the generic "Unknown format" wording.
        die("Unknown $kind format to bioperl $format: $err\n");
    }
    return $stream;
}

=head1 NAME

bp_sreformat - convert sequence formats

=head1 DESCRIPTION

This script uses the SeqIO and AlignIO systems to convert between file
formats for either sequence data or multiple sequence alignment data. The
name comes from the fact that Sean Eddy's program sreformat (part of
the HMMER pkg) already does this. Sean's program tries to guess the
input format, while in our code we currently require you to specify what
the input and output formats are and whether the data is from a multiple
sequence alignment or from straight sequence files.

Usage:

  bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT

  -h/--help        Print this help
  -if/--informat   Specify the input format
  -of/--outformat  Specify the output format
  -i/--input       Specify the input file name
                   (to pass in data on STDIN use minus sign as filename)
  -o/--output      Specify the output file name
                   (to pass data out on STDOUT use minus sign as filename)
  --msa            Specify this is multiple sequence alignment data
  --special        Will pass on special parameters to the AlignIO/SeqIO
                   object -- most of these are for Bio::AlignIO objects
                   Comma or space separated list of the following
                   nointerleaved -- for phylip,non-interleaved format
                   idlinebreak   -- for phylip, makes it molphy format
                   percentages   -- for clustalw, show % id per line
                   flat          -- don't show start-end in seqid
                   linelength    -- line length for clustalw
                   mrbayes       -- for MrBayes proper NEXUS output

=cut
|