/usr/bin/bp_download_query_genbank is in bioperl 1.6.923-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
=head1 NAME
bp_download_query_genbank - script to query Genbank and retrieve records
=head1 USAGE
bp_download_query_genbank --query "Neurospora[ORGN]" --db nucest -o Ncrassa_ESTs.fa --format fasta
bp_download_query_genbank --queryfile 'filewithquery' --db nucest -o Ncrassa_ESTs.fa --format fasta
=head2 Other options
Provide ONE of:
-q --query query string OR
--queryfile file containing the query string OR
--gi --gis --gifile file with list of GIs to download
Database type:
-d --db database (nucleotide [default], nucest, protein)
-o --out --outfile output file (results are displayed on screen otherwise)
-f --format sequence file output format (fasta by default)
-v --verbose debugging output
=head2 Query options
--maxids maximum number of IDs to retrieve in a set (100 at a time by default)
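--reldate relative date: limit results to records dated within the last N days (passed through to Bio::DB::Query::GenBank)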
--maxdate maxdate for a record
--mindate minimum date for record
--datetype edat or mdat (entered or modified)
=head1 AUTHOR Jason Stajich
Jason Stajich, jason-AT-bioperl.org
=cut
use strict;
use warnings;
use Bio::DB::GenBank;
use Bio::DB::GenPept;
use Bio::DB::Query::GenBank;
use Bio::SeqIO;
use Getopt::Long;
# Command-line settings.  $format and the two %options entries below carry
# the defaults that apply when the corresponding switch is not given.
my $queryfile;
my $outfile;
my $format = 'fasta';
my $debug;
my $gifile;

# Named arguments that are later handed to Bio::DB::Query::GenBank->new.
my %options = (
    '-maxids' => '100',
    '-db'     => 'nucleotide',    # can be nucleotide, nucest, protein
);

# -h/--help: replace this process with perldoc showing this script's POD.
my $show_help = sub {
    exec('perldoc', $0);
    exit(0);    # only reached if exec itself could not be started
};

GetOptions(
    'h|help'          => $show_help,
    'v|verbose'       => \$debug,
    'f|format:s'      => \$format,
    'queryfile:s'     => \$queryfile,
    'o|out|outfile:s' => \$outfile,
    'gi|gifile|gis:s' => \$gifile,

    # DB::Query options
    'd|db:s'          => \$options{'-db'},
    'mindate:s'       => \$options{'-mindate'},
    'maxdate:s'       => \$options{'-maxdate'},
    'reldate:s'       => \$options{'-reldate'},
    'datetype:s'      => \$options{'-datetype'},    # edat or mdat
    'maxids:i'        => \$options{'-maxids'},
    'q|query:s'       => \$options{'-query'},
);
# Sequence writer in the requested format.  A -file argument is added only
# when an output file was named; without it Bio::SeqIO writes to STDOUT.
my @writer_args = ( -format => $format );
push @writer_args, ( -file => ">$outfile" ) if $outfile;
my $out = Bio::SeqIO->new(@writer_args);
# Remote database handle: GenPept for the 'protein' database, GenBank for
# every other -db value.
my $db_class = $options{'-db'} eq 'protein'
             ? 'Bio::DB::GenPept'
             : 'Bio::DB::GenBank';
my $dbh = $db_class->new(-verbose => $debug);
# Build the query (or queries) from whichever input was supplied and write
# every returned sequence to $out.  Inputs are tried in this order:
#   1. --gi/--gifile/--gis : file of whitespace-separated IDs, fetched in batches
#   2. -q/--query          : query string given on the command line
#   3. --queryfile         : file whose lines are concatenated into the query
# Dies when none of the three was provided.
my $query;

# Run one query against $dbh and write each returned sequence to $out.
my $write_results = sub {
    my ($db_query) = @_;
    my $stream = $dbh->get_Stream_by_query($db_query);
    while( my $seq = $stream->next_seq ) {
        $out->write_seq($seq);
    }
    return;
};

if( $gifile ) {
    # Three-argument open so the file name can never be read as an open mode.
    open( my $fh, '<', $gifile )
        or die "cannot open GI file '$gifile': $!\n";
    my @ids;
    while( my $line = <$fh> ) {
        push @ids, split ' ', $line;
    }
    close($fh);

    # splice() with a zero or negative length removes nothing, which would
    # make the loop below spin forever; fall back to the documented default.
    my $batch_size = $options{'-maxids'};
    $batch_size = 100 unless $batch_size && $batch_size > 0;

    while( @ids ) {
        my @mini_ids = splice(@ids, 0, $batch_size);
        $query = Bio::DB::Query::GenBank->new(%options,
                                              -ids => \@mini_ids,
                                             );
        $write_results->($query);
    }
    exit;
} elsif( $options{'-query'} ) {
    $query = Bio::DB::Query::GenBank->new(%options);
} elsif( $queryfile ) {
    open( my $fh, '<', $queryfile )
        or die "cannot open query file '$queryfile': $!\n";
    # Read from the opened handle (not the file name) and join the lines,
    # minus their newlines, into a single query string.
    while( my $line = <$fh> ) {
        chomp $line;
        $options{'-query'} .= $line;
    }
    close($fh);
    $query = Bio::DB::Query::GenBank->new(%options);
} else {
    die("no query string or gifile\n");
}

$write_results->($query);
|