/usr/bin/bp_taxid4species is in bioperl 1.7.2-2.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# Author: Jason Stajich <jason@bioperl.org>
# Purpose: Retrieve the NCBI Taxa ID for organism(s)
# TODO: add rest of POD
#
use LWP::UserAgent;
use XML::Twig;
use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
# Command-line switches; all default to off.
my $verbose = 0;    # -v: echo the raw XML of each NCBI response
my $plain   = 0;    # -p: print only the taxon id instead of "'name', id"
my $help    = 0;    # -h: print the usage line and stop
my $USAGE = "taxid4species: [-v] [-p] \"Genus1 species1\" \"Genus2 species2\"";
# GetOptions returns false (after warning on STDERR) when it meets an unknown
# or malformed switch. Stop with the usage line in that case rather than
# carrying on with a command line that was only partly understood.
GetOptions('v|verbose' => \$verbose,
           'p|plain'   => \$plain,
           'h|help'    => \$help)
    or die("$USAGE\n");
die("$USAGE\n") if $help;
# HTTP client used for both requests to NCBI. Written as Class->new rather
# than the indirect-object form "new Class()", which Perl can mis-parse when
# a sub called "new" is in scope.
my $ua = LWP::UserAgent->new();
# Common prefix of the NCBI E-utilities URLs.
my $urlbase = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/';
# Search of the taxonomy database; the search term is appended to this string.
my $esearch = 'esearch.fcgi?db=taxonomy&usehistory=y&term=';
# Summary request template; QUERYKEY and WEBENV are placeholders that are
# replaced with values read from the esearch response before use.
my $esummary = 'esummary.fcgi?db=taxonomy&query_key=QUERYKEY&WebEnv=WEBENV';
# Everything left on the command line after option parsing is an organism name.
my (@organisms) = @ARGV;
die("must provide valid organism") unless @organisms;
# A single search term that matches any of the requested organisms.
my $organismstr = join(" OR ", @organisms);
# Make the term safe to append to a URL query string. Characters such as
# '&', '#', '%', '=' and '+' would otherwise be read as URL syntax and
# truncate or alter the search term. Work on UTF-8 bytes so that every
# escaped character becomes one or more two-digit %XX sequences.
utf8::encode($organismstr) if utf8::is_utf8($organismstr);
$organismstr =~ s/([^A-Za-z0-9\-_.~\s])/sprintf('%%%02X', ord($1))/ge;
# Whitespace is sent as '+', as before.
$organismstr =~ s/\s/\+/g;
# Esearch: look the organism names up in the taxonomy database. The reply is
# read for a QueryKey and a WebEnv element, which are then passed on to
# esummary below.
my $response = $ua->get($urlbase . $esearch . $organismstr);
print $response->content,"\n"if($verbose);
# Stop on an HTTP-level failure instead of trying to parse an error page.
die("esearch request failed: " . $response->status_line . "\n")
    unless $response->is_success;
my $t = XML::Twig->new();
$t->parse($response->content);
my $root = $t->root;
# first_child returns undef when the element is absent, so check both
# elements before calling ->text on them.
my $querykey_elt = $root->first_child('QueryKey');
my $webenv_elt   = $root->first_child('WebEnv');
die("esearch response contains no QueryKey/WebEnv; cannot request summaries\n")
    unless defined $querykey_elt && defined $webenv_elt;
my $querykey = $querykey_elt->text;
my $webenv = $webenv_elt->text;
# Esummary: fill the placeholders in the template and fetch the summaries
# for the result set identified by the QueryKey/WebEnv pair.
$esummary =~ s/QUERYKEY/$querykey/;
$esummary =~ s/WEBENV/$webenv/;
$response = $ua->get($urlbase . $esummary);
print $response->content,"\n"if($verbose);
die("esummary request failed: " . $response->status_line . "\n")
    unless $response->is_success;
$t = XML::Twig->new();
$t->parse($response->content);
# $root now refers to the esummary document; it is what gets parsed below.
$root = $t->root;
# Collect, for every child of the summary document, its scientific name and
# id. %taxinfo is keyed by the lower-cased scientific name so that the
# lookup against the command-line names is case-insensitive.
my %taxinfo;
SUMMARY:
for my $summary ($root->children) {
    for my $field ($summary->children('Item')) {
        # Only the Item whose Name attribute is 'ScientificName' is of interest.
        next unless $field->{att}{Name} eq 'ScientificName';
        my $name  = $field->text;
        my $key   = lc $name;
        my $taxid = $summary->first_child_text('Id');
        $taxinfo{$key}{sciname} = $name;
        $taxinfo{$key}{tid}     = $taxid;
        # The first matching Item settles this summary; move on to the next one.
        next SUMMARY;
    }
}
# Report one line per requested organism, in command-line order.
for my $name (@organisms) {
    my $key = lc $name;
    # Names that NCBI returned no summary for are reported as such.
    unless (exists $taxinfo{$key}) {
        print "'$name' not found\n";
        next;
    }
    my $taxid = $taxinfo{$key}{tid};
    # Plain mode prints the bare id; otherwise the quoted name precedes it.
    my $line = $plain ? "$taxid\n" : "'$name', $taxid\n";
    print $line;
}
=head1 NAME
bp_taxid4species - simple script which returns the NCBI Taxonomic id for a requested species
=head1 SYNOPSIS
bp_taxid4species [-v] [-p] [-h] "Genus1 species1" "Genus2 species2"
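Options:
-v verbose: also print the raw XML returned by each NCBI request
-p plain: print only the taxon id for each organism that was found
-h help: print a usage message and exit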
=head1 DESCRIPTION
This simple script shows how to get the taxa id from NCBI Entrez and
will return a list of taxa ids for requested organisms.
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
https://github.com/bioperl/bioperl-live/issues
=head1 AUTHOR
Jason Stajich jason-at-bioperl-dot-org
=cut
|