/usr/bin/bp_taxonomy2tree is in bioperl 1.6.924-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
=head1 NAME
bp_taxonomy2tree - Building a taxonomic tree based on the full lineages of a set of species names
=head1 DESCRIPTION
This script looks up the provided species names in the NCBI Taxonomy database,
retrieves their full lineage and puts them in a Newick taxonomic tree displayed
on screen.
bp_taxonomy2tree.pl -s Orangutan -s Gorilla -s Chimpanzee -s Human
bp_taxonomy2tree.pl -s Orangutan -s Gorilla -s Chimpanzee -s "Homo Sapiens"
You can also provide -d to specify the directory to store index files in, -o to
specify the location of your NCBI nodes file, and -a for the NCBI names file.
Alternatively, use the option -e to query the web-based Entrez taxonomy database
if you do not have the NCBI flatfiles installed.
This script requires that the bioperl-run package also be installed.
Providing the nodes.dmp and names.dmp files from the NCBI Taxonomy
dump (see Bio::DB::Taxonomy::flatfile for more info) is only necessary
the first time the script is run. This will create the local indexes and may
take quite a long time. However, once created, these indexes will
allow fast access for species to taxon id OR taxon id to species name
lookups.
=head1 AUTHOR - Gabriel Valiente, reimplemented by Sendu Bala
Email valiente@lsi.upc.edu
Email bix@sendu.me.uk
=cut
use strict;
use warnings;
use Bio::DB::Taxonomy;
use Bio::TreeIO;
use Bio::Tree::Compatible;
use Getopt::Long;
# Command-line settings and their defaults.
my @species;                     # species names to look up; one per -s option
my $index_dir  = "./db/";        # -d: directory handed to the taxonomy DB
my $nodesfile  = "nodes.dmp";    # -o: location of the NCBI nodes file
my $namesfile  = "names.dmp";    # -a: location of the NCBI names file
my $use_entrez = 0;              # -e: set when the Entrez source is requested

# the input to the script is an array of species names
#
# GetOptions returns false when the command line cannot be parsed (for
# example an unknown option). Stop in that case instead of continuing with
# partially parsed settings.
GetOptions(
    's|species=s'   => \@species,
    'd|dir:s'       => \$index_dir,
    'o|nodesfile:s' => \$nodesfile,
    'a|namesfile:s' => \$namesfile,
    'e|entrez'      => \$use_entrez,
    # -h shows this script's own documentation through perldoc, then exits
    'h|help'        => sub { system('perldoc', $0); exit },
) or die "Error in command line arguments; run with -h for help\n";
# Open the taxonomy database: the 'entrez' source when -e was given,
# otherwise the 'flatfile' source. The index directory and the nodes/names
# file locations are passed along in both cases.
my $taxonomy_source = $use_entrez ? 'entrez' : 'flatfile';
my $db = Bio::DB::Taxonomy->new(
    -source    => $taxonomy_source,
    -directory => $index_dir,
    -nodesfile => $nodesfile,
    -namesfile => $namesfile,
);
# Build one tree out of the lineages of all requested species. $tree stays
# undefined until the first species has been resolved successfully.
my $tree;
foreach my $species_name (@species) {
    my $taxon = $db->get_taxon(-name => $species_name);

    # Names the database cannot resolve are reported and skipped.
    unless ($taxon) {
        warn "no NCBI Taxonomy node for species ",$species_name,"\n";
        next;
    }

    # The first resolved taxon seeds the tree ...
    unless ($tree) {
        $tree = Bio::Tree::Tree->new(-node => $taxon);
        next;
    }

    # ... and every later one has its lineage merged into it.
    $tree->merge_lineage($taxon);
}
# $tree is still undefined when no -s option was given or none of the names
# could be resolved. Fail with an explanation rather than with
# "Can't call method ... on an undefined value" from the call below.
defined $tree
    or die "no taxonomic tree could be built: provide at least one species "
         . "name with -s that exists in the NCBI Taxonomy database\n";

# simple paths are contracted by removing degree one nodes
$tree->contract_linear_paths;

# convert tree ids to their names for nice output with TreeIO
foreach my $node ($tree->get_nodes) {
    $node->id($node->node_name);
}

# the tree is output in Newick format; no -file or -fh is given to TreeIO
my $output = Bio::TreeIO->new(-format => 'newick');
$output->write_tree($tree);
$output->close;

1;
|