/usr/lib/rtax/scripts/greengenesExtract.pl is in rtax 0.984-5.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | #!/usr/bin/env perl
use strict;
use warnings;
# Split a Greengenes-style record dump read from STDIN into two files in the
# current working directory:
#   greengenes.fasta    - ">prokMSA_id" header line followed by the sequence line
#   greengenes.taxonomy - "prokMSA_id<TAB>taxonomy string", one line per record
# Records are delimited by lines consisting solely of BEGIN and END; anything
# outside such a block is ignored.
open( my $fasta_fh, '>', 'greengenes.fasta' )
    or die("Can't write to greengenes.fasta: $!");
open( my $tax_fh, '>', 'greengenes.taxonomy' )
    or die("Can't write to greengenes.taxonomy: $!");

# Name of the record field whose value is written out as the taxonomy string.
my $fieldname = "gg_norm_tax_string";

RECORD: while ( my $line = <STDIN> ) {

    # read in by blocks; ignore anything outside a BEGIN/END block
    next RECORD unless $line =~ /^BEGIN$/;

    my $prokMSAid = "NONE";
    my $tax       = "NONE";
    my $seq       = "NONE";

    # Collect the fields of interest until the closing END line. The
    # defined() test stops the loop at end of input, so a truncated final
    # record can no longer spin forever on an undefined line.
    my $terminated = 0;
    FIELD: while ( defined( my $field = <STDIN> ) ) {
        if ( $field =~ /^END$/ ) {
            $terminated = 1;
            last FIELD;
        }

        if ( $field =~ /^prokMSA_id=(.+)/ ) {
            $prokMSAid = $1;
        }
        elsif ( $field =~ /^\Q$fieldname\E=(.+)/ ) {
            $tax = $1;
        }

        # NOTE(review): this pattern is unanchored, exactly as in the original,
        # so any field name ending in "aligned_seq" also matches - confirm
        # whether it should be anchored with ^ like the other two.
        elsif ( $field =~ /aligned_seq=(.+)/ ) {
            $seq = $1;
        }
    }

    if ( !$terminated ) {
        warn("Input ended inside a BEGIN block (prokMSA_id=$prokMSAid); skipping incomplete record\n");
        last RECORD;
    }

    # Emit only once the whole record has been read, so each record is written
    # exactly once and with its final id/taxonomy values regardless of the
    # order in which the fields appear.
    next RECORD if $seq eq "NONE" || $seq eq "unaligned";

    print {$fasta_fh} ">$prokMSAid\n$seq\n";

    # don't include taxonomy info if there is no sequence anyway
    # and certainly not if there is no taxonomy data
    if ( $tax ne "" && $tax ne "NONE" ) {
        print {$tax_fh} "$prokMSAid\t$tax\n";
    }
}

# Buffered write errors (e.g. disk full) only surface at close, so check it.
close($fasta_fh) or die("Can't finish writing greengenes.fasta: $!");
close($tax_fh)   or die("Can't finish writing greengenes.taxonomy: $!");
|