/usr/bin/go-dag-summary is in libgo-perl 0.13-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | #!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
# POD docs at end of file
use strict;
use Getopt::Long;
use FileHandle;
use GO::Parser;
use Data::Stag;
# Main script: parse the command-line options, set up the XML error handler
# and summarise every ontology file named on the command line.

# Autoflush the currently selected output handle (STDOUT by default).
$| = 1;

my $opt = {};
GetOptions(
    $opt,
    "help|h",
    "format|p=s",
    "err|e=s",
    "use_cache",
);

if ($opt->{help}) {
    # List form: the script path reaches perldoc as one argument and is
    # never interpreted by a shell (safe for paths with spaces or
    # metacharacters).
    system('perldoc', $0);
    exit;
}

# Errors are emitted as XML events, either into the file given with -e or,
# when no file was given, onto STDERR.
my $errf       = $opt->{err};
my $errhandler = Data::Stag->getformathandler('xml');
if ($errf) {
    $errhandler->file($errf);
}
else {
    $errhandler->fh(\*STDERR);
}

my @files = GO::Parser->new->normalize_files(@ARGV);

# Iterate with foreach rather than "while (my $fn = shift @files)": the
# latter stops at the first filename that is false in boolean context
# (for example a file called "0"), silently skipping the remaining files.
foreach my $fn (@files) {
    eval {
        summarise_file($fn);
    };
    if ($@) {
        # A failure in one file is reported through the error handler and
        # does not prevent the remaining files from being processed.
        $errhandler->err_event(exception => "$@");
    }
}
# summarise_file($fn)
#
# Parses the ontology file $fn and prints one tab-separated row per
# namespace found in it to STDOUT. Columns, in order:
#   filename, namespace, number of terms, number of parent relationships,
#   number of paths to the top, average paths per term, maximum number of
#   paths for a single term, accession of that term, name of that term.
# Obsolete terms are skipped. Rows are emitted in sorted namespace order so
# that the output is reproducible between runs.
#
# Reads the file-level $opt (command-line options) and $errhandler.
# Exceptions raised while parsing are not caught here; they propagate to
# the caller.
sub summarise_file {
    my $fn = shift;

    # Start from the command-line options so that an explicit format
    # given by the user is handed to the parser unchanged.
    my %h = %$opt;

    # Guess the format from the filename; a later match overrides an
    # earlier one, and the guess is only used when no format was given.
    my $fmt;
    if ($fn =~ /\.obo/) {
        $fmt = 'obo_text';
    }
    if ($fn =~ /\.ont/) {
        $fmt = 'go_ont';
    }
    if ($fmt && !$h{format}) {
        $h{format} = $fmt;
    }
    $h{handler} = 'obj';

    # Direct method call instead of the ambiguous indirect object syntax
    # ("new GO::Parser(...)").
    my $parser = GO::Parser->new(%h);
    $parser->litemode(1);
    $parser->use_cache(1) if $opt->{use_cache};
    $parser->errhandler($errhandler);
    $parser->parse($fn);

    my $g = $parser->handler->graph;

    # %counts is keyed by statistic name, then by namespace.
    my %counts = ();
    my %ns_h   = ();
    foreach my $t (@{ $g->get_all_terms }) {
        next if $t->is_obsolete;

        my $ns = $t->term_type;
        if (!$ns) {
            # No term type: fall back to a label taken from the filename,
            # i.e. the text after the first "/" up to the last ".suffix",
            # or the whole filename when that pattern does not match.
            # NOTE(review): for paths with several directories the greedy
            # match keeps the inner directories - confirm this is intended.
            if ($fn =~ /\/(.*)\.\w+/) {
                $ns = $1;
            }
            else {
                $ns = $fn;
            }
        }
        $ns_h{$ns} = 1;

        my $acc = $t->acc;
        $counts{term}->{$ns}++;

        my $parent_rels = $g->get_parent_relationships($acc);
        $counts{relationship}->{$ns} += scalar(@$parent_rels);

        my $paths   = $g->paths_to_top($acc);
        my $n_paths = scalar(@$paths);
        $counts{path}->{$ns} += $n_paths;

        # ">=" means that among terms tied for the maximum, the one seen
        # last is the one reported. The maximum starts at 0 until the
        # first term of the namespace has been recorded.
        if ($n_paths >= ($counts{pathmax}->{$ns} || 0)) {
            $counts{pathmax}->{$ns}     = $n_paths;
            $counts{pathmaxacc}->{$ns}  = $acc;
            $counts{pathmaxname}->{$ns} = $t->name;
        }
    }

    # Sorted so the row order does not depend on hash ordering.
    foreach my $ns (sort keys %ns_h) {
        # Every namespace in %ns_h has at least one counted term, so the
        # division below cannot be by zero.
        printf "%s\n",
          join("\t",
               $fn,
               $ns,
               (map { $counts{$_}->{$ns} } qw(term relationship path)),
               $counts{path}->{$ns} / $counts{term}->{$ns},
               (map { $counts{$_}->{$ns} } qw(pathmax pathmaxacc pathmaxname)),
              );
    }
}
# All files have been processed: tell the error handler that no further
# events will follow (presumably this completes/flushes the XML error
# output - confirm against Data::Stag).
$errhandler->finish;
# Always exit with status 0; exceptions raised while summarising a file are
# caught by the eval in the per-file loop and reported via the error handler.
exit 0;
__END__
=head1 NAME
go-dag-summary - summarises an ontology
=head1 SYNOPSIS
go-dag-summary ontology/gene_ontology.obo
=head1 DESCRIPTION
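Summarises an ontology: parses each ontology file given on the command
line and prints, for every namespace in it, the number of terms, parent
relationships and paths to the top of the graph, together with the term
that has the most paths. Obsolete terms are skipped.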
=head1 ARGUMENTS
=head3 -e ERRFILE
writes parse errors in XML - defaults to STDERR
(there should be no parse errors in well formed files)
=head3 -p FORMAT
determines which parser to use; if left unspecified, will make a guess
based on file suffix. See below for formats
=head3 -use_cache
If this switch is specified, then caching mode is turned on.
With caching mode, the first time you parse a file, then an additional
file will be exported in a special format that is fast to parse. This
file will have the same filename as the original file, except it will
have the ".cache" suffix.
The next time you parse the file, this program will automatically
check for the existence of the ".cache" file. If it exists, and is
more recent than the file you specified, this is parsed instead. If it
does not exist, it is rebuilt.
=head1 OUTPUT
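One tab-delimited row per namespace (ontology) found in each input file
Each row has the following columns
=over
=item input filename
=item namespace (the term type; derived from the filename when a term has none)
=item total no of terms
=item total no of relationships
=item total no of paths
=item avg no of paths per term (p/t)
=item maximum no of paths for any term
=item ID of term with maximum no of paths
=item name of term with maximum no of paths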
NOTE: obsolete terms are not included
=back
=head2 DOCUMENTATION
L<http://www.godatabase.org/dev>
=head2 SEE ALSO
L<http://www.fruitfly.org/~cjm/obol/doc/go-complexity.html>
=cut
|