/usr/share/perl5/GO/Parsers/mgi_assoc_parser.pm is in libgo-perl 0.15-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | # $Id: mgi_assoc_parser.pm,v 1.3 2006/08/05 20:26:12 cmungall Exp $
#
#
# see also - http://www.geneontology.org
# - http://www.godatabase.org/dev
#
# You may distribute this module under the same terms as perl itself
package GO::Parsers::mgi_assoc_parser;
=head1 NAME
GO::Parsers::mgi_assoc_parser - parses MGI gene assoc stanza files
=head1 SYNOPSIS
do not use this class directly; use GO::Parser
=head1 DESCRIPTION
This is an EXPERIMENTAL module for converting MGIs in-house stanza format
this is richer than go_assoc files as it allows combinatorial assocs
these are represented with the go-assoc xml <in> element
=head1 AUTHOR
=cut
use Exporter;
use base qw(GO::Parsers::base_parser Exporter);
use GO::Parsers::ParserEventNames;
use Carp;
use FileHandle;
use strict;
sub dtd {
'go_assoc-parser-events.dtd';
}
sub ev_filter {
my $self = shift;
$self->{_ev_filter} = shift if @_;
return $self->{_ev_filter};
}
sub skip_uncurated {
my $self = shift;
$self->{_skip_uncurated} = shift if @_;
return $self->{_skip_uncurated};
}
sub _parse_ids {
my $txt = shift;
$txt =~ s/^:+//;
$txt =~ s/^\s+//;
my @parts = split(/\|/,$txt);
return grep {$_} map {if (/(\w+:\S+)/){$1}else {()}} @parts;
}
sub parse_fh {
my ($self, $fh) = @_;
my $file = $self->file;
$self->start_event(ASSOCS);
$self->start_event(DBSET);
$self->event(PRODDB, 'mgi');
my $curr_gene = '';
while (<$fh>) {
# UNICODE causes problems for XML and DB
# delete 8th bit
tr [\200-\377]
[\000-\177]; # see 'man perlop', section on tr/
# weird ascii characters should be excluded
tr/\0-\10//d; # remove weird characters; ascii 0-8
# preserve \11 (9 - tab) and \12 (10-linefeed)
tr/\13\14//d; # remove weird characters; 11,12
# preserve \15 (13 - carriage return)
tr/\16-\37//d; # remove 14-31 (all rest before space)
tr/\177//d; # remove DEL character
chomp;
if (/^\!/) {
next;
}
if (!$_) {
next;
}
if (/^MGI gene:\s(\S+);\s+(\S+)/) {
if ($1 ne $curr_gene) {
$self->pop_stack_to_depth(2);
$self->start_event(PROD);
$self->event(PRODACC, $1);
$self->event(PRODSYMBOL, $2);
$self->event(PRODTYPE, 'gene');
$curr_gene = $1;
}
}
if (/^GO term:\s(.*);\s+(\S+)/) {
$self->start_event(ASSOC);
$self->event(TERMACC, $2);
while (<>) {
chomp;
last unless $_;
if (/^GO evidence:\s*(.*)/) {
my $evtxt = $1;
foreach (split(/\s+\|\s+/,$evtxt)) {
$self->start_event(EVIDENCE);
$self->event(EVCODE, $_);
$self->end_event(EVIDENCE);
}
}
elsif (/^anatomy:(.*)/) {
my @ids = _parse_ids($1);
$self->event(property_value=>[[type=>'located_in'],
[to=>$_]]) foreach @ids;
}
elsif (/^cell type:(.*)/) {
my @ids = _parse_ids($1);
$self->event(property_value=>[[type=>'located_in'],
[to=>$_]]) foreach @ids;
}
else {
}
}
$self->end_event(ASSOC);
}
}
$fh->close;
$self->pop_stack_to_depth(0);
}
1;
|