/usr/lib/perl5/Zerg.pm is in libzerg-perl 1.0.4-2build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Copyright (C) 2002 Apuã Paquola - Instituto de Química -
# Universidade de São Paulo - Brasil
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
package Zerg;
use 5.006;
use strict;
use warnings;
use Errno;
use Carp;
require Exporter;
require DynaLoader;
use AutoLoader;
our @ISA = qw(Exporter DynaLoader);
# Exporter tables.
#
# Every name below is a token code returned by the scanner.  The list is
# written once, inside the ':all' tag, and the other two tables are derived
# from it so the three can never drift apart:
#
#   use Zerg ':all';    # imports every token code through the tag
#   use Zerg;           # imports every token code as well (default @EXPORT)
our %EXPORT_TAGS = (
    'all' => [
        qw(
            ALIGNMENT_LENGTH
            BLAST_VERSION
            CONVERGED
            DATABASE
            DESCRIPTION_ANNOTATION
            DESCRIPTION_EVALUE
            DESCRIPTION_HITNAME
            DESCRIPTION_SCORE
            END_OF_REPORT
            EVALUE
            GAPS
            HSP_METHOD
            IDENTITIES
            NOHITS
            PERCENT_IDENTITIES
            PERCENT_POSITIVES
            POSITIVES
            QUERY_ALI
            QUERY_ANNOTATION
            QUERY_END
            QUERY_FRAME
            QUERY_LENGTH
            QUERY_NAME
            QUERY_ORIENTATION
            QUERY_START
            REFERENCE
            ROUND_NUMBER
            ROUND_SEQ_FOUND
            ROUND_SEQ_NEW
            SCORE
            SCORE_BITS
            SEARCHING
            SUBJECT_ALI
            SUBJECT_ANNOTATION
            SUBJECT_END
            SUBJECT_FRAME
            SUBJECT_LENGTH
            SUBJECT_NAME
            SUBJECT_ORIENTATION
            SUBJECT_START
            TAIL_OF_REPORT
            UNMATCHED
        )
    ],
);

# Names that may be requested explicitly: a copy of the ':all' list.
our @EXPORT_OK = @{ $EXPORT_TAGS{'all'} };

# Names exported by default: the same list again, in the same order.
our @EXPORT = @EXPORT_OK;

our $VERSION = '1.0.4';
# AUTOLOAD - resolve token-code constants on first use.
#
# Perl invokes this sub when an undefined function in this package is
# called; the fully qualified name requested is in $AUTOLOAD.  The
# unqualified name is handed to the XS function constant(), together with
# the caller's first argument (or 0 when there is none).
#
# Outcomes:
#   * $! is zero after constant(): the value is wrapped in a closure that
#     is installed under the requested name, so later calls no longer pass
#     through AUTOLOAD, and control jumps to that new sub.
#   * $! is EINVAL: the name is not a constant; control is handed over to
#     AutoLoader::AUTOLOAD.
#   * $! is anything else: croaks with
#     "Your vendor has not defined Zerg macro <name>".
#
# Also croaks when the requested name is 'constant' itself: that means
# constant() is not defined, and calling it would re-enter AUTOLOAD forever.
sub AUTOLOAD {
    our $AUTOLOAD;

    # Strip the package qualifier: "Zerg::SCORE" -> "SCORE".
    my $constname;
    ($constname = $AUTOLOAD) =~ s/.*:://;

    croak "&Zerg::constant not defined" if $constname eq 'constant';

    # Clear errno first so that a stale value left by an earlier failed
    # system call cannot be mistaken for a lookup failure, in case
    # constant() does not reset it itself.
    $! = 0;
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
        if ($!{EINVAL}) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            croak "Your vendor has not defined Zerg macro $constname";
        }
    }

    {
        # Installing a sub through a symbolic glob name needs refs relaxed.
        no strict 'refs';
        *$AUTOLOAD = sub { $val };
    }

    # Tail-call the freshly installed sub so the caller gets its value.
    goto &$AUTOLOAD;
}
# Load the compiled half of the module: call bootstrap() as a class method
# on Zerg, passing $VERSION along with the load request.
Zerg->bootstrap($VERSION);

# Preloaded methods would be defined here.  Autoloaded methods belong after
# the =cut marker, where the autosplit program picks them up.

# A module file has to finish by returning a true value.
1;
__END__
# Module documentation (POD) follows.
=head1 NAME
Zerg - a lexical scanner for BLAST reports.
=head1 SYNOPSIS
use Zerg;
=head1 DESCRIPTION
This manpage describes the Zerg library and its interface for use with
Perl.
The Zerg library contains a C/flex lexical scanner for BLAST reports
and a set of supporting functions. It is centered on a "get_token"
function that scans the input for specified lexical elements and, when
one is found, returns its code and value to the user.
It is intended to be fast: for that we used flex, which provides
simple regular expression matching and input buffering in the
generated C scanner. And it is intended to be simple in the sense of
providing just a lexical scanner, with no features whose support could
slow down its main function.
=head2 FUNCTIONS
zerg_get_token() is the core function of this module. Each time it is
called, it scans the input BLAST report for the next "interesting"
lexical element and returns its code and value. Codes are listed in
the section "EXPORTED CONSTANTS (TOKEN CODES)". Code zero (not listed)
means end of file.
($code, $value) = Zerg::zerg_get_token();
zerg_open_file($filename) opens $filename in read-only mode and sets it
as the input to the scanner. If this function is not called, the
standard input is used.
Zerg::zerg_open_file($filename);
zerg_close_file() closes the file opened with zerg_open_file().
zerg_get_token_offset() returns the byte offset (relative to the
beginning of file) of the last token read. (See section BUGS).
zerg_ignore($code) instructs zerg_get_token not to return when it
finds a token with code $code.
zerg_ignore_all() does zerg_ignore on all token codes.
zerg_unignore($code) instructs zerg_get_token to return when it
finds a token with code $code.
zerg_unignore_all() does zerg_unignore on all token codes.
Example:
Zerg::zerg_ignore_all();
Zerg::zerg_unignore(QUERY_NAME);
Zerg::zerg_unignore(SUBJECT_NAME);
=head2 EXPORTED CONSTANTS (TOKEN CODES)
ALIGNMENT_LENGTH
BLAST_VERSION
CONVERGED
DATABASE
DESCRIPTION_ANNOTATION
DESCRIPTION_EVALUE
DESCRIPTION_HITNAME
DESCRIPTION_SCORE
END_OF_REPORT
EVALUE
GAPS
HSP_METHOD
IDENTITIES
NOHITS
PERCENT_IDENTITIES
PERCENT_POSITIVES
POSITIVES
QUERY_ALI
QUERY_ANNOTATION
QUERY_END
QUERY_FRAME
QUERY_LENGTH
QUERY_NAME
QUERY_ORIENTATION
QUERY_START
REFERENCE
ROUND_NUMBER
ROUND_SEQ_FOUND
ROUND_SEQ_NEW
SCORE
SCORE_BITS
SEARCHING
SUBJECT_ALI
SUBJECT_ANNOTATION
SUBJECT_END
SUBJECT_FRAME
SUBJECT_LENGTH
SUBJECT_NAME
SUBJECT_ORIENTATION
SUBJECT_START
TAIL_OF_REPORT
UNMATCHED
=head2 NOTES ON THE SCANNER
Some BLAST parsers rely on simple regular expression matches to
determine token types and values. For example: an input line
matching /^Query=\s(\S+)/ leads such a "loose" parser to infer
that a token was found, that it is a query name, and that its value is
$1. Although improbable, it is perfectly possible for an annotation
field to match /^Query=\s(\S+)/. Worse than this is the fact that
those parsers are often unable to detect corrupt or truncated BLAST
reports, possibly producing inaccurate information.
The scanner provided by this library is much more stringent: for a
token to match it must be in its place in the context of a BLAST
report. For example: in a single BLAST report, a QUERY_NAME cannot
follow another QUERY_NAME. The scanner can be thought of as, and in fact
it is, a big regular expression that matches an entire BLAST report.
A special token code (UNMATCHED) is provided for cases in which the input
text does not match any other lexical rule of the scanner. When an
unmatched character is found, either the report is corrupt or the
scanner has a bug.
If you are interested in only a few token codes, try to zerg_ignore()
as many codes as you can. This will avoid unnecessary function calls that
eat a lot of CPU.
=head1 EXAMPLES
This program prints the code and the value of each token it finds.
#!/usr/bin/perl -w
use strict;
use Zerg;
my ($code, $value);
while((($code, $value)= Zerg::zerg_get_token()) && $code)
{
print "$code\t$value\n";
}
The program below is a "syntax checker". The presence of UNMATCHEDs is
a strong indicator of problems in the BLAST report. (See section NOTES
ON THE SCANNER)
#!/usr/bin/perl -w
use strict;
use Zerg;
my ($code, $value);
Zerg::zerg_ignore_all();
Zerg::zerg_unignore(UNMATCHED);
while((($code, $value)= Zerg::zerg_get_token()) && $code)
{
print "UNMATCHED CHAR:\t$value\n";
}
=head1 BUGS
The tokens DESCRIPTION_ANNOTATION, DESCRIPTION_SCORE and
DESCRIPTION_EVALUE are scanned all at once and released one by one on
user request. So, if the user wants to get any of these fields, they
must be unignored BEFORE scanning DESCRIPTION_ANNOTATION.
zerg_get_token_offset() may return incorrect values for these tokens
and those that are modified by the parser, namely: QUERY_LENGTH,
SUBJECT_LENGTH, EVALUE, GAPS.
=head1 TODO
Add more tokens to the scanner as the need for that appears.
=head1 AUTHOR
Apuã Paquola, IQ-USP Bioinformatics Lab, apua@iq.usp.br
Laszlo Kajan <lkajan@rostlab.org>, Technical University of Munich, Germany
=head1 SEE ALSO
perl(1), flex(1), http://www.bioperl.org, http://www.ncbi.nlm.nih.gov/BLAST
=cut
|