/usr/share/perl5/MAB2/Parser/RAW.pm is in libcatmandu-mab2-perl 0.13-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | package MAB2::Parser::RAW;
our $VERSION = '0.13';
use strict;
use warnings;
use charnames qw< :full >;
use Carp qw(croak);
use Readonly;
# Number of characters at the start of a raw record that hold the leader.
Readonly my $LEADER_LEN => 24;
# Separator that introduces each subfield inside a field (U+001F).
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}};
# Separator between the fields of a record (U+001E).
Readonly my $END_OF_FIELD => qq{\N{INFORMATION SEPARATOR TWO}};
# Terminator expected as the last character of a record (U+001D).
Readonly my $END_OF_RECORD => qq{\N{INFORMATION SEPARATOR THREE}};
# Constructor.
#
# Arguments: the class name and either an open filehandle or the path of an
#            existing file. A path is opened for reading with the UTF-8
#            layer; a filehandle is used as given.
# Returns:   a blessed hash ref with the keys filename, rec_number and reader.
# Croaks when no argument is given, when the path does not exist, or when
# the file cannot be opened.
sub new {
    my $class = shift;
    my $file  = shift;

    # Fail early instead of emitting "uninitialized" warnings further down.
    croak "file or filehandle required" if !defined $file;

    my $self = {
        filename   => undef,
        rec_number => 0,
        reader     => undef,
    };

    # check for file or filehandle: fileno() yields a defined value only for
    # an open handle; the eval guards against arguments it rejects outright
    my $ishandle = eval { fileno($file); };
    if ( !$@ && defined $ishandle ) {
        $self->{filename} = scalar $file;
        $self->{reader}   = $file;
    }
    elsif ( -e $file ) {
        # include $! so the caller learns why the open failed
        open $self->{reader}, '<:encoding(UTF-8)', $file
            or croak "cannot read from file $file: $!";
        $self->{filename} = $file;
    }
    else {
        croak "file or filehandle $file does not exist";
    }

    return ( bless $self, $class );
}
# Read and decode the next record from the reader.
#
# Returns: a hash ref { _id => $id, record => $record } where $record is the
#          structure produced by _decode() and $id is the last element of the
#          first field tagged '001' (undef when there is no such field).
#          Returns nothing (bare return) at end of input.
# Side effect: increments the rec_number counter for every line read.
sub next {
    my $self = shift;

    # Test definedness, not truth: a line consisting of "0" is still a line
    # and must not be mistaken for end of input.
    my $line = $self->{reader}->getline();
    return if !defined $line;

    $self->{rec_number}++;
    my $record = _decode($line);

    # get last subfield from 001 as id
    my ($id) = map { $_->[-1] } grep { $_->[0] eq '001' } @{$record};

    return { _id => $id, record => $record };
}
# Deserialize one raw MAB2 record into an ARRAY of ARRAYs.
#
# Argument: the raw record as a string; one trailing newline is removed.
# Returns:  an array ref. The first entry is [ 'LDR', '', '_', $leader ];
#           every following entry is [ $tag, $indicator, $code, $value, ... ].
#           Fields without subfields use the pseudo subfield code '_'.
# Croaks when the record terminator is missing, the leader does not match,
# or a field does not start with a three-digit tag plus indicator.
sub _decode {
    my $reader = shift;

    # An undefined or empty record cannot carry a terminator; report it with
    # the usual message instead of tripping "substr outside of string".
    croak("record terminator not found.") if !defined $reader;
    chomp($reader);
    if ( !length $reader || substr( $reader, -1, 1 ) ne $END_OF_RECORD ) {
        croak("record terminator not found.");
    }

    my @record;
    if ( substr( $reader, 0, $LEADER_LEN ) =~ m/(\d{5}\wM2.0\d*\s*\w)/ ) {
        push( @record, [ 'LDR', '', '_', $1 ] );
    }
    else {
        croak("no valid record leader found.");
    }

    # everything between the leader and the record terminator
    my @fields
        = split( /$END_OF_FIELD/, substr( $reader, $LEADER_LEN, -1 ) );

    for my $field (@fields) {

        # Anchored at the start: a field must begin with tag and indicator,
        # junk in front of the tag is an error rather than silently skipped.
        my ( $tag, $ind, $data )
            = $field =~ m/\A(\d{3})([A-Za-z0-9\s])(.*)/s
            or croak("no valid field structure found.");

        # Anchored as well: only data that starts with a subfield indicator
        # (optionally after whitespace) is split into subfields. Unanchored,
        # any text before the first indicator would be dropped.
        if ( $data =~ m/\A\s*$SUBFIELD_INDICATOR(.*)/s ) {
            my $subfields = $1;
            push(
                @record,
                [   $tag,
                    $ind,
                    # first character is the subfield code, the rest its value
                    map { ( substr( $_, 0, 1 ), substr( $_, 1 ) ) }
                        split( /$SUBFIELD_INDICATOR/, $subfields )
                ]
            );
        }
        else {
            push( @record, [ $tag, $ind, '_', $data ] );
        }
    }

    return \@record;
}
1; # End of MAB2::Parser::RAW
__END__
=pod
=encoding UTF-8
=head1 NAME
MAB2::Parser::RAW - MAB2 RAW format parser
=head1 SYNOPSIS
L<MAB2::Parser::RAW> is a parser for raw MAB2 records.
L<MAB2::Parser::RAW> expects UTF-8 encoded files as input. For any other encoding, provide a
filehandle opened with the appropriate I/O layer.
use MAB2::Parser::RAW;
my $parser = MAB2::Parser::RAW->new( $filename );
while ( my $record_hash = $parser->next() ) {
# do something
}
=head1 Arguments
=over
=item C<file>
Path to file with MAB2 Band records.
=item C<fh>
Open filehandle for file with MAB2 Band records.
=back
=head1 METHODS
=head2 new($filename | $filehandle)
=head2 next()
Reads the next record from the MAB2 input stream. Returns a hash reference with the keys C<_id> and C<record>, or nothing at the end of the input.
=head2 _decode($record)
Deserialize a raw MAB2 record to an ARRAY of ARRAYs.
=head1 SEE ALSO
L<Catmandu::Importer::MAB2>.
=head1 AUTHOR
Johann Rolschewski <jorol@cpan.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2013 by Johann Rolschewski.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
|