# /usr/share/perl5/MAB2/Parser/Disk.pm is in libcatmandu-mab2-perl 0.13-1.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
package MAB2::Parser::Disk;
our $VERSION = '0.13';
use strict;
use warnings;
use charnames qw< :full >;
use Carp qw(croak);
use Readonly;
# Character that introduces each subfield inside the data part of a field
# (U+001F INFORMATION SEPARATOR ONE). _decode() also accepts a literal "$".
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}};
# Separator between the leader and the fields of one record: a line feed.
Readonly my $END_OF_FIELD => qq{\N{LINE FEED}};
# Assigned to $/ in next(). The empty string switches Perl's line reading to
# paragraph mode, so one read returns everything up to the next blank line(s).
Readonly my $END_OF_RECORD => q{};
# Constructor: build a parser around a file path or an already opened
# filehandle.
#
# Arguments:
#   $class - class name the new object is blessed into
#   $file  - path of a file with MAB2 Diskette records, or an open filehandle
#
# A path is opened read-only with a UTF-8 decoding layer. A filehandle is
# used as it is, so the caller decides which I/O layers apply.
#
# Returns the blessed parser object (keys: filename, rec_number, reader).
# Croaks when no argument is given, when the argument is neither a filehandle
# nor an existing path, or when the file cannot be opened.
sub new {
    my $class = shift;
    my $file  = shift;

    # An undefined argument would only produce "uninitialized" warnings in
    # the checks below before failing anyway, so reject it up front.
    croak 'file or filehandle required' if !defined $file;

    my $self = {
        filename   => undef,
        rec_number => 0,
        reader     => undef,
    };

    # fileno() gives a defined value only for something usable as a
    # filehandle. For anything else the result stays undefined, either
    # because fileno() returns undef or because it dies inside the eval.
    my $fileno = eval { fileno $file };

    if ( defined $fileno ) {
        $self->{filename} = scalar $file;
        $self->{reader}   = $file;
    }
    elsif ( -e $file ) {
        # Include $! so the caller learns why the open failed.
        open $self->{reader}, '<:encoding(UTF-8)', $file
            or croak "cannot read from file $file: $!";
        $self->{filename} = $file;
    }
    else {
        croak "file or filehandle $file does not exist";
    }

    return bless $self, $class;
}
# Read and decode the next record from the input stream.
#
# The reader is asked for one chunk with $/ set to $END_OF_RECORD (the empty
# string, i.e. paragraph mode), the chunk is handed to _decode(), and the
# record counter is advanced.
#
# Returns a hash reference with two keys:
#   _id    - last element of the first field whose tag is '001'
#            (undef when the record has no such field)
#   record - the decoded record, an array of arrays as built by _decode()
# Returns nothing (empty list / undef) when the input is exhausted.
# Errors raised by _decode() propagate to the caller.
sub next {
    my $self = shift;

    # Only change the record separator for the duration of this call. It
    # stays in effect while _decode() runs chomp() on the chunk.
    local $/ = $END_OF_RECORD;

    # Test definedness, not truth: getline() signals end of input with undef,
    # whereas a final chunk consisting of the string "0" is data and must not
    # be mistaken for EOF.
    my $data = $self->{reader}->getline();
    return if !defined $data;

    $self->{rec_number}++;
    my $record = _decode($data);

    # get last subfield from 001 as id; tags are compared for equality
    # rather than matched as an unanchored pattern
    my ($id) = map { $_->[-1] } grep { $_->[0] eq '001' } @{$record};

    return { _id => $id, record => $record };
}
# Deserialize one raw MAB2 Diskette record into an array of arrays.
#
# Argument:
#   $reader - the raw text of a single record: a leader line followed by one
#             field per line (lines separated by $END_OF_FIELD)
#
# Returns an array reference. Its first element is the leader as
# [ 'LDR', '', '_', $leader ]; every further element is one field as
# [ $tag, $indicator, $code, $value, ... ] when the data carries subfields,
# or [ $tag, $indicator, '_', $data ] when it does not.
#
# Croaks on an empty record, an invalid leader, a field of four characters
# or fewer, a tag that is not three digits, an indicator that is not a
# lower-case letter or whitespace, or a subfield marker without any data.
sub _decode {
    my $reader = shift;

    # chomp() honours the current $/; next() calls this sub with $/ set to
    # the record separator.
    chomp($reader);

    my @record;
    my @fields = split( $END_OF_FIELD, $reader );

    my $leader = shift @fields;
    if ( !defined $leader ) {
        croak 'record leader not valid: record is empty';
    }

    # "###", one whitespace, then the leader proper. The dot in the version
    # string "M2.0" is escaped so that only a literal dot is accepted.
    if ( $leader =~ m/^\N{NUMBER SIGN}{3}\s(\d{5}[cdnpu]M2\.0\d{7}\s{6}\w)/xms ) {
        push( @record, [ 'LDR', '', '_', $1 ] );
    }
    else {
        croak "record leader not valid: $leader";
    }

    # ToDo: skip faulty fields
    foreach my $field (@fields) {
        croak "incomplete field: \"$field\"" if length($field) <= 4;

        # fixed layout: 3 characters tag, 1 character indicator, rest is data
        my $tag  = substr( $field, 0, 3 );
        my $ind  = substr( $field, 3, 1 );
        my $data = substr( $field, 4 );

        # check for a 3-digit numeric tag
        ( $tag =~ m/^[0-9]{3}$/xms ) or croak "Invalid tag: \"$tag\"";

        # check if indicator is a single lower-case letter or whitespace
        ( $ind =~ m/^[a-z\s]$/xms ) or croak "Invalid indicator: \"$ind\"";

        # check if data contains subfield indicators
        if ( $data =~ m/^\s*($SUBFIELD_INDICATOR|\$)(.*)/ ) {
            my ( $separator, $content ) = ( $1, $2 );

            # split() compiles its first argument as a regex. An unquoted
            # "$" would be the end-of-string anchor and never split anything,
            # so the captured separator is quoted to match literally.
            # Empty fragments (adjacent separators) carry no subfield code
            # and are dropped.
            my @subfields
                = grep { length } split( /\Q$separator\E/, $content );

            (@subfields)
                or croak "no subfield data found: \"$tag$ind$data\"";

            # first character of each fragment is the subfield code,
            # the remainder is its value
            push(
                @record,
                [   $tag, $ind,
                    map { substr( $_, 0, 1 ), substr( $_, 1 ) } @subfields
                ]
            );
        }
        else {
            push( @record, [ $tag, $ind, '_', $data ] );
        }
    }

    return \@record;
}
1; # End of MAB2::Parser::Disk
__END__
=pod
=encoding UTF-8
=head1 NAME
MAB2::Parser::Disk - MAB2 Diskette format parser
=head1 SYNOPSIS
L<MAB2::Parser::Disk> is a parser for MAB2 Diskette records.
L<MAB2::Parser::Disk> expects UTF-8 encoded files as input. To read data in
another encoding, pass a filehandle opened with the appropriate I/O layer.
use MAB2::Parser::Disk;
my $parser = MAB2::Parser::Disk->new( $filename );
while ( my $record_hash = $parser->next() ) {
# do something
}
=head1 Arguments
=over
=item C<file>
Path to file with MAB2 Diskette records.
=item C<fh>
Open filehandle for file with MAB2 Diskette records.
=back
=head1 METHODS
=head2 new($filename | $filehandle)
=head2 next()
Reads the next record from the MAB2 input stream. Returns a hash reference with the keys C<_id> and C<record>, or nothing when the end of the input is reached.
=head2 _decode($record)
Deserialize a raw MAB2 record to an ARRAY of ARRAYs.
=head1 SEE ALSO
L<Catmandu::Importer::MAB2>.
=head1 AUTHOR
Johann Rolschewski <jorol@cpan.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2013 by Johann Rolschewski.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
|