/usr/share/perl5/Text/vFile/asData.pm is in libtext-vfile-asdata-perl 0.07-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | package Text::vFile::asData;
use strict;
use warnings;
no warnings 'uninitialized';
use base qw( Class::Accessor::Chained::Fast );
__PACKAGE__->mk_accessors(qw( preserve_params ));
our $VERSION = '0.07';
=head1 NAME
Text::vFile::asData - parse vFile formatted files into data structures
=head1 SYNOPSIS
use Text::vFile::asData;
open my $fh, "foo.ics"
or die "couldn't open ics: $!";
my $data = Text::vFile::asData->new->parse( $fh );
=head1 DESCRIPTION
Text::vFile::asData reads vFile format files, such as vCard (RFC 2426) and
vCalendar (RFC 2445).
=cut
sub _unwrap_lines {
my $self = shift;
my @lines;
for (@_) {
my $line = $_; # $_ may be readonly
$line =~ s{[\r\n]+$}{}; # lines SHOULD end CRLF
if ($line =~ /^[ \t](.*)/) { # Continuation line (RFC Sect. 4.1)
die "Continuation line, but no preceding line" unless @lines;
$lines[-1] .= $1;
next;
}
push @lines, $line;
}
return @lines;
}
sub parse {
my $self = shift;
my $fh = shift;
return $self->parse_lines( <$fh> );
}
# like Text::ParseWords' parse_line, only C-style so the regex engine doesn't
# blow its stack, and it's also got a $limit like split
# this only took a trainride, so I'm pretty sure there are lurking
# corner cases - when I get a tuit I'll take the Text::ParseWords
# tests and run them through it
sub parse_line {
my ($delim, $keep, $text, $limit) = @_;
my ($current, @parts);
my ($quote, $escaped);
while (length $text) {
if ($text =~ s{^(\\)}{}) {
$current .= $1 if $escaped || $keep;
$escaped = !$escaped;
next;
}
if (!$quote && !$escaped && $text =~ s{^$delim}{}) {
push @parts, $current;
$current = undef;
if (defined $limit && @parts == $limit -1) {
return @parts, $text;
}
}
else {
# pull the character off to take a looksee
$text =~ s{(.)}{};
my $char = $1;
if ($char =~ m{['"]} && !$escaped && (!defined($quote) || $char eq $quote)) {
# either it's defined and matches, in which case we
# clear the quote variable, or it's undefined which
# makes this quote an opening quote
$quote = (defined $quote && $char eq $quote) ? undef : $char;
$current .= $char if $keep;
}
else {
$current .= $char;
}
}
$escaped = 0;
}
return @parts, $current;
}
sub parse_lines {
my $self = shift;
my @path;
my $current;
for ($self->_unwrap_lines( @_ )) {
# Ignore leading or trailing blank lines at the top/bottom of the
# input. Not sure about completely blank lines within the input
next if scalar @path == 0 and $_ =~ /^\s*$/;
if (/^BEGIN:(.*)/i) {
push @path, $current;
$current = { type => $1 };
push @{ $path[-1]{objects} }, $current;
next;
}
if (/^END:(.*)/i) {
die "END $1 in $current->{type}"
unless lc $current->{type} eq lc $1;
$current = pop @path;
next;
}
# we'd use Text::ParseWords here, but it likes to segfault.
my ($name, $value) = parse_line( ':', 1, $_, 2);
$value = '' unless defined $value;
my @params = parse_line( ';', 0, $name );
$name = shift @params;
$value = { value => $value };
foreach my $param (@params) {
my ($p_name, $p_value) = split /=/, $param;
push @{ $value->{params} }, { $p_name => $p_value }
if $self->preserve_params;
$value->{param}{ $p_name } = $p_value;
}
push @{ $current->{properties}{ $name } }, $value;
}
# something did a BEGIN but no END - TODO, unwind this nicely as
# it may be more than one level
die "BEGIN $current->{type} without matching END"
if @path;
return $current;
}
# this might not strictly comply
sub generate_lines {
my $self = shift;
my $this = shift;
my @lines;
# XXX all the existence checks are to prevent auto-vivification
# breaking if_diff tests - do we mind, or should the fields have been
# there anyway?
push @lines, "BEGIN:$this->{type}" if exists $this->{type};
if (exists $this->{properties}) {
while (my ($name, $v) = each %{ $this->{properties} } ) {
for my $value (@$v) {
# XXX so we're taking params in preference to param,
# let's be sure to document that when we document this
# method
my $param = join ';', '', map {
my $hash = $_;
map {
"$_" . (defined $hash->{$_} ? "=" . $hash->{$_} : "")
} keys %$hash
} @{ $value->{params} || [ $value->{param} ] };
my $line = "$name$param:$value->{value}";
# wrapping, but done ugly
my @chunks = $line =~ m/(.{1,72})/g;
push @lines, shift @chunks;
push @lines, map { " $_" } @chunks;
}
}
}
if (exists $this->{objects}) {
push @lines, $self->generate_lines( $_ ) for @{ $this->{objects} }
}
push @lines, "END:$this->{type}" if exists $this->{type};
return @lines;
}
1;
__END__
=head1 DATA STRUCTURE
A vFile contains one or more objects, delimited by BEGIN and END tags.
BEGIN:VCARD
...
END:VCARD
Objects may contain sub-objects;
BEGIN:VCALENDAR
...
BEGIN:VEVENT
...
END:VEVENT
...
ENV:VCALENDAR
Each object consists of one or more properties. Each property
consists of a name, zero or more optional parameters, and then a
value. This fragment:
DTSTART;VALUE=DATE:19970317
identifies a property with the name, C<DSTART>, the parameter
C<VALUE>, which has the value C<DATE>, and the property's value is
C<19970317>. Those of you with an XML bent might find this more
recognisable as:
<dtstart value="date">19970317</dtstart>
The return value from the C<parse()> method is a hash ref.
The top level key, C<objects>, refers to an array ref. Each entry in the
array ref is a hash ref with two or three keys.
The value of the first key, C<type>, is a string corresponding to the
type of the object. E.g., C<VCARD>, C<VEVENT>, and so on.
The value of the second key, C<properties>, is a hash ref, with property
names as keys, and an array ref of those property values. It's an array
ref, because some properties may appear within an object multiple times
with different values. For example;
BEGIN:VEVENT
ATTENDEE;CN="Nik Clayton":mailto:nik@FreeBSD.org
ATTENDEE;CN="Richard Clamp":mailto:richardc@unixbeard.net
...
END:VEVENT
Each entry in the array ref is a hash ref with one or two keys.
The first key, C<value>, corresponds to the property's value.
The second key, C<param>, contains a hash ref of the property's
parameters. Keys in this hash ref are the parameter's name, the value
is the parameter's value. (If you enable the C<preserve_params>
option there is an additional key populated, called C<params>. It is
an array ref of hash refs, each hash ref is the parameter's name and
the parameter's value - these are collected in the order they are
encountered to prevent hash collisions as seen in some vCard files)
line.)
The third key in the top level C<objects> hash ref is C<objects>. If
it exists, it indicates that sub-objects were found. The value of
this key is an array ref of sub-objects, with identical keys and
behaviour to that of the top level C<objects> key. This recursive
structure continues, nesting as deeply as there were sub-objects in
the input file.
The C<bin/v2yaml> script that comes with this distribution displays the
format of a vFile as YAML. C<t/03usage.t> has examples of picking out
the relevant information from the data structure.
=head1 AUTHORS
Richard Clamp <richardc@unixbeard.net> and Nik Clayton <nik@FreeBSD.org>
=head1 COPYRIGHT
Copyright 2004, Richard Clamp and Nik Clayton. All Rights Reserved.
This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=head1 CAVEATS
We don't do any decoding of property values, including descaping
C<\,>, we're still undecided as to whether this is a bug.
=head1 BUGS
Aside from the TODO list items, none known.
=head1 SEE ALSO
Text::vFile - parses to objects, doesn't handle nested items
RFC 2426 - vCard specification
RFC 2445 - vCalendar specification
=cut
# Emacs local variables to keep the style consistent
Local Variables:
cperl-indent-level: 4
End:
|