/usr/share/perl5/MARC/Charset/Code.pm is in libmarc-charset-perl 1.35-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 | package MARC::Charset::Code;
use strict;
use warnings;
use base qw(Class::Accessor);
use Carp qw(croak);
use Encode qw(encode_utf8);
use MARC::Charset::Constants qw(:all);
MARC::Charset::Code
->mk_accessors(qw(marc ucs name charset is_combining alt
marc_right_half marc_left_half));
=head1 NAME
MARC::Charset::Code - represents a MARC-8/UTF-8 mapping
=head1 SYNOPSIS
=head1 DESCRIPTION
Each mapping from a MARC-8 value to a UTF-8 value is represented by
a MARC::Charset::Code object in a MARC::Charset::Table.
=head1 METHODS
=head2 new()
The constructor.
=head2 name()
A descriptive name for the code point.
=head2 marc()
A string representing the MARC-8 bytes codes.
=head2 ucs()
A string representing the UCS code point in hex.
=head2 charset_code()
The MARC-8 character set code.
=head2 is_combining()
Returns true/false to tell if the character is a combining character.
=head2 marc_left_half()
If the character is the right half of a "double diacritic", returns
a hex string representing the MARC-8 value of the left half.
=head2 marc_right_half()
If the character is the left half of a "double diacritic", returns
a hex string representing the MARC-8 value of the right half.
=head2 to_string()
A stringified version of the object suitable for pretty printing.
=head2 char_value()
Returns the unicode character. Essentially just a helper around
ucs().
=cut
sub char_value
{
return chr(hex(shift->ucs()));
}
=head2 g0_marc_value()
The string representing the MARC-8 encoding
for lookup.
=cut
sub g0_marc_value
{
my $code = shift;
my $marc = $code->marc();
if ($code->charset_name eq 'CJK') {
return
chr(hex(substr($marc,0,2))) .
chr(hex(substr($marc,2,2))) .
chr(hex(substr($marc,4,2)));
} else {
return chr(hex($marc));
}
}
=head2 marc_value()
The string representing the MARC-8 encodingA
for output.
=cut
sub marc_value
{
my $code = shift;
my $marc = $code->marc();
if ($code->charset_name eq 'CJK') {
return
chr(hex(substr($marc,0,2))) .
chr(hex(substr($marc,2,2))) .
chr(hex(substr($marc,4,2)));
} else {
if ($code->default_charset_group() eq 'G0') {
return chr(hex($marc));
} else {
return chr(hex($marc) + 128);
}
}
}
=head2 charset_name()
Returns the name of the character set, instead of the code.
=cut
sub charset_name
{
return MARC::Charset::Constants::charset_name(shift->charset_value());
}
=head2 to_string()
Returns a stringified version of the object.
=cut
sub to_string
{
my $self = shift;
my $str =
$self->name() . ': ' .
'charset_code=' . $self->charset() . ' ' .
'marc=' . $self->marc() . ' ' .
'ucs=' . $self->ucs() . ' ';
$str .= ' combining' if $self->is_combining();
return $str;
}
=head2 marc8_hash_code()
Returns a hash code for this Code object for looking up the object using
MARC8. First portion is the character set code and the second is the
MARC-8 value.
=cut
sub marc8_hash_code
{
my $self = shift;
return sprintf('%s:%s', $self->charset_value(), $self->g0_marc_value());
}
=head2 utf8_hash_code()
Returns a hash code for uniquely identifying a Code by it's UCS value.
=cut
sub utf8_hash_code
{
return int(hex(shift->ucs()));
}
=head2 default_charset_group
Returns 'G0' or 'G1' indicating where the character is typicalling used
in the MARC-8 environment.
=cut
sub default_charset_group
{
my $charset = shift->charset_value();
return 'G0'
if $charset eq ASCII_DEFAULT
or $charset eq GREEK_SYMBOLS
or $charset eq SUBSCRIPTS
or $charset eq SUPERSCRIPTS
or $charset eq BASIC_LATIN
or $charset eq BASIC_ARABIC
or $charset eq BASIC_CYRILLIC
or $charset eq BASIC_GREEK
or $charset eq BASIC_HEBREW
or $charset eq CJK;
return 'G1';
}
=head2 get_marc8_escape
Returns an escape sequence to move to the Code from another marc-8 character
set.
=cut
sub get_escape
{
my $charset = shift->charset_value();
return ESCAPE . $charset
if $charset eq ASCII_DEFAULT
or $charset eq GREEK_SYMBOLS
or $charset eq SUBSCRIPTS
or $charset eq SUPERSCRIPTS;
return ESCAPE . SINGLE_G0_A . $charset
if $charset eq ASCII_DEFAULT
or $charset eq BASIC_LATIN
or $charset eq BASIC_ARABIC
or $charset eq BASIC_CYRILLIC
or $charset eq BASIC_GREEK
or $charset eq BASIC_HEBREW;
return ESCAPE . SINGLE_G1_A . $charset
if $charset eq EXTENDED_ARABIC
or $charset eq EXTENDED_LATIN
or $charset eq EXTENDED_CYRILLIC;
return ESCAPE . MULTI_G0_A . CJK
if $charset eq CJK;
}
=head2 charset_value
Returns the charset value, not the hex sequence.
=cut
sub charset_value
{
return chr(hex(shift->charset()));
}
1;
|