/usr/share/perl5/Text/WrapI18N.pm is in libtext-wrapi18n-perl 0.06-7.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | package Text::WrapI18N;
require Exporter;
use strict;
use warnings;
our @ISA = qw(Exporter);
our @EXPORT = qw(wrap);
our @EXPORT_OK = qw($columns $separator);
our %EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]);
our $VERSION = '0.06';
use vars qw($columns $break $tabstop $separator $huge $unexpand $charmap);
use Text::CharWidth qw(mbswidth mblen);
BEGIN {
$columns = 76;
# $break, $separator, $huge, and $unexpand are not supported yet.
$break = '\s';
$tabstop = 8;
$separator = "\n";
$huge = 'wrap';
$unexpand = 1;
undef $charmap;
}
sub wrap {
my $top1=shift;
my $top2=shift;
my $text=shift;
$text = $top1 . $text;
# $out already-formatted text for output including current line
# $len visible width of the current line without the current word
# $word the current word which might be sent to the next line
# $wlen visible width of the current word
# $c the current character
# $b whether to allow line-breaking after the current character
# $cont_lf true when LF (line feed) characters appear continuously
# $w visible width of the current character
my $out = '';
my $len = 0;
my $word = '';
my $wlen = 0;
my $cont_lf = 0;
my ($c, $w, $b);
$text =~ s/\n+$/\n/;
while(1) {
if (length($text) == 0) {
return $out . $word;
}
($c, $text, $w, $b) = _extract($text);
if ($c eq "\n") {
$out .= $word . $separator;
if (length($text) == 0) {return $out;}
$len = 0;
$text = $top2 . $text;
$word = '' ; $wlen = 0;
next;
} elsif ($w == -1) {
# all control characters other than LF are ignored
next;
}
# when the current line have enough room
# for the curren character
if ($len + $wlen + $w <= $columns) {
if ($c eq ' ' || $b) {
$out .= $word . $c;
$len += $wlen + $w;
$word = ''; $wlen = 0;
} else {
$word .= $c; $wlen += $w;
}
next;
}
# when the current line overflows with the
# current character
if ($c eq ' ') {
# the line ends by space
$out .= $word . $separator;
$len = 0;
$text = $top2 . $text;
$word = ''; $wlen = 0;
} elsif ($wlen + $w <= $columns - length ($top2)) {
# the current word is sent to next line
$out .= $separator;
$len = 0;
$text = $top2 . $word . $c . $text;
$word = ''; $wlen = 0;
} else {
# the current word is too long to fit a line
$out .= $word . $separator;
$len = 0;
$text = $top2 . $c . $text;
$word = ''; $wlen = 0;
}
}
}
# Extract one character from the beginning from the given string.
# Supports multibyte encodings such as UTF-8, EUC-JP, EUC-KR,
# GB2312, and Big5.
#
# return value: (character, rest string, width, line breakable)
# character: a character. This may consist from multiple bytes.
# rest string: given string without the extracted character.
# width: number of columns which the character occupies on screen.
# line breakable: true if the character allows line break after it.
sub _extract {
my $string=shift;
my ($l, $c, $r, $w, $b, $u);
if (length($string) == 0) {
return ('', '', 0, 0);
}
$l = mblen($string);
if ($l == 0 || $l == -1) {
return ('?', substr($string,1), 1, 0);
}
$c = substr($string, 0, $l);
$r = substr($string, $l);
$w = mbswidth($c);
if (!defined($charmap)) {
$charmap = `/usr/bin/locale charmap`;
}
if ($charmap =~ /UTF.8/i) {
# UTF-8
if ($l == 3) {
# U+0800 - U+FFFF
$u = (ord(substr($c,0,1))&0x0f) * 0x1000
+ (ord(substr($c,1,1))&0x3f) * 0x40
+ (ord(substr($c,2,1))&0x3f);
$b = _isCJ($u);
} elsif ($l == 4) {
# U+10000 - U+10FFFF
$u = (ord(substr($c,0,1))&7) * 0x40000
+ (ord(substr($c,1,1))&0x3f) * 0x1000
+ (ord(substr($c,2,1))&0x3f) * 0x40
+ (ord(substr($c,3,1))&0x3f);
$b = _isCJ($u);
} else {
$b = 0;
}
} elsif ($charmap =~ /(^EUC)|(^GB)|(^BIG)/i) {
# East Asian legacy encodings
# (EUC-JP, EUC-KR, GB2312, Big5, Big5HKSCS, and so on)
if (ord(substr($c,0,1)) >= 0x80) {$b = 1;} else {$b = 0;}
} else {
$b = 0;
}
return ($c, $r, $w, $b);
}
# Returns 1 for Chinese and Japanese characters. This means that
# these characters allow line wrapping after this character even
# without whitespaces because these languages don't use whitespaces
# between words.
#
# Character must be given in UCS-4 codepoint value.
sub _isCJ {
my $u=shift;
if ($u >= 0x3000 && $u <= 0x312f) {
if ($u == 0x300a || $u == 0x300c || $u == 0x300e ||
$u == 0x3010 || $u == 0x3014 || $u == 0x3016 ||
$u == 0x3018 || $u == 0x301a) {return 0;}
return 1;
} # CJK punctuations, Hiragana, Katakana, Bopomofo
if ($u >= 0x31a0 && $u <= 0x31bf) {return 1;} # Bopomofo
if ($u >= 0x31f0 && $u <= 0x31ff) {return 1;} # Katakana extension
if ($u >= 0x3400 && $u <= 0x9fff) {return 1;} # Han Ideogram
if ($u >= 0xf900 && $u <= 0xfaff) {return 1;} # Han Ideogram
if ($u >= 0x20000 && $u <= 0x2ffff) {return 1;} # Han Ideogram
return 0;
}
1;
__END__
=head1 NAME
Text::WrapI18N - Line wrapping module with support for multibyte, fullwidth,
and combining characters and languages without whitespaces between words
=head1 SYNOPSIS
use Text::WrapI18N qw(wrap $columns);
wrap(firstheader, nextheader, texts);
=head1 DESCRIPTION
This module intends to be a better Text::Wrap module.
This module is needed to support multibyte character encodings such
as UTF-8, EUC-JP, EUC-KR, GB2312, and Big5. This module also supports
characters with irregular widths, such as combining characters (which
occupy zero columns on terminal, like diacritical marks in UTF-8) and
fullwidth characters (which occupy two columns on terminal, like most
of east Asian characters). Also, minimal handling of languages which
doesn't use whitespaces between words (like Chinese and Japanese) is
supported.
Like Text::Wrap, hyphenation and "kinsoku" processing are not supported,
to keep simplicity.
I<wrap(firstheader, nextheader, texts)> is the main subroutine of
Text::WrapI18N module to execute the line wrapping. Input parameters
and output data emulate Text::Wrap. The texts have to be written in
locale encoding.
=head1 SEE ALSO
locale(5), utf-8(7), charsets(7)
=head1 AUTHOR
Tomohiro KUBOTA, E<lt>kubota@debian.orgE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright 2003 by Tomohiro KUBOTA
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
|