/usr/share/php/texy/src/Texy/modules/TexyTypographyModule.php is in php-texy 2.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
<?php
/**
* This file is part of the Texy! (http://texy.info)
* Copyright (c) 2004 David Grudl (http://davidgrudl.com)
*/
/**
* Typography replacements module.
*
* @author David Grudl
*/
final class TexyTypographyModule extends TexyModule
{
    /**
     * Opening/closing quotation marks per locale, stored as raw UTF-8 bytes.
     * Each entry holds a 'singleQuotes' and a 'doubleQuotes' pair: array(opening, closing).
     *
     * @see http://www.unicode.org/cldr/data/charts/by_type/misc.delimiters.html
     * @var array
     */
    public static $locales = array(
        'cs' => array(
            'singleQuotes' => array("\xe2\x80\x9a", "\xe2\x80\x98"), // U+201A, U+2018
            'doubleQuotes' => array("\xe2\x80\x9e", "\xe2\x80\x9c"), // U+201E, U+201C
        ),
        'en' => array(
            'singleQuotes' => array("\xe2\x80\x98", "\xe2\x80\x99"), // U+2018, U+2019
            'doubleQuotes' => array("\xe2\x80\x9c", "\xe2\x80\x9d"), // U+201C, U+201D
        ),
        'fr' => array(
            'singleQuotes' => array("\xe2\x80\xb9", "\xe2\x80\xba"), // U+2039, U+203A
            'doubleQuotes' => array("\xc2\xab", "\xc2\xbb"), // U+00AB, U+00BB
        ),
        'de' => array(
            'singleQuotes' => array("\xe2\x80\x9a", "\xe2\x80\x98"), // U+201A, U+2018
            'doubleQuotes' => array("\xe2\x80\x9e", "\xe2\x80\x9c"), // U+201E, U+201C
        ),
        'pl' => array(
            'singleQuotes' => array("\xe2\x80\x9a", "\xe2\x80\x99"), // U+201A, U+2019
            'doubleQuotes' => array("\xe2\x80\x9e", "\xe2\x80\x9d"), // U+201E, U+201D
        ),
    );

    /** @var string  key into self::$locales; unknown keys fall back to 'en' in beforeParse() */
    public $locale = 'cs';

    /** @var array  regular expression => replacement; built by beforeParse(), read by postLine() */
    private $pattern;


    /**
     * Stores the Texy instance and hooks this module into it: postLine() is
     * registered under the name 'typography' and beforeParse() is added as a
     * handler for the 'beforeParse' event.
     *
     * @param  Texy
     */
    public function __construct($texy)
    {
        $this->texy = $texy;
        $texy->registerPostLine(array($this, 'postLine'), 'typography');
        $texy->addHandler('beforeParse', array($this, 'beforeParse'));
    }


    /**
     * Text pre-processing.
     *
     * Builds the table of typographic replacements for the currently selected
     * locale and stores it in $this->pattern. The text itself is not modified here.
     *
     * @param  Texy
     * @param  string  passed by reference, left untouched by this method
     * @return void
     */
    public function beforeParse($texy, & $text)
    {
        // CONTENT_MARKUP mark: \x17-\x1F
        // CONTENT_REPLACED mark: \x16
        // CONTENT_TEXTUAL mark: \x17
        // CONTENT_BLOCK: not used in postLine

        // pick the quote characters for the configured locale, English being the fallback
        $locale = isset(self::$locales[$this->locale])
            ? self::$locales[$this->locale]
            : self::$locales['en'];

        // NOTE(review): the order of the rules looks significant (e.g. the "behind dash"
        // rule matches dashes emitted by the earlier dash rules), which presumes the
        // rules are applied one after another - confirm against TexyRegexp::replace().
        $this->pattern = array(
            '#(?<![.\x{2026}])\.{3,4}(?![.\x{2026}])#mu' => "\xe2\x80\xa6", // ellipsis ...
            '#(?<=[\d ]|^)-(?=[\d ]|$)#' => "\xe2\x80\x93", // en dash 123-123
            '#(?<=[^!*+,/:;<=>@\\\\_|-])--(?=[^!*+,/:;<=>@\\\\_|-])#' => "\xe2\x80\x93", // en dash alphanum--alphanum
            '#,-#' => ",\xe2\x80\x93", // en dash ,-
            '#(?<!\d)(\d{1,2}\.) (\d{1,2}\.) (\d\d)#' => "\$1\xc2\xa0\$2\xc2\xa0\$3", // date 23. 1. 1978
            '#(?<!\d)(\d{1,2}\.) (\d{1,2}\.)#' => "\$1\xc2\xa0\$2", // date 23. 1.
            '# --- #' => "\xc2\xa0\xe2\x80\x94 ", // em dash ---
            '# ([\x{2013}\x{2014}])#u' => "\xc2\xa0\$1", // behind dash (dash stays at line end)
            '# <-{1,2}> #' => " \xe2\x86\x94 ", // left right arrow <-->
            '#-{1,}> #' => "\xe2\x86\x92 ", // right arrow -->
            '# <-{1,}#' => " \xe2\x86\x90 ", // left arrow <--
            '#={1,}> #' => "\xe2\x87\x92 ", // right arrow ==>
            '#\\+-#' => "\xc2\xb1", // +-
            '#(\d++) x (?=\d)#' => "\$1\xc2\xa0\xc3\x97\xc2\xa0", // dimension sign 123 x 123...
            '#(\d++)x(?=\d)#' => "\$1\xc3\x97", // dimension sign 123x123...
            // The dot in the lookahead is escaped on purpose: an unescaped dot matches
            // any character, which would turn every "x" following a digit into a
            // multiplication sign instead of only those followed by space, comma,
            // period or end of line.
            '#(?<=\d)x(?= |,|\.|$)#m' => "\xc3\x97", // dimension sign 123x
            '#(\S ?)\(TM\)#i' => "\$1\xe2\x84\xa2", // trademark (TM)
            '#(\S ?)\(R\)#i' => "\$1\xc2\xae", // registered (R)
            '#\(C\)( ?\S)#i' => "\xc2\xa9\$1", // copyright (C)
            '#\(EUR\)#' => "\xe2\x82\xac", // Euro (EUR)
            '#(\d) (?=\d{3})#' => "\$1\xc2\xa0", // (phone) number 1 123 123 123...

            '#(?<=[^\s\x17])\s++([\x17-\x1F]++)(?=\s)#u' => "\$1", // remove intermarkup space phase 1
            '#(?<=\s)([\x17-\x1F]++)\s++#u' => "\$1", // remove intermarkup space phase 2

            '#(?<=.{50})\s++(?=[\x17-\x1F]*\S{1,6}[\x17-\x1F]*$)#us' => "\xc2\xa0", // space before last short word

            // nbsp space between number (optionally followed by dot) and word, symbol, punctuation, currency symbol
            '#(?<=^| |\.|,|-|\+|\x16|\(|\d\x{A0})([\x17-\x1F]*\d++\.?[\x17-\x1F]*)\s++(?=[\x17-\x1F]*[%'.TexyPatterns::CHAR.'\x{b0}-\x{be}\x{2020}-\x{214f}])#mu'
                => "\$1\xc2\xa0",

            // space between preposition and word
            '#(?<=^|[^0-9'.TexyPatterns::CHAR.'])([\x17-\x1F]*[ksvzouiKSVZOUIA][\x17-\x1F]*)\s++(?=[\x17-\x1F]*[0-9'.TexyPatterns::CHAR.'])#mus'
                => "\$1\xc2\xa0",

            // straight quotes are replaced by the locale's opening/closing pair
            '#(?<!"|\w)"(?!\ |")((?:[^"]++|")+)(?<!\ |")"(?!["'.TexyPatterns::CHAR.'])()#Uu' => $locale['doubleQuotes'][0].'$1'.$locale['doubleQuotes'][1], // double ""
            '#(?<!\'|\w)\'(?!\ |\')((?:[^\']++|\')+)(?<!\ |\')\'(?![\''.TexyPatterns::CHAR.'])()#Uu' => $locale['singleQuotes'][0].'$1'.$locale['singleQuotes'][1], // single ''
        );
    }


    /**
     * Applies the typographic replacements to one line of text.
     *
     * Unless $preserveSpaces is set, runs of two or more spaces are first
     * collapsed into a single space. The table built by beforeParse() is then
     * handed to TexyRegexp::replace().
     *
     * @param  string
     * @param  bool  keep runs of multiple spaces intact
     * @return string  whatever TexyRegexp::replace() returns for the processed text
     */
    public function postLine($text, $preserveSpaces = FALSE)
    {
        if (!$preserveSpaces) {
            $text = TexyRegexp::replace($text, '# {2,}#', ' ');
        }
        return TexyRegexp::replace($text, $this->pattern);
    }
}
|