# /usr/share/ucto/tokconfig-rus is in uctodata 0.4-1.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
version=0.2
# coding: utf-8
# Names of the tokenization rules, whitespace-separated and continued over
# several lines. Presumably the rules are tried in the order listed here, so
# reordering names can change the resulting tokens -- confirm before editing.
# URL, URL-WWW, URL-DOMAIN, E-MAIL, SMILEY and REVERSE-SMILEY are not defined in
# this file; presumably they come from the shared rule files pulled into
# [RULES] -- TODO confirm.
[RULE-ORDER]
URL URL-WWW URL-DOMAIN
E-MAIL ABBREVIATION-KNOWN WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
# Rule templates containing placeholders such as %ABBREVIATIONS%. Each
# placeholder carries the name of a word-list section further down in this
# file; presumably it is replaced by that section's entries -- TODO confirm.
[META-RULES]
# Character that surrounds placeholder names in the templates below
# (presumably how the parser recognizes them -- confirm).
SPLITTER=%
# Disabled: digits, an optional hyphen, then an ORDINALS entry.
# The [ORDINALS] section of this file is empty.
#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
# A listed abbreviation together with its trailing period (the captured group).
# It must be preceded by start of input or a character that is neither a letter
# nor a period, and followed by end of input or a non-letter. Leading
# punctuation is allowed and not captured.
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
# Disabled: a TOKENS entry followed by optional punctuation. [TOKENS] is empty here.
#WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
# Disabled: a PREFIXES entry followed by letters. [PREFIXES] is empty here.
#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
# Disabled: letters followed by a SUFFIXES entry. [SUFFIXES] is empty here.
#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
# Regular-expression rules, one NAME=pattern per line. The names are the ones
# listed in [RULE-ORDER].
[RULES]
# Shared rule files. Presumably these define the URL*, E-MAIL and SMILEY rules
# named in [RULE-ORDER] -- TODO confirm.
%include url
%include e-mail
%include smiley
# Word with a bracketed prefix. Ex: (dis)information
WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*
# Word with a bracketed suffix. Ex: understand(s)
WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
# Keep dash/underscore connected parts together (even if they are in parentheses)
WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+
# Abbreviations with multiple periods: groups of 1-3 letters separated by
# periods, with an optional final period, followed by end of input or one of , : ;
ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])
# Retain initials: a single uppercase or titlecase letter plus a period
INITIAL=^(?:\p{Lt}|\p{Lu})\.$
# Runs of two or more periods, hyphens, ! or ? (ellipsis etc.).
# The characters in a run may be mixed, e.g. ?!
PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
# Slash-separated date: 1-2 digits / 1-2 digits / 2-4 digits. An opening
# bracket character is tolerated before the second and third fields.
DATE=\p{N}{1,2}/\p{Ps}?\p{N}{1,2}[/]\p{Ps}?\p{N}{2,4}
# Slash-separated date with a four-digit first field
DATE-REVERSE=\p{N}{4}/\p{N}{1,2}/\p{N}{1,2}
# Digit groups separated by slashes. Ex: 3/4
FRACNUMBER=\p{N}+(?:/\p{N}+)+
# Apostrophe-like character plus two digits (captured), followed by a
# non-digit or end of input. Ex: '98
NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
# Times: hours:minutes, optional :seconds, optional am/pm suffix.
# Seconds accept one or two digits, like hours and minutes, so 12:30:45 is
# kept whole instead of being cut after 12:30:4.
TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N}{1,2})?(?i:a\.?m\.?|p\.?m\.?)?
# Retain digits, including those starting with an initial period (.22), and negative numbers
NUMBER=-?(?:[\.,]?\p{N}+)+
# A single currency symbol
CURRENCY=\p{Sc}
# A run of letters and non-spacing marks
WORD=[\p{L}\p{Mn}]+
# A single punctuation character
PUNCTUATION=\p{P}
# Fallback: any single character
UNKNOWN=.
# Word-list sections. PREFIXES, SUFFIXES, ORDINALS and TOKENS are empty in
# this file; the meta-rules that would reference them are commented out.
[PREFIXES]
[SUFFIXES]
[ORDINALS]
[TOKENS]
# Russian unit abbreviations, one per line (e.g. км = km, сек = sec, МБ = MB).
# NOTE(review): no rule visible in this file references a UNITS placeholder --
# confirm whether this list is actually consumed.
[UNITS]
км
м
ч
сек
мин
мм
мт
см
МБ
ГБ
КБ
# Empty in this file; the CURRENCY rule in [RULES] matches currency symbols
# by Unicode category instead.
[CURRENCY]
# Russian abbreviations, one per line, written WITHOUT the trailing period:
# the ABBREVIATION-KNOWN meta-rule, which references this list through the
# %ABBREVIATIONS% placeholder, appends the period itself.
[ABBREVIATIONS]
гл
лат
напр
вкл
вм
ок
приб
им
обл
руб
ст
стр
см
шт
тов
пл
США
СЕ
СССР
РФ
пер
# Characters filtered from the input.
[FILTER]
# also filter soft hyphen (\u00AD is U+00AD SOFT HYPHEN)
\u00AD
# End-of-sentence markers, taken from the shared standard-eos file.
[EOSMARKERS]
%include standard-eos
# Quote characters, taken from the shared standard-quotes and exotic-quotes files.
[QUOTES]
%include standard-quotes
%include exotic-quotes
|