/usr/share/ucto/tokconfig-fra is in uctodata 0.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
version=0.2
[RULE-ORDER]
# Whitespace-separated list of rule names, spread over several lines.
# Presumably the rules are tried in the order listed - TODO confirm against
# the ucto documentation.
# Every name below is defined in the META-RULES or RULES section of this file,
# except URL, URL-WWW, URL-DOMAIN, E-MAIL, SMILEY and REVERSE-SMILEY; those are
# presumably supplied by the url, e-mail and smiley files pulled into the
# RULES section - verify.
PREFIX SUFFIX WORD-TOKEN URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
NUMBER-YEAR FRACNUMBER TIME NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
[META-RULES]
# Rule templates. A name wrapped in the SPLITTER character (for example
# %TOKENS%) matches the name of a list section in this file; presumably it is
# expanded to an alternation of that section's entries - TODO confirm.
SPLITTER=%
# Disabled: refers to an ORDINALS list, and no such section is visible in this file.
#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
# Disabled: refers to the ABBREVIATIONS list, which is empty in this file.
#ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
# A TOKENS entry (captured), optionally followed by punctuation, at the end of the input.
WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
# A PREFIXES entry (captured) that is directly followed by one or more letters.
PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )\p{L}+
# A SUFFIXES entry (captured) preceded by letters and followed by end of input
# or a non-letter.
SUFFIX = (?:\A|\p{L})+( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
# The URL, e-mail and smiley rules are not defined inline; the three
# directives below name the files that presumably provide them.
%include url
%include e-mail
%include smiley
# Word with a bracketed prefix, e.g. (dis)information: opening bracket,
# letters, closing bracket (each optionally followed by a connector or dash),
# then letters with optional dash/underscore-joined continuations.
WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*
# Word with a bracketed suffix, e.g. understand(s): the mirror image of
# WORD-PARPREFIX.
WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
# Keep dash/underscore connected parts together: two or more letter groups
# joined by a connector (Pc) or dash (Pd) character.
WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+
# Abbreviations with multiple periods: groups of 1-3 letters separated by
# periods, with an optional final period, starting at the beginning of the
# input and followed by end of input or one of , : ;
# Only the abbreviation itself is captured, not the trailing delimiter.
ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])
# Retain initials: the whole input is a single titlecase or uppercase letter
# followed by a period.
INITIAL=^(?:\p{Lt}|\p{Lu})\.$
# Runs of two or more of . - ! ? (ellipsis etc). Despite the earlier
# "homogeneous" wording, the pattern also accepts mixed runs such as ?!
PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
# Dates: 1-2 numerals, separator, 1-2 numerals, separator, 2-4 numerals;
# the separator is / or -
DATE=\p{N}{1,2}[/\-]\p{N}{1,2}[/-]\p{N}{2,4}
# Dates that start with a 4-numeral field, followed by two 1-2 numeral fields.
DATE-REVERSE=\p{N}{4}[/\-]\p{N}{1,2}[/\-]\p{N}{1,2}
# Two or more numeral groups separated by slashes, e.g. 3/4
FRACNUMBER=\p{N}+(?:/\p{N}+)+
# Apostrophe-like character plus exactly two numerals (captured), followed by
# a non-numeral or the end of input.
NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
# Times: hours:minutes with an optional :seconds field and an optional
# case-insensitive am/pm suffix.
# The seconds field accepts one or two numerals, like the minutes field, so
# that 12:30:45 is matched as a whole instead of stopping after 12:30:4
TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N}{1,2})?(?i:am|pm)?
# Retain numbers, including those starting with an initial period (.22) and
# negative numbers: an optional leading minus, then one or more groups of an
# optional . or , followed by numerals.
NUMBER=-?(?:[\.,]?\p{N}+)+
# Any single currency symbol (Unicode category Sc).
CURRENCY=\p{Sc}
# A run of letters and nonspacing marks.
WORD=[\p{L}\p{Mn}]+
# Any single punctuation character.
PUNCTUATION=\p{P}
# Any single character; UNKNOWN is the last name in RULE-ORDER.
UNKNOWN=.
[PREFIXES]
# Elided forms (qu', l', d', m', n', t', s', c', j') in lower and upper case,
# written with any of five apostrophe-like characters. The PREFIX meta-rule
# refers to this list by name.
# NOTE(review): the backslash before the opening bracket presumably keeps the
# line from being read as a section header - confirm against the ucto parser.
(?:qu|Qu|QU)['`’‘´]
\[lL]['`’‘´]
\[dD]['`’‘´]
\[mM]['`’‘´]
\[nN]['`’‘´]
\[tT]['`’‘´]
\[sS]['`’‘´]
\[cC]['`’‘´]
\[jJ]['`’‘´]
[SUFFIXES]
# Hyphen-attached forms, each given in lower case and upper case. The SUFFIX
# meta-rule refers to this list by name.
# NOTE(review): other French enclitics such as -moi, -toi, -on, -y, -en and
# -leur are not listed; confirm whether that is intentional.
-je|-JE
-me|-ME
-tu|-TU
-te|-TE
-[tT]
-il|-IL
-se|-SE
-lui|-LUI
-elle|-ELLE
-nous|-NOUS
-vous|-VOUS
-ils|-ILS
-eux|-EUX
-elles|-ELLES
-ce|-CE
-ci|-CI
-là|-LÀ
-la|-LA
-le|-LE
-les|-LES
[TOKENS]
# Patterns referenced by name from the WORD-TOKEN meta-rule: the same elided
# forms as in the PREFIXES section, plus aujourd'hui in lower, capitalised and
# upper case.
# NOTE(review): the backslash before the opening bracket presumably keeps the
# line from being read as a section header - confirm against the ucto parser.
\[lL]['`’‘´]
\[dD]['`’‘´]
\[mM]['`’‘´]
\[nN]['`’‘´]
\[tT]['`’‘´]
\[sS]['`’‘´]
\[cC]['`’‘´]
\[jJ]['`’‘´]
\[aA]ujourd['`’‘´]hui
AUJOURD['`’‘´]HUI
(?:qu|Qu|QU)['`’‘´]
[UNITS]
# Unit abbreviations, one per line.
# NOTE(review): no rule or meta-rule visible in this file refers to this list;
# verify where (or whether) it is consumed.
km
m
cm
mm
g
kg
C
l
s
sec
min
gb
mb
kb
[CURRENCY]
# Currency codes, one per line.
# NOTE(review): the CURRENCY rule in the RULES section matches \p{Sc} only and
# does not refer to this list; verify where (or whether) it is consumed.
EUR
CAD
[ABBREVIATIONS]
# This list is empty. Its only reference in this file is the commented-out
# ABBREVIATION-KNOWN meta-rule.
[FILTER]
# Each entry is a character to filter followed by its replacement text.
# Expand typographic ligatures to plain letters. The key must be the single
# ligature code point; it is written as an escape, like the soft hyphen entry
# below, so that Unicode normalisation of this file cannot silently turn the
# entry into an identity mapping such as "fl fl".
# U+FB02 LATIN SMALL LIGATURE FL
\uFB02 fl
# U+FB00 LATIN SMALL LIGATURE FF
\uFB00 ff
# U+FB01 LATIN SMALL LIGATURE FI (added for consistency with the other ligatures)
\uFB01 fi
# U+FB03 LATIN SMALL LIGATURE FFI
\uFB03 ffi
# U+FB04 LATIN SMALL LIGATURE FFL
\uFB04 ffl
# also filter soft hyphen (U+00AD); there is no replacement text on this entry
\u00AD
[EOSMARKERS]
# Nothing is listed inline; the directive below names a file called
# standard-eos (not shown here) that presumably supplies the entries.
%include standard-eos
[QUOTES]
# Nothing is listed inline; the directive below names a file called
# standard-quotes (not shown here) that presumably supplies the entries.
%include standard-quotes
|