This file is indexed.

/usr/share/ucto/tokconfig-nld-twitter is in uctodata 0.4-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
version=0.2
[RULE-ORDER]
# Names of the tokenisation rules, in the order listed here (presumably the
# order in which the tokeniser tries them - confirm against the ucto manual).
# URL, URL-WWW, URL-DOMAIN, E-MAIL, SMILEY and REVERSE-SMILEY are not defined
# in this file; they are expected to come from the files pulled in at the top
# of the [RULES] section.
URL URL-WWW URL-DOMAIN E-MAIL
WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
WORD-COMPOUND ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY HASHTAG
ADDRESSEE PUNCTUATION-MULTI DATE-REVERSE DATE FRACTIONORDATE
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
# TODO: PREFIXES (currently empty) and UNITS (commented out in the C++ code)

[RULES]
# Rule definitions, one NAME=regex per line. The rules for URLs, e-mail
# addresses and smileys live in separate files that are pulled in here.
%include url
%include e-mail
%include smiley

# Word with both a parenthesised prefix and a parenthesised suffix,
# e.g. (oud)-studente(s).
# \p{Ps} / \p{Pe} match any opening / closing bracket character;
# [\p{Pc}\p{Pd}] matches one connector (such as underscore) or dash.
WORD-PARPREFIX-PARSUFFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})

# Word with a parenthesised prefix only, e.g. (oud)-studente, (on)zin
WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*

# Word with a parenthesised suffix only, e.g. koning(in)
WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})

# Keep dash/underscore-connected parts together (even if they are in
# parentheses): two or more runs of letters, each pair joined by exactly one
# connector or dash character.
WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+

# Abbreviations with multiple periods: at the start of the input, 1-3 letters
# followed by one or more ".<1-3 letters>" groups and an optional final
# period. The match must be followed by the end of the input or by one of
# , : ; - that trailing character is outside the capture group.
ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])

# Initials glued to a longer word: A.F.Zetterij -> A.F. Zetterij
# Only the initials (single letters separated by periods, ending in a period)
# are captured; the following word must start with an uppercase letter and
# continue with at least three more letters.
INITIALS=(\p{L}(?:\.\p{L})+\.)\p{Lu}\p{L}{3,999}+

# Retain a lone initial: the whole input is exactly one titlecase or
# uppercase letter followed by a period.
INITIAL=^(?:\p{Lt}|\p{Lu})\.$

# Hashtag: a hash sign followed by one or more letters, non-spacing marks,
# digits, underscores or hyphens.
HASHTAG=#[\p{L}\p{Mn}\p{N}_\-]+

# Addressee (mention): an at sign followed by the same character set as
# used for hashtags.
ADDRESSEE=@[\p{L}\p{Mn}\p{N}_\-]+

# Runs of two or more characters drawn from period, hyphen, exclamation mark
# and question mark (ellipsis etc.). The characters in a run may differ, so
# mixed sequences such as ?! or .- are matched as well.
PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}

# Dates: 1-2 digits, 1-2 digits and 2-4 digits, separated by / or -
# (the two separators are not required to be the same).
DATE=\p{N}{1,2}[/\-]\p{N}{1,2}[/\-]\p{N}{2,4}
# Dates starting with a four-digit part, e.g. 2011-03-15
DATE-REVERSE=\p{N}{4}[/\-]\p{N}{1,2}[/\-]\p{N}{1,2}

# Two groups of 1-2 digits separated by / or -; this shape is ambiguous
# between a fraction (1/2) and a partial date (15/3).
FRACTIONORDATE=\p{N}{1,2}[/\-]\p{N}{1,2}

# Two or more digit runs separated by slashes, e.g. 3/16
FRACNUMBER=\p{N}+(?:/\p{N}+)+

# Abbreviated year: an apostrophe-like character followed by exactly two
# digits, e.g. '98. Only that part is captured; it must be followed by a
# non-digit character or the end of the input.
NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)

# Times: 1-2 digits, a colon, 1-2 digits, optionally followed by a colon and
# 1-2 digits (seconds) and by an am/pm suffix, e.g. 9:30, 17:45:10, 10:30pm.
# The am/pm group is deliberately non-capturing. Other rules in this file
# (ABBREVIATION, INITIALS, NUMBER-YEAR) use a capture group to mark the part
# of the match that is kept as the token, so a capturing (am|pm|AM|PM) here
# risks only the suffix being taken as the token.
# NOTE(review): confirm against the ucto rule matcher.
TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N}{1,2})?(?:am|pm|AM|PM)?

# Retain numbers: an optional leading minus sign, then one or more groups of
# digits, each optionally preceded by a period or comma. This covers numbers
# starting with a period (.22), negative numbers (-5) and grouped or decimal
# forms such as 1.000,50.
NUMBER=-?(?:[\.,]?\p{N}+)+


# A single currency symbol (Unicode category Sc)
CURRENCY=\p{Sc}

# A run of letters and non-spacing marks
WORD=[\p{L}\p{Mn}]+

# A single punctuation character
PUNCTUATION=\p{P}

# Any single character; listed last in [RULE-ORDER], so it acts as the
# catch-all for input no other rule matched.
UNKNOWN=.

[PREFIXES]
# Currently empty (see the TODO note in the [RULE-ORDER] section).

[ATTACHEDSUFFIXES]
# Apostrophe-like character followed by s, t, n or r (either case),
# e.g. the 's in auto's.
# The leading backslash presumably keeps the opening bracket of the character
# class from being read as the start of a section header - confirm against
# the ucto manual.
\['`’‘´][sS]
\['`’‘´][tT]
\['`’‘´][nN]
\['`’‘´][rR]

[ORDINALS]
# Ordinal suffixes in lower-case and upper-case form, presumably the endings
# that may follow a number (e.g. 2e, 3de, 1ste) - confirm how the tokeniser
# applies them.
e|E
de|DE
ste|STE
er|ER


[TOKENS]
# Apostrophe-like character followed by s, k, m, n, t or ie (either case),
# i.e. shortened forms such as 's, 'k, 'm, 'n, 't and 'ie.
# The leading backslash presumably keeps the opening bracket of the character
# class from being read as the start of a section header - confirm against
# the ucto manual.
\['`’‘´][sS]
\['`’‘´][kK]
\['`’‘´][mM]
\['`’‘´][nN]
\['`’‘´][tT]
\['`’‘´](?:ie|IE)

[UNITS]
# Unit abbreviations, one per line (length, mass, temperature, volume, time
# and data size). The TODO note in [RULE-ORDER] says UNITS handling is
# commented out in the C++ code, so this list may currently be unused.
km
m
cm
mm
g
kg
C
l
s
sec
min
gb
mb
kb


[CURRENCY]
# Currency abbreviations written with letters, one per line: the euro code
# and the guilder abbreviations hfl, fl and f. Single currency symbols are
# handled by the CURRENCY rule in [RULES].
EUR
hfl
fl
f

[ABBREVIATIONS]
# The abbreviation list is kept in a separate file (nld_afk).
%include nld_afk

[FILTER]
# Filter entries; the ligature entries come from a separate file.
%include ligatures
# Also filter the soft hyphen (U+00AD)
\u00AD

[EOSMARKERS]
# End-of-sentence markers, taken from the shared standard list.
%include standard-eos

[QUOTES]
# Quote characters, taken from the shared standard and exotic lists.
%include standard-quotes
%include exotic-quotes