# /usr/share/apertium/interchunk.rnc is in apertium 3.4.2~r68466-4.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
#
# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# Draft of DTD for the structural transfer rule files
#
# Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada,
# 2005.07.29.
condition =
  and
  | or
  | not
  | equal
  | begins-with
  | begins-with-list
  | ends-with
  | ends-with-list
  | contains-substring
  | in
# 'condition' is the choice of every test element; it is the content
# of 'test' and the operand type of 'and', 'or' and 'not'
container =
  var
  | clip
# 'container' is the first child of 'let' and of 'modify-case'
sentence =
  let
  | out
  | choose
  | modify-case
  | call-macro
  | append
# 'sentence' is any single statement; statements form the bodies of
# 'def-macro', 'action', 'when' and 'otherwise'
value =
  b
  | clip
  | lit
  | lit-tag
  | var
  | get-case-from
  | case-of
  | concat
  | chunk
# 'value' is any element that may appear as an operand of the tests
# and as content of 'let', 'append', 'concat', 'chunk' and 'pseudolemma'
stringvalue =
  clip
  | lit
  | var
  | get-case-from
  | case-of
# 'stringvalue' is the narrower choice accepted as the second child of
# 'modify-case'
interchunk =
  element interchunk {
    attlist.interchunk,
    section-def-cats,
    section-def-attrs,
    section-def-vars,
    section-def-lists?,
    section-def-macros?,
    section-rules
  }
attlist.interchunk &= empty
# 'interchunk' is the root element containing the whole structural
# interchunk rule file. Its sections appear in a fixed order; only
# 'section-def-lists' and 'section-def-macros' are optional.
section-def-cats =
  element section-def-cats {
    attlist.section-def-cats,
    def-cat+
  }
attlist.section-def-cats &= empty
# The 'section-def-cats' element holds one or more 'def-cat' elements,
# which define the categories used to build the patterns used in rules
def-cat =
  element def-cat {
    attlist.def-cat,
    cat-item+
  }
attlist.def-cat &=
  attribute n { xsd:ID },
  attribute c { text }?
# Each 'def-cat' defines one category as a list of one or more
# category items. Its name 'n' is mandatory and, being an ID, unique;
# the free-text attribute 'c' is optional.
cat-item =
  element cat-item {
    attlist.cat-item,
    empty
  }
attlist.cat-item &=
  attribute lemma { text }?,
  attribute tags { text },
  attribute c { text }?
# Each 'cat-item' (category item) represents a set of lexical forms.
# Its mandatory attribute 'tags' holds a sequence of dot-separated tag
# names; this sequence is a subsequence of the tag sequence defining
# each possible lexical form. For example, tags="n.f" would match all
# lexical forms containing this tag sequence, such as
# "^casa<n><f><pl>$".
#
# The optional attribute 'lemma' may additionally be used to define
# lexical forms having a particular substring in their lemma.
section-def-attrs =
  element section-def-attrs {
    attlist.section-def-attrs,
    def-attr+
  }
attlist.section-def-attrs &= empty
# The 'section-def-attrs' element holds one or more 'def-attr'
# elements, which define the attributes that will be identified in
# matched lexical forms
def-attr =
  element def-attr {
    attlist.def-attr,
    attr-item+
  }
attlist.def-attr &=
  attribute n { xsd:ID },
  attribute c { text }?
# Each 'def-attr' defines one attribute as a list of one or more
# attribute items. Its name 'n' is mandatory and, being an ID, unique;
# the free-text attribute 'c' is optional.
attr-item =
  element attr-item {
    attlist.attr-item,
    empty
  }
attlist.attr-item &=
  attribute tags { text }?,
  attribute c { text }?
# Each 'attr-item' specifies, in its attribute 'tags', a subsequence
# of the tags in a lexical form. Both 'tags' and 'c' are optional here.
section-def-vars =
  element section-def-vars {
    attlist.section-def-vars,
    def-var+
  }
attlist.section-def-vars &= empty
# The 'section-def-vars' element holds one or more 'def-var' elements,
# which define the global variables that will be used to transfer
# information between rules
def-var =
  element def-var {
    attlist.def-var,
    empty
  }
attlist.def-var &=
  attribute n { xsd:ID },
  attribute v { text }?,
  attribute c { text }?
# The definition of a global variable has a mandatory unique name 'n'
# that is used to refer to it. An initial value can also be given by
# means of the optional 'v' attribute; when 'v' is omitted the initial
# value is the empty string.
section-def-lists =
  element section-def-lists {
    attlist.section-def-lists,
    def-list+
  }
attlist.section-def-lists &= empty
# Element 'section-def-lists' encloses a set of one or more list
# definitions ('def-list')
def-list =
  element def-list {
    attlist.def-list,
    list-item+
  }
attlist.def-list &=
  attribute n { xsd:ID },
  attribute c { text }?
# The 'def-list' element defines a named list, made of one or more
# 'list-item' elements, to be searched with the 'in' element.
# The mandatory attribute 'n' sets the name of the list.
list-item =
  element list-item {
    attlist.list-item,
    empty
  }
attlist.list-item &=
  attribute v { text },
  attribute c { text }?
# The mandatory attribute 'v' of a 'list-item' element contains the
# value to be added to the list being defined
section-def-macros =
  element section-def-macros {
    attlist.section-def-macros,
    def-macro+
  }
attlist.section-def-macros &= empty
#
# The 'section-def-macros' element holds one or more 'def-macro'
# elements: macros containing portions of code frequently used in the
# action part of rules
#
def-macro =
  element def-macro {
    attlist.def-macro,
    sentence+
  }
attlist.def-macro &=
  attribute n { xsd:ID },
  attribute npar { text },
  attribute c { text }?
# Macro definition:
#
# A macro has a mandatory name (the value of 'n'), a mandatory number
# of parameters (the value of 'npar'), an optional free-text attribute
# 'c', and a body made of one or more statements.
section-rules =
  element section-rules {
    attlist.section-rules,
    rule+
  }
attlist.section-rules &= empty
# The 'section-rules' element contains a sequence of one or more rules
rule =
  element rule {
    attlist.rule,
    pattern,
    action
  }
attlist.rule &= attribute comment { text }?
# Each rule has exactly one pattern followed by one action
# * the optional attribute 'comment' allows a note to be attached
#   about the purpose of the rule being defined
pattern =
  element pattern {
    attlist.pattern,
    pattern-item+
  }
attlist.pattern &= empty
# The pattern is specified as one or more pattern items, each one
# representing a lexical form in the matched pattern
pattern-item =
  element pattern-item {
    attlist.pattern-item,
    empty
  }
attlist.pattern-item &= attribute n { xsd:IDREF }
# Each 'pattern-item' refers, through its mandatory attribute 'n', to
# the name of a category defined in the def-cats section
action =
  element action {
    attlist.action,
    sentence*
  }
attlist.action &= attribute c { text }?
# 'action' encloses the procedural part of a rule: zero or more
# statements
choose =
  element choose {
    attlist.choose,
    when+,
    otherwise?
  }
attlist.choose &= attribute c { text }?
# The 'choose' statement is a selection statement (similar to a case
# statement) composed of one or more tested cases ('when') and an
# optional 'otherwise'
when =
  element when {
    attlist.when,
    test,
    sentence*
  }
attlist.when &= attribute c { text }?
# Each tested case is a 'test' followed by a block of zero or more
# statements
otherwise =
  element otherwise {
    attlist.otherwise,
    sentence+
  }
attlist.otherwise &= attribute c { text }?
# The 'otherwise' case is a block of one or more statements
test =
  element test {
    attlist.test,
    condition
  }
attlist.test &= attribute c { text }?
# The test in a tested case holds exactly one condition: a conjunction,
# a disjunction or a negation of simpler tests, or one of the simple
# tests (equality, prefix/suffix, substring and list membership)
and =
  element and {
    attlist.and,
    condition,
    condition+
  }
attlist.and &= empty
# Each conjunction test contains two or more simpler tests
or =
  element or {
    attlist.or,
    condition,
    condition+
  }
attlist.or &= empty
# Each disjunction test contains two or more simpler tests
not =
  element not {
    attlist.not,
    condition
  }
attlist.not &= empty
# The negation of a single simpler test is a test itself
equal =
  element equal {
    attlist.equal,
    value,
    value
  }
attlist.equal &= attribute caseless { "no" | "yes" }?
# The simplest test is an equality test between exactly two values.
# Each side may be, for instance, a clip (see below), a literal string
# ('lit'), a literal tag ('lit-tag') or the value of a variable ('var')
# defined in the def-vars section. When the optional attribute
# 'caseless' is set to 'yes', the comparison is made without attending
# to the case.
begins-with =
  element begins-with {
    attlist.begins-with,
    value,
    value
  }
attlist.begins-with &= attribute caseless { "no" | "yes" }?
# Tests if the left part contains the right part at the beginning.
# Each part may be, for instance, a clip (see below), a literal string
# ('lit'), a literal tag ('lit-tag') or the value of a variable ('var')
# defined in the def-vars section. When the optional attribute
# 'caseless' is set to 'yes', the comparison is made without attending
# to the case.
ends-with =
  element ends-with {
    attlist.ends-with,
    value,
    value
  }
attlist.ends-with &= attribute caseless { "no" | "yes" }?
# Tests if the left part contains the right part at the end.
# Each part may be, for instance, a clip (see below), a literal string
# ('lit'), a literal tag ('lit-tag') or the value of a variable ('var')
# defined in the def-vars section. When the optional attribute
# 'caseless' is set to 'yes', the comparison is made without attending
# to the case.
begins-with-list =
  element begins-with-list {
    attlist.begins-with-list,
    value,
    \list
  }
attlist.begins-with-list &= attribute caseless { "no" | "yes" }?
# Tests if the left part contains the right part at the beginning.
# The first part may be, for instance, a clip (see below), a literal
# string ('lit'), a literal tag ('lit-tag') or the value of a variable
# ('var') defined in the def-vars section. The second part must always
# be a 'list'. When the optional attribute 'caseless' is set to 'yes',
# the comparison is made without attending to the case.
ends-with-list =
  element ends-with-list {
    attlist.ends-with-list,
    value,
    \list
  }
attlist.ends-with-list &= attribute caseless { "no" | "yes" }?
# Tests if the left part contains the right part at the end.
# The first part may be, for instance, a clip (see below), a literal
# string ('lit'), a literal tag ('lit-tag') or the value of a variable
# ('var') defined in the def-vars section. The second part must always
# be a 'list'. When the optional attribute 'caseless' is set to 'yes',
# the comparison is made without attending to the case.
contains-substring =
  element contains-substring {
    attlist.contains-substring,
    value,
    value
  }
attlist.contains-substring &= attribute caseless { "no" | "yes" }?
# Tests if the left part contains the right part.
# Each part may be, for instance, a clip (see below), a literal string
# ('lit'), a literal tag ('lit-tag') or the value of a variable ('var')
# defined in the def-vars section. When the optional attribute
# 'caseless' is set to 'yes', the comparison is made without attending
# to the case.
in =
  element in {
    attlist.in,
    value,
    \list
  }
attlist.in &= attribute caseless { "no" | "yes" }?
# 'in' performs a search of a value (first child) in a list (second
# child). If the optional attribute 'caseless' is set to 'yes', this
# search is performed without attending to the case.
\list =
  element list {
    attlist.list,
    empty
  }
attlist.list &= attribute n { xsd:IDREF }
# 'list' refers, by the name in its mandatory attribute 'n', to a list
# defined earlier in the 'section-def-lists' section
let =
  element let {
    attlist.let,
    container,
    value
  }
attlist.let &= empty
# An assignment statement ('let') assigns a value, such as a clip (see
# below), a literal string ('lit'), a literal tag ('lit-tag') or the
# value of a global variable ('var'), to a container: either a global
# variable ('var') or a clip
append =
  element append {
    attlist.append,
    value+
  }
attlist.append &= attribute n { xsd:IDREF }
# This instruction appends one or more values, such as a clip (see
# below), a literal string ('lit'), a literal tag ('lit-tag') or the
# value of a global variable ('var'), to the target identified by the
# mandatory "n" attribute
out =
  element out {
    attlist.out,
    (b | chunk | var)+
  }
attlist.out &= attribute c { text }?
# 'out' is an output statement; it contains one or more blanks ('b'),
# chunks ('chunk') or variables ('var'), in any order
modify-case =
  element modify-case {
    attlist.modify-case,
    container,
    stringvalue
  }
attlist.modify-case &= empty
# The first argument of 'modify-case' (a container) copies the case of
# the second argument (a string value).
call-macro =
  element call-macro {
    attlist.call-macro,
    with-param*
  }
attlist.call-macro &= attribute n { xsd:IDREF }
# A macro is called by name (mandatory attribute 'n') with zero or
# more arguments, each given as a 'with-param' element
with-param =
  element with-param {
    attlist.with-param,
    empty
  }
attlist.with-param &= attribute pos { text }
# The mandatory attribute 'pos' in each argument is used to refer to a
# lexical form in the current rule. For example, if a 2-parameter macro
# has been defined to perform noun-adjective agreement operations,
# it may be used with arguments 1 and 2 in a noun-adjective rule,
# with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
# arguments 1 and 3 in a noun-adverb-adjective rule, and with
# arguments 2 and 1 in an adjective-noun rule
clip =
  element clip {
    attlist.clip,
    empty
  }
attlist.clip &=
  attribute pos { text },
  attribute part { text },
  attribute c { text }?
# A 'clip' is a substring of a source-language or target-language
# lexical form, extracted according to two mandatory attributes:
#
# * 'pos' is an index (1, 2, 3...) used to select a lexical form
#   inside the rule;
#
# * the value of 'part' is the name of an attribute defined in
#   def-attrs, but may also take the values 'lem' (referring to
#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
#   (lemma queue) and 'whole' (referring to the whole lexical form).
#
lit =
  element lit {
    attlist.lit,
    empty
  }
attlist.lit &= attribute v { text }
# A literal string value: the value of the literal is the value of
# the mandatory 'v' attribute
lit-tag =
  element lit-tag {
    attlist.lit-tag,
    empty
  }
attlist.lit-tag &= attribute v { text }
# A literal tag value: the value of the literal tag is the value of
# the mandatory 'v' attribute
var =
  element var {
    attlist.var,
    empty
  }
attlist.var &= attribute n { xsd:IDREF }
# Each 'var' is a variable identifier: the mandatory attribute 'n' is
# the name of the variable. When it is in an 'out', a 'test', or the
# right part of a 'let', it represents the value of the variable; when
# in the left part of a 'let' it represents the reference of the
# variable.
get-case-from =
  element get-case-from {
    attlist.get-case-from,
    (clip | lit | var)
  }
attlist.get-case-from &= attribute pos { text }
# 'get-case-from' has a mandatory attribute 'pos' and wraps exactly one
# 'clip', 'lit' or 'var'.
# Warning: every comment that involves get-case-from still needs to be
# updated
case-of =
  element case-of {
    attlist.case-of,
    empty
  }
attlist.case-of &=
  attribute pos { text },
  attribute part { text }
# A 'case-of' is a value representing the case of a "clip". This value
# will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
# (all uppercase).
#
# * 'pos' is an index (1, 2, 3...) used to select a lexical form
#   inside the rule;
#
# * the value of 'part' is the name of an attribute defined in
#   def-attrs, but may also take the values 'lem' (referring to
#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
#   (lemma queue) and 'whole' (referring to the whole lexical form).
concat =
  element concat {
    attlist.concat,
    value+
  }
attlist.concat &= empty
# 'concat' concatenates a sequence of one or more values
chunk =
  element chunk {
    attlist.chunk,
    value+
  }
attlist.chunk &= empty
# 'chunk' encloses a chunk, given as a sequence of one or more values
pseudolemma =
  element pseudolemma {
    attlist.pseudolemma,
    value
  }
attlist.pseudolemma &= empty
# 'pseudolemma' wraps exactly one value. No other element in this
# schema contains it; it is reachable only through 'start'.
b =
  element b {
    attlist.b,
    empty
  }
attlist.b &= attribute pos { text }?
# 'b' is a [super]blanks item, indexed by pos; for example, a 'b'
# with pos="2" refers to the [super]blanks (including format data
# encapsulated by the de-formatter) between lexical form 2 and
# lexical form 3. Managing [super]blanks explicitly allows for the
# correct placement of format when the result of structural
# transfer has more or fewer lexical items than the original or has
# been reordered in some way. If attribute "pos" is not specified, then
# a single blank (ASCII 32) is generated.
start = interchunk | pseudolemma
# 'start' lists the elements accepted as the document element:
# 'interchunk' or 'pseudolemma'