# /usr/share/apertium/interchunk.rnc

# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
# 
#  Draft of DTD for the structural transfer rule files 
# 
#  Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
#  2005.07.29. 

condition =
  and | or | not
  | equal
  | begins-with | begins-with-list
  | ends-with | ends-with-list
  | contains-substring
  | in
# A 'condition' is exactly one of the test elements listed above. It is
# the content of 'test' and 'not', and the operand type of 'and' and 'or'.
container =
  var
  | clip
# A 'container' is either a 'var' or a 'clip'; it is the first child of
# 'let' and of 'modify-case'.
sentence =
  let
  | out
  | choose
  | modify-case
  | call-macro
  | append
# A 'sentence' is a single statement. Statements make up the bodies of
# 'def-macro', 'action', 'when' and 'otherwise'.
value =
  b | clip | lit | lit-tag | var
  | get-case-from | case-of
  | concat | chunk
# A 'value' is any one of the elements listed above. Values are the
# operands of the comparison tests and of 'let', 'append', 'concat',
# 'chunk' and 'pseudolemma'.
stringvalue =
  clip
  | lit
  | var
  | get-case-from
  | case-of
# A 'stringvalue' is the restricted set of values accepted as the second
# child of 'modify-case'.
interchunk =
  element interchunk {
    attlist.interchunk,
    section-def-cats, section-def-attrs, section-def-vars,
    section-def-lists?, section-def-macros?,
    section-rules
  }
attlist.interchunk &= empty
# 'interchunk' is the root element containing the whole structural
# interchunk rule file.  Its sections appear in a fixed order: def-cats,
# def-attrs and def-vars are required, def-lists and def-macros are
# optional, and the rules section is required and comes last.
section-def-cats =
  element section-def-cats {
    attlist.section-def-cats,
    def-cat+
  }
attlist.section-def-cats &= empty
# The 'section-def-cats' section holds one or more 'def-cat' elements.
# They define the categories used to build the patterns used in rules.
def-cat =
  element def-cat {
    attlist.def-cat,
    cat-item+
  }
attlist.def-cat &= attribute n { xsd:ID }, attribute c { text }?
# Each 'def-cat' defines one category as a list of one or more category
# items ('cat-item').  Its name 'n' is mandatory and unique (an ID).
# Attribute 'c' is optional free text (presumably a comment; this file
# does not describe it).
cat-item =
  element cat-item {
    attlist.cat-item,
    empty
  }
attlist.cat-item &=
  attribute lemma { text }?, attribute tags { text }, attribute c { text }?
# Each 'cat-item' (category item) is an empty element representing a set
# of lexical forms.  Its mandatory attribute 'tags' holds a sequence of
# dot-separated tag names; this sequence is a subsequence of the tag
# sequence defining each possible lexical form.  For example, tags="n.f"
# would match all lexical forms containing this tag sequence, such as
# "^casa<n><f><pl>$".
#
# The optional attribute 'lemma' may be used to define lexical forms
# having a particular substring in their lemma.
section-def-attrs =
  element section-def-attrs {
    attlist.section-def-attrs,
    def-attr+
  }
attlist.section-def-attrs &= empty
# The 'section-def-attrs' section holds one or more 'def-attr' elements.
# They define the attributes that will be identified in matched lexical
# forms.
def-attr =
  element def-attr {
    attlist.def-attr,
    attr-item+
  }
attlist.def-attr &= attribute n { xsd:ID }, attribute c { text }?
# Each 'def-attr' defines one attribute as a list of one or more
# attribute items ('attr-item').  Its name 'n' is mandatory and unique
# (an ID); attribute 'c' is optional free text.
attr-item =
  element attr-item {
    attlist.attr-item,
    empty
  }
attlist.attr-item &= attribute tags { text }?, attribute c { text }?
# Each 'attr-item' is an empty element that specifies, through its
# attribute 'tags', a subsequence of the tags in a lexical form.  Both
# 'tags' and 'c' are optional in this schema.
section-def-vars =
  element section-def-vars {
    attlist.section-def-vars,
    def-var+
  }
attlist.section-def-vars &= empty
# The 'section-def-vars' section holds one or more 'def-var' elements.
# They define the global variables that will be used to transfer
# information between rules.
def-var =
  element def-var {
    attlist.def-var,
    empty
  }
attlist.def-var &=
  attribute n { xsd:ID }, attribute v { text }?, attribute c { text }?
# Each 'def-var' defines one global variable.  The mandatory, unique name
# 'n' is used to refer to it.  An initial value may be given by means of
# the optional 'v' attribute; when it is omitted, the variable is
# initialized to the empty string.  Attribute 'c' is optional free text.
section-def-lists =
  element section-def-lists {
    attlist.section-def-lists,
    def-list+
  }
attlist.section-def-lists &= empty
# Element 'section-def-lists' encloses a set of one or more list
# definitions ('def-list').
def-list =
  element def-list {
    attlist.def-list,
    list-item+
  }
attlist.def-list &= attribute n { xsd:ID }, attribute c { text }?
# The 'def-list' element defines a named list, made of one or more
# 'list-item' elements, to be searched with the 'in' element.  The
# mandatory attribute 'n' sets the name of the list; 'c' is optional
# free text.
list-item =
  element list-item {
    attlist.list-item,
    empty
  }
attlist.list-item &= attribute v { text }, attribute c { text }?
# Each 'list-item' is an empty element.  Its mandatory attribute 'v'
# contains the value to be added to the list being defined; 'c' is
# optional free text.
section-def-macros =
  element section-def-macros {
    attlist.section-def-macros,
    def-macro+
  }
attlist.section-def-macros &= empty
#
# The 'section-def-macros' section holds one or more 'def-macro'
# elements.  They define macros containing portions of code frequently
# used in the action part of rules.
#
def-macro =
  element def-macro {
    attlist.def-macro,
    sentence+
  }
attlist.def-macro &=
  attribute n { xsd:ID },
  attribute npar { text },
  attribute c { text }?
# Macro definition:
#
# A macro has a mandatory, unique name (the value of 'n'), a mandatory
# number of parameters (the value of 'npar') and a body made of one or
# more statements ('sentence').  Attribute 'c' is optional free text.
section-rules =
  element section-rules {
    attlist.section-rules,
    rule+
  }
attlist.section-rules &= empty
# The 'section-rules' section contains a sequence of one or more rules.
rule =
  element rule {
    attlist.rule,
    pattern,
    action
  }
attlist.rule &= attribute comment { text }?
# Each 'rule' consists of exactly one 'pattern' followed by one 'action'.
# * the optional attribute 'comment' holds a note about the purpose of
#   the rule being defined
pattern =
  element pattern {
    attlist.pattern,
    pattern-item+
  }
attlist.pattern &= empty
# The 'pattern' is specified as one or more pattern items, each one
# representing a lexical form in the matched pattern.
pattern-item =
  element pattern-item {
    attlist.pattern-item,
    empty
  }
attlist.pattern-item &= attribute n { xsd:IDREF }
# Each 'pattern-item' is an empty element whose mandatory attribute 'n'
# refers, by name, to a category defined in the def-cats section.
action =
  element action {
    attlist.action,
    sentence*
  }
attlist.action &= attribute c { text }?
# 'action' encloses the procedural part of a rule: zero or more
# statements ('sentence').  Attribute 'c' is optional free text.
choose =
  element choose {
    attlist.choose,
    when+,
    otherwise?
  }
attlist.choose &= attribute c { text }?
# The 'choose' statement is a selection statement (similar to a case
# statement) composed of one or more tested cases ('when') and an
# optional 'otherwise'.
when =
  element when {
    attlist.when,
    test,
    sentence*
  }
attlist.when &= attribute c { text }?
# Each tested case ('when') is a 'test' followed by a block of zero or
# more statements.
otherwise =
  element otherwise {
    attlist.otherwise,
    sentence+
  }
attlist.otherwise &= attribute c { text }?
# The 'otherwise' case is a block of one or more statements.
test =
  element test {
    attlist.test,
    condition
  }
attlist.test &= attribute c { text }?
# The 'test' in a tested case wraps exactly one 'condition'.  This may be
# a conjunction, a disjunction or a negation of simpler tests, or one of
# the simple comparison tests such as 'equal'.
and =
  element and {
    attlist.and,
    condition,
    condition+
  }
attlist.and &= empty
# Each conjunction test ('and') contains two or more simpler tests.
or =
  element or {
    attlist.or,
    condition,
    condition+
  }
attlist.or &= empty
# Each disjunction test ('or') contains two or more simpler tests.
not =
  element not {
    attlist.not,
    condition
  }
attlist.not &= empty
# The negation ('not') of exactly one simpler test is a test itself.
equal =
  element equal {
    attlist.equal,
    value,
    value
  }
attlist.equal &= attribute caseless { "no" | "yes" }?
# The simplest test is an equality test ('equal') between two values.
# The left part and the right part may each be, for instance, a clip
# (see below), a literal string ('lit'), a literal tag ('lit-tag') or
# the value of a variable ('var') defined in the def-vars section; the
# schema accepts any 'value' in either position.  When the optional
# attribute 'caseless' is set to 'yes', the comparison is made without
# attending to the case.
begins-with =
  element begins-with {
    attlist.begins-with,
    value,
    value
  }
attlist.begins-with &= attribute caseless { "no" | "yes" }?
# 'begins-with' tests if the left part contains the right part at the
# beginning.  Each part may be, for instance, a clip (see below), a
# literal string ('lit'), a literal tag ('lit-tag') or the value of a
# variable ('var') defined in the def-vars section; the schema accepts
# any 'value' in either position.  When the optional attribute
# 'caseless' is set to 'yes', the comparison is made without attending
# to the case.
ends-with =
  element ends-with {
    attlist.ends-with,
    value,
    value
  }
attlist.ends-with &= attribute caseless { "no" | "yes" }?
# 'ends-with' tests if the left part contains the right part at the
# end.  Each part may be, for instance, a clip (see below), a literal
# string ('lit'), a literal tag ('lit-tag') or the value of a variable
# ('var') defined in the def-vars section; the schema accepts any
# 'value' in either position.  When the optional attribute 'caseless'
# is set to 'yes', the comparison is made without attending to the case.
begins-with-list =
  element begins-with-list {
    attlist.begins-with-list,
    value,
    \list
  }
attlist.begins-with-list &= attribute caseless { "no" | "yes" }?
# 'begins-with-list' tests if the left part contains the right part at
# the beginning.  The first part may be, for instance, a clip (see
# below), a literal string ('lit'), a literal tag ('lit-tag') or the
# value of a variable ('var') defined in the def-vars section; the
# schema accepts any 'value' there.  The second part must always be a
# 'list'.  When the optional attribute 'caseless' is set to 'yes', the
# comparison is made without attending to the case.
ends-with-list =
  element ends-with-list {
    attlist.ends-with-list,
    value,
    \list
  }
attlist.ends-with-list &= attribute caseless { "no" | "yes" }?
# 'ends-with-list' tests if the left part contains the right part at
# the end.  The first part may be, for instance, a clip (see below), a
# literal string ('lit'), a literal tag ('lit-tag') or the value of a
# variable ('var') defined in the def-vars section; the schema accepts
# any 'value' there.  The second part must always be a 'list'.  When
# the optional attribute 'caseless' is set to 'yes', the comparison is
# made without attending to the case.
contains-substring =
  element contains-substring { attlist.contains-substring, value, value }
attlist.contains-substring &= attribute caseless { "no" | "yes" }?
# 'contains-substring' tests if the left part contains the right part.
# Each part may be, for instance, a clip (see below), a literal string
# ('lit'), a literal tag ('lit-tag') or the value of a variable ('var')
# defined in the def-vars section; the schema accepts any 'value' in
# either position.  When the optional attribute 'caseless' is set to
# 'yes', the comparison is made without attending to the case.
in =
  element in {
    attlist.in,
    value,
    \list
  }
attlist.in &= attribute caseless { "no" | "yes" }?
# 'in' performs a search of a value in a list.  If the optional attribute
# 'caseless' is set to 'yes', this search is performed without attending
# to the case.
\list =
  element list {
    attlist.list,
    empty
  }
attlist.list &= attribute n { xsd:IDREF }
# 'list' is an empty element that refers, with the name in its mandatory
# attribute 'n', to a list defined in the 'section-def-lists' section.
let =
  element let {
    attlist.let,
    container,
    value
  }
attlist.let &= empty
# An assignment statement ('let') assigns a value to a container.  The
# value may be, for instance, a clip (see below), a literal string
# ('lit'), a literal tag ('lit-tag') or the value of a global variable
# ('var'); the container is either a global variable ('var') or a clip.
append =
  element append {
    attlist.append,
    value+
  }
attlist.append &= attribute n { xsd:IDREF }
# The 'append' instruction appends one or more values to the target
# identified by the mandatory "n" attribute.  Each value may be, for
# instance, a clip (see below), a literal string ('lit'), a literal tag
# ('lit-tag') or the value of a global variable ('var').
out =
  element out {
    attlist.out,
    (b | chunk | var)+
  }
attlist.out &= attribute c { text }?
# 'out' is an output statement; it contains one or more items, each of
# which is a blank ('b'), a 'chunk' or a variable ('var').
modify-case =
  element modify-case {
    attlist.modify-case,
    container,
    stringvalue
  }
attlist.modify-case &= empty
# The first argument of 'modify-case' (a container) copies the case of
# the second argument (a stringvalue).
call-macro =
  element call-macro {
    attlist.call-macro,
    with-param*
  }
attlist.call-macro &= attribute n { xsd:IDREF }
# A macro may be called anywhere by name (mandatory attribute 'n') with
# zero or more arguments ('with-param').
with-param =
  element with-param {
    attlist.with-param,
    empty
  }
attlist.with-param &= attribute pos { text }
# The mandatory attribute 'pos' in each argument ('with-param') is used
# to refer to a lexical form in the current rule.  For example, if a
# 2-parameter macro has been defined to perform noun-adjective agreement
# operations, it may be used with arguments 1 and 2 in a noun-adjective
# rule, with arguments 2, 3 and 1 in a determiner-noun-adjective rule,
# with arguments 1 and 3 in a noun-adverb-adjective rule, and with
# arguments 2 and 1 in an adjective-noun rule.
clip =
  element clip {
    attlist.clip,
    empty
  }
attlist.clip &=
  attribute pos { text }, attribute part { text }, attribute c { text }?
# A 'clip' is a substring of a source-language or target-language
# lexical form, extracted according to an attribute.  Both 'pos' and
# 'part' are mandatory; 'c' is optional free text.
#
# * 'pos' is an index (1, 2, 3...) used to select a lexical form
#    inside the rule;
#
# * the value of 'part' is the name of an attribute defined in
#   def-attrs, but may also take the values 'lem' (referring to
#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
#   (lemma queue) and 'whole' (referring to the whole lexical form).
#
lit =
  element lit {
    attlist.lit,
    empty
  }
attlist.lit &= attribute v { text }
# 'lit' is a literal string value: the value of the literal is the value
# of the mandatory 'v' attribute.
lit-tag =
  element lit-tag {
    attlist.lit-tag,
    empty
  }
attlist.lit-tag &= attribute v { text }
# 'lit-tag' is a literal tag value: the value of the literal is the value
# of the mandatory 'v' attribute.
var =
  element var {
    attlist.var,
    empty
  }
attlist.var &= attribute n { xsd:IDREF }
# Each 'var' is a variable identifier: the mandatory attribute 'n' is the
# name of the variable.  When it is in an 'out', a 'test', or the right
# part of a 'let', it represents the value of the variable; when it is in
# the left part of a 'let', it represents the reference of the variable.
get-case-from =
  element get-case-from {
    attlist.get-case-from,
    (clip | lit | var)
  }
attlist.get-case-from &= attribute pos { text }
# 'get-case-from' has a mandatory 'pos' attribute and contains exactly
# one 'clip', 'lit' or 'var'.
# Attention: all the comments that involve get-case-from still need to
# be updated.
case-of =
  element case-of {
    attlist.case-of,
    empty
  }
attlist.case-of &= attribute pos { text }, attribute part { text }
# A 'case-of' is a value representing the case of a "clip".  This value
# will be "aa" (all lowercase), "Aa" (first uppercase) or "AA" (all
# uppercase).  Both attributes are mandatory.
#
# * 'pos' is an index (1, 2, 3...) used to select a lexical form
#    inside the rule;
#
# * the value of 'part' is the name of an attribute defined in
#   def-attrs, but may also take the values 'lem' (referring to
#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
#   (lemma queue) and 'whole' (referring to the whole lexical form).
concat =
  element concat {
    attlist.concat,
    value+
  }
attlist.concat &= empty
# 'concat' concatenates a sequence of one or more values.
chunk =
  element chunk {
    attlist.chunk,
    value+
  }
attlist.chunk &= empty
# 'chunk' encloses a chunk, given as a sequence of one or more values.
pseudolemma =
  element pseudolemma {
    attlist.pseudolemma,
    value
  }
attlist.pseudolemma &= empty
# 'pseudolemma' wraps exactly one value and takes no attributes.  In the
# part of the schema visible here it is referenced only by 'start'.
b =
  element b {
    attlist.b,
    empty
  }
attlist.b &= attribute pos { text }?
# 'b' is a [super]blanks item, indexed by pos; for example, a 'b'
# with pos="2" refers to the [super]blanks (including format data
# encapsulated by the de-formatter) between lexical form 2 and
# lexical form 3.  Managing [super]blanks explicitly allows for the
# correct placement of format when the result of structural
# transfer has more or fewer lexical items than the original or has
# been reordered in some way.  If attribute "pos" is not specified, then
# a single blank (ASCII 32) is generated.
start =
  interchunk
  | pseudolemma
# 'start' lists the elements allowed as the document root: 'interchunk'
# or 'pseudolemma'.
# apertium 3.4.2~r68466-4 / usr / share / apertium / interchunk.rnc