/usr/share/festival/upc_catalan/upc_catalan_tokenpos.scm is in festival-ca 3.0.6-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
;;; Copyright (C) 2009-2011 Antonio Bonafonte et al.
;;; Universitat Politècnica de Catalunya, Barcelona, Spain
;;;
;;; This script is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation,
;;; version 2.1 of the License.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; We want to acknowledge the original English version lib/tokenpos.scm
;;; by the "Centre for Speech Technology Research;University of Edinburgh, UK"
;;; available in Festival which served as a reference to this script.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Functions used in identifying token types.
;;;
;; Single-character regex classes used by the token-type predicates below.
;; NOTE(review): the upper-case class contains Á, Ì and Ù, but the lower-case
;; class has no á, ì or ù (the combined classes mirror the same asymmetry) --
;; confirm whether this is intentional.
;; Upper-case letters, including accented vowels, Ï, Ü, Ñ and Ç.
(defvar catalan-regex-upcase-letters "[A-ZÀÁÈÉÌÍÒÓÙÚÏÜÑÇ]")
;; Lower-case letters, including accented vowels, ï, ü, ñ and ç.
(defvar catalan-regex-downcase-letters "[a-zàéèíòóúïüñç]")
;; Union of the two classes above plus the middle dot (·).
(defvar catalan-regex-all-letters "[a-zàéèíòóúïüñçA-ZÀÁÈÉÌÍÒÓÙÚÏÜÑÇ·]")
;; Same as catalan-regex-all-letters with the digits 0-9 added.
(defvar catalan-regex-all-letters-and-numbers "[a-zàéèíòóúïüñçA-ZÀÁÈÉÌÍÒÓÙÚÏÜÑÇ·0-9]")
(define (catala_tok_rex sc)
  "(catala_tok_rex sc)
Returns 1 if a king-like title (rei, papa, duc, ...) is the downcased name
of the token two or three positions before SC, or of the token immediately
after SC, in the Token relation.  Returns 0 otherwise."
  (let ((titles '(rei papa pare duc tsar emperador cèsar baró comte)))
    (cond
     ;; Two tokens back.
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.pp.name"))
       titles)
      "1")
     ;; Three tokens back.
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.pp.p.name"))
       titles)
      "1")
     ;; Next token.
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.n.name"))
       titles)
      "1")
     (t "0"))))
(define (catala_tok_queen sc)
  "(catala_tok_queen sc)
Returns 1 if a queen-like title (reina, duquessa, ...) is the downcased name
of the token two or three positions before SC, or of the token immediately
after SC, in the Token relation.  Returns 0 otherwise."
  (let ((titles '(reina duquessa tsarina emperadriu baronessa comtessa)))
    (cond
     ;; Two tokens back.
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.pp.name"))
       titles)
      "1")
     ;; Three tokens back.
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.pp.p.name"))
       titles)
      "1")
     ;; Next token.
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.n.name"))
       titles)
      "1")
     (t "0"))))
(define (catala_tok_rex_names sc)
  "(catala_tok_rex_names sc)
Returns 1 if the downcased name of the token immediately before SC (in the
Token relation) is one of the listed male given names borne by nobles,
kings and popes.  Returns 0 otherwise."
  (let ((names '( ;; Catalan nobility
                 ramon berenguer arnau guifré
                 ;; Spanish kings
                 carles felip ferran josep amadeu alfons joan carles
                 ;; Catholic popes
                 alexandre sixte pius víctor calixte urbà fèlix marcel silvestre juli anastasi benet
                 inocenci bonifaci benedicte gregori honori martí pau climent
                 ;; Other kings
                 enric lluís jordi eduard guillem ricard napoleó pere albert jaume))
        (previous (catala_downcase_string (item.feat sc "R:Token.p.name"))))
    (cond
     ((member_string previous names) "1")
     (t "0"))))
(define (catala_tok_queen_names sc)
  "(catala_tok_queen_names sc)
Returns 1 if the downcased name of the token immediately before SC (in the
Token relation) is one of the listed female given names borne by queens.
Returns 0 otherwise."
  (let ((names '(caterina alexandra isabel maria joana cristina))
        (previous (catala_downcase_string (item.feat sc "R:Token.p.name"))))
    (cond
     ((member_string previous names) "1")
     (t "0"))))
(define (catala_start_caps name)
  "(catala_start_caps name)
Returns 1 if the first character of NAME matches the upper-case class and
the second character matches the lower-case class.  Returns 0 otherwise."
  (let ((chars (symbolexplode name)))
    (cond
     ((and (string-matches (string-append (car chars))
                           catalan-regex-upcase-letters)
           (string-matches (string-append (car (cdr chars)))
                           catalan-regex-downcase-letters))
      "1")
     (t "0"))))
(define (tok_allcaps sc)
  "(tok_allcaps sc)
Returns 1 if the name of SC consists only of one or more characters from
the upper-case class, 0 otherwise."
  (let ((allcaps-regex (string-append catalan-regex-upcase-letters "+")))
    (cond
     ((string-matches (item.name sc) allcaps-regex) "1")
     (t "0"))))
(define (catala_two_caps name)
  "(catala_two_caps name)
Returns 1 if NAME starts with an upper-case letter, has a lower-case letter
in second position and contains another upper-case letter anywhere after
that.  Returns 0 otherwise."
  (let ((chars (symbolexplode name))
        (found nil))
    (if (and (string-matches (car chars) catalan-regex-upcase-letters)
             (string-matches (car (cdr chars)) catalan-regex-downcase-letters))
        (begin
          ;; Skip the two characters already checked, then walk the rest
          ;; until a capital is found or the characters run out.
          (set! chars (cdr (cdr chars)))
          (while (and chars (not found))
            (if (string-matches (car chars) catalan-regex-upcase-letters)
                (set! found t)
                (set! chars (cdr chars))))))
    (if found "1" "0")))
(define (catala_words_ordinals_ms sc)
  "(catala_words_ordinals_ms sc)
Returns 1 if the downcased name of the token immediately before or
immediately after SC (in the Token relation) is one of the listed
masculine singular nouns.  Returns 0 otherwise."
  ;; volum is the correct spelling; the accented form volúm from the
  ;; original list is kept so that text using it still matches.
  (let ((words '(article vers acte llibre concurs aniversari capítol
                 volum volúm)))
    (if (or (member_string
             (catala_downcase_string (item.feat sc "R:Token.p.name"))
             words)
            (member_string
             (catala_downcase_string (item.feat sc "R:Token.n.name"))
             words))
        "1"
        "0")))
(define (catala_words_ordinals_fs sc)
  "(catala_words_ordinals_fs sc)
Returns 1 if the downcased name of the token immediately before or
immediately after SC (in the Token relation) is one of the listed
feminine singular nouns.  Returns 0 otherwise."
  ;; assemblea and olimpíada are the correct spellings; the variants
  ;; assamblea and olimpiada from the original list are kept so that text
  ;; using them still matches.
  (let ((words '(secció setmana part frase escena llibreta posició guerra
                 assemblea assamblea jornada edició olimpíada olimpiada)))
    (if (or (member_string
             (catala_downcase_string (item.feat sc "R:Token.p.name"))
             words)
            (member_string
             (catala_downcase_string (item.feat sc "R:Token.n.name"))
             words))
        "1"
        "0")))
(define (catala_words_ordinals_mp sc)
  "(catala_words_ordinals_mp sc)
Returns 1 if the downcased name of the token immediately before or
immediately after SC (in the Token relation) is one of the listed
masculine plural nouns.  Returns 0 otherwise."
  ;; The list is currently empty: no masculine plural nouns are registered.
  (let ((nouns '()))
    (cond
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.p.name"))
       nouns)
      "1")
     ((member_string
       (catala_downcase_string (item.feat sc "R:Token.n.name"))
       nouns)
      "1")
     (t "0"))))
(define (catala_words_ordinals_fp sc)
  "(catala_words_ordinals_fp sc)
Returns 1 if the downcased name of the token immediately before or
immediately after SC (in the Token relation) is one of the listed
feminine plural nouns.  Returns 0 otherwise."
  ;; olimpíades is the correct spelling; the unaccented olimpiades from the
  ;; original list is kept so that text using it still matches.
  (let ((words '(jornades edicions olimpíades olimpiades)))
    (if (or (member_string
             (catala_downcase_string (item.feat sc "R:Token.p.name"))
             words)
            (member_string
             (catala_downcase_string (item.feat sc "R:Token.n.name"))
             words))
        "1"
        "0")))
(define (catala_telph sc)
  "(catala_telph sc)
Returns 1 if a telephone word or a form of the verb trucar is the downcased
name of the item 1, 2, 3 or 4 positions before SC.  Returns 0 otherwise.
The feature paths used here carry no R:Token. prefix, unlike the other
functions in this file."
  (let ((phone-words '(telèfon telf. telf tel. tel
                       trucant trucam trucava truca trucada trucades trucant trucar trucaran
                       trucarem trucaren trucares trucareu trucaria trucarien trucaries
                       trucarà trucaràs trucaré trucaríem trucaríeu trucat trucats trucava
                       trucaven trucaves truco trucà trucàrem trucàreu trucàvem trucàveu truquem
                       truquen truques truquessin truquessis truqueu truqui truquin truquis truqués
                       truquéssim truquéssiu truquí)))
    (cond
     ;; Two positions back.
     ((member_string
       (catala_downcase_string (item.feat sc "pp.name"))
       phone-words)
      "1")
     ;; Three positions back.
     ((member_string
       (catala_downcase_string (item.feat sc "pp.p.name"))
       phone-words)
      "1")
     ;; Four positions back.
     ((member_string
       (catala_downcase_string (item.feat sc "pp.pp.name"))
       phone-words)
      "1")
     ;; One position back.
     ((member_string
       (catala_downcase_string (item.feat sc "p.name"))
       phone-words)
      "1")
     (t "0"))))
;; Declare this file as providing the upc_catalan_tokenpos feature.
(provide 'upc_catalan_tokenpos)
|