/usr/share/pyshared/xml2po/modes/docbook.py is in gnome-doc-utils 0.20.10-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo@gnome.org>.
#
# This file is part of xml2po.
#
# xml2po is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# xml2po is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with xml2po; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# This implements special instructions for handling DocBook XML documents
# in a better way.
#
# This means:
# — better handling of nested complicated tags (i.e. definitions of
# ignored-tags and final-tags)
# — support for merging translator-credits back into DocBook articles
# — support for setting a language
#
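# We use "currentXmlMode" class name for all modes
# -- it might be better to have it named docbookXmlMode, but it will make loading harder;
# it is also not necessary until we start supporting extracting strings from more
# than one document type at the same time
# NOTE(review): the class defined in this file is named docbookXmlMode, so the
# remark above looks out of date -- confirm how modes are loaded before relying on it.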
#
import re
import libxml2
import os
import sys
try:
# Hashlib is new in Python 2.5
from hashlib import md5 as md5_new
except ImportError:
from md5 import new as md5_new
from basic import basicXmlMode
class docbookXmlMode(basicXmlMode):
    """Class for special handling of DocBook document types.

    It sets the lang attribute on article/book elements, adds translators
    as copyright entries inside articleinfo/bookinfo, and emits one extra
    message per referenced image so that image changes can be noticed.
    """

    def __init__(self):
        # Both groups are reported as "ignored" and as "final" tags below.
        self.lists = ['itemizedlist', 'orderedlist', 'variablelist',
                      'segmentedlist', 'simplelist', 'calloutlist',
                      'varlistentry']
        self.objects = ['figure', 'textobject', 'imageobject', 'mediaobject',
                        'screenshot']

    def getIgnoredTags(self):
        "Returns array of tags to be ignored."
        return self.objects + self.lists

    def getFinalTags(self):
        "Returns array of tags to be considered 'final'."
        return ['para', 'formalpara', 'simpara',
                'releaseinfo', 'revnumber', 'title',
                'date', 'term', 'programlisting'] + self.objects + self.lists

    def getSpacePreserveTags(self):
        "Returns array of tags in which spaces are to be preserved."
        return [
            'classsynopsisinfo',
            'computeroutput',
            'funcsynopsisinfo',
            'literallayout',
            'programlisting',
            'screen',
            'synopsis',
            'userinput'
            ]

    def getStringForTranslators(self):
        """Returns string which will be used to credit translators."""
        return "translator-credits"

    def getCommentForTranslators(self):
        """Returns a comment to be added next to string for crediting translators."""
        return """Put one translator per line, in the form of NAME <EMAIL>, YEAR1, YEAR2"""

    def _find_articleinfo(self, node):
        """Return the first 'articleinfo' or 'bookinfo' node at or below node.

        The search is depth-first over node.children / .next siblings;
        returns None when no such node exists.
        """
        if node.name == 'articleinfo' or node.name == 'bookinfo':
            return node
        child = node.children
        while child:
            found = self._find_articleinfo(child)
            if found:
                return found
            child = child.next
        return None

    def _find_lastcopyright(self, node):
        """Return the child of node after which a new copyright should go.

        That is the last child named "copyright" if there is one, otherwise
        simply the last child.  Returns None when node has no children.
        """
        if not node.children:
            return None
        last = node.lastChild()
        candidate = last
        # Walk backwards so the *last* copyright element wins.
        while candidate:
            if candidate.name == "copyright":
                last = candidate
                break
            candidate = candidate.prev
        return last

    def _md5_for_file(self, filename):
        """Return the hexadecimal MD5 digest of the contents of filename.

        The file is read in 4096-byte chunks.  Errors from open()/read()
        propagate to the caller; the file is closed either way.
        """
        digest = md5_new()
        stream = open(filename, "rb")
        # try/finally rather than "with": this module still supports
        # interpreters that predate hashlib (see the md5 import fallback).
        try:
            chunk = stream.read(4096)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(4096)
        finally:
            stream.close()
        return digest.hexdigest()

    def _output_images(self, node, msg):
        """Emit an "@@image" message for every imagedata element under node.

        For each 'imagedata' element with a fileref attribute, the referenced
        file is looked up relative to the directory of msg.filename and its
        MD5 digest is embedded in the message passed to msg.outputMessage().
        A missing file produces a warning on stderr and a placeholder digest.
        Children of an imagedata element are not descended into.
        """
        if node and node.type == 'element' and node.name == 'imagedata':
            # Use .fileref to construct new message
            fileref = node.prop("fileref")
            if fileref:
                basedir = os.path.dirname(msg.filename)
                fullpath = os.path.join(basedir, fileref)
                if os.path.exists(fullpath):
                    digest = self._md5_for_file(fullpath)
                else:
                    digest = "THIS FILE DOESN'T EXIST"
                    # Same output as the former print statement, but not
                    # tied to Python 2 print syntax.
                    sys.stderr.write(
                        "Warning: image file '%s' not found.\n" % fullpath)

                msg.outputMessage(
                    "@@image: '%s'; md5=%s" % (fileref, digest),
                    node.lineNo(),
                    "When image changes, this message will be marked fuzzy or untranslated for you.\n" +
                    "It doesn't matter what you translate it to: it's not used at all.")
        elif node and node.children:
            child = node.children
            while child:
                self._output_images(child, msg)
                child = child.next

    def preProcessXml(self, doc, msg):
        """Add additional messages of interest here.

        Currently this emits the image messages for the whole document,
        starting from doc.getRootElement().
        """
        root = doc.getRootElement()
        self._output_images(root, msg)

    def postProcessXmlTranslation(self, doc, language, translators):
        """Sets a language and translators in "doc" tree.

        "translators" is a string consisted of "Name <email>, years" pairs
        of each translator, separated by newlines.

        Nothing is changed when no 'article' or 'book' element is found
        among the root element and its following siblings.  Translators are
        skipped when the string is empty, is still the untranslated
        placeholder from getStringForTranslators(), or when the document has
        no articleinfo/bookinfo element.  Lines not matching the expected
        form are silently ignored.
        """
        root = doc.getRootElement()
        # DocBook documents can be something other than article, handle that
        # as well in the future
        while root and root.name != 'article' and root.name != 'book':
            root = root.next
        # The loop ends either on an article/book element or past the last
        # sibling; only the former can be tagged with a language.
        if not root:
            return
        root.setProp('lang', language)

        if not translators or translators == self.getStringForTranslators():
            return

        # Now, lets find 'articleinfo' (it can be something else, but this
        # goes along with 'article')
        info = self._find_articleinfo(root)
        if not info:
            return

        # Groups: 1 = name, 2 = optional e-mail, 3 = years.
        credit_re = re.compile(r"^([^<,]+)\s*(?:<([^>,]+)>)?,\s*(.*)$")

        # Now, lets do one translator at a time
        for line in translators.split("\n"):
            match = credit_re.match(line.strip())
            if not match:
                continue

            name, email, years = match.group(1), match.group(2), match.group(3)

            # Re-query on every iteration so each new entry lands after the
            # copyright element added for the previous translator.
            last = self._find_lastcopyright(info)
            copyright_node = libxml2.newNode("copyright")
            if last:
                copyright_node = last.addNextSibling(copyright_node)
            else:
                info.addChild(copyright_node)

            if years:
                copyright_node.newChild(None, "year", years.encode('utf-8'))

            if name and email:
                # The name group keeps any whitespace that preceded "<",
                # which is what separates it from the parenthesis here.
                holder = name + "(%s)" % email
            elif name:
                holder = name
            elif email:
                holder = email
            else:
                holder = "???"
            copyright_node.newChild(None, "holder", holder.encode('utf-8'))
# Self-test: dump what this mode reports when the module is run directly.
if __name__ == '__main__':
    mode = docbookXmlMode()
    # One entry per output line, written in this order.
    report = (
        "Ignored tags : " + repr(mode.getIgnoredTags()),
        "Final tags : " + repr(mode.getFinalTags()),
        "Space-preserve tags: " + repr(mode.getSpacePreserveTags()),
        "Credits from string: '%s'" % mode.getStringForTranslators(),
        "Explanation for credits:\n\t'%s'" % mode.getCommentForTranslators(),
    )
    for entry in report:
        sys.stdout.write(entry + "\n")
|