/usr/share/pyshared/dbtexmf/dblatex/rawparse.py is in dblatex 0.3.4-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
import re
from texcodec import LatexCodec, TexCodec
from texhyphen import BasicHyphenator, UrlHyphenator
def utf8(u):
    """Return the string `u` encoded with the "utf8" codec."""
    encoded = u.encode("utf8")
    return encoded
class RawKey:
    """Delimiter key looked up in a line, with its depth increment.

    Attributes set by the constructor:
      key   -- the delimiter sequence itself
      len   -- length of the delimiter
      depth -- increment stored from `incr`
      pos   -- last position recorded for the key, -1 initially
    """

    def __init__(self, key, incr):
        # Nothing has been searched yet, so start from the "not found" value
        self.pos = -1
        self.key, self.len = key, len(key)
        self.depth = incr
class RawLatexParser:
    """Translate the key-delimited parts of a line through a codec.

    The text found between an opening key and a closing key is passed to
    translate(); the text outside of the keys is copied to the output once
    the hyphenation markers have been removed from it. The nesting depth
    and the hyphenation counter are kept on the instance, so they carry
    over from one parse() call to the next.
    """

    def __init__(self,
                 key_in=utf8(u"\u0370t"), key_out=utf8(u"\u0371t"),
                 codec=None, output_encoding="latin-1"):
        # Current nesting level of the keys, and hyphenation on/off balance
        self.depth = 0
        self.hyphenate = 0

        # The opening key raises the depth by one, the closing key lowers it
        self.key_in = RawKey(key_in, 1)
        self.key_out = RawKey(key_out, -1)

        # Patterns of the markers switching the hyphenation on and off
        self.hypon = re.compile(utf8(u"\u0370h"))
        self.hypof = re.compile(utf8(u"\u0371h"))

        # Use the given codec, or build a LatexCodec for the output encoding
        if codec:
            self.codec = codec
        else:
            self.codec = LatexCodec(output_encoding=output_encoding)

        # BasicHyphenator(codec=self.codec) is the imported alternative
        self.hyphenator = UrlHyphenator(codec=self.codec)

    def parse(self, line):
        """Return `line` with its keys removed and its inner text translated.

        The line is consumed key after key. Each piece of text preceding a
        key is translated when the current depth is positive; otherwise its
        hyphenation markers are removed and counted into self.hyphenate.
        The depth is then updated with the increment of the key just found.
        """
        pieces = []
        opening, closing = self.key_in, self.key_out

        while line:
            opening.pos = line.find(opening.key)
            closing.pos = line.find(closing.key)

            # The closing key is selected only if it is present and no
            # opening key comes before it; else the opening key is selected
            # (possibly with a position of -1 when no key is left at all).
            if closing.pos != -1 and (opening.pos < 0 or
                                      opening.pos >= closing.pos):
                key = closing
            else:
                key = opening

            if key.pos == -1:
                # No more key: what remains is the last piece of text
                text, line = line, ""
            else:
                text, line = line[:key.pos], line[key.pos + key.len:]

            if text:
                if self.depth > 0:
                    pieces.append(self.translate(text))
                else:
                    text, switched_on = self.hypon.subn("", text)
                    text, switched_off = self.hypof.subn("", text)
                    self.hyphenate += (switched_on - switched_off)
                    pieces.append(text)

            if key.pos != -1:
                self.depth += key.depth

        return "".join(pieces)

    def translate(self, text):
        """Return `text` hyphenated, or decoded then re-encoded by the codec.

        The hyphenator is used when self.hyphenate is non-zero.
        """
        if not self.hyphenate:
            decoded = self.codec.decode(text)
            return self.codec.encode(decoded)
        return self.hyphenator.hyphenate(text)
class RawUtfParser(RawLatexParser):
    """Parser that just encodes from UTF-8, without any latex escaping."""

    def __init__(self, codec=None, output_encoding="latin-1"):
        # Use the given codec, or build a TexCodec for the output encoding
        if codec:
            texcodec = codec
        else:
            texcodec = TexCodec(output_encoding=output_encoding)
        RawLatexParser.__init__(self,
                                key_in=utf8(u"\u0370u"),
                                key_out=utf8(u"\u0371u"),
                                codec=texcodec)

    def translate(self, text):
        """Return `text` decoded then re-encoded by the codec."""
        # No hyphenation is done here, whatever the hyphenation counter is
        return self.codec.encode(self.codec.decode(text))
|