/usr/share/pyshared/hachoir_regex/tools.py is in python-hachoir-regex 1.0.5-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: UTF-8 -*-
import re
# Matches one ASCII control character: 0x00-0x1f or DEL (0x7f).
regex_control_code = re.compile("([\x00-\x1f\x7f])")

# Control characters that have a short, unambiguous backslash escape.
# Don't use "\0", because "\0"+"0"+"1" = "\001" = "\1" (1 character).
# Same reason not to use octal syntax ("\1").
_NAMED_ESCAPES = {
    ord("\n"): r"\n",
    ord("\r"): r"\r",
    ord("\t"): r"\t",
    ord("\a"): r"\a",
    ord("\b"): r"\b",
}

# controlchars[code] is the escape text for ASCII code 0..127: the named
# escape when one exists, otherwise the two-digit hexadecimal form "\xNN".
# The lookup dict is built once (not once per code), and range() is used
# instead of xrange() so the table also builds on Python 3.
controlchars = tuple(
    _NAMED_ESCAPES.get(code, '\\x%02x' % code)
    for code in range(128)
)
def makePrintable(data, charset, quote=None, to_unicode=False, smart=True):
    r"""
    Return data encoded in charset with unprintable content escaped.

    Control characters (0x00-0x1f and 0x7f) are replaced by backslash
    escapes. Characters with codes bigger than 127 are escaped when data
    is a byte string ('str') or when charset cannot encode them (for
    example "ASCII").

    Arguments:
     - data: text ('unicode') or byte string ('str'). A non-empty byte
       string is decoded as ISO-8859-1 and the output charset is forced
       to ASCII.
     - charset: name of the charset used to encode the result.
     - quote: if set, the non-empty result is wrapped in this string; when
       it is ' or ", occurrences inside data are prefixed by a backslash.
       Empty data with a quote gives "(empty)".
     - to_unicode: if true, decode the encoded result back to 'unicode'.
     - smart: if true, shorten \x00..\x07 to \0..\7 unless the escape is
       followed by an octal digit.

    Examples with Unicode:

    >>> aged = unicode("âgé", "UTF-8")
    >>> repr(aged) # text type is 'unicode'
    "u'\\xe2g\\xe9'"
    >>> makePrintable("abc\0", "UTF-8")
    'abc\\0'
    >>> makePrintable(aged, "latin1")
    '\xe2g\xe9'
    >>> makePrintable(aged, "latin1", quote='"')
    '"\xe2g\xe9"'

    Examples with string encoded in latin1:

    >>> aged_latin = unicode("âgé", "UTF-8").encode("latin1")
    >>> repr(aged_latin) # text type is 'str'
    "'\\xe2g\\xe9'"
    >>> makePrintable(aged_latin, "latin1")
    '\\xe2g\\xe9'
    >>> makePrintable("", "latin1")
    ''
    >>> makePrintable("a", "latin1", quote='"')
    '"a"'
    >>> makePrintable("", "latin1", quote='"')
    '(empty)'
    >>> makePrintable("abc", "latin1", quote="'")
    "'abc'"

    Control codes:

    >>> makePrintable("\0\x03\x0a\x10 \x7f", "latin1")
    '\\0\\3\\n\\x10 \\x7f'

    Quote character may also be escaped (only ' and "):

    >>> print makePrintable("a\"b", "latin-1", quote='"')
    "a\"b"
    >>> print makePrintable("a\"b", "latin-1", quote="'")
    'a"b'
    >>> print makePrintable("a'b", "latin-1", quote="'")
    'a\'b'
    """
    text = data
    out_charset = charset

    if text:
        if not isinstance(text, unicode):
            # Latin-1 maps every byte to one code point; encoding to ASCII
            # afterwards makes every byte above 127 come out escaped.
            text = unicode(text, "ISO-8859-1")
            out_charset = "ASCII"

        def escape_control(match):
            # Look up the escape text for the matched control character.
            return controlchars[ord(match.group(1))]

        text = regex_control_code.sub(escape_control, text)

        if quote:
            if quote in "\"'":
                text = text.replace(quote, '\\' + quote)
            text = quote + text + quote
    elif quote:
        text = "(empty)"

    # Anything the target charset cannot represent becomes a backslash escape.
    encoded = text.encode(out_charset, "backslashreplace")

    if smart:
        # Replace \x00\x01 by \0\1
        encoded = re.sub(r"\\x0([0-7])(?=[^0-7]|$)", r"\\\1", encoded)

    if to_unicode:
        return unicode(encoded, out_charset)
    return encoded
|