/usr/share/pyshared/hachoir_regex/pattern.py is in python-hachoir-regex 1.0.5-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from hachoir_regex import RegexEmpty, RegexOr, parse, createString
from hachoir_regex.tools import makePrintable
class Pattern:
    """
    Common base of every pattern usable in pattern matching.

    The only state held here is ``user``: a caller-supplied value that is
    stored untouched on the instance.
    """

    def __init__(self, user):
        # Stored as-is; this class never reads or interprets the value.
        self.user = user
class StringPattern(Pattern):
    """
    Pattern made of one fixed string.

    ``text`` is the literal string, ``user`` the optional caller-supplied
    value kept by the Pattern base class.
    """

    def __init__(self, text, user=None):
        self.text = text
        Pattern.__init__(self, user)

    def __str__(self):
        # Printable rendering of the raw text, as produced by makePrintable().
        printable = makePrintable(self.text, 'ASCII', to_unicode=True)
        return printable

    def __repr__(self):
        # Formatting with %s goes through __str__ above.
        return "<StringPattern '%s'>" % (self,)
class RegexPattern(Pattern):
    """
    Pattern described by a regular expression.

    The expression text is parsed once at construction (``self.regex``);
    its compiled form is built on first use and then cached.
    """

    def __init__(self, regex, user=None):
        Pattern.__init__(self, user)
        self.regex = parse(regex)
        # Filled in lazily by _getCompiledRegex().
        self._compiled_regex = None

    def __str__(self):
        # Printable rendering of the parsed regex, via makePrintable().
        regex_text = str(self.regex)
        return makePrintable(regex_text, 'ASCII', to_unicode=True)

    def __repr__(self):
        # Formatting with %s goes through __str__ above.
        return "<RegexPattern '%s'>" % (self,)

    def match(self, data):
        """
        Apply the compiled regex's match() to data and return its result.
        """
        compiled = self.compiled_regex
        return compiled.match(data)

    def _getCompiledRegex(self):
        """
        Return the compiled regex, compiling it the first time it is needed.
        """
        compiled = self._compiled_regex
        if compiled is None:
            compiled = self.regex.compile(python=True)
            self._compiled_regex = compiled
        return compiled

    compiled_regex = property(_getCompiledRegex)
class PatternMatching:
    """
    Fast pattern matching class: match multiple patterns at the same time.

    Create your patterns:

    >>> p=PatternMatching()
    >>> p.addString("a")
    >>> p.addString("b")
    >>> p.addRegex("[cd]e")

    Search patterns:

    >>> for item in p.search("a b ce"):
    ...    print(item)
    ...
    (0, 1, <StringPattern 'a'>)
    (2, 3, <StringPattern 'b'>)
    (4, 6, <RegexPattern '[cd]e'>)
    """

    def __init__(self):
        # Registered StringPattern items, in insertion order
        self.string_patterns = []
        # text -> StringPattern, used for duplicate detection and lookup
        self.string_dict = {}
        # Registered RegexPattern items, in insertion order
        self.regex_patterns = []
        # True whenever the merged regex below is out of date
        self._need_commit = True

        # Following attributes are generated by commit() method
        self._regex = None
        self._compiled_regex = None
        self._max_length = None

    def commit(self):
        """
        Generate whole regex merging all (string and regex) patterns.

        Does nothing when no pattern was added since the last successful
        commit. The merged regex, its compiled form and the maximum
        pattern length are stored only once everything succeeded: if an
        exception is raised while merging or compiling, the object stays
        flagged as "needs commit" so the next access retries instead of
        silently serving stale (or None) values.
        """
        if not self._need_commit:
            return

        length = 0
        regex = None
        for item in self.string_patterns:
            if regex:
                regex |= createString(item.text)
            else:
                regex = createString(item.text)
            length = max(length, len(item.text))
        for item in self.regex_patterns:
            if regex:
                regex |= item.regex
            else:
                regex = item.regex
            length = max(length, item.regex.maxLength())
        if not regex:
            # No pattern at all: fall back to the empty regex
            regex = RegexEmpty()
        compiled = regex.compile(python=True)

        # Publish the results and clear the flag only after success
        self._regex = regex
        self._compiled_regex = compiled
        self._max_length = length
        self._need_commit = False

    def addString(self, magic, user=None):
        """
        Register the static string magic, with optional user value.

        A string that is already registered is ignored (the first
        registration, including its user value, is kept).
        """
        item = StringPattern(magic, user)
        if item.text in self.string_dict:
            # Skip duplicates
            return
        self.string_patterns.append(item)
        self.string_dict[item.text] = item
        self._need_commit = True

    def addRegex(self, regex, user=None):
        """
        Register the regular expression regex, with optional user value.

        Raise ValueError if the parsed expression has no maximum length
        (its maxLength() is None).
        """
        item = RegexPattern(regex, user)
        if item.regex.maxLength() is None:
            raise ValueError("Regular expression with no maximum size has forbidden")
        self.regex_patterns.append(item)
        self._need_commit = True

    def getPattern(self, data):
        """
        Get pattern item matching data.

        String patterns are tried first (exact lookup), then regex
        patterns in registration order.
        Raise KeyError if no pattern does match it.
        """
        # Try in string patterns
        try:
            return self.string_dict[data]
        except KeyError:
            pass

        # Try in regex patterns
        # NOTE(review): item.match() presumably anchors only at the start of
        # data, so a regex matching just a prefix would be accepted -- confirm.
        for item in self.regex_patterns:
            if item.match(data):
                return item
        raise KeyError("Unable to get pattern item")

    def search(self, data):
        """
        Search patterns in data.

        Return a generator of tuples: (start, end, item), one per match
        found by the merged compiled regex; item is looked up with
        getPattern() on the matched text.
        """
        if not self.max_length:
            # No pattern: returns nothing
            return
        for match in self.compiled_regex.finditer(data):
            item = self.getPattern(match.group(0))
            yield (match.start(0), match.end(0), item)

    def __str__(self):
        return makePrintable(str(self.regex), 'ASCII', to_unicode=True)

    def _getAttribute(self, name):
        # Make sure generated attributes are up to date before reading them
        self.commit()
        return getattr(self, name)

    def _getRegex(self):
        return self._getAttribute("_regex")
    regex = property(_getRegex)

    def _getCompiledRegex(self):
        return self._getAttribute("_compiled_regex")
    compiled_regex = property(_getCompiledRegex)

    def _getMaxLength(self):
        return self._getAttribute("_max_length")
    max_length = property(_getMaxLength)
if __name__ == "__main__":
    # Executed as a script: run this module's doctests and exit with
    # status 1 when at least one of them fails.
    import doctest
    import sys

    failed, _attempted = doctest.testmod()
    if failed:
        sys.exit(1)
|