/usr/share/pyshared/hachoir_parser/guess.py is in python-hachoir-parser 1.3.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | """
Parser list management:
- createParser() finds the best parser for a file.
"""
import os
from hachoir_core.error import warning, info, HACHOIR_ERRORS
from hachoir_parser import ValidateError, HachoirParserList
from hachoir_core.stream import FileInputStream
from hachoir_core.i18n import _
class QueryParser(object):
    """
    Ordered list of candidate parser classes built from a list of tags.

    Parsers selected by the tags come first, in tag order; parsers that no
    tag selected are appended afterwards. parse() then tries the candidates
    one after the other on a stream.

    Instance attributes set by __init__():
    - validate: value passed as ``validate=`` to each parser constructor;
    - use_fallback: when true, parse() may return the remembered fallback
      parser after every candidate failed;
    - parser_args: optional dict of attributes set on the created parser;
    - db: the HachoirParserList instance used for tag lookups;
    - parsers: the ordered candidate list (a set while tags are processed).
    """

    # Decided once, after the first tag is processed: true when that tag
    # selected exactly one parser. A subclass may preset it.
    fallback = None

    # First parser of the "not selected by any tag" tail, if there is one.
    # parse() switches its skip messages from warning() to info() there.
    other = None

    def __init__(self, tags):
        """
        Build the candidate list from *tags*.

        Each tag is handed to _getByTag(); the parsers it returns are
        appended to the ordered list. Tag processing stops as soon as the
        pool of remaining parsers is empty.
        """
        self.validate = True
        self.use_fallback = False
        self.parser_args = None
        self.db = HachoirParserList.getInstance()
        self.parsers = set(self.db)

        ordered = []
        for tag in tags:
            if not self.parsers:
                break
            ordered += self._getByTag(tag)
            if self.fallback is None:
                self.fallback = len(ordered) == 1

        if self.parsers:
            # Remember where the unselected parsers start.
            first_other = len(ordered)
            ordered += list(self.parsers)
            self.other = ordered[first_other]
        self.parsers = ordered

    def __iter__(self):
        """Iterate over the candidate parsers in the order they are tried."""
        return iter(self.parsers)

    def translate(self, name, value):
        """
        Normalize a (name, value) tag before the database lookup.

        A "filename" tag becomes a "file_ext" tag whose value is the
        lower-cased text after the last dot of the base name, or "" when
        the base name contains no dot. Other tags are returned unchanged.
        """
        if name == "filename":
            pieces = os.path.basename(value).split(".")
            if len(pieces) <= 1:
                value = ""
            else:
                value = pieces[-1].lower()
            name = "file_ext"
        return name, value

    def _getByTag(self, tag):
        """
        Return the parsers selected by one tag, as a list.

        Supported tags:
        - None: empty the pool of remaining parsers, select nothing;
        - a callable: select (and remove from the pool) every remaining
          parser for which ``tag(parser)`` is true;
        - ("class", cls): select *cls* and disable validation;
        - ("args", mapping): store *mapping* in parser_args, select nothing;
        - any other (name, value) pair: after translate(), look the pair up
          in ``self.db.bytag`` and select the matching parsers that are
          still in the pool. A value of None matches every value stored
          under that name.

        For an "id" tag, validation is disabled only when the database
        actually holds a parser for it.
        """
        if tag is None:
            self.parsers.clear()
            return []

        if callable(tag):
            selected = [parser for parser in self.parsers if tag(parser)]
            for parser in selected:
                self.parsers.remove(parser)
            return selected

        if tag[0] == "class":
            self.validate = False
            return [tag[1]]

        if tag[0] == "args":
            self.parser_args = tag[1]
            return []

        tag = self.translate(*tag)
        selected = []
        if tag is not None:
            key = tag[0]
            wanted = tag[1]
            by_value = self.db.bytag.get(key, {})
            if wanted is None:
                groups = list(by_value.values())
            else:
                groups = [by_value.get(wanted, ())]
            # Test the content of the groups, not the container: a
            # one-element wrapper is truthy even when the lookup found
            # nothing, which used to disable validation for unknown ids.
            if key == "id" and any(groups):
                self.validate = False
            for group in groups:
                for parser in group:
                    if parser in self.parsers:
                        selected.append(parser)
                        self.parsers.remove(parser)
        return selected

    def parse(self, stream, fallback=True):
        """
        Try each candidate parser on *stream* and return the first success.

        A parser is created with ``parser(stream, validate=self.validate)``;
        when parser_args is set, each of its items is assigned as an
        attribute of the new parser object. ValidateError and
        HACHOIR_ERRORS raised while doing so skip to the next candidate and
        are reported through warning() (info() from self.other onwards).

        Only the first candidate can be remembered as fallback, and only if
        it raised ValidateError while both *fallback* and self.fallback are
        true. It is instantiated without the validate argument and returned
        when every candidate failed and use_fallback is set.

        Returns the parser object, or None when nothing could be created.
        """
        forced = None
        warn = warning
        for parser in self.parsers:
            try:
                parser_obj = parser(stream, validate=self.validate)
                if self.parser_args:
                    for key, value in self.parser_args.items():
                        setattr(parser_obj, key, value)
                return parser_obj
            except ValidateError as err:
                res = unicode(err)
                if fallback and self.fallback:
                    forced = parser
            except HACHOIR_ERRORS as err:
                res = unicode(err)
            if warn:
                if parser == self.other:
                    warn = info
                warn(_("Skip parser '%s': %s") % (parser.__name__, res))
            # Candidates after the first one never become the fallback.
            fallback = False
        if self.use_fallback and forced:
            warning(_("Force use of parser '%s'") % forced.__name__)
            return forced(stream)
        return None
def guessParser(stream):
    """
    Choose a parser for *stream* from the tags attached to it.

    A QueryParser is built from ``stream.tags`` and the result of its
    parse() call on the stream is returned unchanged.
    """
    query = QueryParser(stream.tags)
    return query.parse(stream)
def createParser(filename, real_filename=None, tags=None):
    """
    Open a file as an input stream and guess a parser for it.

    Options:
    - filename (unicode): Input file name ;
    - real_filename (str|unicode): Real file name ;
    - tags: optional tag list passed to FileInputStream; any false value
      (None, empty list) is replaced by a new empty list.

    Returns the result of guessParser() for the opened stream.
    """
    stream_tags = tags if tags else []
    stream = FileInputStream(filename, real_filename, tags=stream_tags)
    return guessParser(stream)
|