/usr/share/pyshared/MAT/parser.py is in mat 0.4.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
''' Parent class of all parsers
'''
import os
import shutil
import tempfile
import hachoir_core
import hachoir_editor
import mat
# File extensions that, by their format, cannot carry metadata.
NOMETA = frozenset((
    '.bmp',   # "raw" image
    '.rdf',   # text
    '.txt',   # plain text
    '.xml',   # formatted text (XML)
    '.rels',  # OpenXML formatted text
))

# Sentinel returned by _should_remove() to mark a field that is itself a
# fieldset: its children must be walked recursively.
FIELD = object()


class GenericParser(object):
    ''' Parent class of all parsers.

    Wraps a hachoir parser/editor pair for one file and provides the
    generic clean-check / strip / metadata-extraction machinery.
    Subclasses only have to implement _should_remove().
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        self.filename = ''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.is_writable = is_writable
        self.editor = hachoir_editor.createEditor(parser)
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        self.basename = os.path.basename(filename)
        # BUGFIX: mkstemp() returns an *open* OS-level file descriptor;
        # the original discarded it, leaking one fd per parser instance.
        # We only need the path, so close the descriptor right away.
        fd, output = tempfile.mkstemp()
        os.close(fd)
        self.output = hachoir_core.cmd_line.unicodeFilename(output)

    def __del__(self):
        ''' Remove the tempfile if it was not used.
        '''
        # getattr() guard: __init__ may have raised before self.output
        # was assigned, and __del__ must never raise.
        output = getattr(self, 'output', None)
        if output and os.path.exists(output):
            mat.secure_remove(output)

    def is_clean(self):
        ''' Check if the file is clean from harmful metadata.

        Returns True when no field of the file is compromising.
        '''
        for field in self.editor:
            if self._should_remove(field):
                return self._is_clean(self.editor)
        return True

    def _is_clean(self, fieldset):
        ''' Recursive worker for is_clean(): False as soon as one
        compromising field is found anywhere in the tree.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD:  # sub-fieldset: inspect its children
                if not self._is_clean(field):
                    return False
        return True

    def remove_all(self):
        ''' Remove all compromising fields and write the stripped file.

        Returns True on success, False if the strip failed.
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        ''' Recursive way to handle tree metadata.
        '''
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                if remove is FIELD:
                    self._remove_all(field)
            return True
        # Narrowed from a bare "except:": best-effort stripping should
        # report failure, not swallow KeyboardInterrupt/SystemExit.
        except Exception:
            return False

    def _remove(self, fieldset, field):
        ''' Delete the given field (by name) from the fieldset.
        '''
        del fieldset[field]

    def get_meta(self):
        ''' Return a dict with all the meta of the file.
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        ''' Recursive way to handle tree metadata: fills `metadata`
        in place with every compromising field of the subtree.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove:
                try:
                    metadata[field.name] = field.value
                except Exception:  # value may be unreadable/binary
                    metadata[field.name] = 'harmful content'
            if remove is FIELD:
                # BUGFIX: recurse with the shared dict. The original
                # passed None, so any nested fieldset crashed with a
                # TypeError instead of being reported.
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        ''' Return True if the field is compromising,
        FIELD if it is a fieldset to recurse into.
        Abstract method: subclasses must implement it.
        '''
        raise NotImplementedError

    def create_backup_copy(self):
        ''' Create a backup copy (filename.bak) of the original file.
        '''
        shutil.copy2(self.filename, self.filename + '.bak')

    def do_backup(self):
        ''' Keep a backup of the file if asked.
        The process of double-renaming is not very elegant,
        but it greatly simplifies new strippers' implementation.
        '''
        if self.backup:
            shutil.move(self.filename, self.filename + '.bak')
        else:
            mat.secure_remove(self.filename)
        shutil.move(self.output, self.filename)