/usr/share/pyshared/MAT/archive.py is in mat 0.4.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
''' Take care of archives formats
'''
import logging
import os
import shutil
import tempfile
import zipfile
import mat
import parser
import tarfile
class GenericArchiveStripper(parser.GenericParser):
    ''' Represent a generic archive

        Every instance owns a private temporary directory (self.tempdir)
        into which archive members are extracted; it is wiped when the
        instance is garbage-collected.
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        ''' Initialise the parent parser, then the archive-specific state.

            kwargs must contain 'add2archive' (a KeyError is raised
            otherwise); it is stored as self.add2archive and forwarded to
            mat.create_class_file() for every extracted member.
        '''
        super(GenericArchiveStripper, self).__init__(
            filename, parser, mime, backup, is_writable, **kwargs)
        # Suffix appended to the tarfile open mode by the tar subclasses
        self.compression = ''
        self.add2archive = kwargs['add2archive']
        self.tempdir = tempfile.mkdtemp()

    def __del__(self):
        ''' Remove the files inside the temp dir,
            then remove the temp dir
        '''
        # __del__ also runs on instances whose __init__ raised before
        # self.tempdir was assigned: there is nothing to clean up then.
        tempdir = getattr(self, 'tempdir', None)
        if tempdir is None:
            return
        for root, _dirs, files in os.walk(tempdir):
            for item in files:
                mat.secure_remove(os.path.join(root, item))
        shutil.rmtree(tempdir)

    def remove_all(self):
        ''' Virtual method to remove all metadata
        '''
        raise NotImplementedError
class ZipStripper(GenericArchiveStripper):
    ''' Represent a zip file
    '''
    def is_file_clean(self, fileinfo):
        ''' Check if a ZipInfo object is clean of metadata added
            by zip itself, independently of the corresponding file metadata

            Return True only when comment, date_time, create_system and
            create_version are all falsy.
        '''
        return not (fileinfo.comment or fileinfo.date_time or
                    fileinfo.create_system or fileinfo.create_version)

    def is_clean(self):
        '''
            Check if the given file is clean from harmful metadata

            Return False as soon as the archive has a comment, a member is
            reported dirty by its own parser, or a member cannot be handled
            and is neither a parser.NOMETA extension nor named 'mimetype' /
            '.rels'. Return True otherwise.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            # Truthiness works whether the comment is a str or a bytes object
            if zipin.comment:
                logging.debug('%s has a comment' % self.filename)
                return False
            for item in zipin.infolist():
                # I have not found a way to remove the crap added by zipfile :/
                # if not self.is_file_clean(item):
                #     logging.debug('%s from %s has compromising zipinfo' %
                #                   (item.filename, self.filename))
                #     return False
                zipin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.filename)
                if not os.path.isfile(name):
                    continue
                try:
                    cfile = mat.create_class_file(
                        name, False, add2archive=self.add2archive)
                    if not cfile.is_clean():
                        return False
                except Exception:
                    # best solution I have found
                    logging.info('%s\'s fileformat is not supported, or is a '
                                 'harmless format' % item.filename)
                    _, ext = os.path.splitext(name)
                    bname = os.path.basename(item.filename)
                    if (ext not in parser.NOMETA and
                            bname not in ('mimetype', '.rels')):
                        return False
            return True
        finally:
            # Release the archive on every exit path, early returns included
            zipin.close()

    def get_meta(self):
        '''
            Return all the metadata of a ZipFile (don't return metadatas
            of contained files : should it ?)

            The result maps each member name to a dict with the keys
            'comment', 'modified', 'system' and 'zip_version', plus one
            '<filename> comment' entry holding the archive comment.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            metadata = {}
            for field in zipin.infolist():
                metadata[field.filename] = {
                    'comment': field.comment,
                    'modified': field.date_time,
                    'system': field.create_system,
                    'zip_version': field.create_version,
                }
            metadata["%s comment" % self.filename] = zipin.comment
        finally:
            zipin.close()
        return metadata

    def remove_all(self):
        '''
            So far, the zipfile module does not allow to write a ZipInfo
            object into a zipfile (and it's a shame !) : so data added
            by zipfile itself could not be removed. It's a big concern.
            Is shipping a patched version of zipfile.py a good idea ?

            Every regular member is cleaned by its own parser and copied to
            self.output; a member that cannot be handled is copied as-is
            only when self.add2archive is set or its extension is in
            parser.NOMETA, and dropped otherwise. Return True.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            try:
                for item in zipin.infolist():
                    zipin.extract(item, self.tempdir)
                    name = os.path.join(self.tempdir, item.filename)
                    if not os.path.isfile(name):
                        continue
                    try:
                        cfile = mat.create_class_file(
                            name, False, add2archive=self.add2archive)
                        cfile.remove_all()
                        logging.debug('Processing %s from %s' %
                                      (item.filename, self.filename))
                        zipout.write(name, item.filename)
                    except Exception:
                        logging.info(
                            '%s\'s format is not supported or harmless' %
                            item.filename)
                        _, ext = os.path.splitext(name)
                        if self.add2archive or ext in parser.NOMETA:
                            zipout.write(name, item.filename)
                # A bytes literal is accepted by zipfile whether comments are
                # str (where b'' is a plain str) or bytes.
                zipout.comment = b''
            finally:
                zipout.close()
        finally:
            zipin.close()
        logging.info('%s treated' % self.filename)
        self.do_backup()
        return True
class TarStripper(GenericArchiveStripper):
    '''
        Represent a tarfile archive

        NOTE(review): members are extracted into self.tempdir under the
        names stored in the archive; on Python versions without tarfile
        extraction filters a crafted archive using absolute or '..' paths
        may be able to write outside that directory -- to be confirmed.
    '''
    def _remove(self, current_file):
        '''
            remove the meta added by tar itself to the file

            Used as the filter callback of TarFile.add(): the TarInfo is
            modified in place and returned.
        '''
        current_file.mtime = 0
        current_file.uid = 0
        current_file.gid = 0
        current_file.uname = ''
        current_file.gname = ''
        return current_file

    def remove_all(self):
        '''
            Write a cleaned copy of the archive to self.output

            Only regular files are copied: each one is cleaned by its own
            parser, and a member that cannot be handled is kept as-is only
            when self.add2archive is set or its extension is in
            parser.NOMETA. Tar-level metadata is reset through _remove().
            Return True.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression,
                             encoding='utf-8')
        try:
            tarout = tarfile.open(self.output, 'w' + self.compression,
                                  encoding='utf-8')
            try:
                for item in tarin.getmembers():
                    tarin.extract(item, self.tempdir)
                    # isfile() works whether TarInfo.type is str or bytes
                    if not item.isfile():
                        continue
                    name = os.path.join(self.tempdir, item.name)
                    # no backup file
                    try:
                        cfile = mat.create_class_file(
                            name, False, add2archive=self.add2archive)
                        cfile.remove_all()
                        tarout.add(name, item.name, filter=self._remove)
                    except Exception:
                        logging.info(
                            '%s\'s format is not supported or harmless' %
                            item.name)
                        _, ext = os.path.splitext(name)
                        if self.add2archive or ext in parser.NOMETA:
                            tarout.add(name, item.name, filter=self._remove)
            finally:
                tarout.close()
        finally:
            tarin.close()
        self.do_backup()
        return True

    def is_file_clean(self, current_file):
        '''
            Check metadatas added by tar

            Return True when mtime, uid and gid are 0 and uname and gname
            are empty strings.
        '''
        return (current_file.mtime == 0 and
                current_file.uid == 0 and
                current_file.gid == 0 and
                current_file.uname == '' and
                current_file.gname == '')

    def is_clean(self):
        '''
            Check if the file is clean from harmful metadatas

            Return False as soon as a member carries tar-level metadata,
            is reported dirty by its own parser, or cannot be handled and
            has an extension outside parser.NOMETA. Return True otherwise.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            for item in tarin.getmembers():
                if not self.is_file_clean(item):
                    return False
                tarin.extract(item, self.tempdir)
                if not item.isfile():
                    continue
                name = os.path.join(self.tempdir, item.name)
                try:
                    # no backup file
                    class_file = mat.create_class_file(
                        name, False, add2archive=self.add2archive)
                    if not class_file.is_clean():
                        return False
                except Exception:
                    # TarInfo exposes the member path as .name; it has no
                    # .filename attribute
                    logging.error(
                        '%s\'s format is not supported or harmless' %
                        item.name)
                    _, ext = os.path.splitext(name)
                    if ext not in parser.NOMETA:
                        return False
            return True
        finally:
            # Single close for every exit path
            tarin.close()

    def get_meta(self):
        '''
            Return a dict with all the meta of the file

            Only regular files that carry tar-level metadata are listed;
            each maps to a dict with the keys 'mtime', 'uid', 'gid',
            'uname' and 'gname'.
        '''
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        try:
            metadata = {}
            for current_file in tarin.getmembers():
                if not current_file.isfile():
                    continue
                if not self.is_file_clean(current_file):  # if there is meta
                    metadata[current_file.name] = {
                        'mtime': current_file.mtime,
                        'uid': current_file.uid,
                        'gid': current_file.gid,
                        'uname': current_file.uname,
                        'gname': current_file.gname,
                    }
        finally:
            tarin.close()
        return metadata
class GzipStripper(TarStripper):
    '''
        Tar archive compressed with gzip (tar.gz)
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        ''' Run the inherited set-up, then select the gzip mode suffix
            that TarStripper appends to its tarfile open modes.
        '''
        parent = super(GzipStripper, self)
        parent.__init__(
            filename, parser, mime, backup, is_writable, **kwargs)
        self.compression = ':gz'
class Bzip2Stripper(TarStripper):
    '''
        Tar archive compressed with bzip2 (tar.bz2)
    '''
    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
        ''' Run the inherited set-up, then select the bzip2 mode suffix
            that TarStripper appends to its tarfile open modes.
        '''
        parent = super(Bzip2Stripper, self)
        parent.__init__(
            filename, parser, mime, backup, is_writable, **kwargs)
        self.compression = ':bz2'
|