def is_clean(self, list_unsupported=False):
    """ Check if the file is clean from harmful metadatas

        When list_unsupported is True, the method returns a list of all
        non-supported/archives files contained in the archive instead of
        a boolean.

    :param bool list_unsupported: return the list of unsupported/archive
        members instead of a simple boolean
    """
    ret_list = []
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    # FIX: the archive handle used to be leaked on every early
    # `return False`; try/finally guarantees it is closed.
    try:
        for item in tarin.getmembers():
            if not self.is_file_clean(item) and not list_unsupported:
                logging.debug('%s from %s has compromising tarinfo',
                              item.name, self.filename)
                return False
            # Extract the member so the matching MAT handler can inspect it
            tarin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.name)
            if item.isfile():
                cfile = mat.create_class_file(path, False,
                                              add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata',
                                      item.name.decode("utf8"), self.filename)
                        if not list_unsupported:
                            return False
                        # Nested archives are treated like unsupported files
                        elif isinstance(cfile, GenericArchiveStripper):
                            ret_list.append(item.name)
                else:
                    # No handler exists for this member's format
                    logging.info("%s's format is not supported or harmless",
                                 item.name)
                    if os.path.splitext(path)[1] not in parser.NOMETA:
                        if not list_unsupported:
                            return False
                        ret_list.append(item.name)
        if list_unsupported:
            return ret_list
        return True
    finally:
        tarin.close()
def is_clean(self, list_unsupported=False):
    """ Check if the file is clean from harmful metadatas

        When list_unsupported is True, the method returns a list of all
        non-supported/archives files contained in the archive instead of
        a boolean.

    :param bool list_unsupported: return the list of unsupported/archive
        members instead of a simple boolean
    """
    ret_list = []
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    # FIX: the archive handle used to be leaked on every early
    # `return False`; try/finally guarantees it is closed.
    try:
        for item in tarin.getmembers():
            if not self.is_file_clean(item) and not list_unsupported:
                logging.debug('%s from %s has compromising tarinfo',
                              item.name, self.filename)
                return False
            # Extract the member so the matching MAT handler can inspect it
            tarin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.name)
            if item.isfile():
                cfile = mat.create_class_file(path, False,
                                              add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata',
                                      item.name.decode("utf8"), self.filename)
                        if not list_unsupported:
                            return False
                        # Nested archives are treated like unsupported files
                        elif isinstance(cfile, GenericArchiveStripper):
                            ret_list.append(item.name)
                else:
                    logging.error("%s's format is not supported or harmless",
                                  item.name)
                    if os.path.splitext(path)[1] not in parser.NOMETA:
                        if not list_unsupported:
                            return False
                        ret_list.append(item.name)
        if list_unsupported:
            return ret_list
        return True
    finally:
        tarin.close()
def is_clean(self):
    ''' Check if the given file is clean from harmful metadata '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    # FIX: the archive handle used to be leaked on every early
    # `return False`; try/finally guarantees it is closed.
    try:
        if zipin.comment != '':
            logging.debug('%s has a comment' % self.filename)
            return False
        for item in zipin.infolist():
            #I have not found a way to remove the crap added by zipfile :/
            #if not self.is_file_clean(item):
            #    logging.debug('%s from %s has compromizing zipinfo' %
            #            (item.filename, self.filename))
            #    return False
            zipin.extract(item, self.tempdir)
            name = os.path.join(self.tempdir, item.filename)
            if os.path.isfile(name):
                try:
                    cfile = mat.create_class_file(name, False, self.add2archive)
                    if not cfile.is_clean():
                        return False
                # FIX: narrowed the bare `except:` (which also swallowed
                # KeyboardInterrupt/SystemExit); unsupported formats raise
                # ordinary exceptions, which is still the signal used here.
                except Exception:
                    # FIX: the old backslash-continued literal leaked the
                    # source indentation into the logged message.
                    logging.info('%s\'s fileformat is not supported, '
                                 'or is a harmless format' % item.filename)
                    _, ext = os.path.splitext(name)
                    bname = os.path.basename(item.filename)
                    if ext not in parser.NOMETA:
                        # 'mimetype' and '.rels' are harmless structural files
                        if bname != 'mimetype' and bname != '.rels':
                            return False
        return True
    finally:
        zipin.close()
def get_meta(self):
    """ Return a dict with all the meta of the tarfile

        Maps each member name to the stringified dict of its metadata:
        the inner file's metadata under the 'file' key, plus the
        tarinfo-level fields (mtime/uid/gid/uname/gname) when present.
    """
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    metadata = {}
    for item in tarin.getmembers():
        current_meta = {}
        if item.isfile():
            # Extract the member so the matching MAT handler can inspect it
            tarin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.name)
            class_file = mat.create_class_file(
                path, False, add2archive=self.add2archive)
            if class_file is not None:
                meta = class_file.get_meta()
                if meta:
                    current_meta['file'] = str(meta)
            else:
                logging.error("%s's format is not supported or harmless",
                              item.name)
        if not self.is_file_clean(item):  # the tarinfo itself carries meta
            current_meta['mtime'] = item.mtime
            current_meta['uid'] = item.uid
            current_meta['gid'] = item.gid
            current_meta['uname'] = item.uname
            current_meta['gname'] = item.gname
        # FIX: record the entry whenever *any* metadata was found; the
        # inner-file metadata used to be silently dropped when the
        # tarinfo itself happened to be clean.
        if current_meta:
            metadata[item.name] = str(current_meta)
    tarin.close()
    return metadata
def get_meta(self):
    """ Return all the metadata of a zip archive"""
    archive = zipfile.ZipFile(self.filename, 'r')
    found = {}
    # The archive itself may carry a comment
    if archive.comment != '':
        found['comment'] = archive.comment
    for entry in archive.infolist():
        # Metadata carried by the zip entry header itself
        entry_meta = self.__get_zipinfo_meta(entry)
        if entry_meta != {}:  # zipinfo metadata
            found[entry.filename + "'s zipinfo"] = str(entry_meta)
        # Extract the member and hand it to the matching MAT handler
        archive.extract(entry, self.tempdir)
        extracted = os.path.join(self.tempdir, entry.filename)
        if not os.path.isfile(extracted):
            continue
        handler = mat.create_class_file(extracted, False,
                                        add2archive=self.add2archive)
        if handler is None:
            logging.info(
                '%s\'s fileformat is not supported or harmless',
                entry.filename)
            continue
        inner_meta = handler.get_meta()
        if inner_meta != {}:
            found[entry.filename] = str(inner_meta)
    archive.close()
    return found
def get_meta(self):
    """ Return a dict with all the meta of the tarfile """
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    metadata = {}
    for item in tarin.getmembers():
        current_meta = {}
        if item.isfile():
            # Extract the member so the matching MAT handler can inspect it
            tarin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.name)
            class_file = mat.create_class_file(path, False, add2archive=self.add2archive)
            if class_file is not None:
                meta = class_file.get_meta()
                if meta:
                    current_meta['file'] = str(meta)
            else:
                logging.error("%s's format is not supported or harmless", item.name)
        if not self.is_file_clean(item):  # if there is meta
            # tarinfo-level metadata: timestamp and ownership fields
            current_meta['mtime'] = item.mtime
            current_meta['uid'] = item.uid
            current_meta['gid'] = item.gid
            current_meta['uname'] = item.uname
            current_meta['gname'] = item.gname
            # NOTE(review): the inner-file meta gathered above is only
            # reported when the tarinfo itself is dirty — looks like the
            # 'file' entry can be silently dropped; confirm intent.
            metadata[item.name] = str(current_meta)
    tarin.close()
    return metadata
def remove_all(self, whitelist=None):
    """ Remove all harmful metadata from the tarfile.
        The method will also add every files matching
        whitelist in the produced archive.

    :param list whitelist: members to add to the produced archive even
        though their format is unsupported
    """
    if not whitelist:
        whitelist = []
    tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8')
    tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8')
    for item in tarin.getmembers():
        tarin.extract(item, self.tempdir)
        if item.isfile():
            path = os.path.join(self.tempdir, item.name)
            cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
            if cfile is not None:
                # Handle read-only files inside archive: grant the user
                # write permission for the cleaning, then restore the mode
                old_stat = os.stat(path).st_mode
                os.chmod(path, old_stat | stat.S_IWUSR)
                cfile.remove_all()
                os.chmod(path, old_stat)
            elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA:
                logging.debug('%s\' format is either not supported or harmless' % item.name)
            elif item.name in whitelist:
                logging.debug('%s is not supported, but MAT was told to add it anyway.' % item.name)
            else:
                # Don't add the file to the archive
                logging.debug('%s will not be added' % item.name)
                continue
            # Python 2: decode byte strings so tarfile writes the names
            # with the utf-8 encoding requested above
            tarout.add(unicode(path.decode('utf-8')), unicode(item.name.decode('utf-8')),
                       filter=self._remove_tar_added)
    tarin.close()
    tarout.close()
    self.do_backup()
    return True
def _remove_all(self, method):
    ''' So far, the zipfile module does not allow to write a ZipInfo
        object into a zipfile (and it's a shame !) : so data added
        by zipfile itself could not be removed. It's a big concern.
        Is shiping a patched version of zipfile.py a good idea ?

    :param str method: 'normal' uses remove_all(); any other value
        uses remove_all_ugly()
    '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
    for item in zipin.infolist():
        zipin.extract(item, self.tempdir)
        name = os.path.join(self.tempdir, item.filename)
        if os.path.isfile(name):
            try:
                cfile = mat.create_class_file(name, False, self.add2archive)
                # FIX: was `method is 'normal'` — identity comparison on a
                # string literal only works by accident of interning.
                if method == 'normal':
                    cfile.remove_all()
                else:
                    cfile.remove_all_ugly()
                logging.debug('Processing %s from %s' % (item.filename,
                                                         self.filename))
                zipout.write(name, item.filename)
            # FIX: narrowed the bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            except Exception:
                logging.info('%s\'s format is not supported or harmless'
                             % item.filename)
                _, ext = os.path.splitext(name)
                if self.add2archive or ext in parser.NOMETA:
                    zipout.write(name, item.filename)
    # Strip the archive-level comment as well
    zipout.comment = ''
    zipin.close()
    zipout.close()
    logging.info('%s treated' % self.filename)
    self.do_backup()
def _remove_all(self, method):
    """ Remove metadata from every regular member of the tarfile,
        writing the cleaned members into the output archive.

    :param str method: 'normal' uses remove_all(); any other value
        uses remove_all_ugly()
    """
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    tarout = tarfile.open(self.output, 'w' + self.compression)
    for item in tarin.getmembers():
        tarin.extract(item, self.tempdir)
        name = os.path.join(self.tempdir, item.name)
        # FIX: was `item.type is '0'` — identity comparison on a string
        # literal; compare against the tarfile constant instead.
        if item.type == tarfile.REGTYPE:  # is item a regular file ?
            #no backup file
            try:
                cfile = mat.create_class_file(name, False, self.add2archive)
                # FIX: was `method is 'normal'` — same identity-comparison bug.
                if method == 'normal':
                    cfile.remove_all()
                else:
                    cfile.remove_all_ugly()
                tarout.add(name, item.name, filter=self._remove)
            # FIX: narrowed the bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            except Exception:
                logging.info('%s\' format is not supported or harmless'
                             % item.name)
                _, ext = os.path.splitext(name)
                if self.add2archive or ext in parser.NOMETA:
                    tarout.add(name, item.name, filter=self._remove)
    tarin.close()
    tarout.close()
    self.do_backup()
def is_clean(self):
    ''' Check if the file is clean from harmful metadatas '''
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    for item in tarin.getmembers():
        if not self.is_file_clean(item):
            tarin.close()
            return False
        tarin.extract(item, self.tempdir)
        name = os.path.join(self.tempdir, item.name)
        # FIX: was `item.type is '0'` — identity comparison on a string
        # literal; compare against the tarfile constant instead.
        if item.type == tarfile.REGTYPE:  # is item a regular file ?
            try:
                class_file = mat.create_class_file(
                    name, False, self.add2archive)  # no backup file
                if not class_file.is_clean():
                    tarin.close()
                    return False
            # FIX: narrowed the bare `except:`.
            except Exception:
                # FIX: TarInfo has no `filename` attribute — `item.filename`
                # raised AttributeError inside this handler; also fixed the
                # 'foramt' typo.
                logging.error('%s\'s format is not supported or harmless'
                              % item.name)
                _, ext = os.path.splitext(name)
                if ext not in parser.NOMETA:
                    tarin.close()
                    return False
    tarin.close()
    return True
def is_clean(self):
    ''' Check if the file is clean from harmful metadatas '''
    tarin = tarfile.open(self.filename, 'r' + self.compression)
    for item in tarin.getmembers():
        if not self.is_file_clean(item):
            tarin.close()
            return False
        tarin.extract(item, self.tempdir)
        name = os.path.join(self.tempdir, item.name)
        # FIX: was `item.type is '0'` — identity comparison on a string
        # literal; compare against the tarfile constant instead.
        if item.type == tarfile.REGTYPE:  # is item a regular file ?
            try:
                class_file = mat.create_class_file(
                    name, False, self.add2archive)  # no backup file
                if not class_file.is_clean():
                    tarin.close()
                    return False
            # FIX: narrowed the bare `except:`.
            except Exception:
                # FIX: TarInfo has no `filename` attribute — `item.filename`
                # raised AttributeError inside this handler; also fixed the
                # 'foramt' typo.
                logging.error('%s\'s format is not supported or harmless'
                              % item.name)
                _, ext = os.path.splitext(name)
                if ext not in parser.NOMETA:
                    tarin.close()
                    return False
    tarin.close()
    return True
def remove_all(self, whitelist=None, beginning_blacklist=None, ending_blacklist=None):
    """ Remove all metadata from a zip archive, even thoses
        added by Python's zipfile itself. It will not add
        files starting with "begining_blacklist", or ending with
        "ending_blacklist". This method also add files present in
        whitelist to the archive.

    :param list whitelist: Add those files to the produced archive,
        regardless if they are harmful or not
    :param list beginning_blacklist: If the file starts with
        $ending_blacklist, it will _not_ be added
    :param list ending_blacklist: If the file end with
        $ending_blacklist, it will _not_ be added
    """
    if not ending_blacklist:
        ending_blacklist = []
    if not beginning_blacklist:
        beginning_blacklist = []
    if not whitelist:
        whitelist = []
    zipin = zipfile.ZipFile(self.filename, 'r')
    zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
    for item in zipin.infolist():
        zipin.extract(item, self.tempdir)
        path = os.path.join(self.tempdir, item.filename)
        # Does the member match either blacklist?
        beginning = any((True for f in beginning_blacklist if item.filename.startswith(f)))
        ending = any((True for f in ending_blacklist if item.filename.endswith(f)))
        if os.path.isfile(path) and not beginning and not ending:
            cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
            if cfile is not None:
                # Handle read-only files inside archive: grant write
                # permission for the cleaning, then restore the mode
                old_stat = os.stat(path).st_mode
                os.chmod(path, old_stat | stat.S_IWUSR)
                cfile.remove_all()
                os.chmod(path, old_stat)
                logging.debug('Processing %s from %s', item.filename, self.filename)
            elif item.filename not in whitelist:
                logging.info("%s's format is not supported or harmless", item.filename)
                _, ext = os.path.splitext(path)
                if not (self.add2archive or ext in parser.NOMETA):
                    # not whitelisted, not harmless: skip the member entirely
                    continue
            # Re-add the member through a fresh ZipInfo so none of the
            # original entry-level metadata survives
            zinfo = zipfile.ZipInfo(item.filename, date_time=ZIP_EPOCH)
            zinfo.compress_type = zipfile.ZIP_DEFLATED
            zinfo.create_system = 3  # Linux
            zinfo.comment = ''
            with open(path, 'r') as f:
                zipout.writestr(zinfo, f.read())
            # os.utime(path, (ZIP_EPOCH_SECONDS, ZIP_EPOCH_SECONDS))
            # zipout.write(path, item.filename)
    zipin.close()
    zipout.close()
    logging.info('%s processed', self.filename)
    self.do_backup()
    return True
def _remove_all(self, method):
    ''' FIXME ? There is a patch implementing the Zipfile.remove()
        method here : http://bugs.python.org/issue6818

        Drops 'meta.xml', rewrites the manifest to no longer reference
        it, and cleans every other member — presumably an
        OpenDocument-style archive; confirm against the enclosing class.

    :param str method: 'normal' uses remove_all(); any other value
        uses remove_all_ugly()
    '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
    for item in zipin.namelist():
        name = os.path.join(self.tempdir, item)
        _, ext = os.path.splitext(name)
        if item.endswith('manifest.xml'):
            # contain the list of all files present in the archive
            zipin.extract(item, self.tempdir)
            # Rewrite the manifest in place (fileinput redirects stdout,
            # so the Python 2 `print` writes back into the file)
            for line in fileinput.input(name, inplace=1):
                #remove the line which contains "meta.xml"
                line = line.strip()
                if not 'meta.xml' in line:
                    print line
            zipout.write(name, item)
        elif ext in parser.NOMETA or item == 'mimetype':
            #keep NOMETA files, and the "manifest" file
            if item != 'meta.xml':  # contains the metadata
                zipin.extract(item, self.tempdir)
                zipout.write(name, item)
        else:
            zipin.extract(item, self.tempdir)
            if os.path.isfile(name):
                try:
                    cfile = mat.create_class_file(name, False, self.add2archive)
                    if method == 'normal':
                        cfile.remove_all()
                    else:
                        cfile.remove_all_ugly()
                    logging.debug('Processing %s from %s' % (item, self.filename))
                    zipout.write(name, item)
                except:
                    logging.info('%s\' fileformat is not supported' % item)
                    if self.add2archive:
                        zipout.write(name, item)
    # Strip the archive-level comment as well
    zipout.comment = ''
    logging.info('%s treated' % self.filename)
    zipin.close()
    zipout.close()
    self.do_backup()
def is_clean(self, list_unsupported=False):
    """ Check if the given file is clean from harmful metadata

        When list_unsupported is True, the method returns a list of all
        non-supported/archives files contained in the archive instead of
        a boolean.

    :param bool list_unsupported: Should the list of unsupported files
        be returned
    """
    ret_list = []
    zipin = zipfile.ZipFile(self.filename, 'r')
    # FIX: the archive handle used to be leaked on every early
    # `return False`; try/finally guarantees it is closed.
    try:
        if zipin.comment != '' and not list_unsupported:
            logging.debug('%s has a comment', self.filename)
            return False
        for item in zipin.infolist():
            zipin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.filename)
            if not self.__is_zipfile_clean(item) and not list_unsupported:
                logging.debug('%s from %s has compromising zipinfo',
                              item.filename, self.filename)
                return False
            if os.path.isfile(path):
                cfile = mat.create_class_file(path, False,
                                              add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata',
                                      item.filename, self.filename)
                        if not list_unsupported:
                            return False
                else:
                    logging.info(
                        '%s\'s fileformat is not supported or harmless.',
                        item.filename)
                    _, ext = os.path.splitext(path)
                    # 'mimetype' and '.rels' are harmless structural files
                    if os.path.basename(item.filename) not in ('mimetype', '.rels'):
                        if ext not in parser.NOMETA:
                            if not list_unsupported:
                                return False
                            ret_list.append(item.filename)
        if list_unsupported:
            return ret_list
        return True
    finally:
        zipin.close()
def is_clean(self, list_unsupported=False):
    """ Check if the given file is clean from harmful metadata

        When list_unsupported is True, the method returns a list of all
        non-supported/archives files contained in the archive instead of
        a boolean.

    :param bool list_unsupported: Should the list of unsupported files
        be returned
    """
    ret_list = []
    zipin = zipfile.ZipFile(self.filename, 'r')
    # FIX: the archive handle used to be leaked on every early
    # `return False`; try/finally guarantees it is closed.
    try:
        if zipin.comment != '' and not list_unsupported:
            logging.debug('%s has a comment' % self.filename)
            return False
        for item in zipin.infolist():
            zipin.extract(item, self.tempdir)
            path = os.path.join(self.tempdir, item.filename)
            if not self.__is_zipfile_clean(item) and not list_unsupported:
                logging.debug('%s from %s has compromising zipinfo' %
                              (item.filename, self.filename))
                return False
            if os.path.isfile(path):
                cfile = mat.create_class_file(path, False,
                                              add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata' %
                                      (item.filename, self.filename))
                        if not list_unsupported:
                            return False
                else:
                    logging.info('%s\'s fileformat is not supported or harmless.'
                                 % item.filename)
                    # FIX: `basename` was assigned but never used (the code
                    # uses os.path.basename() below instead).
                    _, ext = os.path.splitext(path)
                    # 'mimetype' and '.rels' are harmless structural files
                    if os.path.basename(item.filename) not in ('mimetype', '.rels'):
                        if ext not in parser.NOMETA:
                            if not list_unsupported:
                                return False
                            ret_list.append(item.filename)
        if list_unsupported:
            return ret_list
        return True
    finally:
        zipin.close()
def _remove_all(self, method):
    ''' FIXME ? There is a patch implementing the Zipfile.remove()
        method here : http://bugs.python.org/issue6818

    :param str method: 'normal' uses remove_all(); any other value
        uses remove_all_ugly()
    '''
    zipin = zipfile.ZipFile(self.filename, 'r')
    zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
    for item in zipin.namelist():
        name = os.path.join(self.tempdir, item)
        _, ext = os.path.splitext(name)
        if item.startswith('docProps/'):  # metadatas
            pass
        elif ext in parser.NOMETA or item == '.rels':
            #keep parser.NOMETA files, and the file named ".rels"
            zipin.extract(item, self.tempdir)
            zipout.write(name, item)
        else:
            zipin.extract(item, self.tempdir)
            if os.path.isfile(name):  # don't care about folders
                try:
                    cfile = mat.create_class_file(name, False, self.add2archive)
                    if method == 'normal':
                        cfile.remove_all()
                    else:
                        cfile.remove_all_ugly()
                    logging.debug('Processing %s from %s' % (item, self.filename))
                    zipout.write(name, item)
                # FIX: narrowed the bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                except Exception:
                    logging.info('%s\' fileformat is not supported' % item)
                    if self.add2archive:
                        zipout.write(name, item)
    # Strip the archive-level comment as well
    zipout.comment = ''
    logging.info('%s treated' % self.filename)
    zipin.close()
    zipout.close()
    self.do_backup()
def get_meta(self):
    """ Return all the metadata of a zip archive"""
    zipin = zipfile.ZipFile(self.filename, 'r')
    metadata = {}
    if zipin.comment != '':  # the archive itself may carry a comment
        metadata['comment'] = zipin.comment
    for item in zipin.infolist():
        # Metadata carried by the zip entry header itself
        zipinfo_meta = self.__get_zipinfo_meta(item)
        if zipinfo_meta != {}:  # zipinfo metadata
            metadata[item.filename + "'s zipinfo"] = str(zipinfo_meta)
        # Extract the member so the matching MAT handler can inspect it
        zipin.extract(item, self.tempdir)
        path = os.path.join(self.tempdir, item.filename)
        if os.path.isfile(path):
            cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
            if cfile is not None:
                cfile_meta = cfile.get_meta()
                if cfile_meta != {}:
                    metadata[item.filename] = str(cfile_meta)
            else:
                # No handler for this format: nothing to report
                logging.info('%s\'s fileformat is not supported or harmless', item.filename)
    zipin.close()
    return metadata
def remove_all(self, whitelist=None):
    """ Strip every harmful metadata from the tarfile, writing the
        cleaned members into the produced archive; whitelisted members
        are re-added even when their format is unsupported.

    :param list whitelist: Files to add the to produced archive,
        regardless if they are considered harmfull.
    """
    whitelist = whitelist or []
    tarin = tarfile.open(self.filename, 'r' + self.compression,
                         encoding='utf-8')
    tarout = tarfile.open(self.output, 'w' + self.compression,
                          encoding='utf-8')
    for member in tarin.getmembers():
        tarin.extract(member, self.tempdir)
        if not member.isfile():
            continue
        extracted = os.path.join(self.tempdir, member.name)
        handler = mat.create_class_file(extracted, False,
                                        add2archive=self.add2archive)
        if handler is not None:
            # Temporarily grant write permission so read-only members
            # can be cleaned, then restore the original mode
            saved_mode = os.stat(extracted).st_mode
            os.chmod(extracted, saved_mode | stat.S_IWUSR)
            handler.remove_all()
            os.chmod(extracted, saved_mode)
        elif self.add2archive or os.path.splitext(member.name)[1] in parser.NOMETA:
            logging.debug('%s\' format is either not supported or harmless' % member.name)
        elif member.name in whitelist:
            logging.debug('%s is not supported, but MAT was told to add it anyway.' % member.name)
        else:
            # Neither supported, harmless, nor whitelisted: drop it
            logging.debug('%s will not be added' % member.name)
            continue
        tarout.add(unicode(extracted.decode('utf-8')),
                   unicode(member.name.decode('utf-8')),
                   filter=self._remove_tar_added)
    tarin.close()
    tarout.close()
    self.do_backup()
    return True