Пример #1
0
 def is_clean(self, list_unsupported=False):
     """ Check if the archive is clean from harmful metadata.

         Each member is extracted into self.tempdir; regular files are
         then checked through the object returned by
         mat.create_class_file().

         When list_unsupported is True, the method returns a list
         of all non-supported/archives files contained in the
         archive instead of a boolean.

         :param bool list_unsupported: Should the list of unsupported
             files be returned
         :return: a list of member names if list_unsupported is True,
             otherwise True when nothing harmful was found, else False
     """
     ret_list = []
     tarin = tarfile.open(self.filename, 'r' + self.compression)
     # try/finally guarantees the archive is closed on every early
     # return (and on errors), not only when the loop runs to completion.
     try:
         for item in tarin.getmembers():
             if not self.is_file_clean(item) and not list_unsupported:
                 logging.debug('%s from %s has compromising tarinfo', item.name, self.filename)
                 return False
             # NOTE(review): member names come from the archive itself;
             # confirm that extraction cannot escape self.tempdir.
             tarin.extract(item, self.tempdir)
             if not item.isfile():
                 continue
             path = os.path.join(self.tempdir, item.name)
             cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
             if cfile is None:
                 logging.info("%s's format is not supported or harmless", item.name)
                 if os.path.splitext(path)[1] not in parser.NOMETA:
                     if not list_unsupported:
                         return False
                     ret_list.append(item.name)
             elif not cfile.is_clean():
                 # The name is logged as-is: decoding it here could make
                 # a plain debug message raise on odd member names.
                 logging.debug('%s from %s has metadata', item.name, self.filename)
                 if not list_unsupported:
                     return False
                 # Nested archives are treated like unsupported files
                 if isinstance(cfile, GenericArchiveStripper):
                     ret_list.append(item.name)
     finally:
         tarin.close()
     if list_unsupported:
         return ret_list
     return True
Пример #2
0
 def is_clean(self, list_unsupported=False):
     """ Check if the archive is clean from harmful metadata.

         Members are extracted one by one into self.tempdir and every
         regular file is handed to mat.create_class_file() for checking.

         When list_unsupported is True, the method returns a list
         of all non-supported/archives files contained in the
         archive instead of a boolean.

         :param bool list_unsupported: Should the list of unsupported
             files be returned
         :return: a list of member names if list_unsupported is True,
             otherwise True when nothing harmful was found, else False
     """
     ret_list = []
     tarin = tarfile.open(self.filename, 'r' + self.compression)
     # The archive used to stay open whenever the method returned early;
     # the finally clause closes it on every exit path.
     try:
         for item in tarin.getmembers():
             if not self.is_file_clean(item) and not list_unsupported:
                 logging.debug('%s from %s has compromising tarinfo', item.name, self.filename)
                 return False
             # NOTE(review): member names come from the archive itself;
             # confirm that extraction cannot escape self.tempdir.
             tarin.extract(item, self.tempdir)
             if not item.isfile():
                 continue
             path = os.path.join(self.tempdir, item.name)
             cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
             if cfile is None:
                 logging.error("%s's format is not supported or harmless", item.name)
                 if os.path.splitext(path)[1] not in parser.NOMETA:
                     if not list_unsupported:
                         return False
                     ret_list.append(item.name)
             elif not cfile.is_clean():
                 # Log the raw name, like the tarinfo message above does;
                 # decoding it could raise inside a debug statement.
                 logging.debug('%s from %s has metadata', item.name, self.filename)
                 if not list_unsupported:
                     return False
                 # Nested archives are treated like unsupported files
                 if isinstance(cfile, GenericArchiveStripper):
                     ret_list.append(item.name)
     finally:
         tarin.close()
     if list_unsupported:
         return ret_list
     return True
    def is_clean(self):
        '''
            Check if the given file is clean from harmful metadata

            Returns False when the archive carries a comment, when the
            handler of an extracted member reports it dirty, or when a
            member that could not be checked has an extension outside
            parser.NOMETA (members named 'mimetype' or '.rels' are
            tolerated). Returns True otherwise.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        # try/finally: the archive is closed on the early returns too
        try:
            # Truthiness works whether zipfile exposes the comment as
            # str or as bytes.
            if zipin.comment:
                logging.debug('%s has a comment' % self.filename)
                return False
            for item in zipin.infolist():
                # The zipinfo of each member is not checked: no way was
                # found to remove what zipfile itself adds.
                zipin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.filename)
                if not os.path.isfile(name):
                    continue
                try:
                    cfile = mat.create_class_file(name, False,
                                                  self.add2archive)
                    if not cfile.is_clean():
                        return False
                except Exception:
                    # Best effort: a member that cannot be checked is
                    # judged on its name only. Exception (not a bare
                    # except) lets KeyboardInterrupt/SystemExit through.
                    logging.info("%s's fileformat is not supported, "
                                 "or is a harmless format", item.filename)
                    _, ext = os.path.splitext(name)
                    bname = os.path.basename(item.filename)
                    if ext not in parser.NOMETA:
                        if bname != 'mimetype' and bname != '.rels':
                            return False
        finally:
            zipin.close()
        return True
Пример #4
0
    def get_meta(self):
        """ Collect the metadata of the regular files stored in the tarfile

            An entry is only recorded for members whose tarinfo is
            reported dirty by self.is_file_clean(); it then holds the
            tarinfo fields and, when the file handler returned some,
            the metadata of the file itself under the 'file' key.

            :return: dict mapping member names to the string form of
                their metadata
        """
        collected = {}
        archive = tarfile.open(self.filename, 'r' + self.compression)
        for member in archive.getmembers():
            if not member.isfile():
                continue
            archive.extract(member, self.tempdir)
            extracted = os.path.join(self.tempdir, member.name)
            handler = mat.create_class_file(
                extracted, False, add2archive=self.add2archive)
            member_meta = {}
            if handler is None:
                logging.error("%s's format is not supported or harmless",
                              member.name)
            else:
                file_meta = handler.get_meta()
                if file_meta:
                    member_meta['file'] = str(file_meta)

            if self.is_file_clean(member):
                continue
            # the tarinfo itself carries metadata
            for field in ('mtime', 'uid', 'gid', 'uname', 'gname'):
                member_meta[field] = getattr(member, field)
            collected[member.name] = str(member_meta)
        archive.close()
        return collected
Пример #5
0
 def get_meta(self):
     """ Gather the metadata of a zip archive and of the files it holds

         :return: dict with a 'comment' entry when the archive comment
             differs from '', one "<name>'s zipinfo" entry per member
             whose zipinfo metadata is not {}, and one entry per member
             whose own metadata is not {}
     """
     archive = zipfile.ZipFile(self.filename, 'r')
     found = {}
     if archive.comment != '':
         found['comment'] = archive.comment
     for entry in archive.infolist():
         entry_info = self.__get_zipinfo_meta(entry)
         if entry_info != {}:  # the zipinfo holds metadata
             found[entry.filename + "'s zipinfo"] = str(entry_info)
         archive.extract(entry, self.tempdir)
         extracted = os.path.join(self.tempdir, entry.filename)
         if not os.path.isfile(extracted):
             continue
         handler = mat.create_class_file(extracted, False,
                                         add2archive=self.add2archive)
         if handler is None:
             logging.info("%s's fileformat is not supported or harmless",
                          entry.filename)
             continue
         file_meta = handler.get_meta()
         if file_meta != {}:
             found[entry.filename] = str(file_meta)
     archive.close()
     return found
Пример #6
0
    def get_meta(self):
        """ Build a dict describing the metadata found in the tarfile

            Only regular members are inspected. A member ends up in the
            result only when self.is_file_clean() reports its tarinfo as
            dirty; its value is the string form of a dict holding the
            tarinfo fields plus, under 'file', the metadata returned by
            the file handler (when there is any).
        """
        tarinfo_fields = ('mtime', 'uid', 'gid', 'uname', 'gname')
        tarin = tarfile.open(self.filename, 'r' + self.compression)
        report = {}
        for entry in (m for m in tarin.getmembers() if m.isfile()):
            tarin.extract(entry, self.tempdir)
            target = os.path.join(self.tempdir, entry.name)
            stripper = mat.create_class_file(target, False, add2archive=self.add2archive)
            entry_meta = {}
            if stripper is None:
                logging.error("%s's format is not supported or harmless", entry.name)
            else:
                inner = stripper.get_meta()
                if inner:
                    entry_meta['file'] = str(inner)

            if not self.is_file_clean(entry):  # the tarinfo holds metadata
                for field in tarinfo_fields:
                    entry_meta[field] = getattr(entry, field)
                report[entry.name] = str(entry_meta)
        tarin.close()
        return report
Пример #7
0
 def remove_all(self, whitelist=None):
     """ Write a cleaned copy of the tarfile to self.output.

         Regular files with a handler are cleaned before being added.
         Files without handler are only added when self.add2archive is
         set, when their extension is in parser.NOMETA, or when their
         name is in whitelist. self.do_backup() is called at the end.

         :param list whitelist: names of unsupported files to add anyway
         :return: True
     """
     allowed = whitelist if whitelist else []
     source = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8')
     target = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8')
     for member in source.getmembers():
         source.extract(member, self.tempdir)
         if not member.isfile():
             continue
         extracted = os.path.join(self.tempdir, member.name)
         handler = mat.create_class_file(extracted, False, add2archive=self.add2archive)
         if handler is not None:
             # Read-only files must be made writable while they are cleaned
             mode = os.stat(extracted).st_mode
             os.chmod(extracted, mode | stat.S_IWUSR)
             handler.remove_all()
             os.chmod(extracted, mode)
         elif self.add2archive or os.path.splitext(member.name)[1] in parser.NOMETA:
             logging.debug("%s' format is either not supported or harmless" % member.name)
         elif member.name in allowed:
             logging.debug('%s is not supported, but MAT was told to add it anyway.'
                           % member.name)
         else:
             # Neither supported, harmless nor whitelisted: leave it out
             logging.debug('%s will not be added' % member.name)
             continue
         target.add(unicode(extracted.decode('utf-8')),
                    unicode(member.name.decode('utf-8')),
                    filter=self._remove_tar_added)
     source.close()
     target.close()
     self.do_backup()
     return True
Пример #8
0
 def _remove_all(self, method):
     '''
         Write a cleaned copy of the archive to self.output, then call
         self.do_backup().

         So far, the zipfile module does not allow to write a ZipInfo
         object into a zipfile (and it's a shame !) : so data added
         by zipfile itself could not be removed. It's a big concern.
         Is shipping a patched version of zipfile.py a good idea ?

         :param str method: 'normal' cleans members with remove_all(),
             any other value with remove_all_ugly()
     '''
     zipin = zipfile.ZipFile(self.filename, 'r')
     # Nested try/finally: both archives get closed even if a member
     # makes the loop fail.
     try:
         zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
         try:
             for item in zipin.infolist():
                 zipin.extract(item, self.tempdir)
                 name = os.path.join(self.tempdir, item.filename)
                 if not os.path.isfile(name):
                     continue
                 try:
                     cfile = mat.create_class_file(name, False,
                                                   self.add2archive)
                     # '==' rather than 'is': identity of equal strings
                     # is an interpreter detail, not a guarantee
                     if method == 'normal':
                         cfile.remove_all()
                     else:
                         cfile.remove_all_ugly()
                     logging.debug('Processing %s from %s' % (item.filename,
                                                              self.filename))
                     zipout.write(name, item.filename)
                 except Exception:
                     # Best effort: a member that could not be cleaned is
                     # kept only if allowed or of a harmless type
                     logging.info('%s\'s format is not supported or harmless' %
                                  item.filename)
                     _, ext = os.path.splitext(name)
                     if self.add2archive or ext in parser.NOMETA:
                         zipout.write(name, item.filename)
             # bytes literal: accepted by zipfile on Python 2 and 3 alike
             zipout.comment = b''
         finally:
             zipout.close()
     finally:
         zipin.close()
     logging.info('%s treated' % self.filename)
     self.do_backup()
Пример #9
0
 def _remove_all(self, method):
     '''
         Write a cleaned copy of the tarfile to self.output, then call
         self.do_backup().

         :param str method: 'normal' cleans members with remove_all(),
             any other value with remove_all_ugly()
     '''
     tarin = tarfile.open(self.filename, 'r' + self.compression)
     # Nested try/finally: both archives get closed even if a member
     # makes the loop fail.
     try:
         tarout = tarfile.open(self.output, 'w' + self.compression)
         try:
             for item in tarin.getmembers():
                 tarin.extract(item, self.tempdir)
                 # isfile() replaces the identity test "type is '0'",
                 # which depends on string interning and ignores the
                 # other regular-file type flags.
                 if not item.isfile():
                     continue
                 name = os.path.join(self.tempdir, item.name)
                 try:
                     # no backup file
                     cfile = mat.create_class_file(name, False,
                                                   self.add2archive)
                     if method == 'normal':
                         cfile.remove_all()
                     else:
                         cfile.remove_all_ugly()
                     tarout.add(name, item.name, filter=self._remove)
                 except Exception:
                     # Best effort: a member that could not be cleaned is
                     # kept only if allowed or of a harmless type
                     logging.info('%s\' format is not supported or harmless' %
                                  item.name)
                     _, ext = os.path.splitext(name)
                     if self.add2archive or ext in parser.NOMETA:
                         tarout.add(name, item.name, filter=self._remove)
         finally:
             tarout.close()
     finally:
         tarin.close()
     self.do_backup()
 def _remove_all(self, method):
     '''
         Write a cleaned copy of the archive to self.output, then call
         self.do_backup().

         So far, the zipfile module does not allow to write a ZipInfo
         object into a zipfile (and it's a shame !) : so data added
         by zipfile itself could not be removed. It's a big concern.
         Is shipping a patched version of zipfile.py a good idea ?

         :param str method: 'normal' cleans members with remove_all(),
             any other value with remove_all_ugly()
     '''
     zipin = zipfile.ZipFile(self.filename, 'r')
     # Both archives are closed in finally clauses so that a failing
     # member cannot leave them open.
     try:
         zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
         try:
             for item in zipin.infolist():
                 zipin.extract(item, self.tempdir)
                 name = os.path.join(self.tempdir, item.filename)
                 if not os.path.isfile(name):
                     continue
                 try:
                     cfile = mat.create_class_file(name, False,
                                                   self.add2archive)
                     # compare by value: 'is' on strings only works when
                     # the interpreter happens to share the objects
                     if method == 'normal':
                         cfile.remove_all()
                     else:
                         cfile.remove_all_ugly()
                     logging.debug('Processing %s from %s' %
                                   (item.filename, self.filename))
                     zipout.write(name, item.filename)
                 except Exception:
                     # Best effort: a member that could not be cleaned is
                     # kept only if allowed or of a harmless type
                     logging.info('%s\'s format is not supported or harmless' %
                                  item.filename)
                     _, ext = os.path.splitext(name)
                     if self.add2archive or ext in parser.NOMETA:
                         zipout.write(name, item.filename)
             # bytes literal: accepted by zipfile on Python 2 and 3 alike
             zipout.comment = b''
         finally:
             zipout.close()
     finally:
         zipin.close()
     logging.info('%s treated' % self.filename)
     self.do_backup()
Пример #11
0
    def is_clean(self):
        '''
            Check if the given file is clean from harmful metadata

            Returns False when the archive carries a comment, when the
            handler of an extracted member reports it dirty, or when a
            member that could not be checked has an extension outside
            parser.NOMETA (members named 'mimetype' or '.rels' are
            tolerated). Returns True otherwise.
        '''
        zipin = zipfile.ZipFile(self.filename, 'r')
        # The finally clause closes the archive on the early returns,
        # which previously left it open.
        try:
            # Truthiness works whether zipfile exposes the comment as
            # str or as bytes.
            if zipin.comment:
                logging.debug('%s has a comment' % self.filename)
                return False
            for item in zipin.infolist():
                # The zipinfo of each member is not checked: no way was
                # found to remove what zipfile itself adds.
                zipin.extract(item, self.tempdir)
                name = os.path.join(self.tempdir, item.filename)
                if not os.path.isfile(name):
                    continue
                try:
                    cfile = mat.create_class_file(name, False,
                                                  self.add2archive)
                    if not cfile.is_clean():
                        return False
                except Exception:
                    # Best effort: a member that cannot be checked is
                    # judged on its name only. Exception (not a bare
                    # except) lets KeyboardInterrupt/SystemExit through.
                    logging.info("%s's fileformat is not supported, "
                                 "or is a harmless format", item.filename)
                    _, ext = os.path.splitext(name)
                    bname = os.path.basename(item.filename)
                    if ext not in parser.NOMETA:
                        if bname != 'mimetype' and bname != '.rels':
                            return False
        finally:
            zipin.close()
        return True
 def is_clean(self):
     '''
         Check if the file is clean from harmful metadatas

         Returns False as soon as self.is_file_clean() rejects the
         tarinfo of a member, the handler of a regular file reports it
         dirty, or a regular file that could not be checked has an
         extension outside parser.NOMETA. Returns True otherwise.
     '''
     tarin = tarfile.open(self.filename, 'r' + self.compression)
     # A single finally clause replaces the close() calls that had to be
     # repeated before each return.
     try:
         for item in tarin.getmembers():
             if not self.is_file_clean(item):
                 return False
             tarin.extract(item, self.tempdir)
             # isfile() replaces the identity test "type is '0'", which
             # depends on string interning and ignores the other
             # regular-file type flags.
             if not item.isfile():
                 continue
             name = os.path.join(self.tempdir, item.name)
             try:
                 class_file = mat.create_class_file(
                     name, False, self.add2archive)  # no backup file
                 if not class_file.is_clean():
                     return False
             except Exception:
                 # A TarInfo has a .name but no .filename: using the
                 # latter made this handler fail with AttributeError.
                 logging.error('%s\'s format is not supported or harmless' %
                               item.name)
                 _, ext = os.path.splitext(name)
                 if ext not in parser.NOMETA:
                     return False
     finally:
         tarin.close()
     return True
 def _remove_all(self, method):
     '''
         Write a cleaned copy of the tarfile to self.output, then call
         self.do_backup().

         :param str method: 'normal' cleans members with remove_all(),
             any other value with remove_all_ugly()
     '''
     tarin = tarfile.open(self.filename, 'r' + self.compression)
     # Both archives are closed in finally clauses so that a failing
     # member cannot leave them open.
     try:
         tarout = tarfile.open(self.output, 'w' + self.compression)
         try:
             for item in tarin.getmembers():
                 tarin.extract(item, self.tempdir)
                 # isfile() instead of "type is '0'": an identity test on
                 # a string is unreliable, and it skipped regular files
                 # stored with another type flag.
                 if not item.isfile():
                     continue
                 name = os.path.join(self.tempdir, item.name)
                 try:
                     # no backup file
                     cfile = mat.create_class_file(name, False,
                                                   self.add2archive)
                     # compare by value, not by identity
                     if method == 'normal':
                         cfile.remove_all()
                     else:
                         cfile.remove_all_ugly()
                     tarout.add(name, item.name, filter=self._remove)
                 except Exception:
                     # Best effort: a member that could not be cleaned is
                     # kept only if allowed or of a harmless type
                     logging.info('%s\' format is not supported or harmless' %
                                  item.name)
                     _, ext = os.path.splitext(name)
                     if self.add2archive or ext in parser.NOMETA:
                         tarout.add(name, item.name, filter=self._remove)
         finally:
             tarout.close()
     finally:
         tarin.close()
     self.do_backup()
Пример #14
0
 def is_clean(self):
     '''
         Check if the file is clean from harmful metadatas

         Returns False as soon as self.is_file_clean() rejects the
         tarinfo of a member, the handler of a regular file reports it
         dirty, or a regular file that could not be checked has an
         extension outside parser.NOMETA. Returns True otherwise.
     '''
     tarin = tarfile.open(self.filename, 'r' + self.compression)
     # One finally clause closes the archive on every exit path instead
     # of a close() call in front of each return.
     try:
         for item in tarin.getmembers():
             if not self.is_file_clean(item):
                 return False
             tarin.extract(item, self.tempdir)
             # isfile() instead of "type is '0'": an identity test on a
             # string is unreliable, and it skipped regular files stored
             # with another type flag.
             if not item.isfile():
                 continue
             name = os.path.join(self.tempdir, item.name)
             try:
                 class_file = mat.create_class_file(name,
                     False, self.add2archive)  # no backup file
                 if not class_file.is_clean():
                     return False
             except Exception:
                 # item.name, not item.filename: TarInfo has no such
                 # attribute, so the handler itself used to raise.
                 logging.error('%s\'s format is not supported or harmless' %
                     item.name)
                 _, ext = os.path.splitext(name)
                 if ext not in parser.NOMETA:
                     return False
     finally:
         tarin.close()
     return True
Пример #15
0
    def remove_all(self, whitelist=None, beginning_blacklist=None, ending_blacklist=None):
        """ Remove all metadata from a zip archive, even those
            added by Python's zipfile itself. It will not add
            files starting with "beginning_blacklist", or ending with
            "ending_blacklist". This method also add files present in
            whitelist to the archive.

            The cleaned archive is written to self.output, then
            self.do_backup() is called.

            :param list whitelist: Add those files to the produced archive, regardless if they are harmful or not
            :param list beginning_blacklist: If the file starts with $beginning_blacklist, it will _not_ be added
            :param list ending_blacklist: If the file end with $ending_blacklist, it will _not_ be added
            :return: True
        """
        ending_blacklist = ending_blacklist or []
        beginning_blacklist = beginning_blacklist or []
        whitelist = whitelist or []
        zipin = zipfile.ZipFile(self.filename, 'r')
        # Nested try/finally: both archives get closed even if a member
        # makes the loop fail.
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            try:
                for item in zipin.infolist():
                    zipin.extract(item, self.tempdir)
                    path = os.path.join(self.tempdir, item.filename)

                    beginning = any(item.filename.startswith(f) for f in beginning_blacklist)
                    ending = any(item.filename.endswith(f) for f in ending_blacklist)
                    if beginning or ending or not os.path.isfile(path):
                        continue

                    cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                    if cfile is not None:
                        # Handle read-only files inside archive
                        old_stat = os.stat(path).st_mode
                        os.chmod(path, old_stat | stat.S_IWUSR)
                        cfile.remove_all()
                        os.chmod(path, old_stat)
                        logging.debug('Processing %s from %s', item.filename, self.filename)
                    elif item.filename not in whitelist:
                        logging.info("%s's format is not supported or harmless", item.filename)
                        _, ext = os.path.splitext(path)
                        if not (self.add2archive or ext in parser.NOMETA):
                            continue
                    # A fresh ZipInfo with fixed values keeps the
                    # original member's zipinfo out of the output.
                    zinfo = zipfile.ZipInfo(item.filename, date_time=ZIP_EPOCH)
                    zinfo.compress_type = zipfile.ZIP_DEFLATED
                    zinfo.create_system = 3  # Linux
                    # bytes literal: valid for zipfile on Python 2 and 3
                    zinfo.comment = b''
                    # Binary mode: members are arbitrary data, text mode
                    # may translate or fail to decode their content.
                    with open(path, 'rb') as f:
                        zipout.writestr(zinfo, f.read())
            finally:
                zipout.close()
        finally:
            zipin.close()

        logging.info('%s processed', self.filename)
        self.do_backup()
        return True
Пример #16
0
    def remove_all(self, whitelist=None, beginning_blacklist=None, ending_blacklist=None):
        """ Remove all metadata from a zip archive, even those
            added by Python's zipfile itself. It will not add
            files starting with "beginning_blacklist", or ending with
            "ending_blacklist". This method also add files present in
            whitelist to the archive.

            The cleaned archive is written to self.output, then
            self.do_backup() is called.

            :param list whitelist: Add those files to the produced archive, regardless if they are harmful or not
            :param list beginning_blacklist: If the file starts with $beginning_blacklist, it will _not_ be added
            :param list ending_blacklist: If the file end with $ending_blacklist, it will _not_ be added
            :return: True
        """
        if not ending_blacklist:
            ending_blacklist = []
        if not beginning_blacklist:
            beginning_blacklist = []
        if not whitelist:
            whitelist = []
        zipin = zipfile.ZipFile(self.filename, 'r')
        # Both archives are closed in finally clauses so that a failing
        # member cannot leave them open.
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            try:
                for item in zipin.infolist():
                    zipin.extract(item, self.tempdir)
                    path = os.path.join(self.tempdir, item.filename)

                    blacklisted = (
                        any(item.filename.startswith(f) for f in beginning_blacklist)
                        or any(item.filename.endswith(f) for f in ending_blacklist))
                    if blacklisted or not os.path.isfile(path):
                        continue

                    cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                    if cfile is not None:
                        # Handle read-only files inside archive
                        old_stat = os.stat(path).st_mode
                        os.chmod(path, old_stat | stat.S_IWUSR)
                        cfile.remove_all()
                        os.chmod(path, old_stat)
                        logging.debug('Processing %s from %s', item.filename, self.filename)
                    elif item.filename not in whitelist:
                        logging.info("%s's format is not supported or harmless", item.filename)
                        _, ext = os.path.splitext(path)
                        if not (self.add2archive or ext in parser.NOMETA):
                            continue
                    # The member is written through a new ZipInfo with
                    # fixed values rather than the one read from the input.
                    zinfo = zipfile.ZipInfo(item.filename, date_time=ZIP_EPOCH)
                    zinfo.compress_type = zipfile.ZIP_DEFLATED
                    zinfo.create_system = 3  # Linux
                    # bytes literal: valid for zipfile on Python 2 and 3
                    zinfo.comment = b''
                    # Read in binary mode: text mode can alter or fail to
                    # decode the content of non-text members.
                    with open(path, 'rb') as f:
                        zipout.writestr(zinfo, f.read())
            finally:
                zipout.close()
        finally:
            zipin.close()

        logging.info('%s processed', self.filename)
        self.do_backup()
        return True
Пример #17
0
    def _remove_all(self, method):
        '''
            Write a cleaned copy of the archive to self.output, then
            call self.do_backup(). Lines mentioning 'meta.xml' are
            dropped from the manifest on the way.

            FIXME ?
            There is a patch implementing the Zipfile.remove()
            method here : http://bugs.python.org/issue6818

            :param str method: 'normal' cleans members with remove_all(),
                any other value with remove_all_ugly()
        '''
        import sys  # function-scope import, only used to rewrite the manifest

        zipin = zipfile.ZipFile(self.filename, 'r')
        # Nested try/finally: both archives get closed even if a member
        # makes the loop fail.
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            try:
                for item in zipin.namelist():
                    name = os.path.join(self.tempdir, item)
                    _, ext = os.path.splitext(name)

                    if item.endswith('manifest.xml'):
                        # contain the list of all files present in the archive
                        zipin.extract(item, self.tempdir)
                        for line in fileinput.input(name, inplace=1):
                            # In inplace mode standard output goes to the
                            # file: only the lines written here are kept.
                            line = line.strip()
                            if 'meta.xml' not in line:
                                sys.stdout.write(line + '\n')
                        zipout.write(name, item)

                    elif ext in parser.NOMETA or item == 'mimetype':
                        # keep NOMETA files, and the "mimetype" file
                        if item != 'meta.xml':  # contains the metadata
                            zipin.extract(item, self.tempdir)
                            zipout.write(name, item)

                    else:
                        zipin.extract(item, self.tempdir)
                        if os.path.isfile(name):
                            try:
                                cfile = mat.create_class_file(name, False,
                                                              self.add2archive)
                                if method == 'normal':
                                    cfile.remove_all()
                                else:
                                    cfile.remove_all_ugly()
                                logging.debug('Processing %s from %s' % (item,
                                                                         self.filename))
                                zipout.write(name, item)
                            except Exception:
                                # Best effort: a member that could not be
                                # cleaned is only kept on explicit request
                                logging.info('%s\' fileformat is not supported' % item)
                                if self.add2archive:
                                    zipout.write(name, item)
                # bytes literal: accepted by zipfile on Python 2 and 3 alike
                zipout.comment = b''
            finally:
                zipout.close()
        finally:
            zipin.close()
        logging.info('%s treated' % self.filename)
        self.do_backup()
Пример #18
0
    def _remove_all(self, method):
        '''
            Write a cleaned copy of the archive to self.output, then
            call self.do_backup(). Lines mentioning 'meta.xml' are
            dropped from the manifest on the way.

            FIXME ?
            There is a patch implementing the Zipfile.remove()
            method here : http://bugs.python.org/issue6818

            :param str method: 'normal' cleans members with remove_all(),
                any other value with remove_all_ugly()
        '''
        import sys  # function-scope import, only used to rewrite the manifest

        zipin = zipfile.ZipFile(self.filename, 'r')
        # Both archives are closed in finally clauses so that a failing
        # member cannot leave them open.
        try:
            zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
            try:
                for item in zipin.namelist():
                    name = os.path.join(self.tempdir, item)
                    _, ext = os.path.splitext(name)

                    if item.endswith('manifest.xml'):
                        # contain the list of all files present in the archive
                        zipin.extract(item, self.tempdir)
                        for line in fileinput.input(name, inplace=1):
                            # fileinput redirects standard output into the
                            # file being rewritten, so writing a line keeps
                            # it and skipping it removes it.
                            line = line.strip()
                            if 'meta.xml' not in line:
                                sys.stdout.write(line + '\n')
                        zipout.write(name, item)

                    elif ext in parser.NOMETA or item == 'mimetype':
                        # keep NOMETA files, and the "mimetype" file
                        if item != 'meta.xml':  # contains the metadata
                            zipin.extract(item, self.tempdir)
                            zipout.write(name, item)

                    else:
                        zipin.extract(item, self.tempdir)
                        if os.path.isfile(name):
                            try:
                                cfile = mat.create_class_file(name, False,
                                                              self.add2archive)
                                if method == 'normal':
                                    cfile.remove_all()
                                else:
                                    cfile.remove_all_ugly()
                                logging.debug('Processing %s from %s' %
                                              (item, self.filename))
                                zipout.write(name, item)
                            except Exception:
                                # Best effort: a member that could not be
                                # cleaned is only kept on explicit request
                                logging.info('%s\' fileformat is not supported' % item)
                                if self.add2archive:
                                    zipout.write(name, item)
                # bytes literal: accepted by zipfile on Python 2 and 3 alike
                zipout.comment = b''
            finally:
                zipout.close()
        finally:
            zipin.close()
        logging.info('%s treated' % self.filename)
        self.do_backup()
Пример #19
0
    def is_clean(self, list_unsupported=False):
        """ Check if the given file is clean from harmful metadata
            When list_unsupported is True, the method returns a list
            of all non-supported/archives files contained in the
            archive.

            :param bool list_unsupported: Should the list of unsupported files be returned
            :return: a list of member names if list_unsupported is True,
                otherwise True when nothing harmful was found, else False
        """
        ret_list = []
        zipin = zipfile.ZipFile(self.filename, 'r')
        # try/finally: the archive is closed on the early returns too
        try:
            # Truthiness works whether zipfile exposes the comment as
            # str or as bytes.
            if zipin.comment and not list_unsupported:
                logging.debug('%s has a comment', self.filename)
                return False
            for item in zipin.infolist():
                zipin.extract(item, self.tempdir)
                path = os.path.join(self.tempdir, item.filename)
                if not self.__is_zipfile_clean(item) and not list_unsupported:
                    logging.debug('%s from %s has compromising zipinfo',
                                  item.filename, self.filename)
                    return False
                if not os.path.isfile(path):
                    continue
                cfile = mat.create_class_file(path,
                                              False,
                                              add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata', item.filename,
                                      self.filename)
                        if not list_unsupported:
                            return False
                    continue
                logging.info(
                    '%s\'s fileformat is not supported or harmless.',
                    item.filename)
                _, ext = os.path.splitext(path)
                # 'mimetype' and '.rels' members are tolerated even
                # though no handler exists for them
                if os.path.basename(item.filename) in ('mimetype', '.rels'):
                    continue
                if ext not in parser.NOMETA:
                    if not list_unsupported:
                        return False
                    ret_list.append(item.filename)
        finally:
            zipin.close()
        if list_unsupported:
            return ret_list
        return True
Пример #20
0
    def is_clean(self, list_unsupported=False):
        """ Check if the given file is clean from harmful metadata
            When list_unsupported is True, the method returns a list
            of all non-supported/archives files contained in the
            archive.

            :param bool list_unsupported: Should the list of unsupported files be returned
            :return: a list of member names if list_unsupported is True,
                otherwise True when nothing harmful was found, else False
        """
        ret_list = []
        zipin = zipfile.ZipFile(self.filename, 'r')
        # The finally clause closes the archive on the early returns,
        # which previously left it open.
        try:
            # Truthiness works whether zipfile exposes the comment as
            # str or as bytes.
            if zipin.comment and not list_unsupported:
                logging.debug('%s has a comment', self.filename)
                return False
            for item in zipin.infolist():
                zipin.extract(item, self.tempdir)
                path = os.path.join(self.tempdir, item.filename)
                if not self.__is_zipfile_clean(item) and not list_unsupported:
                    logging.debug('%s from %s has compromising zipinfo',
                                  item.filename, self.filename)
                    return False
                if not os.path.isfile(path):
                    continue
                cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                if cfile is not None:
                    if not cfile.is_clean():
                        logging.debug('%s from %s has metadata', item.filename, self.filename)
                        if not list_unsupported:
                            return False
                    continue
                logging.info('%s\'s fileformat is not supported or harmless.',
                             item.filename)
                # only the extension matters here
                _, ext = os.path.splitext(path)
                # 'mimetype' and '.rels' members are tolerated even
                # though no handler exists for them
                if os.path.basename(item.filename) in ('mimetype', '.rels'):
                    continue
                if ext not in parser.NOMETA:
                    if not list_unsupported:
                        return False
                    ret_list.append(item.filename)
        finally:
            zipin.close()
        if list_unsupported:
            return ret_list
        return True
Пример #21
0
 def _remove_all(self, method):
     '''
         Rebuild the zip archive into self.output, stripping the
         metadata of its members, then call self.do_backup().

         Members under 'docProps/' are dropped. Members whose extension
         is in parser.NOMETA, and the member named '.rels', are copied
         as they are. Every other regular file is cleaned before being
         copied; files that cannot be cleaned are only copied when
         self.add2archive is set.

         FIXME ?
         There is a patch implementing the Zipfile.remove()
         method here : http://bugs.python.org/issue6818

         :param method: 'normal' to call remove_all() on each member,
                        any other value to call remove_all_ugly()
     '''
     zipin = zipfile.ZipFile(self.filename, 'r')
     # Nested try/finally so both handles are released even when
     # extracting or cleaning a member raises.
     try:
         zipout = zipfile.ZipFile(self.output, 'w',
             allowZip64=True)
         try:
             for item in zipin.namelist():
                 name = os.path.join(self.tempdir, item)
                 _, ext = os.path.splitext(name)
                 if item.startswith('docProps/'):  # metadatas
                     continue
                 zipin.extract(item, self.tempdir)
                 if ext in parser.NOMETA or item == '.rels':
                     # keep parser.NOMETA files, and the file named ".rels"
                     zipout.write(name, item)
                     continue
                 if not os.path.isfile(name):  # don't care about folders
                     continue
                 try:
                     # NOTE(review): presumably create_class_file returns
                     # None for unsupported formats (other callers test for
                     # it); the attribute lookup below then fails and the
                     # handler treats the member as unsupported.
                     cfile = mat.create_class_file(name, False,
                         self.add2archive)
                     if method == 'normal':
                         cfile.remove_all()
                     else:
                         cfile.remove_all_ugly()
                     logging.debug('Processing %s from %s', item,
                         self.filename)
                     zipout.write(name, item)
                 except Exception:
                     # Best effort, but no longer swallows
                     # KeyboardInterrupt/SystemExit like a bare except did.
                     logging.info('%s\' fileformat is not supported', item)
                     if self.add2archive:
                         zipout.write(name, item)
             zipout.comment = ''
             logging.info('%s treated', self.filename)
         finally:
             zipout.close()
     finally:
         zipin.close()
     self.do_backup()
Пример #22
0
 def get_meta(self):
     """ Return all the metadata of a zip archive

         :return: a dict that may hold the archive comment under
                  'comment', the str() of each member's zipinfo
                  metadata under "<member>'s zipinfo", and the str() of
                  each supported member's own metadata under the
                  member's name
     """
     metadata = {}
     zipin = zipfile.ZipFile(self.filename, 'r')
     # try/finally: release the archive handle even if a member fails.
     try:
         # Truthiness works whether the comment is exposed as str or bytes.
         if zipin.comment:
             metadata['comment'] = zipin.comment
         for item in zipin.infolist():
             zipinfo_meta = self.__get_zipinfo_meta(item)
             if zipinfo_meta != {}:  # zipinfo metadata
                 metadata[item.filename + "'s zipinfo"] = str(zipinfo_meta)
             zipin.extract(item, self.tempdir)
             path = os.path.join(self.tempdir, item.filename)
             if not os.path.isfile(path):
                 continue  # only regular files are inspected
             cfile = mat.create_class_file(path, False,
                                           add2archive=self.add2archive)
             if cfile is None:
                 logging.info('%s\'s fileformat is not supported or harmless', item.filename)
                 continue
             cfile_meta = cfile.get_meta()
             if cfile_meta != {}:
                 metadata[item.filename] = str(cfile_meta)
     finally:
         zipin.close()
     return metadata
Пример #23
0
 def _remove_all(self, method):
     '''
         Rebuild the zip archive into self.output, stripping the
         metadata of its members, then call self.do_backup().

         Members under 'docProps/' are dropped. Members whose extension
         is in parser.NOMETA, and the member named '.rels', are copied
         as they are. Every other regular file is cleaned before being
         copied; files that cannot be cleaned are only copied when
         self.add2archive is set.

         FIXME ?
         There is a patch implementing the Zipfile.remove()
         method here : http://bugs.python.org/issue6818

         :param method: 'normal' to call remove_all() on each member,
                        any other value to call remove_all_ugly()
     '''
     zipin = zipfile.ZipFile(self.filename, 'r')
     # Nested try/finally so both handles are released even when
     # extracting or cleaning a member raises.
     try:
         zipout = zipfile.ZipFile(self.output, 'w', allowZip64=True)
         try:
             for item in zipin.namelist():
                 name = os.path.join(self.tempdir, item)
                 _, ext = os.path.splitext(name)
                 if item.startswith('docProps/'):  # metadatas
                     continue
                 zipin.extract(item, self.tempdir)
                 if ext in parser.NOMETA or item == '.rels':
                     # keep parser.NOMETA files, and the file named ".rels"
                     zipout.write(name, item)
                     continue
                 if not os.path.isfile(name):  # don't care about folders
                     continue
                 try:
                     # NOTE(review): presumably create_class_file returns
                     # None for unsupported formats (other callers test for
                     # it); the attribute lookup below then fails and the
                     # handler treats the member as unsupported.
                     cfile = mat.create_class_file(name, False,
                                                   self.add2archive)
                     if method == 'normal':
                         cfile.remove_all()
                     else:
                         cfile.remove_all_ugly()
                     logging.debug('Processing %s from %s',
                                   item, self.filename)
                     zipout.write(name, item)
                 except Exception:
                     # Best effort, but no longer swallows
                     # KeyboardInterrupt/SystemExit like a bare except did.
                     logging.info('%s\' fileformat is not supported', item)
                     if self.add2archive:
                         zipout.write(name, item)
             zipout.comment = ''
             logging.info('%s treated', self.filename)
         finally:
             zipout.close()
     finally:
         zipin.close()
     self.do_backup()
Пример #24
0
 def remove_all(self, whitelist=None):
     """ Remove all harmful metadata from the tarfile.
         The cleaned archive is written to self.output, then
         self.do_backup() is called. Only regular files are copied
         over. A file whose format is not supported is copied only if
         self.add2archive is set, if its extension is in parser.NOMETA,
         or if its name is in whitelist.

         :param list whitelist: Files to add to the produced archive,
                 regardless of whether they are considered harmful.
         :return: True
     """
     if not whitelist:
         whitelist = []
     tarin = tarfile.open(self.filename, 'r' + self.compression, encoding='utf-8')
     # Nested try/finally so both handles are released even when
     # extracting or cleaning a member raises.
     try:
         tarout = tarfile.open(self.output, 'w' + self.compression, encoding='utf-8')
         try:
             for item in tarin.getmembers():
                 tarin.extract(item, self.tempdir)
                 if not item.isfile():
                     continue
                 path = os.path.join(self.tempdir, item.name)
                 cfile = mat.create_class_file(path, False, add2archive=self.add2archive)
                 if cfile is not None:
                     # Handle read-only files inside archive
                     old_stat = os.stat(path).st_mode
                     os.chmod(path, old_stat | stat.S_IWUSR)
                     try:
                         cfile.remove_all()
                     finally:
                         # Put the original mode back even if cleaning fails.
                         os.chmod(path, old_stat)
                 elif self.add2archive or os.path.splitext(item.name)[1] in parser.NOMETA:
                     logging.debug('%s\' format is either not supported or harmless', item.name)
                 elif item.name in whitelist:
                     logging.debug('%s is not supported, but MAT was told to add it anyway.',
                                   item.name)
                 else:  # Don't add the file to the archive
                     logging.debug('%s will not be added', item.name)
                     continue
                 tarout.add(unicode(path.decode('utf-8')),
                            unicode(item.name.decode('utf-8')),
                            filter=self._remove_tar_added)
         finally:
             tarout.close()
     finally:
         tarin.close()
     self.do_backup()
     return True