Exemplo n.º 1
0
    def getParser(self, fileHash, folder=None, original=False):
        """
        Return a YACParser for the file named ``fileHash``, or None if no
        such file is found.

        :param fileHash: file name to look up (used as the md5 when a
            cleaned version is stored)
        :param folder: if given, only ``folder/fileHash`` is considered and
            no cleaned version is generated
        :param original: if true, skip the lookup in the cleaned folders and
            go straight to the submit folders
        :return: a YACParser instance, or None

        Without ``folder``, a hit in a submit folder also generates and
        stores the cleaned version in the cleaned folder registered under
        the same key, unless that cleaned file already exists.
        """
        if folder:
            file_path = os.path.join(folder, fileHash)
            if not os.path.exists(file_path):
                return None
            # Files from a cleaned folder are parsed without encoding
            # guessing (presumably because they were already normalized
            # when they were generated -- confirm against YACParser).
            if folder in self.cleaned_folder.values():
                return YACParser(filename=file_path, skip_guess_encoding=True)
            return YACParser(filename=file_path)

        if not original:
            for cleaned_dir in self.cleaned_folder.values():
                cleaned_path = os.path.join(cleaned_dir, fileHash)
                if os.path.exists(cleaned_path):
                    return YACParser(filename=cleaned_path, skip_guess_encoding=True)

        for key, submit_dir in self.submit_folder.items():
            submit_path = os.path.join(submit_dir, fileHash)
            if not os.path.exists(submit_path):
                continue

            table = YACParser(filename=submit_path)
            # A submit folder may have no cleaned counterpart registered;
            # in that case just return the parser instead of raising KeyError.
            cleaned_dir = self.cleaned_folder.get(key)
            if cleaned_dir is not None:
                cleaned_path = os.path.join(cleaned_dir, fileHash)
                if not os.path.exists(cleaned_path):
                    # storeContent is given the target *folder* plus the md5
                    # (the same call shape submit() uses); handing it the
                    # full file path would nest the file one level too deep.
                    storeContent(table.generate(), cleaned_dir, md5=fileHash)
            return table

        return None
Exemplo n.º 2
0
    def submit(self, file=None, url=None, content=None, toFolder=None):
        """
        Store a submission and make sure its cleaned version exists.

        Exactly one source is used, checked in the order ``file``, ``url``,
        ``content``; the first truthy one wins. The matching store helper
        (storeFile / storeURL / storeContent) writes into the submit folder
        registered under ``toFolder`` and its return value is used as the
        file name (assumed to be the md5 of the original content -- the
        helpers are defined elsewhere).

        If no file of that name exists in the cleaned folder registered
        under ``toFolder``, the submitted file is parsed with YACParser and
        the generated output is stored there under the same name.

        :param file: file to submit, passed to storeFile
        :param url: URL to submit, passed to storeURL together with
            ``self.max_file_size``
        :param content: raw content to submit, passed to storeContent
        :param toFolder: key that must be present in both
            ``self.submit_folder`` and ``self.cleaned_folder``
        :return: the name (md5) returned by the store helper, or None when
            ``toFolder`` is not registered in both mappings or no source
            was given
        """
        registered = toFolder in self.submit_folder and toFolder in self.cleaned_folder
        if not registered:
            return None

        raw_dir = self.submit_folder[toFolder]
        clean_dir = self.cleaned_folder[toFolder]

        if file:
            digest = storeFile(file, raw_dir)
        elif url:
            digest = storeURL(url, raw_dir, max_file_size=self.max_file_size)
        elif content:
            digest = storeContent(content, raw_dir)
        else:
            return None

        raw_path = os.path.join(raw_dir, digest)
        clean_path = os.path.join(clean_dir, digest)

        # Only generate the cleaned version when it is not stored yet.
        if not os.path.exists(clean_path):
            parser = YACParser(filename=raw_path)
            storeContent(parser.generate(), clean_dir, md5=digest)

        return digest