Пример #1
0
def get_mimetype(file_object, mimetype_only=False):
    """
    Determine a file's mimetype by calling the system's libmagic
    library via python-magic.

    The content of ``file_object`` is copied to a named temporary file
    because the detection is performed by filename. ``file_object`` is
    rewound to its start before and after the copy.

    :param file_object: seekable, readable file-like object to inspect.
    :param mimetype_only: when True only the mimetype is requested and
        the encoding element of the result is None.
    :return: a ``(mimetype, encoding)`` tuple.
    """
    file_mimetype = None
    file_mime_encoding = None

    kwargs = {'mime': True}

    if not mimetype_only:
        kwargs['mime_encoding'] = True

    # The context manager closes (and thereby removes) the temporary file
    # on every exit path, including a failure while rewinding or copying
    # the source file object.
    with NamedTemporaryFile() as temporary_file_object:
        file_object.seek(0)
        copyfileobj(fsrc=file_object, fdst=temporary_file_object)
        file_object.seek(0)
        # Make sure the copied content is written out before the file is
        # reopened by name.
        temporary_file_object.flush()
        temporary_file_object.seek(0)

        mime = magic.Magic(**kwargs)

        if mimetype_only:
            file_mimetype = mime.from_file(
                filename=temporary_file_object.name
            )
        else:
            # With the encoding enabled the result is expected to have
            # the form "<mimetype>; charset=<encoding>".
            file_mimetype, file_mime_encoding = mime.from_file(
                filename=temporary_file_object.name
            ).split('; charset=')

    return file_mimetype, file_mime_encoding
Пример #2
0
    def verify_file(self,
                    file_object,
                    signature_file=None,
                    all_keys=False,
                    key_fingerprint=None,
                    key_id=None):
        """
        Verify the signature of a file, either embedded or detached.

        When ``signature_file`` is given, the content of ``file_object``
        is copied to a named temporary file and the signature is verified
        against it. Otherwise ``file_object`` itself is passed to the
        backend.

        If the backend reports 'no public key' and the caller did not
        select any key (``all_keys``, ``key_fingerprint`` and ``key_id``
        all unset), the verification is retried once using the key id
        reported by the backend.

        :param file_object: file-like object with the data to verify.
        :param signature_file: optional file-like object with a detached
            signature; it is rewound after being read.
        :param all_keys: forwarded to ``_preload_keys``.
        :param key_fingerprint: forwarded to ``_preload_keys``.
        :param key_id: forwarded to ``_preload_keys``.
        :return: a ``SignatureVerification`` built from the backend result.
        :raises VerificationError: when the file is not signed.
        """
        keys = self._preload_keys(all_keys=all_keys,
                                  key_fingerprint=key_fingerprint,
                                  key_id=key_id)

        if signature_file:
            # Save the original data and invert the argument order
            # Signature first, file second
            #
            # Both temporary resources are managed by context managers so
            # they are released even when the copy or the backend call
            # raises.
            with NamedTemporaryFile() as temporary_file_object:
                shutil.copyfileobj(fsrc=file_object,
                                   fdst=temporary_file_object)
                temporary_file_object.seek(0)

                signature_data = signature_file.read()
                # Leave the signature readable again for a possible retry.
                signature_file.seek(0)

                with io.BytesIO(signature_data) as signature_file_buffer:
                    verify_result = gpg_backend.verify_file(
                        file_object=signature_file_buffer,
                        data_filename=temporary_file_object.name,
                        keys=keys)
        else:
            verify_result = gpg_backend.verify_file(file_object=file_object,
                                                    keys=keys)

        logger.debug('verify_result.status: %s', verify_result.status)

        if verify_result:
            # Signed and key present
            logger.debug(msg='signed and key present')
            return SignatureVerification(verify_result.__dict__)
        elif verify_result.status == 'no public key' and not (
                key_fingerprint or all_keys or key_id):
            # Signed but key not present, retry with key fetch
            logger.debug(msg='no public key')
            file_object.seek(0)
            return self.verify_file(file_object=file_object,
                                    signature_file=signature_file,
                                    key_id=verify_result.key_id)
        elif verify_result.key_id:
            # Signed, retried and key still not found
            logger.debug(msg='signed, retried and key still not found')
            return SignatureVerification(verify_result.__dict__)
        else:
            logger.debug(msg='file not signed')
            raise VerificationError('File not signed')
Пример #3
0
    def convert(self, *args, **kwargs):
        """
        Run the parent conversion and, for PDF input, render one page.

        When ``self.mime_type`` is 'application/pdf' and ``pdftoppm`` is
        available, ``self.file_object`` is copied to a named temporary
        file, ``pdftoppm`` is run on it for the single page
        ``self.page_number + 1`` and the output is opened as an image.
        ``self.file_object`` is rewound before and after the copy.

        :return: the object returned by ``Image.open`` for PDF input;
            None otherwise.
        """
        super(Python, self).convert(*args, **kwargs)

        if self.mime_type == 'application/pdf' and pdftoppm:
            # The context manager removes the temporary file on every exit
            # path, including a failure while copying the source file.
            with NamedTemporaryFile() as new_file_object:
                self.file_object.seek(0)
                shutil.copyfileobj(
                    fsrc=self.file_object, fdst=new_file_object
                )
                self.file_object.seek(0)
                new_file_object.seek(0)

                image_buffer = io.BytesIO()
                # NOTE(review): the +1 presumably converts a zero based
                # page index into pdftoppm's one based first/last page
                # options -- confirm against the caller.
                pdftoppm(new_file_object.name,
                         f=self.page_number + 1,
                         l=self.page_number + 1,
                         _out=image_buffer)
                image_buffer.seek(0)
                return Image.open(image_buffer)
Пример #4
0
    def _process(self, document_version):
        """
        Run EXIFTool on a document version and return its parsed output.

        The document version is saved to a named temporary file whose
        path is passed to ``self.command_exiftool``. The command output
        is decoded as JSON and its first element is returned.

        :param document_version: object providing ``save_to_file``.
        :return: the first element of the JSON output. When the command
            exits with code 1 and reports 'Unknown file type', the parsed
            error report is returned instead. None is returned when the
            EXIFTool binary is not available or when the command fails
            with any other error.
        """
        if not self.command_exiftool:
            logger.warning(
                'EXIFTool binary not found, not processing document '
                'version: %s', document_version)
            return None

        # The context manager removes the temporary file on every exit
        # path.
        with NamedTemporaryFile() as temporary_fileobject:
            document_version.save_to_file(file_object=temporary_fileobject)
            temporary_fileobject.seek(0)
            try:
                result = self.command_exiftool(temporary_fileobject.name)
            except sh.ErrorReturnCode_1 as exception:
                result = json.loads(s=exception.stdout)[0]
                if result.get('Error', '') == 'Unknown file type':
                    # Not a fatal error
                    return result

                # Any other failure used to be dropped without a trace;
                # record it while keeping the None result callers
                # already receive.
                logger.error(
                    'EXIFTool error processing document version: %s; %s',
                    document_version, result.get('Error', '')
                )
                return None
            else:
                return json.loads(s=result.stdout)[0]
Пример #5
0
    def execute(self, file_object, page_number):
        """
        Extract the text of a single PDF page with the pdftotext binary.

        The content of ``file_object`` is copied to a named temporary
        file and ``self.pdftotext_path`` is invoked on it with
        ``page_number`` as both first and last page, writing to standard
        output.

        :param file_object: readable file-like object with the PDF data.
        :param page_number: page to extract, passed verbatim to the
            ``-f`` and ``-l`` options.
        :return: the captured output as bytes, with a trailing
            ``b'\\n\\n\\x0c'`` removed when present. An empty ``str`` is
            returned when the output is only a form feed.
        :raises ParserError: when the command exits with a non zero code.
        """
        logger.debug('Parsing PDF page: %d', page_number)

        # The context manager removes the temporary file on every exit
        # path, including a failure to copy the data or start the process.
        with NamedTemporaryFile() as temporary_file_object:
            copyfileobj(fsrc=file_object, fdst=temporary_file_object)
            temporary_file_object.seek(0)

            command = [
                self.pdftotext_path,
                '-f', str(page_number),
                '-l', str(page_number),
                temporary_file_object.name,
                '-',
            ]

            proc = subprocess.Popen(command,
                                    close_fds=True,
                                    stderr=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
            # communicate() drains both pipes while waiting. Calling
            # wait() first can deadlock once the child fills a pipe
            # buffer.
            output, error_output = proc.communicate()

        if proc.returncode != 0:
            # Log only the first line of the error output, including its
            # line terminator when there is one.
            first_line, newline, _ = error_output.partition(b'\n')
            logger.error(first_line + newline)

            raise ParserError

        if output == b'\x0c':
            logger.debug('Parser didn\'t return any output')
            return ''

        if output[-3:] == b'\x0a\x0a\x0c':
            return output[:-3]

        return output