Exemplo n.º 1
0
def convert_sla_to_pdf(
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert a Scribus document to PDF and return the PDF bytes.

    The input is copied to a temporary file next to ``output_filepath``
    (``output_filepath`` + extension) and ``scribus`` is run on it inside an
    ``Xvfb`` context. If ``output_filepath`` already exists the conversion is
    skipped and the existing file is returned.

    :param file_content: binary stream holding the document to convert
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: only used in a log message by this function
    :param output_filepath: path of the PDF to produce / reuse
    :param mimetype: mimetype used to guess the extension when none is given
    :return: an in-memory buffer positioned at 0 with the PDF content
    :raises subprocess.CalledProcessError: if scribus exits with non-zero status
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug("converting file bytes {} to pdf file {}".format(
        file_content, output_filepath))  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype,
                                                            strict=False)
    temporary_input_content_path = output_filepath
    if input_extension:
        temporary_input_content_path += input_extension
    # NOTE(review): when no extension can be determined the temporary path is
    # the output path itself, so the cleanup below removes the output file —
    # confirm whether callers can reach this case.
    with create_flag_file(output_filepath):

        logger.debug("conversion is based on temporary file {}".format(
            temporary_input_content_path))  # nopep8

        if not os.path.exists(output_filepath):
            write_file_content(
                file_content,
                output_filepath=temporary_input_content_path)  # nopep8
            logger.debug("temporary file written: {}".format(
                temporary_input_content_path))  # nopep8
            logger.debug("converting {} to pdf into folder {}".format(
                temporary_input_content_path, cache_path))
            with Xvfb():
                check_call(
                    [
                        "scribus",
                        "-g",
                        "-py",
                        SCRIPT_PATH,
                        output_filepath,
                        "--",
                        temporary_input_content_path,
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )

        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # NOTE: this is only a log message, no rename is performed here; the
        # output path is handed directly to the scribus script above.
        logger.debug("renaming output file {} to {}".format(
            output_filepath + ".pdf", output_filepath))

        logger.info("Removing temporary copy file {}".format(
            temporary_input_content_path))  # nopep8
        try:
            os.remove(temporary_input_content_path)
        except FileNotFoundError:
            # The output already existed, so no temporary copy was written:
            # there is nothing to clean up and this must not fail the call.
            pass

    with open(output_filepath, "rb") as pdf_handle:
        # A freshly built BytesIO is already positioned at offset 0.
        return BytesIO(pdf_handle.read())
Exemplo n.º 2
0
def convert_sla_to_pdf(
        file_content: typing.IO[bytes],
        input_extension: str,  # example: '.dxf'
        cache_path: str,
        output_filepath: str,
        mimetype: str) -> BytesIO:
    """
    Convert a Scribus document to PDF and return the PDF bytes.

    The input is copied to ``output_filepath`` + extension and ``scribus`` is
    run on it inside an ``Xvfb`` context. A flag file is created for the
    duration of the call and is always removed, even when the conversion
    fails. If ``output_filepath`` already exists the conversion is skipped.

    :param file_content: binary stream holding the document to convert
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: only used in a log message by this function
    :param output_filepath: path of the PDF to produce / reuse
    :param mimetype: mimetype used to guess the extension when none is given
    :return: an in-memory buffer positioned at 0 with the PDF content
    :raises TypeError: if no extension is given and none can be guessed
        (``mimetypes.guess_extension`` returns ``None``)
    :raises subprocess.CalledProcessError: if scribus exits with non-zero status
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug('converting file bytes {} to pdf file {}'.format(
        file_content, output_filepath))  # nopep8
    if not input_extension:
        input_extension = mimetypes.guess_extension(mimetype)
    temporary_input_content_path = output_filepath + input_extension  # nopep8
    flag_file_path = create_flag_file(output_filepath)

    try:
        logger.debug('conversion is based on temporary file {}'.format(
            temporary_input_content_path))  # nopep8

        if not os.path.exists(output_filepath):
            write_file_content(
                file_content,
                output_filepath=temporary_input_content_path)  # nopep8
            logger.debug('temporary file written: {}'.format(
                temporary_input_content_path))  # nopep8
            logger.debug('converting {} to pdf into folder {}'.format(
                temporary_input_content_path, cache_path))
            with Xvfb():
                check_call(
                    [
                        'scribus', '-g', '-py', SCRIPT_PATH, output_filepath,
                        '--', temporary_input_content_path
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT)

        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # NOTE: this is only a log message, no rename is performed here; the
        # output path is handed directly to the scribus script above.
        logger.debug('renaming output file {} to {}'.format(
            output_filepath + '.pdf', output_filepath))

        logger.info('Removing temporary copy file {}'.format(
            temporary_input_content_path))  # nopep8
        try:
            os.remove(temporary_input_content_path)
        except FileNotFoundError:
            # The output already existed, so no temporary copy was written.
            pass
    finally:
        # Always release the flag file, otherwise a failed conversion would
        # leave it behind.
        logger.debug('Removing flag file {}'.format(flag_file_path))
        os.remove(flag_file_path)

    with open(output_filepath, 'rb') as pdf_handle:
        # A freshly built BytesIO is already positioned at offset 0.
        return BytesIO(pdf_handle.read())
def convert_office_document_to_pdf(
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert an office document to PDF with LibreOffice and return the PDF bytes.

    The input is copied to ``output_filepath`` + extension, LibreOffice is run
    headless with ``cache_path`` as output directory, and the produced
    ``<output_filepath>.pdf`` is renamed to ``output_filepath``. A flag file is
    created for the duration of the call and is always removed, even when the
    conversion fails. If ``output_filepath`` already exists the conversion is
    skipped.

    :param file_content: binary stream holding the document to convert
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: directory given to LibreOffice as ``--outdir``
    :param output_filepath: path of the PDF to produce / reuse
    :param mimetype: mimetype used to guess the extension when none is given
    :return: an in-memory buffer positioned at 0 with the PDF content
    :raises InputExtensionNotFound: if no extension is given nor guessable
    :raises subprocess.CalledProcessError: if LibreOffice exits with non-zero
        status
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug(
        "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
    )  # nopep8
    if not input_extension:
        input_extension = mimetypes.guess_extension(mimetype)
    if not input_extension:
        raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
    temporary_input_content_path = output_filepath + input_extension  # nopep8
    flag_file_path = create_flag_file(output_filepath)

    try:
        logger.debug(
            "conversion is based on temporary file {}".format(temporary_input_content_path)
        )  # nopep8

        if not os.path.exists(output_filepath):
            write_file_content(file_content, output_filepath=temporary_input_content_path)  # nopep8
            logger.debug(
                "temporary file written: {}".format(temporary_input_content_path)
            )  # nopep8
            logger.debug(
                "converting {} to pdf into folder {}".format(
                    temporary_input_content_path, cache_path
                )
            )
            # INFO - jumenzel - 2019-03-12 - Do not allow multiple concurrent libreoffice calls to avoid issue.
            # INFO - jumenzel - 2019-03-12 - Should we allow running multiple libreoffice instances ?
            #   see https://github.com/algoo/preview-generator/issues/77
            with LIBREOFFICE_CALL_LOCK:
                # NOTE(review): "PageRange=1" is passed as a standalone
                # argument, not as part of the --convert-to filter options —
                # confirm LibreOffice interprets it as intended.
                # NOTE(review): no shell is involved (argument list), so
                # "${USER}" reaches LibreOffice literally; presumably it is
                # expanded by LibreOffice itself — confirm.
                check_call(
                    [
                        "libreoffice",
                        "--headless",
                        "--convert-to",
                        "pdf:writer_pdf_Export",
                        temporary_input_content_path,
                        "--outdir",
                        cache_path,
                        "PageRange=1",
                        "-env:UserInstallation=file:///tmp/LibreOffice_Conversion_${USER}",  # nopep8
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )
        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
        # in its name it won't have the double ".pdf"
        if os.path.exists(output_filepath + ".pdf"):
            logger.debug(
                "renaming output file {} to {}".format(output_filepath + ".pdf", output_filepath)
            )
            os.rename(output_filepath + ".pdf", output_filepath)

        # The temporary copy is absent when the conversion was skipped or
        # when the rename above just moved it: tolerate that.
        with contextlib.suppress(FileNotFoundError):
            logger.info(
                "Removing temporary copy file {}".format(temporary_input_content_path)
            )  # nopep8
            os.remove(temporary_input_content_path)
    finally:
        # Always release the flag file, otherwise a failed conversion would
        # leave it behind.
        logger.debug("Removing flag file {}".format(flag_file_path))
        os.remove(flag_file_path)

    with open(output_filepath, "rb") as pdf_handle:
        # A freshly built BytesIO is already positioned at offset 0.
        return BytesIO(pdf_handle.read())
Exemplo n.º 4
0
    def convert_office_document_to_pdf(
        self,
        file_content: typing.IO[bytes],
        input_extension: typing.Optional[str],  # example: '.dxf'
        cache_path: str,
        output_filepath: str,
        mimetype: str,
    ) -> BytesIO:
        """
        Convert an office document to PDF with LibreOffice and return the PDF bytes.

        The input is copied to ``output_filepath`` + extension, LibreOffice is
        run headless with ``cache_path`` as output directory, and the produced
        ``<output_filepath>.pdf`` is renamed to ``output_filepath``. A flag
        file is created for the duration of the call and is always removed,
        even when the conversion fails. If ``output_filepath`` already exists
        the conversion is skipped.

        :param file_content: binary stream holding the document to convert
        :param input_extension: extension of the input (with leading dot);
            when empty it is guessed from ``mimetype``
        :param cache_path: directory given to LibreOffice as ``--outdir``; also
            selects the lock and the LibreOffice user installation directory
        :param output_filepath: path of the PDF to produce / reuse
        :param mimetype: mimetype used to guess the extension when none is given
        :return: an in-memory buffer positioned at 0 with the PDF content
        :raises InputExtensionNotFound: if no extension is given nor guessable
        :raises subprocess.CalledProcessError: if LibreOffice exits with
            non-zero status
        """
        logger = logging.getLogger(LOGGER_NAME)
        logger.debug(
            "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
        )  # nopep8
        if not input_extension:
            input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
        if not input_extension:
            raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
        temporary_input_content_path = output_filepath + input_extension  # nopep8
        flag_file_path = create_flag_file(output_filepath)

        try:
            logger.debug(
                "conversion is based on temporary file {}".format(temporary_input_content_path)
            )  # nopep8

            if not os.path.exists(output_filepath):
                write_file_content(
                    file_content, output_filepath=temporary_input_content_path
                )  # nopep8
                logger.debug(
                    "temporary file written: {}".format(temporary_input_content_path)
                )  # nopep8
                logger.debug(
                    "converting {} to pdf into folder {}".format(
                        temporary_input_content_path, cache_path
                    )
                )

                libreoffice_lock = self._get_libreoffice_lock(cache_path)
                # The digest only derives a per-cache-path directory name for
                # the LibreOffice user installation.
                cache_path_hash = hashlib.md5(cache_path.encode("utf-8")).hexdigest()
                with libreoffice_lock:
                    check_call(
                        [
                            "libreoffice",
                            "--headless",
                            "--convert-to",
                            "pdf:writer_pdf_Export",
                            temporary_input_content_path,
                            "--outdir",
                            cache_path,
                            "-env:UserInstallation=file:///tmp/LibreOffice-conversion-{}".format(
                                cache_path_hash
                            ),  # nopep8
                        ],
                        stdout=DEVNULL,
                        stderr=STDOUT,
                    )
            # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
            # according to input file name, for homogeneity we prefer to rename it
            # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
            # in its name it won't have the double ".pdf"
            if os.path.exists(output_filepath + ".pdf"):
                logger.debug(
                    "renaming output file {} to {}".format(
                        output_filepath + ".pdf", output_filepath
                    )
                )
                os.rename(output_filepath + ".pdf", output_filepath)

            # The temporary copy is absent when the conversion was skipped or
            # when the rename above just moved it: tolerate that.
            with contextlib.suppress(FileNotFoundError):
                logger.info(
                    "Removing temporary copy file {}".format(temporary_input_content_path)
                )  # nopep8
                os.remove(temporary_input_content_path)
        finally:
            # Always release the flag file, otherwise a failed conversion
            # would leave it behind.
            logger.debug("Removing flag file {}".format(flag_file_path))
            os.remove(flag_file_path)

        with open(output_filepath, "rb") as pdf_handle:
            # A freshly built BytesIO is already positioned at offset 0.
            return BytesIO(pdf_handle.read())
Exemplo n.º 5
0
def convert_office_document_to_pdf(
        file_content: typing.IO[bytes],
        input_extension: str,  # example: '.dxf'
        cache_path: str,
        output_filepath: str,
        mimetype: str) -> BytesIO:
    """
    Convert an office document to PDF with LibreOffice and return the PDF bytes.

    The input is copied to ``output_filepath`` + extension, LibreOffice is run
    headless with ``cache_path`` as output directory, and the produced
    ``<output_filepath>.pdf`` is renamed to ``output_filepath``. A flag file is
    created for the duration of the call and is always removed, even when the
    conversion fails. If ``output_filepath`` already exists the conversion is
    skipped.

    :param file_content: binary stream holding the document to convert
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: directory given to LibreOffice as ``--outdir``
    :param output_filepath: path of the PDF to produce / reuse
    :param mimetype: mimetype used to guess the extension when none is given
    :return: an in-memory buffer positioned at 0 with the PDF content
    :raises InputExtensionNotFound: if no extension is given nor guessable
    :raises subprocess.CalledProcessError: if LibreOffice exits with non-zero
        status
    """
    logging.debug('converting file bytes {} to pdf file {}'.format(
        file_content, output_filepath))  # nopep8
    if not input_extension:
        input_extension = mimetypes.guess_extension(mimetype)
    if not input_extension:
        raise InputExtensionNotFound(
            'unable to found input extension from mimetype')  # nopep8
    temporary_input_content_path = output_filepath + input_extension  # nopep8
    flag_file_path = create_flag_file(output_filepath)

    try:
        logging.debug('conversion is based on temporary file {}'.format(
            temporary_input_content_path))  # nopep8

        if not os.path.exists(output_filepath):
            write_file_content(
                file_content,
                output_filepath=temporary_input_content_path)  # nopep8
            logging.debug('temporary file written: {}'.format(
                temporary_input_content_path))  # nopep8
            logging.debug('converting {} to pdf into folder {}'.format(
                temporary_input_content_path, cache_path))
            # NOTE(review): no shell is involved (argument list), so
            # "${USER}" reaches LibreOffice literally; presumably it is
            # expanded by LibreOffice itself — confirm.
            check_call(
                [
                    'libreoffice',
                    '--headless',
                    '--convert-to',
                    'pdf:writer_pdf_Export',
                    temporary_input_content_path,
                    '--outdir',
                    cache_path,
                    '-env:UserInstallation=file:///tmp/LibreOffice_Conversion_${USER}',  # nopep8
                ],
                stdout=DEVNULL,
                stderr=STDOUT)
        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
        # in its name it won't have the double ".pdf"
        if os.path.exists(output_filepath + '.pdf'):
            logging.debug('renaming output file {} to {}'.format(
                output_filepath + '.pdf', output_filepath))
            os.rename(output_filepath + '.pdf', output_filepath)

        # The temporary copy is absent when the conversion was skipped or
        # when the rename above just moved it: tolerate that.
        with contextlib.suppress(FileNotFoundError):
            logging.info('Removing temporary copy file {}'.format(
                temporary_input_content_path))  # nopep8
            os.remove(temporary_input_content_path)
    finally:
        # Always release the flag file, otherwise a failed conversion would
        # leave it behind.
        logging.debug('Removing flag file {}'.format(flag_file_path))
        os.remove(flag_file_path)

    with open(output_filepath, 'rb') as pdf_handle:
        # A freshly built BytesIO is already positioned at offset 0.
        return BytesIO(pdf_handle.read())
    def convert_office_document_to_pdf(
        self,
        file_content: typing.IO[bytes],
        input_extension: typing.Optional[str],  # example: '.dxf'
        cache_path: str,
        output_filepath: str,
        mimetype: str,
    ) -> BytesIO:
        """
        Convert an office document to PDF with LibreOffice and return the PDF bytes.

        The input is copied to ``output_filepath`` + extension, LibreOffice is
        run headless with ``cache_path`` as output directory, and the produced
        ``<output_filepath>.pdf`` is renamed to ``output_filepath``. The whole
        conversion runs inside the ``create_flag_file`` context. If
        ``output_filepath`` already exists the conversion is skipped.

        The LibreOffice process is waited for at most
        ``LIBREOFFICE_PROCESS_TIMEOUT`` (no limit when it is ``None``). If
        waiting is interrupted, the process is terminated, then killed if it
        does not exit within a tenth of that timeout, and the exception that
        interrupted the wait is re-raised.

        :param file_content: binary stream holding the document to convert
        :param input_extension: extension of the input (with leading dot);
            when empty it is guessed from ``mimetype``
        :param cache_path: directory given to LibreOffice as ``--outdir``; also
            selects the lock and the LibreOffice user installation directory
        :param output_filepath: path of the PDF to produce / reuse
        :param mimetype: mimetype used to guess the extension when none is given
        :return: an in-memory buffer positioned at 0 with the PDF content
        :raises InputExtensionNotFound: if no extension is given nor guessable
        :raises subprocess.TimeoutExpired: if LibreOffice exceeds the timeout
        :raises FileNotFoundError: if no output file exists after the
            conversion (the process exit status itself is not checked)
        """
        logger = logging.getLogger(LOGGER_NAME)
        logger.debug(
            "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
        )  # nopep8
        if not input_extension:
            input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
        if not input_extension:
            raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
        temporary_input_content_path = output_filepath + input_extension  # nopep8
        with create_flag_file(output_filepath):
            logger.debug(
                "conversion is based on temporary file {}".format(temporary_input_content_path)
            )  # nopep8

            if not os.path.exists(output_filepath):
                write_file_content(file_content, output_filepath=temporary_input_content_path)
                logger.debug(
                    "temporary file written: {}".format(temporary_input_content_path)
                )  # nopep8
                logger.debug(
                    "converting {} to pdf into folder {}".format(
                        temporary_input_content_path, cache_path
                    )
                )

                libreoffice_lock = self._get_libreoffice_lock(cache_path)
                # The digest only derives a per-cache-path directory name for
                # the LibreOffice user installation.
                cache_path_hash = hashlib.md5(cache_path.encode("utf-8")).hexdigest()
                with libreoffice_lock:
                    process = Popen(
                        [
                            "libreoffice",
                            "--headless",
                            "--convert-to",
                            "pdf:writer_pdf_Export",
                            temporary_input_content_path,
                            "--outdir",
                            cache_path,
                            "-env:UserInstallation=file:///tmp/LibreOffice-conversion-{}".format(
                                cache_path_hash
                            ),  # nopep8
                        ],
                        stdout=DEVNULL,
                        stderr=STDOUT,
                    )
                    process_timeout = LIBREOFFICE_PROCESS_TIMEOUT
                    if process_timeout is not None:
                        stop_process_timeout = process_timeout / 10  # type: typing.Optional[float]
                    else:
                        stop_process_timeout = None
                    try:
                        process.communicate(timeout=process_timeout)
                    except Exception:
                        # INFO - SG - 2021-04-16
                        # we waited long enough (or we got another exception), give a little time to the process
                        # to exit cleanly
                        logger.warning(
                            "The preview generation for {} took too long, aborting it".format(
                                temporary_input_content_path
                            )
                        )
                        try:
                            process.terminate()
                            process.communicate(timeout=stop_process_timeout)
                        except Exception:
                            # too slow to exit… let's kill
                            process.kill()
                            process.communicate(timeout=stop_process_timeout)
                        # Re-raise the exception that interrupted the wait
                        # (not a secondary one raised while stopping the
                        # process). Kept outside the inner try so that a clean
                        # terminate is not followed by a needless kill.
                        raise

            # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
            # according to input file name, for homogeneity we prefer to rename it
            # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
            # in its name it won't have the double ".pdf"
            if os.path.exists(output_filepath + ".pdf"):
                logger.debug(
                    "renaming output file {} to {}".format(
                        output_filepath + ".pdf", output_filepath
                    )
                )
                os.rename(output_filepath + ".pdf", output_filepath)

            # The temporary copy is absent when the conversion was skipped or
            # when the rename above just moved it: tolerate that.
            with contextlib.suppress(FileNotFoundError):
                logger.info(
                    "Removing temporary copy file {}".format(temporary_input_content_path)
                )  # nopep8
                os.remove(temporary_input_content_path)

        with open(output_filepath, "rb") as pdf_handle:
            # A freshly built BytesIO is already positioned at offset 0.
            return BytesIO(pdf_handle.read())