Exemplo n.º 1
0
def convert_sla_to_pdf(
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert a document to pdf by running the ``scribus`` command line.

    The input bytes are copied to a temporary file named after
    ``output_filepath`` (plus the input extension when one is known), scribus
    is run on it inside an ``Xvfb`` context, and the temporary copy is removed
    afterwards. When ``output_filepath`` already exists, the conversion is
    skipped and the existing file is returned.

    :param file_content: binary stream holding the document to convert
    :param input_extension: extension of the input file (example: '.dxf');
        when empty it is guessed from ``mimetype``
    :param cache_path: conversion folder (only used in log messages here)
    :param output_filepath: path of the pdf file to produce
    :param mimetype: mimetype used to guess the extension when none is given
    :return: an in-memory copy of the pdf content, positioned at offset 0
    :raises subprocess.CalledProcessError: if scribus exits with a non-zero
        status
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug("converting file bytes {} to pdf file {}".format(
        file_content, output_filepath))  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype,
                                                            strict=False)
    temporary_input_content_path = output_filepath
    if input_extension:
        temporary_input_content_path += input_extension
    with create_flag_file(output_filepath):

        logger.debug("conversion is based on temporary file {}".format(
            temporary_input_content_path))  # nopep8

        if not os.path.exists(output_filepath):
            write_file_content(
                file_content,
                output_filepath=temporary_input_content_path)  # nopep8
            logger.debug("temporary file written: {}".format(
                temporary_input_content_path))  # nopep8
            logger.debug("converting {} to pdf into folder {}".format(
                temporary_input_content_path, cache_path))
            with Xvfb():
                check_call(
                    [
                        "scribus",
                        "-g",
                        "-py",
                        SCRIPT_PATH,
                        output_filepath,
                        "--",
                        temporary_input_content_path,
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )

        # NOTE(review): no rename is actually performed in this function; the
        # message below looks inherited from the libreoffice converter. It is
        # kept unchanged so existing logs stay identical.
        logger.debug("renaming output file {} to {}".format(
            output_filepath + ".pdf", output_filepath))

        # INFO - when no extension could be determined the "temporary" path is
        # the output path itself: removing it would delete the result that is
        # read just below.
        if temporary_input_content_path != output_filepath:
            logger.info("Removing temporary copy file {}".format(
                temporary_input_content_path))  # nopep8
            try:
                os.remove(temporary_input_content_path)
            except FileNotFoundError:
                # INFO - the temporary copy is only written when the output
                # did not exist yet, so it is legitimately missing when the
                # conversion was skipped.
                pass

    with open(output_filepath, "rb") as pdf_handle:
        pdf_handle.seek(0, 0)
        content_as_bytes = pdf_handle.read()
        output = BytesIO(content_as_bytes)
        output.seek(0, 0)
        return output
Exemplo n.º 2
0
 def image_to_jpeg_wand(self, file_path: str, preview_dims: ImgDims,
                        dest_path: str,
                        mimetype: typing.Optional[str]) -> None:
     """
     Convert an image and save the result to ``dest_path``.

     A first attempt is made with ``file_path`` as-is. If it fails with a
     coder error, a second attempt is made with the path prefixed by the
     extension guessed from ``mimetype`` (``"<ext>:<path>"``), which makes the
     input type explicit.

     :param file_path: path of the source image
     :param preview_dims: dimensions forwarded to ``self._convert_image``
     :param dest_path: path the converted image is saved to
     :param mimetype: mimetype of the source, used only for the second
         attempt; may be None
     :raises CoderError, CoderFatalError, CoderWarning: the error of the first
         attempt when no extension can be derived from ``mimetype``, or the
         error of the second attempt when it fails too
     """
     try:
         with self._convert_image(file_path, preview_dims) as img:
             img.save(filename=dest_path)
     except (CoderError, CoderFatalError, CoderWarning):
         # INFO - a missing mimetype must not hide the real conversion error
         # behind an AssertionError: without a usable extension there is
         # nothing to retry with, so the original error is propagated.
         file_ext = ""
         if mimetype:
             file_ext = mimetypes_storage.guess_extension(
                 mimetype, strict=False) or ""
         if not file_ext:
             raise
         explicit_file_path = file_ext.lstrip(".") + ":" + file_path
         with self._convert_image(explicit_file_path, preview_dims) as img:
             img.save(filename=dest_path)
Exemplo n.º 3
0
    def _imagemagick_convert(self,
                             source_path: str,
                             dest_path: str,
                             mimetype: typing.Optional[str] = None) -> int:
        """
        Try convert using both explicit or implicit input type convert.

        When an extension can be guessed from ``mimetype``, ``convert`` is
        first run with an explicit input type (``"<ext>:<source_path>"``). If
        that run fails, or if no extension is available, ``convert`` is run
        on ``source_path`` directly.

        :param source_path: path of the file to convert
        :param dest_path: path of the file to produce
        :param mimetype: optional mimetype of the source; must not be an
            empty string
        :return: the return code of the successful ``convert`` run (0)
        :raises subprocess.CalledProcessError: if the implicit convert fails
        """
        # INFO - imported here so the block does not depend on the
        # module-level import list
        from subprocess import CalledProcessError

        assert mimetype != ""
        # INFO - G.M - 2019-11-14 - use explicit input type to clarify conversion for imagemagick
        input_file_extension = ""  # type: str
        if mimetype is not None:
            input_file_extension = mimetypes_storage.guess_extension(
                mimetype, strict=False) or ""

        if input_file_extension:
            explicit_source_path = "{}:{}".format(
                input_file_extension.lstrip("."), source_path)
            try:
                return check_call(
                    [
                        "convert", explicit_source_path, "-layers", "merge",
                        dest_path
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )
            except CalledProcessError:
                # INFO - G.M - 2019-11-14 - if explicit convert failed, fallback to
                # implicit input type convert.
                # check_call raises on a non-zero exit status instead of
                # returning it, so the failure has to be caught here for the
                # fallback to be reachable.
                pass

        return check_call(
            ["convert", source_path, "-layers", "merge", dest_path],
            stdout=DEVNULL,
            stderr=STDOUT,
        )
Exemplo n.º 4
0
 def get_file_extension(self, mime: str) -> typing.Optional[str]:
     """
     Look up a file extension matching the given mimetype.

     The lookup is delegated to ``mimetypes_storage.guess_extension`` in
     non-strict mode and its result is returned unchanged.
     """
     extension = mimetypes_storage.guess_extension(mime, strict=False)
     return extension
Exemplo n.º 5
0
    def convert_office_document_to_pdf(
        self,
        file_content: typing.IO[bytes],
        input_extension: typing.Optional[str],  # example: '.dxf'
        cache_path: str,
        output_filepath: str,
        mimetype: str,
    ) -> BytesIO:
        """
        Convert an office document to pdf with headless libreoffice.

        The input bytes are copied to a temporary file
        (``output_filepath + input_extension``), libreoffice converts it into
        ``cache_path``, and the result is renamed to ``output_filepath``.
        When ``output_filepath`` already exists the conversion is skipped.
        The flag file and the temporary copy are removed even when the
        conversion fails.

        :param file_content: binary stream holding the document to convert
        :param input_extension: extension of the input file (example: '.dxf');
            when empty it is guessed from ``mimetype``
        :param cache_path: folder libreoffice writes the pdf into
        :param output_filepath: final path of the pdf file
        :param mimetype: mimetype used to guess the extension when none is
            given
        :return: an in-memory copy of the pdf content, positioned at offset 0
        :raises InputExtensionNotFound: if no input extension can be
            determined
        :raises subprocess.CalledProcessError: if libreoffice exits with a
            non-zero status
        """
        logger = logging.getLogger(LOGGER_NAME)
        logger.debug(
            "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
        )  # nopep8
        if not input_extension:
            input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
        if not input_extension:
            raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
        temporary_input_content_path = output_filepath + input_extension  # nopep8
        flag_file_path = create_flag_file(output_filepath)

        try:
            logger.debug(
                "conversion is based on temporary file {}".format(temporary_input_content_path)
            )  # nopep8

            if not os.path.exists(output_filepath):
                write_file_content(
                    file_content, output_filepath=temporary_input_content_path
                )  # nopep8
                logger.debug(
                    "temporary file written: {}".format(temporary_input_content_path)
                )  # nopep8
                logger.debug(
                    "converting {} to pdf into folder {}".format(
                        temporary_input_content_path, cache_path
                    )
                )

                libreoffice_lock = self._get_libreoffice_lock(cache_path)
                # INFO - the hash only serves to derive a distinct libreoffice
                # profile folder per cache path
                cache_path_hash = hashlib.md5(cache_path.encode("utf-8")).hexdigest()
                with libreoffice_lock:
                    check_call(
                        [
                            "libreoffice",
                            "--headless",
                            "--convert-to",
                            "pdf:writer_pdf_Export",
                            temporary_input_content_path,
                            "--outdir",
                            cache_path,
                            "-env:UserInstallation=file:///tmp/LibreOffice-conversion-{}".format(
                                cache_path_hash
                            ),  # nopep8
                        ],
                        stdout=DEVNULL,
                        stderr=STDOUT,
                    )
            # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
            # according to input file name, for homogeneity we prefer to rename it
            # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
            # in its name it won't have the double ".pdf"
            if os.path.exists(output_filepath + ".pdf"):
                logger.debug(
                    "renaming output file {} to {}".format(
                        output_filepath + ".pdf", output_filepath
                    )
                )
                os.rename(output_filepath + ".pdf", output_filepath)
        finally:
            # INFO - cleanup runs in a finally clause so that a failing
            # conversion leaves neither the temporary copy nor the flag file
            # behind.
            with contextlib.suppress(FileNotFoundError):
                logger.info(
                    "Removing temporary copy file {}".format(temporary_input_content_path)
                )  # nopep8
                os.remove(temporary_input_content_path)

            logger.debug("Removing flag file {}".format(flag_file_path))
            os.remove(flag_file_path)

        with open(output_filepath, "rb") as pdf_handle:
            pdf_handle.seek(0, 0)
            content_as_bytes = pdf_handle.read()
            output = BytesIO(content_as_bytes)
            output.seek(0, 0)
            return output
    def convert_office_document_to_pdf(
        self,
        file_content: typing.IO[bytes],
        input_extension: typing.Optional[str],  # example: '.dxf'
        cache_path: str,
        output_filepath: str,
        mimetype: str,
    ) -> BytesIO:
        """
        Convert an office document to pdf with headless libreoffice.

        The input bytes are copied to a temporary file
        (``output_filepath + input_extension``), libreoffice converts it into
        ``cache_path``, and the result is renamed to ``output_filepath``.
        When ``output_filepath`` already exists the conversion is skipped.
        The libreoffice process is given ``LIBREOFFICE_PROCESS_TIMEOUT`` to
        finish; past that delay it is terminated, then killed if it still
        does not exit. The temporary copy is removed even when the conversion
        fails.

        :param file_content: binary stream holding the document to convert
        :param input_extension: extension of the input file (example: '.dxf');
            when empty it is guessed from ``mimetype``
        :param cache_path: folder libreoffice writes the pdf into
        :param output_filepath: final path of the pdf file
        :param mimetype: mimetype used to guess the extension when none is
            given
        :return: an in-memory copy of the pdf content, positioned at offset 0
        :raises InputExtensionNotFound: if no input extension can be
            determined
        :raises subprocess.TimeoutExpired: if libreoffice does not finish
            within the timeout
        """
        logger = logging.getLogger(LOGGER_NAME)
        logger.debug(
            "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
        )  # nopep8
        if not input_extension:
            input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
        if not input_extension:
            raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
        temporary_input_content_path = output_filepath + input_extension  # nopep8
        with create_flag_file(output_filepath):
            logger.debug(
                "conversion is based on temporary file {}".format(temporary_input_content_path)
            )  # nopep8

            try:
                if not os.path.exists(output_filepath):
                    write_file_content(
                        file_content, output_filepath=temporary_input_content_path
                    )
                    logger.debug(
                        "temporary file written: {}".format(temporary_input_content_path)
                    )  # nopep8
                    logger.debug(
                        "converting {} to pdf into folder {}".format(
                            temporary_input_content_path, cache_path
                        )
                    )

                    libreoffice_lock = self._get_libreoffice_lock(cache_path)
                    # INFO - the hash only serves to derive a distinct
                    # libreoffice profile folder per cache path
                    cache_path_hash = hashlib.md5(cache_path.encode("utf-8")).hexdigest()
                    with libreoffice_lock:
                        process = Popen(
                            [
                                "libreoffice",
                                "--headless",
                                "--convert-to",
                                "pdf:writer_pdf_Export",
                                temporary_input_content_path,
                                "--outdir",
                                cache_path,
                                "-env:UserInstallation=file:///tmp/LibreOffice-conversion-{}".format(
                                    cache_path_hash
                                ),  # nopep8
                            ],
                            stdout=DEVNULL,
                            stderr=STDOUT,
                        )
                        process_timeout = LIBREOFFICE_PROCESS_TIMEOUT
                        # INFO - the grace period granted to stop the process
                        # is a tenth of the conversion timeout
                        if process_timeout is not None:
                            stop_process_timeout = (
                                process_timeout / 10
                            )  # type: typing.Optional[float]
                        else:
                            stop_process_timeout = None
                        try:
                            process.communicate(timeout=process_timeout)
                        except Exception:
                            # INFO - SG - 2021-04-16
                            # we waited long enough (or we got another exception), give a little time to the process
                            # to exit cleanly
                            logger.warning(
                                "The preview generation for {} took too long, aborting it".format(
                                    temporary_input_content_path
                                )
                            )
                            try:
                                process.terminate()
                                process.communicate(timeout=stop_process_timeout)
                            except Exception:
                                # too slow to exit… let's kill
                                process.kill()
                                process.communicate(timeout=stop_process_timeout)
                            # INFO - re-raise the exception that interrupted
                            # the conversion, once the process is stopped
                            raise

                # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
                # according to input file name, for homogeneity we prefer to rename it
                # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
                # in its name it won't have the double ".pdf"
                if os.path.exists(output_filepath + ".pdf"):
                    logger.debug(
                        "renaming output file {} to {}".format(
                            output_filepath + ".pdf", output_filepath
                        )
                    )
                    os.rename(output_filepath + ".pdf", output_filepath)
            finally:
                # INFO - runs in a finally clause so that an aborted
                # conversion does not leave the temporary copy behind
                with contextlib.suppress(FileNotFoundError):
                    logger.info(
                        "Removing temporary copy file {}".format(temporary_input_content_path)
                    )  # nopep8
                    os.remove(temporary_input_content_path)

        with open(output_filepath, "rb") as pdf_handle:
            pdf_handle.seek(0, 0)
            content_as_bytes = pdf_handle.read()
            output = BytesIO(content_as_bytes)
            output.seek(0, 0)
            return output
Exemplo n.º 7
0
def convert_office_document_to_pdf(
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert an office document to pdf with headless libreoffice.

    The input bytes are copied to a temporary file
    (``output_filepath + input_extension``), libreoffice converts it into
    ``cache_path``, and the result is renamed to ``output_filepath``. When
    ``output_filepath`` already exists the conversion is skipped. The flag
    file and the temporary copy are removed even when the conversion fails.

    :param file_content: binary stream holding the document to convert
    :param input_extension: extension of the input file (example: '.dxf');
        when empty it is guessed from ``mimetype``
    :param cache_path: folder libreoffice writes the pdf into
    :param output_filepath: final path of the pdf file
    :param mimetype: mimetype used to guess the extension when none is given
    :return: an in-memory copy of the pdf content, positioned at offset 0
    :raises InputExtensionNotFound: if no input extension can be determined
    :raises subprocess.CalledProcessError: if libreoffice exits with a
        non-zero status
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug("converting file bytes {} to pdf file {}".format(
        file_content, output_filepath))  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype,
                                                            strict=False)
    if not input_extension:
        raise InputExtensionNotFound(
            "unable to found input extension from mimetype")  # nopep8
    temporary_input_content_path = output_filepath + input_extension  # nopep8
    flag_file_path = create_flag_file(output_filepath)

    try:
        logger.debug("conversion is based on temporary file {}".format(
            temporary_input_content_path))  # nopep8

        if not os.path.exists(output_filepath):
            write_file_content(
                file_content,
                output_filepath=temporary_input_content_path)  # nopep8
            logger.debug("temporary file written: {}".format(
                temporary_input_content_path))  # nopep8
            logger.debug("converting {} to pdf into folder {}".format(
                temporary_input_content_path, cache_path))
            # INFO - jumenzel - 2019-03-12 - Do not allow multiple concurrent libreoffice calls to avoid issue.
            # INFO - jumenzel - 2019-03-12 - Should we allow running multiple libreoffice instances ?
            #   see https://github.com/algoo/preview-generator/issues/77
            with LIBREOFFICE_CALL_LOCK:
                check_call(
                    [
                        "libreoffice",
                        "--headless",
                        "--convert-to",
                        "pdf:writer_pdf_Export",
                        temporary_input_content_path,
                        "--outdir",
                        cache_path,
                        # NOTE(review): no shell is involved here, so ${USER}
                        # reaches libreoffice verbatim; presumably libreoffice
                        # expands it itself - TODO confirm
                        "-env:UserInstallation=file:///tmp/LibreOffice_Conversion_${USER}",  # nopep8
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )
        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
        # in its name it won't have the double ".pdf"
        if os.path.exists(output_filepath + ".pdf"):
            logger.debug("renaming output file {} to {}".format(
                output_filepath + ".pdf", output_filepath))
            os.rename(output_filepath + ".pdf", output_filepath)
    finally:
        # INFO - cleanup runs in a finally clause so that a failing
        # conversion leaves neither the temporary copy nor the flag file
        # behind.
        with contextlib.suppress(FileNotFoundError):
            logger.info("Removing temporary copy file {}".format(
                temporary_input_content_path))  # nopep8
            os.remove(temporary_input_content_path)

        logger.debug("Removing flag file {}".format(flag_file_path))
        os.remove(flag_file_path)

    with open(output_filepath, "rb") as pdf_handle:
        pdf_handle.seek(0, 0)
        content_as_bytes = pdf_handle.read()
        output = BytesIO(content_as_bytes)
        output.seek(0, 0)
        return output