def convert_sla_to_pdf(
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert a document to PDF by running scribus, and return the PDF content.

    The input bytes are first written to a temporary copy located next to
    the output file (``output_filepath`` + extension), scribus is then run on
    that copy with ``output_filepath`` as target. The conversion is skipped
    when ``output_filepath`` already exists.

    :param file_content: readable binary stream holding the input document
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: folder used for conversion (only used for logging here)
    :param output_filepath: path of the PDF file to produce
    :param mimetype: mimetype of the input, used to guess the extension
    :return: a BytesIO positioned at 0 holding the content of output_filepath
    :raises subprocess.CalledProcessError: if scribus exits with non-zero code
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug(
        "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
    )  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
    temporary_input_content_path = output_filepath
    if input_extension:
        temporary_input_content_path += input_extension

    with create_flag_file(output_filepath):
        logger.debug(
            "conversion is based on temporary file {}".format(temporary_input_content_path)
        )  # nopep8
        conversion_succeeded = False
        try:
            if not os.path.exists(output_filepath):
                write_file_content(
                    file_content, output_filepath=temporary_input_content_path
                )  # nopep8
                logger.debug(
                    "temporary file written: {}".format(temporary_input_content_path)
                )  # nopep8
                logger.debug(
                    "converting {} to pdf into folder {}".format(
                        temporary_input_content_path, cache_path
                    )
                )
                # NOTE(review): Xvfb presumably provides the display scribus
                # needs - confirm against the Xvfb helper.
                with Xvfb():
                    check_call(
                        [
                            "scribus",
                            "-g",
                            "-py",
                            SCRIPT_PATH,
                            output_filepath,
                            "--",
                            temporary_input_content_path,
                        ],
                        stdout=DEVNULL,
                        stderr=STDOUT,
                    )
            conversion_succeeded = True
        finally:
            # INFO - when no extension is known, the temporary path IS the
            # output path: after a successful conversion it must be kept,
            # otherwise the result would be deleted just before being read.
            keep_file = (
                conversion_succeeded and temporary_input_content_path == output_filepath
            )
            if not keep_file:
                logger.info(
                    "Removing temporary copy file {}".format(temporary_input_content_path)
                )  # nopep8
                try:
                    os.remove(temporary_input_content_path)
                except FileNotFoundError:
                    # no temporary copy is written when the output file
                    # already existed, so there is nothing to clean up.
                    pass

    with open(output_filepath, "rb") as pdf_handle:
        return BytesIO(pdf_handle.read())
def image_to_jpeg_wand(
    self,
    file_path: str,
    preview_dims: ImgDims,
    dest_path: str,
    mimetype: typing.Optional[str],
) -> None:
    """
    Convert the image at ``file_path`` with ``self._convert_image`` and save
    the result to ``dest_path``.

    If the first attempt fails with a wand coder error, a second attempt is
    made with the path prefixed by the extension guessed from ``mimetype``
    (``"<ext>:<path>"``, presumably the explicit input format syntax).

    :param file_path: path of the source image
    :param preview_dims: dimensions forwarded to ``self._convert_image``
    :param dest_path: path where the converted image is saved
    :param mimetype: mimetype of the source, used only for the second attempt
    :raises CoderError, CoderFatalError, CoderWarning: the error of the first
        attempt when no extension can be derived from ``mimetype``, or the
        error of the second attempt if it fails too
    """
    try:
        with self._convert_image(file_path, preview_dims) as img:
            img.save(filename=dest_path)
    except (CoderError, CoderFatalError, CoderWarning):
        # INFO - mimetype is optional: without it (or without a known
        # extension for it) there is nothing more to try, so the original
        # conversion error is re-raised instead of being hidden behind an
        # AssertionError.
        file_ext = ""
        if mimetype:
            file_ext = mimetypes_storage.guess_extension(mimetype, strict=False) or ""
        if not file_ext:
            raise
        explicit_file_path = file_ext.lstrip(".") + ":" + file_path
        with self._convert_image(explicit_file_path, preview_dims) as img:
            img.save(filename=dest_path)
def _imagemagick_convert(
    self, source_path: str, dest_path: str, mimetype: typing.Optional[str] = None
) -> int:
    """
    Try convert using both explicit or implicit input type convert.

    When an extension can be guessed from ``mimetype``, ``convert`` is first
    run with an explicit ``"<ext>:<source_path>"`` input. If that run fails,
    or if no extension is known, ``convert`` is run on ``source_path`` as is.

    :param source_path: path of the file to convert
    :param dest_path: path of the file to produce
    :param mimetype: optional mimetype of the source; must not be ""
    :return: exit code of the successful ``convert`` run (always 0, because
        check_call raises on any other code)
    :raises subprocess.CalledProcessError: if the implicit convert fails
    """
    # local import: only the names used by the original code are assumed to
    # be imported at file level.
    from subprocess import CalledProcessError

    assert mimetype != ""
    # INFO - G.M - 2019-11-14 - use explicit input type to clarify conversion for imagemagick
    input_file_extension = ""  # type: str
    if mimetype is not None:
        input_file_extension = (
            mimetypes_storage.guess_extension(mimetype, strict=False) or ""
        )

    if input_file_extension:
        explicit_source_path = "{}:{}".format(
            input_file_extension.lstrip("."), source_path
        )
        try:
            return check_call(
                ["convert", explicit_source_path, "-layers", "merge", dest_path],
                stdout=DEVNULL,
                stderr=STDOUT,
            )
        except CalledProcessError:
            # INFO - G.M - 2019-11-14 - if explicit convert failed, fallback to
            # implicit input type convert.
            # check_call never returns a non-zero code, it raises instead:
            # the failure has to be caught for the fallback to happen.
            pass

    return check_call(
        ["convert", source_path, "-layers", "merge", dest_path],
        stdout=DEVNULL,
        stderr=STDOUT,
    )
def get_file_extension(self, mime: str) -> typing.Optional[str]:
    """
    Return a file extension associated with the given mimetype.

    The lookup is delegated to ``mimetypes_storage.guess_extension`` in
    non-strict mode and its result is returned unchanged.

    :param mime: mimetype to look up
    :return: result of the lookup (annotated as optional)
    """
    extension = mimetypes_storage.guess_extension(mime, strict=False)
    return extension
def convert_office_document_to_pdf(
    self,
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert an office document to PDF with libreoffice and return the PDF.

    The input bytes are written to a temporary copy named
    ``output_filepath`` + extension, libreoffice converts it into
    ``cache_path`` and the result is renamed to ``output_filepath``. The
    conversion is skipped when ``output_filepath`` already exists. The flag
    file and the temporary copy are removed whether the conversion succeeds
    or fails.

    :param file_content: readable binary stream holding the input document
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: output folder given to libreoffice; also used to pick
        the lock and the libreoffice user installation folder
    :param output_filepath: path of the PDF file to produce
    :param mimetype: mimetype of the input, used to guess the extension
    :return: a BytesIO positioned at 0 holding the content of output_filepath
    :raises InputExtensionNotFound: if no input extension can be determined
    :raises subprocess.CalledProcessError: if libreoffice exits with a
        non-zero code
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug(
        "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
    )  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
    if not input_extension:
        raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
    temporary_input_content_path = output_filepath + input_extension  # nopep8

    flag_file_path = create_flag_file(output_filepath)
    try:
        logger.debug(
            "conversion is based on temporary file {}".format(temporary_input_content_path)
        )  # nopep8
        if not os.path.exists(output_filepath):
            write_file_content(file_content, output_filepath=temporary_input_content_path)  # nopep8
            logger.debug(
                "temporary file written: {}".format(temporary_input_content_path)
            )  # nopep8
            logger.debug(
                "converting {} to pdf into folder {}".format(
                    temporary_input_content_path, cache_path
                )
            )
            libreoffice_lock = self._get_libreoffice_lock(cache_path)
            # one libreoffice user installation folder per cache path
            cache_path_hash = hashlib.md5(cache_path.encode("utf-8")).hexdigest()
            with libreoffice_lock:
                check_call(
                    [
                        "libreoffice",
                        "--headless",
                        "--convert-to",
                        "pdf:writer_pdf_Export",
                        temporary_input_content_path,
                        "--outdir",
                        cache_path,
                        "-env:UserInstallation=file:///tmp/LibreOffice-conversion-{}".format(
                            cache_path_hash
                        ),  # nopep8
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )
        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
        # in its name it won't have the double ".pdf"
        if os.path.exists(output_filepath + ".pdf"):
            logger.debug(
                "renaming output file {} to {}".format(output_filepath + ".pdf", output_filepath)
            )
            os.rename(output_filepath + ".pdf", output_filepath)
    finally:
        # INFO - cleanup is done in a finally block so that a failed
        # conversion leaves neither the temporary copy nor the flag file
        # behind.
        with contextlib.suppress(FileNotFoundError):
            logger.info(
                "Removing temporary copy file {}".format(temporary_input_content_path)
            )  # nopep8
            os.remove(temporary_input_content_path)
        logger.debug("Removing flag file {}".format(flag_file_path))
        os.remove(flag_file_path)

    with open(output_filepath, "rb") as pdf_handle:
        return BytesIO(pdf_handle.read())
def convert_office_document_to_pdf(
    self,
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert an office document to PDF with libreoffice and return the PDF.

    The input bytes are written to a temporary copy named
    ``output_filepath`` + extension, libreoffice converts it into
    ``cache_path`` and the result is renamed to ``output_filepath``. The
    conversion is skipped when ``output_filepath`` already exists. The
    libreoffice process is given LIBREOFFICE_PROCESS_TIMEOUT to finish
    (no limit when it is None); past that it is terminated, then killed if
    it does not exit within a tenth of that timeout. The temporary copy is
    removed whether the conversion succeeds or fails.

    :param file_content: readable binary stream holding the input document
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: output folder given to libreoffice; also used to pick
        the lock and the libreoffice user installation folder
    :param output_filepath: path of the PDF file to produce
    :param mimetype: mimetype of the input, used to guess the extension
    :return: a BytesIO positioned at 0 holding the content of output_filepath
    :raises InputExtensionNotFound: if no input extension can be determined
    :raises Exception: the error raised while waiting for libreoffice
        (subprocess.TimeoutExpired on timeout) is re-raised after the
        process has been stopped
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug(
        "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
    )  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
    if not input_extension:
        raise InputExtensionNotFound("unable to found input extension from mimetype")  # nopep8
    temporary_input_content_path = output_filepath + input_extension  # nopep8

    with create_flag_file(output_filepath):
        logger.debug(
            "conversion is based on temporary file {}".format(temporary_input_content_path)
        )  # nopep8
        try:
            if not os.path.exists(output_filepath):
                write_file_content(file_content, output_filepath=temporary_input_content_path)
                logger.debug(
                    "temporary file written: {}".format(temporary_input_content_path)
                )  # nopep8
                logger.debug(
                    "converting {} to pdf into folder {}".format(
                        temporary_input_content_path, cache_path
                    )
                )
                libreoffice_lock = self._get_libreoffice_lock(cache_path)
                # one libreoffice user installation folder per cache path
                cache_path_hash = hashlib.md5(cache_path.encode("utf-8")).hexdigest()
                with libreoffice_lock:
                    process = Popen(
                        [
                            "libreoffice",
                            "--headless",
                            "--convert-to",
                            "pdf:writer_pdf_Export",
                            temporary_input_content_path,
                            "--outdir",
                            cache_path,
                            "-env:UserInstallation=file:///tmp/LibreOffice-conversion-{}".format(
                                cache_path_hash
                            ),  # nopep8
                        ],
                        stdout=DEVNULL,
                        stderr=STDOUT,
                    )
                    process_timeout = LIBREOFFICE_PROCESS_TIMEOUT
                    if process_timeout is not None:
                        stop_process_timeout = (
                            process_timeout / 10
                        )  # type: typing.Optional[float]
                    else:
                        stop_process_timeout = None
                    try:
                        process.communicate(timeout=process_timeout)
                    except Exception:
                        # INFO - SG - 2021-04-16
                        # we waited long enough (or we got another exception), give a little time to the process
                        # to exit cleanly
                        logger.warning(
                            "The preview generation for {} took too long, aborting it".format(
                                temporary_input_content_path
                            )
                        )
                        process.terminate()
                        try:
                            process.communicate(timeout=stop_process_timeout)
                        except Exception:
                            # too slow to exit… let's kill
                            process.kill()
                            process.communicate(timeout=stop_process_timeout)
                        # the process is stopped: report the original failure
                        raise
            # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
            # according to input file name, for homogeneity we prefer to rename it
            # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
            # in its name it won't have the double ".pdf"
            if os.path.exists(output_filepath + ".pdf"):
                logger.debug(
                    "renaming output file {} to {}".format(
                        output_filepath + ".pdf", output_filepath
                    )
                )
                os.rename(output_filepath + ".pdf", output_filepath)
        finally:
            # INFO - done in a finally block so that a failed or timed out
            # conversion does not leave the temporary copy behind.
            with contextlib.suppress(FileNotFoundError):
                logger.info(
                    "Removing temporary copy file {}".format(temporary_input_content_path)
                )  # nopep8
                os.remove(temporary_input_content_path)

    with open(output_filepath, "rb") as pdf_handle:
        return BytesIO(pdf_handle.read())
def convert_office_document_to_pdf(
    file_content: typing.IO[bytes],
    input_extension: typing.Optional[str],  # example: '.dxf'
    cache_path: str,
    output_filepath: str,
    mimetype: str,
) -> BytesIO:
    """
    Convert an office document to PDF with libreoffice and return the PDF.

    The input bytes are written to a temporary copy named
    ``output_filepath`` + extension, libreoffice converts it into
    ``cache_path`` and the result is renamed to ``output_filepath``. The
    conversion is skipped when ``output_filepath`` already exists. The flag
    file and the temporary copy are removed whether the conversion succeeds
    or fails.

    :param file_content: readable binary stream holding the input document
    :param input_extension: extension of the input (with leading dot); when
        empty it is guessed from ``mimetype``
    :param cache_path: output folder given to libreoffice
    :param output_filepath: path of the PDF file to produce
    :param mimetype: mimetype of the input, used to guess the extension
    :return: a BytesIO positioned at 0 holding the content of output_filepath
    :raises InputExtensionNotFound: if no input extension can be determined
    :raises subprocess.CalledProcessError: if libreoffice exits with a
        non-zero code
    """
    logger = logging.getLogger(LOGGER_NAME)
    logger.debug(
        "converting file bytes {} to pdf file {}".format(file_content, output_filepath)
    )  # nopep8
    if not input_extension:
        input_extension = mimetypes_storage.guess_extension(mimetype, strict=False)
    if not input_extension:
        raise InputExtensionNotFound(
            "unable to found input extension from mimetype"
        )  # nopep8
    temporary_input_content_path = output_filepath + input_extension  # nopep8

    flag_file_path = create_flag_file(output_filepath)
    try:
        logger.debug(
            "conversion is based on temporary file {}".format(temporary_input_content_path)
        )  # nopep8
        if not os.path.exists(output_filepath):
            write_file_content(
                file_content, output_filepath=temporary_input_content_path
            )  # nopep8
            logger.debug(
                "temporary file written: {}".format(temporary_input_content_path)
            )  # nopep8
            logger.debug(
                "converting {} to pdf into folder {}".format(
                    temporary_input_content_path, cache_path
                )
            )
            # INFO - jumenzel - 2019-03-12 - Do not allow multiple concurrent libreoffice calls to avoid issue.
            # INFO - jumenzel - 2019-03-12 - Should we allow running multiple libreoffice instances ?
            # see https://github.com/algoo/preview-generator/issues/77
            with LIBREOFFICE_CALL_LOCK:
                check_call(
                    [
                        "libreoffice",
                        "--headless",
                        "--convert-to",
                        "pdf:writer_pdf_Export",
                        temporary_input_content_path,
                        "--outdir",
                        cache_path,
                        "-env:UserInstallation=file:///tmp/LibreOffice_Conversion_${USER}",  # nopep8
                    ],
                    stdout=DEVNULL,
                    stderr=STDOUT,
                )
        # HACK - D.A. - 2018-05-31 - name is defined by libreoffice
        # according to input file name, for homogeneity we prefer to rename it
        # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension
        # in its name it won't have the double ".pdf"
        if os.path.exists(output_filepath + ".pdf"):
            logger.debug(
                "renaming output file {} to {}".format(output_filepath + ".pdf", output_filepath)
            )
            os.rename(output_filepath + ".pdf", output_filepath)
    finally:
        # INFO - cleanup is done in a finally block so that a failed
        # conversion leaves neither the temporary copy nor the flag file
        # behind.
        with contextlib.suppress(FileNotFoundError):
            logger.info(
                "Removing temporary copy file {}".format(temporary_input_content_path)
            )  # nopep8
            os.remove(temporary_input_content_path)
        logger.debug("Removing flag file {}".format(flag_file_path))
        os.remove(flag_file_path)

    with open(output_filepath, "rb") as pdf_handle:
        return BytesIO(pdf_handle.read())