def test_compute_resize_dims_different_ratio_inverted():
    """A portrait source fitted into a landscape box ends up at the box height."""
    source = ImgDims(198, 600)  # portrait: taller than wide
    target = ImgDims(400, 100)  # landscape: wider than tall
    resized = compute_resize_dims(source, target)
    assert (resized.width, resized.height) == (33, 100)
def test_compute_resize_dims_right_limits():
    """A source slightly wider than the box is resized to the box width."""
    source = ImgDims(520, 206)  # landscape: wider than tall
    target = ImgDims(512, 256)  # landscape: wider than tall
    resized = compute_resize_dims(source, target)
    assert (resized.width, resized.height) == (512, 203)
def test_compute_resize_dims_same_format():
    """Two landscape sizes with different ratios: the result matches the box height."""
    source = ImgDims(100, 50)
    target = ImgDims(90, 30)
    resized = compute_resize_dims(source, target)
    assert (resized.width, resized.height) == (60, 30)
def test_compute_resize_dims_different_ratio():
    """A landscape source fitted into a portrait box ends up at the box width."""
    source = ImgDims(100, 50)  # landscape: wider than tall
    target = ImgDims(200, 400)  # portrait: taller than wide
    resized = compute_resize_dims(source, target)
    assert (resized.width, resized.height) == (200, 100)
def image_to_jpeg_wand(self, jpeg: typing.Union[str, typing.IO[bytes]],
                       preview_dims: ImgDims = None) -> BytesIO:
    '''
    Convert an image to JPEG with Wand, resized to fit ``preview_dims``.

    Used for jpeg, gif and bmp sources.

    :param jpeg: source image, passed to Wand as its ``file`` argument
    :param preview_dims: bounding box of the preview; its ``width`` and
        ``height`` are read unconditionally, so ``None`` is not usable
    :return: in-memory JPEG positioned at its start
    '''
    logging.info('Converting image to jpeg using wand')
    with WImage(file=jpeg, background=Color('white')) as source:
        box = ImgDims(width=preview_dims.width, height=preview_dims.height)
        source_dims = ImgDims(width=source.size[0], height=source.size[1])
        target = compute_resize_dims(dims_in=source_dims, dims_out=box)
        source.resize(target.width, target.height)
        jpeg_bytes = source.make_blob('jpeg')
    return BytesIO(jpeg_bytes)
def test_compute_resize_dims_same_ratio():
    """A source with the same ratio as the box is resized to exactly the box size."""
    source = ImgDims(100, 50)
    target = ImgDims(200, 100)
    resized = compute_resize_dims(source, target)
    assert (resized.width, resized.height) == (200, 100)
def image_to_jpeg_wand(self, jpeg: typing.Union[str, typing.IO[bytes]],
                       preview_dims: ImgDims = None) -> BytesIO:
    """
    Convert an image to a metadata-free JPEG resized to fit ``preview_dims``.

    Used for jpeg, gif and bmp sources.

    :param jpeg: source image, passed to Wand as its ``file`` argument
    :param preview_dims: bounding box of the preview; when omitted,
        ``self.default_size`` is used instead of failing on ``None``
    :return: in-memory JPEG positioned at its start
    """
    self.logger.info("Converting image to jpeg using wand")
    if preview_dims is None:
        # The signature advertises None as the default, so it must be usable.
        preview_dims = self.default_size
    with WImage(file=jpeg, background=Color("white")) as image:
        # Work on a private copy so the caller's (or the shared default)
        # dimensions object is never handed to the resize computation.
        target_box = ImgDims(width=preview_dims.width, height=preview_dims.height)
        resize_dim = compute_resize_dims(
            dims_in=ImgDims(width=image.size[0], height=image.size[1]),
            dims_out=target_box,
        )
        image.resize(resize_dim.width, resize_dim.height)
        # INFO - jumenzel - 2019-03-12 - remove metadata, color-profiles from this image.
        image.strip()
        content_as_bytes = image.make_blob("jpeg")
    return BytesIO(content_as_bytes)
def test_compute_resize_dims_same_ratio():
    """A source with the same ratio as the box is resized to exactly the box size."""
    # compute_resize_dims is a plain function: no preview builder is needed,
    # so the previously unused builder instance is no longer created.
    dims_in = ImgDims(100, 50)
    dims_out = ImgDims(200, 100)
    dims_resize = compute_resize_dims(dims_in, dims_out)
    assert dims_resize.width == 200
    assert dims_resize.height == 100
def test_compute_resize_dims_same_format():
    """Two landscape sizes with different ratios: the result matches the box height."""
    # compute_resize_dims is a plain function: no preview builder is needed,
    # so the previously unused builder instance is no longer created.
    dims_in = ImgDims(100, 50)
    dims_out = ImgDims(90, 30)
    dims_resize = compute_resize_dims(dims_in, dims_out)
    assert dims_resize.width == 60
    assert dims_resize.height == 30
def test_get_preview_name_with_size() -> None:
    """Check the preview name built from a file hash and a 512x256 size."""
    from preview_generator.utils import ImgDims

    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    context = manager.get_preview_context("/tmp/image.jpeg", file_ext=".jpeg")
    preview_name = manager._get_preview_name(context.hash, ImgDims(width=512, height=256))
    assert preview_name == "7f8df7223d8be60a7ac8a9bf7bd1df2a-512x256"
def image_to_jpeg_pillow(
    self, png: typing.Union[str, typing.IO[bytes]], preview_dims: ImgDims
) -> BytesIO:
    """
    Convert an image to JPEG with Pillow, pasted onto a white RGB canvas.

    :param png: source image, given to ``Image.open``
    :param preview_dims: bounding box the preview must fit in
    :return: in-memory JPEG positioned at its start
    """
    self.logger.info('Converting image to jpeg using Pillow')
    with Image.open(png) as source:
        target = compute_resize_dims(
            dims_in=ImgDims(width=source.size[0], height=source.size[1]),
            dims_out=preview_dims,
        )
        box = (target.width, target.height)
        scaled = source.resize(box)
        canvas = Image.new('RGB', box, (255, 255, 255))
        try:
            # First attempt: use the image itself as the paste mask.
            canvas.paste(scaled, (0, 0), scaled)
        except ValueError:
            self.logger.warning(
                'Failed the transparency mask superposition. '
                'Maybe your image does not contain a transparency mask')
            canvas.paste(scaled)
        jpeg_stream = BytesIO()
        canvas.save(jpeg_stream, 'jpeg')
        jpeg_stream.seek(0, 0)
        return jpeg_stream
def _convert_image(self, file_path: str, preview_dims: ImgDims) -> Image:
    """
    Load an image with Wand and turn it into a flattened thumbnail.

    Reference: https://legacy.imagemagick.org/Usage/thumbnails/
    Similar to the command line::

        convert -layers merge -background white -thumbnail widthxheight
            -auto-orient -quality 85 -interlace plane input.jpeg output.jpeg

    :param file_path: path of the image to load
    :param preview_dims: bounding box the thumbnail must fit in
    :return: the processed Wand image, returned without being closed
    """
    img = Image(filename=file_path)
    img.auto_orient()
    img.iterator_reset()
    img.background_color = Color("white")
    img.merge_layers("merge")
    if self.progressive:
        img.interlace_scheme = "plane"
    img.compression_quality = self.quality
    # Measure the image only now: auto_orient() may swap width and height and
    # merge_layers() may change the canvas, and thumbnail() resizes to the
    # exact dimensions it is given. Computing the target size from the
    # pre-orientation dimensions would distort rotated images.
    resize_dim = compute_resize_dims(
        dims_in=ImgDims(width=img.width, height=img.height), dims_out=preview_dims
    )
    img.thumbnail(resize_dim.width, resize_dim.height)
    return img
def build_jpeg_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    page_id: int,
    extension: str = ".jpeg",
    size: ImgDims = None,
    mimetype: str = "",
) -> None:
    """
    Write a JPEG preview of ``file_path`` into the cache.

    :param file_path: path of the source image
    :param preview_name: name of the preview file, without extension
    :param cache_path: prefix concatenated directly in front of ``preview_name``
    :param page_id: not used by this builder
    :param extension: extension appended to the preview file name
    :param size: bounding box of the preview; ``self.default_size`` when falsy
    :param mimetype: not used by this builder
    """
    target = size if size else self.default_size
    with open(file_path, "rb") as source:
        jpeg_stream = self.image_to_jpeg_wand(
            source, ImgDims(width=target.width, height=target.height))
    destination = "{path}{extension}".format(
        path=cache_path + preview_name, extension=extension)
    with open(destination, "wb") as jpeg:
        # Copy in 1 KiB chunks until the stream is exhausted.
        for chunk in iter(lambda: jpeg_stream.read(1024), b""):
            jpeg.write(chunk)
def test_get_file_hash_with_size_and_page() -> None:
    """Check the hash string produced for a 512x256 preview of page 3."""
    from preview_generator.utils import ImgDims

    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    file_hash = manager._get_file_hash(
        "/tmp/image.jpeg", ImgDims(width=512, height=256), page=3)
    assert file_hash == "7f8df7223d8be60a7ac8a9bf7bd1df2a-512x256-page3"
def get_jpeg_preview(
    self,
    file_path: str,
    page: int = -1,
    width: int = None,
    height: int = 256,
    force: bool = False,
    file_ext: str = "",
    dry_run: bool = False,
) -> str:
    """
    Build (or reuse from the cache) a JPEG preview and return its path.

    :param file_path: path of the file to preview
    :param page: page of the original document; negative values select the
        first page
    :param width: width of the requested preview; same as ``height`` when None
    :param height: height of the requested preview
    :param force: if True, rebuild even when the preview file already exists
    :param file_ext: extension associated to the file, e.g. 'jpg'. May be
        empty - it's useful if the extension can't be found in file_path
    :param dry_run: only compute and return the path, generate nothing
    :return: path to the preview file
    """
    preview_context = self.get_preview_context(file_path, file_ext)
    size = ImgDims(width=height if width is None else width, height=height)
    extension = ".jpeg"
    preview_name = self._get_preview_name(preview_context.hash, size, page)
    preview_file_path = os.path.join(self.cache_path, preview_name + extension)  # nopep8
    if dry_run:
        return preview_file_path

    # INFO - G.M - 2021-04-29 deal with pivot format
    # jpeg preview from pdf for libreoffice/scribus
    # - the pdf preview of the content replaces the original file as source
    # - the preview context of that pivot pdf replaces the original context
    if isinstance(preview_context.builder, DocumentPreviewBuilder):
        file_path = self.get_pdf_preview(file_path=file_path, force=force)
        preview_context = self.get_preview_context(file_path, file_ext=".pdf")

    with preview_context.filelock:
        must_build = force or not os.path.exists(preview_file_path)
        if must_build:
            preview_context.builder.build_jpeg_preview(
                file_path=file_path,
                preview_name=preview_name,
                cache_path=self.cache_path,
                # if page is -1 then return preview of first page
                page_id=max(page, 0),
                extension=extension,
                size=size,
                mimetype=preview_context.mimetype,
            )
    return preview_file_path
def _get_extraction_size(self, video_dims: utils.ImgDims,
                         preview_dims: utils.ImgDims) -> utils.ImgDims:
    """
    Compute the frame extraction dimensions.

    Exactly one side is taken from the preview size; the other one is left
    at -1. The width is constrained when the video ratio is strictly greater
    than the preview ratio, the height otherwise.

    :param video_dims: ImgDims object representing width and height of the video stream
    :param preview_dims: ImgDims object representing width and height of the preview to generate
    :return: ImgDims to use for ffmpeg video frame extraction
    """
    extract_size = utils.ImgDims(-1, -1)
    constrain_width = video_dims.ratio() > preview_dims.ratio()
    if not constrain_width:
        extract_size.height = preview_dims.height
        return extract_size
    extract_size.width = preview_dims.width
    return extract_size
def test_png_to_jpeg_with_background_white() -> None:
    """A PNG converted to JPEG has the expected size and a near-white pixel at (5, 5)."""
    source_path = os.path.join(CURRENT_DIR, "the_png.png")
    requested = ImgDims(width=512, height=256)
    with Image(filename=source_path) as source_img:
        source_dims = ImgDims(width=source_img.width, height=source_img.height)
    expected = compute_resize_dims(source_dims, requested)

    manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    assert manager.has_jpeg_preview(file_path=source_path) is True
    preview_path = manager.get_jpeg_preview(
        file_path=source_path, width=requested.width, height=requested.height, force=True
    )
    assert os.path.exists(preview_path) is True
    assert os.path.getsize(preview_path) > 0
    assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, preview_path)

    with Image(filename=preview_path) as preview_img:
        assert preview_img.width == expected.width
        assert preview_img.height == expected.height
        assert nearest_colour_white(preview_img[5][5])
def get_jpeg_preview(
    self,
    file_path: str,
    page: int = -1,
    width: int = None,
    height: int = 256,
    force: bool = False,
    file_ext: str = "",
) -> str:
    """
    Return a JPEG preview of given file, according to parameters

    :param file_path: path of the file to preview
    :param page: page of the original document, if it makes sense; negative
        values select the first page
    :param width: width of the requested preview image; same as ``height``
        when None
    :param height: height of the requested preview image
    :param force: if True, do not use cached preview.
    :param file_ext: extension associated to the file. Eg 'jpg'. May be
        empty - it's useful if the extension can't be found in file_path
    :return: path to the generated preview file
    """
    if width is None:
        width = height
    size = ImgDims(width=width, height=height)
    mimetype = self._factory.get_file_mimetype(file_path, file_ext)
    builder = self._factory.get_preview_builder(mimetype)
    extension = ".jpeg"

    if type(builder) in [OfficePreviewBuilderLibreoffice, DocumentPreviewBuilderScribus]:
        # These builders go through a PDF pivot: the JPEG is rendered from
        # the PDF preview, so the source file, mimetype and builder all
        # switch to the pivot file.
        file_path = self.get_pdf_preview(file_path=file_path, force=force)
        mimetype = self._factory.get_file_mimetype(file_path)
        builder = self._factory.get_preview_builder(mimetype)
    # Without a pivot, the mimetype and builder resolved above are kept:
    # resolving them again without file_ext would throw away the caller's
    # extension hint and could select a different builder.

    preview_name = self._get_file_hash(file_path, size, page)
    preview_file_path = os.path.join(self.cache_path, preview_name + extension)  # nopep8
    page = max(page, 0)  # if page is -1 then return preview of first page
    if force or not os.path.exists(preview_file_path):
        builder.build_jpeg_preview(
            file_path=file_path,
            preview_name=preview_name,
            cache_path=self.cache_path,
            page_id=page,
            extension=extension,
            size=size,
            mimetype=mimetype,
        )
    return preview_file_path
def get_jpeg_preview(
        self,
        file_path,
        page=-1,
        width=None,
        height=256,
        force=False,
):
    """
    Build (or reuse from the cache) a JPEG preview and return its path.

    :param file_path: path of the file to preview
    :param page: page of the original document; negative values select the
        first page
    :param width: width of the requested preview; same as ``height`` when None
    :param height: height of the requested preview
    :param force: if True, rebuild even when the preview file already exists
    :return: path to the preview file
    """
    size = ImgDims(width=height if width is None else width, height=height)
    builder = self._factory.get_preview_builder(
        self._factory.get_file_mimetype(file_path))
    extension = '.jpeg'

    if isinstance(builder, OfficePreviewBuilderLibreoffice):
        # Office documents are rendered from their PDF preview.
        file_path = self.get_pdf_preview(file_path=file_path, force=force)

    # The name is computed from the (possibly replaced) path and the page as
    # requested, before the page number is clamped.
    preview_name = self._get_file_hash(file_path, size, page)
    builder = self._factory.get_preview_builder(
        self._factory.get_file_mimetype(file_path))
    preview_file_path = os.path.join(self.cache_path, preview_name + extension)  # nopep8

    if force or not os.path.exists(preview_file_path):
        builder.build_jpeg_preview(
            file_path=file_path,
            preview_name=preview_name,
            cache_path=self.cache_path,
            page_id=max(page, 0),  # if page is -1 then return preview of first page
            extension=extension,
            size=size
        )
    return preview_file_path
def convert_pdf_to_jpeg(pdf: typing.Union[str, typing.IO[bytes]],
                        preview_size: ImgDims) -> BytesIO:
    """
    Render a PDF to JPEG data resized to fit ``preview_size``.

    Every image returned by ``convert_from_bytes`` is saved, one after the
    other, into the same buffer.

    :param pdf: PDF content; ``read()`` is called on it
        (NOTE(review): a ``str`` would not work despite the annotation)
    :param preview_size: bounding box the rendered images must fit in
    :return: in-memory JPEG data positioned at its start
    """
    pages = convert_from_bytes(pdf.read())
    output = BytesIO()
    for page_image in pages:
        target = compute_resize_dims(
            ImgDims(page_image.width, page_image.height), preview_size)
        # NOTE(review): resample=True is passed as-is; presumably it is read
        # as the integer filter constant 1 - confirm against the Pillow docs.
        page_image.resize(
            (target.width, target.height), resample=True
        ).save(output, format="JPEG")
    output.seek(0, 0)
    return output
def build_jpeg_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    page_id: int,
    extension: str = ".jpg",
    size: utils.ImgDims = None,
    mimetype: str = "",
) -> None:
    """
    Generate the small JPEG preview of one PDF page.

    The page is first rendered to a temporary PNG by pdftocairo, then that
    PNG is handed to ImagePreviewBuilderPillow to produce the JPEG.

    :param file_path: path of the PDF
    :param preview_name: name of the preview file, forwarded to the Pillow builder
    :param cache_path: cache location, forwarded to the Pillow builder
    :param page_id: zero-based page index
    :param extension: extension of the preview file
    :param size: bounding box of the preview; ``self.default_size`` when falsy
    :param mimetype: forwarded to the Pillow builder
    :raises IntermediateFileBuildingFailed: when the pdftocairo status is not 0
    """
    # INFO - G.M - 2021-10-21 - Page id in pdftocairo begins at 1 instead of 0
    page_id = page_id + 1
    if not size:
        size = self.default_size
    with tempfile.NamedTemporaryFile(
        "w+b", prefix="preview-generator-", suffix=".png"
    ) as tmp_png:
        page_number = str(page_id)
        # HACK - G.M - 2021-10-21 - For unclear reason, pdftocairo add a second .png
        # extension to the file created, so it is given the name without it.
        output_prefix = tmp_png.name.rsplit(".png", 1)[0]
        command = [
            PDFTOCAIRO_EXECUTABLE,
            "-png",
            "-singlefile",
            "-scale-to",
            str(size.max_dim()),
            "-f",
            page_number,
            "-l",
            page_number,
            file_path,
            output_prefix,
        ]
        build_png_result_code = check_call(command, stdout=DEVNULL, stderr=STDOUT)
        # NOTE(review): if check_call is subprocess.check_call, a non-zero
        # status already raises CalledProcessError, so this branch would
        # never run - confirm which exception callers are meant to see.
        if build_png_result_code != 0:
            raise IntermediateFileBuildingFailed(
                "Building PNG intermediate file using pdftocairo failed with status {}"
                .format(build_png_result_code))
        # Must stay inside the with block: the temporary PNG is needed here.
        return ImagePreviewBuilderPillow().build_jpeg_preview(
            tmp_png.name, preview_name, cache_path, page_id, extension, size, mimetype)
def convert_pdf_to_jpeg(pdf, preview_size):
    """
    Render a PDF with Wand and return it as a JPEG fitted to ``preview_size``.

    :param pdf: PDF content, passed to Wand as its ``file`` argument
    :param preview_size: bounding box the rendered image must fit in
    :return: in-memory JPEG positioned at its start
    """
    with WImage(file=pdf) as page:
        # HACK - D.A. - 2017-08-01
        # The following 2 lines avoid black background in case of transparent
        # objects found on the page. As we save to JPEG, this is not a problem
        page.background_color = Color('white')
        page.alpha_channel = 'remove'

        target = compute_resize_dims(ImgDims(page.width, page.height), preview_size)
        page.resize(target.width, target.height)
        jpeg_bytes = page.make_blob('jpeg')
    return BytesIO(jpeg_bytes)
def build_jpeg_preview(self, file_path, preview_name, cache_path, page_id,
                       extension='.jpeg', size=None):
    """
    Write a JPEG preview of ``file_path`` into the cache.

    :param file_path: path of the source image
    :param preview_name: name of the preview file, without extension
    :param cache_path: prefix concatenated directly in front of ``preview_name``
    :param page_id: not used by this builder
    :param extension: extension appended to the preview file name
    :param size: bounding box of the preview; 256x256 when omitted
    """
    if size is None:
        # The signature advertises None as the default: fall back to a
        # 256x256 box instead of failing on ``None.width``.
        size = ImgDims(256, 256)
    with open(file_path, 'rb') as img:
        result = self.image_to_jpeg_wand(
            img, ImgDims(width=size.width, height=size.height))
    preview_path = '{path}{extension}'.format(
        path=cache_path + preview_name, extension=extension)
    with open(preview_path, 'wb') as jpeg:
        # Copy in 1 KiB chunks until the stream is exhausted.
        for chunk in iter(lambda: result.read(1024), b''):
            jpeg.write(chunk)
def image_to_jpeg_pillow(self, png: typing.Union[str, typing.IO[bytes]],
                         preview_dims: ImgDims) -> BytesIO:
    """
    Convert an image to JPEG with Pillow, resized to fit ``preview_dims``.

    The resized image is saved by the strategy that
    PillowImageConvertStrategyFactory selects for it.

    :param png: source image, given to ``Image.open``
    :param preview_dims: bounding box the preview must fit in
    :return: whatever the selected strategy's ``save`` returns
    """
    self.logger.info("Converting image to jpeg using Pillow")
    with Image.open(png) as source:
        source_dims = ImgDims(width=source.size[0], height=source.size[1])
        target = compute_resize_dims(dims_in=source_dims, dims_out=preview_dims)
        resized = source.resize(
            (target.width, target.height),
            resample=self.resample_filter_algorithm,
        )
        strategy = PillowImageConvertStrategyFactory(self.logger).get_strategy(resized)
        return strategy.save(
            resized,
            BytesIO(),
            optimize=self.optimize,
            progressive=self.progressive,
            quality=self.quality,
        )
def test_to_jpeg() -> None:
    """The VTK builder writes a non-empty 256x256 JPEG for a 512x256 request."""
    os.makedirs(CACHE_DIR)
    vtk_builder = ImagePreviewBuilderVtk()
    assert vtk_builder.has_jpeg_preview() is True

    name = "stl_cube_test_vtk"
    vtk_builder.build_jpeg_preview(
        file_path=IMAGE_FILE_PATH,
        size=ImgDims(height=256, width=512),
        page_id=0,
        cache_path=CACHE_DIR,
        preview_name=name,
    )

    jpeg_path = os.path.join(CACHE_DIR, "{}.jpg".format(name))
    assert os.path.exists(jpeg_path) is True
    assert os.path.getsize(jpeg_path) > 0
    with Image.open(jpeg_path) as preview:
        assert preview.height == 256
        assert preview.width == 256
def test_to_jpeg() -> None:
    """The FFMPEG builder writes a non-empty 461x256 JPEG for a 512x256 request."""
    os.makedirs(CACHE_DIR)
    video_builder = VideoPreviewBuilderFFMPEG()
    assert video_builder.has_jpeg_preview() is True

    name = "ogg_theora_big_buck_bunny_trailer_test_ffmpeg"
    video_builder.build_jpeg_preview(
        file_path=IMAGE_FILE_PATH,
        size=ImgDims(height=256, width=512),
        page_id=7,
        cache_path=CACHE_DIR,
        preview_name=name,
    )

    jpeg_path = os.path.join(CACHE_DIR, "{}.jpg".format(name))
    assert os.path.exists(jpeg_path) is True
    assert os.path.getsize(jpeg_path) > 0
    with Image.open(jpeg_path) as preview:
        assert preview.height == 256
        assert preview.width == 461
def test_to_jpeg(file: typing.Dict[str, typing.Any]) -> None:
    """
    The CairoSVG builder writes a non-empty JPEG of the expected size.

    :param file: test case providing the ``name`` of the source file and the
        expected ``width`` and ``height`` of the preview
    """
    os.makedirs(CACHE_DIR)
    svg_builder = ImagePreviewBuilderCairoSVG()
    assert svg_builder.has_jpeg_preview() is True

    name = "svg_tesselation_test_cairosvg"
    svg_builder.build_jpeg_preview(
        file_path=os.path.join(CURRENT_DIR, file["name"]),
        size=ImgDims(height=256, width=512),
        page_id=0,
        cache_path=CACHE_DIR,
        preview_name=name,
    )

    jpeg_path = os.path.join(CACHE_DIR, "{}.jpg".format(name))
    assert os.path.exists(jpeg_path) is True
    assert os.path.getsize(jpeg_path) > 0
    with Image.open(jpeg_path) as preview:
        assert preview.height == file["height"]
        assert preview.width == file["width"]
def build_jpeg_preview(self, file_path, preview_name, cache_path, page_id,
                       extension='.jpg', size=None):
    """
    Generate the small JPEG preview of one PDF page.

    The requested page is copied into a single-page in-memory PDF, which is
    then converted by ``convert_pdf_to_jpeg`` and written to the cache.

    :param file_path: path of the PDF
    :param preview_name: name of the preview file, without extension
    :param cache_path: prefix put directly in front of ``preview_name``
    :param page_id: index of the page, passed to ``getPage`` as an int
    :param extension: extension appended to the preview file name
    :param size: bounding box of the preview; 256x256 when falsy
    """
    if not size:
        size = ImgDims(256, 256)

    output_stream = BytesIO()
    with open(file_path, 'rb') as pdf:
        # HACK - D.A. - 2017-08-11 Deactivate strict mode
        # This avoid crashes when PDF are not standard
        # See https://github.com/mstamy2/PyPDF2/issues/244
        input_pdf = PdfFileReader(pdf, strict=False)
        output_pdf = PdfFileWriter()
        output_pdf.addPage(input_pdf.getPage(int(page_id)))
        # Written while the source is still open; once the page has been
        # copied into memory the source file is no longer needed.
        output_pdf.write(output_stream)
    output_stream.seek(0, 0)
    result = convert_pdf_to_jpeg(output_stream, size)

    # The path never depended on page_id: the previous if/else on
    # ``page_id == -1`` produced the same string in both branches.
    preview_path = '{path}{file_name}{extension}'.format(
        file_name=preview_name,
        path=cache_path,
        extension=extension)
    with open(preview_path, 'wb') as jpeg:
        # Copy in 1 KiB chunks until the stream is exhausted.
        for chunk in iter(lambda: result.read(1024), b''):
            jpeg.write(chunk)
def test_build_jpeg_preview() -> None:
    """The Wand builder writes a non-empty JPEG, 256 high and 288 or 289 wide."""
    width, height = 512, 256
    extension = ".jpg"
    base_name = "preview_the_img"
    source_path = os.path.join(CURRENT_DIR, "the_img.png")

    ImagePreviewBuilderWand().build_jpeg_preview(
        file_path=source_path,
        preview_name=base_name,
        cache_path=CACHE_DIR,
        page_id=-1,
        size=ImgDims(width=width, height=height),
        extension=extension,
    )

    dest_path = os.path.join(CACHE_DIR, base_name + extension)
    assert os.path.exists(dest_path)
    assert os.path.getsize(dest_path) > 0
    with Image.open(dest_path) as jpg:
        assert jpg.height == height
        assert jpg.width in range(288, 290)
class PreviewBuilder(object):
    """
    Base class of preview builders.

    Most ``build_*`` methods raise UnavailablePreviewType and the matching
    ``has_*`` methods return False; only the JSON preview has a default
    implementation.
    """

    # Preview size shared by all builders when no size is requested.
    default_size = ImgDims(256, 256)

    def __init__(self) -> None:
        self.logger = logging.getLogger(LOGGER_NAME)
        builder_class = str(self.__class__)
        self.logger.info("New Preview builder of class" + builder_class)

    @classmethod
    def get_supported_mimetypes(cls) -> typing.List[str]:
        """Return the supported mimetypes. No default implementation."""
        raise NotImplementedError()

    @classmethod
    def get_label(cls) -> str:
        """Return the label of the builder: by default, the class name."""
        return cls.__name__

    @classmethod
    def check_dependencies(cls) -> None:
        """Raises a BuilderDependencyNotFound with an appropriate message if a dependency is missing.

        The default implementation checks nothing.
        """

    @classmethod
    def get_mimetypes_mapping(cls) -> typing.List[MimetypeMapping]:
        """
        Get the specific mimetype/file_extension mappings related to the
        builder. They are used to update the mimetypes mapping of
        preview_generator, to help it determine types and file extensions
        more correctly.

        :return: list of mappings, empty by default
        """
        return []

    @classmethod
    def update_mimetypes_mapping(cls) -> None:
        """
        Register the mappings of the builder in the preview_generator
        mimetypes_storage.
        """
        for mapping in cls.get_mimetypes_mapping():
            # INFO - G.M - 2019-11-22 - mimetype are added as strict to force override of default
            # system/mimetype lib value, which is needed for type like .obj where system type can be
            # "text/plain" or "application/octet-stream"
            mimetypes_storage.add_type(  # type: ignore
                type=mapping.mimetype,
                ext=mapping.file_extension,
                strict=True,
            )

    @classmethod
    def dependencies_versions(cls) -> typing.Optional[str]:
        """Tell about the version of dependencies.

        Returns None if this builder has no dependencies.
        """
        return None

    def get_page_number(self, file_path: str, preview_name: str, cache_path: str,
                        mimetype: str = "") -> int:
        """
        Get the number of pages of the document. No default implementation.
        """
        raise UnavailablePreviewType()

    def build_jpeg_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        page_id: int,
        extension: str = ".jpg",
        size: ImgDims = None,
        mimetype: str = "",
    ) -> None:
        """
        Generate the jpg preview. No default implementation.
        """
        raise UnavailablePreviewType()

    def has_pdf_preview(self) -> bool:
        """
        Override and return True if your builder allows PDF preview.
        """
        return False

    def has_jpeg_preview(self) -> bool:
        """
        Override and return True if your builder allows jpeg preview.
        """
        return False

    def has_json_preview(self) -> bool:
        """
        Tell whether the builder allows json preview: True by default.
        """
        return True

    def has_text_preview(self) -> bool:
        """
        Override and return True if your builder allows text preview.
        """
        return False

    def has_html_preview(self) -> bool:
        """
        Override and return True if your builder allows html preview.
        """
        return False

    def build_pdf_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        extension: str = ".pdf",
        page_id: int = -1,
        mimetype: str = "",
    ) -> None:
        """
        Generate the pdf preview. No default implementation.
        """
        message = "No builder registered for PDF preview of {}".format(file_path)
        raise UnavailablePreviewType(message)

    def build_html_preview(self, file_path: str, preview_name: str, cache_path: str,
                           extension: str = ".html") -> None:
        """
        Generate the html preview. No default implementation.
        """
        raise UnavailablePreviewType()

    def build_json_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        page_id: int = 0,
        extension: str = ".json",
    ) -> None:
        """
        Generate the json preview.

        The default implementation dumps the first entry returned by
        ``pyexifinfo.get_json`` (ExifTool based) for the file.
        """
        metadata = pyexifinfo.get_json(file_path)[0]
        json_path = cache_path + preview_name + extension
        with open(json_path, "w") as jsonfile:
            json.dump(metadata, jsonfile)

    def build_text_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        page_id: int = 0,
        extension: str = ".txt",
    ) -> None:
        """
        Generate the text preview. No default implementation.
        """
        raise UnavailablePreviewType()