def _convert_image(self, file_path: str, preview_dims: ImgDims) -> Image:
    """
    Build a flattened, resized preview of the image stored at ``file_path``.

    refer: https://legacy.imagemagick.org/Usage/thumbnails/
    like cmd: convert -layers merge -background white -thumbnail widthxheight
              -auto-orient -quality 85 -interlace plane input.jpeg output.jpeg

    :param file_path: path of the source image, opened with Wand.
    :param preview_dims: requested preview size, passed to
        ``compute_resize_dims`` as ``dims_out``.
    :return: the processed Wand image. It is returned open; this method
        does not close it.
    """
    img = Image(filename=file_path)
    try:
        # Apply the stored orientation first: it can swap width and height,
        # and the resize dimensions must be computed from the upright image.
        img.auto_orient()
        resize_dim = compute_resize_dims(
            dims_in=ImgDims(width=img.width, height=img.height),
            dims_out=preview_dims,
        )
        img.iterator_reset()
        img.background_color = Color("white")
        img.merge_layers("merge")
        if self.progressive:
            img.interlace_scheme = "plane"
        img.compression_quality = self.quality
        img.thumbnail(resize_dim.width, resize_dim.height)
    except Exception:
        # Nothing is handed back on failure, so release the image here.
        img.close()
        raise
    return img
def get_thumbnail_from_pdf(self, file):
    """
    Render the first page of a PDF as a JPEG thumbnail stored in a temp file.

    :param file: readable file object holding the PDF.
    :return: the temporary thumbnail file reopened in ``'rb'`` mode, or
        ``None`` when the conversion yields no image or any step fails
        (the error is printed).
    """
    try:
        # Convert PDF files; the context managers release the ImageMagick
        # resources even when a later step raises.
        with Image(file=file) as imgs_pdf, imgs_pdf.convert('jpeg') as imgs:
            if not imgs:
                return None
            with Image(image=imgs.sequence[0]) as img:
                img.background_color = Color('white')
                img.alpha_channel = 'remove'
                # resize and save the converted file
                img.transform(crop='', resize=THUMBNAIL_SIZE)
                img.thumbnail()
                # delete=False: the file must outlive this function because
                # a handle to it is returned.
                temp = NamedTemporaryFile(delete=False)
                temp.flush()
                temp0 = File(temp)
                with temp0.open('wb') as f:
                    img.save(file=f)
        return temp0.open('rb')
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(repr(e))
        return None
def convex(img: Image):
    """
    Resize ``img`` with the shared ``resize`` helper, then apply a barrel
    distortion to it in place.

    Pixels outside the source are transparent (virtual pixel setting) and the
    background colour is white.

    :param img: image to modify.
    :return: the same ``img`` object.
    """
    resize(img)
    img.background_color = Color("white")
    img.virtual_pixel = "transparent"
    barrel_arguments = [1, 0, 0, 1]
    img.distort(method="barrel", arguments=barrel_arguments)
    return img
def _run_convert(filename, page, res=120):
    """
    Render one page of a PDF to a PNG file next to the source document.

    The page is copied into a single-page in-memory PDF, rasterised with
    Wand and saved as ``<filename without extension><page + 1>.png``.

    :param filename: path of the PDF; must contain a ``.``.
    :param page: zero-based page index passed to ``getPage``.
    :param res: resolution passed to ``Image`` when rasterising.
    """
    idx = page + 1
    started_ms = time.time() * 1000
    # Original note: every conversion would otherwise reload the whole PDF
    # into memory, so the reader comes from an in-memory cache.
    pdfile = getPdfReader(filename)
    page_obj = pdfile.getPage(page)
    dst_pdf = PdfFileWriter()
    dst_pdf.addPage(page_obj)
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    # The context manager releases the image even if rendering or saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        img.format = 'png'
        img.compression_quality = 90
        img.background_color = Color("white")
        # Save the image
        img_path = '%s%d.png' % (filename[:filename.rindex('.')], idx)
        img.save(filename=img_path)
    print('convert page %d cost time %d' % (idx, (time.time() * 1000 - started_ms)))
def _generate_thumbnail_image_content_file(document):
    """
    Build a JPEG thumbnail of the first page of ``document``'s PDF.

    The PDF bytes come from the stored file when ``document.file_on_server``
    is true, otherwise they are downloaded from ``document.external_url``.

    :param document: object exposing ``file_on_server``, ``unique_file`` and
        ``external_url``.
    :return: a ``ContentFile`` wrapping the JPEG blob.
    """
    if document.file_on_server:
        content = document.unique_file.file_field.read()
    else:
        with requests.request('get', document.external_url, stream=True) as response:
            content = response.content

    # NOTE(review): the scratch path is fixed, so concurrent calls would
    # overwrite each other's file - confirm callers are serialised.
    temp_pdf_path = os.path.join(settings.MEDIA_ROOT, 'document_thumbnails', 'temp.pdf')
    try:
        # Binary mode: the content is raw PDF bytes, not text.
        with open(temp_pdf_path, 'wb') as f:
            f.write(content)

        reader = PdfReader(temp_pdf_path)
        if len(reader.pages) > 1:
            # Keep only the first page so just that page gets rasterised.
            page = reader.pages[0]
            writer = PdfWriter()
            writer.addpage(page)
            writer.write(temp_pdf_path)

        with Image(filename=temp_pdf_path, resolution=38) as images:
            images.background_color = Color('white')
            images.alpha_channel = 'flatten'
            blob = images.make_blob('jpg')
    finally:
        # Remove the scratch file on failure too.
        if os.path.exists(temp_pdf_path):
            os.remove(temp_pdf_path)
    return ContentFile(blob)
def pdf2image(self, dest_width, dest_height):
    """
    Render each attachment's base64-encoded data to a JPEG and store it,
    base64-encoded, in ``attachment.image``.

    ``dest_width`` and ``dest_height`` are accepted but currently unused (the
    resize call is disabled).

    The ``base64`` module replaces the Python-2-only ``'base64'`` string
    codec; the encoded output therefore carries no line breaks.
    """
    import base64

    RESOLUTION = 300
    for attachment in self:
        source_blob = base64.b64decode(attachment.datas)
        # Release each 300-dpi render before moving to the next attachment.
        with Image(blob=source_blob, resolution=(RESOLUTION, RESOLUTION)) as img:
            img.background_color = Color('white')
            # img.resize(dest_width, dest_height)
            attachment.image = base64.b64encode(img.make_blob(format='jpg'))
def createAndStoreThumbnail(**kwargs):
    """
    Create a JPEG thumbnail from the first page of a document and save it.

    Expected keyword arguments:
        fileLocation, fileName: concatenated to form the source path.
        imageLocation, imageName: concatenated (plus ``.jpg``) to form the
            output path.

    Returns the thumbnail file name (``imageName`` + ``.jpg``).
    """
    source_path = kwargs["fileLocation"] + kwargs["fileName"]
    thumbnail_name = kwargs["imageName"] + ".jpg"

    # Both images are released on exit, including when an error occurs.
    with Image(filename=source_path) as imageFromPdf:
        # Start from an empty canvas the size of the source ...
        with Image(width=imageFromPdf.width, height=imageFromPdf.height) as image:
            # ... and shrink the empty canvas to the thumbnail size.
            image.sample(470, 330)
            # Superimpose the first page at the top-left corner.
            # NOTE(review): the page itself is not scaled before compositing,
            # so only its top-left 470x330 region lands on the canvas - confirm
            # this is intended.
            image.composite(imageFromPdf.sequence[0], top=0, left=0)
            image.background_color = Color("white")
            image.alpha_channel = 'remove'
            image.format = "jpg"
            image.save(filename=kwargs["imageLocation"] + thumbnail_name)
    return thumbnail_name
def _run_convert_all(filename, res=120):
    '''Convert every page of a PDF into a PNG image.

    Each page is copied into a single-page in-memory PDF, rasterised at
    ``res`` and saved as ``dest/<filename without extension>_pg<n>.png``,
    with ``n`` starting at 1. The time taken per page is printed.
    '''
    # Original note: every conversion would otherwise reload the whole PDF
    # into memory, so the reader comes from an in-memory cache.
    # The extra file handle the original opened was never read, so it is gone.
    pdfile = get_pdf_reader(filename)
    for i in range(pdfile.getNumPages()):
        started_ms = time.time() * 1000
        page_obj = pdfile.getPage(i)
        dst_pdf = PdfFileWriter()
        dst_pdf.addPage(page_obj)
        pdf_bytes = io.BytesIO()
        dst_pdf.write(pdf_bytes)
        pdf_bytes.seek(0)
        # Release the rendered page even if saving fails.
        with Image(file=pdf_bytes, resolution=res) as img:
            img.format = 'png'
            img.compression_quality = 90
            img.background_color = Color("white")
            # Save the image
            img_path = 'dest/%s_pg%d.png' % (filename[:filename.rindex('.')], i + 1)
            img.save(filename=img_path)
        print('convert page %d cost time %dms' % (i + 1, (time.time() * 1000 - started_ms)))
def _run_convert(filename, page, res=120):
    '''Convert one page of a PDF into a PNG image.

    ``page`` is a zero-based index. When it is past the last page a message
    is printed and nothing is written; otherwise the page is saved as
    ``dest/<filename without extension>_pg<page + 1>.png``.
    '''
    # The extra file handle the original opened was never read, so it is gone.
    pdfile = get_pdf_reader(filename)
    # Zero-based index: the last valid page is getNumPages() - 1.
    if page < pdfile.getNumPages():
        idx = page + 1
        started_ms = time.time() * 1000
        # Original note: every conversion would otherwise reload the whole
        # PDF into memory, so the reader comes from an in-memory cache.
        page_obj = pdfile.getPage(page)
        dst_pdf = PdfFileWriter()
        dst_pdf.addPage(page_obj)
        pdf_bytes = io.BytesIO()
        dst_pdf.write(pdf_bytes)
        pdf_bytes.seek(0)
        # Release the rendered page even if saving fails.
        with Image(file=pdf_bytes, resolution=res) as img:
            img.format = 'png'
            img.compression_quality = 90
            img.background_color = Color("white")
            # Save the image
            img_path = 'dest/%s_pg%d.png' % (filename[:filename.rindex('.')], idx)
            img.save(filename=img_path)
        print('convert page %d cost time %dms' % (idx, (time.time() * 1000 - started_ms)))
    else:
        print("pg%r list index out of range" % page)
def _pdf_thumbnail(filename):
    """
    Return a base64-encoded JPEG thumbnail of the first page of a PDF.

    The ``[0]`` suffix asks ImageMagick to read only the first page. The
    target size comes from ``get_thumbnail_size`` with a 50x50 limit.
    """
    # Release the ImageMagick resources as soon as the blob is produced.
    with WandImage(filename=filename + '[0]') as img:
        img.background_color = Color('white')
        tw, th = get_thumbnail_size(img.height, img.width, 50, 50)
        img.resize(tw, th)
        raw_data = img.make_blob('jpeg')
    return base64.b64encode(raw_data)
def _run_convert(filename, page, res=120):
    """
    Render one page of a PDF to an opaque PNG under ``./image/``.

    The output name is the part of ``filename`` between its last ``/pdf/``
    and its last ``.pdf``; both must be present or ``rindex`` raises
    ``ValueError``.

    :param filename: path of the PDF.
    :param page: zero-based page index passed to ``getPage``.
    :param res: resolution passed to ``Image`` when rasterising.
    """
    # Original note: every conversion would otherwise reload the whole PDF
    # into memory, so the reader comes from an in-memory cache.
    pdfile = getPdfReader(filename)
    page_obj = pdfile.getPage(page)
    dst_pdf = PdfFileWriter()
    dst_pdf.addPage(page_obj)
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    # Release the rendered page even if saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        img.format = 'png'
        img.compression_quality = 90
        # Flatten transparency onto white.
        img.background_color = Color('white')
        img.alpha_channel = 'remove'
        # Save the image
        img_path = './image/%s.png' % (filename[filename.rindex('/pdf/') + 5:filename.rindex('.pdf')])
        img.save(filename=img_path)
def image_labels_to_countable_npy():
    """
    Export one ``.npy`` mask stack per image label of a fixed labeler.

    For every label, each SVG path/polygon of each category label is
    rasterised to a PNG, read back, and stored as one layer of the stack with
    pixels equal to 255 replaced by the category type id. The stack is
    trimmed to the number of shapes found and saved next to the PNGs.
    """
    _user = User.objects.filter(username='******')[0]
    _labeler = Labeler.objects.filter(user=_user)[0]
    labels = ImageLabel.objects.filter(labeler=_labeler)
    foldername = 'npy'
    output_dir = settings.STATIC_ROOT + settings.LABEL_FOLDER_NAME + foldername
    # Create the output folder once, up front, so both the per-shape PNGs and
    # the final np.save always have somewhere to write.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for label in labels:
        parent_image = label.parentImage
        filename = '%s' % parent_image.name.replace('.JPG', '')
        outputFilenameNpy = output_dir + '/' + filename + '.npy'
        categorylabels = label.categorylabel_set.all()
        height = parent_image.height
        width = parent_image.width
        # NOTE(review): a label with more than 254 shapes would index past
        # this buffer - confirm the limit is acceptable.
        total_paths = 254
        masks_ndarray = np.zeros((total_paths, height, width))
        ctr = 0
        for cat_id, categorylabel in enumerate(categorylabels):
            svg = categorylabel.labelShapes
            paths = re.findall(SVGRegex.rePath, svg)
            poly = re.findall(SVGRegex.rePolygon, svg)
            shapes = paths + poly
            if not shapes:
                print(ctr, cat_id, 0, 'EMPTY')
                continue
            for idx, path in enumerate(shapes):
                print(ctr, cat_id, idx, path)
                outputFilename = (output_dir + '/' + filename + '_' +
                                  str(idx) + '_' + str(ctr) +
                                  IMAGE_FILE_EXTENSION)
                svg_blob = image_string_to_SVG_string_file(
                    image_label_string_to_SVG_string(path, height, width))
                # Release each rasterised shape before handling the next one.
                with WandImage(blob=svg_blob) as img:
                    img.resize(width, height)
                    img.background_color = WandColor('white')
                    img.alpha_channel = 'remove'
                    img.negate()
                    img.threshold(0)
                    img.format = 'png'
                    img.save(filename=outputFilename)
                im = imageio.imread(outputFilename)
                masks = np.array(im)
                category_id = categorylabel.categoryType_id
                # Pixels at 255 become the category id; the rest are kept.
                cat_mask = np.where(masks == 255, category_id, masks)
                masks_ndarray[ctr, :, :] = cat_mask
                ctr = ctr + 1
        # Drop the unused trailing layers of the pre-allocated stack.
        masks_ndarray.resize(ctr, height, width)
        print(masks_ndarray.shape)
        np.save(outputFilenameNpy, masks_ndarray)
def pdf2image(self, dest_width, dest_height):
    """
    Render the first record's base64-encoded data to a JPEG and store it,
    base64-encoded, in ``self[0].image``.

    ``dest_width`` and ``dest_height`` are accepted but currently unused.

    The ``base64`` module replaces the Python-2-only ``'base64'`` string
    codec; the encoded output therefore carries no line breaks.
    """
    import base64

    RESOLUTION = 300
    record = self[0]
    source_blob = base64.b64decode(record.datas)
    # Release the 300-dpi render as soon as the JPEG blob is produced.
    with Image(blob=source_blob, resolution=(RESOLUTION, RESOLUTION)) as img:
        img.background_color = Color('white')
        record.image = base64.b64encode(img.make_blob(format='jpg'))
def pagify_pdf(fname):
    """Create a PNG image for each page of a PDF document.

    Reads ``fname + '.pdf'`` at 250 dpi and writes ``fname1.png``,
    ``fname2.png``, ... with transparency flattened onto white.

    Returns the number of pages written.
    """
    page_total = 0
    # Both the document and each per-page copy are released on exit.
    with Image(filename=fname + '.pdf', resolution=250) as all_pages:
        for page_total, page in enumerate(all_pages.sequence, start=1):
            with Image(page) as img:
                img.format = 'png'
                img.background_color = Color('white')
                img.alpha_channel = 'remove'
                img.save(filename=fname + str(page_total) + '.png')
    return page_total
def makeKmzFile(self, filename, title):
    '''Produce the MapRun map, as a georeferenced JPG image in a KMZ file.

    A PDF is requested from ``PDF_SERVER``, its first page is rasterised to a
    JPEG, and the JPEG is zipped together with a KML ground overlay into
    ``filename``. The intermediate temporary files are removed afterwards.
    '''
    import os

    if self.aspect == "landscape":
        ss = "style=streeto|paper=0.297,0.210|scale=10000|centre="
    else:
        ss = "style=streeto|paper=0.210,0.297|scale=10000|centre="
    mp = self.getMidpoint().getEPSG3857()
    ss += str(int(mp[1])) + "," + str(int(mp[0])) + "|title=" + title + "|club=|mapid=|start="
    ss += str(int(self.startFinish.getEPSG3857()[1])) + "," + str(int(self.startFinish.getEPSG3857()[0]))
    ss += "|crosses=|controls="
    # TODO Remove controls from request for actual MapRun map
    ss += ",".join([str(control.getNumber()) + ",45," +
                    str(int(control.getEPSG3857()[1])) + "," +
                    str(int(control.getEPSG3857()[0]))
                    for control in self.getControls()])
    # NOTE(review): the title is inserted into the query string unescaped -
    # confirm the server copes with reserved characters.
    r = requests.get(PDF_SERVER + "/pdf/?" + ss)

    scratch_paths = []
    try:
        with tempfile.NamedTemporaryFile(delete=False) as pdfFile:
            scratch_paths.append(pdfFile.name)
            for chunk in r.iter_content(chunk_size=128):
                pdfFile.write(chunk)

        jpg_path = pdfFile.name + ".jpg"
        scratch_paths.append(jpg_path)
        with Image(filename=pdfFile.name, resolution=180) as img_pdf:
            with Image(image=img_pdf.sequence[0]) as img:
                img.background_color = Color('white')
                img.alpha_channel = 'remove'
                img.format = "jpeg"
                img.save(filename=jpg_path)

        kml = Element("kml", {"xmlns": "http://www.opengis.net/kml/2.2"})
        document = SubElement(kml, "Document")
        folder = SubElement(document, "Folder")
        groundOverlay = SubElement(folder, "GroundOverlay")
        SubElement(groundOverlay, "name").text = "tile.jpg"
        SubElement(groundOverlay, "drawOrder").text = "75"
        icon = SubElement(groundOverlay, "Icon")
        SubElement(icon, "href").text = "files/tile.jpg"
        latLonBox = SubElement(groundOverlay, "LatLonBox")
        SubElement(latLonBox, "north").text = str(self.topLeft.getLat())
        SubElement(latLonBox, "south").text = str(self.bottomRight.getLat())
        SubElement(latLonBox, "east").text = str(self.bottomRight.getLon())
        SubElement(latLonBox, "west").text = str(self.topLeft.getLon())
        SubElement(latLonBox, "rotation").text = "0.0"

        with tempfile.NamedTemporaryFile(delete=False) as docKmlFile:
            scratch_paths.append(docKmlFile.name)
            docKmlFile.write(tostring(kml))

        with zipfile.ZipFile(filename, "w") as kmzFile:
            kmzFile.write(docKmlFile.name, "doc.kml")
            kmzFile.write(jpg_path, "files/tile.jpg")
    finally:
        # The temp files are created with delete=False, so remove them here.
        for path in scratch_paths:
            if os.path.exists(path):
                os.remove(path)
def convert_pdf_to_png(filenameVal):
    """
    Stack all pages of a PDF vertically into ``out.png``, then hand the
    original path to ``remove_points_from_image`` and return its result.

    The PDF is rasterised at 200 dpi and transparency is flattened onto
    white.
    """
    # Both images are released on exit, including when an error occurs.
    with Image(filename=filenameVal, resolution=200) as pdf:
        pages = len(pdf.sequence)
        with Image(width=pdf.width, height=pdf.height * pages) as image:
            # range() works on both Python 2 and 3 (xrange is Python 2 only).
            for i in range(pages):
                image.composite(pdf.sequence[i], top=pdf.height * i, left=0)
            image.background_color = Color("white")
            image.alpha_channel = 'remove'
            image.save(filename="out.png")
    return remove_points_from_image(filenameVal)
def floor(img: Image):
    """
    Apply a tiled perspective distortion to ``img`` in place.

    The image is resized with ``resize(img, 128)`` before the distortion and
    with ``resize(img, 512)`` after it.

    :param img: image to modify.
    :return: the same ``img`` object.
    """
    # Each row contributes four consecutive values to the distort arguments.
    control_points = (
        (0, 0, 20, 61),
        (90, 0, 70, 63),
        (0, 90, 0, 83),
        (90, 90, 85, 88),
    )
    flat_arguments = [value for row in control_points for value in row]

    resize(img, 128)
    img.alpha_channel = False
    img.background_color = Color("light-blue")
    img.virtual_pixel = "tile"
    img.distort(method="perspective", arguments=flat_arguments)
    resize(img, 512)
    return img
def _floor(img: Image):
    """
    Apply a tiled perspective distortion to ``img`` in place, then resize it
    back to the size it had on entry.

    :param img: image to modify.
    :return: the same ``img`` object.
    """
    original_width, original_height = img.size
    # Each row contributes four consecutive values to the distort arguments.
    control_points = (
        (0, 0, 20, 61),
        (90, 0, 70, 63),
        (0, 90, 0, 83),
        (90, 90, 85, 88),
    )
    flat_arguments = [value for row in control_points for value in row]

    img.alpha_channel = False
    img.background_color = Color("#81cfe0")
    img.virtual_pixel = "tile"
    img.distort(method="perspective", arguments=flat_arguments)
    img.resize(original_width, original_height)
    return img
def toPNG(filePath, xResScale=220, yResScale=220):
    """
    Convert a PDF to PNG.

    A single-page document becomes ``<name>.png``. For a multi-page document,
    e.g. ``Goldenrod_Full.pdf``, the output is ``Goldenrod_Full-0.png``,
    ``Goldenrod_Full-1.png``, ... The output name is ``filePath`` minus its
    last four characters.

    :param filePath: path of the source PDF.
    :param xResScale: horizontal resolution passed to ``resample``.
    :param yResScale: vertical resolution passed to ``resample``.
    :return: number of pages converted.
    """
    base_path = filePath[:-4]
    # The document and every per-page copy are released on exit.
    with Image(filename=filePath, resolution=400) as img:
        page_count = len(img.sequence)
        if page_count == 1:
            # whole file as one page
            img.format = 'png'
            img.background_color = Color('white')
            img.alpha_channel = 'remove'
            # image resolution scale adjustment for display
            img.resample(x_res=xResScale, y_res=yResScale)
            img.save(filename=base_path + '.png')
        else:
            # multipage document: one output file per page
            for i, page in enumerate(img.sequence):
                with Image(image=page, resolution=400) as indiv:
                    indiv.format = 'png'
                    indiv.background_color = Color('white')
                    indiv.alpha_channel = 'remove'
                    # adjust resolution for display
                    indiv.resample(x_res=xResScale, y_res=yResScale)
                    indiv.save(filename=base_path + '-' + str(i) + '.png')
    return page_count
def toGIF(filePath, xResScale=220, yResScale=220):
    """
    Convert a PDF to GIF.

    A single-page document becomes ``<name>.gif``; a multi-page document is
    written page by page as ``<name>-0.gif``, ``<name>-1.gif``, ... The
    original note on this function warns that converting multiple pages in
    one go might produce an animated GIF. The output name is ``filePath``
    minus its last four characters.

    :param filePath: path of the source PDF.
    :param xResScale: horizontal resolution passed to ``resample``.
    :param yResScale: vertical resolution passed to ``resample``.
    :return: number of pages converted.
    """
    base_path = filePath[:-4]
    # The document and every per-page copy are released on exit.
    with Image(filename=filePath, resolution=400) as img:
        page_count = len(img.sequence)
        if page_count == 1:
            # whole file as one page
            img.format = 'gif'
            img.background_color = Color('white')
            img.alpha_channel = 'remove'
            # image resolution scale adjustment for display
            img.resample(x_res=xResScale, y_res=yResScale)
            img.save(filename=base_path + '.gif')
        else:
            # multipage document: one output file per page
            for i, page in enumerate(img.sequence):
                with Image(image=page, resolution=400) as indiv:
                    indiv.format = 'gif'
                    indiv.background_color = Color('white')
                    indiv.alpha_channel = 'remove'
                    # adjust resolution for display
                    indiv.resample(x_res=xResScale, y_res=yResScale)
                    indiv.save(filename=base_path + '-' + str(i) + '.gif')
    return page_count
def resize_image(path, width, height):
    """
    Fit the first page/frame of ``path`` inside a ``width`` x ``height``
    canvas, centred, and save the result as a temporary GIF.

    The source is only ever shrunk (``>`` geometry flag), never enlarged.
    The work runs while holding ``WAND_LOCK.reader_lock``.

    :return: path of the temporary ``.gif`` file (created with
        ``delete=False``, so the caller is responsible for removing it).
    """
    with WAND_LOCK.reader_lock, Image(width=width, height=height) as bg:
        # Resize our input image; the page copy is released on exit too.
        with Image(filename=path, resolution=300) as s, Image(s.sequence[0]) as d:
            d.background_color = Color("white")
            d.alpha_channel = 'remove'
            d.transform(resize='%ix%i>' % (width, height))
            # Offset input image on top of background.
            left = (bg.width - d.width) // 2
            top = (bg.height - d.height) // 2
            bg.composite(d, left, top, operator='over')
        with tempfile.NamedTemporaryFile(delete=False, suffix='.gif') as t:
            bg.save(filename=t.name)
            return t.name
def _create_preview(self):
    """
    create the PNG file for this pub. This is called internally
    after PDF generation, because there is a dependency.

    The first page of ``pub2/<identifier>.pdf`` in the working directory is
    flattened onto white and written to ``pub2/<identifier>.png``.
    """
    identifier = self.file.preamble["identifier"]
    pdf_filename = opj(self.working_dir, "pub2/{0}.pdf".format(identifier))
    preview_filename = opj(self.working_dir, "pub2/{0}.png".format(identifier))
    # "[0]" restricts the read to the first page; the image is released on
    # exit, including when saving fails.
    with Image(filename='{0}[0]'.format(pdf_filename)) as img:
        img.background_color = Color("white")
        img.merge_layers('flatten')
        img.format = 'png'
        img.save(filename=preview_filename)
def ocr(invoice_path):
    """
    Rasterise an invoice to PNG and send it to the OCR service.

    :param invoice_path: object with ``read_bytes()`` (e.g. a ``pathlib.Path``).
    :return: the ``"result"`` entry of the service's JSON reply, or ``None``
        when the service does not answer with status 200 or a ``ValueError``
        is raised (both cases are printed).
    """
    option = {}
    try:
        # Release the 300-dpi render as soon as the PNG bytes are produced.
        with Image(blob=invoice_path.read_bytes(), resolution=300) as img:
            img.format = 'png'
            img.background_color = Color("white")
            img_bytes = img.make_blob()
        response = ocrClient.request_ocr_service_base64(
            req_uri, img_bytes, option)
        if response.status_code != 200:
            print("Status code:" + str(response.status_code) +
                  "\ncontent:" + response.text)
            # An error reply carries no usable result; do not try to parse it.
            return None
        return response.json()["result"]
    except ValueError as e:
        print(e)
        return None
def writeImageNoWarp(pdf, out, resolution=pdfDense, trim=True):
    """Stack the pages of a PDF vertically into one image and save it.

    The PDF is rasterised at ``resolution``. Its pages are composited from
    top to bottom, centred horizontally on one canvas, flattened onto white,
    sharpened, normalised and quantised to 16 colours. The result is saved
    with a ``.png8`` suffix and then processed by the external ``rename``,
    ``optipng`` and ``cwebp`` commands.

    pdf: path of the source PDF (the name is rebound to the opened image).
    out: output path; ``".png"`` inside it is replaced to derive other names.
    resolution: passed to ``Image`` when reading the PDF.
    trim: when true, every page and the final canvas are trimmed.
    """
    with Image(filename=pdf, resolution=resolution) as pdf:
        pages = len(pdf.sequence)
        # Running canvas size: total height and widest page.
        height = 0
        width = 0
        for j in range(pages):  ## Go through the sequence and trim them individually
            with pdf.sequence.index_context(j):
                if trim:
                    pdf.trim()
                    # 10 extra pixels of height are reserved per page.
                    height += pdf.height + 10
                    if (width < pdf.width):
                        width = pdf.width
                else:
                    width = pdf.width
                    height = pdf.height
        i = Image(width=width, height=height)
        # Rebuild `height` as [0, h0, h1, ...] so height[j] is the height of
        # the page before page j (0 for the first page).
        # NOTE(review): the offset below uses only the previous page's
        # height, not a running total - confirm for PDFs with more than
        # two pages.
        height = [r.height for r in pdf.sequence]
        height = height[::-1]
        height.append(0)
        height = height[::-1]
        # Left offsets that centre each page on the canvas.
        width = [math.floor((width - r.width) / 2) for r in pdf.sequence]
        for j in range(pages):
            i.composite(pdf.sequence[j], top=height[j] + j * 10, left=width[j])
        i.background_color = Color('white')  # Set white background.
        i.alpha_channel = 'remove'
        if trim:  ## This is necessary because of stacked charts (above)
            i.trim()
        i.sharpen(radius=5.0, sigma=5.0)
        i.normalize()
        i.quantize(16, dither=False)
        i.compression_quality = 00
        i.save(filename=out.replace(".png", ".png8"))
        tmp = out.replace(".png", "") + "*"
        #out = glob.glob(tmp)
        # NOTE(review): the commands below are built by string interpolation
        # and run through the shell - confirm `out` is always trusted.
        # A non-zero exit status is reported but does not stop the function.
        if (os.system("rename .png8 .png %s*" % tmp)):
            print("Error in saving file.")
        if (os.system("optipng -quiet %s*" % tmp)):
            print("Error in optipng.")
        commstr = 'cwebp -quiet -lossless -z 9 -metadata exif %s -o %s' % (
            out, out.replace(".png", ".webp"))
        if (os.system(commstr)):
            print("Error in cwebp")
def convert(filename, page=0, res=120):
    """
    Render one page of a PDF to a PNG file.

    The output path is ``filename`` with every occurrence of ``pdf`` replaced
    by ``png`` (this also affects directory names containing ``pdf``).

    :param filename: path of the PDF.
    :param page: zero-based page index passed to ``getPage``.
    :param res: resolution passed to ``Image`` when rasterising.
    """
    reader = PdfFileReader(filename, strict=False)
    page_obj = reader.getPage(page)
    dst_pdf = PdfFileWriter()
    dst_pdf.addPage(page_obj)
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    # Release the rendered page even if saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        img.format = 'png'
        img.compression_quality = 90
        img.background_color = Color('white')
        img_path = filename.replace('pdf', 'png')
        img.save(filename=img_path)
def read_image(image_path: str, dpi=300):
    """
    Load an image file, or every page of a PDF, as a list of decoded images.

    PDF pages are rasterised at ``dpi``, flattened onto white, encoded as
    JPEG and decoded with ``imdecode``; ``.png``/``.jpg``/``.jpeg`` files are
    read directly with ``imread``. Any other extension ends the process via
    ``sys.exit``.

    :param image_path: path of an existing file (checked with ``assert``).
    :param dpi: resolution used when rasterising a PDF.
    :return: list with one decoded image per page/file.
    """
    assert os.path.exists(image_path)
    image_list = []
    if image_path.endswith(".pdf"):
        with WandImage(filename=image_path, resolution=dpi) as img:
            for page_img_seq in img.sequence:
                # Release each page copy before rasterising the next one.
                with WandImage(page_img_seq) as page_img:
                    page_img.background_color = Color('white')
                    page_img.alpha_channel = 'remove'
                    jpeg_blob = page_img.make_blob(format='jpeg')
                img_buffer = np.asarray(bytearray(jpeg_blob), dtype=np.uint8)
                image_list.append(imdecode(img_buffer, IMREAD_COLOR))
    elif image_path.endswith((".png", ".jpg", ".jpeg")):
        image_list.append(imread(image_path, IMREAD_COLOR))
    else:
        sys.exit("Unknown input file format. Accepted inputs: .pdf, .jpg, .jpeg, .png")
    return image_list
def convert(filename, res=120):
    """
    Render the first page of a PDF to a PNG file.

    The output path is ``filename`` with ``old`` replaced by ``img`` and
    ``pdf`` replaced by ``png`` (every occurrence of each).

    :param filename: path of the PDF.
    :param res: resolution passed to ``Image`` when rasterising.
    """
    # Keep the source open only while the page is copied out of it.
    with open(filename, "rb") as pdf_stream:
        pdf_file = PdfFileReader(pdf_stream)
        page_obj = pdf_file.getPage(0)
        dst_pdf = PdfFileWriter()
        dst_pdf.addPage(page_obj)
        pdf_bytes = io.BytesIO()
        dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    img_filename = filename.replace('old', 'img').replace('pdf', 'png')
    # Release the rendered page even if saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        img.format = 'png'
        # NOTE(review): 120 is an unusual quality value - confirm intent.
        img.compression_quality = 120
        img.background_color = Color("white")
        img.save(filename=img_filename)
def _convert_pdf2img(self, file, store_path):
    """
    Render the first page of a PDF to a JPEG with a generated name.

    :param file: PDF source passed to ``PdfFileReader``; it is also used as
        the name argument of ``uuid.uuid3`` when building the file name.
    :param store_path: prefix of the output path (concatenated as-is).
    :return: path of the written JPEG.
    """
    pdf_file = PdfFileReader(file, strict=False)
    page_obj = pdf_file.getPage(0)
    dst_pdf = PdfFileWriter()
    dst_pdf.addPage(page_obj)
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    # Release the rendered page even if naming or saving fails.
    with Image(file=pdf_bytes, resolution=200) as img:
        img.format = 'jpg'
        img.compression_quality = 90
        img.background_color = Color("white")
        img_path = store_path + '%s.jpg' % (uuid.uuid3(uuid.uuid1(), file))
        print(img_path)
        img.save(filename=img_path)
    return img_path
def _run_convert(filename, page, res=120):
    """
    Render one page of a PDF to a JPEG file next to the source document.

    The output is ``<filename without extension><page + 1>.jpg``.

    :param filename: path of the PDF; must contain a ``.``.
    :param page: zero-based page index passed to ``getPage``.
    :param res: resolution passed to ``Image`` when rasterising.
    """
    pdfile = getPdfReader(filename)
    page_obj = pdfile.getPage(page)
    dst_pdf = PdfFileWriter()
    dst_pdf.addPage(page_obj)
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    # Release the rendered page even if saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        img.format = 'jpg'
        img.compression_quality = 90
        img.background_color = Color("white")
        img_path = '%s%d.jpg' % (filename[:filename.rindex('.')], page + 1)
        img.save(filename=img_path)
def _run_convert(pdfile, savedfilename, page_index, index, res=120):
    """
    Render one page of an already opened PDF reader to an image file.

    :param pdfile: reader object providing ``getPage``.
    :param savedfilename: prefix of the output path.
    :param page_index: index of the page to fetch from ``pdfile``.
    :param index: number appended (zero-padded to four digits) to the prefix.
    :param res: resolution passed to ``Image`` when rasterising.
    """
    # Fetch page number page_index of the PDF.
    page_obj = pdfile.getPage(page_index)
    dst_pdf = PdfFileWriter()
    dst_pdf.addPage(page_obj)
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)
    # Release the rendered page even if saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        # NOTE(review): the format is set to PNG but the file name ends in
        # .jpg - confirm which output format is intended.
        img.format = 'png'
        img.compression_quality = 90
        img.background_color = Color("white")
        img_path = '%s%04d.jpg' % (savedfilename, index)
        img.save(filename=img_path)
        print(img_path)
# Stack the first `pages` frames of imageFromPdf vertically on a new canvas,
# then save a 250x250 grayscale PNG named after `file` (text before its
# first dot) inside fileDirectory.
# NOTE(review): imageFromPdf, file and fileDirectory are defined outside this
# fragment - confirm their types against the surrounding code.
pages = 1
image = Image(
    width = imageFromPdf.width,
    height = imageFromPdf.height*pages
)
for i in range(pages):
    # Place frame i directly below the previous one.
    image.composite(
        imageFromPdf.sequence[i],
        top = imageFromPdf.height * i,
        left = 0
    )
image.resize(250,250)
image.alpha_channel = False
image.format = 'png'
print(image.size)
image.background_color = Color('pink')
image.type = 'grayscale'
image.caption = file.split('.')[0]
image.save(filename = fileDirectory+file.split('.')[0]+".png")
# Release the image data once the file is written.
image.clear()
image.close()
#display(image)