def iter_images_and_pages(images):
    """Iterate over the given image files and every page they contain.

    As OpenCV is not able to handle multipage TIFF files, the SDAPS internal
    loading method is used for those. Every other file is read with
    ``cv2.imread`` and treated as a single page.

    Args:
        images: Iterable of image filenames.

    Yields:
        Tuples ``(img, filename, page)``. For TIFF pages ``img`` is a
        ``(height, width, 3)`` uint8 array in BGR order; otherwise it is
        whatever ``cv2.imread`` returned. ``page`` is zero based.
    """
    for filename in images:
        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page
            # count); an AssertionError is taken to mean "not a TIFF".
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pages = 1
            is_tiff = False

        # range() rather than xrange() so this runs on Python 2 and 3 alike.
        for page in range(pages):
            if not is_tiff:
                img = cv2.imread(filename)
            else:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                width = surf.get_width()
                height = surf.get_height()
                stride = surf.get_stride()

                # One uint32 per pixel. Floor division keeps the row length
                # an integer even when "/" is true division.
                np_width = stride // 4

                # This converts by doing a copy; first create the target
                # numpy array (the top byte of each uint32 is a dummy
                # channel) and let cairo paint the source surface into it.
                target = np.empty((height, np_width), dtype=np.uint32)

                tmp_surf = cairo.ImageSurface.create_for_data(
                    target.data, cairo.FORMAT_RGB24, width, height, stride)
                cr = cairo.Context(tmp_surf)
                cr.set_source_surface(surf)
                cr.paint()
                del cr
                tmp_surf.flush()
                del tmp_surf

                # The stride may cover more than `width` pixels; drop any
                # row padding so the shapes below always match.
                pixels = target[:, :width]

                # Now, we need a bit of reshaping; order should be BGR
                img = np.empty((height, width, 3), dtype=np.uint8)
                img[:, :, 2] = 0xff & (pixels >> 16)
                img[:, :, 1] = 0xff & (pixels >> 8)
                img[:, :, 0] = 0xff & pixels

            yield img, filename, page
def iter_images_and_pages(images):
    """Iterate over the given image files and every page they contain.

    As OpenCV is not able to handle multipage TIFF files, the SDAPS internal
    loading method is used for those. Every other file is read with
    ``cv2.imread`` and treated as a single page.

    Args:
        images: Iterable of image filenames.

    Yields:
        Tuples ``(img, filename, page)``. For TIFF pages ``img`` is a
        ``(height, width, 3)`` uint8 array in BGR order; otherwise it is
        whatever ``cv2.imread`` returned. ``page`` is zero based.
    """
    for filename in images:
        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page
            # count); an AssertionError is taken to mean "not a TIFF".
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pages = 1
            is_tiff = False

        # range() works on both Python 2 and 3, unlike xrange().
        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                width = surf.get_width()
                height = surf.get_height()
                stride = surf.get_stride()

                # Number of uint32 cells per row. Must be an integer, so use
                # floor division instead of "/".
                np_width = stride // 4

                # This converts by doing a copy: cairo paints the source
                # surface into a numpy-backed surface (each uint32 carries a
                # dummy top byte next to the three colour channels).
                target = np.empty((height, np_width), dtype=np.uint32)

                tmp_surf = cairo.ImageSurface.create_for_data(
                    target.data, cairo.FORMAT_RGB24, width, height, stride)
                cr = cairo.Context(tmp_surf)
                cr.set_source_surface(surf)
                cr.paint()
                del cr
                tmp_surf.flush()
                del tmp_surf

                # Ignore any padding cells at the end of each row so that
                # the channel assignments cannot fail on a shape mismatch.
                pixels = target[:, :width]

                # Now, we need a bit of reshaping; order should be BGR
                img = np.empty((height, width, 3), dtype=np.uint8)
                img[:, :, 2] = 0xff & (pixels >> 16)
                img[:, :, 1] = 0xff & (pixels >> 8)
                img[:, :, 0] = 0xff & pixels
            else:
                img = cv2.imread(filename)

            yield img, filename, page
def survey_image(request, slug, filenum, page):
    """Return one page of a survey's scanned TIFF file as a PNG response.

    This function does not open the real SDAPS survey, as unpickling the data
    is way too inefficient.

    Args:
        request: The incoming request; passed on to ``get_survey_or_404``.
        slug: Survey identifier; passed on to ``get_survey_or_404``.
        filenum: Base name of the image; ``<filenum>.tif`` is looked up in
            the survey's path.
        page: Page index inside the TIFF file; converted with ``int()``.

    Returns:
        An ``HttpResponse`` with content type ``image/png`` and a private
        one hour ``Cache-Control`` header, containing the rendered page.

    Raises:
        Http404: If the image file does not exist or no surface could be
            loaded for the requested page.
    """
    survey = get_survey_or_404(request, slug, review=True)

    image_file = os.path.join(survey.path, "%s.tif" % (filenum,))

    # Check the image file itself. Testing only the survey directory would
    # let a request for a missing image fall through to the TIFF loader.
    if not os.path.exists(image_file):
        raise Http404

    surface = image.get_rgb24_from_tiff(image_file, int(page), False)
    if surface is None:
        raise Http404

    # Create PNG stream and return it
    response = HttpResponse(content_type='image/png')
    response['Cache-Control'] = 'private, max-age=3600'
    surface.write_to_png(response)
    return response
def survey_image(request, slug, filenum, page):
    """Return one page of a survey's scanned TIFF file as a PNG response.

    This function does not open the real SDAPS survey, as unpickling the data
    is way too inefficient.

    Args:
        request: The incoming request; passed on to ``get_survey_or_404``.
        slug: Survey identifier; passed on to ``get_survey_or_404``.
        filenum: Base name of the image; ``<filenum>.tif`` is looked up in
            the survey's path.
        page: Page index inside the TIFF file; converted with ``int()``.

    Returns:
        An ``HttpResponse`` with content type ``image/png`` and a private
        one hour ``Cache-Control`` header, containing the rendered page.

    Raises:
        Http404: If the image file does not exist or no surface could be
            loaded for the requested page.
    """
    survey = get_survey_or_404(request, slug, review=True)

    image_file = os.path.join(survey.path, "%s.tif" % (filenum, ))

    # The existence check has to look at the image file, not merely at the
    # survey directory, so that a missing image is answered with a 404.
    if not os.path.exists(image_file):
        raise Http404

    surface = image.get_rgb24_from_tiff(image_file, int(page), False)
    if surface is None:
        raise Http404

    # Create PNG stream and return it
    response = HttpResponse(content_type='image/png')
    response['Cache-Control'] = 'private, max-age=3600'
    surface.write_to_png(response)
    return response
def iter_images_and_pages(images):
    """Iterate over the given files and every page they contain.

    As OpenCV is not able to handle multipage TIFF files, the SDAPS internal
    loading method is used for those. Files that are not TIFF are tried as
    PDF documents via Poppler; everything else is read with ``cv2.imread``
    and treated as a single page.

    Args:
        images: Iterable of filenames.

    Yields:
        Tuples ``(img, filename, page)``. ``img`` is the result of
        ``to_opencv`` for TIFF and PDF pages and of ``cv2.imread`` otherwise;
        ``page`` is zero based.
    """
    # Maximum distance (in pt) between an embedded image's corners and the
    # page corners for the image to count as a full page scan.
    THRESH = 10  # pt

    # Scale factor from PDF points (72 per inch) to 300dpi pixels. Float
    # literals keep this at ~4.17 even where "/" is integer division.
    scale = 300.0 / 72.0

    for filename in images:
        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page
            # count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pass

        if not is_tiff:
            try:
                gfile = Gio.File.new_for_path(filename)
                pdf_doc = Poppler.Document.new_from_gfile(gfile, None, None)
                pages = pdf_doc.get_n_pages()
                is_pdf = True
            except Exception:
                # Either not PDF/damaged or poppler not installed properly.
                # Deliberately best effort, but KeyboardInterrupt/SystemExit
                # are no longer swallowed.
                pass

        # range() works on both Python 2 and 3, unlike xrange().
        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)
                img = to_opencv(surf)
            elif is_pdf:
                # Try to retrieve a single fullpage image, if that fails,
                # render the document at 300dpi.
                pdfpage = pdf_doc.get_page(page)
                page_width, page_height = pdfpage.get_size()

                # Named differently from the `images` argument, which is
                # still being iterated by the outer loop.
                mappings = pdfpage.get_image_mapping()

                is_fullpage = False
                if len(mappings) == 1:
                    area = mappings[0].area
                    is_fullpage = (
                        abs(area.x1) < THRESH and
                        abs(area.y1) < THRESH and
                        abs(area.x2 - page_width) < THRESH and
                        abs(area.y2 - page_height) < THRESH)

                if is_fullpage:
                    # Assume one full page image, and simply use that.
                    surf = pdfpage.get_image(mappings[0].image_id)
                else:
                    # Render page at 300dpi on a white background
                    surf = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                              int(scale * page_width),
                                              int(scale * page_height))
                    cr = cairo.Context(surf)
                    cr.scale(scale, scale)
                    cr.set_source_rgb(1, 1, 1)
                    cr.paint()

                    pdfpage.render_for_printing(cr)
                    del cr

                img = to_opencv(surf)
            else:
                img = cv2.imread(filename)

            yield img, filename, page
def iter_images_and_pages(images):
    """Iterate over the given files and every page they contain.

    As OpenCV is not able to handle multipage TIFF files, the SDAPS internal
    loading method is used for those. Files that are not TIFF are tried as
    PDF documents via Poppler; everything else is read with ``cv2.imread``
    and treated as a single page.

    Args:
        images: Iterable of filenames.

    Yields:
        Tuples ``(img, filename, page)``. ``img`` is the result of
        ``to_opencv`` for TIFF and PDF pages and of ``cv2.imread`` otherwise;
        ``page`` is zero based.

    Raises:
        IOError: With ``errno.ENOENT`` if a file does not exist. As this is a
            generator, it is raised when iteration reaches that file.
    """
    # Maximum distance (in pt) between an embedded image's corners and the
    # page corners for the image to count as a full page scan.
    THRESH = 10  # pt

    for filename in images:
        if not os.path.exists(filename):
            raise IOError(errno.ENOENT, _("File does not exist"), filename)

        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page
            # count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pass

        if not is_tiff:
            try:
                gfile = Gio.File.new_for_path(filename)
                pdf_doc = Poppler.Document.new_from_gfile(gfile, None, None)
                pages = pdf_doc.get_n_pages()
                is_pdf = True
            except Exception:
                # Either not PDF/damaged or poppler not installed properly.
                # Deliberately best effort, but KeyboardInterrupt/SystemExit
                # are no longer swallowed.
                pass

        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)
                img = to_opencv(surf)
            elif is_pdf:
                # Try to retrieve a single fullpage image, if that fails,
                # render the page at the detected scan resolution.
                pdfpage = pdf_doc.get_page(page)
                page_width, page_height = pdfpage.get_size()

                # Named differently from the `images` argument, which is
                # still being iterated by the outer loop.
                mappings = pdfpage.get_image_mapping()

                is_fullpage = False
                if len(mappings) == 1:
                    area = mappings[0].area
                    is_fullpage = (
                        abs(area.x1) < THRESH and
                        abs(area.y1) < THRESH and
                        abs(area.x2 - page_width) < THRESH and
                        abs(area.y2 - page_height) < THRESH)

                if is_fullpage:
                    # Assume one full page image, and simply use that.
                    surf = pdfpage.get_image(mappings[0].image_id)
                else:
                    dpi = 0
                    # Try to detect the DPI of the scan
                    for mapping in mappings:
                        area_width = mapping.area.x2 - mapping.area.x1
                        area_height = mapping.area.y2 - mapping.area.y1

                        # Only images covering at least half the page height
                        # are considered.
                        if area_height < page_height / 2:
                            continue
                        # A degenerate area cannot yield a resolution and
                        # would otherwise divide by zero below.
                        if area_width <= 0 or area_height <= 0:
                            continue

                        embedded = pdfpage.get_image(mapping.image_id)

                        # Pixels per point times 72 points per inch
                        dpi_from_height = round(
                            embedded.get_height() / area_height * 72)
                        dpi_from_width = round(
                            embedded.get_width() / area_width * 72)

                        # Only trust the image if both axes agree
                        if abs(dpi_from_height - dpi_from_width) <= 1:
                            dpi = max(dpi, dpi_from_height, dpi_from_width)

                    # Fall back to 300dpi for odd values
                    if dpi < 199 or dpi > 601:
                        dpi = 300

                    # Scale factor from PDF points to pixels
                    scale = dpi / 72.0

                    surf = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                              int(scale * page_width),
                                              int(scale * page_height))
                    cr = cairo.Context(surf)
                    cr.scale(scale, scale)
                    # White background
                    cr.set_source_rgb(1, 1, 1)
                    cr.paint()

                    pdfpage.render_for_printing(cr)
                    del cr

                img = to_opencv(surf)
            else:
                img = cv2.imread(filename)

            yield img, filename, page