def getTextSizeFromImage(image):
    """Estimate the height, in pixel rows, of the text found in *image*.

    The image is scanned one pixel row at a time, starting at the row given
    by ``getTextBeginHeight(image)``.  A row counts as text when any sampled
    pixel (every third column) contrasts with the background according to
    ``colorsContrast``; only the last band of each pixel is compared.  The
    heights of the runs of consecutive text rows are collected and the
    tallest run is returned.

    The background value is the last band of the most frequent colour
    reported by ``image.getcolors``; 255 is used when no colours are
    reported.

    Returns 0 when no text row is found.
    """
    width, height = image.size

    background_color = 255
    colors = image.getcolors(width * height)
    if colors:
        # Entries are (count, pixel) pairs, so the largest entry is the most
        # frequent colour.
        background_color = max(colors)[1][-1]

    # The last entry is the height of the run of text rows being counted.
    text_sizes = [0]
    for row in xrange(getTextBeginHeight(image), height):
        # NOTE(review): the crop box selects the pixel row just above `row`,
        # so the last row of the image is never examined -- confirm intended.
        current_line = image.crop((0, row - 1, width, row))
        has_text = any(
            colorsContrast(current_line.getpixel((column, 0))[-1],
                           background_color)
            for column in range(0, current_line.size[0], 3))
        if has_text:
            text_sizes[-1] += 1
        elif text_sizes[-1]:
            # A blank row closes the current run; start counting a new one.
            text_sizes.append(0)

    text_sizes = [size for size in text_sizes if size != 0]
    text_size = max(text_sizes) if text_sizes else 0
    debug('Text Size: ', text_size)
    return text_size
def getAllBlocks(self):
    """Return the unified list of blocks.

    The blocks produced by ``extendBlocksByBelongingSingles`` are passed
    through ``unifyBlocks``.  When ``OCRFEEDER_DEBUG`` is set, every
    resulting block is also sent to ``debug``.
    """
    unified_blocks = self.unifyBlocks(self.extendBlocksByBelongingSingles())
    if not OCRFEEDER_DEBUG:
        return unified_blocks
    for unified_block in unified_blocks:
        debug(unified_block)
    return unified_blocks
def addText(self, data_box):
    """Add the text of *data_box* to the current page inside a frame.

    The frame is sized and positioned from
    ``data_box.getBoundsPrintSize(self.current_page_resolution)``, whose
    values are written out in inches.  When the box's text data has a
    non-zero angle, the rotated frame style is used and a ``transform``
    attribute (rotation in radians, absolute value, followed by a
    translation to the frame position) is added.

    Each line of the text becomes its own paragraph in the text box.
    """
    text = data_box.getText()
    angle = data_box.text_data.angle
    debug('Angle: ', angle)

    style_name = 'FrameStyleRotated' if angle else 'FrameStyle'
    frame_style = Style(name=style_name, family='graphic')

    x, y, width, height = data_box.getBoundsPrintSize(
        self.current_page_resolution)
    frame = Frame(stylename=frame_style,
                  width=str(width) + 'in',
                  height=str(height) + 'in',
                  x=str(x) + 'in',
                  y=str(y) + 'in',
                  anchortype='paragraph')
    if angle:
        # x and y are the same values emitted above with the 'in' unit, so
        # the translation must be expressed in inches too (it used to be
        # labelled 'cm', which put rotated frames in the wrong place).
        frame.addAttribute(
            'transform',
            'rotate (%s) translate (%sin %sin)'
            % (abs(math.radians(angle)), x, y))
    self.current_page.addElement(frame)

    textbox = TextBox()
    frame.addElement(textbox)
    for line in text.split('\n'):
        textbox.addElement(
            P(stylename=self.__handleFrameStyle(data_box.text_data),
              text=line))
def save(self):
    """Save the document under ``self.name`` and delete the temporary images.

    ".odt" is appended to the name unless it already ends with that
    extension (compared case-insensitively).  Removing the temporary images
    is best effort: a file that cannot be deleted is reported through
    ``debug`` and the remaining files are still processed.
    """
    name = self.name
    if not name.lower().endswith(".odt"):
        name += ".odt"
    self.document.save(name)
    for image in self.temp_images:
        try:
            os.unlink(image)
        except OSError:
            # Only file-system failures are expected here; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed.
            debug("Error removing image: %s" % image)
def save(self):
    """Save the document under ``self.name`` and delete the temporary images.

    '.odt' is appended to the name unless it already ends with that
    extension (compared case-insensitively).  Removing the temporary images
    is best effort: a file that cannot be deleted is reported through
    ``debug`` and the remaining files are still processed.
    """
    name = self.name
    if not name.lower().endswith('.odt'):
        name += '.odt'
    self.document.save(name)
    for image in self.temp_images:
        try:
            os.unlink(image)
        except OSError:
            # Only file-system failures are expected here; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed.
            debug('Error removing image: %s' % image)
def __init__(self, path_to_image, window_size=None):
    """Open the image at *path_to_image* and keep a greyscale copy of it.

    path_to_image -- path of the image file to load.
    window_size -- optional window size; when it is not given (or is
        zero) it defaults to one sixtieth of the image height.

    Raises IOError when *path_to_image* is not an existing file and
    ImageManipulationError when the image cannot be opened or converted.
    """
    self.window_size = window_size
    if not os.path.isfile(path_to_image):
        raise IOError('Image file not found: %s' % (path_to_image,))
    try:
        self.original_image = Image.open(path_to_image)
        self.black_n_white_image = self.original_image.convert('L')
    except Exception:
        # Report any loading/decoding problem with the project's own
        # exception type, without also trapping KeyboardInterrupt/SystemExit.
        raise ImageManipulationError
    if not self.window_size:
        self.window_size = self.original_image.size[1] / 60.
    debug('Window Size: ', self.window_size)
def choosePageSize(self):
    """Ask the user for a page size and apply it to one page or to all.

    A PageSizeDialog is opened with the current page's width and height.
    When the dialog is accepted, the chosen size is set on every page in
    ``self.image_reviewer_dict`` if the dialog's "all pages" radio button
    is active, otherwise only on the current reviewer's page.  The dialog
    is always destroyed afterwards.
    """
    current_reviewer = self.__getCurrentReviewer()
    current_page = current_reviewer.page
    page_size_dialog = PageSizeDialog((current_page.width,
                                       current_page.height))
    try:
        if page_size_dialog.run() == gtk.RESPONSE_ACCEPT:
            size = page_size_dialog.getSize()
            if page_size_dialog.all_pages_radio.get_active():
                for reviewer in self.image_reviewer_dict.values():
                    reviewer.page.setSize(size)
            else:
                current_reviewer.page.setSize(size)
            debug('Page size: ', size)
    finally:
        # Close the dialog even if applying the size failed.
        page_size_dialog.destroy()
def choosePageSize(self):
    """Ask the user for a page size and apply it to one page or to all.

    A PageSizeDialog is opened with the current page's width and height.
    When the dialog is accepted, the chosen size is set on every page in
    ``self.image_reviewer_dict`` if the dialog's "all pages" radio button
    is active, otherwise only on the current reviewer's page.  The dialog
    is always destroyed afterwards.
    """
    current_reviewer = self.__getCurrentReviewer()
    current_page = current_reviewer.page
    page_size_dialog = PageSizeDialog(
        (current_page.width, current_page.height))
    try:
        response = page_size_dialog.run()
        if response == gtk.RESPONSE_ACCEPT:
            size = page_size_dialog.getSize()
            if page_size_dialog.all_pages_radio.get_active():
                for reviewer in self.image_reviewer_dict.values():
                    reviewer.page.setSize(size)
            else:
                current_reviewer.page.setSize(size)
            debug('Page size: ', size)
    finally:
        # Close the dialog even if applying the size failed.
        page_size_dialog.destroy()
def loadConfiguration(self, folder = None):
    """Load the pages described by ``project.xml`` in a project folder.

    folder -- directory holding ``project.xml``; defaults to
        ``self.configuration_dir``.

    Returns a list with one PageData per entry produced by
    ``__getPageDataInfo``, or None when the folder has no ``project.xml``
    file.
    """
    folder = folder or self.configuration_dir
    project_xml = os.path.join(folder, 'project.xml')
    # The joined path is never empty, so the file itself has to be tested.
    if not os.path.isfile(project_xml):
        return None
    document = minidom.parse(project_xml)
    images = self.__getImagesInfo(document.getElementsByTagName('image'))
    page_data_nodes = document.getElementsByTagName('PageData')
    pages = []
    for page_data in self.__getPageDataInfo(page_data_nodes):
        debug('Page Data:', page_data)
        data_boxes = []
        for data_box in page_data['data_boxes']:
            # The values come from the project file, so they are handed to
            # DataBox as keyword arguments instead of being pasted into
            # source text and exec'd: that executed file content as code and
            # left stray backslashes (from re.escape) in string values.
            box_args = {}
            # The text is assigned after construction rather than passed
            # to the constructor.
            text = ''
            for var_name, value in data_box.items():
                if var_name == 'text':
                    text = value
                    continue
                try:
                    value = int(value)
                except ValueError:
                    # Not a number: keep the string as it is.
                    pass
                box_args[str(var_name)] = value
            box = DataBox(**box_args)
            box.text = text
            data_boxes.append(box)
        image_path = page_data['image_path']
        if not os.path.exists(image_path):
            # NOTE(review): the fallback looks in self.configuration_dir
            # even when another folder was passed in -- confirm intended.
            image_path = os.path.join(self.configuration_dir, 'images',
                                      images[image_path])
        pages.append(PageData(image_path, data_boxes))
    return pages
def loadConfiguration(self, folder=None):
    """Load the pages described by ``project.xml`` in a project folder.

    folder -- directory holding ``project.xml``; defaults to
        ``self.configuration_dir``.

    Returns a list with one PageData per entry produced by
    ``__getPageDataInfo``, or None when the folder has no ``project.xml``
    file.
    """
    folder = folder or self.configuration_dir
    project_xml = os.path.join(folder, "project.xml")
    # The joined path is never empty, so the file itself has to be tested.
    if not os.path.isfile(project_xml):
        return None
    document = minidom.parse(project_xml)
    images = self.__getImagesInfo(document.getElementsByTagName("image"))
    page_data_nodes = document.getElementsByTagName("PageData")
    pages = []
    for page_data in self.__getPageDataInfo(page_data_nodes):
        debug("Page Data:", page_data)
        data_boxes = []
        for data_box in page_data["data_boxes"]:
            # The values come from the project file, so they are handed to
            # DataBox as keyword arguments instead of being pasted into
            # source text and exec'd: that executed file content as code and
            # left stray backslashes (from re.escape) in string values.
            box_args = {}
            # The text is assigned after construction rather than passed
            # to the constructor.
            text = ""
            for var_name, value in data_box.items():
                if var_name == "text":
                    text = value
                    continue
                try:
                    value = int(value)
                except ValueError:
                    # Not a number: keep the string as it is.
                    pass
                box_args[str(var_name)] = value
            box = DataBox(**box_args)
            box.text = text
            data_boxes.append(box)
        image_path = page_data["image_path"]
        if not os.path.exists(image_path):
            # NOTE(review): the fallback looks in self.configuration_dir
            # even when another folder was passed in -- confirm intended.
            image_path = os.path.join(self.configuration_dir, "images",
                                      images[image_path])
        pages.append(PageData(image_path, data_boxes))
    return pages
def addText(self, data_box):
    """Add the text of *data_box* to the current page inside a frame.

    The frame is sized and positioned from
    ``data_box.getBoundsPrintSize(self.current_page_resolution)``, whose
    values are written out in inches.  When the box's text data has a
    non-zero angle, the rotated frame style is used and a ``transform``
    attribute (rotation in radians, absolute value, followed by a
    translation to the frame position) is added.

    Each line of the text becomes its own paragraph in the text box.
    """
    text = data_box.getText()
    angle = data_box.text_data.angle
    debug("Angle: ", angle)

    style_name = "FrameStyleRotated" if angle else "FrameStyle"
    frame_style = Style(name=style_name, family="graphic")

    x, y, width, height = data_box.getBoundsPrintSize(self.current_page_resolution)
    frame = Frame(
        stylename=frame_style,
        width=str(width) + "in",
        height=str(height) + "in",
        x=str(x) + "in",
        y=str(y) + "in",
        anchortype="paragraph",
    )
    if angle:
        # x and y are the same values emitted above with the "in" unit, so
        # the translation must be expressed in inches too (it used to be
        # labelled "cm", which put rotated frames in the wrong place).
        frame.addAttribute(
            "transform",
            "rotate (%s) translate (%sin %sin)" % (abs(math.radians(angle)), x, y),
        )
    self.current_page.addElement(frame)

    textbox = TextBox()
    frame.addElement(textbox)
    for line in text.split("\n"):
        textbox.addElement(P(stylename=self.__handleFrameStyle(data_box.text_data), text=line))
def OLDgetTextSizeFromImage(image):
    """Older text-size estimation that picks the background per pixel row.

    As a debugging aid the image is first saved to ``/tmp/img<count>.png``
    and the module-level ``count`` is incremented.

    Rows are scanned from ``getTextBeginHeight(image)`` downwards.  For each
    row the most frequent colour is taken as the background, and the row
    counts as text when any of its colours contrasts with that background
    according to ``colorsContrast`` (only the last band of each colour is
    compared).  The height of the tallest run of consecutive text rows is
    returned, or 0 when there is none.
    """
    global count
    width, height = image.size

    debug('Image #%s' % count)
    image.save('/tmp/img%s.png' % count, 'PNG')
    count += 1

    # The last entry is the height of the run of text rows being counted.
    text_sizes = [0]
    for row in xrange(getTextBeginHeight(image), height):
        current_line = image.crop((0, row - 1, width, row))
        # getcolors() yields (count, pixel) pairs; it returns None when the
        # row holds more distinct colours than its default limit, in which
        # case the row is treated as having no contrasting colour.
        colors = current_line.getcolors() or []
        has_text = False
        if colors:
            background_color = max(colors)[1][-1]
            has_text = any(colorsContrast(entry[1][-1], background_color)
                           for entry in colors)
        if has_text:
            text_sizes[-1] += 1
        elif text_sizes[-1]:
            # A blank row closes the current run; start counting a new one.
            text_sizes.append(0)

    text_sizes = [size for size in text_sizes if size != 0]
    text_size = max(text_sizes) if text_sizes else 0
    debug('Text Size: ', text_size)
    return text_size
def OLDgetTextSizeFromImage(image):
    """Older text-size estimation that picks the background per pixel row.

    As a debugging aid the image is first saved to ``/tmp/img<count>.png``
    and the module-level ``count`` is incremented.

    Rows are scanned from ``getTextBeginHeight(image)`` downwards.  For each
    row the most frequent colour is taken as the background, and the row
    counts as text when any of its colours contrasts with that background
    according to ``colorsContrast`` (only the last band of each colour is
    compared).  The height of the tallest run of consecutive text rows is
    returned, or 0 when there is none.
    """
    global count
    width, height = image.size

    debug('Image #%s' % count)
    image.save('/tmp/img%s.png' % count , 'PNG')
    count += 1

    # The last entry is the height of the run of text rows being counted.
    text_sizes = [0]
    for row in xrange(getTextBeginHeight(image), height):
        current_line = image.crop((0, row - 1, width, row))
        # getcolors() yields (count, pixel) pairs; it returns None when the
        # row holds more distinct colours than its default limit, in which
        # case the row is treated as having no contrasting colour.
        colors = current_line.getcolors() or []
        has_text = False
        if colors:
            background_color = max(colors)[1][-1]
            has_text = any(colorsContrast(entry[1][-1], background_color)
                           for entry in colors)
        if has_text:
            text_sizes[-1] += 1
        elif text_sizes[-1]:
            # A blank row closes the current run; start counting a new one.
            text_sizes.append(0)

    text_sizes = [size for size in text_sizes if size != 0]
    text_size = max(text_sizes) if text_sizes else 0
    debug('Text Size: ', text_size)
    return text_size
def performOcr(self, engine_name = None):
    """Run OCR on the box editor's image and update the editor.

    engine_name -- optional engine name; when an entry of
        ``self.ocr_engines`` has an engine with that name, it is used
        instead of the engine currently selected in the box editor.

    The image is rotated by the editor's angle when one is set.  If an
    engine is selected, the recognised text is put into the editor.  The
    text size measured by ``graphics.getTextSizeFromImage`` is then scaled
    by the page's vertical resolution and, when non-zero, set as the font
    size.
    """
    selected_engine_index = self.box_editor.getSelectedOcrEngine()
    if engine_name:
        for index, engine_entry in enumerate(self.ocr_engines):
            if engine_entry[0].name == engine_name:
                selected_engine_index = index
                break
    self.box_editor.selectOcrEngine(selected_engine_index)

    image = graphics.convertPixbufToImage(self.box_editor.getImage())
    angle = self.box_editor.getAngle()
    if angle:
        image = graphics.getImageRotated(image, angle)

    # Index 0 is a valid selection, so compare against None explicitly.
    if selected_engine_index is not None:
        engine = self.ocr_engines[selected_engine_index][0]
        engine.setImage(image)
        self.box_editor.setText(engine.read())
        debug('Finished reading')

    text_size = graphics.getTextSizeFromImage(image)
    if text_size:
        # Presumably pixels / dpi * 72 = points -- TODO confirm the units of
        # page.resolution.
        y_resolution = float(self.reviewer.page.resolution[1])
        self.box_editor.setFontSize(
            math.floor(text_size / y_resolution * 72.0))
def performOcr(self, engine_name=None):
    """Run OCR on the box editor's image and update the editor.

    engine_name -- optional engine name; when an entry of
        ``self.ocr_engines`` has an engine with that name, it is used
        instead of the engine currently selected in the box editor.

    The image is rotated by the editor's angle when one is set.  If an
    engine is selected, the recognised text is put into the editor.  The
    text size measured by ``graphics.getTextSizeFromImage`` is then scaled
    by the page's vertical resolution and, when non-zero, set as the font
    size.
    """
    selected_engine_index = self.box_editor.getSelectedOcrEngine()
    if engine_name:
        for index, engine_entry in enumerate(self.ocr_engines):
            if engine_entry[0].name == engine_name:
                selected_engine_index = index
                break
    self.box_editor.selectOcrEngine(selected_engine_index)

    image = graphics.convertPixbufToImage(self.box_editor.getImage())
    angle = self.box_editor.getAngle()
    if angle:
        image = graphics.getImageRotated(image, angle)

    # Index 0 is a valid selection, so compare against None explicitly.
    if selected_engine_index is not None:
        engine = self.ocr_engines[selected_engine_index][0]
        engine.setImage(image)
        self.box_editor.setText(engine.read())
        debug('Finished reading')

    text_size = graphics.getTextSizeFromImage(image)
    if text_size:
        # Presumably pixels / dpi * 72 = points -- TODO confirm the units of
        # page.resolution.
        y_resolution = float(self.reviewer.page.resolution[1])
        self.box_editor.setFontSize(
            math.floor(text_size / y_resolution * 72.0))
def __pressedAngleDetectionButton(self, widget):
    """Detect the text angle of the box editor's image and set it.

    The editor's pixbuf is converted to an image, the angle is computed by
    ``graphics.getHorizontalAngleForText``, logged through ``debug`` and
    stored back in the box editor.  *widget* is not used.
    """
    editor = self.box_editor
    detected_angle = graphics.getHorizontalAngleForText(
        graphics.convertPixbufToImage(editor.getImage()))
    debug('ANGLE: ', detected_angle)
    editor.setAngle(detected_angle)