def addImages(self, image_path_list):
    """Add the images in image_path_list, pre-processing them if configured.

    When neither deskewing nor unpapering after addition is enabled in the
    configuration manager, every path is passed directly to __addImage (the
    second argument is True only for the first path) and the method returns.
    Otherwise one AsyncItem per image is queued in a
    QueuedEventsProgressDialog, which is then run.
    """
    config = self.configuration_manager
    total = len(image_path_list)
    if not (config.deskew_images_after_addition or
            config.unpaper_images_after_addition):
        for position, path in enumerate(image_path_list):
            if not self.__addImage(path, position == 0):
                debug('Failed to load image "%s"' % path)
        return

    progress_dialog = QueuedEventsProgressDialog(self.main_window.window)
    queued_items = []
    for position, path in enumerate(image_path_list):
        is_last = position == total - 1
        is_first = position == 0
        async_item = AsyncItem(self.__imagePreProcessing,
                               (path, ),
                               self.__imagePreProcessingFinishedCb,
                               (progress_dialog, is_last, is_first))
        if total == 1:
            title = _('Preparing image')
        else:
            title = _('Preparing image %(current_index)s/%(total)s') % \
                {'current_index': position + 1, 'total': total}
        queued_items.append(((title, _(u'Please wait…')), async_item))
    progress_dialog.setItemsList(queued_items)
    progress_dialog.run()
def makeEnginesFromFolder(self, folder):
    """Load the engines described by the XML files of folder.

    Every file for which getEngineFromXml returns a truthy engine is stored
    in self.ocr_engines as an (engine, xml_file) tuple.  If none of the
    loaded engines has the configured favorite name, the first loaded engine
    becomes the favorite.

    Returns a dict with the keys 'auto' and 'manual'.  Each list holds
    {'engine': ..., 'configuration': ...} entries for engines whose version
    is lower than the one of their default configuration; an entry is 'auto'
    when the engine's arguments equal one of the configuration's
    'old_arguments' and 'manual' otherwise.
    """
    self.ocr_engines = []
    favorite_found = False
    for xml_path in self.getXmlFilesInFolder(folder):
        loaded = self.getEngineFromXml(xml_path)
        if not loaded:
            continue
        self.ocr_engines.append((loaded, xml_path))
        if not favorite_found:
            favorite_found = \
                self.configuration_manager.favorite_engine == loaded.name

    if len(self.ocr_engines) == 0:
        lib.debug("Warning: no engines found!")
    elif not favorite_found:
        first_engine = self.ocr_engines[0][0]
        self.configuration_manager.favorite_engine = first_engine.name

    outdated = {'auto': [], 'manual': []}
    for engine, _xml_path in self.ocr_engines:
        default_conf = \
            self.configuration_manager.getEngineDefaultConfiguration(
                engine.engine_path)
        if default_conf is None:
            continue
        if not float(engine.version) < float(default_conf['version']):
            continue
        matches_old = any(engine.arguments == old_arguments
                          for old_arguments in default_conf['old_arguments'])
        kind = 'auto' if matches_old else 'manual'
        outdated[kind].append({'engine': engine,
                               'configuration': default_conf})
    return outdated
def getAllBlocks(self):
    """Return the unified blocks.

    The blocks come from extendBlocksByBelongingSingles() and are passed
    through unifyBlocks().  When OCRFEEDER_DEBUG is set, every resulting
    block is also sent to debug().
    """
    unified = self.unifyBlocks(self.extendBlocksByBelongingSingles())
    if OCRFEEDER_DEBUG:
        for item in unified:
            debug(item)
    return unified
def performOcrForDataBox(self, data_box, engine):
    """Run OCR on the image area covered by data_box and store the results.

    The engine language is set from the data box when the engine has
    languages.  The area of self.image_pixbuf under the box is converted to
    an image and read through a LayoutAnalysis; the recognized text is set
    on the data box and, when a text size is detected, so is the font size.
    The clipboard and spellchecker menus of the main window are made
    sensitive once the text is set.
    """
    if engine.hasLanguages():
        engine.setLanguage(data_box.getLanguage())
    pixbuf_width = self.image_pixbuf.get_width()
    pixbuf_height = self.image_pixbuf.get_height()
    box_x = data_box.getX()
    box_y = data_box.getY()
    # The crop must fit in what is left of the image to the right of and
    # below the box origin; clamping against the full image size alone lets
    # a box that reaches past an edge request an out-of-range area.
    new_pixbuf_width = min(data_box.getWidth(), pixbuf_width - box_x)
    new_pixbuf_height = min(data_box.getHeight(), pixbuf_height - box_y)
    subpixbuf = self.image_pixbuf.new_subpixbuf(box_x, box_y,
                                                new_pixbuf_width,
                                                new_pixbuf_height)
    # Extra attributes attached to the sub-pixbuf before conversion.
    # NOTE(review): width is set to the full pixbuf width, not the crop
    # width -- presumably what convertPixbufToImage expects; confirm.
    subpixbuf.x = box_x
    subpixbuf.y = box_y
    subpixbuf.width = pixbuf_width
    image = graphics.convertPixbufToImage(subpixbuf)
    layout_analysis = LayoutAnalysis(
        engine, clean_text=self.configuration_manager.clean_text)
    text = layout_analysis.readImage(image)
    data_box.setText(text)
    self.main_window.copy_to_clipboard_menu.set_sensitive(True)
    self.main_window.spellchecker_menu.set_sensitive(True)
    debug('Finished reading')
    text_size = layout_analysis.getTextSizeFromImage(image,
                                                     self.page.resolution[1])
    if text_size:
        data_box.setFontSize(text_size)
def makeEnginesFromFolder(self, folder):
    """Populate self.ocr_engines from folder and report outdated engines.

    Engines are read with getEngineFromXml from each file returned by
    getXmlFilesInFolder; falsy results are skipped.  The configured favorite
    engine is replaced by the first loaded engine when no loaded engine
    carries the favorite name.

    Returns {'auto': [...], 'manual': [...]} where every entry is a dict
    with the outdated 'engine' and its default 'configuration'.
    """
    self.ocr_engines = []
    has_favorite = False
    for xml_file in self.getXmlFilesInFolder(folder):
        candidate = self.getEngineFromXml(xml_file)
        if candidate:
            self.ocr_engines.append((candidate, xml_file))
            has_favorite = has_favorite or \
                self.configuration_manager.favorite_engine == candidate.name

    if not self.ocr_engines:
        lib.debug("Warning: no engines found!")
    elif not has_favorite:
        self.configuration_manager.favorite_engine = \
            self.ocr_engines[0][0].name

    needing_update = {'auto': [], 'manual': []}
    for engine, _xml_file in self.ocr_engines:
        engine_path = engine.engine_path
        defaults = self.configuration_manager.getEngineDefaultConfiguration(
            engine_path)
        if defaults is None:
            continue
        if float(engine.version) < float(defaults['version']):
            # 'auto' only if the engine still uses one of the old arguments
            for old_arguments in defaults['old_arguments']:
                if engine.arguments == old_arguments:
                    update_type = 'auto'
                    break
            else:
                update_type = 'manual'
            entry = {'engine': engine, 'configuration': defaults}
            needing_update[update_type].append(entry)
    return needing_update
def addImages(self, image_path_list):
    """Add the given images, pre-processing them when configured to do so.

    The list is first passed through graphics.convertMultiImagesInList
    together with the configured temporary folder.  If neither deskewing nor
    unpapering after addition is enabled, each resulting path goes straight
    to __addImage (flagging the first one); otherwise an AsyncItem per image
    is queued in a QueuedEventsProgressDialog that is then run.
    """
    config = self.configuration_manager
    image_path_list = graphics.convertMultiImagesInList(
        image_path_list, config.TEMPORARY_FOLDER)
    total = len(image_path_list)
    if not (config.deskew_images_after_addition or
            config.unpaper_images_after_addition):
        for position, path in enumerate(image_path_list):
            if not self.__addImage(path, position == 0):
                debug('Failed to load image "%s"' % path)
        return

    progress_dialog = QueuedEventsProgressDialog(self.main_window)
    queued_items = []
    for position, path in enumerate(image_path_list):
        callback_args = (progress_dialog,
                         position == total - 1,
                         position == 0)
        async_item = AsyncItem(self.__imagePreProcessing,
                               (path,),
                               self.__imagePreProcessingFinishedCb,
                               callback_args)
        if total == 1:
            title = _('Preparing image')
        else:
            title = _('Preparing image %(current_index)s/%(total)s') % \
                {'current_index': position + 1, 'total': total}
        queued_items.append(((title, _(u'Please wait…')), async_item))
    progress_dialog.setItemsList(queued_items)
    progress_dialog.run()
def updateBackgroundImage(self, image_path):
    """Store image_path and load it into self.image_pixbuf.

    The path is recorded on both this object and its page.  Nothing is
    loaded when the path does not exist; a loading failure is only reported
    through debug().
    """
    self.path_to_image = image_path
    self.page.image_path = image_path
    if os.path.exists(self.path_to_image):
        try:
            self.image_pixbuf = GdkPixbuf.Pixbuf.new_from_file(
                self.path_to_image)
        except Exception as exception:
            debug(exception.message)
def save(self):
    """Save the document and delete the temporary images.

    The document is saved under self.name, with '.odt' appended unless the
    name already ends with it (case-insensitively).  Afterwards every path
    in self.temp_images is unlinked; a failure to remove one is reported
    through debug() and does not stop the remaining removals.
    """
    name = self.name
    if not name.lower().endswith('.odt'):
        name += '.odt'
    self.document.save(name)
    for image in self.temp_images:
        try:
            os.unlink(image)
        # Cleanup stays best-effort, but a bare "except:" would also swallow
        # KeyboardInterrupt and SystemExit.
        except Exception:
            debug('Error removing image: %s' % image)
def updateBackgroundImage(self, image_path):
    """Record image_path and try to load it as the background pixbuf.

    Both self.path_to_image and self.page.image_path are set to image_path.
    If the file exists it is loaded into self.image_pixbuf; an error raised
    while loading is passed to debug() instead of being propagated.
    """
    self.path_to_image = image_path
    self.page.image_path = image_path
    image_exists = os.path.exists(self.path_to_image)
    if not image_exists:
        return
    try:
        loaded_pixbuf = gtk.gdk.pixbuf_new_from_file(self.path_to_image)
    except Exception as exception:
        debug(exception.message)
        return
    self.image_pixbuf = loaded_pixbuf
def makeEnginesFromFolder(self, folder):
    """Load the engines described by the XML files of folder.

    Each engine returned by getEngineFromXml is stored in self.ocr_engines
    as an (engine, xml_file) tuple.  A WrongSettingsForEngine error is
    reported through lib.debug and the file is skipped.  While loading, it
    is tracked whether any engine carries the configured favorite name.
    """
    self.ocr_engines = []
    favorite_found = False
    for xml_path in self.getXmlFilesInFolder(folder):
        try:
            loaded = self.getEngineFromXml(xml_path)
            self.ocr_engines.append((loaded, xml_path))
        except WrongSettingsForEngine as error:
            lib.debug("Cannot load engine at %s: %s" %(
                xml_path, str(error)))
            continue
        if not favorite_found:
            favorite_found = \
                self.configuration_manager.favorite_engine == loaded.name
def makeEnginesFromFolder(self, folder):
    """Fill self.ocr_engines with the engines found in folder.

    For every XML file of the folder the engine is built with
    getEngineFromXml and appended together with the file path.  Files whose
    engine raises WrongSettingsForEngine are logged with lib.debug and left
    out.  The presence of the configured favorite engine is tracked along
    the way.
    """
    self.ocr_engines = []
    has_favorite = False
    xml_files = self.getXmlFilesInFolder(folder)
    for xml_file in xml_files:
        try:
            engine = self.getEngineFromXml(xml_file)
            entry = (engine, xml_file)
            self.ocr_engines.append(entry)
        except WrongSettingsForEngine as wrong_settings:
            message = "Cannot load engine at %s: %s" % (xml_file,
                                                        str(wrong_settings))
            lib.debug(message)
        else:
            favorite_name = self.configuration_manager.favorite_engine \
                if not has_favorite else None
            has_favorite = has_favorite or favorite_name == engine.name
def getEngineFromXml(self, xml_file_name):
    """Build an Engine from the children of an XML file's root node.

    The tag of every child of the root becomes a keyword argument name and
    its text the argument value.

    Returns the Engine, or None when the collected arguments are rejected by
    the Engine constructor with a TypeError.
    """
    root_node = ET.parse(xml_file_name).getroot()
    # Iterate over the element itself: Element.getchildren() is deprecated
    # and no longer exists in Python 3.9+.
    arguments = dict((child.tag, child.text) for child in root_node)
    try:
        engine = Engine(**arguments)
    except TypeError as exception:
        # Interpolate the message; passing it as a second argument left the
        # '%s' placeholder unformatted.
        lib.debug('Error when unserializing engine: %s' % exception)
        engine = None
    # Callers test or store the result, so the engine has to be handed back.
    return engine
def choosePageSize(self):
    """Ask for a page size and apply it to the current page or to all pages.

    A PageSizeDialog is opened with the size of the current reviewer's page.
    When the dialog is accepted, the chosen size is set on every page of the
    source images selector if the "all pages" radio is active, otherwise
    only on the current reviewer's page.  The dialog is destroyed and the
    status bar updated in either case.
    """
    reviewer = self.__getCurrentReviewer()
    shown_page = reviewer.page
    dialog = PageSizeDialog((shown_page.width, shown_page.height))
    if dialog.run() == gtk.RESPONSE_ACCEPT:
        size = dialog.getSize()
        if dialog.all_pages_radio.get_active():
            targets = self.source_images_selector_widget.getAllPages()
        else:
            targets = [reviewer.page]
        for target in targets:
            target.setSize(size)
        debug('Page size: ', size)
    dialog.destroy()
    self.__updateStatusBar(reviewer)
def choosePageSize(self):
    """Run the page size dialog and apply the selected size.

    The dialog starts from the current reviewer page's width and height.  On
    acceptance the size is applied to all pages of the source images
    selector (when the "all pages" radio is active) or to the current
    reviewer's page.  Afterwards the dialog is destroyed and the status bar
    refreshed for the current reviewer.
    """
    reviewer = self.__getCurrentReviewer()
    initial_size = (reviewer.page.width, reviewer.page.height)
    size_dialog = PageSizeDialog(initial_size)
    accepted = size_dialog.run() == gtk.RESPONSE_ACCEPT
    if accepted:
        new_size = size_dialog.getSize()
        apply_to_all = size_dialog.all_pages_radio.get_active()
        if not apply_to_all:
            reviewer.page.setSize(new_size)
        else:
            for each_page in self.source_images_selector_widget.getAllPages():
                each_page.setSize(new_size)
        debug('Page size: ', new_size)
    size_dialog.destroy()
    self.__updateStatusBar(reviewer)
def addText(self, data_box):
    """Add the text of data_box to the current page as a framed text box.

    A Frame sized and positioned (in inches) from the box's print bounds is
    added to self.current_page; when the box's text has an angle, the
    rotated frame style is used and a 'transform' attribute is set on the
    frame.  Each line of the text becomes its own paragraph in the text box.
    """
    content = data_box.getText()
    style = Style(name='FrameStyle', family = 'graphic')
    debug('Angle: ', data_box.text_data.angle)
    rotation = data_box.text_data.angle
    if rotation:
        style = Style(name='FrameStyleRotated', family = 'graphic')
    bounds = data_box.getBoundsPrintSize(self.current_page_resolution)
    x, y, width, height = bounds
    frame = Frame(stylename = style,
                  width = str(width) + 'in',
                  height = str(height) + 'in',
                  x = str(x) + 'in',
                  y = str(y) + 'in',
                  anchortype = 'paragraph')
    if rotation:
        transform = 'rotate (%s) translate (%scm %scm)' % \
            (abs(math.radians(rotation)), x, y)
        frame.addAttribute('transform', transform)
    self.current_page.addElement(frame)
    text_box = TextBox()
    frame.addElement(text_box)
    for text_line in content.split('\n'):
        paragraph_style = self.__handleFrameStyle(data_box.text_data)
        text_box.addElement(P(stylename = paragraph_style, text = text_line))
class OcrEnginesManager:
    """Keeps the list of OCR engines together with their XML file paths.

    self.ocr_engines holds (engine, xml_file) tuples.
    """

    def __init__(self, configuration_manager):
        self.ocr_engines = []
        self.configuration_manager = configuration_manager

    def getEnginesNames(self):
        """Return the names of the managed engines, in list order."""
        return [engine.name for engine, _path in self.ocr_engines]

    def getEnginePath(self, engine):
        """Return the XML path stored for engine, or None if not managed."""
        for eng, path in self.ocr_engines:
            if eng == engine:
                return path
        return None

    def replaceEngine(self, engine, new_engine):
        """Replace engine by new_engine, keeping the same XML path.

        new_engine is written with engineToXml to the path of the replaced
        engine.  Returns True when engine was found, False otherwise.
        """
        for index, (eng, path) in enumerate(self.ocr_engines):
            if eng == engine:
                self.engineToXml(new_engine, path)
                self.ocr_engines[index] = new_engine, path
                return True
        return False

    def makeEnginesFromFolder(self, folder):
        """Load the engines of folder and report the outdated ones.

        Files whose engine raises WrongSettingsForEngine are logged and
        skipped.  When no loaded engine has the configured favorite name,
        the first loaded engine becomes the favorite.

        Returns a dict with 'auto' and 'manual' lists of
        {'engine': ..., 'configuration': ...} entries for the engines whose
        version is lower than their default configuration's; 'auto' is used
        when the engine's arguments equal one of the 'old_arguments'.
        """
        self.ocr_engines = []
        favorite_engine_exists = False
        for xml_file in self.getXmlFilesInFolder(folder):
            try:
                engine = self.getEngineFromXml(xml_file)
                self.ocr_engines.append((engine, xml_file))
            except WrongSettingsForEngine as we:
                lib.debug("Cannot load engine at %s: %s" %
                          (xml_file, str(we)))
            else:
                favorite_engine_exists = favorite_engine_exists or \
                    self.configuration_manager.favorite_engine == engine.name
        if not self.ocr_engines:
            lib.debug("Warning: no engines found!")
        elif not favorite_engine_exists:
            self.configuration_manager.favorite_engine = \
                self.ocr_engines[0][0].name
        engines_needing_update = {'auto': [], 'manual': []}
        # The default configuration is looked up by the engine's own path,
        # not by the XML file it was loaded from.
        for engine, _xml_file in self.ocr_engines:
            default_conf = \
                self.configuration_manager.getEngineDefaultConfiguration(
                    engine.engine_path)
            if default_conf is None:
                continue
            if float(engine.version) < float(default_conf['version']):
                update_type = 'manual'
                for arguments in default_conf['old_arguments']:
                    if engine.arguments == arguments:
                        update_type = 'auto'
                        break
                engines_needing_update[update_type].append({
                    'engine': engine,
                    'configuration': default_conf
                })
        return engines_needing_update
def choosePageSize(self):
    """Let the user choose a page size for the current page or all pages.

    A PageSizeDialog parented to the main window is opened with the current
    reviewer page's size.  If it is accepted, the selected size is applied
    to every page of the pages icon view when the "all pages" radio is
    active, and to the current reviewer's page otherwise.  The dialog is
    then destroyed and the status bar updated.
    """
    reviewer = self.__getCurrentReviewer()
    shown_page = reviewer.page
    dialog = PageSizeDialog(self.main_window,
                            (shown_page.width, shown_page.height))
    if dialog.run() == Gtk.ResponseType.ACCEPT:
        size = dialog.getSize()
        if dialog.all_pages_radio.get_active():
            targets = self.pages_icon_view.getAllPages()
        else:
            targets = [reviewer.page]
        for target in targets:
            target.setSize(size)
        debug('Page size: ', size)
    dialog.destroy()
    self.__updateStatusBar(reviewer)
def performOcrForDataBox(self, data_box, engine):
    """Run OCR on the image area covered by data_box and store the results.

    The area of self.image_pixbuf under the box is converted to an image and
    read through a LayoutAnalysis built for engine.  The recognized text is
    set on the data box and, when a text size is detected, so is the font
    size.  The clipboard and spellchecker menus of the main window are made
    sensitive once the text is set.
    """
    pixbuf_width = self.image_pixbuf.get_width()
    pixbuf_height = self.image_pixbuf.get_height()
    box_x = data_box.getX()
    box_y = data_box.getY()
    # The crop must fit in what is left of the image to the right of and
    # below the box origin; clamping against the full image size alone lets
    # a box that reaches past an edge request an out-of-range area.
    crop_width = min(data_box.getWidth(), pixbuf_width - box_x)
    crop_height = min(data_box.getHeight(), pixbuf_height - box_y)
    subpixbuf = self.image_pixbuf.subpixbuf(box_x, box_y,
                                            crop_width, crop_height)
    image = graphics.convertPixbufToImage(subpixbuf)
    layout_analysis = LayoutAnalysis(
        engine, clean_text=self.configuration_manager.clean_text)
    text = layout_analysis.readImage(image)
    data_box.setText(text)
    self.main_window.copy_to_clipboard_menu.set_sensitive(True)
    self.main_window.spellchecker_menu.set_sensitive(True)
    debug('Finished reading')
    text_size = layout_analysis.getTextSizeFromImage(
        image, self.page.resolution[1])
    if text_size:
        data_box.setFontSize(text_size)
def addText(self, box):
    """Draw the text of box on the canvas at the box's print position.

    The text origin is computed in inches from the box's print bounds and
    the page height.  Character spacing, leading and cursor offset come from
    the box's text data.  If setting the font fails, the error is reported
    through debug() and only the font size is set on the canvas.
    """
    x, y, width, height = box.getBoundsPrintSize(self.page_data.resolution)
    text = self.canvas.beginText()
    # Make the text transparent if we are not
    # creating a PDF from scratch
    if not self._from_scratch:
        text.setTextRenderMode(3)
    text.setTextOrigin(x * units.inch,
                       (self.page_data.height - y) * units.inch)
    text.setCharSpace(box.text_data.letter_space)
    text.setLeading(box.text_data.line_space + box.text_data.size)
    text.moveCursor(0, box.text_data.size)
    try:
        self.setFont(box.text_data.face, box.text_data.size)
    # Keep the fallback for any font error, but do not swallow
    # KeyboardInterrupt/SystemExit as a bare "except:" would.
    except Exception:
        debug('Error setting font %s' % box.text_data.face)
        self.canvas.setFontSize(box.text_data.size)
    text.textLines(box.text)
    self.canvas.drawText(text)
def loadConfiguration(self, folder = None):
    """Load the pages described by the project.xml file found in folder.

    folder defaults to self.configuration_dir.  Every 'PageData' entry of
    the document is turned into a PageData holding its DataBox objects.  An
    image path that does not exist is looked up in the 'images' folder of
    self.configuration_dir through the document's image information.

    Returns the list of pages, or None when folder has no project.xml.
    """
    folder = folder or self.configuration_dir
    project_xml = os.path.join(folder, 'project.xml')
    # os.path.join() never returns an empty string, so testing the string
    # itself could not detect a missing project file.
    if not os.path.exists(project_xml):
        return None
    document = minidom.parse(project_xml)
    images_node = document.getElementsByTagName('image')
    images = self.__getImagesInfo(images_node)
    page_data_nodes = document.getElementsByTagName('PageData')
    pages = []
    for page_data in self.__getPageDataInfo(page_data_nodes):
        debug('Page Data:', page_data)
        data_boxes = []
        for data_box in page_data['data_boxes']:
            args = []
            # text variable is to avoid problems with
            # escaping characters
            text = ''
            for var_name, value in data_box.items():
                if var_name == 'text':
                    text = value
                    continue
                real_value = '"""%s"""' % re.escape(value)
                try:
                    real_value = int(value)
                except ValueError:
                    pass
                args.append('%s = %s' % (var_name, real_value))
            # SECURITY(review): the constructor call is assembled from the
            # contents of the project file and evaluated as code; a crafted
            # project could inject expressions here.  eval() is used rather
            # than exec() so the box is also bound under Python 3.
            box = eval('DataBox(%s)' % ', '.join(args))
            box.text = text
            data_boxes.append(box)
        image_path = page_data['image_path']
        if not os.path.exists(image_path):
            image_path = os.path.join(self.configuration_dir,
                                      'images',
                                      images[image_path])
        page = PageData(image_path, data_boxes)
        pages.append(page)
    return pages
def __init__(self, path_to_image, window_size = None, contrast_tolerance = 120):
    """Open path_to_image and prepare its grayscale version.

    The original image and its 'L' mode conversion are stored on the
    instance.  When no window_size is given, it is set to the image height
    divided by 60.

    Raises ImageManipulationError when path_to_image is not a file or when
    the image cannot be opened or converted.
    """
    self.window_size = window_size
    self.contrast_tolerance = contrast_tolerance
    error_message = _("A problem occurred while trying to open the image:\n %s\n"
                      "Ensure the image exists or try converting it to another format.") % path_to_image
    if os.path.isfile(path_to_image):
        try:
            self.original_image = Image.open(path_to_image)
            self.black_n_white_image = self.original_image.convert('L')
            if not self.window_size:
                self.window_size = self.original_image.size[1] / 60.
            debug('Window Size: ', self.window_size)
        # A bare "except:" would also turn KeyboardInterrupt/SystemExit
        # into an ImageManipulationError.
        except Exception:
            debug(sys.exc_info())
            raise ImageManipulationError(error_message)
    else:
        debug(sys.exc_info())
        raise ImageManipulationError(error_message)
    self.bg_color = 255
def __pressedAngleDetectionButton(self, widget):
    """Detect the text angle of the box editor's image and set it.

    The image of the box editor is converted with
    graphics.convertPixbufToImage, the angle obtained from
    graphics.getHorizontalAngleForText is logged and set on the box editor.
    """
    editor_pixbuf = self.box_editor.getImage()
    editor_image = graphics.convertPixbufToImage(editor_pixbuf)
    detected_angle = graphics.getHorizontalAngleForText(editor_image)
    debug('ANGLE: ', detected_angle)
    self.box_editor.setAngle(detected_angle)
class OcrEnginesManager:
    """Keeps the list of OCR engines together with their XML file paths.

    self.ocr_engines holds (engine, xml_file) tuples.
    """

    def __init__(self, configuration_manager):
        self.ocr_engines = []
        self.configuration_manager = configuration_manager

    def getEnginesNames(self):
        """Return the names of the managed engines, in list order."""
        return [engine.name for engine, _path in self.ocr_engines]

    def getEnginePath(self, engine):
        """Return the XML path stored for engine, or None if not managed."""
        for eng, path in self.ocr_engines:
            if eng == engine:
                return path
        return None

    def replaceEngine(self, engine, new_engine):
        """Replace engine by new_engine, keeping the same XML path.

        new_engine is written with engineToXml to the path of the replaced
        engine.  Returns True when engine was found, False otherwise.
        """
        for index, (eng, path) in enumerate(self.ocr_engines):
            if eng == engine:
                self.engineToXml(new_engine, path)
                self.ocr_engines[index] = new_engine, path
                return True
        return False

    def makeEnginesFromFolder(self, folder):
        """Load the engines of folder and report the outdated ones.

        Files for which getEngineFromXml returns a falsy value are skipped.
        When no loaded engine has the configured favorite name, the first
        loaded engine becomes the favorite.

        Returns a dict with 'auto' and 'manual' lists of
        {'engine': ..., 'configuration': ...} entries for the engines whose
        version is lower than their default configuration's; 'auto' is used
        when the engine's arguments equal one of the 'old_arguments'.
        """
        self.ocr_engines = []
        favorite_engine_exists = False
        for xml_file in self.getXmlFilesInFolder(folder):
            engine = self.getEngineFromXml(xml_file)
            if engine:
                self.ocr_engines.append((engine, xml_file))
                favorite_engine_exists = favorite_engine_exists or \
                    self.configuration_manager.favorite_engine == engine.name
        if not self.ocr_engines:
            lib.debug("Warning: no engines found!")
        elif not favorite_engine_exists:
            self.configuration_manager.favorite_engine = \
                self.ocr_engines[0][0].name
        engines_needing_update = {'auto': [], 'manual': []}
        # The default configuration is looked up by the engine's own path,
        # not by the XML file it was loaded from.
        for engine, _xml_file in self.ocr_engines:
            default_conf = \
                self.configuration_manager.getEngineDefaultConfiguration(
                    engine.engine_path)
            if default_conf is None:
                continue
            if float(engine.version) < float(default_conf['version']):
                update_type = 'manual'
                for arguments in default_conf['old_arguments']:
                    if engine.arguments == arguments:
                        update_type = 'auto'
                        break
                engines_needing_update[update_type].append({
                    'engine': engine,
                    'configuration': default_conf
                })
        return engines_needing_update

    def migrateEngine(self, engine, configuration, only_version=False):
        """Bring engine up to date with configuration and save it.

        Unless only_version is set, the arguments, language argument and
        languages are taken from configuration; the version always is.  The
        engine is then written back through replaceEngine.
        """
        if not only_version:
            engine.arguments = configuration['arguments']
            engine.language_argument = configuration['language_argument']
            engine.setLanguages(configuration['languages'])
        engine.version = configuration['version']
        self.replaceEngine(engine, engine)

    def getEngineFromXml(self, xml_file_name):
        """Build an Engine from the children of an XML file's root node.

        The tag of every child of the root becomes a keyword argument name
        and its text the argument value.

        Returns the Engine, or None when the Engine constructor raises a
        TypeError or a WrongSettingsForEngine error.
        """
        root_node = ET.parse(xml_file_name).getroot()
        # Iterate over the element itself: Element.getchildren() is
        # deprecated and no longer exists in Python 3.9+.
        arguments = dict((child.tag, child.text) for child in root_node)
        try:
            engine = Engine(**arguments)
        except TypeError as exception:
            # Interpolate the message; passing it as a second argument left
            # the '%s' placeholder unformatted.
            lib.debug('Error when unserializing engine: %s' % exception)
            engine = None
        except WrongSettingsForEngine as we:
            lib.debug("Cannot load engine at %s: %s" %
                      (xml_file_name, str(we)))
            engine = None
        # makeEnginesFromFolder tests and stores the result, so the engine
        # has to be handed back.
        return engine
def removeTemporaryFolder(self):
    """Delete the folder at self.TEMPORARY_FOLDER and everything in it.

    A failure to remove the folder is reported through debug() instead of
    being raised.
    """
    try:
        shutil.rmtree(self.TEMPORARY_FOLDER)
    # Removal stays best-effort, but a bare "except:" would also swallow
    # KeyboardInterrupt and SystemExit.
    except Exception:
        debug('Error when removing the temporary folder: ' + \
              self.TEMPORARY_FOLDER)