def process_image(self, image_cv2: np.ndarray, options: dict) -> None:
    """
    Runs pytesseract on one page image and appends the result to self._data

    Parameters
    image_cv2 - page image as a cv2 / numpy array, treated as BGR
    options - dict read for the keys 'best', 'oem', 'psm' and 'preprocessing'
        best - whether to use the best model (or fast model)
        oem / psm - values placed verbatim into the tesseract config string
        preprocessing - whether to refine image temporarily with ImagePipeline
                        before running pytesseract

    Appends the tuple (page_data, image_stored_bytes, ocr_page_data) to self._data
    """
    # Only the model directory that is actually selected needs to be resolved
    tessdata_path = get_absolute_path(
        'tessdata/best' if options['best'] else 'tessdata/fast')
    custom_config = f'--oem {options["oem"]} --psm {options["psm"]} --tessdata-dir "{tessdata_path}"'

    # Running pipeline and collecting image metadata
    # cv2 stores images in BGR format, but pytesseract assumes RGB format. Perform conversion.
    rgb_image_cv2 = cv2.cvtColor(src=image_cv2, code=cv2.COLOR_BGR2RGB)

    # Setting up image processing pipeline (only run below if requested)
    image_pipeline = ImagePipeline()
    image_pipeline.add_step(name='Grayscale',
                            new_step=cv2.cvtColor,
                            image_param_name='src',
                            other_params={'code': cv2.COLOR_RGB2GRAY})
    image_pipeline.add_step(name='Binary Threshold',
                            new_step=cv2.threshold,
                            image_param_name='src',
                            other_params={
                                'thresh': 20,
                                'maxval': 255,
                                'type': cv2.THRESH_BINARY},
                            capture_index=1)

    # Image to be directly stored in db as JPEG bytes at maximum quality
    # (JPEG remains a lossy format even at quality 100).
    # cv2.imencode is expecting BGR image, not RGB, so the original array is used.
    # tobytes() replaces the deprecated ndarray.tostring(); the bytes are identical.
    encoded_image = cv2.imencode(ext='.jpg',
                                 img=image_cv2,
                                 params=[cv2.IMWRITE_JPEG_QUALITY, 100])[1]
    image_stored_bytes = encoded_image.tobytes()

    if options['preprocessing']:
        image_for_pytesseract = image_pipeline.run(image=rgb_image_cv2)
    else:
        image_for_pytesseract = rgb_image_cv2

    # Collects metadata on page text after refining with pipeline
    page_data = pytesseract.image_to_data(image=image_for_pytesseract,
                                          config=custom_config,
                                          output_type=Output.DICT)

    # OCRPageData object creation
    # Metadata on pipeline-refined image
    ocr_page_data = OcrPageData(image_to_data=page_data)

    self._data.append((page_data, image_stored_bytes, ocr_page_data))
def display_info(self):
    """
    When the information button is pressed, this window spawns with the
    information about the new document options
    """
    text_file = Qw.QTextBrowser()
    # Read inside a context manager so the file handle is closed promptly
    with open(get_absolute_path("information_doc_options.txt")) as info_file:
        text = info_file.read()
    text_file.setText(text)

    dialog = Qw.QDialog(parent=self)
    desktop = Qw.QDesktopWidget()
    desktop_size = desktop.availableGeometry(
        desktop.primaryScreen()).size()
    # resize() takes integers; the scaled sizes are floats, so truncate
    # explicitly (Python >= 3.10 no longer converts float to int implicitly)
    dialog.resize(int(desktop_size.width() * 0.2),
                  int(desktop_size.height() * 0.4))

    temp_layout = Qw.QHBoxLayout()
    temp_layout.addWidget(text_file)
    dialog.setWindowTitle("Information")
    dialog.setLayout(temp_layout)
    dialog.show()
def process_image(idx: int,
                  filepath: str,
                  oem: int = 3,
                  psm: int = 3,
                  best: bool = True,
                  preprocessing: bool = False) -> tuple:
    """
    Processes image using ImagePipeline

    Parameters
    idx - passed through unchanged as the first element of the returned tuple
    filepath - filepath where image or PDF is stored
    oem - OCR engine mode (0-3)
    psm - page segmentation mode (0-13)
          Modes 0-2 don't perform OCR, so don't allow those
    best - whether to use the best model (or fast model)
    preprocessing - whether to refine image temporarily with ImagePipeline
                    before running pytesseract

    Returns
    (idx, (page_data, image_stored_bytes, ocr_page_data)), or None (after
    printing the reason) when oem or psm is out of range
    """
    # Invalid modes are reported on stdout and abort with None, not an exception
    if oem not in range(4):
        print('oem must be an integer between 0 and 3 inclusive')
        return
    if psm not in range(3, 14):
        print('psm must be an integer between 3 and 13 inclusive')
        return

    # NOTE(review): cv2.imread returns None when the file cannot be read;
    # that case is not handled here and will fail in cvtColor below.
    image_cv2 = cv2.imread(filename=filepath, flags=cv2.IMREAD_COLOR)

    # Only the model directory that is actually selected needs to be resolved
    tessdata_path = get_absolute_path(
        'tessdata/best' if best else 'tessdata/fast')
    custom_config = f'--oem {oem} --psm {psm} --tessdata-dir "{tessdata_path}"'

    # Running pipeline and collecting image metadata
    # cv2 stores images in BGR format, but pytesseract assumes RGB format. Perform conversion.
    rgb_image_cv2 = cv2.cvtColor(src=image_cv2, code=cv2.COLOR_BGR2RGB)

    # Setting up image processing pipeline (only run below if requested)
    def grayscale_flat_field_correction(src: np.ndarray,
                                        ksize: int = 99) -> np.ndarray:
        """Divide the grayscale image by its median-blurred copy, scaled by the blur's mean."""
        image_grayscale = src if src.ndim == 2 else cv2.cvtColor(
            src=src, code=cv2.COLOR_BGR2GRAY)
        blur = cv2.medianBlur(src=image_grayscale, ksize=ksize)
        mean = cv2.mean(src=blur)[0]
        # It's fine if we divide by zero
        # NOTE(review): the quotient is a floating-point array and may hold
        # inf/nan where blur is 0 - confirm downstream steps accept that.
        with np.errstate(divide='ignore', invalid='ignore'):
            flat_field = (image_grayscale * mean) / blur
        return flat_field

    image_pipeline = ImagePipeline()
    image_pipeline.add_step(name='Grayscale',
                            new_step=cv2.cvtColor,
                            image_param_name='src',
                            other_params={'code': cv2.COLOR_RGB2GRAY})
    image_pipeline.add_step(name='Flat-Field',
                            new_step=grayscale_flat_field_correction,
                            image_param_name='src',
                            other_params={'ksize': 91})

    # Image to be directly stored in db as JPEG bytes at maximum quality
    # (JPEG remains a lossy format even at quality 100).
    # cv2.imencode is expecting BGR image, not RGB, so the original array is used.
    # tobytes() replaces the deprecated ndarray.tostring(); the bytes are identical.
    encoded_image = cv2.imencode(ext='.jpg',
                                 img=image_cv2,
                                 params=[cv2.IMWRITE_JPEG_QUALITY, 100])[1]
    image_stored_bytes = encoded_image.tobytes()

    if preprocessing:
        image_for_pytesseract = image_pipeline.run(image=rgb_image_cv2)
    else:
        image_for_pytesseract = rgb_image_cv2

    # Collects metadata on page text after refining with pipeline
    # Sets OMP_THREAD_LIMIT for this process before tesseract is invoked
    os.environ['OMP_THREAD_LIMIT'] = '1'
    page_data = pytesseract.image_to_data(image=image_for_pytesseract,
                                          config=custom_config,
                                          output_type=Output.DICT)

    # OCRPageData object creation
    # Metadata on pipeline-refined image
    ocr_page_data = OcrPageData(image_to_data=page_data)

    return (idx, (page_data, image_stored_bytes, ocr_page_data))
def __init__(self, new_doc_cb, parent=None):
    """
    Builds the document overview: search controls, remove-mode toggle and
    one button per OcrDocument row plus an 'Add New Document' button

    :param new_doc_cb: stored on the instance as self.new_doc_cb
    :param parent: parent widget forwarded to the base class constructor
    """
    super().__init__(parent)
    # The database is only needed while the document buttons are created
    db.connect(reuse_if_open=True)

    self._filter = ''
    self.new_doc_cb = new_doc_cb

    self.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                       Qw.QSizePolicy.MinimumExpanding)

    self._layout = Qw.QVBoxLayout()
    self.doc_grid = Qw.QGridLayout()
    self.scroll_area = Qw.QScrollArea()
    self.scroll_area.setWidgetResizable(True)
    self.ui_box = Qw.QHBoxLayout()

    self._doc_buttons = []

    self.search_bar = Qw.QLineEdit()
    self.search_bar.setPlaceholderText("Search for document name...")
    self.search_bar.textChanged.connect(self.update_filter)

    self.doc_search = Qw.QRadioButton("DOC")
    self.doc_search.clicked.connect(self.update_filter)
    self.doc_search.setChecked(True)
    self.ocr_search = Qw.QRadioButton("OCR")
    self.ocr_search.clicked.connect(self.update_filter)

    self.remove_mode = Qw.QPushButton("Enable remove mode")
    self.remove_mode.setCheckable(True)
    self.remove_mode.toggled.connect(self.set_remove_mode)

    self.ui_box.addWidget(self.doc_search)
    self.ui_box.addWidget(self.ocr_search)
    self.ui_box.addWidget(self.search_bar)
    self.ui_box.addWidget(self.remove_mode)

    # produces the document buttons that users can interact with
    for doc in OcrDocument.select():
        # assuming that each doc will surely have at least one page
        img = doc.pages[0].image
        name = doc.name

        doc_button = SingleDocumentButton(name, img, doc)
        # doc is bound as a default argument so each button opens its own
        # document rather than the last one of the loop
        doc_button.pressed.connect(
            lambda doc=doc: self.create_doc_window(doc))
        doc_button.setVisible(True)
        self._doc_buttons.append(doc_button)

    # Read the icon inside a context manager so the file handle is closed
    with open(get_absolute_path("icons/plus_icon.png"), "rb") as icon_file:
        new_doc_button_icon = icon_file.read()
    self.new_doc_button = SingleDocumentButton('Add New Document',
                                               new_doc_button_icon, None)
    self.new_doc_button.pressed.connect(
        lambda: self.create_new_doc_window())

    # Initially every document button is active
    self._active_docs = self._doc_buttons
    self.render_doc_grid()

    self._layout.addLayout(self.ui_box)
    self._layout.addWidget(self.scroll_area)

    self.setLayout(self._layout)
    db.close()
def __init__(self, new_doc_cb, doc=None, parent=None):
    """
    Builds the document options form: file selection buttons, the OCR
    option dropdowns, the dropped-file list and the submit button

    :param new_doc_cb: stored on the instance as self.new_doc_cb
    :param doc: existing document to add to, or None for a new document
    :param parent: parent widget forwarded to the base class constructor
    """
    super().__init__(parent)
    db.connect(reuse_if_open=True)

    self.new_doc_cb = new_doc_cb
    self._doc = doc
    self._doc_size = len(self._doc.pages) if self._doc is not None else 0

    # NOTE(review): parent defaults to None, yet parentWidget() is
    # dereferenced here - confirm callers always supply a parent.
    self.parentWidget().close_event_signal.connect(self.cleanup_temp_files)

    # --- file handling buttons ---
    self.display_preview_button = Qw.QPushButton(
        "Show document preview", default=False, autoDefault=False,
        parent=self)
    self.display_preview_button.setCheckable(True)
    if self._doc is None:
        self.display_preview_button.setEnabled(False)
    self.display_preview_button.toggled.connect(
        self.on_display_preview_button_toggled)

    self.choose_file_button = Qw.QPushButton(
        "Add files", default=False, autoDefault=False, parent=self)
    self.choose_file_button.clicked.connect(self.choose_files)

    self.remove_file_button = Qw.QPushButton(
        "Remove files", default=False, autoDefault=False, parent=self)
    self.remove_file_button.clicked.connect(self.remove_files)

    # --- options group ---
    self.options = Qw.QGroupBox("Options")

    self.name_label = Qw.QLabel("Document Name:")
    self.name_edit = Qw.QLineEdit(parent=self)
    if self._doc is not None:
        self.name_edit.setText(self._doc.name)
        # renaming is not permitted
        self.name_edit.setReadOnly(True)

    # Bug in qdarkstyle that makes dropdowns too large, so we need to add styles
    self.dropdown_style = """QComboBox::item:checked { height: 12px; border: 1px solid #32414B; margin-top: 0px; margin-bottom: 0px; padding: 4px; padding-left: 0px; }"""

    self.preset_label = Qw.QLabel("Preset:")
    self.preset_options = Qw.QComboBox()
    self.preset_options.setStyleSheet(self.dropdown_style)
    for preset_name in ("Screenshot", "Printed Text (PDF)",
                        "Written Paragraph", "Written Page", "Custom"):
        self.preset_options.addItem(preset_name)
    # Index 4 is the "Custom" entry
    self.preset_options.setCurrentIndex(4)
    self.preset_options.currentIndexChanged.connect(self.preset_changed)

    self.best_vs_fast = Qw.QLabel("Best Model or Fast Model:")
    self.best_vs_fast_options = Qw.QComboBox()
    self.best_vs_fast_options.setStyleSheet(self.dropdown_style)
    for model_name in ("Fast", "Best"):
        self.best_vs_fast_options.addItem(model_name)
    # Default should be Best
    self.best_vs_fast_options.setCurrentIndex(1)

    self.processing_label = Qw.QLabel("Perform image preprocessing:")
    self.processing_options = Qw.QComboBox()
    self.processing_options.setStyleSheet(self.dropdown_style)
    for choice in ("No", "Yes"):
        self.processing_options.addItem(choice)
    # default should be no
    self.processing_options.setCurrentIndex(0)
    self.processing_options.currentIndexChanged.connect(self.custom_preset)

    self.psm_label = Qw.QLabel("PSM Number")
    self.psm_num = Qw.QComboBox()
    self.psm_num.setStyleSheet(self.dropdown_style)
    for psm_value in range(3, 14):
        self.psm_num.addItem(str(psm_value))
    # Default should be 3
    self.psm_num.setCurrentIndex(0)
    self.psm_num.currentIndexChanged.connect(self.custom_preset)

    self.info_button = Qw.QPushButton(
        default=False, autoDefault=False, parent=self)
    self.info_button.setIcon(
        Qg.QIcon(get_absolute_path("icons/info_icon.png")))
    self.info_button.clicked.connect(self.display_info)

    self.status_bar = Qw.QStatusBar()
    self.status_bar.showMessage("Ready")

    form_column = Qw.QVBoxLayout()
    for option_widget in (self.name_label, self.name_edit,
                          self.preset_label, self.preset_options,
                          self.best_vs_fast, self.best_vs_fast_options,
                          self.processing_label, self.processing_options,
                          self.psm_label, self.psm_num):
        form_column.addWidget(option_widget)
    form_column.addWidget(self.info_button, alignment=Qc.Qt.AlignRight)
    self.options.setLayout(form_column)

    # --- chosen files list ---
    self.file_names_label = Qw.QLabel("Files Chosen: ")
    self.listwidget = DragList(self)
    self.listwidget.file_dropped_signal.connect(self.insert_files)
    self.listwidget.drag_complete_signal.connect(self.update_file_previews)

    self.submit = Qw.QPushButton(
        "Process Document", default=False, autoDefault=False, parent=self)
    self.submit.clicked.connect(self.process_document)

    # --- overall layout ---
    window_column = Qw.QVBoxLayout()
    for row_widget in (self.display_preview_button,
                       self.choose_file_button,
                       self.remove_file_button,
                       self.file_names_label,
                       self.listwidget,
                       self.options,
                       self.submit,
                       self.status_bar):
        window_column.addWidget(row_widget)

    outer_layout = Qw.QHBoxLayout()
    outer_layout.addLayout(window_column)
    self.setLayout(outer_layout)

    # For the preview image feature, keep two data types
    # Dictionary that stores PDF filepath -> ([image filepaths], temp_dir)
    self.pdf_previews = {}
    # List of filenames, with PDFs already converted to images
    self._pages = []

    db.close()
def __init__(self, doc, parent=None, filter=''):
    """
    Constructor method
    :param doc: OCRDocument
    :param parent: parent widget forwarded to the base class constructor
    :param filter: Filter from main window
    """
    super().__init__(parent=parent)
    db.connect(reuse_if_open=True)
    self.setWindowTitle(doc.name)

    desktop = Qw.QDesktopWidget()
    desktop_size = desktop.availableGeometry(
        desktop.primaryScreen()).size()
    # resize() takes integers; the scaled sizes are floats, so truncate
    # explicitly (Python >= 3.10 no longer converts float to int implicitly)
    self.resize(int(desktop_size.width() * 0.3),
                int(desktop_size.height() * 0.6))

    self._doc = doc
    self._filter = filter
    self._curr_page = 0
    self._pages = self._doc.pages
    self._pages_len = len(self._pages)
    # Store key as page index, value as list of blocks
    self._filtered_page_indexes = OrderedDict()

    self._layout = Qw.QVBoxLayout()

    self._options = Qw.QHBoxLayout()

    self.search_bar = Qw.QLineEdit()
    self.search_bar.setPlaceholderText("Search through notes...")
    self.search_bar.textChanged.connect(self.update_filter)

    self.case_sens_button = Qw.QRadioButton("Case Sensitive", parent=self)
    self.case_sens_button.toggled.connect(self.update_filter)

    self.filter_mode = Qw.QPushButton("Show matching pages",
                                      default=False,
                                      autoDefault=False,
                                      parent=self)
    self.filter_mode.setCheckable(True)
    self.filter_mode.toggled.connect(self.set_filter_mode)

    self._options.addWidget(self.search_bar, alignment=Qc.Qt.AlignTop)
    self._options.addWidget(self.case_sens_button)
    self._options.addWidget(self.filter_mode, alignment=Qc.Qt.AlignTop)

    self._layout.addLayout(self._options, alignment=Qc.Qt.AlignTop)

    # create button group for prev and next page buttons
    self.next_page_button = Qw.QPushButton("Next Page",
                                           default=False,
                                           autoDefault=False,
                                           parent=self)
    self.next_page_button.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                                        Qw.QSizePolicy.Fixed)
    self.next_page_button.clicked.connect(self.next_page)

    self.prev_page_button = Qw.QPushButton("Previous Page",
                                           default=False,
                                           autoDefault=False,
                                           parent=self)
    self.prev_page_button.setSizePolicy(Qw.QSizePolicy.MinimumExpanding,
                                        Qw.QSizePolicy.Fixed)
    self.prev_page_button.clicked.connect(self.prev_page)

    self.page_number_box = Qw.QLineEdit(parent=self)
    self.page_number_box.setSizePolicy(Qw.QSizePolicy.Minimum,
                                       Qw.QSizePolicy.Fixed)
    # One digit placeholder per digit of the page count
    self.page_number_box.setInputMask("0" * len(str(self._pages_len)))
    # Width of the page count's text plus 20 px
    self.page_number_box.setFixedWidth(self.page_number_box.fontMetrics(
    ).boundingRect(str(self._pages_len)).width() + 20)
    self.page_number_box.editingFinished.connect(
        self.on_page_number_box_change)

    # Added viewer
    self.viewer = PhotoViewer(parent=self)
    self._layout.addWidget(self.viewer)

    self.info_button = Qw.QPushButton(default=False,
                                      autoDefault=False,
                                      parent=self)
    self.info_button.setIcon(
        Qg.QIcon(get_absolute_path("icons/info_icon.png")))
    self.info_button.clicked.connect(self.display_info)

    self.rename_button = Qw.QPushButton("Rename doc",
                                        default=False,
                                        autoDefault=False,
                                        parent=self)
    self.rename_button.clicked.connect(self.rename_document)

    self.export_button = Qw.QPushButton("Export as PDF",
                                        default=False,
                                        autoDefault=False,
                                        parent=self)
    self.export_button.clicked.connect(self.export_pdf)

    self.add_pages_button = Qw.QPushButton("Add pages",
                                           default=False,
                                           autoDefault=False,
                                           parent=self)
    self.add_pages_button.clicked.connect(
        lambda: self.add_pages(self._doc))

    self._button_group = Qw.QHBoxLayout()
    self._button_group.addWidget(self.rename_button)
    self._button_group.addWidget(self.add_pages_button)
    self._button_group.addWidget(self.prev_page_button)
    self._button_group.addWidget(self.page_number_box)
    self._button_group.addWidget(self.next_page_button)
    self._button_group.addWidget(self.export_button)
    self._button_group.addWidget(self.info_button)
    self._layout.addLayout(self._button_group)

    self.setLayout(self._layout)

    # if filter passed through from main window, set the search bar text and update window
    if self._filter:
        self.search_bar.setText(self._filter)
        self.update_filter()

    self.jump_to_page(0)
    db.close()
from peewee import (Model, Check, PrimaryKeyField, CharField, IntegerField, BlobField, ForeignKeyField, TextField) from playhouse.sqlite_ext import SqliteExtDatabase from StudiOCR.util import get_absolute_path # Should likely change where the database files are stored DATABASE = get_absolute_path('ocr_files.db') # Do we need c extensions? db = SqliteExtDatabase( DATABASE, autoconnect=False, c_extensions=False, pragmas={ 'journal_mode': 'delete', # Use DELETE mode 'foreign_keys': 1 }) # Enforce foreign-key constraints class BaseModel(Model): class Meta: database = db # Table entry for an OCR'ed document class OcrDocument(BaseModel): id = PrimaryKeyField(null=False) name = CharField(unique=True) def delete_document(self):