def __init__(self, number, parent: QWidget = None):
    """
    Build the "Creator" dialog: general fields, time selection, date list
    and the OK / Cancel row, then wire the signals to their handlers.

    :param number: index preselected in both the start and end time
        combo boxes
    :param parent: parent widget forwarded to the base class constructor
    """
    super().__init__(parent)

    self._number: int = number
    self._edit_pair: StudentPair = None
    self._dates: DatePair = DatePair()

    # short aliases for the two form-layout columns
    label_role = QFormLayout.LabelRole
    field_role = QFormLayout.FieldRole

    # ---- window ----
    self.setWindowFlag(Qt.WindowContextHelpButtonHint, False)
    self.setWindowTitle(self.tr("Creator"))
    self.setMinimumSize(640, 350)
    self.resize(800, 400)

    # ---- "General" group ----
    self.group_box_general = QGroupBox(self.tr("General"))
    self.layout_general = QFormLayout(self.group_box_general)
    general = self.layout_general

    # row 0: title
    self.label_title = QLabel(self.tr("Title"))
    general.setWidget(0, label_role, self.label_title)
    self.line_edit_title = QLineEdit("")
    general.setWidget(0, field_role, self.line_edit_title)

    # row 1: lecturer, with completion over the known lecturers
    self.label_lecturer = QLabel(self.tr("Lecturer"))
    general.setWidget(1, label_role, self.label_lecturer)
    self.line_edit_lecturer = QLineEdit("")
    general.setWidget(1, field_role, self.line_edit_lecturer)

    completer = QCompleter(LecturerPair.get_lecturers())
    self.completer = completer
    completer.setModelSorting(QCompleter.CaseSensitivelySortedModel)
    completer.setCaseSensitivity(Qt.CaseInsensitive)
    completer.setFilterMode(Qt.MatchContains)
    self.line_edit_lecturer.setCompleter(completer)

    # row 2: type
    self.label_type = QLabel(self.tr("Type"))
    general.setWidget(2, label_role, self.label_type)
    self.combo_box_type = QComboBox()
    general.setWidget(2, field_role, self.combo_box_type)
    for type_name, type_attrib in TypePairAttrib.items():
        self.combo_box_type.addItem(type_name, type_attrib)

    # row 3: classes
    self.label_classes = QLabel(self.tr("Classes"))
    general.setWidget(3, label_role, self.label_classes)
    self.line_edit_classes = QLineEdit("")
    general.setWidget(3, field_role, self.line_edit_classes)

    # row 4: subgroup
    self.label_subgroup = QLabel(self.tr("Subgroup"))
    general.setWidget(4, label_role, self.label_subgroup)
    self.combo_box_subgroup = QComboBox()
    general.setWidget(4, field_role, self.combo_box_subgroup)
    for subgroup_name, subgroup_attrib in SubgroupPairAttrib.items():
        self.combo_box_subgroup.addItem(subgroup_name, subgroup_attrib)

    # ---- "Time" group ----
    self.group_box_time = QGroupBox(self.tr("Time"))
    self.layout_time = QFormLayout(self.group_box_time)
    time_form = self.layout_time

    self.label_start = QLabel(self.tr("Start"))
    time_form.setWidget(0, label_role, self.label_start)
    self.combo_box_start = QComboBox()
    time_form.setWidget(0, field_role, self.combo_box_start)

    self.label_end = QLabel(self.tr("End"))
    time_form.setWidget(1, label_role, self.label_end)
    self.combo_box_end = QComboBox()
    time_form.setWidget(1, field_role, self.combo_box_end)

    # both boxes start on the entry selected by ``number``
    self.combo_box_start.addItems(TimePair.time_starts())
    self.combo_box_start.setCurrentIndex(self._number)
    self.combo_box_end.addItems(TimePair.time_ends())
    self.combo_box_end.setCurrentIndex(self._number)

    # ---- "Date" group: list on the left, buttons on the right ----
    self.group_box_date = QGroupBox(self.tr("Date"))
    self.layout_date_edit = QHBoxLayout(self.group_box_date)

    self.list_widget_date = QListWidget(self.group_box_date)
    self.layout_date_edit.addWidget(self.list_widget_date)

    self.layout_date_edit_navigate = QVBoxLayout()
    self.layout_date_edit.addLayout(self.layout_date_edit_navigate)
    date_buttons = self.layout_date_edit_navigate

    self.push_button_add_date = QPushButton(self.tr("Add"))
    date_buttons.addWidget(self.push_button_add_date)

    self.push_button_edit_date = QPushButton(self.tr("Edit"))
    date_buttons.addWidget(self.push_button_edit_date)

    self.push_button_remove_date = QPushButton(self.tr("Remove"))
    date_buttons.addWidget(self.push_button_remove_date)

    date_buttons.addStretch(1)

    # ---- bottom row: OK / Cancel pushed to the right ----
    self.layout_navigate = QHBoxLayout()
    self.layout_navigate.addStretch(1)

    self.push_button_ok = QPushButton(self.tr("OK"))
    self.layout_navigate.addWidget(self.push_button_ok)

    self.push_button_cancel = QPushButton(self.tr("Cancel"))
    self.layout_navigate.addWidget(self.push_button_cancel)

    # ---- assemble the dialog ----
    self.layout_general_time = QVBoxLayout()
    self.layout_general_time.addWidget(self.group_box_general)
    self.layout_general_time.addWidget(self.group_box_time)

    self.layout_center = QHBoxLayout()
    self.layout_center.addLayout(self.layout_general_time)
    self.layout_center.addWidget(self.group_box_date)

    self.layout_main = QVBoxLayout()
    self.layout_main.addLayout(self.layout_center)
    self.layout_main.addLayout(self.layout_navigate)

    self.setLayout(self.layout_main)

    # ---- signal -> handler wiring ----
    connections = (
        (self.combo_box_start.currentIndexChanged,
         self.combo_box_start_changed),
        # double-clicking a date behaves like pressing "Edit"
        (self.list_widget_date.itemDoubleClicked,
         self.push_button_edit_date_clicked),
        (self.push_button_add_date.clicked,
         self.push_button_add_date_clicked),
        (self.push_button_edit_date.clicked,
         self.push_button_edit_date_clicked),
        (self.push_button_remove_date.clicked,
         self.push_button_remove_date_clicked),
        (self.push_button_ok.clicked,
         self.push_button_ok_clicked),
        (self.push_button_cancel.clicked,
         self.push_button_cancel_clicked),
    )
    for signal, handler in connections:
        signal.connect(handler)
def import_from_pdf(process_id, manager: ImportManager) -> None:
    """
    Worker routine: turn queued PDF timetables into schedule files.

    File paths are taken from ``manager.queue`` until it reports empty.
    For every file the first rendered page is converted to an image,
    rectangular regions are located through OpenCV contours and read
    with Tesseract. Regions that contain a known start and end time are
    remembered as time columns; the remaining regions are parsed into
    pairs, which receive the time of the nearest time column and are
    stored in a ``Schedule``. The schedule is saved next to the source
    file as ``.json`` and, depending on ``manager.weekly`` /
    ``manager.full``, exported to PDF.

    Progress is reported through ``manager.progress_value_list`` and
    ``manager.progress_text_list`` at index ``process_id``. Setting
    ``manager.flags["stop"]`` makes the worker leave its loop early.
    Any exception raised while handling a file is printed together with
    its traceback and the worker moves on to the next queue check.

    :param process_id: index of this worker in the manager's progress lists
    :param manager: shared import settings, work queue, flags and
        progress lists
    """
    # Local import: only needed to recognise a queue that was drained
    # between the ``empty()`` check and the ``get()`` call.
    from queue import Empty as QueueEmpty

    tesseract = TesseractWrapper(tesseract_path=manager.tesseract_path)

    while not manager.queue.empty():
        try:
            try:
                file_path = manager.queue.get(True, 1)
            except QueueEmpty:
                # Nothing arrived within the timeout (e.g. the last file
                # was taken elsewhere); this is not an error, re-check.
                continue

            file = QFileInfo(file_path)
            # absolute path without the 4-character extension (".pdf")
            base_path = file.absoluteFilePath()[0:-4]

            # loop-invariant lookups, fetched once per file
            time_starts = TimePair.time_starts()
            time_ends = TimePair.time_ends()

            # convert from pdf to PIL image (only the first page is used)
            img_pdf = pdf2image.convert_from_path(
                file.absoluteFilePath(),
                dpi=manager.dpi,
                poppler_path=manager.poppler_path)
            img_pdf = img_pdf[0].convert('RGB')

            # convert to NumPy array, reversing the channel axis
            # (RGB -> BGR) for OpenCV
            img = np.array(img_pdf)
            img = img[:, :, ::-1].copy()

            # set mask
            grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            thresh = cv.threshold(grey, 127, 255, cv.THRESH_BINARY)[1]

            if manager.flags["stop"]:
                break

            # find contours
            contours = cv.findContours(thresh, cv.RETR_TREE,
                                       cv.CHAIN_APPROX_SIMPLE)[0]

            height, width = img.shape[:2]
            # max and min area of a rect that is accepted as a table cell
            max_area = height * width / 20
            min_area = max_area / 40

            cells = []          # (left x, right x, normalized text)
            time_cells = {}     # time index -> (left x, right x)

            contours_number = len(contours)
            number = 0          # running index of collected cells

            for k, contour in enumerate(contours, 1):
                rect = cv.minAreaRect(contour)
                area = int(rect[1][0] * rect[1][1])

                if min_area < area < max_area:
                    if manager.flags["stop"]:
                        break

                    x, y, w, h = cv.boundingRect(contour)
                    crop_img = img[int(y):int(y + h), int(x):int(x + w)]

                    txt = tesseract.to_string(crop_img)

                    # A time header must contain both the start and the
                    # end time of the same slot.
                    found = False
                    for i in range(8):
                        if time_starts[i] in txt and time_ends[i] in txt:
                            time_cells[i] = (x, x + w)
                            found = True
                            break

                    if not found:
                        cells.append((x, x + w, " ".join(txt.split())))

                        # draw debug rect with the cell number, so a cell
                        # index can be located on the debug image
                        if manager.debug_image:
                            box = cv.boxPoints(rect)
                            # np.int0 was removed in NumPy 2.0; np.intp
                            # is the type it aliased
                            box = np.intp(box)
                            blue_color = (255, 0, 0)
                            center = (int(rect[0][0]), int(rect[0][1]))

                            cv.drawContours(img, [box], 0, blue_color, 2)
                            cv.putText(img, str(number),
                                       (center[0] - 100, center[1] - 40),
                                       cv.FONT_HERSHEY_SIMPLEX, 3,
                                       blue_color, 12)

                        number += 1

                # contour analysis accounts for the first 70% of progress
                process = int(k / contours_number * 70)
                manager.progress_value_list[process_id] = process
                manager.progress_text_list[process_id] = \
                    f"{file.baseName()} {process}%"

            if manager.debug_image:
                cv.imwrite(file_path[0:-4] + "-debug.jpg", img)

            if manager.flags["stop"]:
                break

            schedule = Schedule()

            cells_number = len(cells)
            for k, cell in enumerate(cells):
                if manager.flags["stop"]:
                    break

                start_x, end_x, text = cell
                # horizontal extent of the column of time slot 0
                first_left, first_right = time_cells[0]

                # Skip cells whose right edge is closer to the first time
                # column than their left edge.
                if abs(end_x - first_left) >= abs(start_x - first_left):
                    text = "\n".join(re.findall(r".*?\]", text))

                    # Re-parse until the text is accepted; each failure
                    # is handed to confuse_loop, which returns new text.
                    while True:
                        try:
                            pairs = parse_pair(manager, text)
                            break
                        except InvalidDatePair as ex:
                            text = confuse_loop(
                                process_id, manager,
                                ConfuseSituationException(
                                    file.absoluteFilePath(), text,
                                    confuse=str(ex)))
                        except ConfuseSituationException as ex:
                            ex.filename = base_path + "-debug.jpg"
                            ex.cell = k
                            ex.context = text
                            if ex.maybe_answer == "":
                                ex.maybe_answer = text

                            text = confuse_loop(process_id, manager, ex)

                    if pairs:
                        # pick the time columns whose edges are nearest
                        # to the cell's left and right edges
                        diff_start = abs(start_x - first_left)
                        diff_end = abs(end_x - first_right)
                        start, end = 0, 0

                        for index, (left, right) in time_cells.items():
                            diff = abs(start_x - left)
                            if diff < diff_start:
                                diff_start = diff
                                start = index

                            diff = abs(end_x - right)
                            if diff < diff_end:
                                diff_end = diff
                                end = index

                        for pair in pairs:
                            pair["time"].set_time(time_starts[start],
                                                  time_ends[end])
                            schedule.add_pair(pair)

                # parsing accounts for the remaining 30% of progress
                process = int(70 + k / cells_number * 30)
                manager.progress_value_list[process_id] = process
                manager.progress_text_list[process_id] = \
                    f"{file.baseName()} {process}%"

            schedule.save(base_path + ".json")
            print(base_path + ".json")

            if manager.flags["stop"]:
                break

            if manager.weekly:
                export_weeks_to_pdf(
                    schedule, file.baseName(), True,
                    base_path + "-weekly.pdf",
                    manager.font_name, manager.font_path, manager.encoding,
                    manager.start, manager.end,
                    manager.color_a, manager.color_b)

            if manager.full:
                export_full_to_pdf(
                    schedule, file.baseName(),
                    base_path + "-full.pdf",
                    manager.font_name, manager.font_path, manager.encoding)

        except Exception as ex:
            # worker boundary: report and keep serving the queue
            print("Exception, process:", process_id, "is:", ex)
            traceback.print_exc()

    manager.progress_value_list[process_id] = 100
    manager.progress_text_list[process_id] = "Work complete"