def get(self, page=1, name=None, date=None):
    """Look up employees by name, as a filtered full list, or as one page.

    * ``name`` given: return whatever ``find_one`` yields for that name.
    * ``page == 'all'``: return a list of summary dicts, sorted by name, for
      employees whose status is not '퇴사'.  An employee that has an
      ``endDate`` is kept only when ``date`` is given and is not later than
      that ``endDate``.
    * otherwise: pass every document, sorted by department then name, to
      ``Page(page).paginate`` and return its result.
    """
    if name:
        return self.collection.find_one({'name': name})

    if page != 'all':
        cursor = self.collection.find(sort=[('department', 1), ('name', 1)])
        pager = Page(page)
        return pager.paginate(cursor)

    roster = []
    for doc in self.collection.find(sort=[('name', 1)]):
        # Missing or empty fields are normalised to None.
        email = doc.get('email') or None
        status = doc.get('status') or None

        # Only employees who have not left the company are included.
        if status == '퇴사':
            continue
        # With an end date on record, the employee is listed only when a
        # reference date is supplied and falls on or before that end date.
        if 'endDate' in doc and not (date and date <= doc['endDate']):
            continue

        roster.append({
            'name': doc['name'],
            'employeeId': doc['employeeId'],
            'email': email,
            # '비상근' is the one value that marks an employee as not regular.
            'regular': doc.get('regular') != '비상근',
            'status': status,
        })
    return roster
def handle_missing_page_id(aligned_page: Page, original_page: Page,
                           list_id: str, id_bounding_box: BoundingBox,
                           page_number: int) -> Tuple[bool, Page]:
    """Decide which page to use when the expected list ID was not matched.

    The ID is first re-read from ``original_page``; a match is reported as a
    homography error and the original page is used.  Otherwise the ID region
    of the original page is shown to the user until 'y' or 'n' is pressed.

    Returns:
        ``(True, original_page)`` when the ID matches or the user presses
        'y'; ``(False, aligned_page)`` when the user presses 'n'.
    """
    # If the ID is readable on the raw page, treat it as a homography issue.
    if original_page.get_list_id(id_bounding_box) == list_id:
        print('Homography error on page {}, using uncorrected page instead.'.format(page_number))
        return True, original_page

    # Not found on the raw page either: ask the user to confirm the ID.
    id_area = original_page.get_roi(id_bounding_box).add_border(top=30)
    question = "ID: {}? (y|n)".format(list_id)
    approve_key = ord("y")
    reject_key = ord("n")

    # Keep showing the image until one of the two keys is pressed.
    while True:
        key = id_area.show(title="review", message=question, resize=False)
        if key == approve_key:
            cv2.destroyAllWindows()
            return True, original_page
        if key == reject_key:
            cv2.destroyAllWindows()
            return False, aligned_page
def generate_error_pages(error_images: List[Image], skipped_pages: List[Page],
                         list_id: str) -> None:
    """Lay error images out on pages and save them together with skipped pages.

    Error images are stacked top to bottom on a blank page; a page is queued
    once it holds as many images as fit (sized by the first image).  The
    queued pages are appended after ``skipped_pages`` and handed to
    ``save_error_pages``.

    Args:
        error_images: images to place on the error pages; may be empty.
        skipped_pages: pages to put first in the output.  Not modified.
        list_id: identifier passed through to ``save_error_pages``.
    """
    # Copy so the caller's list is not extended as a side effect.
    error_pages = list(skipped_pages)

    # Nothing to lay out: error_images[0] below would raise IndexError.
    if not error_images:
        if error_pages:
            save_error_pages(error_pages, list_id)
        return

    border = int(2 * utils.MARGIN)
    page = utils.make_blank_page()
    # NOTE(review): the return values of crop()/add_border() are discarded
    # here, as in the original; confirm both methods modify the page in place.
    page.crop(top=border, right=border)  # to account for printing

    # How many error images fit on one page.  Clamped to at least one so an
    # image taller than the page cannot cause a modulo-by-zero below.
    num_images_per_page = max(
        1, math.floor(page.size.h / error_images[0].size.h))

    images_on_page = 0
    for i, error_image in enumerate(error_images):
        # The current page is full: queue it and restart at the top.
        # NOTE(review): `page` is not replaced with a fresh blank page here,
        # so the next insert is drawn onto the previous page's content --
        # confirm whether that is intended.
        if i > 0 and i % num_images_per_page == 0:
            page.add_border(top=border, right=border)
            error_pages.append(page)
            images_on_page = 0

        insert_point = Point(0, images_on_page * error_image.size.h)
        page = Page(page.insert_image(error_image, insert_point))
        images_on_page += 1

    # Queue the last (possibly partially filled) page.
    page.add_border(top=border, right=border)
    error_pages.append(page)

    # Save everything out as a PDF.
    save_error_pages(error_pages, list_id)
def get_device_list(self, page=1, date=None):
    """List the grouped ``mac`` values recorded on ``date`` after the overnight cutoff.

    Documents are matched on ``date`` and on ``time`` greater than
    ``WORKING['time']['overNight']``, then grouped by ``mac``.  Every value
    of every grouped document is collected.

    Returns the plain list when ``page == 'all'``, otherwise the result of
    ``Page(page).paginate`` on that list.
    """
    pipeline = [
        {'$match': {'date': date, 'time': {"$gt": WORKING['time']['overNight']}}},
        {'$group': {'_id': '$mac'}},
    ]
    device_list = [
        value
        for grouped in self.collection.aggregate(pipeline)
        for value in grouped.values()
    ]
    if page == 'all':
        return device_list
    pager = Page(page)
    return pager.paginate(device_list)
def get(self, page=1, date=None):
    """Fetch devices, optionally limited to those whose ``endDate`` is recent.

    With no ``date`` and ``page == 'all'`` the whole collection is returned.
    In every other case the reference date (``date``, or today's date from
    ``check_time()`` when omitted) is moved back with
    ``get_date_several_months_before(..., delta=2)`` and only documents whose
    ``endDate`` is greater than that are selected.

    Returns the query result itself when ``page == 'all'``, otherwise the
    result of ``Page(page).paginate``.
    """
    if page == 'all' and date is None:
        devices = self.collection.find()
    else:
        if date is None:
            # Third element of check_time()'s 4-tuple is today's date.
            _, _, date, _ = check_time()
        cutoff = get_date_several_months_before(date, delta=2)
        devices = self.collection.find({'endDate': {"$gt": cutoff}})

    if page == 'all':
        return devices
    return Page(page).paginate(devices)
def markup_response_codes(page: Page, list_id: str, line_number: int) -> List[ResponseCode]:
    """Interactively collect the response codes found on one line of a page.

    For each code, the user marks a region, the text found in that region is
    shown, and the user confirms it or types a replacement on stdin.  The
    loop continues until the user enters 'q'.

    Args:
        page: page to mark up.
        list_id: list identifier, used to look up the list directory.
        line_number: line of the page whose region is presented for markup.

    Returns:
        The ResponseCode objects in the order they were marked.
    """
    # Get line in the page.
    line_bb = page.get_line_bb(line_number, utils.get_list_dir(list_id), padding=100, right_half=True)
    markup_roi = page.get_roi(line_bb)
    # Iterate through and mark each scan code.
    response_codes = []
    question_number = 1
    while True:
        print("Please mark each response code in survey question %d." % question_number)
        bounding_box = markup_roi.markup()
        text = markup_roi.find_text(bounding_box)
        # sometimes OCR picks up stray symbols, get rid of them.
        text = ''.join(ch for ch in text if ch.isalnum())
        # The box was marked on the line ROI; re-express it using the line's
        # top-left corner (presumably converting to page coordinates -- confirm).
        bounding_box = bounding_box.update_coordinate_system(line_bb.top_left)
        # NOTE(review): `roi` is never read afterwards; confirm whether this
        # call is needed (e.g. as a bounds check) before removing it.
        roi = page.get_roi(bounding_box)
        response_code = ResponseCode(bounding_box, question_number, text)
        print("Extracted scan code: \"%s\"" % response_code.value)
        # Keep asking until the user answers 'y'; any other answer prompts for
        # a replacement value, which is then put up for confirmation again.
        while True:
            print("Is this correct? [y|n]")
            yes_no = input().lower()
            if yes_no == "y":
                break
            else:
                print("Please enter the correct response code: ")
                response_code.value = input()
        response_codes.append(response_code)
        print(
            r"""Hit enter (no input) to mark another response in the same survey question. Enter 'n' to move to the next survey question. Enter 'q' to finish. [enter|n|q]"""
        )
        next_step = input().lower()
        # 'n' advances to the next question, 'q' stops; anything else stays
        # on the same question.
        if next_step == "n":
            question_number += 1
        elif next_step == "q":
            break
    return response_codes
def summary(self, page=1, start=None, end=None):
    """Build per-employee attendance totals for a date range and paginate them.

    Only when both ``start`` and ``end`` are given are records with
    ``start <= date <= end`` read (sorted by name, then date descending).
    Records whose ``workingHours`` is None are ignored.  For every name the
    counters ``totalDay``, ``totalWorkingDay``, ``totalWorkingHours`` and one
    counter per entry of ``WORKING['inStatus']`` / ``WORKING['status']`` are
    kept; each per-name dict is passed through ``self.get_summary`` before
    being listed.

    Returns the result of ``Page(page).paginate`` on the list of summaries
    (an empty list when no range was given).
    """
    records = None
    if start and end:
        records = self.collection.find(
            {'date': {"$gte": start, "$lte": end}},
            sort=[('name', 1), ('date', -1)])

    summary_list = []
    if records:
        per_name = OrderedDict()
        for record in records:
            hours = record['workingHours']
            if hours is None:
                continue

            name = record['name']
            totals = per_name.setdefault(name, {'name': name})
            # Make sure every counter exists before it is incremented.
            for counter in ('totalDay', 'totalWorkingDay', 'totalWorkingHours'):
                totals.setdefault(counter, 0)
            for status in WORKING['inStatus']:
                totals.setdefault(status, 0)
            for status in WORKING['status']:
                totals.setdefault(status, 0)

            totals['totalDay'] += 1
            if 'status' in record:
                first_status = record['status'][0]
                if first_status:
                    totals[first_status] += 1
            # A reason is counted only when it is one of the known counters.
            reason = record.get('reason')
            if reason and reason in totals:
                totals[reason] += 1
            totals['totalWorkingHours'] += hours

        for name in per_name:
            per_name[name] = self.get_summary(per_name[name])
            summary_list.append(per_name[name])

    pager = Page(page)
    return pager.paginate(summary_list)
def get_temp_page(pages, page_number: int, rotate_dir: Rotation,
                  temp_filename: str) -> Page:
    """Write one page out as a JPEG in the temp directory and load it as a Page.

    Args:
        pages: indexable collection of objects with a ``save(path, format)``
            method.
        page_number: 1-based number of the page to use.
        rotate_dir: rotation passed to ``Page.from_file``.
        temp_filename: file name appended to ``utils.TEMP_DIR``.
    """
    temp_path = "{}{}".format(utils.TEMP_DIR, temp_filename)
    # Round-trip through a file on disk so the page can be read in again.
    selected = pages[page_number - 1]
    selected.save(temp_path, 'JPEG')
    return Page.from_file(temp_path, rotate_dir)
def scan_page(list_id: str, rotate_dir: Rotation, args, page_number: int,
              ref_page: Page, ref_bounding_boxes: Dict[str, BoundingBox],
              list_dir: str, results_scans, results_stats, results_errors,
              previous_scans: dict, backup_writer):
    """Scan every barcode on one page and fold the outcome into the results.

    The page is loaded, its list ID is checked against ``list_id``, its
    barcodes are decoded and each one is handed to ``scan_barcode``.  A page
    whose ID is rejected, or on which no barcode is found, is recorded under
    ``results_errors['skipped_pages']`` as ``{page_number: page}``.

    Returns:
        The ``(results_scans, results_stats, results_errors)`` triple.
    """
    page = Page.from_file(utils.get_page_filename(list_id, page_number), rotate_dir)
    response_codes = utils.load_response_codes(list_id)
    id_box = ref_bounding_boxes["list_id"]

    # Align the page to the reference page.
    # NOTE(review): the list ID below is read from the unaligned `page`, and
    # handle_missing_page_id only hands `aligned_page` back together with
    # False (which skips the page), so the aligned image is never the one
    # that gets scanned -- confirm this is intended.
    aligned_page = page.align_to(ref_page)
    if utils.__DEBUG__:
        aligned_page.show(title="aligned page")

    # Confirm the page carries the expected list ID.
    page_list_id = page.get_list_id(id_box)
    if page_list_id != list_id:
        valid_id, page = handle_missing_page_id(aligned_page, page, list_id,
                                                id_box, page_number)
        if not valid_id:
            print(
                'Error: Page {} has ID {}, but active ID is {}. Page {} has been skipped.'
                .format(page_number + 1, page_list_id, list_id, page_number + 1))
            results_errors['skipped_pages'].append({page_number: page})
            return results_scans, results_stats, results_errors

    # Decode the barcodes; the scanner needs the unthresholded image.
    barcodes = pyzbar.decode(page.raw_image)
    if len(barcodes) == 0:
        print('Error: Cannot find barcodes. Page {} has been skipped.'.format(
            page_number + 1))
        results_errors['skipped_pages'].append({page_number: page})
        return results_scans, results_stats, results_errors

    # scan_barcode records each voter ID it handles in this set, so its final
    # size is the number of distinct barcodes scanned on the page.
    voter_ids: Set = set()
    for barcode in barcodes:
        results_scans, results_stats, results_errors = scan_barcode(
            barcode, page, ref_bounding_boxes, list_dir, response_codes, args,
            results_scans, results_stats, results_errors, previous_scans,
            backup_writer, voter_ids)

    check_num_barcodes(page, list_dir, len(voter_ids), results_stats)
    if utils.__DEBUG__:
        page.show()
    return results_scans, results_stats, results_errors
def check_num_barcodes(page: Page, list_dir: str, num_scanned_barcodes: int,
                       results_stats) -> None:
    """Count barcodes by pixel density and record how many the scanner missed.

    Lines are inspected from 1 up to ``utils.MAX_BARCODES_ON_PAGE``, stopping
    at the first line whose barcode region does not look occupied.  If more
    barcodes are counted than were scanned, the difference is added to
    ``results_stats["num_missed_barcodes"]``; if fewer, a warning is printed.
    """
    # If a barcode exists in the area it averages 29k black pixels.
    BARCODE_EXISTS_THRESHOLD = 20000

    num_actual_barcodes = 0
    for line_number in range(1, utils.MAX_BARCODES_ON_PAGE + 1):
        line_bb = page.get_line_bb(line_number, list_dir)
        # Extract the barcode portion: move the left edge to 700 short of the
        # right edge.
        line_bb.top_left.x = line_bb.bottom_right.x - 700
        inverted_roi = page.get_roi(line_bb).invert()
        if inverted_roi.numWhitePixels() > BARCODE_EXISTS_THRESHOLD:
            num_actual_barcodes += 1
            continue
        # We have likely reached the end of the page.
        break

    missed = num_actual_barcodes - num_scanned_barcodes
    if missed < 0:
        print(
            "Something went wrong with the image alignment! Cannot accurately count missed barcodes."
        )
    elif missed > 0:
        results_stats["num_missed_barcodes"] += missed
def get(self, page=1, start=None, end=None):
    """Return one page of the collection, sorted by ``create_time`` descending.

    ``start`` and ``end`` are accepted but not used by this method.
    """
    newest_first = self.collection.find(sort=[('create_time', -1)])
    return Page(page).paginate(newest_first)
def main() -> None:
    """Scan every page of a list, back the results up to CSV and report on them.

    Reads the command-line arguments, loads the reference page and bounding
    boxes for the list, scans each page from ``args['start_page']`` onward
    while writing results to a backup CSV, writes the final results CSV and
    then either runs the test suite (when ``args['test_file']`` is set) or
    prints statistics.
    """
    args = parse_args()
    list_id = args["list_id"]
    check_files_exist(list_id)

    list_dir: str = utils.get_list_dir(list_id)
    rotate_dir = utils.map_rotation(args["rotate_dir"])
    ref_bounding_boxes = utils.load_ref_boxes(list_dir)
    ref_page = Page.from_file(list_dir + utils.CLEAN_IMAGE_FILENAME, rotate_dir)

    # Accumulated scan results.
    results_scans: list = []

    # Things to track for error reporting.
    results_stats = {
        'num_scanned_barcodes': 0,
        'num_missed_barcodes': 0,
        'num_error_barcodes': 0,
        'incorrect_scans': [],
    }

    # Material for building the error PDF for human scanning.
    results_errors: dict = {
        'errors_for_human': [],
        'skipped_pages': [],
    }

    # Write out to a CSV backup while processing the list.  The previous
    # scans are loaded first because opening the file in 'w' mode truncates it.
    backup_filename, colnames = prep_backup_csv(list_dir, list_id)
    previous_scans = load_previous_scans(backup_filename, args)

    # newline='' is what the csv module requires for files it writes to;
    # without it every row is followed by a blank line on Windows.
    with open(backup_filename, mode='w', newline='') as backup_csv:
        backup_writer = csv.DictWriter(backup_csv, fieldnames=colnames)
        backup_writer.writeheader()

        num_pages = len(
            os.listdir("{}/{}".format(list_dir, utils.WALKLIST_DIR)))
        for page_number in range(args['start_page'], num_pages):
            print('===Scanning page {} of {} ==='.format(
                page_number + 1, num_pages))
            results_scans, results_stats, results_errors = scan_page(
                list_id, rotate_dir, args, page_number, ref_page,
                ref_bounding_boxes, list_dir, results_scans, results_stats,
                results_errors, previous_scans, backup_writer)

    # Output results.
    output_results_csv(args['list_id'], list_dir, results_scans)
    # generate_error_pages(results_errors['errors_for_human'], results_errors['skipped_pages'], args['list_id'])

    # Show the list of skipped pages; each entry is a {page_number: page} dict.
    print('Skipped {} pages:'.format(len(results_errors['skipped_pages'])))
    for page in results_errors['skipped_pages']:
        print(page.keys())

    # Run the test suite if set, otherwise print statistics.
    if args["test_file"]:
        test.run_test_suite(args['test_file'], results_scans)
    else:
        show_statistics(results_stats, args)
def scan_barcode(barcode, page, ref_bounding_boxes, list_dir, response_codes,
                 args, results_scans, results_stats, results_errors,
                 previous_scans, backup_writer,
                 voter_ids) -> Tuple[list, dict, dict]:
    """Process one decoded barcode and record the voter's circled responses.

    Invalid barcodes and voter IDs already present in ``voter_ids`` are
    skipped.  Otherwise the voter ID is added to ``voter_ids``, the scanned
    counter is incremented, and the result is either reused from
    ``previous_scans`` (outside testing mode) or read from the page, with a
    manual review when an error is detected or ``args["manual_review"]`` is
    set.  The result is appended to ``results_scans`` and written to the
    backup.

    Returns:
        The ``(results_scans, results_stats, results_errors)`` triple.
    """
    barcode_info = extract_barcode_info(barcode, page)
    # Skip if not a valid barcode.
    if not barcode_info:
        return results_scans, results_stats, results_errors
    barcode_coords, voter_id = barcode_info

    # Skip barcodes that were already read on this page.
    if voter_id in voter_ids:
        return results_scans, results_stats, results_errors
    voter_ids.add(voter_id)

    # Increment the barcodes counter.
    results_stats['num_scanned_barcodes'] += 1

    if voter_id in previous_scans and not args["test_file"]:
        # Use the existing info if already scanned, unless in testing mode.
        print('Already scanned {}'.format(voter_id))
        results_dict = previous_scans[voter_id]
    else:
        # New barcode to scan.
        if utils.__DEBUG__:
            cv2.rectangle(page, barcode_coords.top_left.to_tuple(),
                          barcode_coords.bottom_right.to_tuple(),
                          (255, 0, 255), 3)
            page.show()

        # Get the corresponding response codes region.
        response_bounding_box = get_response_for_barcode(
            barcode_coords, ref_bounding_boxes["response_codes"], page.size)

        # Figure out which responses are circled.  (The reference
        # response-codes image used to be loaded from disk here for every
        # barcode but was never used, so that load has been dropped.)
        circled_responses, has_error = get_circled_responses(
            response_bounding_box, response_codes, page, list_dir)
        has_error = has_error or error_check_responses(circled_responses)

        # If there is an error at this point, add to the error tally.
        if has_error:
            results_stats['num_error_barcodes'] += 1

        # Do a manual review on error or when flagged, unless in testing mode.
        if (has_error or args["manual_review"]) and not args["test_file"]:
            verdict_right, circled_responses = manual_review(
                response_bounding_box, page, circled_responses, voter_id,
                response_codes)
            # A false verdict marks this voter's scan as incorrect.
            if not verdict_right:
                results_stats['incorrect_scans'].append(voter_id)

        # In testing mode, convert None circled_responses to an empty list.
        if args["test_file"] and circled_responses is None:
            circled_responses = []

        results_dict = build_results_dict(voter_id, circled_responses)

    # Save results.
    results_scans.append(results_dict)
    write_to_backup(results_dict, backup_writer)
    return results_scans, results_stats, results_errors
def create_error_image(page: Page, barcode_coords: BoundingBox,
                       first_response_coords: BoundingBox) -> Image:
    """Cut out the region of ``page`` spanning a barcode and its responses.

    The region is computed by ``get_response_including_barcode`` from the
    two bounding boxes and the page size.
    """
    region = get_response_including_barcode(
        barcode_coords, first_response_coords, page.size)
    return page.get_roi(region)
def scrape_words(url):
    """Return the lower-cased, stripped text of each matching ``b`` element at *url*.

    Elements are selected with the CSS selector
    ``.podcast_table_home .pod_body b``.
    """
    selector = '.podcast_table_home .pod_body b'
    cleaned = []
    for node in Page(url).css(selector):
        cleaned.append(node.text_content().lower().strip())
    return cleaned
def attend(self, page=None, name=None, start=None, end=None):
    """Return attendance records, either as a flat export or as one page.

    ``page == 'all'``: return a list of dicts (name, rank, department, date,
    begin, end, reason) for ``name``, limited to ``start``..``end`` when
    ``start`` is given, sorted by date ascending.  Times are rendered as
    ``HH:MM``; missing times and reasons become ``''``.

    Otherwise: query the given date range (or ``self.today`` when no full
    range is given), optionally restricted to ``name``, paginate the result
    and return ``(paging, self.today, records, summary)``.  When ``name`` is
    given the summary holds the counters computed from the page's records,
    passed through ``self.get_summary``; otherwise it is an empty
    OrderedDict.
    """
    def hhmm(raw):
        # 'HHMM' -> 'HH:MM'; empty or missing values render as ''.
        return raw[0:2] + ':' + raw[2:4] if raw else ''

    if page == 'all':
        employee = self.employee.get(name=name)
        if start:
            data_list = self.collection.find(
                {'date': {"$gte": start, "$lte": end}, 'name': name},
                sort=[('name', 1), ('date', 1)])
        else:
            data_list = self.collection.find({'name': name}, sort=[('date', 1)])

        attend_list = []
        for data in data_list:
            # Separate local names so the `end` parameter is not shadowed.
            begin_text = hhmm(data['begin'])
            end_text = hhmm(data['end'])
            reason = data.get('reason', '')
            attend_list.append({
                'name': data['name'],
                'rank': employee['rank'],
                'department': employee['department'],
                'date': data['date'],
                'begin': begin_text,
                'end': end_text,
                'reason': reason,
            })
        return attend_list

    if start and end:
        date_filter = {"$gte": start, "$lte": end}
        if name:
            data_list = self.collection.find(
                {'date': date_filter, 'name': name},
                sort=[('name', 1), ('date', -1)])
        else:
            data_list = self.collection.find(
                {'date': date_filter},
                sort=[('name', 1), ('date', -1)])
    elif name:
        data_list = self.collection.find(
            {'date': self.today, 'name': name}, sort=[('date', -1)])
    else:
        data_list = self.collection.find(
            {'date': self.today}, sort=[('name', 1)])

    get_page = Page(page)
    paging, data_list = get_page.paginate(data_list)

    summary = OrderedDict()
    if not name:
        return paging, self.today, data_list, summary

    attend_list = []
    summary['totalDay'] = 0
    summary['totalWorkingDay'] = 0
    summary['totalWorkingHours'] = 0
    for status in WORKING['inStatus']:
        summary[status] = 0
    for status in WORKING['status']:
        summary[status] = 0

    for data in data_list:
        if data['workingHours'] is not None:
            summary['totalDay'] += 1
            del data['_id']
            if 'status' in data:
                first_status = data['status'][0]
                if first_status:
                    summary[first_status] += 1
            # Count a reason only when it is one of the known counters, as
            # summary() does; an unknown reason previously raised KeyError.
            reason = data.get('reason')
            if reason and reason in summary:
                summary[reason] += 1
            summary['totalWorkingHours'] += data['workingHours']
        attend_list.append(data)

    summary = self.get_summary(summary)
    return paging, self.today, attend_list, summary