def test_parse_checksum_wrong1(self):
    # Reads the 'checksum_wrong1' scan fixture, parses it, and expects
    # 9 parsed characters, a checksum of 2, and a string form that ends
    # with the ' ERR' marker.
    fixture_path = '../data/checksum_wrong1.txt'
    scan = reader.read_scanner_result(fixture_path)

    glyphs = ocr_parser.parse(scan)
    self.assertEqual(9, len(glyphs))

    account_number = ocr_parser.translate(glyphs)
    checksum = account_number.calculate_checksum()
    self.assertEqual(2, checksum)

    rendered = str(account_number)
    self.assertEqual('664371495 ERR', rendered)
def test_parse_12ERR456709(self):
    # Reads the 'testcase5' scan fixture, parses it, and expects 9 parsed
    # characters whose translated string form is '12?456709 ILL'.
    # The account number is then printed in both str and repr form.
    fixture_path = '../data/testcase5.txt'
    scan = reader.read_scanner_result(fixture_path)

    glyphs = ocr_parser.parse(scan)
    glyph_count = len(glyphs)
    self.assertEqual(9, glyph_count)

    account_number = ocr_parser.translate(glyphs)
    rendered = str(account_number)
    self.assertEqual('12?456709 ILL', rendered)

    print(f'account number: {account_number}, {account_number!r}')
def test_parse_checksum_ok2(self):
    # Reads the 'checksum' scan fixture, parses it, and expects 9 parsed
    # characters, a checksum of 0, and a string form with no status
    # suffix ('457508000').
    scan = reader.read_scanner_result('../data/checksum.txt')
    glyphs = ocr_parser.parse(scan)
    self.assertEqual(9, len(glyphs))

    account_number = ocr_parser.translate(glyphs)

    checksum = account_number.calculate_checksum()
    self.assertEqual(0, checksum)

    rendered = str(account_number)
    self.assertEqual('457508000', rendered)
# NOTE(review): the line breaks of this fragment were lost; it holds two pieces.
#
# 1. The tail of a CSV-writing routine whose `def` line is not visible here
#    (presumably `write_to_csv`, which the entry point below calls - confirm).
#    It builds the output path from `out_dir` and the stem of `pdf_filename`
#    plus '.csv', writes `csv_headers` followed by `csv_data`, and on IOError
#    prints the error instead of raising.
#    The handle returned by open() is never closed in the visible code;
#    consider `with open(...) as f:` when restoring this routine.
#
# 2. The script entry point: it requires exactly one command-line argument,
#    tries exp_text_parser.extract_to_csv() first and calls ocr_parser.parse()
#    when that result has length 0, then hands the data to write_to_csv().
#    sys.exit() is called without a status on the usage error, so the process
#    exits with status 0 - NOTE(review): probably should be non-zero.
output_filename = os.path.join(out_dir, os.path.splitext(pdf_filename)[0] + '.csv') print("Writing CSV to", output_filename) try: file = open(output_filename, 'w') file.write(csv_headers) file.write(csv_data) print('Done extracting text, written to {0}'.format(output_filename)) except IOError as e: print("Error: unable to write CSV to", output_filename) print(e) if __name__ == '__main__': if len(sys.argv) != 2: print("Need exactly one argument for filename.") sys.exit() FILENAME = sys.argv[1] print("Attempting text extraction.") csv_data = exp_text_parser.extract_to_csv(FILENAME) if len(csv_data) == 0: print("Text extraction failed; attempting OCR parsing.") csv_data = ocr_parser.parse(FILENAME) print(csv_data) write_to_csv(csv_data, FILENAME)
def test_parse_ones(self):
    # Reads the 'testcase2' scan fixture, parses it, and expects 9 parsed
    # characters whose translated string form is '111111111 ERR'.
    fixture_path = '../data/testcase2.txt'
    glyphs = ocr_parser.parse(reader.read_scanner_result(fixture_path))
    self.assertEqual(9, len(glyphs))

    account_number = ocr_parser.translate(glyphs)
    rendered = str(account_number)
    self.assertEqual('111111111 ERR', rendered)