def test_azure_ocr_good_file(self):
    """Check that a stored Azure OCR response merges into the expected text.

    The sample response is parsed twice: once with the default arguments
    and once with ``merge_line=True``. In both cases the boxes found in the
    first entry of the result, merged in order, must produce the same
    sentence.
    """
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    sample_path = os.path.join(tests_dir, 'test_samples', 'azure_ocr', 'good_text.json')
    with open(sample_path, 'rb') as sample_file:
        payload = json.load(sample_file)

    def merged_text(boxes):
        # Fold every box into an initially empty one and report its text.
        combined = BoundBox.void_box()
        for item in boxes:
            combined += item
        return combined.text_value

    # The parser result is a list of lists; only the first inner list is
    # checked here (presumably one inner list per page -- TODO confirm).
    default_boxes = BoundBox.azure_ocr_boxes(payload)
    self.assertEqual(merged_text(default_boxes[0]), 'Noisy image to test Tesseract OCR')

    # Same check with the boxes produced under merge_line=True.
    line_boxes = BoundBox.azure_ocr_boxes(payload, merge_line=True)
    self.assertEqual(merged_text(line_boxes[0]), 'Noisy image to test Tesseract OCR')
def test_azure_ocr_blank_file(self):
    """Check that the stored Azure OCR response for a blank image has no boxes.

    The first entry of the parsed result is expected to be empty.
    """
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    sample_path = os.path.join(tests_dir, 'test_samples', 'azure_ocr', 'blank_image.json')
    with open(sample_path, 'rb') as sample_file:
        payload = json.load(sample_file)

    # Only the first inner list of the result is inspected.
    first_entry = BoundBox.azure_ocr_boxes(payload)[0]
    self.assertEqual(len(first_entry), 0)