Пример #1
0
    def test_receipt_continuation_token(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Resume receipt recognition from a continuation token.

        Starts one operation, reads its continuation token, creates a second
        poller from that token and checks that the second poller yields a
        non-None result.
        """
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.receipt_jpg, "rb") as image_file:
            receipt_bytes = image_file.read()

        first_poller = client.begin_recognize_receipts(receipt_bytes)
        token = first_poller.continuation_token()
        resumed_poller = client.begin_recognize_receipts(receipt_bytes, continuation_token=token)
        self.assertIsNotNone(resumed_poller.result())
        first_poller.wait()  # necessary so azure-devtools doesn't throw assertion error
Пример #2
0
 def test_damaged_file_passed_as_bytes_io(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Expect HttpResponseError when a damaged PDF is submitted as a BytesIO stream."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     # still has correct bytes to be recognized as PDF
     corrupt_stream = BytesIO(b"\x25\x50\x44\x46\x55\x55\x55")
     with self.assertRaises(HttpResponseError):
         client.begin_recognize_receipts(corrupt_stream)
Пример #3
0
    def strongly_typed_receipt(self):
        """Recognize the sample receipt and print its fields through ``Receipt``.

        The endpoint and key are read from the AZURE_FORM_RECOGNIZER_ENDPOINT
        and AZURE_FORM_RECOGNIZER_KEY environment variables. Each recognized
        result is wrapped in ``Receipt`` before its values are printed.
        """
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "./sample_forms/receipt/contoso-allinone.jpg")
        )

        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        credential = AzureKeyCredential(key)
        form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=credential)
        with open(path_to_sample_forms, "rb") as sample_file:
            poller = form_recognizer_client.begin_recognize_receipts(receipt=sample_file)
        recognized_receipts = poller.result()

        for recognized in recognized_receipts:
            typed = Receipt(recognized)
            print("Receipt Type: {} has confidence: {}".format(typed.receipt_type.value, typed.receipt_type.confidence))
            print("Merchant Name: {} has confidence: {}".format(typed.merchant_name.value, typed.merchant_name.confidence))
            print("Transaction Date: {} has confidence: {}".format(typed.transaction_date.value, typed.transaction_date.confidence))
            print("Receipt items:")
            for line_item in typed.receipt_items:
                print("...Item Name: {} has confidence: {}".format(line_item.name.value, line_item.name.confidence))
                print("...Item Quantity: {} has confidence: {}".format(line_item.quantity.value, line_item.quantity.confidence))
                print("...Individual Item Price: {} has confidence: {}".format(line_item.price.value, line_item.price.confidence))
                print("...Total Item Price: {} has confidence: {}".format(line_item.total_price.value, line_item.total_price.confidence))
            print("Subtotal: {} has confidence: {}".format(typed.subtotal.value, typed.subtotal.confidence))
            print("Tax: {} has confidence: {}".format(typed.tax.value, typed.tax.confidence))
            print("Tip: {} has confidence: {}".format(typed.tip.value, typed.tip.confidence))
            print("Total: {} has confidence: {}".format(typed.total.value, typed.total.confidence))
Пример #4
0
 def test_authentication_bad_key(self, formrecognizer_test_endpoint,
                                 formrecognizer_test_api_key):
     """Expect ClientAuthenticationError when the client uses the key "xxxx"."""
     bad_credential = AzureKeyCredential("xxxx")
     client = FormRecognizerClient(formrecognizer_test_endpoint, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_recognize_receipts(b"xx", content_type="image/jpeg")
Пример #5
0
    def test_receipt_png(self, resource_group, location,
                         form_recognizer_account, form_recognizer_account_key):
        """Recognize the PNG receipt from an open file stream and check its values."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.receipt_png, "rb") as png_stream:
            poller = client.begin_recognize_receipts(png_stream)

        receipts = poller.result()
        self.assertEqual(len(receipts), 1)
        recognized = receipts[0]

        # Merchant and monetary values
        self.assertEqual(recognized.merchant_address.value, '123 Main Street Redmond, WA 98052')
        self.assertEqual(recognized.merchant_name.value, 'Contoso Contoso')
        self.assertEqual(recognized.subtotal.value, 1098.99)
        self.assertEqual(recognized.tax.value, 104.4)
        self.assertEqual(recognized.total.value, 1203.39)

        # Transaction timestamp
        self.assertEqual(recognized.transaction_date.value, date(year=2019, month=6, day=10))
        self.assertEqual(recognized.transaction_time.value, time(hour=13, minute=59, second=0))

        # Page information
        self.assertEqual(recognized.page_range.first_page, 1)
        self.assertEqual(recognized.page_range.last_page, 1)
        self.assertFormPagesHasValues(recognized.pages)

        # Receipt type
        self.assertIsNotNone(recognized.receipt_type.confidence)
        self.assertEqual(recognized.receipt_type.type, 'Itemized')
Пример #6
0
    def test_receipt_jpg_include_text_content(self, resource_group, location,
                                              form_recognizer_account,
                                              form_recognizer_account_key):
        """Check that text content is populated when ``include_text_content=True``.

        Verifies the text content of every receipt attribute except the ones
        listed in ``skipped_attributes``, and of every entry in ``fields``.
        """
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.receipt_jpg, "rb") as image_file:
            receipt_bytes = image_file.read()
        poller = client.begin_recognize_receipts(receipt_bytes, include_text_content=True)

        receipts = poller.result()
        self.assertEqual(len(receipts), 1)
        recognized = receipts[0]

        self.assertFormPagesHasValues(recognized.pages)

        skipped_attributes = (
            "receipt_type", "receipt_items", "page_range", "pages",
            "fields", "form_type", "receipt_locale"
        )
        for attribute_name in recognized.__dict__:
            if attribute_name in skipped_attributes:
                continue
            form_field = getattr(recognized, attribute_name)
            self.assertTextContentHasValues(
                form_field.value_data.text_content,
                recognized.page_range.first_page)

        for _name, form_field in recognized.fields.items():
            self.assertTextContentHasValues(
                form_field.value_data.text_content,
                recognized.page_range.first_page)
Пример #7
0
    def test_receipt_jpg(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Recognize the JPG receipt from bytes and check values in the ``fields`` dict."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.receipt_jpg, "rb") as image_file:
            receipt_bytes = image_file.read()

        poller = client.begin_recognize_receipts(receipt_bytes)

        receipts = poller.result()
        self.assertEqual(len(receipts), 1)
        recognized = receipts[0]
        fields = recognized.fields

        # (field name, expected value) pairs, checked in this order
        expected_values = (
            ("MerchantAddress", '123 Main Street Redmond, WA 98052'),
            ("MerchantName", 'Contoso Contoso'),
            ("MerchantPhoneNumber", '+19876543210'),
            ("Subtotal", 11.7),
            ("Tax", 1.17),
            ("Tip", 1.63),
            ("Total", 14.5),
            ("TransactionDate", date(year=2019, month=6, day=10)),
            ("TransactionTime", time(hour=13, minute=59, second=0)),
        )
        for field_name, expected in expected_values:
            self.assertEqual(fields.get(field_name).value, expected)

        self.assertEqual(recognized.page_range.first_page_number, 1)
        self.assertEqual(recognized.page_range.last_page_number, 1)
        self.assertFormPagesHasValues(recognized.pages)

        receipt_type = fields.get("ReceiptType")
        self.assertIsNotNone(receipt_type.confidence)
        self.assertEqual(receipt_type.value, 'Itemized')
        self.assertReceiptItemsHasValues(fields['Items'].value, recognized.page_range.first_page_number, False)
Пример #8
0
    def recognize_receipts(self):
        """Recognize the sample receipt and print its values with their confidences.

        Uses ``self.endpoint`` and ``self.key`` to build the client and reads
        the image from the relative path ``sample_forms/receipt/contoso-allinone.jpg``.
        """
        # [START recognize_receipts]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient
        credential = AzureKeyCredential(self.key)
        form_recognizer_client = FormRecognizerClient(endpoint=self.endpoint, credential=credential)
        with open("sample_forms/receipt/contoso-allinone.jpg", "rb") as sample_file:
            poller = form_recognizer_client.begin_recognize_receipts(stream=sample_file.read())
        recognized_receipts = poller.result()

        for position, recognized in enumerate(recognized_receipts):
            print("--------Recognizing receipt #{}--------".format(position))
            print("Receipt Type: {} has confidence: {}".format(recognized.receipt_type.type, recognized.receipt_type.confidence))
            print("Merchant Name: {} has confidence: {}".format(recognized.merchant_name.value, recognized.merchant_name.confidence))
            print("Transaction Date: {} has confidence: {}".format(recognized.transaction_date.value, recognized.transaction_date.confidence))
            print("Receipt items:")
            for line_item in recognized.receipt_items:
                print("...Item Name: {} has confidence: {}".format(line_item.name.value, line_item.name.confidence))
                print("...Item Quantity: {} has confidence: {}".format(line_item.quantity.value, line_item.quantity.confidence))
                print("...Individual Item Price: {} has confidence: {}".format(line_item.price.value, line_item.price.confidence))
                print("...Total Item Price: {} has confidence: {}".format(line_item.total_price.value, line_item.total_price.confidence))
            print("Subtotal: {} has confidence: {}".format(recognized.subtotal.value, recognized.subtotal.confidence))
            print("Tax: {} has confidence: {}".format(recognized.tax.value, recognized.tax.confidence))
            print("Tip: {} has confidence: {}".format(recognized.tip.value, recognized.tip.confidence))
            print("Total: {} has confidence: {}".format(recognized.total.value, recognized.total.confidence))
            print("--------------------------------------")
Пример #9
0
    def test_receipt_multipage(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Recognize the multipage PDF and check the first and third results."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.multipage_invoice_pdf, "rb") as pdf_file:
            document_bytes = pdf_file.read()
        poller = client.begin_recognize_receipts(document_bytes, include_text_content=True)
        receipts = poller.result()

        self.assertEqual(len(receipts), 3)

        def check_receipt(recognized, merchant_name, subtotal, total, page_number):
            # Both checked results share the address, phone number and receipt
            # type; only the values passed in differ.
            fields = recognized.fields
            self.assertEqual(fields.get("MerchantAddress").value, '123 Hobbit Lane 567 Main St. Redmond, WA Redmond, WA')
            self.assertEqual(fields.get("MerchantName").value, merchant_name)
            self.assertEqual(fields.get("MerchantPhoneNumber").value, '+15555555555')
            self.assertEqual(fields.get("Subtotal").value, subtotal)
            self.assertEqual(fields.get("Total").value, total)
            self.assertEqual(recognized.page_range.first_page_number, page_number)
            self.assertEqual(recognized.page_range.last_page_number, page_number)
            self.assertFormPagesHasValues(recognized.pages)
            receipt_type = fields.get("ReceiptType")
            self.assertIsNotNone(receipt_type.confidence)
            self.assertEqual(receipt_type.value, 'Itemized')

        check_receipt(receipts[0], 'Bilbo Baggins', 300.0, 100.0, 1)
        check_receipt(receipts[2], 'Frodo Baggins', 3000.0, 1000.0, 3)
Пример #10
0
    def test_receipt_jpg(self, resource_group, location,
                         form_recognizer_account, form_recognizer_account_key):
        """Recognize the JPG receipt from bytes and check its typed attributes."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.receipt_jpg, "rb") as image_file:
            receipt_bytes = image_file.read()

        poller = client.begin_recognize_receipts(receipt_bytes)

        receipts = poller.result()
        self.assertEqual(len(receipts), 1)
        recognized = receipts[0]

        # Merchant details
        self.assertEqual(recognized.merchant_address.value, '123 Main Street Redmond, WA 98052')
        self.assertEqual(recognized.merchant_name.value, 'Contoso Contoso')
        self.assertEqual(recognized.merchant_phone_number.value, '+19876543210')

        # Monetary values
        self.assertEqual(recognized.subtotal.value, 11.7)
        self.assertEqual(recognized.tax.value, 1.17)
        self.assertEqual(recognized.tip.value, 1.63)
        self.assertEqual(recognized.total.value, 14.5)

        # Transaction timestamp
        self.assertEqual(recognized.transaction_date.value, date(year=2019, month=6, day=10))
        self.assertEqual(recognized.transaction_time.value, time(hour=13, minute=59, second=0))

        # Page information
        self.assertEqual(recognized.page_range.first_page, 1)
        self.assertEqual(recognized.page_range.last_page, 1)
        self.assertFormPagesHasValues(recognized.pages)

        # Receipt type and line items
        self.assertIsNotNone(recognized.receipt_type.confidence)
        self.assertEqual(recognized.receipt_type.type, 'Itemized')
        self.assertReceiptItemsHasValues(recognized.receipt_items, recognized.page_range.first_page, False)
Пример #11
0
 def test_damaged_file_bytes_io_fails_autodetect(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Expect ValueError when the BytesIO payload matches no known file signature."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     # doesn't match any magic file numbers
     unrecognized_stream = BytesIO(b"\x50\x44\x46\x55\x55\x55")
     with self.assertRaises(ValueError):
         client.begin_recognize_receipts(unrecognized_stream)
Пример #12
0
def analyze_form(endpoint, key, local_path):
    """Submit the file at ``local_path`` for receipt recognition and wait on the poller.

    The client is created with ``logging_enable=True``. The file stays open
    until ``wait()`` on the returned poller has completed. Nothing is returned.
    """
    credential = AzureKeyCredential(key)
    client = FormRecognizerClient(endpoint=endpoint, credential=credential, logging_enable=True)
    with open(local_path, "rb") as receipt_file:
        client.begin_recognize_receipts(receipt=receipt_file).wait()
Пример #13
0
 def test_authentication_bad_key(self, resource_group, location,
                                 form_recognizer_account,
                                 form_recognizer_account_key):
     """Expect ClientAuthenticationError when the client uses the key "xxxx"."""
     bad_credential = AzureKeyCredential("xxxx")
     client = FormRecognizerClient(form_recognizer_account, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_recognize_receipts(b"xx", content_type="image/jpeg")
Пример #14
0
    def test_receipt_multipage_transform(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Check that ``prepare_receipt`` output matches the raw service response.

        A ``cls`` callback captures both the deserialized
        ``AnalyzeOperationResult`` and the model built from it by
        ``prepare_receipt``. Each transformed receipt is then compared with
        its raw document result.
        """
        client = FormRecognizerClient(form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))

        responses = []

        def callback(raw_response, _, headers):
            # Keep the raw result and the transformed model side by side.
            analyze_result = client._client._deserialize(AnalyzeOperationResult, raw_response)
            extracted_receipt = prepare_receipt(analyze_result)
            responses.append(analyze_result)
            responses.append(extracted_receipt)

        with open(self.multipage_invoice_pdf, "rb") as fd:
            myfile = fd.read()

        poller = client.begin_recognize_receipts(
            receipt=myfile,
            include_text_content=True,
            cls=callback
        )

        # The return value is not needed; the callback has already captured
        # everything the assertions below use.
        poller.result()
        raw_response = responses[0]
        returned_model = responses[1]
        document_results = raw_response.analyze_result.document_results
        read_results = raw_response.analyze_result.read_results

        # Fields compared one-to-one between the transformed and raw results.
        compared_fields = (
            "MerchantAddress",
            "MerchantName",
            "MerchantPhoneNumber",
            "Subtotal",
            "Tax",
            "Tip",
            "Total",
            "TransactionDate",
            "TransactionTime",
        )

        # check hardcoded values
        for receipt, actual in zip(returned_model, document_results):
            if not actual.fields:  # second page is blank
                continue

            # check dict values
            for field_name in compared_fields:
                self.assertFormFieldTransformCorrect(
                    receipt.fields.get(field_name), actual.fields.get(field_name), read_results)

            # check page range
            self.assertEqual(receipt.page_range.first_page_number, actual.page_range[0])
            self.assertEqual(receipt.page_range.last_page_number, actual.page_range[1])

            # check receipt type
            receipt_type = receipt.fields.get("ReceiptType")
            self.assertEqual(receipt_type.confidence, actual.fields["ReceiptType"].confidence)
            self.assertEqual(receipt_type.value, actual.fields["ReceiptType"].value_string)

            # check receipt items
            self.assertReceiptItemsTransformCorrect(receipt.fields["Items"].value, actual.fields["Items"], read_results)

        # Check form pages
        self.assertFormPagesTransformCorrect(returned_model, read_results)
Пример #15
0
 def test_passing_bad_content_type_param_passed(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Expect ValueError when ``content_type="application/jpeg"`` is passed."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with open(self.receipt_jpg, "rb") as image_file:
         receipt_bytes = image_file.read()
     with self.assertRaises(ValueError):
         client.begin_recognize_receipts(receipt_bytes, content_type="application/jpeg")
Пример #16
0
    def recognize_receipts(self):
        """Recognize the sample receipt and print every known field that is present.

        The endpoint and key are read from the AZURE_FORM_RECOGNIZER_ENDPOINT
        and AZURE_FORM_RECOGNIZER_KEY environment variables. A field is
        printed only when the lookup in ``fields`` yields a truthy value.
        """
        path_to_sample_forms = os.path.abspath(
            os.path.join(os.path.abspath(__file__), "..", "./sample_forms/receipt/contoso-allinone.jpg")
        )
        # [START recognize_receipts]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        form_recognizer_client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        with open(path_to_sample_forms, "rb") as sample_file:
            poller = form_recognizer_client.begin_recognize_receipts(receipt=sample_file, locale="en-US")
        recognized_receipts = poller.result()

        def show(template, field):
            # Skip fields that are missing from the result.
            if field:
                print(template.format(field.value, field.confidence))

        for receipt_number, recognized in enumerate(recognized_receipts, start=1):
            print("--------Recognizing receipt #{}--------".format(receipt_number))
            fields = recognized.fields
            show("Receipt Type: {} has confidence: {}", fields.get("ReceiptType"))
            show("Merchant Name: {} has confidence: {}", fields.get("MerchantName"))
            show("Transaction Date: {} has confidence: {}", fields.get("TransactionDate"))
            if fields.get("Items"):
                print("Receipt items:")
                for item_number, line_item in enumerate(fields.get("Items").value, start=1):
                    print("...Item #{}".format(item_number))
                    item_fields = line_item.value
                    show("......Item Name: {} has confidence: {}", item_fields.get("Name"))
                    show("......Item Quantity: {} has confidence: {}", item_fields.get("Quantity"))
                    show("......Individual Item Price: {} has confidence: {}", item_fields.get("Price"))
                    show("......Total Item Price: {} has confidence: {}", item_fields.get("TotalPrice"))
            show("Subtotal: {} has confidence: {}", fields.get("Subtotal"))
            show("Tax: {} has confidence: {}", fields.get("Tax"))
            show("Tip: {} has confidence: {}", fields.get("Tip"))
            show("Total: {} has confidence: {}", fields.get("Total"))
            print("--------------------------------------")
Пример #17
0
 def test_receipt_bad_endpoint(self, formrecognizer_test_endpoint,
                               formrecognizer_test_api_key):
     """Expect ServiceRequestError when the client targets http://notreal.azure.com."""
     with open(self.receipt_jpg, "rb") as image_file:
         receipt_bytes = image_file.read()
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = FormRecognizerClient("http://notreal.azure.com", credential)
         client.begin_recognize_receipts(receipt_bytes)
Пример #18
0
    def test_auto_detect_unsupported_stream_content(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Expect ValueError when the bytes of ``self.unsupported_content_py`` are submitted."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.unsupported_content_py, "rb") as source_file:
            unsupported_bytes = source_file.read()

        with self.assertRaises(ValueError):
            client.begin_recognize_receipts(unsupported_bytes)
Пример #19
0
 def test_passing_enum_content_type(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Check that ``FormContentType.image_png`` is accepted as ``content_type``."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with open(self.receipt_png, "rb") as image_file:
         receipt_bytes = image_file.read()
     poller = client.begin_recognize_receipts(receipt_bytes, content_type=FormContentType.image_png)
     self.assertIsNotNone(poller.result())
Пример #20
0
    def test_blank_page(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Check that recognizing the file at ``self.blank_pdf`` yields a non-None result."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.blank_pdf, "rb") as pdf_file:
            blank_bytes = pdf_file.read()
        poller = client.begin_recognize_receipts(blank_bytes)
        self.assertIsNotNone(poller.result())
Пример #21
0
 def test_authentication_successful_key(self, resource_group, location,
                                        form_recognizer_account,
                                        form_recognizer_account_key):
     """Run receipt recognition with the account key; passes if nothing raises."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with open(self.receipt_jpg, "rb") as image_file:
         receipt_bytes = image_file.read()
     # Only completion matters here; the result itself is not inspected.
     client.begin_recognize_receipts(receipt_bytes).result()
Пример #22
0
    def test_passing_unsupported_url_content_type(self, resource_group,
                                                  location,
                                                  form_recognizer_account,
                                                  form_recognizer_account_key):
        """Expect TypeError when a URL string is passed with ``content_type="application/json"``."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with self.assertRaises(TypeError):
            client.begin_recognize_receipts("https://badurl.jpg", content_type="application/json")
Пример #23
0
    def test_receipt_multipage(self, resource_group, location,
                               form_recognizer_account,
                               form_recognizer_account_key):
        """Recognize the multipage PDF and check the first and third receipts."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.multipage_invoice_pdf, "rb") as pdf_file:
            document_bytes = pdf_file.read()
        poller = client.begin_recognize_receipts(document_bytes, include_text_content=True)
        receipts = poller.result()

        self.assertEqual(len(receipts), 3)

        first_receipt = receipts[0]
        self.assertEqual(first_receipt.merchant_address.value, '123 Hobbit Lane 567 Main St. Redmond, WA Redmond, WA')
        self.assertEqual(first_receipt.merchant_name.value, 'Bilbo Baggins')
        self.assertEqual(first_receipt.merchant_phone_number.value, '+15555555555')
        self.assertEqual(first_receipt.subtotal.value, 300.0)
        # TODO: revert after service side fix
        self.assertIsNotNone(first_receipt.total.value)
        self.assertEqual(first_receipt.page_range.first_page, 1)
        self.assertEqual(first_receipt.page_range.last_page, 1)
        self.assertFormPagesHasValues(first_receipt.pages)
        self.assertIsNotNone(first_receipt.receipt_type.confidence)
        self.assertEqual(first_receipt.receipt_type.type, 'Itemized')

        third_receipt = receipts[2]
        self.assertEqual(third_receipt.merchant_address.value, '123 Hobbit Lane 567 Main St. Redmond, WA Redmond, WA')
        self.assertEqual(third_receipt.merchant_name.value, 'Frodo Baggins')
        self.assertEqual(third_receipt.merchant_phone_number.value, '+15555555555')
        self.assertEqual(third_receipt.subtotal.value, 3000.0)
        self.assertEqual(third_receipt.total.value, 1000.0)
        self.assertEqual(third_receipt.page_range.first_page, 3)
        self.assertEqual(third_receipt.page_range.last_page, 3)
        self.assertFormPagesHasValues(third_receipt.pages)
        self.assertIsNotNone(third_receipt.receipt_type.confidence)
        self.assertEqual(third_receipt.receipt_type.type, 'Itemized')
Пример #24
0
# <snippet_logging>
import sys
import logging
from azure.ai.formrecognizer import FormRecognizerClient
from azure.core.credentials import AzureKeyCredential

# Create a logger for the 'azure' SDK and let DEBUG-level records through
logger = logging.getLogger('azure')
logger.setLevel(logging.DEBUG)

# Send the logger's output to the console (stdout)
handler = logging.StreamHandler(stream=sys.stdout)
logger.addHandler(handler)

# Placeholder values: substitute the real subdomain and API key before running
endpoint = "https://<my-custom-subdomain>.cognitiveservices.azure.com/"
credential = AzureKeyCredential("<api_key>")

# This client will log detailed information about its HTTP sessions, at DEBUG level
form_recognizer_client = FormRecognizerClient(endpoint,
                                              credential,
                                              logging_enable=True)
# </snippet_logging>

# <snippet_example>
# logging_enable=True is passed on this individual call as well.
# NOTE(review): `receipt` is not defined anywhere in this snippet; presumably
# the surrounding documentation supplies it. Confirm before running as-is.
poller = form_recognizer_client.begin_recognize_receipts(receipt,
                                                         logging_enable=True)
# </snippet_example>
Пример #25
0
def extract():
    """Recognize an uploaded receipt and persist it with its line items.

    Reads ``user_id`` from the request form and the ``receipt`` upload from
    the request files, submits the upload to Form Recognizer, and stores one
    ``Receipt`` per recognized receipt plus one ``Expense`` per line item.

    Returns:
        str: ``"Success"`` once all rows have been committed.

    Raises:
        Exception: Whatever the recognizer call or the database commit
            raises. A failed commit is rolled back before re-raising.
    """
    form_recognizer_client = FormRecognizerClient(ENDPOINT,
                                                  AzureKeyCredential(KEY))

    user_id = request.form.get('user_id')
    upload = request.files['receipt']

    poller = form_recognizer_client.begin_recognize_receipts(receipt=upload)
    result = poller.result()

    receipts = []
    all_expenses = []

    for recognized in result:
        fields = recognized.fields

        # Resolve the header fields before walking the items so that every
        # expense carries the real vendor/date regardless of the order in
        # which the service returns the fields.
        rec_name = _field_value_or(fields.get("MerchantName"), "unknown")
        date = _field_value_or(fields.get("TransactionDate"), "unknown")

        num_items = 0
        total_price = 0
        # Expenses are collected per receipt; sharing one list across
        # receipts would attach earlier receipts' items to later ones.
        receipt_expenses = []

        for name, field in fields.items():
            if name != "Items":
                print("{}: {} has confidence {}".format(
                    name, field.value, field.confidence))
                continue

            print("Receipt Items:")
            # The value may be None when no line items were recognized.
            for idx, items in enumerate(field.value or []):
                print("...Item #{}".format(idx + 1))
                num_items += 1

                exp_name, price, quantity = _parse_line_item(
                    items.value or {})
                total_price += price
                receipt_expenses.append(Expense(item_name=exp_name,
                                                user_id=user_id,
                                                vendor_name=rec_name,
                                                date=date,
                                                price=price,
                                                quantity=quantity))

        receipt_row = Receipt(name=rec_name,
                              date=date,
                              num_items=num_items,
                              total_price=total_price)
        for expense in receipt_expenses:
            receipt_row.expenses.append(expense)
        receipts.append(receipt_row)
        all_expenses.extend(receipt_expenses)

    # Open the session only once there is something to store, and always
    # release it; roll back so a failed commit leaves no partial state.
    session = Session()
    try:
        session.add_all(receipts)
        session.add_all(all_expenses)
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

    return "Success"


def _field_value_or(field, default):
    """Return ``field.value``, or ``default`` when the field or value is missing."""
    if field is None or field.value is None:
        return default
    return field.value


def _parse_line_item(item_fields):
    """Return ``(name, price, quantity)`` for one line item, printing each sub-field.

    Sub-fields whose value is ``None`` keep the defaults
    (``"unknown"``, ``0`` and ``1`` respectively).
    """
    exp_name = "unknown"
    price = 0
    quantity = 1
    for item_name, item in item_fields.items():
        value = item.value
        if value is not None:
            if item_name == "TotalPrice":
                price = float(value)
            elif item_name == "Name":
                exp_name = value
            # The service key is "Quantity"; the lowercase spelling is still
            # accepted for backward compatibility.
            elif item_name in ("Quantity", "quantity"):
                quantity = int(value)
        print("......{}: {} has confidence {}".format(
            item_name, item.value, item.confidence))
    return exp_name, price, quantity
Пример #26
0
    def test_receipt_stream_transform_jpg(self, resource_group, location,
                                          form_recognizer_account,
                                          form_recognizer_account_key):
        """Check that a JPG receipt is transformed consistently with the raw result.

        A ``cls`` callback captures both the deserialized
        ``AnalyzeOperationResult`` and the receipt built from it by
        ``prepare_us_receipt``; the two are then compared field by field.
        """
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        responses = []

        def callback(raw_response, _, headers):
            # Keep the raw payload first and the transformed model second.
            analyze_result = client._client._deserialize(
                AnalyzeOperationResult, raw_response)
            responses.extend(
                [analyze_result, prepare_us_receipt(analyze_result)])

        with open(self.receipt_jpg, "rb") as fd:
            myfile = fd.read()

        poller = client.begin_recognize_receipts(
            stream=myfile,
            include_text_content=True,
            cls=callback,
        )

        # Waiting on the poller is what triggers the callback above.
        result = poller.result()
        raw_response, returned_model = responses[0], responses[1]
        receipt = returned_model[0]
        analyzed = raw_response.analyze_result
        actual = analyzed.document_results[0].fields
        read_results = analyzed.read_results
        document_results = analyzed.document_results
        page_results = analyzed.page_results

        # (receipt attribute, raw field key) pairs, in verification order
        field_pairs = (
            ("merchant_address", "MerchantAddress"),
            ("merchant_name", "MerchantName"),
            ("merchant_phone_number", "MerchantPhoneNumber"),
            ("subtotal", "Subtotal"),
            ("tax", "Tax"),
            ("tip", "Tip"),
            ("total", "Total"),
            ("transaction_date", "TransactionDate"),
            ("transaction_time", "TransactionTime"),
        )

        # check hardcoded values
        for attribute, key in field_pairs:
            self.assertFormFieldTransformCorrect(
                getattr(receipt, attribute), actual.get(key), read_results)

        # check dict values
        for _attribute, key in field_pairs:
            self.assertFormFieldTransformCorrect(
                receipt.fields.get(key), actual.get(key), read_results)

        # check page range
        page_range = receipt.page_range
        self.assertEqual(page_range.first_page,
                         document_results[0].page_range[0])
        self.assertEqual(page_range.last_page,
                         document_results[0].page_range[1])

        # check receipt type
        receipt_type = receipt.receipt_type
        self.assertEqual(receipt_type.confidence,
                         actual["ReceiptType"].confidence)
        self.assertEqual(receipt_type.type,
                         actual["ReceiptType"].value_string)

        # check receipt items
        self.assertReceiptItemsTransformCorrect(
            receipt.receipt_items, actual["Items"], read_results)

        # Check form pages
        self.assertFormPagesTransformCorrect(receipt.pages, read_results)