async def test_custom_form_url_bad_endpoint(self, resource_group, location,
                                            form_recognizer_account,
                                            form_recognizer_account_key):
    """Custom-form recognition by URL against a bogus endpoint must fail.

    The client is pointed at ``http://notreal.azure.com`` and the test
    expects ``ServiceRequestError`` while starting or awaiting the operation.
    """
    with self.assertRaises(ServiceRequestError):
        client = FormRecognizerClient(
            "http://notreal.azure.com",
            AzureKeyCredential(form_recognizer_account_key))
        # Use the client as an async context manager so it is closed even
        # though the request is expected to raise.
        async with client:
            poller = await client.begin_recognize_custom_forms_from_url(
                model_id="xx", form_url=self.form_url_jpg)
            await poller.result()
 async def test_custom_form_empty_model_id(self, resource_group, location,
                                           form_recognizer_account,
                                           form_recognizer_account_key):
     """An empty ``model_id`` passed with form bytes raises ``ValueError``."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     fr_client = FormRecognizerClient(form_recognizer_account, credential)
     with self.assertRaises(ValueError):
         async with fr_client:
             await fr_client.begin_recognize_custom_forms(
                 model_id="", form=b"xx")
 async def test_authentication_bad_key(self, resource_group, location,
                                       form_recognizer_account,
                                       form_recognizer_account_key):
     """A client built with the placeholder key ``"xxxx"`` fails to authenticate.

     The test expects ``ClientAuthenticationError`` from the custom-form call.
     """
     bad_credential = AzureKeyCredential("xxxx")
     fr_client = FormRecognizerClient(form_recognizer_account, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with fr_client:
             operation = await fr_client.begin_recognize_custom_forms(
                 model_id="xx", form=b"xx", content_type="image/jpeg")
             await operation.result()
Exemplo n.º 4
0
 async def test_custom_form_empty_model_id(self,
                                           formrecognizer_test_endpoint,
                                           formrecognizer_test_api_key):
     """An empty ``model_id`` passed with a form URL raises ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     fr_client = FormRecognizerClient(formrecognizer_test_endpoint, credential)
     with self.assertRaises(ValueError):
         async with fr_client:
             await fr_client.begin_recognize_custom_forms_from_url(
                 model_id="",
                 form_url="https://badurl.jpg",
             )
Exemplo n.º 5
0
 async def test_url_authentication_bad_key(self,
                                           formrecognizer_test_endpoint,
                                           formrecognizer_test_api_key):
     """URL-based custom-form recognition with the key ``"xxxx"`` is rejected.

     The test expects ``ClientAuthenticationError``.
     """
     bad_credential = AzureKeyCredential("xxxx")
     fr_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                      bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with fr_client:
             operation = await fr_client.begin_recognize_custom_forms_from_url(
                 model_id="xx", form_url=self.form_url_jpg)
             await operation.result()
Exemplo n.º 6
0
 async def test_id_document_bad_endpoint(self, formrecognizer_test_endpoint,
                                         formrecognizer_test_api_key):
     """ID-document recognition against ``http://notreal.azure.com`` must fail.

     The test expects ``ServiceRequestError`` when the operation is started.
     """
     with open(self.id_document_jpg, "rb") as stream:
         document_bytes = stream.read()
     with self.assertRaises(ServiceRequestError):
         fr_client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(formrecognizer_test_api_key))
         async with fr_client:
             await fr_client.begin_recognize_id_documents(document_bytes)
 async def test_custom_form_none_model_id(self,
                                          formrecognizer_test_endpoint,
                                          formrecognizer_test_api_key):
     """Passing ``model_id=None`` with form bytes raises ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     fr_client = FormRecognizerClient(formrecognizer_test_endpoint, credential)
     with self.assertRaises(ValueError):
         async with fr_client:
             await fr_client.begin_recognize_custom_forms(
                 model_id=None, form=b"xx")
    async def test_auto_detect_unsupported_stream_content(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Content recognition on the unsupported sample file raises ``ValueError``.

        The bytes of ``self.unsupported_content_py`` are passed without an
        explicit content type.
        """
        client = FormRecognizerClient(form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))

        with open(self.unsupported_content_py, "rb") as fd:
            myfile = fd.read()

        with self.assertRaises(ValueError):
            # Close the client deterministically instead of leaking it.
            async with client:
                await client.recognize_content(myfile)
Exemplo n.º 9
0
 async def test_receipt_url_bad_endpoint(self, formrecognizer_test_endpoint,
                                         formrecognizer_test_api_key):
     """Receipt recognition by URL against ``http://notreal.azure.com`` must fail.

     The test expects ``ServiceRequestError``.
     """
     with self.assertRaises(ServiceRequestError):
         fr_client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(formrecognizer_test_api_key))
         async with fr_client:
             operation = await fr_client.begin_recognize_receipts_from_url(
                 self.receipt_url_jpg)
             await operation.result()
Exemplo n.º 10
0
    async def test_passing_bad_url(self, resource_group, location,
                                   form_recognizer_account,
                                   form_recognizer_account_key):
        """Custom-form recognition with the URL ``https://badurl.jpg`` fails.

        The test expects ``HttpResponseError``.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))

        with self.assertRaises(HttpResponseError):
            # Close the client deterministically instead of leaking it.
            async with client:
                await client.recognize_custom_forms_from_url(
                    model_id="xx", form_url="https://badurl.jpg")
Exemplo n.º 11
0
    async def recognize_custom_forms(self):
        """Recognize one sample form with two custom models and print the fields.

        The same form bytes are sent to the model referenced by
        ``self.model_trained_with_labels_id`` and to the one referenced by
        ``self.model_trained_without_labels_id``; each recognized field is
        printed with its value, source text, bounding box and confidence.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "..", "./sample_forms/forms/Form_1.jpg"))

        # Output templates shared by the two printing loops below.
        value_template = "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
        label_template = "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"

        credential = AzureKeyCredential(self.key)
        async with FormRecognizerClient(endpoint=self.endpoint,
                                        credential=credential) as form_recognizer_client:

            # Make sure your form's type is included in the list of form types the custom model can recognize
            with open(path_to_sample_forms, "rb") as f:
                stream = f.read()
            forms_with_labeled_model = await form_recognizer_client.recognize_custom_forms(
                model_id=self.model_trained_with_labels_id, form=stream)
            forms_with_unlabeled_model = await form_recognizer_client.recognize_custom_forms(
                model_id=self.model_trained_without_labels_id, form=stream)

            # For a model trained with labels the field name is the
            # training-time label; otherwise it is a numeric index.
            # Label data is not returned for a model trained with labels.
            print("---------Recognizing forms with models trained with labels---------")
            for labeled_form in forms_with_labeled_model:
                for name, field in labeled_form.fields.items():
                    value_box = format_bounding_box(field.value_data.bounding_box)
                    print(value_template.format(
                        name, field.value, field.value_data.text,
                        value_box, field.confidence))

            print("------------------------------------------------------------------")
            print("-------Recognizing forms with models trained without labels-------")
            for unlabeled_form in forms_with_unlabeled_model:
                for name, field in unlabeled_form.fields.items():
                    # A form recognized with a model trained without labels
                    # also carries label data, so print it first.
                    label_box = format_bounding_box(field.label_data.bounding_box)
                    print(label_template.format(
                        name, field.label_data.text, label_box,
                        field.confidence))
                    value_box = format_bounding_box(field.value_data.bounding_box)
                    print(value_template.format(
                        name, field.value, field.value_data.text,
                        value_box, field.confidence))
    async def test_receipt_multipage_transform_url(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Check that a multipage receipt fetched by URL is transformed correctly.

        A ``cls`` callback captures the deserialized ``AnalyzeOperationResult``
        and the model that ``prepare_receipt`` builds from it. The two are then
        compared field by field, along with page ranges, receipt type, receipt
        items and form pages.
        """
        client = FormRecognizerClient(form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))

        responses = []

        def callback(raw_response, _, headers):
            # Store the raw service result first, then the transformed model.
            analyze_result = client._client._deserialize(AnalyzeOperationResult, raw_response)
            extracted_receipt = prepare_receipt(analyze_result)
            responses.append(analyze_result)
            responses.append(extracted_receipt)

        # Close the client deterministically instead of leaking it.
        async with client:
            poller = await client.begin_recognize_receipts_from_url(
                self.multipage_url_pdf,
                include_text_content=True,
                cls=callback
            )
            await poller.result()

        raw_response = responses[0]
        returned_model = responses[1]
        read_results = raw_response.analyze_result.read_results
        document_results = raw_response.analyze_result.document_results

        # Fields compared one-to-one between the model and the raw result.
        compared_fields = (
            "MerchantAddress",
            "MerchantName",
            "MerchantPhoneNumber",
            "Subtotal",
            "Tax",
            "Tip",
            "Total",
            "TransactionDate",
            "TransactionTime",
        )

        # check hardcoded values
        for receipt, document_result in zip(returned_model, document_results):
            if document_result.fields is None:  # second page is blank
                continue

            # check dict values
            for field_name in compared_fields:
                self.assertFormFieldTransformCorrect(
                    receipt.fields.get(field_name),
                    document_result.fields.get(field_name),
                    read_results)

            # check page range
            self.assertEqual(receipt.page_range.first_page_number, document_result.page_range[0])
            self.assertEqual(receipt.page_range.last_page_number, document_result.page_range[1])

            # check receipt type
            receipt_type = receipt.fields.get("ReceiptType")
            self.assertEqual(receipt_type.confidence, document_result.fields["ReceiptType"].confidence)
            self.assertEqual(receipt_type.value, document_result.fields["ReceiptType"].value_string)

            # check receipt items
            self.assertReceiptItemsTransformCorrect(receipt.fields["Items"].value, document_result.fields["Items"], read_results)

        # Check form pages
        self.assertFormPagesTransformCorrect(returned_model, read_results)
Exemplo n.º 13
0
    async def recognize_receipts_from_url(self):
        """Recognize the sample receipt by URL and print the fields found.

        Endpoint and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
        ``AZURE_FORM_RECOGNIZER_KEY``. Fields missing from a receipt are
        skipped.
        """
        # [START recognize_receipts_from_url_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        # (field key, output template) pairs, in the order they are printed.
        leading_fields = (
            ("ReceiptType", "Receipt Type: {} has confidence: {}"),
            ("MerchantName", "Merchant Name: {} has confidence: {}"),
            ("TransactionDate", "Transaction Date: {} has confidence: {}"),
        )
        item_fields = (
            ("Name", "......Item Name: {} has confidence: {}"),
            ("Quantity", "......Item Quantity: {} has confidence: {}"),
            ("Price", "......Individual Item Price: {} has confidence: {}"),
            ("TotalPrice", "......Total Item Price: {} has confidence: {}"),
        )
        trailing_fields = (
            ("Subtotal", "Subtotal: {} has confidence: {}"),
            ("Tax", "Tax: {} has confidence: {}"),
            ("Tip", "Tip: {} has confidence: {}"),
            ("Total", "Total: {} has confidence: {}"),
        )

        async with FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        ) as form_recognizer_client:
            url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
            poller = await form_recognizer_client.begin_recognize_receipts_from_url(receipt_url=url)
            receipts = await poller.result()

            for receipt_number, receipt in enumerate(receipts, 1):
                print("--------Recognizing receipt #{}--------".format(receipt_number))

                for field_key, template in leading_fields:
                    field = receipt.fields.get(field_key)
                    if field:
                        print(template.format(field.value, field.confidence))

                items = receipt.fields.get("Items")
                if items:
                    print("Receipt items:")
                    for item_number, item in enumerate(items.value, 1):
                        print("...Item #{}".format(item_number))
                        for field_key, template in item_fields:
                            item_field = item.value.get(field_key)
                            if item_field:
                                print(template.format(item_field.value, item_field.confidence))

                for field_key, template in trailing_fields:
                    field = receipt.fields.get(field_key)
                    if field:
                        print(template.format(field.value, field.confidence))

                print("--------------------------------------")
Exemplo n.º 14
0
 async def test_content_url_bad_endpoint(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
     """Content recognition by URL against ``http://notreal.azure.com`` must fail.

     The test expects ``ServiceRequestError``.
     """
     with self.assertRaises(ServiceRequestError):
         fr_client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(form_recognizer_account_key))
         async with fr_client:
             operation = await fr_client.begin_recognize_content_from_url(
                 self.invoice_url_pdf)
             await operation.result()
 async def test_damaged_file_bytes_io_fails_autodetect(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """A stream with an unrecognized header raises ``ValueError``.

     No content type is passed, so the call must work it out from the bytes.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     # doesn't match any magic file numbers
     damaged_pdf = BytesIO(b"\x50\x44\x46\x55\x55\x55")
     with self.assertRaises(ValueError):
         # Close the client deterministically instead of leaking it.
         async with client:
             poller = await client.begin_recognize_receipts(damaged_pdf)
             await poller.result()
Exemplo n.º 16
0
 async def test_passing_enum_content_type(self, resource_group, location,
                                          form_recognizer_account,
                                          form_recognizer_account_key):
     """Receipt recognition accepts ``FormContentType.image_png`` as content type.

     The test only checks that the returned result is not ``None``.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.receipt_png, "rb") as fd:
         myfile = fd.read()
     # Close the client deterministically instead of leaking it.
     async with client:
         result = await client.recognize_receipts(
             myfile, content_type=FormContentType.image_png)
     self.assertIsNotNone(result)
Exemplo n.º 17
0
 async def test_damaged_file_passed_as_bytes_io(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """A damaged stream that still starts like a PDF raises ``HttpResponseError``."""
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     # still has correct bytes to be recognized as PDF
     damaged_pdf = BytesIO(b"\x25\x50\x44\x46\x55\x55\x55")
     with self.assertRaises(HttpResponseError):
         # Close the client deterministically instead of leaking it.
         async with client:
             await client.recognize_receipts(damaged_pdf)
Exemplo n.º 18
0
    async def test_blank_page(self, resource_group, location,
                              form_recognizer_account,
                              form_recognizer_account_key):
        """Receipt recognition on the blank PDF returns a non-``None`` result."""
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))

        with open(self.blank_pdf, "rb") as fd:
            blank = fd.read()
        # Close the client deterministically instead of leaking it.
        async with client:
            result = await client.recognize_receipts(blank)
        self.assertIsNotNone(result)
Exemplo n.º 19
0
 async def test_passing_bad_content_type_param_passed(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """The content type ``"application/jpeg"`` is rejected with ``ValueError``."""
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.receipt_jpg, "rb") as fd:
         myfile = fd.read()
     with self.assertRaises(ValueError):
         # Close the client deterministically instead of leaking it.
         async with client:
             await client.recognize_receipts(
                 myfile, content_type="application/jpeg")
    async def recognize_content(self):
        """Recognize the content of the sample selection-mark form and print it.

        For each returned page this prints the page size, every table and its
        cells, every line and its words, and every selection mark. Endpoint
        and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
        ``AZURE_FORM_RECOGNIZER_KEY``.
        """
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "..", "./sample_forms/forms/selection_mark_form.pdf"))
        # [START recognize_content_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        async with FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        ) as form_recognizer_client:

            with open(path_to_sample_forms, "rb") as f:
                poller = await form_recognizer_client.begin_recognize_content(form=f)

            form_pages = await poller.result()

            for page_number, content in enumerate(form_pages, 1):
                print("----Recognizing content from page #{}----".format(page_number))
                page_summary = "Page has width: {} and height: {}, measured with unit: {}"
                print(page_summary.format(content.width, content.height, content.unit))

                for table_idx, table in enumerate(content.tables):
                    print("Table # {} has {} rows and {} columns".format(table_idx, table.row_count, table.column_count))
                    table_box = format_bounding_box(table.bounding_box)
                    print("Table # {} location on page: {}".format(table_idx, table_box))
                    for cell in table.cells:
                        cell_box = format_bounding_box(cell.bounding_box)
                        print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
                            cell.row_index, cell.column_index, cell.text, cell_box))

                for line_idx, line in enumerate(content.lines):
                    word_count = len(line.words)
                    line_box = format_bounding_box(line.bounding_box)
                    print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
                        line_idx, word_count, line.text, line_box))
                    for word in line.words:
                        print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))

                for selection_mark in content.selection_marks:
                    mark_box = format_bounding_box(selection_mark.bounding_box)
                    print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                        selection_mark.state, mark_box, selection_mark.confidence))
                print("----------------------------------------")
Exemplo n.º 21
0
 async def test_content_bad_endpoint(self, resource_group, location,
                                     form_recognizer_account,
                                     form_recognizer_account_key):
     """Content recognition against ``http://notreal.azure.com`` must fail.

     The test expects ``ServiceRequestError``.
     """
     with open(self.invoice_pdf, "rb") as fd:
         myfile = fd.read()
     with self.assertRaises(ServiceRequestError):
         client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(form_recognizer_account_key))
         # Close the client deterministically even though the request is
         # expected to raise.
         async with client:
             poller = await client.begin_recognize_content(myfile)
             await poller.result()
Exemplo n.º 22
0
 async def test_custom_form_bad_endpoint(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
     """Custom-form recognition against ``http://notreal.azure.com`` must fail.

     The test expects ``ServiceRequestError``.
     """
     with open(self.form_jpg, "rb") as fd:
         myfile = fd.read()
     with self.assertRaises(ServiceRequestError):
         client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(form_recognizer_account_key))
         # Close the client deterministically even though the request is
         # expected to raise.
         async with client:
             await client.recognize_custom_forms(model_id="xx",
                                                 stream=myfile)
Exemplo n.º 23
0
 async def test_content_authentication_bad_key(self,
                                               formrecognizer_test_endpoint,
                                               formrecognizer_test_api_key,
                                               **kwargs):
     """Content recognition with the placeholder key ``"xxxx"`` is rejected.

     The test expects ``ClientAuthenticationError``.
     """
     bad_credential = AzureKeyCredential("xxxx")
     fr_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                      bad_credential)
     with pytest.raises(ClientAuthenticationError):
         async with fr_client:
             operation = await fr_client.begin_recognize_content(
                 b"xxx", content_type="application/pdf")
             await operation.result()
Exemplo n.º 24
0
    async def test_content_url_pass_stream(self, resource_group, location,
                                           form_recognizer_account,
                                           form_recognizer_account_key):
        """Passing raw bytes to the URL-based content API raises ``HttpResponseError``."""
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))
        with open(self.receipt_jpg, "rb") as fd:
            receipt = fd.read(4)  # makes the recording smaller

        with self.assertRaises(HttpResponseError):
            # Close the client deterministically instead of leaking it.
            async with client:
                await client.recognize_content_from_url(receipt)
Exemplo n.º 25
0
    async def test_content_multipage_url(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
        """Content recognition on the multipage PDF URL returns three pages.

        The pages are also checked with ``assertFormPagesHasValues``.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))
        # Close the client deterministically instead of leaking it.
        async with client:
            result = await client.recognize_content_from_url(
                self.multipage_url_pdf)

        self.assertEqual(len(result), 3)
        self.assertFormPagesHasValues(result)
    async def test_passing_unsupported_url_content_type(
            self, resource_group, location, form_recognizer_account,
            form_recognizer_account_key):
        """A URL string sent to the stream API as ``application/json`` raises ``TypeError``."""
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))

        with self.assertRaises(TypeError):
            # Close the client deterministically instead of leaking it.
            async with client:
                poller = await client.begin_recognize_receipts(
                    "https://badurl.jpg", content_type="application/json")
                await poller.result()
Exemplo n.º 27
0
 async def test_content_bad_endpoint(self, formrecognizer_test_endpoint,
                                     formrecognizer_test_api_key, **kwargs):
     """Content recognition against ``http://notreal.azure.com`` must fail.

     The test expects ``ServiceRequestError``.
     """
     with open(self.invoice_pdf, "rb") as stream:
         invoice_bytes = stream.read()
     with pytest.raises(ServiceRequestError):
         fr_client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(formrecognizer_test_api_key))
         async with fr_client:
             operation = await fr_client.begin_recognize_content(invoice_bytes)
             await operation.result()
 async def test_authentication_successful_key(self, resource_group,
                                              location,
                                              form_recognizer_account,
                                              form_recognizer_account_key):
     """Receipt recognition with the supplied account key completes without raising."""
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.receipt_jpg, "rb") as fd:
         myfile = fd.read()
     # Close the client deterministically instead of leaking it.
     async with client:
         poller = await client.begin_recognize_receipts(myfile)
         await poller.result()
 async def test_damaged_file_bytes_io_fails_autodetect(
         self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """A stream with an unrecognized header raises ``ValueError``.

     No content type is passed to the receipt call.
     """
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     fr_client = FormRecognizerClient(formrecognizer_test_endpoint, credential)
     # doesn't match any magic file numbers
     payload = BytesIO(b"\x50\x44\x46\x55\x55\x55")
     with pytest.raises(ValueError):
         async with fr_client:
             operation = await fr_client.begin_recognize_receipts(payload)
             await operation.result()
    async def test_polling_interval(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Check client-level and per-call ``polling_interval`` handling.

        The client is built with ``polling_interval=7``. A call that passes
        ``polling_interval=6`` must poll with 6, and a later call without the
        keyword must poll with 7 again.
        """
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential, polling_interval=7)
        self.assertEqual(client._client._config.polling_interval, 7)

        async with client:
            overridden = await client.begin_recognize_receipts_from_url(
                self.receipt_url_jpg, polling_interval=6)
            await overridden.wait()
            self.assertEqual(overridden._polling_method._timeout, 6)

            defaulted = await client.begin_recognize_receipts_from_url(
                self.receipt_url_jpg)
            await defaulted.wait()
            self.assertEqual(defaulted._polling_method._timeout, 7)  # goes back to client default