Exemplo n.º 1
0
 async def test_document_analysis_empty_model_id(self, **kwargs):
     """Analyzing from a URL with an empty model ID must raise ``ValueError``.

     The endpoint and API key are popped from ``kwargs`` under the
     ``formrecognizer_test_endpoint`` and ``formrecognizer_test_api_key`` keys.
     """
     endpoint = kwargs.pop("formrecognizer_test_endpoint")
     api_key = kwargs.pop("formrecognizer_test_api_key")
     client = DocumentAnalysisClient(endpoint, AzureKeyCredential(api_key))
     with pytest.raises(ValueError):
         async with client:
             await client.begin_analyze_document_from_url(
                 model="",
                 document_url="https://badurl.jpg",
             )
Exemplo n.º 2
0
 async def test_receipt_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """Analyzing receipt bytes against a made-up endpoint must raise ``ServiceRequestError``."""
     with open(self.receipt_jpg, "rb") as stream:
         receipt_bytes = stream.read()
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = DocumentAnalysisClient("http://notreal.azure.com", credential)
         async with client:
             poller = await client.begin_analyze_document(
                 "prebuilt-receipt",
                 receipt_bytes,
             )
             await poller.result()
Exemplo n.º 3
0
 async def test_analyze_document_empty_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """Analyzing raw bytes with an empty model ID must raise ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)
     with self.assertRaises(ValueError):
         async with client:
             await client.begin_analyze_document(
                 model="",
                 document=b"xx",
             )
 async def test_active_directory_auth_async(self):
     """Analyze the receipt URL with an OAuth token credential and expect a non-``None`` result."""
     oauth_token = self.generate_oauth_token()
     oauth_endpoint = self.get_oauth_endpoint()
     client = DocumentAnalysisClient(oauth_endpoint, oauth_token)
     async with client:
         poller = await client.begin_analyze_document_from_url(
             "prebuilt-receipt",
             self.receipt_url_jpg,
         )
         analysis = await poller.result()
     assert analysis is not None
 async def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """Analyzing the receipt URL with the key ``"xxxx"`` must raise ``ClientAuthenticationError``."""
     bad_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with client:
             poller = await client.begin_analyze_document_from_url(
                 "prebuilt-receipt",
                 self.receipt_url_jpg,
             )
             await poller.result()
 async def test_receipt_url_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
     """Analyzing the receipt URL against a made-up endpoint must raise ``ServiceRequestError``."""
     set_bodiless_matcher()
     with pytest.raises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = DocumentAnalysisClient("http://notreal.azure.com", credential)
         async with client:
             poller = await client.begin_analyze_document_from_url("prebuilt-receipt", self.receipt_url_jpg)
             await poller.result()
Exemplo n.º 7
0
 async def test_document_analysis_none_model(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """Analyzing from a URL with ``model=None`` must raise ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)
     with self.assertRaises(ValueError):
         async with client:
             await client.begin_analyze_document_from_url(
                 model=None,
                 document_url="https://badurl.jpg",
             )
 async def test_analyze_document_none_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
     """Analyzing raw bytes with ``model=None`` must raise ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)
     with pytest.raises(ValueError):
         async with client:
             await client.begin_analyze_document(
                 model=None,
                 document=b"xx",
             )
 async def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
     """Analyzing the receipt URL with the key ``"xxxx"`` must raise ``ClientAuthenticationError``."""
     set_bodiless_matcher()
     bad_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)
     with pytest.raises(ClientAuthenticationError):
         async with client:
             poller = await client.begin_analyze_document_from_url(
                 "prebuilt-receipt",
                 self.receipt_url_jpg,
             )
             await poller.result()
    async def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
        """Check that a per-call ``polling_interval`` overrides the client-level value for that call only.

        Returns an empty dict.
        """
        set_bodiless_matcher()
        client_interval = 7
        call_interval = 6
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint,
            AzureKeyCredential(formrecognizer_test_api_key),
            polling_interval=client_interval,
        )
        assert client._client._config.polling_interval == client_interval

        async with client:
            # First call passes its own interval explicitly.
            overriding_poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
                polling_interval=call_interval,
            )
            await overriding_poller.wait()
            assert overriding_poller._polling_method._timeout == call_interval

            # Second call passes none, so the client-level interval applies again.
            default_poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await default_poller.wait()
            assert default_poller._polling_method._timeout == client_interval

        return {}
Exemplo n.º 11
0
async def authentication_with_api_key_credential_document_analysis_client_async():
    """Create a ``DocumentAnalysisClient`` from an API key and run a ``prebuilt-layout`` analysis.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables; a missing
    variable raises ``KeyError``.
    """
    # NOTE(review): the START/END markers below look like doc-snippet
    # delimiters — keep the code between them self-contained; confirm with
    # the docs tooling.
    # [START create_da_client_with_key_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(endpoint, AzureKeyCredential(key))
    # [END create_da_client_with_key_async]
    # Exercise the freshly built client with one analysis call.
    # NOTE(review): `url` is not defined in this function — presumably a
    # module-level name; verify.
    async with document_analysis_client:
        poller = await document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-layout", url
        )
        result = await poller.result()
Exemplo n.º 12
0
async def convert_to_and_from_dict_async():
    """Analyze ``Form_1.jpg``, write the result to ``data.json`` and rebuild the model from the dict.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables.
    """
    this_file = os.path.abspath(__file__)
    path_to_sample_documents = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    credential = AzureKeyCredential(key)

    document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document",
                document=document_stream,
            )
        result = await poller.result()

    # Model -> plain dictionary.
    analyze_result_dict = result.to_dict()

    # Dictionary -> JSON file. AzureJSONEncoder makes types such as dates
    # JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(analyze_result_dict, json_file, cls=AzureJSONEncoder)

    # Dictionary -> model again; it is then used like the original result.
    model = AnalyzeResult.from_dict(analyze_result_dict)
    page_count = len(model.pages)

    print("----Converted from dictionary AnalyzeResult----")
    print("Model ID: '{}'".format(model.model_id))
    print("Number of pages analyzed {}".format(page_count))
    print("API version used: {}".format(model.api_version))

    print("----------------------------------------")
Exemplo n.º 13
0
 async def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
     """Analyzing the receipt URL with the key ``"xxxx"`` must raise ``ClientAuthenticationError``."""
     # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
     ignored_headers = "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
     set_custom_default_matcher(compare_bodies=False, excluded_headers=ignored_headers)

     bad_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)
     with pytest.raises(ClientAuthenticationError):
         async with client:
             poller = await client.begin_analyze_document_from_url(
                 "prebuilt-receipt",
                 self.receipt_url_jpg,
             )
             await poller.result()
async def get_words_on_document_line_async():
    """Analyze ``Form_1.jpg`` with ``prebuilt-document`` and print every line with its words.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables.
    """
    this_file = os.path.abspath(__file__)
    path_to_sample_documents = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    credential = AzureKeyCredential(key)

    document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document",
                document=document_stream,
            )
        result = await poller.result()

    # Pages are reported 1-based, lines 0-based.
    for page_number, page in enumerate(result.pages, start=1):
        print("----Analyzing lines and words from page #{}----".format(page_number))
        page_summary = "Page has width: {} and height: {}, measured with unit: {}".format(
            page.width, page.height, page.unit
        )
        print(page_summary)

        for line_idx, line in enumerate(page.lines):
            words = line.get_words()
            line_summary = "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                line_idx,
                len(words),
                line.content,
                format_bounding_box(line.bounding_box),
            )
            print(line_summary)

            for word in words:
                print("......Word '{}' has a confidence of {}".format(word.content, word.confidence))

    print("----------------------------------------")
Exemplo n.º 15
0
    async def test_mock_quota_exceeded_429(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
        """A mocked 429 response must surface as ``HttpResponseError`` carrying the service's message."""
        quota_message = (
            "Out of call volume quota for FormRecognizer F0 pricing tier. "
            "Please retry after 1 day. To increase your call volume switch to a paid tier."
        )
        error_body = json.dumps({"error": {"code": "429", "message": quota_message}})

        response = mock.Mock(
            status_code=429,
            headers={"Retry-After": 186688, "Content-Type": "application/json"},
            reason="Bad Request",
        )
        response.content_type = "application/json"
        response.text = lambda encoding=None: error_body

        # Every request sent through this transport gets the canned 429 response.
        transport = AsyncMockTransport(send=wrap_in_future(lambda request, **kwargs: response))
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint,
            AzureKeyCredential(formrecognizer_test_api_key),
            transport=transport,
        )

        with pytest.raises(HttpResponseError) as excinfo:
            await client.begin_analyze_document_from_url("prebuilt-receipt", self.receipt_url_jpg)
        assert excinfo.value.status_code == 429
        assert excinfo.value.error.message == quota_message
Exemplo n.º 16
0
    async def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
        """Check that a per-call ``polling_interval`` overrides the client-level value for that call only."""
        client_interval = 7
        call_interval = 6
        credential = AzureKeyCredential(formrecognizer_test_api_key)
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint,
            credential,
            polling_interval=client_interval,
        )
        self.assertEqual(client._client._config.polling_interval, client_interval)

        async with client:
            # First call passes its own interval explicitly.
            overriding_poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
                polling_interval=call_interval,
            )
            await overriding_poller.wait()
            self.assertEqual(overriding_poller._polling_method._timeout, call_interval)

            # Second call passes none, so the client-level interval applies again.
            default_poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await default_poller.wait()
            self.assertEqual(default_poller._polling_method._timeout, client_interval)
Exemplo n.º 17
0
async def authentication_with_azure_active_directory_document_analysis_client_async(
):
    # NOTE(review): the START/END markers below look like doc-snippet
    # delimiters — keep the code between them self-contained; confirm with
    # the docs tooling.
    # [START create_da_client_with_aad_async]
    """DefaultAzureCredential will use the values from these environment
    variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
    """
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.identity.aio import DefaultAzureCredential

    # The service endpoint comes from the environment; a missing variable
    # raises KeyError.
    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    credential = DefaultAzureCredential()

    document_analysis_client = DocumentAnalysisClient(endpoint, credential)
    # [END create_da_client_with_aad_async]
    # Exercise the freshly built client with one "prebuilt-layout" analysis.
    # NOTE(review): `url` is not defined in this function — presumably a
    # module-level name; verify.
    async with document_analysis_client:
        poller = await document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-layout", url)
        result = await poller.result()
Exemplo n.º 18
0
    async def test_logging_info_dac_client(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
        """Check that at INFO level only the API-key header is redacted in "azure" log output.

        A ``MockHandler`` is attached to the "azure" logger for the duration
        of one analysis call. Every captured INFO record that mentions
        ``Ocp-Apim-Subscription-Key`` must contain ``REDACTED``; every other
        INFO record must not.
        """
        client = DocumentAnalysisClient(formrecognizer_test_endpoint, AzureKeyCredential(formrecognizer_test_api_key))
        mock_handler = MockHandler()

        logger = logging.getLogger("azure")
        previous_level = logger.level
        logger.addHandler(mock_handler)
        logger.setLevel(logging.INFO)
        try:
            async with client:
                poller = await client.begin_analyze_document_from_url("prebuilt-receipt", self.receipt_url_jpg)
                await poller.result()
        finally:
            # The "azure" logger is process-global: detach the handler and
            # restore the level so this test does not affect later ones.
            logger.removeHandler(mock_handler)
            logger.setLevel(previous_level)

        for message in mock_handler.messages:
            if message.levelname == "INFO":
                # not able to use json.loads here. At INFO level only API key should be REDACTED
                if "Ocp-Apim-Subscription-Key" in message.message:
                    assert "REDACTED" in message.message
                else:
                    assert "REDACTED" not in message.message
Exemplo n.º 19
0
    async def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
        """Check that a per-call ``polling_interval`` overrides the client-level value for that call only."""
        # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
        ignored_headers = "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
        set_custom_default_matcher(compare_bodies=False, excluded_headers=ignored_headers)

        client_interval = 7
        call_interval = 6
        credential = AzureKeyCredential(formrecognizer_test_api_key)
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint,
            credential,
            polling_interval=client_interval,
        )
        assert client._client._config.polling_interval == client_interval

        async with client:
            # First call passes its own interval explicitly.
            overriding_poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
                polling_interval=call_interval,
            )
            await overriding_poller.wait()
            assert overriding_poller._polling_method._timeout == call_interval

            # Second call passes none, so the client-level interval applies again.
            default_poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await default_poller.wait()
            assert default_poller._polling_method._timeout == client_interval
 def test_bad_api_version_document_analysis_client(self):
     """Constructing a client with ``api_version="9"`` must raise ``ValueError`` listing the supported versions."""
     supported_versions = ", ".join(v.value for v in DocumentAnalysisApiVersion)
     expected_message = "Unsupported API version '9'. Please select from: {}".format(supported_versions)
     with pytest.raises(ValueError) as excinfo:
         DocumentAnalysisClient("url", "key", api_version="9")
     assert expected_message == str(excinfo.value)
Exemplo n.º 21
0
async def analyze_invoice_async():
    """Analyze the sample invoice with the ``prebuilt-invoice`` model and print its fields.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables; a missing
    variable raises ``KeyError``. For every analyzed document, each field
    that is present is printed with its value and confidence: invoice-level
    fields first, then the line items, then totals and service/remittance
    fields. Fields that are absent are skipped, and an invoice without an
    ``Items`` field prints no items instead of failing.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/sample_invoice.jpg",
        )
    )

    # [START analyze_invoices_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    # (field key, printed label) pairs, in the order they are reported.
    # Invoice-level fields printed before the line items:
    header_fields = (
        ("VendorName", "Vendor Name"),
        ("VendorAddress", "Vendor Address"),
        ("VendorAddressRecipient", "Vendor Address Recipient"),
        ("CustomerName", "Customer Name"),
        ("CustomerId", "Customer Id"),
        ("CustomerAddress", "Customer Address"),
        ("CustomerAddressRecipient", "Customer Address Recipient"),
        ("InvoiceId", "Invoice Id"),
        ("InvoiceDate", "Invoice Date"),
        ("InvoiceTotal", "Invoice Total"),
        ("DueDate", "Due Date"),
        ("PurchaseOrder", "Purchase Order"),
        ("BillingAddress", "Billing Address"),
        ("BillingAddressRecipient", "Billing Address Recipient"),
        ("ShippingAddress", "Shipping Address"),
        ("ShippingAddressRecipient", "Shipping Address Recipient"),
    )
    # Sub-fields printed for each line item:
    item_fields = (
        ("Description", "Description"),
        ("Quantity", "Quantity"),
        ("Unit", "Unit"),
        ("UnitPrice", "Unit Price"),
        ("ProductCode", "Product Code"),
        ("Date", "Date"),
        ("Tax", "Tax"),
        ("Amount", "Amount"),
    )
    # Invoice-level fields printed after the line items:
    summary_fields = (
        ("SubTotal", "Subtotal"),
        ("TotalTax", "Total Tax"),
        ("PreviousUnpaidBalance", "Previous Unpaid Balance"),
        ("AmountDue", "Amount Due"),
        ("ServiceStartDate", "Service Start Date"),
        ("ServiceEndDate", "Service End Date"),
        ("ServiceAddress", "Service Address"),
        ("ServiceAddressRecipient", "Service Address Recipient"),
        ("RemittanceAddress", "Remittance Address"),
        ("RemittanceAddressRecipient", "Remittance Address Recipient"),
    )

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-invoice", document=f, locale="en-US"
            )
        invoices = await poller.result()

    for idx, invoice in enumerate(invoices.documents):
        print("--------Recognizing invoice #{}--------".format(idx + 1))
        for field_key, label in header_fields:
            field = invoice.fields.get(field_key)
            if field:
                print(
                    "{}: {} has confidence: {}".format(
                        label, field.value, field.confidence
                    )
                )

        print("Invoice items:")
        # "Items" may be missing from the result (or carry no value); treat
        # both as an empty item list rather than dereferencing None.
        items_field = invoice.fields.get("Items")
        items = (items_field.value or []) if items_field else []
        for item_idx, item in enumerate(items):
            print("...Item #{}".format(item_idx + 1))
            for field_key, label in item_fields:
                field = item.value.get(field_key)
                if field:
                    print(
                        "......{}: {} has confidence: {}".format(
                            label, field.value, field.confidence
                        )
                    )

        for field_key, label in summary_fields:
            field = invoice.fields.get(field_key)
            if field:
                print(
                    "{}: {} has confidence: {}".format(
                        label, field.value, field.confidence
                    )
                )
Exemplo n.º 22
0
async def analyze_read():
    """Analyze ``Form_1.jpg`` with ``prebuilt-read`` and print languages, lines, words and selection marks.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables.
    """
    this_file = os.path.abspath(__file__)
    path_to_sample_documents = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    credential = AzureKeyCredential(key)

    document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)

    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-read",
                document=document_stream,
            )
        result = await poller.result()

    print("----Languages detected in the document----")
    for language in result.languages:
        language_summary = "Language code: '{}' with confidence {}".format(
            language.language_code, language.confidence
        )
        print(language_summary)

    for page in result.pages:
        print("----Analyzing document from page #{}----".format(page.page_number))
        page_summary = "Page has width: {} and height: {}, measured with unit: {}".format(
            page.width, page.height, page.unit
        )
        print(page_summary)

        for line_idx, line in enumerate(page.lines):
            words = line.get_words()
            line_summary = "...Line # {} has {} words and text '{}' within bounding box '{}'".format(
                line_idx,
                len(words),
                line.content,
                format_bounding_box(line.bounding_box),
            )
            print(line_summary)

            for word in words:
                print("......Word '{}' has a confidence of {}".format(word.content, word.confidence))

        for selection_mark in page.selection_marks:
            mark_summary = "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                selection_mark.state,
                format_bounding_box(selection_mark.bounding_box),
                selection_mark.confidence,
            )
            print(mark_summary)

    print("----------------------------------------")
async def analyze_layout_async():
    """Analyze ``form_selection_mark.png`` with ``prebuilt-layout`` and print styles, pages and tables.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables.
    """
    this_file = os.path.abspath(__file__)
    path_to_sample_documents = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/form_selection_mark.png")
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    credential = AzureKeyCredential(key)

    document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-layout",
                document=document_stream,
            )
        result = await poller.result()

    # One line per detected style.
    for style in result.styles:
        if style.is_handwritten:
            handwriting = "handwritten"
        else:
            handwriting = "no handwritten"
        print("Document contains {} content".format(handwriting))

    # Pages are reported 1-based, lines 0-based.
    for page_number, page in enumerate(result.pages, start=1):
        print("----Analyzing layout from page #{}----".format(page_number))
        page_summary = "Page has width: {} and height: {}, measured with unit: {}".format(
            page.width, page.height, page.unit
        )
        print(page_summary)

        for line_idx, line in enumerate(page.lines):
            words = line.get_words()
            line_summary = "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                line_idx,
                len(words),
                line.content,
                format_bounding_box(line.bounding_box),
            )
            print(line_summary)

            for word in words:
                print("......Word '{}' has a confidence of {}".format(word.content, word.confidence))

        for selection_mark in page.selection_marks:
            mark_summary = "Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                selection_mark.state,
                format_bounding_box(selection_mark.bounding_box),
                selection_mark.confidence,
            )
            print(mark_summary)

    for table_idx, table in enumerate(result.tables):
        table_summary = "Table # {} has {} rows and {} columns".format(
            table_idx, table.row_count, table.column_count
        )
        print(table_summary)

        for table_region in table.bounding_regions:
            location_summary = "Table # {} location on page: {} is {}".format(
                table_idx,
                table_region.page_number,
                format_bounding_box(table_region.bounding_box),
            )
            print(location_summary)

        for cell in table.cells:
            cell_summary = "...Cell[{}][{}] has text '{}'".format(
                cell.row_index, cell.column_index, cell.content
            )
            print(cell_summary)

            for cell_region in cell.bounding_regions:
                region_summary = "...content on page {} is within bounding box '{}'".format(
                    cell_region.page_number,
                    format_bounding_box(cell_region.bounding_box),
                )
                print(region_summary)

    print("----------------------------------------")
async def analyze_business_card_async():
    """Analyze the bundled English business card sample and print its fields.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. The sample image
    is submitted to the ``prebuilt-businessCard`` model with locale ``en-US``
    and every field present in the result is printed with its confidence.

    Raises:
        KeyError: if either environment variable is not set.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/business_cards/business-card-english.jpg",
        ))

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key))
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-businessCard", document=f, locale="en-US")
        business_cards = await poller.result()

    for idx, business_card in enumerate(business_cards.documents):
        print("--------Analyzing business card #{}--------".format(idx + 1))
        contact_names = business_card.fields.get("ContactNames")
        if contact_names:
            for contact_name in contact_names.value:
                # A contact may carry only part of a name. Look each part up
                # with ``.get`` and guard it, so a card without a first or
                # last name is reported partially instead of raising KeyError.
                first_name = contact_name.value.get("FirstName")
                if first_name:
                    print("Contact First Name: {} has confidence: {}".format(
                        first_name.value,
                        first_name.confidence,
                    ))
                last_name = contact_name.value.get("LastName")
                if last_name:
                    print("Contact Last Name: {} has confidence: {}".format(
                        last_name.value,
                        last_name.confidence,
                    ))
        company_names = business_card.fields.get("CompanyNames")
        if company_names:
            for company_name in company_names.value:
                print("Company Name: {} has confidence: {}".format(
                    company_name.value, company_name.confidence))
        departments = business_card.fields.get("Departments")
        if departments:
            for department in departments.value:
                print("Department: {} has confidence: {}".format(
                    department.value, department.confidence))
        job_titles = business_card.fields.get("JobTitles")
        if job_titles:
            for job_title in job_titles.value:
                print("Job Title: {} has confidence: {}".format(
                    job_title.value, job_title.confidence))
        emails = business_card.fields.get("Emails")
        if emails:
            for email in emails.value:
                print("Email: {} has confidence: {}".format(
                    email.value, email.confidence))
        websites = business_card.fields.get("Websites")
        if websites:
            for website in websites.value:
                print("Website: {} has confidence: {}".format(
                    website.value, website.confidence))
        addresses = business_card.fields.get("Addresses")
        if addresses:
            for address in addresses.value:
                print("Address: {} has confidence: {}".format(
                    address.value, address.confidence))
        # Mobile, fax and work numbers are printed from ``content`` (the text
        # as it appears on the card).
        mobile_phones = business_card.fields.get("MobilePhones")
        if mobile_phones:
            for phone in mobile_phones.value:
                print("Mobile phone number: {} has confidence: {}".format(
                    phone.content, phone.confidence))
        faxes = business_card.fields.get("Faxes")
        if faxes:
            for fax in faxes.value:
                print("Fax number: {} has confidence: {}".format(
                    fax.content, fax.confidence))
        work_phones = business_card.fields.get("WorkPhones")
        if work_phones:
            for work_phone in work_phones.value:
                print("Work phone number: {} has confidence: {}".format(
                    work_phone.content, work_phone.confidence))
        other_phones = business_card.fields.get("OtherPhones")
        if other_phones:
            for other_phone in other_phones.value:
                print("Other phone number: {} has confidence: {}".format(
                    other_phone.value, other_phone.confidence))
Exemplo n.º 25
0
async def analyze_receipts_from_url_async():
    """Analyze a receipt image hosted at a public URL and print its fields.

    The endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. The image is analyzed
    with the ``prebuilt-receipt`` model; each field present in the result is
    printed with its confidence, including the per-item breakdown.
    """
    # [START analyze_receipts_from_url_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    def show(template, field):
        # Print nothing for a field that is absent (falsy).
        if field:
            print(template.format(field.value, field.confidence))

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    receipt_url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"

    client = DocumentAnalysisClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )
    async with client:
        poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            document_url=receipt_url,
        )
        receipts = await poller.result()

    for receipt_number, receipt in enumerate(receipts.documents, start=1):
        print("--------Recognizing receipt #{}--------".format(receipt_number))
        fields = receipt.fields

        show("Receipt Type: {} has confidence: {}", fields.get("ReceiptType"))
        show("Merchant Name: {} has confidence: {}", fields.get("MerchantName"))
        show(
            "Transaction Date: {} has confidence: {}",
            fields.get("TransactionDate"),
        )

        items = fields.get("Items")
        if items:
            print("Receipt items:")
            for item_number, item in enumerate(items.value, start=1):
                print("...Item #{}".format(item_number))
                parts = item.value
                show("......Item Name: {} has confidence: {}", parts.get("Name"))
                show(
                    "......Item Quantity: {} has confidence: {}",
                    parts.get("Quantity"),
                )
                show(
                    "......Individual Item Price: {} has confidence: {}",
                    parts.get("Price"),
                )
                show(
                    "......Total Item Price: {} has confidence: {}",
                    parts.get("TotalPrice"),
                )

        show("Subtotal: {} has confidence: {}", fields.get("Subtotal"))
        show("Tax: {} has confidence: {}", fields.get("Tax"))
        show("Tip: {} has confidence: {}", fields.get("Tip"))
        show("Total: {} has confidence: {}", fields.get("Total"))
        print("--------------------------------------")
async def analyze_tax_us_w2_async():
    """Analyze the bundled sample W-2 form and print every recognized field.

    The endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. The sample image is
    analyzed with the ``prebuilt-tax.us.w2`` model (locale ``en-US``). Each
    field present in the result is printed with its confidence; nested
    employee, employer, additional-info, state-tax and local-tax groups are
    printed under their own headings.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/tax/sample_w2.png",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    def report_all(fields, entries):
        # For each (key, template) pair, print the field if it is present.
        for field_key, template in entries:
            field = fields.get(field_key)
            if field:
                print(template.format(field.value, field.confidence))

    # (field key, output template) tables, listed in print order.
    heading_fields = (
        ("W2FormVariant", "Form variant: {} has confidence: {}"),
        ("TaxYear", "Tax year: {} has confidence: {}"),
        ("W2Copy", "W-2 Copy: {} has confidence: {}"),
    )
    employee_fields = (
        ("Name", "...Name: {} has confidence: {}"),
        ("SocialSecurityNumber", "...SSN: {} has confidence: {}"),
        ("Address", "...Address: {} has confidence: {}"),
        ("ZipCode", "...Zipcode: {} has confidence: {}"),
    )
    control_fields = (
        ("ControlNumber", "Control Number: {} has confidence: {}"),
    )
    employer_fields = (
        ("Name", "...Name: {} has confidence: {}"),
        ("IdNumber", "...ID Number: {} has confidence: {}"),
        ("Address", "...Address: {} has confidence: {}"),
        ("ZipCode", "...Zipcode: {} has confidence: {}"),
    )
    amount_fields = (
        (
            "WagesTipsAndOtherCompensation",
            "Wages, tips, and other compensation: {} has confidence: {}",
        ),
        (
            "FederalIncomeTaxWithheld",
            "Federal income tax withheld: {} has confidence: {}",
        ),
        ("SocialSecurityWages", "Social Security wages: {} has confidence: {}"),
        (
            "SocialSecurityTaxWithheld",
            "Social Security tax withheld: {} has confidence: {}",
        ),
        ("MedicareWagesAndTips", "Medicare wages and tips: {} has confidence: {}"),
        ("MedicareTaxWithheld", "Medicare tax withheld: {} has confidence: {}"),
        ("SocialSecurityTips", "Social Security tips: {} has confidence: {}"),
        ("AllocatedTips", "Allocated tips: {} has confidence: {}"),
        ("VerificationCode", "Verification code: {} has confidence: {}"),
        (
            "DependentCareBenefits",
            "Dependent care benefits: {} has confidence: {}",
        ),
        ("NonQualifiedPlans", "Non-qualified plans: {} has confidence: {}"),
    )
    additional_info_fields = (
        ("LetterCode", "...Letter code: {} has confidence: {}"),
        ("Amount", "...Amount: {} has confidence: {}"),
    )
    flag_fields = (
        ("IsStatutoryEmployee", "Is statutory employee: {} has confidence: {}"),
        ("IsRetirementPlan", "Is retirement plan: {} has confidence: {}"),
        ("IsThirdPartySickPay", "Is third party sick pay: {} has confidence: {}"),
        ("Other", "Other information: {} has confidence: {}"),
    )
    state_tax_fields = (
        ("State", "...State: {} has confidence: {}"),
        (
            "EmployerStateIdNumber",
            "...Employer state ID number: {} has confidence: {}",
        ),
        ("StateWagesTipsEtc", "...State wages, tips, etc: {} has confidence: {}"),
        ("StateIncomeTax", "...State income tax: {} has confidence: {}"),
    )
    local_tax_fields = (
        ("LocalWagesTipsEtc", "...Local wages, tips, etc: {} has confidence: {}"),
        ("LocalIncomeTax", "...Local income tax: {} has confidence: {}"),
        ("LocalityName", "...Locality name: {} has confidence: {}"),
    )

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-tax.us.w2",
                document=stream,
                locale="en-US",
            )
        w2s = await poller.result()

    for form_number, w2 in enumerate(w2s.documents, start=1):
        print("--------Recognizing US Tax W-2 Form #{}--------".format(form_number))
        fields = w2.fields

        report_all(fields, heading_fields)

        employee = fields.get("Employee")
        if employee:
            print("Employee data:")
            report_all(employee.value, employee_fields)

        report_all(fields, control_fields)

        employer = fields.get("Employer")
        if employer:
            print("Employer data:")
            report_all(employer.value, employer_fields)

        report_all(fields, amount_fields)

        additional_info = fields.get("AdditionalInfo")
        if additional_info:
            print("Additional information:")
            for entry in additional_info.value:
                report_all(entry.value, additional_info_fields)

        report_all(fields, flag_fields)

        state_tax_info = fields.get("StateTaxInfos")
        if state_tax_info:
            print("State Tax info:")
            for state_entry in state_tax_info.value:
                report_all(state_entry.value, state_tax_fields)

        local_tax_info = fields.get("LocalTaxInfos")
        if local_tax_info:
            print("Local Tax info:")
            for local_entry in local_tax_info.value:
                report_all(local_entry.value, local_tax_fields)
async def get_elements_with_spans_async():
    """Show how spans relate tables to lines and entities to text styles.

    The bundled ``Form_1.jpg`` sample is analyzed with the
    ``prebuilt-document`` model, using the endpoint and key from the
    ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``
    environment variables. For each table the lines found through its spans
    are printed; for each entity the handwritten styles found through its
    spans are printed.

    Relies on the ``get_lines``, ``get_page`` and ``get_styles`` helpers,
    which are not defined inside this function.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-document", document=stream
            )
        result = await poller.result()

    # Use spans to find the lines that belong to a particular element.
    # A DocumentTable is used here, but any element exposing a `spans` or
    # `span` field can be matched against related elements the same way.
    # For the equivalent search over words (which have a `span` field), see
    # `sample_get_words_on_document_line.py` under the samples v3.2-beta directory.
    for table_idx, table in enumerate(result.tables):
        summary = "Table # {} has {} rows and {} columns".format(
            table_idx, table.row_count, table.column_count
        )
        print(summary)

        table_lines = []
        for region in table.bounding_regions:
            page_number = region.page_number
            print("Table # {} location on page: {}".format(table_idx, page_number))
            page = get_page(page_number, result.pages)
            table_lines += get_lines(table.spans, page)

        print("Found # {} lines in the table".format(len(table_lines)))
        for table_line in table_lines:
            print(
                "...Line '{}' is within bounding box: '{}'".format(
                    table_line.content, table_line.bounding_box
                )
            )

    # Use spans to find the text style of a particular element.
    # A DocumentEntity is used here, but any element exposing a `spans` or
    # `span` field can be matched against the document styles the same way.
    for entity in result.entities:
        entity_styles = get_styles(entity.spans, result.styles)
        print(
            "Found entity '{}' of type '{}' with style:".format(
                entity.content, entity.category
            )
        )
        if not entity_styles:
            print("...no handwritten text found")
        for entity_style in entity_styles:
            if not entity_style.is_handwritten:
                continue
            print(
                "...handwritten with confidence {}".format(entity_style.confidence)
            )
    print("----------------------------------------")
Exemplo n.º 28
0
async def analyze_receipts_async():
    """Analyze the bundled sample receipt image and print its fields.

    The endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. The local sample is
    analyzed with the ``prebuilt-receipt`` model (locale ``en-US``); each
    field present in the result is printed with its confidence, including
    the per-item breakdown.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/receipt/contoso-allinone.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    def report_all(fields, entries):
        # For each (key, template) pair, print the field if it is present.
        for field_key, template in entries:
            field = fields.get(field_key)
            if field:
                print(template.format(field.value, field.confidence))

    # (field key, output template) tables, listed in print order.
    leading_fields = (
        ("ReceiptType", "Receipt Type: {} has confidence: {}"),
        ("MerchantName", "Merchant Name: {} has confidence: {}"),
        ("TransactionDate", "Transaction Date: {} has confidence: {}"),
    )
    item_fields = (
        ("Name", "......Item Name: {} has confidence: {}"),
        ("Quantity", "......Item Quantity: {} has confidence: {}"),
        ("Price", "......Individual Item Price: {} has confidence: {}"),
        ("TotalPrice", "......Total Item Price: {} has confidence: {}"),
    )
    trailing_fields = (
        ("Subtotal", "Subtotal: {} has confidence: {}"),
        ("Tax", "Tax: {} has confidence: {}"),
        ("Tip", "Tip: {} has confidence: {}"),
        ("Total", "Total: {} has confidence: {}"),
    )

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-receipt",
                document=stream,
                locale="en-US",
            )
        receipts = await poller.result()

    for receipt_number, receipt in enumerate(receipts.documents, start=1):
        print("--------Recognizing receipt #{}--------".format(receipt_number))
        fields = receipt.fields

        report_all(fields, leading_fields)

        items = fields.get("Items")
        if items:
            print("Receipt items:")
            for item_number, item in enumerate(items.value, start=1):
                print("...Item #{}".format(item_number))
                report_all(item.value, item_fields)

        report_all(fields, trailing_fields)
        print("--------------------------------------")
async def analyze_identity_documents_async():
    """Analyze the bundled sample driver license and print its fields.

    The endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. The local sample is
    analyzed with the ``prebuilt-idDocument`` model and each field present in
    the result is printed with its confidence.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/id_documents/license.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    # (field key, output template) pairs, listed in print order.
    id_fields = (
        ("FirstName", "First Name: {} has confidence: {}"),
        ("LastName", "Last Name: {} has confidence: {}"),
        ("DocumentNumber", "Document Number: {} has confidence: {}"),
        ("DateOfBirth", "Date of Birth: {} has confidence: {}"),
        ("DateOfExpiration", "Date of Expiration: {} has confidence: {}"),
        ("Sex", "Sex: {} has confidence: {}"),
        ("Address", "Address: {} has confidence: {}"),
        ("CountryRegion", "Country/Region: {} has confidence: {}"),
        ("Region", "Region: {} has confidence: {}"),
    )

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-idDocument", document=stream
            )
        id_documents = await poller.result()

    for doc_number, id_document in enumerate(id_documents.documents, start=1):
        print("--------Recognizing ID document #{}--------".format(doc_number))
        for field_key, template in id_fields:
            field = id_document.fields.get(field_key)
            # Fields missing from the result are skipped.
            if field:
                print(template.format(field.value, field.confidence))
Exemplo n.º 30
0
async def analyze_custom_documents_async(custom_model_id):
    """Analyze the bundled ``Form_1.jpg`` sample with a custom model.

    The endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. Prints the analyzed
    documents and their fields, then the lines, words and selection marks of
    every page, then every table with the pages it appears on and its cells.

    Args:
        custom_model_id: model ID used when the ``CUSTOM_BUILT_MODEL_ID``
            environment variable is not set.

    Raises:
        KeyError: if the endpoint or key environment variable is not set.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )
    # [START analyze_custom_documents_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    # The environment variable, when set, takes precedence over the argument.
    model_id = os.getenv("CUSTOM_BUILT_MODEL_ID", custom_model_id)

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    async with document_analysis_client:
        # Make sure your document's type is included in the list of document types the custom model can analyze
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                model=model_id, document=f
            )
        result = await poller.result()

    for idx, document in enumerate(result.documents):
        print("--------Analyzing document #{}--------".format(idx + 1))
        print("Document has type {}".format(document.doc_type))
        print("Document has document type confidence {}".format(document.confidence))
        print("Document was analyzed with model with ID {}".format(result.model_id))
        # Only the field objects are printed, so iterate the values directly.
        for field in document.fields.values():
            # Fall back to the raw content when the field has no (truthy) value.
            field_value = field.value if field.value else field.content
            print("......found field of type '{}' with value '{}' and with confidence {}".format(field.value_type, field_value, field.confidence))

    # iterate over lines, words, and selection marks on each page
    for page in result.pages:
        print("\nLines found on page {}".format(page.page_number))
        for line in page.lines:
            print("...Line '{}'".format(line.content))
        for word in page.words:
            print(
                "...Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )
        if page.selection_marks:
            print("\nSelection marks found on page {}".format(page.page_number))
            for selection_mark in page.selection_marks:
                print(
                    "...Selection mark is '{}' and has a confidence of {}".format(
                        selection_mark.state, selection_mark.confidence
                    )
                )

    for i, table in enumerate(result.tables):
        print("\nTable {} can be found on page:".format(i + 1))
        for region in table.bounding_regions:
            # Print the page number. The format string has a single
            # placeholder, so passing the table index first would print the
            # table number and silently drop the page.
            print("...{}".format(region.page_number))
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has text '{}'".format(
                    cell.row_index, cell.column_index, cell.content
                )
            )
    print("-----------------------------------")