Пример #1
0
 def callback(raw_response, _, headers):
     """Record the generated operation and its converted result.

     ``raw_response`` is deserialized as an ``AnalyzeResultOperation`` through
     ``da_client._deserialize``; its ``analyze_result`` is then converted with
     ``AnalyzeResult._from_generated``.  Both objects are appended to the
     enclosing ``responses`` collection, generated operation first.  The
     second and third arguments are accepted but not used.
     """
     generated_operation = da_client._deserialize(
         AnalyzeResultOperation, raw_response)
     converted = AnalyzeResult._from_generated(
         generated_operation.analyze_result)
     # Append only after both conversions succeeded, so a failure leaves
     # ``responses`` untouched.
     responses.append(generated_operation)
     responses.append(converted)
    async def test_receipt_multipage(self, client):
        """Analyze the multi-page receipt PDF and check both receipts after a dict round trip.

        The file at ``self.multipage_receipt_pdf`` is sent to the
        "prebuilt-receipt" model, the result is converted with ``to_dict()``
        and rebuilt with ``AnalyzeResult.from_dict()``, and the field values
        of the two returned documents are compared with the expected ones.

        :param client: asynchronous client exposing ``begin_analyze_document``;
            it is entered as an async context manager for the request.
        :return: an empty dict (NOTE(review): presumably consumed by a test
            decorator outside this view -- confirm).
        """
        with open(self.multipage_receipt_pdf, "rb") as fd:
            receipt = fd.read()
        async with client:
            poller = await client.begin_analyze_document("prebuilt-receipt", receipt)
            result = await poller.result()

        # Every assertion below runs against the model rebuilt from the dict,
        # not against the object returned by the service call.
        d = result.to_dict()
        result = AnalyzeResult.from_dict(d)

        assert len(result.documents) == 2
        receipt = result.documents[0]
        # This used to be `assert value, '<text>'`, which treats the text as
        # the failure message and only checks that the value is truthy.
        assert receipt.fields.get("MerchantAddress").value == '123 Main Street Redmond, WA 98052'
        assert receipt.fields.get("MerchantName").value == 'Contoso'
        assert receipt.fields.get("MerchantPhoneNumber").value == '+19876543210'
        assert receipt.fields.get("Subtotal").value == 11.7
        assert receipt.fields.get("TotalTax").value == 1.17
        assert receipt.fields.get("Tip").value == 1.63
        assert receipt.fields.get("Total").value == 14.5
        assert receipt.fields.get("TransactionDate").value == date(year=2019, month=6, day=10)
        assert receipt.fields.get("TransactionTime").value == time(hour=13, minute=59, second=0)
        assert receipt.doc_type == "receipt.retailMeal"
        receipt = result.documents[1]
        # Same repair as for the first receipt: compare, do not just test truthiness.
        assert receipt.fields.get("MerchantAddress").value == '123 Main Street Redmond, WA 98052'
        assert receipt.fields.get("MerchantName").value == 'Contoso'
        assert receipt.fields.get("Subtotal").value == 1098.99
        assert receipt.fields.get("TotalTax").value == 104.4
        assert receipt.fields.get("Total").value == 1203.39
        assert receipt.fields.get("TransactionDate").value == date(year=2019, month=6, day=10)
        assert receipt.fields.get("TransactionTime").value == time(hour=13, minute=59, second=0)
        assert receipt.doc_type == "receipt.retailMeal"
        assert len(result.pages) == 2

        return {}
 def callback(raw_response, _, headers):
     """Record the generated operation and the receipt result built from it.

     ``raw_response`` is deserialized as an ``AnalyzeResultOperation`` through
     ``client._deserialize``; its ``analyze_result`` is then converted with
     ``AnalyzeResult._from_generated``.  Both objects are appended to the
     enclosing ``responses`` collection, generated operation first.  The
     second and third arguments are accepted but not used.
     """
     generated_operation = client._deserialize(
         AnalyzeResultOperation, raw_response)
     converted_receipt = AnalyzeResult._from_generated(
         generated_operation.analyze_result)
     # Append only after both conversions succeeded, so a failure leaves
     # ``responses`` untouched.
     responses.append(generated_operation)
     responses.append(converted_receipt)
Пример #4
0
    async def test_receipt_multipage(self, client):
        """Analyze the multi-page receipt PDF and check both receipts after a dict round trip.

        The file at ``self.multipage_receipt_pdf`` is sent to the
        "prebuilt-receipt" model; the result is converted with ``to_dict()``,
        rebuilt with ``AnalyzeResult.from_dict()``, and the fields of the two
        returned documents are compared with the expected values.

        :param client: asynchronous client exposing ``begin_analyze_document``;
            it is entered as an async context manager for the request.
        """
        with open(self.multipage_receipt_pdf, "rb") as pdf:
            pdf_bytes = pdf.read()
        async with client:
            poller = await client.begin_analyze_document(
                "prebuilt-receipt", pdf_bytes)
            analyzed = await poller.result()

        # All assertions run against the model rebuilt from the dictionary.
        rebuilt = AnalyzeResult.from_dict(analyzed.to_dict())

        self.assertEqual(len(rebuilt.documents), 2)

        purchase_date = date(year=2019, month=6, day=10)
        purchase_time = time(hour=13, minute=59, second=0)
        # One entry per receipt, in document order; each entry lists
        # (field name, expected value) pairs in the order they are checked.
        expected_by_receipt = (
            (
                ("MerchantAddress", '123 Main Street Redmond, WA 98052'),
                ("MerchantName", 'Contoso'),
                ("MerchantPhoneNumber", '+19876543210'),
                ("Subtotal", 11.7),
                ("Tax", 1.17),
                ("Tip", 1.63),
                ("Total", 14.5),
                ("TransactionDate", purchase_date),
                ("TransactionTime", purchase_time),
            ),
            (
                ("MerchantAddress", '123 Main Street Redmond, WA 98052'),
                ("MerchantName", 'Contoso'),
                ("Subtotal", 1098.99),
                ("Tax", 104.4),
                ("Total", 1203.39),
                ("TransactionDate", purchase_date),
                ("TransactionTime", purchase_time),
            ),
        )

        for position, expected_fields in enumerate(expected_by_receipt):
            fields = rebuilt.documents[position].fields
            for field_name, expected_value in expected_fields:
                self.assertEqual(fields.get(field_name).value, expected_value)
            # Both receipts must carry a typed, scored "ReceiptType" field.
            receipt_type = fields.get("ReceiptType")
            self.assertIsNotNone(receipt_type.confidence)
            self.assertEqual(receipt_type.value, 'Itemized')

        self.assertEqual(len(rebuilt.pages), 2)
Пример #5
0
async def convert_to_and_from_dict_async():
    """Analyze a sample form, dump the result to JSON, and rebuild the model from the dict.

    Reads the endpoint and key from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables, analyzes
    ``Form_1.jpg`` with the "prebuilt-document" model, writes the dictionary
    form of the result to ``data.json`` in the current working directory, then
    converts the dictionary back with ``AnalyzeResult.from_dict`` and prints a
    short summary of the rebuilt model.

    :raises KeyError: if either environment variable is not set.
    """
    # The sample image is addressed relative to this file's own location.
    this_file = os.path.abspath(__file__)
    sample_path = os.path.abspath(
        os.path.join(
            this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg"
        )
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    credential = AzureKeyCredential(api_key)

    client = DocumentAnalysisClient(
        endpoint=service_endpoint, credential=credential
    )
    async with client:
        with open(sample_path, "rb") as form_file:
            poller = await client.begin_analyze_document(
                "prebuilt-document", document=form_file
            )
        # The file is closed by now; only the client has to stay open while
        # waiting for the result.
        analyzed = await poller.result()

    # Dictionary form of the received model.
    as_dict = analyzed.to_dict()

    # Persist the dictionary as JSON.  AzureJSONEncoder helps make types such
    # as dates JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(as_dict, json_file, cls=AzureJSONEncoder)

    # Rebuild the original model type from the dictionary and use it as normal.
    restored = AnalyzeResult.from_dict(as_dict)

    print("----Converted from dictionary AnalyzeResult----")
    print("Model ID: '{}'".format(restored.model_id))
    print("Number of pages analyzed {}".format(len(restored.pages)))
    print("API version used: {}".format(restored.api_version))

    print("----------------------------------------")
Пример #6
0
    def test_receipt_multipage_transform(self, client):
        """Check that a multi-page receipt result matches the generated model after a dict round trip.

        A ``cls`` callback captures both the generated ``AnalyzeResultOperation``
        and the ``AnalyzeResult`` built from it.  The latter is converted with
        ``to_dict()`` / ``AnalyzeResult.from_dict()`` and compared, attribute
        by attribute and collection by collection, with the generated result.

        :param client: client exposing ``begin_analyze_document`` and
            ``_deserialize``.
        :return: an empty dict (NOTE(review): presumably consumed by a test
            decorator outside this view -- confirm).
        """
        responses = []

        def callback(raw_response, _, headers):
            # Keep the generated operation at index 0 and the converted
            # result at index 1; the assertions below rely on that order.
            generated_operation = client._deserialize(
                AnalyzeResultOperation, raw_response)
            converted = AnalyzeResult._from_generated(
                generated_operation.analyze_result)
            responses.append(generated_operation)
            responses.append(converted)

        with open(self.multipage_receipt_pdf, "rb") as pdf:
            pdf_bytes = pdf.read()

        poller = client.begin_analyze_document(
            "prebuilt-receipt", document=pdf_bytes, cls=callback)

        # The return value is unused; the checks read what ``callback``
        # captured in ``responses``.
        poller.result()
        generated = responses[0].analyze_result
        rebuilt = AnalyzeResult.from_dict(responses[1].to_dict())

        # Check AnalyzeResult
        for attribute in ("model_id", "api_version", "content"):
            assert getattr(rebuilt, attribute) == getattr(generated, attribute)

        self.assertDocumentPagesTransformCorrect(
            rebuilt.pages, generated.pages)
        self.assertDocumentTransformCorrect(
            rebuilt.documents, generated.documents)
        self.assertDocumentTablesTransformCorrect(
            rebuilt.tables, generated.tables)
        self.assertDocumentKeyValuePairsTransformCorrect(
            rebuilt.key_value_pairs, generated.key_value_pairs)
        self.assertDocumentEntitiesTransformCorrect(
            rebuilt.entities, generated.entities)
        self.assertDocumentStylesTransformCorrect(
            rebuilt.styles, generated.styles)

        # check page range
        assert len(generated.pages) == len(rebuilt.pages)

        return {}
    def test_document_line_get_words_error(self, client, **kwargs):
        """Check that ``get_words()`` raises ``ValueError`` on lines rebuilt from a dictionary.

        The invoice PDF is analyzed with the "prebuilt-document" model.  Two
        rebuild paths are exercised: the whole result through
        ``AnalyzeResult.from_dict`` and a single line through
        ``DocumentLine.from_dict``.

        :param client: client exposing ``begin_analyze_document``.
        :param kwargs: accepted but not used in this test.
        """
        with open(self.invoice_pdf, "rb") as pdf:
            pdf_bytes = pdf.read()

        analyzed = client.begin_analyze_document(
            "prebuilt-document", pdf_bytes).result()

        # check the error occurs when converting a larger element that
        # encompasses a document line
        rebuilt_result = AnalyzeResult.from_dict(analyzed.to_dict())
        with pytest.raises(ValueError):
            rebuilt_result.pages[0].lines[0].get_words()

        # check that the error occurs when directly converting a DocumentLine
        # from a dict
        line_dict = analyzed.pages[0].lines[0].to_dict()
        rebuilt_line = DocumentLine.from_dict(line_dict)
        with pytest.raises(ValueError):
            rebuilt_line.get_words()
Пример #8
0
    def test_invoice_jpg(self, client, **kwargs):
        """Analyze the invoice JPG with "prebuilt-invoice" and check its fields after a dict round trip.

        The result is converted with ``to_dict()``, passed through
        ``json.dumps`` with ``AzureJSONEncoder``, rebuilt with
        ``AnalyzeResult.from_dict()``, and the single returned document's
        fields are compared with the expected values.

        :param client: client exposing ``begin_analyze_document``.
        :param kwargs: accepted but not used in the visible part of this test.
        """
        with open(self.invoice_jpg, "rb") as fd:
            invoice = fd.read()
        poller = client.begin_analyze_document("prebuilt-invoice", invoice)

        result = poller.result()
        d = result.to_dict()
        # The output is discarded: the call only has to succeed, showing the
        # dictionary can be serialized with AzureJSONEncoder.
        json.dumps(d, cls=AzureJSONEncoder)
        result = AnalyzeResult.from_dict(d)
        assert len(result.documents) == 1
        invoice = result.documents[0]

        assert result.pages

        # check dict values
        #
        # Several checks below used to read `assert value, <expected>`, which
        # treats <expected> as the failure message and only checks that the
        # value is truthy.  They are real equality comparisons now.
        fields = invoice.fields
        assert fields.get("AmountDue").value.amount == 610.0
        assert fields.get("AmountDue").value.symbol == "$"
        assert fields.get("BillingAddress").value == "123 Bill St, Redmond WA, 98052"
        assert fields.get("BillingAddressRecipient").value == "Microsoft Finance"
        assert fields.get("CustomerAddress").value == "123 Other St, Redmond WA, 98052"
        assert fields.get("CustomerAddressRecipient").value == "Microsoft Corp"
        assert fields.get("CustomerId").value == "CID-12345"
        assert fields.get("CustomerName").value == "MICROSOFT CORPORATION"
        assert fields.get("DueDate").value == date(2019, 12, 15)
        assert fields.get("InvoiceDate").value == date(2019, 11, 15)
        assert fields.get("InvoiceId").value == "INV-100"
        assert fields.get("InvoiceTotal").value.amount == 110.0
        assert fields.get("PreviousUnpaidBalance").value.amount == 500.0
        assert fields.get("PurchaseOrder").value == "PO-3333"
        assert fields.get("RemittanceAddress").value == "123 Remit St New York, NY, 10001"
        assert fields.get("RemittanceAddressRecipient").value == "Contoso Billing"
        assert fields.get("ServiceAddress").value == "123 Service St, Redmond WA, 98052"
        assert fields.get("ServiceAddressRecipient").value == "Microsoft Services"
        assert fields.get("ServiceEndDate").value == date(2019, 11, 14)
        assert fields.get("ServiceStartDate").value == date(2019, 10, 14)
        assert fields.get("ShippingAddress").value == "123 Ship St, Redmond WA, 98052"
        assert fields.get("ShippingAddressRecipient").value == "Microsoft Delivery"
        assert fields.get("SubTotal").value.amount == 100.0
        assert fields.get("SubTotal").value.symbol == "$"
        assert fields.get("TotalTax").value.amount == 10.0
        assert fields.get("TotalTax").value.symbol == "$"
        assert fields.get("VendorName").value == "CONTOSO LTD."
        assert fields.get("VendorAddress").value == "123 456th St New York, NY, 10001"
        assert fields.get("VendorAddressRecipient").value == "Contoso Headquarters"

        # First (and only checked) line item of the invoice.
        first_item = fields.get("Items").value[0].value
        assert first_item["Amount"].value.amount == 100.0
        assert first_item["Amount"].value.symbol == "$"
        assert first_item["Description"].value == "Consulting service"
        assert first_item["Quantity"].value == 1.0
        assert first_item["UnitPrice"].value.amount == 1.0
        # Identity comparison is the idiomatic way to test for None.
        assert first_item["UnitPrice"].value.symbol is None