Exemplo n.º 1
0
 def test_get_service_statistics(self, api_key, endpoint, **kwargs):
     """Check that service statistics come back as a dict with the expected sections."""
     credential = AzureKeyCredential(api_key)
     statistics = SearchIndexClient(endpoint, credential).get_service_statistics()
     assert isinstance(statistics, dict)
     # Exactly these two top-level sections are expected.
     expected_sections = {"counters", "limits"}
     assert set(statistics.keys()) == expected_sections
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from shared_code import azure_config
import json

# Settings (search service name and API key) come from the shared helper.
environment_vars = azure_config()

# Azure Search endpoint and key
endpoint = f'https://{environment_vars["search_service_name"]}.search.windows.net'
key = environment_vars["search_api_key"]

# Index that documents are looked up in
index_name = 'good-books'

# Azure SDK client, built once at module import and shared by the handler below
credential = AzureKeyCredential(key)
search_client = SearchClient(endpoint, index_name, credential)

# HTTP handler: reads the `id` query parameter and, when it is present,
# fetches that document through the module-level `search_client` and returns
# it as JSON under a "document" property with status 200.
def main(req: func.HttpRequest) -> func.HttpResponse:

    # http://localhost:7071/api/Lookup?id=100
    docid = req.params.get('id') 

    if docid:
        logging.info(f"/Lookup id = {docid}")
        # Look the document up by its key in the search index.
        returnedDocument = search_client.get_document(key=docid)
        
        # Wrap the document so the JSON payload has a single "document" property.
        full_response = {}
        full_response["document"]=returnedDocument
        
        return func.HttpResponse(body=json.dumps(full_response), mimetype="application/json", status_code=200)
    else:
        # NOTE(review): the body of this branch is missing from this excerpt — restore it from the original file.
Exemplo n.º 3
0
    async def test_receipt_multipage_transform(self, resource_group, location,
                                               form_recognizer_account,
                                               form_recognizer_account_key):
        """Compare transformed receipts with the raw service result for a multipage file.

        The file at ``self.multipage_invoice_pdf`` is sent for receipt
        recognition. The ``cls`` callback stores the deserialized raw result
        and the transformed receipts, which are then compared document by
        document. Documents whose raw ``fields`` are empty are skipped.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))

        responses = []

        def callback(raw_response, _, headers):
            # Store the raw deserialized result first, then the transformed model.
            analyze_result = client._client._deserialize(
                AnalyzeOperationResult, raw_response)
            extracted_receipt = prepare_receipt(analyze_result)
            responses.append(analyze_result)
            responses.append(extracted_receipt)

        with open(self.multipage_invoice_pdf, "rb") as fd:
            myfile = fd.read()

        poller = await client.begin_recognize_receipts(
            receipt=myfile, include_text_content=True, cls=callback)
        # The returned value is not used; the checks below read what the
        # callback stored in `responses`.
        await poller.result()

        raw_response = responses[0]
        returned_model = responses[1]
        read_results = raw_response.analyze_result.read_results
        document_results = raw_response.analyze_result.document_results

        # Receipt fields that are compared one-to-one against the raw result.
        transformed_fields = (
            "MerchantAddress",
            "MerchantName",
            "MerchantPhoneNumber",
            "Subtotal",
            "Tax",
            "Tip",
            "Total",
            "TransactionDate",
            "TransactionTime",
        )

        # A distinct loop name avoids rebinding the sequence being iterated.
        for receipt, raw_document in zip(returned_model, document_results):
            if not raw_document.fields:  # second page is blank
                continue

            # check dict values
            for field_name in transformed_fields:
                self.assertFormFieldTransformCorrect(
                    receipt.fields.get(field_name),
                    raw_document.fields.get(field_name), read_results)

            # check page range
            self.assertEqual(receipt.page_range.first_page_number,
                             raw_document.page_range[0])
            self.assertEqual(receipt.page_range.last_page_number,
                             raw_document.page_range[1])

            # check receipt type
            receipt_type = receipt.fields.get("ReceiptType")
            self.assertEqual(receipt_type.confidence,
                             raw_document.fields["ReceiptType"].confidence)
            self.assertEqual(receipt_type.value,
                             raw_document.fields["ReceiptType"].value_string)

            # check receipt items
            self.assertReceiptItemsTransformCorrect(
                receipt.fields["Items"].value, raw_document.fields["Items"],
                read_results)

        # Check form pages
        self.assertFormPagesTransformCorrect(returned_model, read_results)
    async def analyze_async(self):
        """Run several text analysis actions over one document and print the results.

        Reads the endpoint and key from the ``AZURE_TEXT_ANALYTICS_ENDPOINT``
        and ``AZURE_TEXT_ANALYTICS_KEY`` environment variables, submits a
        single sample document for entity, PII entity, key phrase and linked
        entity recognition in one batch, and prints each action's results.

        Raises:
            KeyError: if either environment variable is not set.
            ValueError: if an action result is flagged as an error.
        """
        # [START analyze_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.textanalytics.aio import TextAnalyticsClient
        from azure.ai.textanalytics import (RecognizeEntitiesAction,
                                            RecognizeLinkedEntitiesAction,
                                            RecognizePiiEntitiesAction,
                                            ExtractKeyPhrasesAction,
                                            AnalyzeBatchActionsType)

        endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
        key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

        text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
        )

        # The sample text needs a well-formed e-mail address for the PII
        # action to have one to find.
        documents = [
            "We went to Contoso Steakhouse located at midtown NYC last week for a dinner party, and we adore the spot! \
            They provide marvelous food and they have a great menu. The chief cook happens to be the owner (I think his name is John Doe) \
            and he is super nice, coming out of the kitchen and greeted us all. We enjoyed very much dining in the place! \
            The Sirloin steak I ordered was tender and juicy, and the place was impeccably clean. You can even pre-order from their \
            online menu at www.contososteakhouse.com, call 312-555-0176 or send email to order@contososteakhouse.com! \
            The only complaint I have is the food didn't come fast enough. Overall I highly recommend it!"
        ]

        async with text_analytics_client:
            poller = await text_analytics_client.begin_analyze_batch_actions(
                documents,
                display_name="Sample Text Analysis",
                actions=[
                    RecognizeEntitiesAction(),
                    RecognizePiiEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizeLinkedEntitiesAction()
                ])

            result = await poller.result()

            async for action_result in result:
                if action_result.is_error:
                    raise ValueError(
                        "Action has failed with message: {}".format(
                            action_result.error.message))

                # An action result has exactly one type, so the branches are exclusive.
                if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES:
                    print("Results of Entities Recognition action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("\nDocument text: {}".format(documents[idx]))
                        for entity in doc.entities:
                            print("Entity: {}".format(entity.text))
                            print("...Category: {}".format(entity.category))
                            print("...Confidence Score: {}".format(
                                entity.confidence_score))
                            print("...Offset: {}".format(entity.offset))
                        print("------------------------------------------")

                elif action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
                    print("Results of PII Entities Recognition action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("Document text: {}".format(documents[idx]))
                        for entity in doc.entities:
                            print("Entity: {}".format(entity.text))
                            print("Category: {}".format(entity.category))
                            print("Confidence Score: {}\n".format(
                                entity.confidence_score))
                        print("------------------------------------------")

                elif action_result.action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES:
                    print("Results of Key Phrase Extraction action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("Document text: {}\n".format(documents[idx]))
                        print("Key Phrases: {}\n".format(doc.key_phrases))
                        print("------------------------------------------")

                elif action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
                    print("Results of Linked Entities Recognition action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("Document text: {}\n".format(documents[idx]))
                        for linked_entity in doc.entities:
                            print("Entity name: {}".format(linked_entity.name))
                            print("...Data source: {}".format(
                                linked_entity.data_source))
                            print("...Data source language: {}".format(
                                linked_entity.language))
                            print("...Data source entity ID: {}".format(
                                linked_entity.data_source_entity_id))
                            print("...Data source URL: {}".format(
                                linked_entity.url))
                            print("...Document matches:")
                            for match in linked_entity.matches:
                                print("......Match text: {}".format(
                                    match.text))
                                print(".........Confidence Score: {}".format(
                                    match.confidence_score))
                                print(".........Offset: {}".format(
                                    match.offset))
                                print(".........Length: {}".format(
                                    match.length))
                        print("------------------------------------------")
 def test_receipt_url_auth_bad_key(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Receipt recognition from a URL with a bogus key must raise ClientAuthenticationError."""
     invalid_credential = AzureKeyCredential("xxxx")
     client = FormRecognizerClient(form_recognizer_account, invalid_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_recognize_receipts_from_url(self.receipt_url_jpg)
    def test_passing_bad_url(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Custom form recognition with a placeholder model id and bad URL must raise HttpResponseError."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with self.assertRaises(HttpResponseError):
            client.begin_recognize_custom_forms_from_url(
                model_id="xx", form_url="https://badurl.jpg")
Exemplo n.º 7
0
def authenticate_client():
    """Return a TextAnalyticsClient built from ``ep`` and ``key1``.

    Both names are read from outside this function.
    """
    return TextAnalyticsClient(
        endpoint=ep,
        credential=AzureKeyCredential(key1))
def sample_analyze_sentiment_with_opinion_mining():
    """Analyze three hotel reviews with opinion mining and print the complaints.

    Reads the endpoint and key from the ``AZURE_TEXT_ANALYTICS_ENDPOINT`` and
    ``AZURE_TEXT_ANALYTICS_KEY`` environment variables, prints how many
    reviews are positive, mixed and negative, then groups the mined opinions
    whose target sentiment is negative by target text and prints the
    assessments for each target.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import TextAnalyticsClient

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(endpoint=endpoint,
                                                credential=credential)

    print(
        "In this sample we will be a hotel owner going through reviews of their hotel to find complaints."
    )
    print(
        "I first found a handful of reviews for my hotel. Let's see what we have to improve."
    )

    documents = [
        """
        The food and service were unacceptable, but the concierge were nice.
        After talking to them about the quality of the food and the process to get room service they refunded
        the money we spent at the restaurant and gave us a voucher for near by restaurants.
        """,
        """
        The rooms were beautiful. The AC was good and quiet, which was key for us as outside it was 100F and our baby
        was getting uncomfortable because of the heat. The breakfast was good too with good options and good servicing times.
        The thing we didn't like was that the toilet in our bathroom was smelly. It could have been that the toilet was broken before we arrived.
        Either way it was very uncomfortable. Once we notified the staff, they came and cleaned it and left candles.
        """,
        """
        Nice rooms! I had a great unobstructed view of the Microsoft campus but bathrooms were old and the toilet was dirty when we arrived.
        It was close to bus stops and groceries stores. If you want to be close to campus I will recommend it, otherwise, might be better to stay in a cleaner one
        """,
    ]

    response = text_analytics_client.analyze_sentiment(
        documents, show_opinion_mining=True)
    # Keep only the documents the service analyzed successfully.
    analyzed = [item for item in response if not item.is_error]

    def count_with_sentiment(label):
        # Number of analyzed reviews whose overall sentiment equals `label`.
        return len([review for review in analyzed if review.sentiment == label])

    print("\nLet's first see the general sentiment of each of these reviews")
    summary = "...We have {} positive reviews, {} mixed reviews, and {} negative reviews. "
    print(
        summary.format(count_with_sentiment("positive"),
                       count_with_sentiment("mixed"),
                       count_with_sentiment("negative")))
    print(
        "\nSince these reviews seem so mixed, and since I'm interested in finding exactly what it is about my hotel that should be improved, "
        "let's find the complaints users have about individual aspects of this hotel"
    )

    print(
        "\nIn order to do that, I'm going to extract targets of a negative sentiment. "
        "I'm going to map each of these targets to the mined opinion object we get back to aggregate the reviews by target. "
    )

    # Maps the text of each negatively rated target to its mined opinions.
    target_to_complaints = {}
    for review in analyzed:
        for sentence in review.sentences:
            for opinion in sentence.mined_opinions:
                if opinion.target.sentiment != 'negative':
                    continue
                target_to_complaints.setdefault(opinion.target.text,
                                                []).append(opinion)

    print(
        "\nLet's now go through the aspects of our hotel people have complained about and see what users have specifically said"
    )

    complaint_line = "Users have made {} complaint(s) about '{}', specifically saying that it's '{}'"
    for target_text, opinions in target_to_complaints.items():
        assessment_texts = [
            assessment.text for opinion in opinions
            for assessment in opinion.assessments
        ]
        print(
            complaint_line.format(len(opinions), target_text,
                                  "', '".join(assessment_texts)))

    print(
        "\n\nLooking at the breakdown, I can see what aspects of my hotel need improvement, and based off of both the number and "
        "content of the complaints users have made about my toilets, I need to get that fixed ASAP."
    )
Exemplo n.º 9
0
 async def test_authentication_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """Document analysis with a bogus key must raise ClientAuthenticationError."""
     invalid_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, invalid_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with client:
             poller = await client.begin_analyze_document("prebuilt-receipt", b"xx")
             await poller.result()
Exemplo n.º 10
0
    async def recognize_business_card_async(self):
        """Recognize the sample English business card and print its fields.

        Reads the endpoint and key from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
        and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. For every
        recognized card, prints each contact's first and last name and then
        each entry of the list-valued fields, together with its confidence.
        Fields that are absent or empty are skipped.
        """
        path_to_sample_forms = os.path.abspath(
            os.path.join(
                os.path.abspath(__file__), "..", "..", "..",
                "./sample_forms/business_cards/business-card-english.jpg"))
        # [START recognize_business_cards_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        form_recognizer_client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key))
        async with form_recognizer_client:
            with open(path_to_sample_forms, "rb") as f:
                poller = await form_recognizer_client.begin_recognize_business_cards(
                    business_card=f, locale="en-US")
            business_cards = await poller.result()

        # List-valued fields in print order, each with its output template.
        list_field_templates = (
            ("CompanyNames", "Company Name: {} has confidence: {}"),
            ("Departments", "Department: {} has confidence: {}"),
            ("JobTitles", "Job Title: {} has confidence: {}"),
            ("Emails", "Email: {} has confidence: {}"),
            ("Websites", "Website: {} has confidence: {}"),
            ("Addresses", "Address: {} has confidence: {}"),
            ("MobilePhones", "Mobile phone number: {} has confidence: {}"),
            ("Faxes", "Fax number: {} has confidence: {}"),
            ("WorkPhones", "Work phone number: {} has confidence: {}"),
            ("OtherPhones", "Other phone number: {} has confidence: {}"),
        )

        for card_number, business_card in enumerate(business_cards, start=1):
            print("--------Recognizing business card #{}--------".format(
                card_number))

            # Contact names are nested: each entry holds FirstName and LastName.
            contact_names = business_card.fields.get("ContactNames")
            if contact_names:
                for contact_name in contact_names.value:
                    first_name = contact_name.value["FirstName"]
                    print("Contact First Name: {} has confidence: {}".format(
                        first_name.value, first_name.confidence))
                    last_name = contact_name.value["LastName"]
                    print("Contact Last Name: {} has confidence: {}".format(
                        last_name.value, last_name.confidence))

            for field_name, template in list_field_templates:
                entries = business_card.fields.get(field_name)
                if not entries:
                    continue
                for entry in entries.value:
                    print(template.format(entry.value, entry.confidence))
    def get_bounding_boxes(self):
        """Recognize a custom form and print its fields, pages and table cells.

        Reads the endpoint, key and model id from the
        ``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY`` and
        ``CUSTOM_TRAINED_MODEL_ID`` environment variables. Bounding boxes are
        rendered with ``format_bounding_box``, which is defined outside this
        method.
        """
        from azure.ai.formrecognizer import FormWord, FormLine
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        model_id = os.environ["CUSTOM_TRAINED_MODEL_ID"]

        form_recognizer_client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key))

        path_to_sample_forms = os.path.abspath(
            os.path.join(os.path.abspath(__file__), "..",
                         "./sample_forms/forms/Form_1.jpg"))
        # Make sure your form's type is included in the list of form types the custom model can recognize
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f, include_text_content=True)
        forms = poller.result()

        field_line = "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
        page_line = "Has width '{}' and height '{}' measure with unit: {}, and has text angle '{}'"
        cell_line = "...Cell[{}][{}] has text '{}' with confidence {} based on the following words: "

        for form_index, form in enumerate(forms):
            print("--------RECOGNIZING FORM #{}--------".format(form_index))
            print("Form has type {}".format(form.form_type))
            for name, field in form.fields.items():
                # each field is of type FormField
                # The value of the field can also be a FormField, or a list of FormFields
                # In our sample, it is not.
                print(
                    field_line.format(
                        name, field.value, field.value_data.text,
                        format_bounding_box(field.value_data.bounding_box),
                        field.confidence))
            for page in form.pages:
                print("-------Recognizing Page #{} of Form #{}-------".format(
                    page.page_number, form_index))
                print(
                    page_line.format(page.width, page.height, page.unit,
                                     page.text_angle))
                for table in page.tables:
                    for cell in table.cells:
                        print(
                            cell_line.format(cell.row_index,
                                             cell.column_index, cell.text,
                                             cell.confidence))
                        # text_content is only populated if you set include_text_content to True in your function call to recognize_custom_forms
                        # It is a heterogeneous list of FormWord and FormLine.
                        for content in cell.text_content:
                            if isinstance(content, FormWord):
                                print(
                                    "......Word '{}' within bounding box '{}' has a confidence of {}"
                                    .format(
                                        content.text,
                                        format_bounding_box(
                                            content.bounding_box),
                                        content.confidence))
                                continue
                            if not isinstance(content, FormLine):
                                # Anything that is neither a word nor a line is not printed.
                                continue
                            print(
                                "......Line '{}' within bounding box '{}' has the following words: "
                                .format(
                                    content.text,
                                    format_bounding_box(
                                        content.bounding_box)))
                            for word in content.words:
                                print(
                                    ".........Word '{}' within bounding box '{}' has a confidence of {}"
                                    .format(
                                        word.text,
                                        format_bounding_box(
                                            word.bounding_box),
                                        word.confidence))

                print("---------------------------------------------------")
            print("-----------------------------------")
Exemplo n.º 12
0
 def test_get_index_statistics(self, api_key, endpoint, index_name, **kwargs):
     """Index statistics must expose exactly the document count and storage size keys."""
     credential = AzureKeyCredential(api_key)
     statistics = SearchIndexClient(endpoint, credential).get_index_statistics(index_name)
     expected_keys = {'document_count', 'storage_size'}
     assert set(statistics.keys()) == expected_keys
Exemplo n.º 13
0
 def test_get_index(self, api_key, endpoint, index_name, **kwargs):
     """Fetching an index by name must return an index carrying that name."""
     credential = AzureKeyCredential(api_key)
     index = SearchIndexClient(endpoint, credential).get_index(index_name)
     assert index.name == index_name
Exemplo n.º 14
0
 def test_list_indexes_empty(self, api_key, endpoint, **kwargs):
     """Expect ``list_indexes()`` to yield nothing: the first ``next()`` raises StopIteration."""
     credential = AzureKeyCredential(api_key)
     indexes = SearchIndexClient(endpoint, credential).list_indexes()
     with pytest.raises(StopIteration):
         next(indexes)
 def test_custom_form_url_bad_endpoint(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Recognizing a custom form against a made-up endpoint must raise ServiceRequestError."""
     bad_endpoint = "http://notreal.azure.com"
     with self.assertRaises(ServiceRequestError):
         # Both client construction and the request run inside the assertion context.
         credential = AzureKeyCredential(form_recognizer_account_key)
         client = FormRecognizerClient(bad_endpoint, credential)
         client.begin_recognize_custom_forms_from_url(model_id="xx", form_url=self.form_url_jpg)
    def test_receipt_url_transform_jpg(self, resource_group, location,
                                       form_recognizer_account,
                                       form_recognizer_account_key):
        """Compare the transformed US receipt with the raw service result for a JPG URL.

        The ``cls`` callback stores the deserialized raw result and the
        transformed receipts. The first receipt is then checked against the
        first raw document result, both through its named attributes and
        through its ``fields`` dict.
        """
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        responses = []

        def callback(raw_response, _, headers):
            # Store the raw deserialized result first, then the transformed model.
            analyze_result = client._client._deserialize(
                AnalyzeOperationResult, raw_response)
            extracted_receipt = prepare_us_receipt(analyze_result)
            responses.extend((analyze_result, extracted_receipt))

        poller = client.begin_recognize_receipts_from_url(
            receipt_url=self.receipt_url_jpg,
            include_text_content=True,
            cls=callback)
        poller.result()

        raw_response = responses[0]
        returned_model = responses[1]
        receipt = returned_model[0]
        actual = raw_response.analyze_result.document_results[0].fields
        read_results = raw_response.analyze_result.read_results
        first_document = raw_response.analyze_result.document_results[0]

        # (receipt attribute, raw field key) pairs, in the order they are checked.
        field_pairs = (
            ("merchant_address", "MerchantAddress"),
            ("merchant_name", "MerchantName"),
            ("merchant_phone_number", "MerchantPhoneNumber"),
            ("subtotal", "Subtotal"),
            ("tax", "Tax"),
            ("tip", "Tip"),
            ("total", "Total"),
            ("transaction_date", "TransactionDate"),
            ("transaction_time", "TransactionTime"),
        )

        # check hardcoded values
        for attribute_name, field_key in field_pairs:
            self.assertFormFieldTransformCorrect(
                getattr(receipt, attribute_name), actual.get(field_key),
                read_results)

        # check dict values
        for _, field_key in field_pairs:
            self.assertFormFieldTransformCorrect(
                receipt.fields.get(field_key), actual.get(field_key),
                read_results)

        # check page range
        page_range = receipt.page_range
        self.assertEqual(page_range.first_page_number,
                         first_document.page_range[0])
        self.assertEqual(receipt.page_range.last_page_number,
                         first_document.page_range[1])

        # check receipt type
        raw_receipt_type = actual["ReceiptType"]
        self.assertEqual(receipt.receipt_type.confidence,
                         raw_receipt_type.confidence)
        self.assertEqual(receipt.receipt_type.type,
                         actual["ReceiptType"].value_string)

        # check receipt items
        self.assertReceiptItemsTransformCorrect(receipt.receipt_items,
                                                actual["Items"], read_results)

        # Check page metadata
        self.assertFormPagesTransformCorrect(receipt.pages, read_results)
 def test_url_authentication_bad_key(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Custom form recognition from a URL with a bogus key must raise ClientAuthenticationError."""
     invalid_credential = AzureKeyCredential("xxxx")
     client = FormRecognizerClient(form_recognizer_account, invalid_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_recognize_custom_forms_from_url(model_id="xx", form_url=self.form_url_jpg)
async def analyze_custom_documents_async(custom_model_id):
    """Analyze the bundled sample form with a custom model and print the results.

    The endpoint and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY``. The model ID is taken from the
    ``CUSTOM_BUILT_MODEL_ID`` environment variable when it is set, otherwise
    from ``custom_model_id``.

    :param custom_model_id: Fallback model ID used when
        ``CUSTOM_BUILT_MODEL_ID`` is not set.
    :raises KeyError: If the endpoint or key environment variable is missing.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        ))
    # [START analyze_custom_documents_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id = os.getenv("CUSTOM_BUILT_MODEL_ID", custom_model_id)

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key))

    async with document_analysis_client:
        # Make sure your document's type is included in the list of document types the custom model can analyze
        # The file is closed as soon as the operation has been started; the
        # result is awaited afterwards, while the client is still open.
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                model=model_id, document=f)
        result = await poller.result()

    for idx, document in enumerate(result.documents):
        print("--------Analyzing document #{}--------".format(idx + 1))
        print("Document has type {}".format(document.doc_type))
        print("Document has document type confidence {}".format(
            document.confidence))
        print("Document was analyzed with model with ID {}".format(
            result.model_id))
        for name, field in document.fields.items():
            # Fall back to the raw content when no typed value is available.
            field_value = field.value if field.value else field.content
            print(
                "......found field of type '{}' with value '{}' and with confidence {}"
                .format(field.value_type, field_value, field.confidence))

    # iterate over tables, lines, and selection marks on each page
    for page in result.pages:
        print("\nLines found on page {}".format(page.page_number))
        for line in page.lines:
            print("...Line '{}'".format(line.content))
        for word in page.words:
            print("...Word '{}' has a confidence of {}".format(
                word.content, word.confidence))
        if page.selection_marks:
            print("\nSelection marks found on page {}".format(
                page.page_number))
            for selection_mark in page.selection_marks:
                print("...Selection mark is '{}' and has a confidence of {}".
                      format(selection_mark.state, selection_mark.confidence))

    for i, table in enumerate(result.tables):
        print("\nTable {} can be found on page:".format(i + 1))
        for region in table.bounding_regions:
            # Print the page the table appears on. The single placeholder
            # previously received the table index as its first argument, so
            # the page number was never shown.
            print("...{}".format(region.page_number))
        for cell in table.cells:
            print("...Cell[{}][{}] has text '{}'".format(
                cell.row_index, cell.column_index, cell.content))
    print("-----------------------------------")
Exemplo n.º 19
0
def authenticate_client():
    """Return a TextAnalyticsClient built from the AZURE_KEY and AZURE_ENDPOINT environment variables."""
    env = os.environ
    credential = AzureKeyCredential(env.get('AZURE_KEY'))
    return TextAnalyticsClient(
        endpoint=env.get('AZURE_ENDPOINT'),
        credential=credential,
    )
Exemplo n.º 20
0
def analyze_receipts():
    """Run the "prebuilt-receipt" model on the sample receipt image and print the extracted fields.

    The endpoint and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY``. For every document in the result, each
    field that is found is printed together with its value and confidence.
    """
    sample_receipt_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "./sample_forms/receipt/contoso-allinone.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    with open(sample_receipt_path, "rb") as image:
        poller = client.begin_analyze_document(
            "prebuilt-receipt", document=image, locale="en-US"
        )
    analysis = poller.result()

    for receipt_number, receipt in enumerate(analysis.documents, start=1):
        print("--------Recognizing receipt #{}--------".format(receipt_number))
        fields = receipt.fields

        # Header-level fields.
        receipt_type = fields.get("ReceiptType")
        if receipt_type:
            print("Receipt Type: {} has confidence: {}".format(
                receipt_type.value, receipt_type.confidence))
        merchant = fields.get("MerchantName")
        if merchant:
            print("Merchant Name: {} has confidence: {}".format(
                merchant.value, merchant.confidence))
        date = fields.get("TransactionDate")
        if date:
            print("Transaction Date: {} has confidence: {}".format(
                date.value, date.confidence))

        # Line items, each with its own optional sub-fields.
        items = fields.get("Items")
        if items:
            print("Receipt items:")
            for item_number, item in enumerate(items.value, start=1):
                print("...Item #{}".format(item_number))
                item_fields = item.value
                name = item_fields.get("Name")
                if name:
                    print("......Item Name: {} has confidence: {}".format(
                        name.value, name.confidence))
                quantity = item_fields.get("Quantity")
                if quantity:
                    print("......Item Quantity: {} has confidence: {}".format(
                        quantity.value, quantity.confidence))
                price = item_fields.get("Price")
                if price:
                    print("......Individual Item Price: {} has confidence: {}".format(
                        price.value, price.confidence))
                total_price = item_fields.get("TotalPrice")
                if total_price:
                    print("......Total Item Price: {} has confidence: {}".format(
                        total_price.value, total_price.confidence))

        # Receipt totals.
        subtotal = fields.get("Subtotal")
        if subtotal:
            print("Subtotal: {} has confidence: {}".format(
                subtotal.value, subtotal.confidence))
        tax = fields.get("Tax")
        if tax:
            print("Tax: {} has confidence: {}".format(tax.value, tax.confidence))
        tip = fields.get("Tip")
        if tip:
            print("Tip: {} has confidence: {}".format(tip.value, tip.confidence))
        total = fields.get("Total")
        if total:
            print("Total: {} has confidence: {}".format(total.value, total.confidence))
        print("--------------------------------------")
Exemplo n.º 21
0
    async def create_composed_model_async(self):
        """Train four labeled purchase-order models, compose them into one model, and print its details.

        The endpoint, key, and the four training-data SAS URLs are read from
        environment variables. All four training operations are started
        before any of their results is awaited.
        """
        # [START begin_create_composed_model_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormTrainingClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        # (training data SAS URL, model name) pairs, in the order they are trained.
        training_jobs = [
            (os.environ['PURCHASE_ORDER_OFFICE_SUPPLIES_SAS_URL'],
             "Purchase order - Office supplies"),
            (os.environ['PURCHASE_ORDER_OFFICE_EQUIPMENT_SAS_URL'],
             "Purchase order - Office Equipment"),
            (os.environ['PURCHASE_ORDER_OFFICE_FURNITURE_SAS_URL'],
             "Purchase order - Furniture"),
            (os.environ['PURCHASE_ORDER_OFFICE_CLEANING_SUPPLIES_SAS_URL'],
             "Purchase order - Cleaning Supplies"),
        ]

        form_training_client = FormTrainingClient(
            endpoint=endpoint, credential=AzureKeyCredential(key))
        async with form_training_client:
            # Start every training operation first ...
            training_pollers = []
            for sas_url, training_model_name in training_jobs:
                started = await form_training_client.begin_training(
                    sas_url,
                    use_training_labels=True,
                    model_name=training_model_name)
                training_pollers.append(started)

            # ... then wait for each result in the same order.
            trained_models = []
            for training_poller in training_pollers:
                trained_models.append(await training_poller.result())

            models_trained_with_labels = [
                trained.model_id for trained in trained_models
            ]

            poller = await form_training_client.begin_create_composed_model(
                models_trained_with_labels,
                model_name="Office Supplies Composed Model")
            model = await poller.result()

        print("Office Supplies Composed Model Info:")
        print("Model ID: {}".format(model.model_id))
        print("Model name: {}".format(model.model_name))
        print("Is this a composed model?: {}".format(
            model.properties.is_composed_model))
        print("Status: {}".format(model.status))
        print("Composed model creation started on: {}".format(
            model.training_started_on))
        print("Creation completed on: {}".format(model.training_completed_on))

        # [END begin_create_composed_model_async]

        print("Recognized fields:")
        for submodel in model.submodels:
            print("The submodel has model ID: {}".format(submodel.model_id))
            print(
                "...The submodel with form type {} has an average accuracy '{}'"
                .format(submodel.form_type, submodel.accuracy))
            for field_name, field in submodel.fields.items():
                print(
                    "...The model found the field '{}' with an accuracy of {}".
                    format(field_name, field.accuracy))

        # Training result information
        for training_doc in model.training_documents:
            print("Document was used to train model with ID: {}".format(
                training_doc.model_id))
            print("Document name: {}".format(training_doc.name))
            print("Document status: {}".format(training_doc.status))
            print("Document page count: {}".format(training_doc.page_count))
            print("Document errors: {}".format(training_doc.errors))
Exemplo n.º 22
0
def result_types_lookup(subscription_key):
    """WebSearchResultTypesLookup.

    This will look up a single query (Xbox) and print out name and url for first web, image, news and videos results.

    :param subscription_key: Key used to authenticate the search client.
    """
    # Authenticate with the key the caller passed in; the parameter was
    # previously ignored in favour of the module-level SUBSCRIPTION_KEY.
    client = WebSearchClient(AzureKeyCredential(subscription_key))

    try:

        web_data = client.web.search(query="xbox")
        print("Searched for Query# \" Xbox \"")

        # WebPages
        if web_data.web_pages.value:

            print("Webpage Results#{}".format(len(web_data.web_pages.value)))

            first_web_page = web_data.web_pages.value[0]
            print("First web page name: {} ".format(first_web_page.name))
            print("First web page URL: {} ".format(first_web_page.url))

        else:
            print("Didn't see any Web data..")

        # Images
        if web_data.images.value:

            print("Image Results#{}".format(len(web_data.images.value)))

            first_image = web_data.images.value[0]
            print("First Image name: {} ".format(first_image.name))
            print("First Image URL: {} ".format(first_image.url))

        else:
            print("Didn't see any Image..")

        # News
        if web_data.news.value:

            print("News Results#{}".format(len(web_data.news.value)))

            first_news = web_data.news.value[0]
            print("First News name: {} ".format(first_news.name))
            print("First News URL: {} ".format(first_news.url))

        else:
            print("Didn't see any News..")

        # Videos
        if web_data.videos.value:

            print("Videos Results#{}".format(len(web_data.videos.value)))

            first_video = web_data.videos.value[0]
            print("First Videos name: {} ".format(first_video.name))
            print("First Videos URL: {} ".format(first_video.url))

        else:
            print("Didn't see any Videos..")

    # Any failure in the search or in reading the response is reported on
    # stdout instead of being propagated to the caller.
    except Exception as err:
        print("Encountered exception. {}".format(err))
 def test_receipt_url_bad_endpoint(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Recognizing a receipt URL against "http://notreal.azure.com" must raise ServiceRequestError."""
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(form_recognizer_account_key)
         unreachable_client = FormRecognizerClient("http://notreal.azure.com", credential)
         unreachable_client.begin_recognize_receipts_from_url(self.receipt_url_jpg)
Exemplo n.º 24
0
    async def recognize_custom_forms(self, labeled_model_id,
                                     unlabeled_model_id):
        """Recognize the sample form with a labeled and an unlabeled custom model and print both results.

        The endpoint and key are read from environment variables. Each model
        ID is taken from its environment variable when set
        (``ID_OF_MODEL_TRAINED_WITH_LABELS`` /
        ``ID_OF_MODEL_TRAINED_WITHOUT_LABELS``), otherwise from the
        corresponding argument.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        labeled_id = os.getenv(
            "ID_OF_MODEL_TRAINED_WITH_LABELS", labeled_model_id)
        unlabeled_id = os.getenv(
            "ID_OF_MODEL_TRAINED_WITHOUT_LABELS", unlabeled_model_id)

        this_file = os.path.abspath(__file__)
        sample_form_path = os.path.abspath(
            os.path.join(this_file, "..", "..", "..",
                         "./sample_forms/forms/Form_1.jpg"))

        client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key))
        async with client as form_recognizer_client:

            # The same bytes are submitted to both models.
            with open(sample_form_path, "rb") as stream:
                form_bytes = stream.read()

            labeled_poller = await form_recognizer_client.begin_recognize_custom_forms(
                model_id=labeled_id, form=form_bytes)
            labeled_forms = await labeled_poller.result()

            unlabeled_poller = await form_recognizer_client.begin_recognize_custom_forms(
                model_id=unlabeled_id, form=form_bytes)
            unlabeled_forms = await unlabeled_poller.result()

            # With a form recognized by a model trained with labels, the `name` key will be its label given during training.
            # `value` will contain the typed field value and `value_data` will contain information about the field value
            # `label_data` is not populated for a model trained with labels as this was the given label used to extract the key
            print(
                "---------Recognizing forms using models trained with labeled data---------"
            )
            for recognized in labeled_forms:
                for field_name, field in recognized.fields.items():
                    value_box = format_bounding_box(
                        field.value_data.bounding_box)
                    print(
                        "...Field '{}' has value '{}' within bounding box '{}', with a confidence score of {}"
                        .format(field_name, field.value, value_box,
                                field.confidence))

            # Find a specific labeled field. Substitute "Merchant" with your specific training-time label
            try:
                print(
                    "\nValue for a specific labeled field using the training-time label:"
                )
                training_time_label = "Merchant"
                for recognized in labeled_forms:
                    merchant_field = recognized.fields[training_time_label]
                    print("The Merchant is {}\n".format(merchant_field.value))
            except KeyError:
                print(
                    "'Merchant' training-time label does not exist. Substitute with your own training-time label.\n"
                )

            # With a form recognized by a model trained without labels, the `name` key will be denoted by numeric indices.
            # Non-unique form field label names will be found in the `label_data.text`
            # Information about the form field label and the field value are found in `label_data` and `value_data`
            print(
                "-----------------------------------------------------------------------"
            )
            print(
                "-------Recognizing forms using models trained with unlabeled data-------"
            )
            for recognized in unlabeled_forms:
                for field_name, field in recognized.fields.items():
                    label_box = format_bounding_box(
                        field.label_data.bounding_box)
                    print(
                        "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"
                        .format(field_name, field.label_data.text, label_box,
                                field.confidence))
                    value_box = format_bounding_box(
                        field.value_data.bounding_box)
                    print(
                        "...Field '{}' has value '{}' within bounding box '{}', with a confidence score of {}"
                        .format(field_name, field.value, value_box,
                                field.confidence))

            # Find the value of a specific unlabeled field. Will only be found if sample training forms used
            print("\nValue for a specific unlabeled field:")
            field_label = "Vendor Name:"
            for recognized in unlabeled_forms:
                for field in recognized.fields.values():
                    if field.label_data.text == field_label:
                        print("The Vendor Name is {}\n".format(field.value))
Exemplo n.º 25
0
 def authenticate_client(self):
     """Return a TextAnalyticsClient built from this instance's private key and endpoint."""
     credential = AzureKeyCredential(self.__key)
     return TextAnalyticsClient(endpoint=self.__endpoint, credential=credential)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
try:
    from unittest import mock
except ImportError:
    import mock

from azure.search.documents import (
    SearchIndexingBufferedSender,
)
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from azure.search.documents.models import IndexingResult

# Module-level credential built from the literal key "test_api_key".
CREDENTIAL = AzureKeyCredential(key="test_api_key")

class TestSearchBatchingClient(object):
    """Tests for ``SearchIndexingBufferedSender`` construction and document queuing."""

    def test_search_indexing_buffered_sender_kwargs(self):
        """Check the sender's batch size, retry count, and auto-flush settings when an extra ``window`` keyword is passed."""
        with SearchIndexingBufferedSender(
            "endpoint",
            "index name",
            CREDENTIAL,
            window=100,
        ) as sender:
            assert sender._batch_action_count == 512
            assert sender._max_retries == 3
            assert sender._auto_flush_interval == 60
            assert sender._auto_flush

    def test_batch_queue(self):
        """Queue upload, delete, and merge documents on a sender created with auto-flush disabled."""
        with SearchIndexingBufferedSender(
            "endpoint",
            "index name",
            CREDENTIAL,
            auto_flush=False,
        ) as sender:
            assert sender._index_documents_batch
            sender.upload_documents(["upload1"])
            sender.delete_documents(["delete1", "delete2"])
            sender.merge_documents(["merge1", "merge2", "merge3"])
Exemplo n.º 27
0
    python sample_authentication.py
    Set the environment variables with your own values before running the sample:
    1) EG_ACCESS_KEY - The access key of your eventgrid account.
    2) EG_TOPIC_HOSTNAME - The topic hostname. Typically it exists in the format
    "<YOUR-TOPIC-NAME>.<REGION-NAME>.eventgrid.azure.net".
    3) EVENTGRID_SAS - The shared access signature that is to be used to authenticate the client.
"""
# [START client_auth_with_key_cred]
# Authenticate the publisher client with the access key read from EG_ACCESS_KEY.
import os
from azure.eventgrid import EventGridPublisherClient
from azure.core.credentials import AzureKeyCredential

topic_key = os.environ["EG_ACCESS_KEY"]
endpoint = os.environ["EG_TOPIC_HOSTNAME"]

credential = AzureKeyCredential(topic_key)
client = EventGridPublisherClient(endpoint, credential)
# [END client_auth_with_key_cred]

# [START client_auth_with_sas_cred]
# Authenticate the publisher client with the shared access signature read
# from EVENTGRID_SAS. This section repeats its imports, and it rebinds
# `endpoint`, `credential` and `client`, replacing the key-based values
# assigned earlier in the module.
import os
from azure.eventgrid import EventGridPublisherClient
from azure.core.credentials import AzureSasCredential

signature = os.environ["EVENTGRID_SAS"]
endpoint = os.environ["EG_TOPIC_HOSTNAME"]

credential = AzureSasCredential(signature)
client = EventGridPublisherClient(endpoint, credential)
# [END client_auth_with_sas_cred]
 def test_custom_form_empty_model_id(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Calling custom-form recognition from a URL with an empty ``model_id`` must raise ValueError."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with self.assertRaises(ValueError):
         client.begin_recognize_custom_forms_from_url(
             model_id="",
             form_url="https://badurl.jpg",
         )
Exemplo n.º 29
0
                                      "and has a confidence of {}".format(
                                        element.state,
                                        format_bounding_box(element.bounding_box),
                                        element.confidence
                                        ))
                print("---------------------------------------------------")
            print("-----------------------------------")


if __name__ == '__main__':
    # Script entry point. When CONTAINER_SAS_URL_V2 is set, a model is first
    # trained without labels on that container and its ID is passed to the
    # sample; otherwise the sample is called with model_id=None.
    sample = GetBoundingBoxesSample()
    model_id = None
    container_sas_url = os.getenv("CONTAINER_SAS_URL_V2")
    if container_sas_url:

        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormTrainingClient

        endpoint = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
        key = os.getenv("AZURE_FORM_RECOGNIZER_KEY")

        # Both settings are required before a training client can be built.
        if not (endpoint and key):
            raise ValueError("Please provide endpoint and API key to run the samples.")

        form_training_client = FormTrainingClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        training_poller = form_training_client.begin_training(
            container_sas_url, use_training_labels=False
        )
        model = training_poller.result()
        model_id = model.model_id

    sample.get_bounding_boxes(model_id)
Exemplo n.º 30
0
 def test_analyze_text(self, api_key, endpoint, index_name, **kwargs):
     """Analyze a short text with the "standard.lucene" analyzer and expect exactly two tokens."""
     index_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
     options = AnalyzeTextOptions(text="One's <two/>", analyzer_name="standard.lucene")
     tokens = index_client.analyze_text(index_name, options).tokens
     assert len(tokens) == 2