Python FormRecognizerClient.begin_recognize_content примеры использования

Язык программирования: Python

Пространство имен/Пакет: azure.ai.formrecognizer

Класс/Тип: FormRecognizerClient

Метод/Функция: begin_recognize_content

Примеров на hotexamples.com: 20

Python FormRecognizerClient.begin_recognize_content - 20 примеров найдено. Это лучшие примеры Python кода для azure.ai.formrecognizer.FormRecognizerClient.begin_recognize_content, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

FormRecognizerClient(30)

begin_recognize_receipts(26)

begin_recognize_receipts_from_url(25)

begin_recognize_custom_forms(22)

begin_recognize_content(20)

begin_recognize_content_from_url(16)

begin_recognize_custom_forms_from_url(10)

begin_recognize_invoices_from_url(8)

begin_recognize_invoices(6)

begin_recognize_business_cards(5)

begin_recognize_business_cards_from_url(5)

begin_recognize_identity_documents(5)

begin_recognize_id_documents(4)

begin_recognize_identity_documents_from_url(3)

begin_recognize_id_documents_from_url(2)

begin_extract_receipts_from_url(1)

close(1)

get_form_training_client(1)

Пример #1

Показать файл

 def test_damaged_file_bytes_io_fails_autodetect(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Expect ValueError when the stream's leading bytes match no known file signature."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     # These bytes do not match any magic file number (the test name expects
     # auto-detection to fail for them).
     unrecognized_stream = BytesIO(b"\x50\x44\x46\x55\x55\x55")
     with self.assertRaises(ValueError):
         client.begin_recognize_content(unrecognized_stream)

Пример #2

Показать файл

    def test_content_multipage_table_span_pdf(self, resource_group, location,
                                              form_recognizer_account,
                                              form_recognizer_account_key):
        """Check page numbers and table dimensions recognized in the multipage table PDF."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.multipage_table_pdf, "rb") as stream:
            poller = client.begin_recognize_content(stream)

        pages = poller.result()
        self.assertEqual(len(pages), 2)

        # First page: two tables, checked in order as (row_count, column_count).
        first_page = pages[0]
        self.assertEqual(first_page.page_number, 1)
        self.assertEqual(len(first_page.tables), 2)
        for table_index, (rows, columns) in enumerate([(30, 5), (6, 5)]):
            table = first_page.tables[table_index]
            self.assertEqual(table.row_count, rows)
            self.assertEqual(table.column_count, columns)
            self.assertEqual(table.page_number, 1)

        # Second page: a single table.
        second_page = pages[1]
        self.assertEqual(len(second_page.tables), 1)
        self.assertEqual(second_page.page_number, 2)
        only_table = second_page.tables[0]
        self.assertEqual(only_table.row_count, 24)
        self.assertEqual(only_table.column_count, 5)
        self.assertEqual(only_table.page_number, 2)

        self.assertFormPagesHasValues(pages)

Пример #3

Показать файл

    def test_content_stream_transform_jpg(self, resource_group, location,
                                          form_recognizer_account,
                                          form_recognizer_account_key):
        """Compare the transformed layout of a JPG form against the raw service result."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.form_jpg, "rb") as image_file:
            form_bytes = image_file.read()

        # Filled by the callback: [deserialized raw result, transformed layout].
        captured = []

        def callback(raw_response, _, headers):
            # Deserialize the raw response and keep it next to the transformed
            # layout so both can be compared after polling finishes.
            analyze_result = client._client._deserialize(
                AnalyzeOperationResult, raw_response)
            extracted_layout = prepare_content_result(analyze_result)
            captured.extend((analyze_result, extracted_layout))

        poller = client.begin_recognize_content(form_bytes, cls=callback)
        poller.result()

        raw_result = captured[0]
        layout = captured[1]
        analyze = raw_result.analyze_result
        page_results = analyze.page_results
        read_results = analyze.read_results

        # Check form pages
        self.assertFormPagesTransformCorrect(layout, read_results, page_results)

Пример #4

Показать файл

 def test_damaged_file_passed_as_bytes_io(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Expect HttpResponseError for a damaged stream that still starts with a PDF signature."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     # Still has the correct leading bytes to be recognized as PDF.
     truncated_pdf = BytesIO(b"\x25\x50\x44\x46\x55\x55\x55")
     with self.assertRaises(HttpResponseError):
         client.begin_recognize_content(truncated_pdf)

Пример #5

Показать файл

Файл: test_content.py Проект: teachmeabhinav/azure-sdk-for-python

 def test_content_authentication_bad_key(self, formrecognizer_test_endpoint,
                                         formrecognizer_test_api_key):
     """Expect ClientAuthenticationError when the client is built with the key "xxxx"."""
     bad_credential = AzureKeyCredential("xxxx")
     client = FormRecognizerClient(formrecognizer_test_endpoint, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_recognize_content(b"xx", content_type="application/pdf")

Пример #6

Показать файл

    def recognize_content(self):
        """Recognize the sample invoice PDF and print its pages, tables, cells and lines.

        Reads ``self.endpoint`` and ``self.key`` to build the client, submits
        ``sample_forms/forms/Invoice_1.pdf`` and prints, for every returned
        page, its dimensions, each table with its cells, and each text line.
        """
        from azure.ai.formrecognizer import FormWord, FormLine
        # [START recognize_content]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient
        form_recognizer_client = FormRecognizerClient(endpoint=self.endpoint, credential=AzureKeyCredential(self.key))
        with open("sample_forms/forms/Invoice_1.pdf", "rb") as f:
            poller = form_recognizer_client.begin_recognize_content(stream=f.read())
        contents = poller.result()

        # Page numbers are reported 1-based, so the first page prints as "#1".
        for page_no, content in enumerate(contents, start=1):
            print("----Recognizing content from page #{}----".format(page_no))
            print("Has width: {} and height: {}, measured with unit: {}".format(
                content.width,
                content.height,
                content.unit
            ))
            for table_idx, table in enumerate(content.tables):
                print("Table # {} has {} rows and {} columns".format(table_idx, table.row_count, table.column_count))
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
                        cell.row_index,
                        cell.column_index,
                        cell.text,
                        format_bounding_box(cell.bounding_box)
                    ))
                    # [END recognize_content]
            for line_idx, line in enumerate(content.lines):
                print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
                    line_idx,
                    len(line.words),
                    line.text,
                    format_bounding_box(line.bounding_box)
                ))
            print("----------------------------------------")

Пример #7

Показать файл

    def test_content_continuation_token(self, resource_group, location,
                                        form_recognizer_account,
                                        form_recognizer_account_key):
        """Start a second poller from the first poller's continuation token and expect a result."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.form_jpg, "rb") as image_file:
            form_bytes = image_file.read()

        initial_poller = client.begin_recognize_content(form_bytes)
        token = initial_poller.continuation_token()

        resumed_poller = client.begin_recognize_content(
            form_bytes, continuation_token=token)
        self.assertIsNotNone(resumed_poller.result())

        # necessary so azure-devtools doesn't throw assertion error
        initial_poller.wait()

Пример #8

Показать файл

 def test_content_authentication_bad_key(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
     """Expect ClientAuthenticationError when the client is built with the key "xxxx"."""
     bad_credential = AzureKeyCredential("xxxx")
     client = FormRecognizerClient(form_recognizer_account, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_recognize_content(b"xx", content_type="application/pdf")

Пример #9

Показать файл

    def test_content_multipage(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Recognize the multipage invoice PDF and expect three pages in the result."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.multipage_invoice_pdf, "rb") as pdf_file:
            pdf_bytes = pdf_file.read()

        pages = client.begin_recognize_content(pdf_bytes).result()

        self.assertEqual(len(pages), 3)
        self.assertFormPagesHasValues(pages)

Пример #10

Показать файл

Файл: test_content.py Проект: teachmeabhinav/azure-sdk-for-python

 def test_content_bad_endpoint(self, formrecognizer_test_endpoint,
                               formrecognizer_test_api_key):
     """Expect ServiceRequestError when the client targets http://notreal.azure.com."""
     with open(self.invoice_pdf, "rb") as pdf_file:
         pdf_bytes = pdf_file.read()

     # Both client construction and the call stay inside the assertRaises scope.
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = FormRecognizerClient("http://notreal.azure.com", credential)
         client.begin_recognize_content(pdf_bytes)

Пример #11

Показать файл

 def test_passing_bad_content_type_param_passed(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Expect ValueError when content_type is given as "application/jpeg"."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with open(self.invoice_pdf, "rb") as pdf_file:
         pdf_bytes = pdf_file.read()

     with self.assertRaises(ValueError):
         client.begin_recognize_content(pdf_bytes, content_type="application/jpeg")

Пример #12

Показать файл

    def test_blank_page(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Recognize the blank PDF and expect a non-None result."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        # The operation is started while the file is open; the result is
        # fetched after the file has been closed.
        with open(self.blank_pdf, "rb") as blank_stream:
            poller = client.begin_recognize_content(blank_stream)

        self.assertIsNotNone(poller.result())

Пример #13

Показать файл

    def test_auto_detect_unsupported_stream_content(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Expect ValueError when the bytes of the unsupported .py fixture are submitted."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with open(self.unsupported_content_py, "rb") as source_file:
            unsupported_bytes = source_file.read()

        with self.assertRaises(ValueError):
            client.begin_recognize_content(unsupported_bytes)

Пример #14

Показать файл

    def test_content_stream_passing_url(self, resource_group, location,
                                        form_recognizer_account,
                                        form_recognizer_account_key):
        """Expect TypeError when a URL string is passed to the stream-based method."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)

        with self.assertRaises(TypeError):
            client.begin_recognize_content("https://badurl.jpg",
                                           content_type="application/json")

Пример #15

Показать файл

 def test_content_authentication_successful_key(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """Recognize the invoice PDF with the account key; passes if no error is raised."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with open(self.invoice_pdf, "rb") as pdf_file:
         pdf_bytes = pdf_file.read()

     # The result is not inspected; the call completing is the whole check.
     client.begin_recognize_content(pdf_bytes).result()

Пример #16

Показать файл

 def test_passing_enum_content_type(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
     """Pass content_type as FormContentType.application_pdf and expect a non-None result."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     client = FormRecognizerClient(form_recognizer_account, credential)
     with open(self.invoice_pdf, "rb") as pdf_file:
         pdf_bytes = pdf_file.read()

     poller = client.begin_recognize_content(pdf_bytes, content_type=FormContentType.application_pdf)
     self.assertIsNotNone(poller.result())

Пример #17

Показать файл

Файл: sample_recognize_content.py Проект: sacheu/azure-sdk-for-python

    def recognize_content(self):
        """Recognize the selection-mark sample form and print everything found on each page.

        Reads the endpoint and key from the AZURE_FORM_RECOGNIZER_ENDPOINT and
        AZURE_FORM_RECOGNIZER_KEY environment variables, submits the sample
        PNG and prints page dimensions, tables and cells, lines and words,
        and selection marks.
        """
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "./sample_forms/forms/form_selection_mark.png"))
        # [START recognize_content]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        credential = AzureKeyCredential(key)
        form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=credential)
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_content(form=f)
        form_pages = poller.result()

        # Pages are printed 1-based; tables and lines keep their 0-based index.
        for page_no, content in enumerate(form_pages, start=1):
            print("----Recognizing content from page #{}----".format(page_no))
            print("Page has width: {} and height: {}, measured with unit: {}".format(
                content.width, content.height, content.unit))

            for table_idx, table in enumerate(content.tables):
                print("Table # {} has {} rows and {} columns".format(
                    table_idx, table.row_count, table.column_count))
                print("Table # {} location on page: {}".format(
                    table_idx, format_bounding_box(table.bounding_box)))
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
                        cell.row_index,
                        cell.column_index,
                        cell.text,
                        format_bounding_box(cell.bounding_box)))

            for line_idx, line in enumerate(content.lines):
                print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
                    line_idx,
                    len(line.words),
                    line.text,
                    format_bounding_box(line.bounding_box)))
                # A confidently handwritten line is reported as a possible signature.
                if (line.appearance
                        and line.appearance.style_name == "handwriting"
                        and line.appearance.style_confidence > 0.8):
                    print("Text line '{}' is handwritten and might be a signature.".format(line.text))
                for word in line.words:
                    print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))

            for selection_mark in content.selection_marks:
                print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                    selection_mark.state,
                    format_bounding_box(selection_mark.bounding_box),
                    selection_mark.confidence))
            print("----------------------------------------")

Пример #18

Показать файл

    def test_content_stream_pdf(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Recognize the single-page invoice PDF and check its page and first table."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.invoice_pdf, "rb") as pdf_file:
            pdf_bytes = pdf_file.read()

        pages = client.begin_recognize_content(pdf_bytes).result()
        self.assertEqual(len(pages), 1)

        page = pages[0]
        self.assertEqual(page.page_number, 1)
        self.assertFormPagesHasValues(pages)

        first_table = page.tables[0]
        self.assertEqual(first_table.row_count, 2)
        self.assertEqual(first_table.column_count, 6)
        self.assertEqual(first_table.page_number, 1)

Пример #19

Показать файл

Файл: __main__.py Проект: FelixKleineBoesing/OCRDemo

def main():
    """Recognize content in a sample invoice image and show the image returned by draw_blocks.

    Loads environment variables from ``../../.env`` and reads the service key
    from ``COGNITIVE_SERVICE_KEY``.
    """
    image_path = "../../data/invoice2.png"

    load_dotenv("../../.env")
    service_key = os.environ.get("COGNITIVE_SERVICE_KEY")
    form_recognizer_client = FormRecognizerClient(
        endpoint="https://ocrdemo1.cognitiveservices.azure.com/",
        credential=AzureKeyCredential(service_key),
    )

    with open(image_path, "rb") as image_file:
        invoice = image_file.read()
    page = form_recognizer_client.begin_recognize_content(invoice).result()

    # The same file is loaded again as an image and passed to draw_blocks
    # together with the recognition result.
    annotated = draw_blocks(imread(image_path), page)
    cv2.imshow("img", annotated)
    cv2.waitKey(0)

Пример #20

Показать файл

    def recognize_content(self):
        """Recognize the sample invoice PDF and print its pages, tables, cells, lines and words.

        Reads the endpoint and key from the AZURE_FORM_RECOGNIZER_ENDPOINT and
        AZURE_FORM_RECOGNIZER_KEY environment variables.
        """
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "./sample_forms/forms/Invoice_1.pdf"))
        # [START recognize_content]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        credential = AzureKeyCredential(key)
        form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=credential)
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_content(form=f)
        form_pages = poller.result()

        # Pages are printed 1-based; tables and lines keep their 0-based index.
        for page_no, content in enumerate(form_pages, start=1):
            print("----Recognizing content from page #{}----".format(page_no))
            print("Page has width: {} and height: {}, measured with unit: {}".format(
                content.width, content.height, content.unit))
            for table_idx, table in enumerate(content.tables):
                print("Table # {} has {} rows and {} columns".format(
                    table_idx, table.row_count, table.column_count))
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
                        cell.row_index, cell.column_index, cell.text,
                        format_bounding_box(cell.bounding_box)))
                    # [END recognize_content]
            for line_idx, line in enumerate(content.lines):
                print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
                    line_idx, len(line.words), line.text,
                    format_bounding_box(line.bounding_box)))
                for word in line.words:
                    print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))
            print("----------------------------------------")