def test_user_agent(self, client):
    """Verify the SDK user-agent string appears on both the initial and hooked requests."""
    expected_user_agent = "azsdk-python-ai-textanalytics/{} Python/{} ({})".format(
        VERSION, platform.python_version(), platform.platform()
    )

    def callback(resp):
        # every raw response captured by the hook must carry the SDK user agent
        self.assertIn(expected_user_agent, resp.http_request.headers["User-Agent"])

    docs = [
        {"id": "1", "text": "I will go to the park."},
        {"id": "2", "text": "I did not like the hotel we stayed at."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]

    poller = client.begin_analyze_batch_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(),
        ],
        polling_interval=self._interval(),
        raw_response_hook=callback
    )

    self.assertIn(
        expected_user_agent,
        poller._polling_method._initial_response.http_request.headers["User-Agent"]
    )

    # need to call this before tearDown runs even though we don't need the response for the test.
    poller.result()
def test_show_stats_and_model_version_multiple_tasks(self, client):
    """Submit three actions with show_stats and pinned model_version; expect one
    result per action, each covering every input document.

    The raw_response_hook callback is a breakpoint hook for debugging recorded
    responses; it deliberately does nothing.
    """
    def callback(resp):
        if resp.raw_response:
            a = "b"

    docs = [{"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"}]

    poller = client.begin_analyze_batch_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(model_version="latest"),
            ExtractKeyPhrasesAction(model_version="latest"),
            RecognizePiiEntitiesAction(model_version="latest")
        ],
        show_stats=True,
        polling_interval=self._interval(),
        raw_response_hook=callback,
    )

    response = poller.result()
    # assert response.statistics

    action_results = list(response)
    assert len(action_results) == 3
    assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
    assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
    assert action_results[2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

    # FIX: the old `assert all([x for x in xs if cond])` form was vacuously true
    # whenever no element satisfied the filter; assert the count for every
    # action result directly so a wrong document count actually fails.
    for action_result in action_results:
        assert len(action_result.document_results) == len(docs)
async def test_user_agent(self, client):
    """Async variant: the SDK user-agent string must be present on the outgoing request."""
    def callback(resp):
        self.assertIn(
            "azsdk-python-ai-textanalytics/{} Python/{} ({})".format(
                VERSION, platform.python_version(), platform.platform()),
            resp.http_request.headers["User-Agent"])

    docs = [
        {"id": "1", "text": "I will go to the park."},
        {"id": "2", "text": "I did not like the hotel we stayed at."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]

    async with client:
        poller = await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
            ],
            polling_interval=self._interval(),
            raw_response_hook=callback,
        )
        await poller.wait()
def test_out_of_order_ids_multiple_tasks(self, client):
    """Document results must come back in input order for every successful action,
    even when one action (bad model version) errors out."""
    docs = [{"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"}]

    response = client.begin_analyze_batch_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(model_version="bad"),  # expected to surface as an action-level error
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(),
        ],
        polling_interval=self._interval(),
    ).result()

    action_results = list(response)
    assert len(action_results) == 3
    assert action_results[0].is_error
    assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
    assert action_results[2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

    action_results = [r for r in action_results if not r.is_error]
    # FIX: the old `assert all([x for x in xs if cond])` form was vacuously true
    # when the filter matched nothing; check each successful action explicitly.
    for action_result in action_results:
        assert len(action_result.document_results) == len(docs)

    in_order = ["56", "0", "19", "1"]
    for action_result in action_results:
        for idx, resp in enumerate(action_result.document_results):
            self.assertEqual(resp.id, in_order[idx])
def test_bad_model_version_error_multiple_tasks(self, client):  # TODO: verify behavior of service
    """Only the action with a valid model_version succeeds; each action with a bad
    model_version surfaces an InvalidRequest error."""
    docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

    response = client.begin_analyze_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(model_version="latest"),
            ExtractKeyPhrasesAction(model_version="bad"),
            RecognizePiiEntitiesAction(model_version="bad"),
            RecognizeLinkedEntitiesAction(model_version="bad"),
            AnalyzeSentimentAction(model_version="bad")
        ],
        polling_interval=self._interval(),
    ).result()

    action_results = list(response)
    # FIX: replaced non-idiomatic `== True` / `== False` comparisons with
    # truthiness asserts, and collapsed the repeated error checks into a loop.
    assert not action_results[0].is_error
    assert action_results[0].action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
    for failed_result in action_results[1:]:
        assert failed_result.is_error
        assert failed_result.error.code == "InvalidRequest"
async def test_bad_model_version_error_multiple_tasks(
        self, client):  # TODO: verify behavior of service
    """Async variant: the action with a valid model_version succeeds while the
    actions with a bad model_version report InvalidRequest errors."""
    docs = [{
        "id": "1",
        "language": "english",
        "text": "I did not like the hotel we stayed at."
    }]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="bad"),
                RecognizePiiEntitiesAction(model_version="bad")
            ],
            polling_interval=self._interval())).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        # FIX: replaced `== True` / `== False` comparisons with truthiness asserts.
        assert not action_results[0].is_error
        assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
        for failed_result in action_results[1:]:
            assert failed_result.is_error
            assert failed_result.error.code == "InvalidRequest"
def test_disable_service_logs(self, client):
    """Every action's disable_service_logs flag must serialize to loggingOptOut
    in the request body for its task."""
    actions = [
        RecognizeEntitiesAction(disable_service_logs=True),
        ExtractKeyPhrasesAction(disable_service_logs=True),
        RecognizePiiEntitiesAction(disable_service_logs=True),
        RecognizeLinkedEntitiesAction(disable_service_logs=True),
        AnalyzeSentimentAction(disable_service_logs=True),
        ExtractSummaryAction(disable_service_logs=True),
    ]
    assert all(action.disable_service_logs for action in actions)

    def callback(resp):
        # inspect the serialized request: every submitted task must opt out of logging
        tasks = json.loads(resp.http_request.body)["tasks"]
        assert len(tasks) == len(actions)
        for task in tasks.values():
            assert task[0]["parameters"]["loggingOptOut"]

    client.begin_analyze_actions(
        documents=["Test for logging disable"],
        actions=actions,
        polling_interval=self._interval(),
        raw_response_hook=callback,
    ).result()
async def test_multiple_pages_of_results_with_errors_returned_successfully(
        self, client):
    """Paged action results: every third action (the bad-model one) is an error,
    and the remaining actions contain no per-document errors."""
    single_doc = "hello world"
    docs = [{
        "id": str(idx),
        "text": val
    } for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))
            ]  # max number of documents is 25

    async with client:
        result = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="bad"),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval())).result()

        pages = []
        async for p in result:
            pages.append(p)

        for idx, action_result in enumerate(pages):
            if idx % 3 == 0:
                assert action_result.is_error
            else:
                # FIX: the old `assert all([doc for doc in ... if not doc.is_error])`
                # was vacuously true when every document errored; assert each
                # document individually so a real error fails the test.
                for doc in action_result.document_results:
                    assert not doc.is_error
def test_all_successful_passing_text_document_input_entities_task(self, client):
    """Entity recognition over TextDocumentInput objects in three languages:
    one action result, four entities per document, all entity fields populated."""
    docs = [
        TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975", language="en"),
        TextDocumentInput(id="2", text="Microsoft fue fundado por Bill Gates y Paul Allen el 4 de abril de 1975.", language="es"),
        TextDocumentInput(id="3", text="Microsoft wurde am 4. April 1975 von Bill Gates und Paul Allen gegründet.", language="de"),
    ]

    response = client.begin_analyze_batch_actions(
        docs,
        actions=[RecognizeEntitiesAction()],
        show_stats=True,
        polling_interval=self._interval(),
    ).result()

    action_results = list(response)
    assert len(action_results) == 1
    action_result = action_results[0]

    assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
    assert len(action_result.document_results) == len(docs)

    for doc in action_result.document_results:
        self.assertEqual(len(doc.entities), 4)
        self.assertIsNotNone(doc.id)
        # self.assertIsNotNone(doc.statistics)
        for entity in doc.entities:
            self.assertIsNotNone(entity.text)
            self.assertIsNotNone(entity.category)
            self.assertIsNotNone(entity.offset)
            self.assertIsNotNone(entity.confidence_score)
async def test_show_stats_and_model_version_multiple_tasks(self, client):
    """Async: three actions with stats enabled return one result per action,
    each covering every input document."""
    docs = [{"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"}]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="latest"),
                RecognizePiiEntitiesAction(model_version="latest")
            ],
            show_stats=True,
            polling_interval=self._interval()
        )).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        assert len(action_results) == 3
        assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
        assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
        assert action_results[2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

        # FIX: the old `assert all([x for x in xs if cond])` was vacuously true
        # when no element satisfied the filter; assert per result instead.
        for action_result in action_results:
            assert len(action_result.document_results) == len(docs)
async def test_rotate_subscription_key(self, resource_group, location,
                                       text_analytics_account,
                                       text_analytics_account_key):
    """Key rotation round-trip: a valid key works, an invalid key raises
    ClientAuthenticationError, and restoring the key works again."""
    credential = AzureKeyCredential(text_analytics_account_key)
    client = TextAnalyticsClient(text_analytics_account, credential)

    docs = [
        {"id": "1", "text": "I will go to the park."},
        {"id": "2", "text": "I did not like the hotel we stayed at."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
            ],
            polling_interval=self._interval())).result()
        self.assertIsNotNone(response)

        credential.update("xxx")  # Make authentication fail
        with self.assertRaises(ClientAuthenticationError):
            response = await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                ],
                polling_interval=self._interval())).result()

        credential.update(text_analytics_account_key)  # Authenticate successfully again
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
            ],
            polling_interval=self._interval())).result()
        self.assertIsNotNone(response)
async def test_multiple_pages_of_results_returned_successfully(
        self, client):
    """Async: 25 docs across 3 actions page as 20 then 5 results per action; the
    pages of each action must together cover every input document."""
    single_doc = "hello world"
    docs = [{
        "id": str(idx),
        "text": val
    } for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))
            ]  # max number of documents is 25

    async with client:
        result = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            show_stats=True,
            polling_interval=self._interval())).result()

        pages = []
        async for p in result:
            pages.append(p)

        recognize_entities_results = []
        extract_key_phrases_results = []
        recognize_pii_entities_results = []
        for idx, action_result in enumerate(pages):
            if idx % 3 == 0:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
                recognize_entities_results.append(action_result)
            elif idx % 3 == 1:
                assert action_result.action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
                extract_key_phrases_results.append(action_result)
            else:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
                recognize_pii_entities_results.append(action_result)
            if idx < 3:  # first page of task results
                assert len(action_result.document_results) == 20
            else:
                assert len(action_result.document_results) == 5

        # FIX: the former `assert all([... if len(...) == len(docs)])` checks were
        # vacuously true — no single page ever holds all 25 documents, so the
        # filter always produced an empty list. Verify instead that the pages of
        # each action together account for every input document.
        for task_pages in (recognize_entities_results,
                           extract_key_phrases_results,
                           recognize_pii_entities_results):
            assert sum(len(page.document_results) for page in task_pages) == len(docs)
async def test_out_of_order_ids_multiple_tasks(self, client):
    """Async: results come back in input document order for all five actions."""
    docs = [{
        "id": "56",
        "text": ":)"
    }, {
        "id": "0",
        "text": ":("
    }, {
        "id": "19",
        "text": ":P"
    }, {
        "id": "1",
        "text": ":D"
    }]

    async with client:
        response = await (await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
            polling_interval=self._interval())).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        assert len(action_results) == 5
        expected_types = [
            AnalyzeActionsType.RECOGNIZE_ENTITIES,
            AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            AnalyzeActionsType.ANALYZE_SENTIMENT,
        ]
        for action_result, expected_type in zip(action_results, expected_types):
            assert action_result.action_type == expected_type

        action_results = [r for r in action_results if not r.is_error]
        # FIX: the old `assert all([x for x in xs if cond])` form was vacuously
        # true if the filter matched nothing; check each action result directly.
        for action_result in action_results:
            assert len(action_result.document_results) == len(docs)

        in_order = ["56", "0", "19", "1"]
        for action_result in action_results:
            for idx, resp in enumerate(action_result.document_results):
                self.assertEqual(resp.id, in_order[idx])
async def test_bad_credentials(self, client):
    """A client created with bad credentials must raise ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        async with client:
            response = await (await client.begin_analyze_batch_actions(
                ["This is written in English."],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                ],
                polling_interval=self._interval())).result()
def test_empty_credential_class(self, client):
    """An empty credential should fail authentication before any work completes."""
    with self.assertRaises(ClientAuthenticationError):
        response = client.begin_analyze_batch_actions(
            ["This is written in English."],
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval(),
        )
def test_bad_model_version_error_single_task(self, client):  # TODO: verify behavior of service
    """A single action with an invalid model_version fails the whole operation
    with HttpResponseError."""
    docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

    with self.assertRaises(HttpResponseError):
        result = client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="bad"),
            ],
            polling_interval=self._interval(),
        ).result()
def test_pass_cls(self, client):
    """A custom `cls` deserialization callback controls the value the poller returns."""
    def callback(pipeline_response, deserialized, _):
        return "cls result"

    res = client.begin_analyze_batch_actions(
        documents=["Test passing cls to endpoint"],
        actions=[
            RecognizeEntitiesAction(),
        ],
        cls=callback,
        polling_interval=self._interval(),
    ).result()
    assert res == "cls result"
def test_multiple_pages_of_results_returned_successfully(self, client):
    """25 docs across 5 actions page as 20 then 5 results per action; the pages
    of each action must together cover every input document."""
    single_doc = "hello world"
    docs = [{"id": str(idx), "text": val}
            for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))]  # max number of documents is 25

    result = client.begin_analyze_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(),
            RecognizeLinkedEntitiesAction(),
            AnalyzeSentimentAction()
        ],
        show_stats=True,
        polling_interval=self._interval(),
    ).result()

    recognize_entities_results = []
    extract_key_phrases_results = []
    recognize_pii_entities_results = []
    recognize_linked_entities_results = []
    analyze_sentiment_results = []

    action_results = list(result)

    # do 2 pages of 5 task results
    for idx, action_result in enumerate(action_results):
        if idx % 5 == 0:
            assert action_result.action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
            recognize_entities_results.append(action_result)
        elif idx % 5 == 1:
            assert action_result.action_type == AnalyzeActionsType.EXTRACT_KEY_PHRASES
            extract_key_phrases_results.append(action_result)
        elif idx % 5 == 2:
            assert action_result.action_type == AnalyzeActionsType.RECOGNIZE_PII_ENTITIES
            recognize_pii_entities_results.append(action_result)
        elif idx % 5 == 3:
            assert action_result.action_type == AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES
            recognize_linked_entities_results.append(action_result)
        else:
            assert action_result.action_type == AnalyzeActionsType.ANALYZE_SENTIMENT
            analyze_sentiment_results.append(action_result)
        if idx < 5:  # first page of task results
            assert len(action_result.document_results) == 20
        else:
            assert len(action_result.document_results) == 5

    # FIX: the former `assert all([... if len(...) == len(docs)])` checks were
    # vacuously true — no single page ever holds all 25 documents, so the filter
    # always produced an empty list and `all([])` is True. Verify instead that
    # the pages of each action together account for every input document.
    for task_pages in (recognize_entities_results,
                       extract_key_phrases_results,
                       recognize_pii_entities_results,
                       recognize_linked_entities_results,
                       analyze_sentiment_results):
        assert sum(len(page.document_results) for page in task_pages) == len(docs)
async def test_missing_input_records_error(self, client):
    """Submitting an empty document list must raise ValueError with a clear message."""
    docs = []
    with pytest.raises(ValueError) as excinfo:
        async with client:
            await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction()
                ],
                polling_interval=self._interval())).result()
    assert "Input documents can not be empty or None" in str(excinfo.value)
async def test_empty_credential_class(self, client):
    """Async: an empty credential must fail authentication for an actions submission."""
    with self.assertRaises(ClientAuthenticationError):
        async with client:
            await (await client.begin_analyze_actions(
                ["This is written in English."],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction()
                ],
                polling_interval=self._interval())).result()
async def test_not_passing_list_for_docs(self, client):
    """Passing a bare dict instead of a list of documents must raise TypeError."""
    docs = {"id": "1", "text": "hello world"}
    with pytest.raises(TypeError) as excinfo:
        async with client:
            await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction()
                ],
                polling_interval=self._interval())).result()
    assert "Input documents cannot be a dict" in str(excinfo.value)
async def test_bad_document_input(self, client):
    """Passing a plain string instead of a list of documents must raise TypeError."""
    docs = "This is the wrong type"

    with self.assertRaises(TypeError):
        async with client:
            response = await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                ],
                polling_interval=self._interval())).result()
async def test_multiple_pages_of_results_returned_successfully(
        self, client):
    """25 docs x 6 actions: results are grouped per input document, with the
    actions appearing in submission order inside each group."""
    single_doc = "hello world"
    # max number of documents is 25
    docs = [{"id": str(idx), "text": val}
            for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))]

    async with client:
        result = await (await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            show_stats=True,
            polling_interval=self._interval())).result()

        pages = [p async for p in result]
        assert len(pages) == len(docs)

        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY,
        ]

        action_type_to_document_results = defaultdict(list)
        for doc_idx, page in enumerate(pages):
            for action_idx, document_result in enumerate(page):
                # each page holds one document's results, one per action, in order
                self.assertEqual(document_result.id, str(doc_idx))
                action_type = self.document_result_to_action_type(document_result)
                self.assertEqual(action_type, action_order[action_idx])
                action_type_to_document_results[action_type].append(document_result)

        assert len(action_type_to_document_results) == len(action_order)
        for document_results in action_type_to_document_results.values():
            assert len(document_results) == len(docs)
async def test_out_of_order_ids_multiple_tasks(self, client):
    """Document-grouped results preserve the input document order, with all six
    actions present per document in submission order."""
    docs = [
        {"id": "56", "text": ":)"},
        {"id": "0", "text": ":("},
        {"id": "19", "text": ":P"},
        {"id": "1", "text": ":D"},
    ]

    async with client:
        response = await (await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval())).result()

        results = [p async for p in response]
        assert len(results) == len(docs)

        document_order = ["56", "0", "19", "1"]
        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY,
        ]
        for doc_idx, document_results in enumerate(results):
            assert len(document_results) == 6
            for action_idx, document_result in enumerate(document_results):
                self.assertEqual(document_result.id, document_order[doc_idx])
                self.assertEqual(
                    self.document_result_to_action_type(document_result),
                    action_order[action_idx])
async def test_bad_model_version_error_all_tasks(self, client):  # TODO: verify behavior of service
    """When every action has a bad model_version, the whole job fails with
    HttpResponseError."""
    docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

    with self.assertRaises(HttpResponseError):
        async with client:
            result = await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(model_version="bad"),
                    ExtractKeyPhrasesAction(model_version="bad"),
                    RecognizePiiEntitiesAction(model_version="bad")
                ],
                polling_interval=self._interval()
            )).result()
def test_too_many_documents(self, client):
    """Submitting 26 documents (one over the per-request limit) is rejected with a 400."""
    docs = list(itertools.repeat("input document", 26))  # Maximum number of documents per request is 25

    with pytest.raises(HttpResponseError) as excinfo:
        client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction()
            ],
            polling_interval=self._interval(),
        )
    assert excinfo.value.status_code == 400
def test_bad_credentials(self, client):
    """A client with bad credentials must raise ClientAuthenticationError on submit."""
    with self.assertRaises(ClientAuthenticationError):
        response = client.begin_analyze_actions(
            ["This is written in English."],
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval(),
        )
async def sample_model_version_async():
    """Demonstrate pinning model_version both on recognize_entities and on a
    RecognizeEntitiesAction passed to begin_analyze_actions."""
    print("--------------Choosing model_version sample--------------")
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics.aio import TextAnalyticsClient
    from azure.ai.textanalytics import RecognizeEntitiesAction

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint, credential=AzureKeyCredential(key))

    documents = [
        "I work for Foo Company, and we hired Contoso for our annual founding ceremony. The food \
was amazing and we all can't say enough good words about the quality and the level of service."
    ]

    async with text_analytics_client:
        print("\nSetting model_version='latest' with recognize_entities")
        result = await text_analytics_client.recognize_entities(
            documents, model_version="latest")
        result = [review for review in result if not review.is_error]

        print("...Results of Recognize Entities:")
        for review in result:
            for entity in review.entities:
                print(
                    f"......Entity '{entity.text}' has category '{entity.category}'"
                )

        print(
            "\nSetting model_version='latest' with recognize entities action in begin_analyze_actions"
        )
        poller = await text_analytics_client.begin_analyze_actions(
            documents,
            actions=[RecognizeEntitiesAction(model_version="latest")])

        print("...Results of Recognize Entities Action:")
        document_results = await poller.result()
        async for action_results in document_results:
            recognize_entities_result = action_results[0]
            if recognize_entities_result.is_error:
                print(
                    "......Is an error with code '{}' and message '{}'".format(
                        recognize_entities_result.code,
                        recognize_entities_result.message))
            else:
                for entity in recognize_entities_result.entities:
                    print(
                        f"......Entity '{entity.text}' has category '{entity.category}'"
                    )
def test_duplicate_ids_error(self, client):  # TODO: verify behavior of service
    """Duplicate document ids within one request are rejected by the service."""
    # Duplicate Ids
    docs = [{"id": "1", "text": "hello world"},
            {"id": "1", "text": "I did not like the hotel we stayed at."}]

    with self.assertRaises(HttpResponseError):
        result = client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval(),
        ).result()
def test_invalid_language_hint_method(self, client):
    """An invalid language hint makes every per-document result an error."""
    response = list(client.begin_analyze_batch_actions(
        ["This should fail because we're passing in an invalid language hint"],
        language="notalanguage",
        actions=[
            RecognizeEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction()
        ],
        polling_interval=self._interval(),
    ).result())

    for action_result in response:
        for doc in action_result.document_results:
            assert doc.is_error