async def test_all_successful_passing_text_document_input(self, client):
    """Check linked-entity recognition for a batch of ``TextDocumentInput`` objects.

    Two documents (ids "1" and "2") are submitted. Every returned document
    must contain exactly three entities; each entity must have the fields
    listed in ``required_fields`` populated, and each of its matches must
    expose an ``offset``.
    """
    texts = (
        "Microsoft was founded by Bill Gates and Paul Allen",
        "Microsoft fue fundado por Bill Gates y Paul Allen",
    )
    # Document ids are the 1-based position of each text, as strings.
    docs = [
        TextDocumentInput(id=str(position), text=text)
        for position, text in enumerate(texts, start=1)
    ]

    response = await client.recognize_linked_entities(docs)

    required_fields = (
        "name",
        "language",
        "data_source_entity_id",
        "url",
        "data_source",
        "matches",
    )
    for doc in response:
        assert len(doc.entities) == 3
        for entity in doc.entities:
            for field in required_fields:
                assert getattr(entity, field) is not None
            for match in entity.matches:
                assert match.offset is not None
async def test_mixing_inputs(self, client):
    """Check that a batch mixing input kinds is rejected with ``TypeError``.

    The batch contains a dict, a ``TextDocumentInput`` and a bare string;
    ``extract_key_phrases`` is expected to raise ``TypeError`` for it.
    """
    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(
            id="2",
            text="I did not like the hotel we stayed at. It was too expensive.",
        ),
        "You cannot mix string input with the above inputs",
    ]
    with self.assertRaises(TypeError):
        # The call is expected to raise, so its result is deliberately not bound.
        await client.extract_key_phrases(docs)
async def test_all_successful_passing_text_document_input(self, client):
    """Check PII entity recognition for a batch of ``TextDocumentInput`` objects.

    Three documents are submitted with ``show_stats=True``. The first entity
    of documents 1 and 2 is checked against expected values, then every
    document must carry an id and statistics, and every entity must have
    text, category, offset and confidence score populated.
    """
    docs = [
        TextDocumentInput(id="1", text="My SSN is 859-98-0987."),
        TextDocumentInput(
            id="2",
            text="Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
        ),
        TextDocumentInput(id="3", text="Is 998.214.865-68 your Brazilian CPF number?"),
    ]

    response = await client.recognize_pii_entities(docs, show_stats=True)

    ssn_entity = response[0].entities[0]
    self.assertEqual(ssn_entity.text, "859-98-0987")
    self.assertEqual(ssn_entity.category, "USSocialSecurityNumber")
    self.assertEqual(response[1].entities[0].text, "111000025")
    # Disabled: the service is currently returning PhoneNumber here.
    # self.assertEqual(response[1].entities[0].category, "ABA Routing Number")

    # Disabled: the service is currently not returning the Brazil CPF entity.
    # self.assertEqual(response[2].entities[0].text, "998.214.865-68")
    # self.assertEqual(response[2].entities[0].category, "Brazil CPF Number")

    entity_fields = ("text", "category", "offset", "confidence_score")
    for doc in response:
        self.assertIsNotNone(doc.id)
        self.assertIsNotNone(doc.statistics)
        for entity in doc.entities:
            for field in entity_fields:
                self.assertIsNotNone(getattr(entity, field))
async def test_mixing_inputs(self, client):
    """Check that a batch mixing input kinds is rejected with ``TypeError``.

    The batch contains a dict, a ``TextDocumentInput`` and a bare string;
    ``recognize_linked_entities`` is expected to raise ``TypeError`` for it.
    """
    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(
            id="2",
            text="I did not like the hotel we stayed at. It was too expensive.",
        ),
        "You cannot mix string input with the above inputs",
    ]
    with pytest.raises(TypeError):
        # The call is expected to raise, so its result is deliberately not bound.
        await client.recognize_linked_entities(docs)
async def test_all_successful_passing_text_document_input(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Check language detection for a batch of ``TextDocumentInput`` objects.

    Builds a client from the account endpoint and key, submits four
    documents, and compares the detected primary language name and
    ISO 639-1 code of each result with the expected values. Every result
    must also carry a score.

    ``resource_group`` and ``location`` are accepted but not used in the body.
    """
    credential = TextAnalyticsApiKeyCredential(text_analytics_account_key)
    text_analytics = TextAnalyticsClient(text_analytics_account, credential)

    docs = [
        TextDocumentInput(id="1", text="I should take my cat to the veterinarian"),
        TextDocumentInput(id="2", text="Este es un document escrito en Español."),
        TextDocumentInput(id="3", text="猫は幸せ"),
        TextDocumentInput(id="4", text="Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
    ]

    response = await text_analytics.detect_language(docs)

    # (language name, ISO 639-1 code) expected for each document, in input order.
    expected = [
        ("English", "en"),
        ("Spanish", "es"),
        ("Japanese", "ja"),
        ("German", "de"),
    ]
    # All names are checked before any ISO code.
    for index, (name, _) in enumerate(expected):
        self.assertEqual(response[index].primary_language.name, name)
    for index, (_, iso_code) in enumerate(expected):
        self.assertEqual(response[index].primary_language.iso6391_name, iso_code)

    for doc in response:
        self.assertIsNotNone(doc.primary_language.score)
def test_whole_batch_language_hint_and_obj_input(
        self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Check that a whole-batch language hint is applied to object inputs.

    Three ``TextDocumentInput`` objects without a language are sent to
    ``analyze_sentiment`` with ``language="de"``. The assertion lives in the
    raw-response hook: the request body must contain the ``"language": "de"``
    fragment exactly three times.

    ``resource_group`` and ``location`` are accepted but not used in the body.
    """
    text_analytics = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsApiKeyCredential(text_analytics_account_key))

    def callback(resp):
        # Count occurrences of the hint in the serialized request body.
        language_str = "\"language\": \"de\""
        language = resp.http_request.body.count(language_str)
        self.assertEqual(language, 3)

    docs = [
        TextDocumentInput(id="1", text="I should take my cat to the veterinarian."),
        TextDocumentInput(id="4", text="Este es un document escrito en Español."),
        TextDocumentInput(id="3", text="猫は幸せ"),
    ]

    # The returned results are not inspected; only the hook's assertion matters.
    text_analytics.analyze_sentiment(docs, language="de", raw_response_hook=callback)
async def test_whole_batch_language_hint_and_obj_per_item_hints(self, client):
    """Check how per-document language hints combine with a whole-batch hint.

    Documents 1 and 2 carry ``language="es"``; document 3 carries none, and
    the call passes ``language="en"``. The assertions live in the
    raw-response hook: the request body must contain the ``"language": "es"``
    fragment twice and the ``"language": "en"`` fragment once.
    """
    def callback(resp):
        body = resp.http_request.body
        spanish_count = body.count("\"language\": \"es\"")
        self.assertEqual(spanish_count, 2)
        english_count = body.count("\"language\": \"en\"")
        self.assertEqual(english_count, 1)

    docs = [
        TextDocumentInput(id="1", text="I should take my cat to the veterinarian.", language="es"),
        TextDocumentInput(id="2", text="Este es un document escrito en Español.", language="es"),
        TextDocumentInput(id="3", text="猫は幸せ"),
    ]

    # The returned results are not inspected; only the hook's assertions matter.
    await client.extract_key_phrases(docs, language="en", raw_response_hook=callback)
def test_all_successful_passing_text_document_input(self, client):
    """Check PII entity recognition for a batch of ``TextDocumentInput`` objects.

    Three documents are submitted with ``show_stats=True``. The first entity
    of documents 1 and 2 is checked against expected values, then every
    document must carry an id and statistics, and every entity must have
    text, category, offset and confidence score populated.
    """
    docs = [
        TextDocumentInput(id="1", text="My SSN is 859-98-0987."),
        TextDocumentInput(
            id="2",
            text="Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
        ),
        TextDocumentInput(id="3", text="Is 998.214.865-68 your Brazilian CPF number?"),
    ]

    response = client.recognize_pii_entities(docs, show_stats=True)

    ssn_entity = response[0].entities[0]
    assert ssn_entity.text == "859-98-0987"
    assert ssn_entity.category == "USSocialSecurityNumber"
    assert response[1].entities[0].text == "111000025"
    # Disabled: the service is currently returning PhoneNumber here.
    # assert response[1].entities[0].category == "ABA Routing Number"

    # Disabled: the service is currently not returning the Brazil CPF entity.
    # assert response[2].entities[0].text == "998.214.865-68"
    # assert response[2].entities[0].category == "Brazil CPF Number"

    entity_fields = ("text", "category", "offset", "confidence_score")
    for doc in response:
        assert doc.id is not None
        assert doc.statistics is not None
        for entity in doc.entities:
            for field in entity_fields:
                assert getattr(entity, field) is not None
async def test_all_successful_passing_text_document_input(self, client):
    """Check healthcare entity analysis for a batch of ``TextDocumentInput`` objects.

    Starts the long-running operation, awaits its result, and collects the
    per-document results. No statistics are expected at the batch or the
    document level. For the first document, the "high" entity must be
    related to the "blood pressure" entity through "ValueOfExamination",
    and "blood pressure" must have no related entities of its own.
    """
    docs = [
        TextDocumentInput(id="1", text="Patient does not suffer from high blood pressure."),
        TextDocumentInput(id="2", text="Prescribed 100mg ibuprofen, taken twice daily."),
    ]

    async with client:
        poller = await client.begin_analyze_healthcare_entities(
            docs, polling_interval=self._interval())
        result = await poller.result()

        self.assertIsNone(result.statistics)  # show_stats=False by default

        # Drain the async iterable so results can be indexed below.
        response = []
        async for document_result in result:
            response.append(document_result)

        for doc in response:
            self.assertIsNotNone(doc.id)
            self.assertIsNone(doc.statistics)
            self.assertIsNotNone(doc.entities)

        first_doc_entities = response[0].entities
        self.assertEqual(len(first_doc_entities), 2)
        entity1 = [e for e in first_doc_entities if e.text == "high"][0]
        entity2 = [e for e in first_doc_entities if e.text == "blood pressure"][0]

        self.assertEqual(len(entity1.related_entities), 1)
        # popitem() removes and returns the single (entity, relation) pair.
        related_entity, relation_type = entity1.related_entities.popitem()
        self.assertEqual(related_entity, entity2)
        self.assertEqual(relation_type, "ValueOfExamination")

        self.assertEqual(len(entity2.related_entities), 0)
def test_all_successful_passing_text_document_input(self, client):
    """Check sentiment analysis for a batch of ``TextDocumentInput`` objects.

    Three documents are submitted. Their document-level sentiment labels are
    compared with the expected ones, confidence scores are validated through
    ``self.validateConfidenceScores``, and the sentence breakdown of each
    document is compared with the expected sentence texts.
    """
    docs = [
        TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen."),
        TextDocumentInput(id="2", text="I did not like the hotel we stayed at. It was too expensive."),
        TextDocumentInput(id="3", text="The restaurant had really good food. I recommend you try it."),
    ]

    response = client.analyze_sentiment(docs)

    for index, sentiment in enumerate(("neutral", "negative", "positive")):
        assert response[index].sentiment == sentiment

    for doc in response:
        self.validateConfidenceScores(doc.confidence_scores)
        assert doc.sentences is not None

    # Expected sentence texts per document, in input order.
    expected_sentences = (
        ("Microsoft was founded by Bill Gates and Paul Allen.",),
        ("I did not like the hotel we stayed at.", "It was too expensive."),
        ("The restaurant had really good food.", "I recommend you try it."),
    )
    for index, sentences in enumerate(expected_sentences):
        assert len(response[index].sentences) == len(sentences)
        for position, text in enumerate(sentences):
            assert response[index].sentences[position].text == text
def test_whole_batch_language_hint_and_obj_input(self, client):
    """Check that ``begin_analyze`` succeeds with a whole-batch language hint.

    Three ``TextDocumentInput`` objects without a language are submitted
    with ``language="en"`` and one task of each kind. For every task type,
    the first result page must hold exactly one task result, and none of
    its per-document results may be an error.

    NOTE(review): an earlier nested ``callback`` that counted
    ``"language": "de"`` in the request body was never passed to the call
    (and did not match the ``"en"`` hint sent here), so it never ran and has
    been removed. This test therefore does not inspect the request body.
    """
    docs = [
        TextDocumentInput(id="1", text="I should take my cat to the veterinarian."),
        TextDocumentInput(id="4", text="Este es un document escrito en Español."),
        TextDocumentInput(id="3", text="猫は幸せ"),
    ]

    response = list(client.begin_analyze(
        docs,
        entities_recognition_tasks=[EntitiesRecognitionTask()],
        key_phrase_extraction_tasks=[KeyPhraseExtractionTask()],
        pii_entities_recognition_tasks=[PiiEntitiesRecognitionTask()],
        language="en",
        polling_interval=self._interval(),
    ).result())

    task_types = [
        "entities_recognition_results",
        "key_phrase_extraction_results",
        "pii_entities_recognition_results",
    ]

    for task_type in task_types:
        task_results = getattr(response[0], task_type)
        self.assertEqual(len(task_results), 1)

        results = task_results[0].results
        for r in results:
            self.assertFalse(r.is_error)
def test_mixing_inputs(self, client):
    """Check that a batch mixing input kinds is rejected with ``TypeError``.

    The batch contains a dict, a ``TextDocumentInput`` and a bare string;
    ``begin_analyze_healthcare_entities`` is expected to raise ``TypeError``
    for it.
    """
    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(
            id="2",
            text="I did not like the hotel we stayed at. It was too expensive.",
        ),
        u"You cannot mix string input with the above inputs",
    ]
    with self.assertRaises(TypeError):
        # The call is expected to raise, so its result is deliberately not bound.
        client.begin_analyze_healthcare_entities(
            docs, polling_interval=self._interval())
async def test_all_successful_passing_text_document_input(
        self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Check PII entity recognition for a batch of ``TextDocumentInput`` objects.

    Builds a client from the account endpoint and key, submits three
    documents with ``show_stats=True``, and checks the first entity of each
    document against expected values. Every document must carry an id and
    statistics, and every entity must have text, category, grapheme offset,
    grapheme length and confidence score populated.

    ``resource_group`` and ``location`` are accepted but not used in the body.
    """
    credential = TextAnalyticsApiKeyCredential(text_analytics_account_key)
    text_analytics = TextAnalyticsClient(text_analytics_account, credential)

    docs = [
        TextDocumentInput(id="1", text="My SSN is 555-55-5555."),
        TextDocumentInput(
            id="2",
            text="Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
        ),
        TextDocumentInput(id="3", text="Is 998.214.865-68 your Brazilian CPF number?"),
    ]

    response = await text_analytics.recognize_pii_entities(docs, show_stats=True)

    ssn_entity = response[0].entities[0]
    self.assertEqual(ssn_entity.text, "555-55-5555")
    self.assertEqual(ssn_entity.category, "U.S. Social Security Number (SSN)")
    self.assertEqual(response[1].entities[0].text, "111000025")
    # Disabled: the service is currently returning PhoneNumber here.
    # self.assertEqual(response[1].entities[0].category, "ABA Routing Number")
    cpf_entity = response[2].entities[0]
    self.assertEqual(cpf_entity.text, "998.214.865-68")
    self.assertEqual(cpf_entity.category, "Brazil CPF Number")

    entity_fields = (
        "text",
        "category",
        "grapheme_offset",
        "grapheme_length",
        "confidence_score",
    )
    for doc in response:
        self.assertIsNotNone(doc.id)
        self.assertIsNotNone(doc.statistics)
        for entity in doc.entities:
            for field in entity_fields:
                self.assertIsNotNone(getattr(entity, field))
async def test_output_same_order_as_input_multiple_tasks(self, client):
    """Check that batch-action results keep the order of the input documents.

    Five documents with ids "1".."5" are submitted with three actions, the
    last of which uses ``model_version="bad"``. The first two action results
    must have the expected action types and the third must be an error.
    Every non-error action result must contain one document result per input
    document, with ids in input order.
    """
    docs = [
        TextDocumentInput(id="1", text="one"),
        TextDocumentInput(id="2", text="two"),
        TextDocumentInput(id="3", text="three"),
        TextDocumentInput(id="4", text="four"),
        TextDocumentInput(id="5", text="five"),
    ]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizePiiEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(model_version="bad"),
            ],
            polling_interval=self._interval()
        )).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        assert len(action_results) == 3

        assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
        assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
        assert action_results[2].is_error

        successful_results = [r for r in action_results if not r.is_error]
        # Compare the length itself. The previous form filtered on the length
        # and then tested the truthiness of the surviving objects, so it could
        # never fail.
        assert all(len(r.document_results) == len(docs) for r in successful_results)

        for action_result in successful_results:
            for idx, doc in enumerate(action_result.document_results):
                self.assertEqual(str(idx + 1), doc.id)
def test_all_successful_passing_text_document_input(self, client):
    """Check language detection for a batch of ``TextDocumentInput`` objects.

    Four documents are submitted. The detected primary language name and
    ISO 639-1 code of each result are compared with the expected values, and
    every result must also carry a score.
    """
    docs = [
        TextDocumentInput(id="1", text="I should take my cat to the veterinarian"),
        TextDocumentInput(id="2", text="Este es un document escrito en Español."),
        TextDocumentInput(id="3", text="猫は幸せ"),
        TextDocumentInput(id="4", text="Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
    ]

    response = client.detect_language(docs)

    expected_names = ("English", "Spanish", "Japanese", "German")
    expected_iso_codes = ("en", "es", "ja", "de")

    # All names are checked before any ISO code.
    for index, name in enumerate(expected_names):
        self.assertEqual(response[index].primary_language.name, name)
    for index, iso_code in enumerate(expected_iso_codes):
        self.assertEqual(response[index].primary_language.iso6391_name, iso_code)

    for doc in response:
        self.assertIsNotNone(doc.primary_language.score)
async def test_all_successful_passing_text_document_input(
        self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Check key-phrase extraction for a batch of ``TextDocumentInput`` objects.

    Builds a client from the account endpoint and key, then submits two
    documents with explicit ``language`` values ("en" and "es"). Every
    result must list "Paul Allen", "Bill Gates" and "Microsoft" among its
    key phrases and must carry an id.

    ``resource_group`` and ``location`` are accepted but not used in the body.
    """
    credential = AzureKeyCredential(text_analytics_account_key)
    text_analytics = TextAnalyticsClient(text_analytics_account, credential)

    docs = [
        TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen", language="en"),
        TextDocumentInput(id="2", text="Microsoft fue fundado por Bill Gates y Paul Allen", language="es"),
    ]

    response = await text_analytics.extract_key_phrases(docs)

    expected_phrases = ("Paul Allen", "Bill Gates", "Microsoft")
    for phrases in response:
        for phrase in expected_phrases:
            self.assertIn(phrase, phrases.key_phrases)
        self.assertIsNotNone(phrases.id)
def test_all_successful_passing_text_document_input(self, client):
    """Check sentiment analysis for a batch of ``TextDocumentInput`` objects.

    Three documents are submitted. Their document-level sentiment labels are
    compared with the expected ones, confidence scores and sentences must be
    present, and the sentence breakdown of each document is compared with
    the expected sentence texts.
    """
    docs = [
        TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen."),
        TextDocumentInput(id="2", text="I did not like the hotel we stayed at. It was too expensive."),
        TextDocumentInput(id="3", text="The restaurant had really good food. I recommend you try it."),
    ]

    response = client.analyze_sentiment(docs)

    for index, sentiment in enumerate(("neutral", "negative", "positive")):
        self.assertEqual(response[index].sentiment, sentiment)

    for doc in response:
        self.assertIsNotNone(doc.confidence_scores)
        self.assertIsNotNone(doc.sentences)

    # Expected sentence texts per document, in input order.
    expected_sentences = (
        ("Microsoft was founded by Bill Gates and Paul Allen.",),
        ("I did not like the hotel we stayed at.", "It was too expensive."),
        ("The restaurant had really good food.", "I recommend you try it."),
    )
    for index, sentences in enumerate(expected_sentences):
        self.assertEqual(len(response[index].sentences), len(sentences))
        for position, text in enumerate(sentences):
            self.assertEqual(response[index].sentences[position].text, text)
def test_all_successful_passing_text_document_input(self, client):
    """Check linked-entity recognition for a batch of ``TextDocumentInput`` objects.

    Two documents are submitted. Every returned document must contain
    exactly three entities; each entity must have the fields listed in
    ``required_fields`` populated, and each of its matches must expose an
    offset and a non-zero length.
    """
    docs = [
        TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen"),
        TextDocumentInput(id="2", text="Microsoft fue fundado por Bill Gates y Paul Allen"),
    ]

    response = client.recognize_linked_entities(docs)

    required_fields = (
        "name",
        "language",
        "data_source_entity_id",
        "url",
        "data_source",
        "matches",
    )
    for doc in response:
        self.assertEqual(len(doc.entities), 3)
        for entity in doc.entities:
            for field in required_fields:
                self.assertIsNotNone(getattr(entity, field))
            for match in entity.matches:
                self.assertIsNotNone(match.offset)
                self.assertIsNotNone(match.length)
                self.assertNotEqual(match.length, 0)
def test_mixing_inputs(self, client):
    """Check that a batch mixing input kinds is rejected with ``TypeError``.

    The batch contains a dict, a ``TextDocumentInput`` and a bare string.
    The ``TypeError`` may come from ``begin_analyze_batch_actions`` or from
    ``.result()``; both are inside the ``assertRaises`` block.
    """
    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(id="2", text="I did not like the hotel we stayed at. It was too expensive."),
        u"You cannot mix string input with the above inputs",
    ]
    with self.assertRaises(TypeError):
        # The call is expected to raise, so its result is deliberately not bound.
        client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval(),
        ).result()
def test_mixing_inputs(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Check that a batch mixing input kinds is rejected with ``TypeError``.

    Builds a client from the account endpoint and key, then calls
    ``analyze_sentiment`` with a batch containing a dict, a
    ``TextDocumentInput`` and a bare string.

    ``resource_group`` and ``location`` are accepted but not used in the body.
    """
    text_analytics = TextAnalyticsClient(text_analytics_account, text_analytics_account_key)

    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(
            id="2",
            text="I did not like the hotel we stayed it. It was too expensive.",
        ),
        u"You cannot mix string input with the above inputs",
    ]
    with self.assertRaises(TypeError):
        # The call is expected to raise, so its result is deliberately not bound.
        text_analytics.analyze_sentiment(docs)
def test_mixing_inputs(self, client):
    """Check that a batch mixing input kinds is rejected with ``TypeError``.

    The batch contains a dict, a ``TextDocumentInput`` and a bare string.
    The ``TypeError`` may come from ``begin_analyze`` or from ``.result()``;
    both are inside the ``assertRaises`` block.
    """
    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(
            id="2",
            text="I did not like the hotel we stayed at. It was too expensive.",
        ),
        u"You cannot mix string input with the above inputs",
    ]
    with self.assertRaises(TypeError):
        # The call is expected to raise, so its result is deliberately not bound.
        client.begin_analyze(
            docs,
            entities_recognition_tasks=[EntitiesRecognitionTask()],
            key_phrase_extraction_tasks=[KeyPhraseExtractionTask()],
            pii_entities_recognition_tasks=[PiiEntitiesRecognitionTask()],
            polling_interval=self._interval(),
        ).result()