def test_single_recognize_linked_entities_empty_text_input(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An empty input text must be rejected by the service with HttpResponseError."""
    with self.assertRaises(HttpResponseError):
        single_recognize_linked_entities(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(text_analytics_account_key),
            input_text="",
        )
def test_single_recognize_linked_entities_empty_credential_class(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An empty API key should produce ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        single_recognize_linked_entities(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(""),
            input_text="Microsoft was founded by Bill Gates.",
        )
def test_single_extract_key_phrases_bad_credentials(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An invalid API key should produce ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        single_extract_key_phrases(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential("xxxxxxxxxxxx"),
            input_text="Microsoft was founded by Bill Gates.",
        )
def test_single_language_detection_bad_credentials(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An invalid API key should produce ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        single_detect_language(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential("xxxxxxxxxxxx"),
            input_text="This is written in English.",
        )
async def test_user_agent_async(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """The request must carry the SDK's User-Agent identification string."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    def callback(resp):
        # SDK name/version plus Python version and platform must appear in the header.
        expected = "azsdk-python-azure-ai-textanalytics/{} Python/{} ({})".format(
            VERSION, platform.python_version(), platform.platform())
        self.assertIn(expected, resp.http_request.headers["User-Agent"])

    docs = [
        {"id": "1", "text": "I will go to the park."},
        {"id": "2", "text": "I did not like the hotel we stayed it."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]
    await client.analyze_sentiment(docs, response_hook=callback)
def detect_languages(self):
    """Sample: detect the language of a batch of documents."""
    # [START batch_detect_languages]
    from azure.ai.textanalytics import TextAnalyticsClient, TextAnalyticsAPIKeyCredential
    text_analytics_client = TextAnalyticsClient(
        endpoint=self.endpoint,
        credential=TextAnalyticsAPIKeyCredential(self.key))

    documents = [
        "This document is written in English.",
        "Este es un document escrito en Español.",
        "这是一个用中文写的文件",
        "Dies ist ein Dokument in englischer Sprache.",
        "Detta är ett dokument skrivet på engelska."
    ]

    result = text_analytics_client.detect_languages(documents)

    for idx, doc in enumerate(result):
        if doc.is_error:
            print(doc.id, doc.error)
        else:
            print("Document text: {}".format(documents[idx]))
            print("Language detected: {}".format(doc.primary_language.name))
            print("ISO6391 name: {}".format(doc.primary_language.iso6391_name))
            print("Confidence score: {}\n".format(doc.primary_language.score))
def test_single_analyze_sentiment_non_text_input(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Passing a dict where a plain string is expected should raise TypeError."""
    with self.assertRaises(TypeError):
        single_analyze_sentiment(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(text_analytics_account_key),
            input_text={"id": "1", "text": "hello world"}
        )
async def test_successful_recognize_pii_entities_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """PII recognition should surface SSN, ABA and CPF numbers with the right types."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "text": "My SSN is 555-55-5555."},
        {"id": "2", "text": "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check."},
        {"id": "3", "text": "Is 998.214.865-68 your Brazilian CPF number?"},
    ]
    response = await client.recognize_pii_entities(docs)

    expected = [
        ("555-55-5555", "U.S. Social Security Number (SSN)"),
        ("111000025", "ABA Routing Number"),
        ("998.214.865-68", "Brazil CPF Number"),
    ]
    for doc, (entity_text, entity_type) in zip(response, expected):
        self.assertEqual(doc.entities[0].text, entity_text)
        self.assertEqual(doc.entities[0].type, entity_type)
def test_single_recognize_pii_entities_bad_credentials(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An invalid API key should produce ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        single_recognize_pii_entities(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential("xxxxxxxxxxxx"),
            input_text="My SSN is 555-55-5555",
        )
async def test_whole_batch_language_hint_and_obj_per_item_hints_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Per-item language hints on TextDocumentInput must override the batch-level hint."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    def callback(resp):
        body = resp.http_request.body
        # Two documents carry their own "es" hint; the third falls back to the batch "en".
        self.assertEqual(body.count("\"language\": \"es\""), 2)
        self.assertEqual(body.count("\"language\": \"en\""), 1)

    docs = [
        TextDocumentInput(id="1", text="I should take my cat to the veterinarian.", language="es"),
        TextDocumentInput(id="2", text="Este es un document escrito en Español.", language="es"),
        TextDocumentInput(id="3", text="猫は幸せ"),
    ]
    await client.analyze_sentiment(docs, language="en", response_hook=callback)
async def analyze_sentiment_async(self):
    """Sample: analyze sentiment of a single piece of text asynchronously."""
    # [START single_analyze_sentiment_async]
    from azure.ai.textanalytics.aio import single_analyze_sentiment
    from azure.ai.textanalytics import TextAnalyticsAPIKeyCredential

    text = "I visited the restaurant last week. The portions were very generous. However, I did not like what " \
           "I ordered."

    result = await single_analyze_sentiment(
        endpoint=self.endpoint,
        credential=TextAnalyticsAPIKeyCredential(self.key),
        input_text=text,
        language="en")

    print("Overall sentiment: {}".format(result.sentiment))
    print(
        "Overall scores: positive={0:.3f}; neutral={1:.3f}; negative={2:.3f} \n"
        .format(
            result.document_scores.positive,
            result.document_scores.neutral,
            result.document_scores.negative,
        ))

    for idx, sentence in enumerate(result.sentences):
        print("Sentence {} sentiment: {}".format(idx + 1, sentence.sentiment))
        print("Offset: {}".format(sentence.offset))
        print("Length: {}".format(sentence.length))
        print(
            "Sentence score: positive={0:.3f}; neutral={1:.3f}; negative={2:.3f} \n"
            .format(
                sentence.sentence_scores.positive,
                sentence.sentence_scores.neutral,
                sentence.sentence_scores.negative,
            ))
async def test_all_errors_detect_language_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Empty documents and an over-length document should all come back as errors.

    Fix: the original built the 5121-character payload with 5121 iterations of
    ``text += "x"`` — quadratic in CPython worst case and needlessly verbose.
    String repetition produces the identical value in one step.
    """
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    # One character past the service's maximum document length (5120).
    text = "x" * 5121

    docs = [
        {"id": "1", "text": ""},
        {"id": "2", "text": ""},
        {"id": "3", "text": ""},
        {"id": "4", "text": text},
    ]
    response = await client.detect_languages(docs)
    for resp in response:
        self.assertTrue(resp.is_error)
async def recognize_pii_entities_async(self):
    """Sample: recognize personally identifiable information in a batch of documents."""
    # [START batch_recognize_pii_entities_async]
    from azure.ai.textanalytics.aio import TextAnalyticsClient
    from azure.ai.textanalytics import TextAnalyticsAPIKeyCredential

    text_analytics_client = TextAnalyticsClient(
        endpoint=self.endpoint,
        credential=TextAnalyticsAPIKeyCredential(self.key))

    documents = [
        "The employee's SSN is 555-55-5555.",
        "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
        "Is 998.214.865-68 your Brazilian CPF number?"
    ]

    async with text_analytics_client:
        result = await text_analytics_client.recognize_pii_entities(documents)

    docs = [doc for doc in result if not doc.is_error]
    for idx, doc in enumerate(docs):
        print("Document text: {}".format(documents[idx]))
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("Type: {}".format(entity.type))
            print("Confidence Score: {}\n".format(entity.score))
def test_single_analyze_sentiment_bad_credentials(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An invalid API key should produce ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        single_analyze_sentiment(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential("xxxxxxxxxxxx"),
            input_text="I was unhappy with the food at the restaurant.",
        )
async def test_per_item_dont_use_language_hint_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """An explicitly empty per-item language hint must be sent as-is, not defaulted."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    def callback(resp):
        body = resp.http_request.body
        # Two documents opted out with "", one falls back to the default "en".
        self.assertEqual(body.count("\"language\": \"\""), 2)
        self.assertEqual(body.count("\"language\": \"en\""), 1)

    docs = [
        {"id": "1", "language": "", "text": "I will go to the park."},
        {"id": "2", "language": "", "text": "I did not like the hotel we stayed it."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]
    await client.analyze_sentiment(docs, response_hook=callback)
async def test_whole_batch_country_hint_and_dict_per_item_hints_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Per-item dict country hints must override the batch-level country hint."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    def callback(resp):
        body = resp.http_request.body
        # One document falls back to the batch "CA"; two keep their own "US".
        self.assertEqual(body.count("\"countryHint\": \"CA\""), 1)
        self.assertEqual(body.count("\"countryHint\": \"US\""), 2)

    docs = [
        {"id": "1", "country_hint": "US", "text": "I will go to the park."},
        {"id": "2", "country_hint": "US", "text": "I did not like the hotel we stayed it."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]
    await client.detect_languages(docs, country_hint="CA", response_hook=callback)
async def test_successful_analyze_sentiment_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Sentiment analysis should classify the three documents as neutral/negative/positive."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "language": "en", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        {"id": "2", "language": "en", "text": "I did not like the hotel we stayed it. It was too expensive."},
        {"id": "3", "language": "en", "text": "The restaurant had really good food. I recommend you try it."},
    ]
    response = await client.analyze_sentiment(docs)

    for doc, expected in zip(response, ["neutral", "negative", "positive"]):
        self.assertEqual(doc.sentiment, expected)
async def test_whole_batch_country_hint_and_obj_per_item_hints_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Per-item country hints on DetectLanguageInput must override the batch-level hint."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    def callback(resp):
        body = resp.http_request.body
        # Two documents carry their own "CA" hint; the third falls back to "US".
        self.assertEqual(body.count("\"countryHint\": \"CA\""), 2)
        self.assertEqual(body.count("\"countryHint\": \"US\""), 1)

    docs = [
        DetectLanguageInput(id="1", text="I should take my cat to the veterinarian.", country_hint="CA"),
        DetectLanguageInput(id="2", text="Este es un document escrito en Español.", country_hint="CA"),
        DetectLanguageInput(id="3", text="猫は幸せ"),
    ]
    await client.detect_languages(docs, country_hint="US", response_hook=callback)
async def test_some_errors_detect_language_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Bad country hints and empty texts error per-item; valid documents still succeed."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "country_hint": "United States", "text": "I should take my cat to the veterinarian."},
        {"id": "2", "text": "Este es un document escrito en Español."},
        {"id": "3", "text": ""},
        {"id": "4", "text": "Fahrt nach Stuttgart und dann zum Hotel zu Fu."},
    ]
    response = await client.detect_languages(docs)

    # Documents 1 (invalid hint) and 3 (empty text) fail; 2 and 4 succeed.
    for doc, should_error in zip(response, [True, False, True, False]):
        self.assertEqual(doc.is_error, should_error)
async def test_validate_multilanguage_input_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """TextDocumentInput objects are accepted as input and analyzed correctly."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen."),
        TextDocumentInput(id="2", text="I did not like the hotel we stayed it. It was too expensive."),
        TextDocumentInput(id="3", text="The restaurant had really good food. I recommend you try it."),
    ]
    response = await client.analyze_sentiment(docs)

    for doc, expected in zip(response, ["neutral", "negative", "positive"]):
        self.assertEqual(doc.sentiment, expected)
async def test_out_of_order_ids_async(self, resource_group, location,
                                      text_analytics_account,
                                      text_analytics_account_key):
    """Results must come back in the same order the documents were submitted."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "56", "text": ":)"},
        {"id": "0", "text": ":("},
        {"id": "22", "text": ""},
        {"id": "19", "text": ":P"},
        {"id": "1", "text": ":D"},
    ]
    response = await client.analyze_sentiment(docs)

    for resp, expected_id in zip(response, ["56", "0", "22", "19", "1"]):
        self.assertEqual(resp.id, expected_id)
async def test_all_errors_analyze_sentiment_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Empty texts and an invalid language code must all be reported as errors."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "language": "en", "text": ""},
        {"id": "2", "language": "english", "text": "I did not like the hotel we stayed it. It was too expensive."},
        {"id": "3", "language": "en", "text": ""},
    ]
    response = await client.analyze_sentiment(docs)

    for doc in response:
        self.assertTrue(doc.is_error)
async def test_successful_detect_language_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Language detection should identify English, Spanish, Japanese and German."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "text": "I should take my cat to the veterinarian."},
        {"id": "2", "text": "Este es un document escrito en Español."},
        {"id": "3", "text": "猫は幸せ"},
        {"id": "4", "text": "Fahrt nach Stuttgart und dann zum Hotel zu Fu."},
    ]
    response = await client.detect_languages(docs)

    expected_languages = ["English", "Spanish", "Japanese", "German"]
    for doc, language in zip(response, expected_languages):
        self.assertEqual(doc.primary_language.name, language)
def test_single_language_detection_empty_text_input(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """An empty input text must be rejected by the service with HttpResponseError."""
    with self.assertRaises(HttpResponseError):
        single_detect_language(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(text_analytics_account_key),
            input_text="",
        )
def recognize_linked_entities(self):
    """Sample: recognize entities linked to a well-known knowledge base."""
    # [START batch_recognize_linked_entities]
    from azure.ai.textanalytics import TextAnalyticsClient, TextAnalyticsAPIKeyCredential
    text_analytics_client = TextAnalyticsClient(
        endpoint=self.endpoint,
        credential=TextAnalyticsAPIKeyCredential(self.key))

    documents = [
        "Microsoft moved its headquarters to Bellevue, Washington in January 1979.",
        "Steve Ballmer stepped down as CEO of Microsoft and was succeeded by Satya Nadella.",
        "Microsoft superó a Apple Inc. como la compañía más valiosa que cotiza en bolsa en el mundo.",
    ]

    result = text_analytics_client.recognize_linked_entities(documents)
    docs = [doc for doc in result if not doc.is_error]

    for idx, doc in enumerate(docs):
        print("Document text: {}\n".format(documents[idx]))
        for entity in doc.entities:
            print("Entity: {}".format(entity.name))
            print("Url: {}".format(entity.url))
            print("Data Source: {}".format(entity.data_source))
            for match in entity.matches:
                print("Score: {0:.3f}".format(match.score))
                print("Offset: {}".format(match.offset))
                print("Length: {}\n".format(match.length))
        print("------------------------------------------")
async def test_some_errors_recognize_entities_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Invalid language codes and empty texts error per-item; valid ones succeed."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "language": "en", "text": "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975."},
        {"id": "2", "language": "Spanish", "text": "Hola"},
        {"id": "3", "language": "de", "text": ""},
    ]
    response = await client.recognize_entities(docs)

    # Document 1 is valid; 2 has a bad language code and 3 is empty.
    for doc, should_error in zip(response, [False, True, True]):
        self.assertEqual(doc.is_error, should_error)
async def test_successful_recognize_entities_async(
        self, resource_group, location, text_analytics_account,
        text_analytics_account_key):
    """Entity recognition should find four entities in each localized document."""
    client = TextAnalyticsClient(
        text_analytics_account,
        TextAnalyticsAPIKeyCredential(text_analytics_account_key))

    docs = [
        {"id": "1", "language": "en", "text": "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975."},
        {"id": "2", "language": "es", "text": "Microsoft fue fundado por Bill Gates y Paul Allen el 4 de abril de 1975."},
        {"id": "3", "language": "de", "text": "Microsoft wurde am 4. April 1975 von Bill Gates und Paul Allen gegründet."},
    ]
    response = await client.recognize_entities(docs)

    for doc in response:
        self.assertEqual(len(doc.entities), 4)
def test_single_recognize_pii_entities_bad_language_hint(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """A non-ISO language hint must be rejected with HttpResponseError."""
    with self.assertRaises(HttpResponseError):
        single_recognize_pii_entities(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(text_analytics_account_key),
            input_text="My SSN is 555-55-5555",
            language="English"
        )
def test_single_extract_key_phrases_bad_language_hint(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """A non-ISO language hint must be rejected with HttpResponseError."""
    with self.assertRaises(HttpResponseError):
        single_extract_key_phrases(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(text_analytics_account_key),
            input_text="Microsoft was founded by Bill Gates.",
            language="English"
        )
def test_single_analyze_sentiment_bad_language_hint(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """A non-ISO language hint must be rejected with HttpResponseError."""
    with self.assertRaises(HttpResponseError):
        single_analyze_sentiment(
            endpoint=text_analytics_account,
            credential=TextAnalyticsAPIKeyCredential(text_analytics_account_key),
            input_text="I was unhappy with the food at the restaurant.",
            language="English"
        )