def get_links(self, text: str) -> List[Pair]:
    """Extract entities from *text* and pair them with DBpedia resource URLs.

    A MeaningCloud Topics Extraction request (English, entities only) is sent
    using a license key picked at random from ``MEANING_CLOUD_KEYS``. For each
    returned entity that has a ``semld_list`` containing an English Wikipedia
    link, the last path segment of the first such link is appended to
    ``http://dbpedia.org/resource/``.

    Returns:
        A list of ``Pair(dbpedia_url, entity_form, 'entity')``; an empty list
        when the request was not successful (a message is printed instead).
    """
    api_key = MEANING_CLOUD_KEYS[randint(0, len(MEANING_CLOUD_KEYS) - 1)]
    request = meaningcloud.TopicsRequest(api_key,
                                         txt=text,
                                         lang='en',
                                         topicType='e')
    topics_response = meaningcloud.TopicsResponse(request.sendReq())

    # Failure path first: report the problem and hand back nothing.
    if not topics_response.isSuccessful():
        if topics_response.getResponse() is None:
            print("\nOh no! The request sent did not return a Json\n")
        else:
            print("\nOh no! There was the following error: " +
                  topics_response.getStatusMsg() + "\n")
        return []

    links = []
    for entity in topics_response.getEntities():
        if 'semld_list' not in entity:
            continue
        english_pages = [
            uri for uri in entity['semld_list']
            if uri[:23] == 'http://en.wikipedia.org'
        ]
        if not english_pages:
            continue
        first_page = english_pages[0]
        # Everything after the final '/' is reused as the DBpedia resource name.
        resource_name = first_page[first_page.rfind('/') + 1:]
        links.append(
            Pair(f"http://dbpedia.org/resource/{resource_name}",
                 entity['form'], 'entity'))
    return links
    def testIsUserDefined(self):
        """isUserDefined() is False for the fixture topics, True for dictionary ones."""
        fixture = self.response
        topic_getters = (
            fixture.getEntities,
            fixture.getConcepts,
            fixture.getTimeExpressions,
            fixture.getMoneyExpressions,
            fixture.getQuantityExpressions,
            fixture.getOtherExpressions,
            fixture.getQuotations,
            fixture.getRelations,
        )
        # The first topic of every category in the shared fixture is expected
        # to be reported as not user defined.
        for getter in topic_getters:
            self.assertEqual(fixture.isUserDefined(getter()[0]), False)

        # Entity and concept below both carry a "dictionary" field.
        responseWithUserDefinedEntities = '{"status":{"code":"0","msg":"OK","credits":"1"},"entity_list":[{"form":"Lincoln Trikru","official_form":"Lincoln","dictionary":"test1","id":"ent_sin_tag","sementity":{"class":"instance","type":"Top>People>Grounders"},"variant_list":[{"form":"Lincoln","inip":"0","endp":"6"}],"relevance":"100"}],"concept_list":[{"form":"dropship","id":"concepto_sin_tag","dictionary":"test1","sementity":{"class":"class"},"variant_list":[{"form":"dropship","inip":"19","endp":"26"}],"relevance":"100"}]}'
        responseWithUD = meaningcloud.TopicsResponse(
            responseWithUserDefinedEntities)
        for getter in (responseWithUD.getEntities, responseWithUD.getConcepts):
            self.assertEqual(responseWithUD.isUserDefined(getter()[0]), True)
def get_topics(row):
    """Run MeaningCloud Topics Extraction on ``row.sentence``.

    Args:
        row: Object exposing ``sentence`` (text to analyse) and ``name``
            (used only in the error message).

    Returns:
        The ``meaningcloud.TopicsResponse`` for the row, or ``None`` if
        building or sending the request raised an exception.
    """
    # NOTE(review): the license key is hard-coded in source; consider loading
    # it from configuration or the environment instead.
    try:
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest('ed573dcdee15b76ef892775da22bd5d4',
                                       txt=row.sentence,
                                       lang='en',
                                       topicType='a').sendReq())
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of being reported as a row error.
        print('Error in row {}'.format(row.name))
        return None

    # Pause between successful requests before handing the response back.
    sleep(0.5)
    return topics_response
def extractTopics(text, fibo, relevance):
    """Extract entities and concepts from *text* with MeaningCloud.

    Args:
        text: Text to analyse (sent with ``txtf=markup``).
        fibo: When truthy, the ``FIBO_en`` user dictionary is added to the
            request and concepts coming from it are reported separately.
        relevance: Minimum relevance for a topic to be kept. Multiword
            concepts are kept from half this value, and user-defined
            concepts are always kept.

    Returns:
        ``(entities, concepts, fibo_concepts)`` when *fibo* is truthy,
        otherwise ``(entities, concepts)``. Each element is a comma-separated
        string of ``"form (LastTypeNode)"`` items, ``'(none)'`` when the API
        returned no topics of that kind, or ``''`` when the request failed.
    """
    entities = ''
    concepts = ''
    fibo_concepts = ''

    print("\tGetting entities and concepts...")
    topics_req = meaningcloud.TopicsRequest(license_key, txt=text, lang='en', topicType='ec', otherparams={'txtf': 'markup'})
    if fibo:
        topics_req.addParam('ud', 'FIBO_en')

    topics_response = meaningcloud.TopicsResponse(topics_req.sendReq())

    def describe(topic):
        """Render a topic as 'form (last node of its ontology type)'."""
        last_node = topics_response.getTypeLastNode(topics_response.getOntoType(topic))
        return topics_response.getTopicForm(topic) + ' (' + last_node + ')'

    # If there are no errors in the request, we extract the entities and concepts
    if topics_response.isSuccessful():
        entities_list = topics_response.getEntities()
        if entities_list:
            # Keep only entities whose relevance reaches the threshold.
            entities = ', '.join(
                describe(entity) for entity in entities_list
                if int(topics_response.getTopicRelevance(entity)) >= relevance)
        else:
            entities = '(none)'

        concepts_list = topics_response.getConcepts()
        if concepts_list:
            formatted_concepts = []
            formatted_fibo_concepts = []
            for concept in concepts_list:
                if fibo and concept.get('dictionary') == 'FIBO_en':
                    formatted_fibo_concepts.append(describe(concept))
                    continue
                # Keep concepts that reach the threshold, multiword concepts
                # that reach half of it, or user-defined concepts.
                score = int(topics_response.getTopicRelevance(concept))
                is_multiword = ' ' in topics_response.getTopicForm(concept)
                if (score >= relevance
                        or (is_multiword and score >= (relevance / 2))
                        or topics_response.isUserDefined(concept)):
                    formatted_concepts.append(describe(concept))

            concepts = ', '.join(formatted_concepts) if formatted_concepts else '(none)'
            fibo_concepts = ', '.join(formatted_fibo_concepts) if formatted_fibo_concepts else '(none)'
        else:
            concepts = "(none)"
            fibo_concepts = "(none)"
    else:
        print("\tOops! Request to topics was not succesful: (" + topics_response.getStatusCode() + ') ' + topics_response.getStatusMsg())

    # Parenthesised on purpose: without the parentheses the conditional
    # expression binds tighter than the commas and a 4-tuple is returned.
    return (entities, concepts, fibo_concepts) if fibo else (entities, concepts)
Пример #5
0
def topics():
    """Request entity extraction for the module-level ``document`` and print it.

    Returns:
        ``(topics_response, entities)``. ``entities`` is whatever
        ``getEntities()`` returned on success and an empty list when the
        request was not successful. ``None`` is returned if a ``ValueError``
        was raised along the way.
    """
    # Bound up front so the final return cannot hit an unbound local when the
    # request fails.
    entities = []
    try:
        # We are going to make a request to the Topics Extraction API
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key,
                                       doc=document,
                                       lang='en',
                                       topicType='e').sendReq())

        # If there are no errors in the request, we print the output
        if topics_response.isSuccessful():
            entities = topics_response.getEntities()
            if entities:
                print("\t Found entities")
                print("\tEntities detected (" + str(len(entities)) + "):\n")
                print(entities)
                for entity in entities:
                    onto_type = topics_response.getOntoType(entity)
                    print("\t\t" + topics_response.getTopicForm(entity) +
                          ' --> ' + topics_response.getTypeLastNode(onto_type) +
                          ' --> ' + onto_type + ' --> ' +
                          str(topics_response.getNumberOfAppearances(entity)) +
                          "\n")

                    # Print the first linked-data reference, if there is one;
                    # a present-but-empty list is skipped instead of raising.
                    semld_list = entity.get("semld_list")
                    if semld_list:
                        print(semld_list[0])
            else:
                print("\tNo entities detected!\n")
        elif topics_response.getResponse() is None:
            print("\nOh no! The request sent did not return a Json\n")
        else:
            print("\nOh no! There was the following error: " +
                  topics_response.getStatusMsg() + "\n")

        return (topics_response, entities)
    except ValueError as exc:
        # Same output as before: the exception class is what gets printed.
        print("\nException: " + str(type(exc)))
        return None
    def get_topic_extraction(self):
        """Return ``self.topic_extraction``, requesting it first if it is ``None``.

        When no value is stored yet, a Topics Extraction request (topic type
        ``'ec'``) is sent for ``self.text`` in ``self.language``; the raw
        results go to ``self.topic_extraction_raw`` and their
        ``MeaningCloudResponse(...).topic()`` form to ``self.topic_extraction``.
        """
        logger.debug('TextAnalysis.get_topic_extraction()')

        needs_request = self.topic_extraction is None
        if needs_request:
            logger.debug('topic_extraction not set')
            logger.debug('setting topic_extraction')

            request = meaningcloud.TopicsRequest(
                self.license_key,
                txt=self.text,
                lang=self.language,
                topicType='ec',
            )
            topics_response = meaningcloud.TopicsResponse(request.sendReq())

            # Keep both the raw results and their serialized form.
            self.topic_extraction_raw = topics_response.getResults()
            serializer = MeaningCloudResponse(self.topic_extraction_raw)
            self.topic_extraction = serializer.topic()

        logger.debug(self.topic_extraction)
        return self.topic_extraction
Пример #7
0
    def testGetTypeFirstNode(self):
        """getTypeFirstNode() on fixture topics and on a single-node type."""
        fixture = self.response

        # correct_values
        for getter in (fixture.getEntities, fixture.getConcepts):
            first_node = fixture.getTypeFirstNode(fixture.getOntoType(getter()[0]))
            self.assertIsNotNone(first_node)
            self.assertEqual(first_node, 'Location')

        # An ontology type made of the single node "Top" is returned as is.
        responseNoFirstNode = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"entity_list":[{"form":"DummyTopValue","id":"__madeUpID","sementity":{"class":"instance","id":"__madeupValue","type":"Top"},"variant_list":[{"form":"DummyTopValue","inip":"0","endp":"12"}],"relevance":"100"}]}'
        localResponse = meaningcloud.TopicsResponse(responseNoFirstNode)
        single_node = localResponse.getTypeFirstNode(
            localResponse.getOntoType(localResponse.getEntities()[0]))
        self.assertIsNotNone(single_node)
        self.assertEqual(single_node, 'Top')

        # wrong_values
        # NOTE(review): these two checks call getTypeLastNode, not
        # getTypeFirstNode - possibly a copy-paste slip; confirm before changing.
        self.assertEqual(fixture.getTypeLastNode('dummy_value'), 'dummy_value')
        self.assertEqual(
            fixture.getTypeLastNode({'dummy_key': 'dummy_value'}), "")
Пример #8
0
def extractTopic(text):
    """Extract entities from *text* with the MeaningCloud Topics Extraction API.

    Args:
        text: English text to analyse.

    Returns:
        A list of ``[form, last_type_node]`` pairs, one per detected entity.
        The list is empty when no entity was detected, when the request was
        not successful (a message is printed) or when a ``ValueError`` was
        raised (the exception class is printed).
    """
    results = []
    try:
        # We are going to make a request to the Topics Extraction API
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key,
                                       txt=text,
                                       lang='en',
                                       topicType='e').sendReq())

        if topics_response.isSuccessful():
            # `or ()` keeps the loop a no-op when no entities came back.
            for entity in topics_response.getEntities() or ():
                results.append([
                    topics_response.getTopicForm(entity),
                    topics_response.getTypeLastNode(
                        topics_response.getOntoType(entity))
                ])
        elif topics_response.getResponse() is None:
            print("\nOh no! The request sent did not return a Json\n")
        else:
            print("\nOh no! There was the following error: " +
                  topics_response.getStatusMsg() + "\n")
    except ValueError as exc:
        # Same output as before: the exception class is what gets printed.
        print("\nException: " + str(type(exc)))

    # Always hand back the list so callers can iterate without a None check.
    return results
 def testGetNonexistentRelations(self):
     """getRelations() returns a dict when the response has no relation_list."""
     responseWithNoRelations = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[]}'
     parsed = meaningcloud.TopicsResponse(responseWithNoRelations)
     self.assertIsInstance(parsed.getRelations(), dict)
     self.assertIsNotNone(parsed.getRelations())
 def testGetNonexistentTimeExpressions(self):
     """getTimeExpressions() returns a dict when time_expression_list is absent."""
     responseWithNoTimeExpressions = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[],"relation_list":[{"form":"London is a very nice city.","inip":"0","endp":"25","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a very nice city","type":"isAttribute"}],"degree":"1"}]}'
     local_response = meaningcloud.TopicsResponse(
         responseWithNoTimeExpressions)
     self.assertIsInstance(local_response.getTimeExpressions(), dict)
     # Check the response built above, not the shared fixture, so this
     # assertion actually covers the "missing list" case.
     self.assertIsNotNone(local_response.getTimeExpressions())
class TopicsResponseTest(unittest.TestCase):

    outputOK = '{"status":{"code":"0","msg":"OK","credits":"1"},"entity_list":[{"form":"London","id":"01d0d69c7d","sementity":{"class":"instance","fiction":"nonfiction","id":"ODENTITY_CITY","type":"Top>Location>GeoPoliticalEntity>City"},"semgeo_list":[{"adm1":{"form":"England","id":"98db781864"},"adm2":{"form":"Greater London","id":"ed00f6dec4"},"continent":{"form":"Europe","id":"0404ea4d6c"},"country":{"form":"United Kingdom","id":"d29f412b4b","standard_list":[{"id":"ISO3166-1-a2","value":"GB"},{"id":"ISO3166-1-a3","value":"GBR"}]}}],"semld_list":["http://en.wikipedia.org/wiki/London","http://ar.wikipedia.org/wiki/لندن","http://ca.wikipedia.org/wiki/Londres","http://cs.wikipedia.org/wiki/Londýn","http://da.wikipedia.org/wiki/London","http://de.wikipedia.org/wiki/London","http://es.wikipedia.org/wiki/Londres","http://fi.wikipedia.org/wiki/Lontoo","http://fr.wikipedia.org/wiki/Londres","http://he.wikipedia.org/wiki/לונדון","http://hi.wikipedia.org/wiki/लंदन","http://id.wikipedia.org/wiki/London","http://it.wikipedia.org/wiki/Londra","http://ja.wikipedia.org/wiki/ロンドン","http://ko.wikipedia.org/wiki/런던","http://nl.wikipedia.org/wiki/Londen","http://no.wikipedia.org/wiki/London","http://pl.wikipedia.org/wiki/Londyn","http://pt.wikipedia.org/wiki/Londres","http://ro.wikipedia.org/wiki/Londra","http://ru.wikipedia.org/wiki/Лондон","http://sv.wikipedia.org/wiki/London","http://th.wikipedia.org/wiki/ลอนดอน","http://tr.wikipedia.org/wiki/Londra","http://zh.wikipedia.org/wiki/伦敦","http://d-nb.info/gnd/4074335-4","http://linkedgeodata.org/triplify/node107775","http://linked-web-apis.fit.cvut.cz/resource/london_city","http://linked-web-apis.fit.cvut.cz/resource/london_uk_city","http://data.nytimes.com/14085781296239331901","http://sw.cyc.com/concept/Mx4rvVjWPJwpEbGdrcN5Y29ycA","http://umbel.org/umbel/rc/Location_Underspecified","http://umbel.org/umbel/rc/PopulatedPlace","http://umbel.org/umbel/rc/Village","http://sws.geonames.org/2643743/","@BBCLondres2012","@LDN","@OlimpicoCar
acol","@TelevisaLondres","@TimeOutLondon","@visitlondon","sumo:City"],"variant_list":[{"form":"London","inip":"0","endp":"5"}],"relevance":"100"},{"form":"London","id":"76075d4877","sementity":{"class":"instance","fiction":"nonfiction","id":"ODENTITY_LAST_NAME","type":"Top>Person>LastName"},"semld_list":["sumo:LastName"],"variant_list":[{"form":"London","inip":"0","endp":"5"}],"relevance":"100"}],"concept_list":[{"form":"city","id":"817857ee40","sementity":{"class":"class","fiction":"nonfiction","id":"ODENTITY_CITY","type":"Top>Location>GeoPoliticalEntity>City"},"semld_list":["http://en.wikipedia.org/wiki/City","http://ar.wikipedia.org/wiki/مدينة","http://ca.wikipedia.org/wiki/Ciutat","http://cs.wikipedia.org/wiki/Město","http://de.wikipedia.org/wiki/Stadt","http://es.wikipedia.org/wiki/Ciudad","http://fi.wikipedia.org/wiki/Kaupunki","http://fr.wikipedia.org/wiki/Ville","http://he.wikipedia.org/wiki/עיר","http://hi.wikipedia.org/wiki/शहर","http://id.wikipedia.org/wiki/Kota","http://it.wikipedia.org/wiki/Città","http://ja.wikipedia.org/wiki/都市","http://ko.wikipedia.org/wiki/도시","http://nl.wikipedia.org/wiki/Stad","http://no.wikipedia.org/wiki/By","http://pl.wikipedia.org/wiki/Miasto","http://pt.wikipedia.org/wiki/Cidade","http://ro.wikipedia.org/wiki/Oraș","http://ru.wikipedia.org/wiki/Город","http://sv.wikipedia.org/wiki/Stad","http://th.wikipedia.org/wiki/นคร","http://tr.wikipedia.org/wiki/Şehir","http://zh.wikipedia.org/wiki/城市","http://d-nb.info/gnd/4056723-0","sumo:City"],"variant_list":[{"form":"city","inip":"17","endp":"20"}],"relevance":"100"},{"form":"$","id":"__9145003407816029121","sementity":{"class":"class","type":"Top>Unit>Currency"},"variant_list":[{"form":"$","inip":"30","endp":"30"}],"relevance":"100"},{"form":"tortoise","id":"1019079343","sementity":{"class":"class","fiction":"nonfiction","id":"ODENTITY_REPTILE","type":"Top>LivingThing>Animal>Vertebrate>Reptile"},"semld_list":["http://en.wikipedia.org/wiki/Tortoise","http://ar.wikipedia.org/wiki/سلا
حف_برية","http://ca.wikipedia.org/wiki/Testudínid","http://cs.wikipedia.org/wiki/Testudovití","http://de.wikipedia.org/wiki/Landschildkröten","http://es.wikipedia.org/wiki/Testudinidae","http://fi.wikipedia.org/wiki/Testudinidae","http://fr.wikipedia.org/wiki/Tortues_terrestres","http://he.wikipedia.org/wiki/צבים_יבשתיים","http://hi.wikipedia.org/wiki/स्थलीय_कछुआ","http://id.wikipedia.org/wiki/Kura-kura","http://it.wikipedia.org/wiki/Testudinidae","http://ja.wikipedia.org/wiki/リクガメ科","http://ko.wikipedia.org/wiki/땅거북과","http://nl.wikipedia.org/wiki/Landschildpadden","http://no.wikipedia.org/wiki/Landskilpadder","http://pl.wikipedia.org/wiki/Żółwie_lądowe","http://pt.wikipedia.org/wiki/Testudinidae","http://ro.wikipedia.org/wiki/Testudinidae","http://ru.wikipedia.org/wiki/Сухопутные_черепахи","http://sv.wikipedia.org/wiki/Landsköldpaddor","http://tr.wikipedia.org/wiki/Kara_kaplumbağası","http://zh.wikipedia.org/wiki/陸龜","sumo:Reptile"],"semtheme_list":[{"id":"ODTHEME_ZOOLOGY","type":"Top>NaturalSciences>Zoology"}],"variant_list":[{"form":"turtles","inip":"41","endp":"47"}],"relevance":"100"}],"time_expression_list":[{"form":"the 5th of November","normalized_form":"|||||11|5||||","actual_time":"2017-11-05","precision":"day","inip":"53","endp":"71"},{"form":"5th of November","normalized_form":"|||||11|5||||","actual_time":"2017-11-05","precision":"day","inip":"57","endp":"71"}],"money_expression_list":[{"form":"$5","amount_form":"5","numeric_value":"5","currency":"USD","inip":"30","endp":"31"}],"quantity_expression_list":[{"form":"two turtles","amount_form":"two","numeric_value":"2","unit":"turtle","inip":"37","endp":"47"}],"other_expression_list":[{"form":"1245FG","type":"unknown","inip":"104","endp":"109"}],"quotation_list":[{"form":"he was tired in flight 1245FG.","verb":{"form":"said","lemma":"say"},"inip":"81","endp":"110"}],"relation_list":[{"form":"On the 5th of November he said he was tired in flight 
1245FG.","inip":"73","endp":"109","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"said","lemma_list":["say"],"sense_id_list":["ODENTITY_COMMUNICATION_PROCESS","ODENTITY_LINGUISTIC_COMMUNICATION","ODENTITY_PROCESS"]},"complement_list":[{"form":"he was tired in flight 1245FG","type":"isDirectObject"}],"degree":"1"},{"form":"London is a nice city.","inip":"0","endp":"20","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a nice city","type":"isAttribute"}],"degree":"1"},{"form":"I have $5 and two turtles.","inip":"23","endp":"47","subject":{"form":"I","lemma_list":["I"],"sense_id_list":["PRONHUMAN"]},"verb":{"form":"have","lemma_list":["have"]},"complement_list":[{"form":"$5 and two turtles","type":"isDirectObject"}],"degree":"1"},{"form":"On the 5th of November he said he was tired in flight 1245FG.","inip":"81","endp":"109","subject":{"form":"he","lemma_list":["he"],"sense_id_list":["PRONHUMAN"]},"verb":{"form":"was tired","lemma_list":["tire"]},"complement_list":[{"form":"in flight","type":"isComplement"}],"degree":"1"}]}'
    response = meaningcloud.TopicsResponse(outputOK)

    def testConstruct(self):
        """The shared fixture exposes a non-None parsed response."""
        parsed = self.response.getResponse()
        self.assertIsNotNone(parsed)

    def testConstructWithWrongJson(self):
        """Building a response from non-JSON text raises json.JSONDecodeError."""
        self.assertRaises(json.JSONDecodeError, meaningcloud.TopicsResponse,
                          'malformed json')

    def testConstructWithEmptyParam(self):
        """Building a response from an empty string raises an exception."""
        self.assertRaises(Exception, meaningcloud.TopicsResponse, '')

    def testGetEntities(self):
        """getEntities() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getEntities())
        self.assertIsInstance(fixture.getEntities(), list)

    def testGetNonexistentEntities(self):
        """getEntities() returns an empty dict when entity_list is absent."""
        responseWithNoEntities = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[],"relation_list":[{"form":"London is a very nice city.","inip":"0","endp":"25","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a very nice city","type":"isAttribute"}],"degree":"1"}]}'
        parsed = meaningcloud.TopicsResponse(responseWithNoEntities)
        self.assertIsInstance(parsed.getEntities(), dict)
        self.assertEqual(parsed.getEntities(), {})

    def testGetConcepts(self):
        """getConcepts() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getConcepts())
        self.assertIsInstance(fixture.getConcepts(), list)

    def testGetNonexistentConcepts(self):
        """getConcepts() returns a dict when concept_list is absent."""
        responseWithNoConcepts = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[],"relation_list":[{"form":"London is a very nice city.","inip":"0","endp":"25","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a very nice city","type":"isAttribute"}],"degree":"1"}]}'
        parsed = meaningcloud.TopicsResponse(responseWithNoConcepts)
        self.assertIsInstance(parsed.getConcepts(), dict)
        self.assertIsNotNone(parsed.getConcepts())

    def testGetMoneyExpressions(self):
        """getMoneyExpressions() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getMoneyExpressions())
        self.assertIsInstance(fixture.getMoneyExpressions(), list)

    def testGetNonexistentMoneyExpressions(self):
        """getMoneyExpressions() returns a dict when money_expression_list is absent."""
        responseWithNoMoneyExpressions = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[],"relation_list":[{"form":"London is a very nice city.","inip":"0","endp":"25","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a very nice city","type":"isAttribute"}],"degree":"1"}]}'
        parsed = meaningcloud.TopicsResponse(responseWithNoMoneyExpressions)
        self.assertIsInstance(parsed.getMoneyExpressions(), dict)
        self.assertIsNotNone(parsed.getMoneyExpressions())

    def testGetQuantityExpressions(self):
        """getQuantityExpressions() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getQuantityExpressions())
        self.assertIsInstance(fixture.getQuantityExpressions(), list)

    def testGetNonexistentQuantityExpressions(self):
        """getQuantityExpressions() returns a dict when quantity_expression_list is absent."""
        responseWithNoQuantityExpressions = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"money_expression_list":[],"other_expression_list":[],"quotation_list":[],"relation_list":[{"form":"London is a very nice city.","inip":"0","endp":"25","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a very nice city","type":"isAttribute"}],"degree":"1"}]}'
        parsed = meaningcloud.TopicsResponse(responseWithNoQuantityExpressions)
        self.assertIsInstance(parsed.getQuantityExpressions(), dict)

    def testGetTimeExpressions(self):
        """getTimeExpressions() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getTimeExpressions())
        self.assertIsInstance(fixture.getTimeExpressions(), list)

    def testGetNonexistentTimeExpressions(self):
        """getTimeExpressions() returns a dict when time_expression_list is absent."""
        responseWithNoTimeExpressions = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[],"relation_list":[{"form":"London is a very nice city.","inip":"0","endp":"25","subject":{"form":"London","lemma_list":["London"],"sense_id_list":["01d0d69c7d","76075d4877"]},"verb":{"form":"is","lemma_list":["be"]},"complement_list":[{"form":"a very nice city","type":"isAttribute"}],"degree":"1"}]}'
        local_response = meaningcloud.TopicsResponse(
            responseWithNoTimeExpressions)
        self.assertIsInstance(local_response.getTimeExpressions(), dict)
        # Check the response built above, not the shared fixture, so this
        # assertion actually covers the "missing list" case.
        self.assertIsNotNone(local_response.getTimeExpressions())

    def testGetQuotations(self):
        """getQuotations() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getQuotations())
        self.assertIsInstance(fixture.getQuotations(), list)

    def testGetNonexistentQuotations(self):
        """getQuotations() returns a dict when quotation_list is absent."""
        responseWithNoQuotations = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[]}'
        parsed = meaningcloud.TopicsResponse(responseWithNoQuotations)
        self.assertIsInstance(parsed.getQuotations(), dict)
        self.assertIsNotNone(parsed.getQuotations())

    def testGetRelations(self):
        """getRelations() on the fixture returns a non-None list."""
        fixture = self.response
        self.assertIsNotNone(fixture.getRelations())
        self.assertIsInstance(fixture.getRelations(), list)

    def testGetNonexistentRelations(self):
        """getRelations() returns a dict when relation_list is absent."""
        responseWithNoRelations = '{"status":{"code":"0","msg":"OK","credits":"1","remaining_credits":"5000"},"time_expression_list":[],"money_expression_list":[],"quantity_expression_list":[],"other_expression_list":[],"quotation_list":[]}'
        parsed = meaningcloud.TopicsResponse(responseWithNoRelations)
        self.assertIsInstance(parsed.getRelations(), dict)
        self.assertIsNotNone(parsed.getRelations())

    def testGetForm(self):
        """getTopicForm() is non-None for the first topic of each category."""
        fixture = self.response
        topic_getters = (
            fixture.getEntities,
            fixture.getConcepts,
            fixture.getTimeExpressions,
            fixture.getMoneyExpressions,
            fixture.getQuantityExpressions,
            fixture.getQuotations,
            fixture.getRelations,
        )
        for getter in topic_getters:
            self.assertIsNotNone(fixture.getTopicForm(getter()[0]))

    def testGetRelevance(self):
        """getTopicRelevance() is non-None for the first topic of each category."""
        fixture = self.response
        topic_getters = (
            fixture.getEntities,
            fixture.getConcepts,
            fixture.getTimeExpressions,
            fixture.getMoneyExpressions,
            fixture.getQuantityExpressions,
            fixture.getQuotations,
            fixture.getRelations,
        )
        for getter in topic_getters:
            self.assertIsNotNone(fixture.getTopicRelevance(getter()[0]))

    def testGetOntoType(self):
        """getOntoType() for fixture topics, a string argument and a dict without a type."""
        fixture = self.response
        expected_type = 'Top>Location>GeoPoliticalEntity>City'

        # correct_values
        for getter in (fixture.getEntities, fixture.getConcepts):
            onto_type = fixture.getOntoType(getter()[0])
            self.assertIsNotNone(onto_type)
            self.assertEqual(onto_type, expected_type)

        # wrong_values
        # A plain string is expected to raise AttributeError.
        self.assertRaises(AttributeError, fixture.getOntoType, 'dummy_value')

        # A dict without type information is expected to give an empty string.
        self.assertEqual(fixture.getOntoType({'dummy_key': 'dummy_value'}), "")

    def testGetTypeLastNode(self):
        """getTypeLastNode() for fixture topics, a plain string and a dict."""
        fixture = self.response

        # correct_values
        for getter in (fixture.getEntities, fixture.getConcepts):
            last_node = fixture.getTypeLastNode(fixture.getOntoType(getter()[0]))
            self.assertIsNotNone(last_node)
            self.assertEqual(last_node, 'City')

        # wrong_values
        # A string without separators is expected back unchanged.
        self.assertEqual(fixture.getTypeLastNode('dummy_value'), 'dummy_value')

        # A dict argument is expected to give an empty string.
        self.assertEqual(
            fixture.getTypeLastNode({'dummy_key': 'dummy_value'}), "")

    def testIsUserDefined(self):
        """isUserDefined() is False for the fixture topics, True for dictionary ones."""
        fixture = self.response
        topic_getters = (
            fixture.getEntities,
            fixture.getConcepts,
            fixture.getTimeExpressions,
            fixture.getMoneyExpressions,
            fixture.getQuantityExpressions,
            fixture.getOtherExpressions,
            fixture.getQuotations,
            fixture.getRelations,
        )
        # The first topic of every category in the shared fixture is expected
        # to be reported as not user defined.
        for getter in topic_getters:
            self.assertEqual(fixture.isUserDefined(getter()[0]), False)

        # Entity and concept below both carry a "dictionary" field.
        responseWithUserDefinedEntities = '{"status":{"code":"0","msg":"OK","credits":"1"},"entity_list":[{"form":"Lincoln Trikru","official_form":"Lincoln","dictionary":"test1","id":"ent_sin_tag","sementity":{"class":"instance","type":"Top>People>Grounders"},"variant_list":[{"form":"Lincoln","inip":"0","endp":"6"}],"relevance":"100"}],"concept_list":[{"form":"dropship","id":"concepto_sin_tag","dictionary":"test1","sementity":{"class":"class"},"variant_list":[{"form":"dropship","inip":"19","endp":"26"}],"relevance":"100"}]}'
        responseWithUD = meaningcloud.TopicsResponse(
            responseWithUserDefinedEntities)
        for getter in (responseWithUD.getEntities, responseWithUD.getConcepts):
            self.assertEqual(responseWithUD.isUserDefined(getter()[0]), True)

    def testGetNumberOfAppearances(self):
        """getNumberOfAppearances() is 1 for each first fixture topic, 0 for None."""
        fixture = self.response
        topic_getters = (
            fixture.getEntities,
            fixture.getConcepts,
            fixture.getTimeExpressions,
            fixture.getMoneyExpressions,
            fixture.getQuantityExpressions,
            fixture.getOtherExpressions,
            fixture.getQuotations,
            fixture.getRelations,
        )
        for getter in topic_getters:
            self.assertEqual(fixture.getNumberOfAppearances(getter()[0]), 1)

        # wrong value
        self.assertEqual(fixture.getNumberOfAppearances(None), 0)
 def testConstructWithEmptyParam(self):
     """Building a response from an empty string raises an exception."""
     self.assertRaises(Exception, meaningcloud.TopicsResponse, '')
 def testConstructWithWrongJson(self):
     """Building a response from non-JSON text raises json.JSONDecodeError."""
     self.assertRaises(json.JSONDecodeError, meaningcloud.TopicsResponse,
                       'malformed json')
Пример #14
0
  sentence_vectors.append(vector)


"""# **FEATURE EXTRACTION**"""

""" meaning cloud topic extraction"""

# Extract entities from `ner_report` with the MeaningCloud Topics Extraction
# API and collect the lower-cased forms of at most 20 of them in
# `important_entities`.


# Requires the MeaningCloud SDK:
#!pip install meaningcloud-python

# NOTE(review): the API license key is hard-coded in the source; consider
# loading it from configuration or the environment instead.
license_key = 'b59424e48a94e5061bef29cbd29bdacd'

# We are going to make a request to the Topics Extraction API
# (lang='en'; topicType='e' presumably restricts the result to entities --
# TODO confirm against the API documentation).
topics_response = meaningcloud.TopicsResponse(meaningcloud.TopicsRequest(license_key, txt=ner_report, lang='en',topicType='e').sendReq())

# If there are no errors in the request, we collect the entity forms.
# NOTE(review): `entities` and `important_entities` are only assigned inside
# this branch, so they stay undefined when the request fails.
if topics_response.isSuccessful():

  entities = topics_response.getEntities()
  important_entities = []

  if entities:
      for entity in entities:
        # Keep only the lower-cased form of the entity; its relevance is not used.
        ent = topics_response.getTopicForm(entity).lower()
        important_entities.append(ent)
        # Stop as soon as 20 forms have been collected.
        if len(important_entities) == 20:
          break
Пример #15
0
def analyzeText(text, language, threshold, tt, ud):
    """Extract topics from ``text`` with MeaningCloud and group them by type.

    Sends ``text`` to the Topics Extraction API (using the module-level
    ``license_key`` and ``server``) and files the returned topic forms into
    the buckets "person", "organization", "location", "product", "id",
    "event", "other" and "quantity". The global ``index_count`` is printed
    as the text number and incremented after every call that does not raise.

    Args:
        text: Text to analyze.
        language: Value sent as the request's ``lang``.
        threshold: Minimum relevance an entity or concept needs in order to
            be kept; compared against ``int(relevance)``.
        tt: Topic types to extract. Sent as ``topicType`` and checked for the
            letters "e" (entities), "c" (concepts), "m" (money expressions)
            and "n" (quantity expressions).
        ud: Value sent as the ``ud`` request parameter.

    Returns:
        A ``pd.Series`` built from the bucket dict. On success each value is
        the list filled through ``insertInList``. On a non-blocking API error
        "person" holds an ``"ERROR (code): message"`` string and every other
        bucket is an empty string.

    Raises:
        ValueError: If the API reports an error that ``isBlockingErrorType``
            classifies as blocking. Any other ``ValueError`` raised while
            processing the response propagates unchanged.
    """
    global index_count
    print(f"Extracting topics for text #{index_count}")

    # this is where we are going to store our results
    topics = {
        "person": [],
        "organization": [],
        "location": [],
        "product": [],
        "id": [],
        "event": [],
        "other": [],
        "quantity": [],
    }

    # We are going to make a request to the Topics Extraction API
    request = meaningcloud.TopicsRequest(
        license_key,
        txt=text,
        lang=language,
        topicType=tt,
        server=server,
        otherparams={"ud": ud},
    )
    setRequestSource(request)
    response = meaningcloud.TopicsResponse(request.sendReq())

    if response.isSuccessful():
        # Entities and concepts are filtered by relevance and filed under the
        # first node of their ontology type.
        typed_sources = (
            ("e", response.getEntities),
            ("c", response.getConcepts),
        )
        for flag, get_topics in typed_sources:
            if flag not in tt:
                continue
            for topic in get_topics() or ():
                if int(response.getTopicRelevance(topic)) >= threshold:
                    first_node = response.getTypeFirstNode(
                        response.getOntoType(topic)
                    ).lower()
                    form = str(response.getTopicForm(topic))
                    # Types without a bucket of their own go to "other".
                    bucket = topics.get(first_node)
                    if bucket is None:
                        bucket = topics["other"]
                    insertInList(bucket, form)

        # Money and quantity expressions all go to "quantity", unfiltered.
        quantity_sources = (
            ("m", response.getMoneyExpressions),
            ("n", response.getQuantityExpressions),
        )
        for flag, get_expressions in quantity_sources:
            if flag not in tt:
                continue
            for expression in get_expressions() or ():
                insertInList(
                    topics["quantity"], str(response.getTopicForm(expression))
                )
    else:
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: "
                f"({status_code}) {status_msg}"
            )
        print(
            f"Oops! The request to Topics Extraction for text #{index_count}"
            f" was not succesful: ({status_code}) {status_msg}"
        )
        # Report the error in the first column and blank out the rest.
        topics = dict.fromkeys(topics, "")
        topics["person"] = f"ERROR ({status_code}): {status_msg}"

    index_count += 1
    return pd.Series(topics)
Пример #16
0
def analyzeText(text):
    """Analyze ``text`` with three MeaningCloud APIs and summarize the results.

    Calls, in order, Sentiment Analysis, Topics Extraction (entities and
    concepts) and Deep Categorization (model ``IAB_2.0_en``), all with the
    module-level ``license_key`` and ``txtf='markup'``. An unsuccessful
    response is reported with ``print`` and leaves the corresponding result
    as an empty string. The global ``index_count`` is printed as the text
    number and incremented on every call.

    Args:
        text: Text to analyze.

    Returns:
        A ``pd.Series`` ``[polarity, entities, concepts, iab2]`` where
        ``polarity`` is the global score tag, ``entities`` is a
        comma-separated list of ``"form (type last node)"`` items,
        ``concepts`` is a comma-separated list of distinct concept forms and
        ``iab2`` is the code of the first category returned. Values that
        could not be obtained are empty strings.
    """
    global index_count
    print("Analyzing text " + str(index_count))

    # this is where we are going to store our results
    polarity = ''
    entities = ''
    concepts = ''
    iab2 = ''

    try:
        # We are going to make a request to the Sentiment Analysis API
        print("\tGetting sentiment analysis...")
        sentiment_response = meaningcloud.SentimentResponse(
            meaningcloud.SentimentRequest(license_key,
                                          lang='en',
                                          txt=text,
                                          txtf='markup').sendReq())
        if sentiment_response.isSuccessful():
            polarity = sentiment_response.getGlobalScoreTag()
        else:
            print('Request to sentiment was not succesful: ' +
                  sentiment_response.getStatusMsg())

        # We are going to make a request to the Topics Extraction API
        print("\tGetting entities and concepts...")
        topics_req = meaningcloud.TopicsRequest(license_key,
                                                txt=text,
                                                lang='en',
                                                topicType='ec',
                                                otherparams={'txtf': 'markup'})
        topics_response = meaningcloud.TopicsResponse(topics_req.sendReq())

        # If there are no errors in the request, we extract the entities and concepts
        if topics_response.isSuccessful():
            entities_list = topics_response.getEntities()
            if entities_list:
                # we keep only the entities with a relevance of at least 100
                entities = ', '.join(
                    topics_response.getTopicForm(entity) + ' (' +
                    topics_response.getTypeLastNode(
                        topics_response.getOntoType(entity)) + ')'
                    for entity in entities_list
                    if int(topics_response.getTopicRelevance(entity)) >= 100)

            concepts_list = topics_response.getConcepts()
            if concepts_list:
                # we keep only the concepts with a relevance of at least 100;
                # dict.fromkeys drops repeated forms while keeping their order
                concepts = ', '.join(
                    dict.fromkeys(
                        topics_response.getTopicForm(concept)
                        for concept in concepts_list
                        if int(topics_response.getTopicRelevance(concept)) >=
                        100))
        else:
            print('Request to topics was not succesful: ' +
                  topics_response.getStatusMsg())

        # We are going to make a request to the Deep Categorization API
        print("\tGetting IAB 2.0 classification...")
        deepcat_response = meaningcloud.DeepCategorizationResponse(
            meaningcloud.DeepCategorizationRequest(license_key,
                                                   model='IAB_2.0_en',
                                                   txt=text,
                                                   otherparams={
                                                       'txtf': 'markup'
                                                   }).sendReq())
        if deepcat_response.isSuccessful():
            categories = deepcat_response.getCategories()
            # only the first category returned is reported
            iab2 = (', '.join(
                deepcat_response.getCategoryCode(cat)
                for cat in categories[:1])) if categories else ''
        else:
            print('Request to Deep Categorization was not succesful: ' +
                  deepcat_response.getStatusMsg())

    except ValueError as e:
        # Report the exception itself: sys.exc_info()[0] is only the class,
        # which would hide the actual error message.
        print(f"\nException: {type(e).__name__}: {e}")

    index_count += 1

    return pd.Series([polarity, entities, concepts, iab2])