示例#1
0
    def testConstruct(self):
        """Check how TopicsRequest stores its key, content and extra params.

        Covers four ways of building a request: plain text, text with extra
        headers, text with additional parameters, a URL, and a local file.
        The request built with extra headers is actually sent, so this test
        performs a network call.

        Returns:
            The shared ``self.request`` object (kept for backward
            compatibility with the original test).
        """
        request = self.request
        self.assertEqual(self.URL, request.getUrl())
        params = request.getParams()
        self.assertIsNotNone(params)
        self.assertIn('key', params)
        self.assertEqual(params['key'], self.KEY)
        self.assertIsNotNone(request.getTimeout())
        self.assertEqual(self.TIMEOUT_DEFAULT, request.getTimeout())

        extraHeaders = ["Accept: application/json"]
        request2 = meaningcloud.TopicsRequest(self.KEY,
                                              txt=self.text,
                                              lang="en",
                                              extraheaders=extraHeaders)
        self.assertIsNotNone(request2.sendReq())

        otherparams = {'key2': 'my_key2'}
        request3 = meaningcloud.TopicsRequest(self.KEY,
                                              txt=self.text,
                                              lang="en",
                                              extraheaders=extraHeaders,
                                              otherparams=otherparams)
        # assertIsNotNone(<bool>) can never fail; assert membership instead.
        self.assertIn('key2', request3.getParams())
        self.assertEqual(request3.getParams()['key2'], 'my_key2')

        url = 'https://en.wikipedia.org/wiki/Star_Trek'
        request4 = meaningcloud.TopicsRequest(self.KEY,
                                              url=url,
                                              lang="en",
                                              extraheaders=extraHeaders,
                                              otherparams=otherparams)
        self.assertIn('url', request4.getParams())
        self.assertEqual(request4.getParams()['url'], url)

        doc_path = self.RESOURCES_DIR + 'file.txt'
        request5 = meaningcloud.TopicsRequest(self.KEY,
                                              doc=doc_path,
                                              lang="en",
                                              extraheaders=extraHeaders,
                                              otherparams=otherparams)

        # The document is read below from request5._file, and the original
        # assertion carried ``False``, i.e. 'doc' is expected to be absent
        # from the regular parameters.
        self.assertNotIn('doc', request5.getParams())
        try:
            doc = request5._file['doc'].read().decode('utf-8')
        finally:
            # Close the handle held by the request even if reading fails.
            request5._file['doc'].close()

        with open(doc_path, 'rb') as aux_doc:
            aux_content = aux_doc.read().decode('utf-8')
        self.assertEqual(aux_content, doc)

        return request
 def get_links(self, text: str) -> List[Pair]:
     """Link the entities found in ``text`` to DBpedia resources.

     The text is sent to the Topics Extraction API (entities only, English)
     using a license key picked at random from ``MEANING_CLOUD_KEYS``. For
     each entity that carries an English Wikipedia URL in ``semld_list``,
     the last path segment of the first such URL is reused as the DBpedia
     resource name.

     Returns:
         A list of ``Pair(dbpedia_url, entity_form, 'entity')``; an empty
         list when the request is not successful (the reason is printed).
     """
     key_index = randint(0, len(MEANING_CLOUD_KEYS) - 1)
     request = meaningcloud.TopicsRequest(MEANING_CLOUD_KEYS[key_index],
                                          txt=text,
                                          lang='en',
                                          topicType='e')
     topics_response = meaningcloud.TopicsResponse(request.sendReq())

     # Failure path first: report the reason and return no links.
     if not topics_response.isSuccessful():
         if topics_response.getResponse() is None:
             print("\nOh no! The request sent did not return a Json\n")
         else:
             print("\nOh no! There was the following error: " +
                   topics_response.getStatusMsg() + "\n")
         return []

     links = []
     for entity in topics_response.getEntities():
         if 'semld_list' not in entity:
             continue
         wikipedia_urls = [
             reference for reference in entity['semld_list']
             if reference.startswith('http://en.wikipedia.org')
         ]
         if not wikipedia_urls:
             continue
         first_url = wikipedia_urls[0]
         resource_name = first_url[first_url.rfind('/') + 1:]
         links.append(
             Pair(f"http://dbpedia.org/resource/{resource_name}",
                  entity['form'], 'entity'))
     return links
def get_topics(row):
    """Request all topic types from MeaningCloud for ``row.sentence``.

    Args:
        row: object exposing ``sentence`` (the text to analyze) and ``name``
            (used only in the error message).

    Returns:
        The ``meaningcloud.TopicsResponse`` for the sentence, or ``None``
        when building or sending the request raised an exception.
    """
    # NOTE(review): the license key is hard-coded in source; consider
    # loading it from configuration or the environment instead.
    try:
        request = meaningcloud.TopicsRequest('ed573dcdee15b76ef892775da22bd5d4',
                                             txt=row.sentence, lang='en',
                                             topicType='a')
        topics_response = meaningcloud.TopicsResponse(request.sendReq())
    except Exception:
        # Best-effort per row, but do not swallow KeyboardInterrupt/SystemExit
        # the way a bare ``except:`` would.
        print('Error in row {}'.format(row.name))
        return None

    # Pause between consecutive requests.
    sleep(0.5)
    return topics_response
def extractTopics(text, fibo, relevance):
    """Extract entities and concepts from ``text`` with Topics Extraction.

    Args:
        text: text to analyze; it is sent with ``txtf=markup``.
        fibo: when truthy, the ``FIBO_en`` user dictionary is added to the
            request and concepts tagged with that dictionary are reported
            separately.
        relevance: minimum relevance an entity or concept must reach to be
            kept. Concepts whose form contains a space are kept from half
            this value, and user-defined concepts are always kept.

    Returns:
        ``(entities, concepts, fibo_concepts)`` when ``fibo`` is truthy,
        otherwise ``(entities, concepts)``. Each element is a
        comma-separated string of ``form (type)`` items, ``'(none)'`` when
        the API returned nothing for it, or ``''`` when the request failed.
    """
    entities = ''
    concepts = ''
    fibo_concepts = ''

    print("\tGetting entities and concepts...")
    topics_req = meaningcloud.TopicsRequest(license_key, txt=text, lang='en',
                                            topicType='ec',
                                            otherparams={'txtf': 'markup'})
    if fibo:
        topics_req.addParam('ud', 'FIBO_en')

    topics_response = meaningcloud.TopicsResponse(topics_req.sendReq())

    def describe(topic):
        """Render a topic as 'form (last node of its ontology type)'."""
        last_node = topics_response.getTypeLastNode(
            topics_response.getOntoType(topic))
        return topics_response.getTopicForm(topic) + ' (' + last_node + ')'

    # If there are no errors in the request, we extract entities and concepts
    if topics_response.isSuccessful():
        entities_list = topics_response.getEntities()
        if entities_list:
            # Keep only the entities that reach the requested relevance.
            entities = ', '.join(
                describe(entity) for entity in entities_list
                if int(topics_response.getTopicRelevance(entity)) >= relevance)
        else:
            entities = '(none)'

        kept_concepts = []
        kept_fibo_concepts = []
        for concept in topics_response.getConcepts() or []:
            if fibo and concept.get('dictionary') == 'FIBO_en':
                kept_fibo_concepts.append(describe(concept))
                continue
            score = int(topics_response.getTopicRelevance(concept))
            is_multiword = ' ' in topics_response.getTopicForm(concept)
            # Keep relevant concepts, multiword concepts with at least half
            # the requested relevance, and user-defined concepts.
            if (score >= relevance
                    or (is_multiword and score >= relevance / 2)
                    or topics_response.isUserDefined(concept)):
                kept_concepts.append(describe(concept))

        concepts = ', '.join(kept_concepts) if kept_concepts else '(none)'
        fibo_concepts = (', '.join(kept_fibo_concepts)
                         if kept_fibo_concepts else '(none)')
    else:
        print("\tOops! Request to topics was not succesful: (" +
              str(topics_response.getStatusCode()) + ') ' +
              str(topics_response.getStatusMsg()))

    # The original single-line return parsed as a 4-tuple because the
    # conditional expression binds tighter than the commas around it.
    if fibo:
        return entities, concepts, fibo_concepts
    return entities, concepts
示例#5
0
def topics():
    """Run entity extraction on the module-level ``document`` and print it.

    The document is sent to the Topics Extraction API (entities only,
    English) with the module-level ``license_key``. Every detected entity is
    printed with its form, ontology type and number of appearances, followed
    by its first ``semld_list`` entry when it has one.

    Returns:
        ``(topics_response, entities)``. ``entities`` is an empty list when
        the request was not successful. ``None`` is returned when a
        ``ValueError`` is raised while handling the request.
    """
    # Bound up front so the return below works when the request fails.
    entities = []
    try:
        # We are going to make a request to the Topics Extraction API
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key,
                                       doc=document,
                                       lang='en',
                                       topicType='e').sendReq())

        # If there are no errors in the request, we print the output
        if topics_response.isSuccessful():
            entities = topics_response.getEntities()
            if entities:
                print("\t Found entities")
                print("\tEntities detected (" + str(len(entities)) + "):\n")
                print(entities)
                for entity in entities:
                    onto_type = topics_response.getOntoType(entity)
                    print("\t\t" + topics_response.getTopicForm(entity) +
                          ' --> ' +
                          topics_response.getTypeLastNode(onto_type) +
                          ' --> ' + onto_type + ' --> ' +
                          str(topics_response.getNumberOfAppearances(entity)) +
                          "\n")

                    # Skip entities whose semld_list is missing or empty.
                    semld_list = entity.get("semld_list")
                    if semld_list:
                        print(semld_list[0])
            else:
                print("\tNo entities detected!\n")
        else:
            if topics_response.getResponse() is None:
                print("\nOh no! The request sent did not return a Json\n")
            else:
                print("\nOh no! There was the following error: " +
                      topics_response.getStatusMsg() + "\n")

        return (topics_response, entities)
    except ValueError as err:
        # Prints the exception class, as the original did.
        print("\nException: " + str(type(err)))
        return None
    def get_topic_extraction(self):
        """Return the serialized topic extraction, requesting it on first use.

        When ``self.topic_extraction`` is still ``None``, the text is sent to
        the Topics Extraction API (entities and concepts); the raw results
        are stored in ``self.topic_extraction_raw`` and their serialized form
        in ``self.topic_extraction``. Later calls reuse the stored value.
        """
        logger.debug('TextAnalysis.get_topic_extraction()')

        not_loaded_yet = self.topic_extraction is None
        if not_loaded_yet:
            logger.debug('topic_extraction not set')
            logger.debug('setting topic_extraction')

            request = meaningcloud.TopicsRequest(self.license_key,
                                                 txt=self.text,
                                                 lang=self.language,
                                                 topicType='ec')
            response = meaningcloud.TopicsResponse(request.sendReq())

            # Keep both the raw results and their serialized form
            self.topic_extraction_raw = response.getResults()
            serializer = MeaningCloudResponse(self.topic_extraction_raw)
            self.topic_extraction = serializer.topic()

        logger.debug(self.topic_extraction)
        return self.topic_extraction
示例#7
0
def extractTopic(text):
    """Extract the entities mentioned in ``text``.

    The text is sent to the Topics Extraction API (entities only, English)
    with the module-level ``license_key``.

    Args:
        text: text to analyze.

    Returns:
        A list of ``[form, type]`` pairs, one per detected entity, where
        ``type`` is the last node of the entity's ontology type. The list is
        empty when no entity is detected, when the request is not successful
        or when a ``ValueError`` is raised (the reason is printed in the
        last two cases).
    """
    results = []
    try:
        # We are going to make a request to the Topics Extraction API
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key,
                                       txt=text,
                                       lang='en',
                                       topicType='e').sendReq())

        if topics_response.isSuccessful():
            for entity in topics_response.getEntities() or []:
                results.append([
                    topics_response.getTopicForm(entity),
                    topics_response.getTypeLastNode(
                        topics_response.getOntoType(entity)),
                ])
        elif topics_response.getResponse() is None:
            print("\nOh no! The request sent did not return a Json\n")
        else:
            print("\nOh no! There was the following error: " +
                  topics_response.getStatusMsg() + "\n")
    except ValueError as err:
        # Prints the exception class, as the original did.
        print("\nException: " + str(type(err)))
        return []

    # Always hand back a list so callers can iterate without a None check.
    return results
class TopicsRequesTest(unittest.TestCase):
    """Tests for building and sending ``meaningcloud.TopicsRequest``."""

    URL = 'https://api.meaningcloud.com/topics-2.0'
    KEY = 'MY_KEY'
    TIMEOUT_DEFAULT = 60
    RESOURCES_DIR = './resources/'
    text = 'London is big'
    # Shared request, built once when the class body is executed.
    request = meaningcloud.TopicsRequest(KEY, txt=text, topicType="a")

    def testConstruct(self):
        """Check how TopicsRequest stores its key, content and extra params.

        Covers building a request from plain text, with extra headers, with
        additional parameters, from a URL and from a local file. The request
        built with extra headers is actually sent, so this test performs a
        network call.

        Returns:
            The shared ``self.request`` object (kept for backward
            compatibility with the original test).
        """
        request = self.request
        self.assertEqual(self.URL, request.getUrl())
        params = request.getParams()
        self.assertIsNotNone(params)
        self.assertIn('key', params)
        self.assertEqual(params['key'], self.KEY)
        self.assertIsNotNone(request.getTimeout())
        self.assertEqual(self.TIMEOUT_DEFAULT, request.getTimeout())

        extraHeaders = ["Accept: application/json"]
        request2 = meaningcloud.TopicsRequest(self.KEY,
                                              txt=self.text,
                                              topicType="a",
                                              extraheaders=extraHeaders)
        self.assertIsNotNone(request2.sendReq())

        otherparams = {'key2': 'my_key2'}
        request3 = meaningcloud.TopicsRequest(self.KEY,
                                              txt=self.text,
                                              topicType="a",
                                              extraheaders=extraHeaders,
                                              otherparams=otherparams)
        # assertIsNotNone(<bool>) can never fail; assert membership instead.
        self.assertIn('key2', request3.getParams())
        self.assertEqual(request3.getParams()['key2'], 'my_key2')

        url = 'https://en.wikipedia.org/wiki/Star_Trek'
        request4 = meaningcloud.TopicsRequest(self.KEY,
                                              url=url,
                                              topicType="a",
                                              extraheaders=extraHeaders,
                                              otherparams=otherparams)
        self.assertIn('url', request4.getParams())
        self.assertEqual(request4.getParams()['url'], url)

        doc_path = self.RESOURCES_DIR + 'file.txt'
        request5 = meaningcloud.TopicsRequest(self.KEY,
                                              doc=doc_path,
                                              topicType="a",
                                              extraheaders=extraHeaders,
                                              otherparams=otherparams)

        # The document is read below from request5._file, and the original
        # assertion carried ``False``, i.e. 'doc' is expected to be absent
        # from the regular parameters.
        self.assertNotIn('doc', request5.getParams())
        try:
            doc = request5._file['doc'].read().decode('utf-8')
        finally:
            # Close the handle held by the request even if reading fails.
            request5._file['doc'].close()

        with open(doc_path, 'rb') as aux_doc:
            aux_content = aux_doc.read().decode('utf-8')
        self.assertEqual(aux_content, doc)

        return request

    def testSendReq(self):
        """Sending the shared request must produce a non-None result."""
        request = self.request
        requestRq = request.sendReq()
        self.assertIsNotNone(requestRq)
示例#9
0
  sentence_vectors.append(vector)


"""# **FEATURE EXTRACTION**"""

""" meaning cloud topic extraction"""

#print("Extracting name entities with MeaningCloud...")


#!pip install meaningcloud-python

# NOTE(review): the license key is hard-coded in source; consider loading it
# from configuration or the environment instead.
license_key = 'b59424e48a94e5061bef29cbd29bdacd'

# Request entity extraction (topicType='e', English) for ``ner_report``.
# ``ner_report`` is defined outside this fragment - presumably the text
# being analyzed; TODO confirm.
topics_response = meaningcloud.TopicsResponse(meaningcloud.TopicsRequest(license_key, txt=ner_report, lang='en',topicType='e').sendReq())

# If there are no errors in the request, collect the detected entities.
# Note: ``important_entities`` is only bound when the request succeeds.
if topics_response.isSuccessful():
  #print("\nThe request to 'Topics Extraction' finished successfully!\n")

  entities = topics_response.getEntities()
  important_entities = []

  if entities:
      #print("\tEntities detected (" + str(len(entities)) + "):\n")
      for entity in entities:
        # Store the lowercased form of each entity, in the order returned.
        ent = topics_response.getTopicForm(entity).lower()
        important_entities.append(ent)
        # Stop once 20 entities have been collected.
        if len(important_entities) == 20:
          break
def analyzeText(text, language, threshold, tt, ud):
    """Extract topics from ``text`` and group them by ontology type.

    Args:
        text: text to analyze.
        language: language code sent as ``lang``.
        threshold: minimum relevance an entity or concept must reach to be
            kept.
        tt: topic types to request. The letters it contains select what is
            collected: ``e`` entities, ``c`` concepts, ``m`` money
            expressions, ``n`` quantity expressions.
        ud: user dictionary, sent as the ``ud`` parameter.

    Returns:
        A ``pd.Series`` indexed by ``person``, ``organization``,
        ``location``, ``product``, ``id``, ``event``, ``other`` and
        ``quantity``. On success every value is a list of topic forms. On a
        non-blocking error ``person`` holds the error description and the
        remaining values are empty strings.

    Raises:
        ValueError: when the API reports a blocking error (as decided by
            ``isBlockingErrorType``).

    The module-level ``index_count`` is incremented after each text that
    does not raise.
    """
    global index_count
    print("Extracting topics for text #%s" % str(index_count))

    # this is where we are going to store our results
    topics = {
        "person": [],
        "organization": [],
        "location": [],
        "product": [],
        "id": [],
        "event": [],
        "other": [],
        "quantity": [],
    }

    # We are going to make a request to the Topics Extraction API
    request = meaningcloud.TopicsRequest(
        license_key,
        txt=text,
        lang=language,
        topicType=tt,
        server=server,
        otherparams={"ud": ud},
    )
    setRequestSource(request)
    response = meaningcloud.TopicsResponse(request.sendReq())

    def add_by_type(topic_list):
        """File each relevant topic under the first node of its type."""
        for topic in topic_list or []:
            if int(response.getTopicRelevance(topic)) < threshold:
                continue
            first_node = response.getTypeFirstNode(
                response.getOntoType(topic)
            ).lower()
            # Types without a bucket of their own go to "other".
            bucket = topics.get(first_node, topics["other"])
            insertInList(bucket, str(response.getTopicForm(topic)))

    def add_quantities(expression_list):
        """File every expression form under "quantity"."""
        for expression in expression_list or []:
            insertInList(
                topics["quantity"], str(response.getTopicForm(expression))
            )

    if response.isSuccessful():
        if "e" in tt:
            add_by_type(response.getEntities())
        if "c" in tt:
            add_by_type(response.getConcepts())
        if "m" in tt:
            add_quantities(response.getMoneyExpressions())
        if "n" in tt:
            add_quantities(response.getQuantityExpressions())
    else:
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: ("
                + status_code
                + ") "
                + status_msg
            )
        print(
            "Oops! The request to Topics Extraction for text #"
            + str(index_count)
            + " was not succesful: ("
            + status_code
            + ") "
            + status_msg
        )
        # Report the error in the first column and leave the rest empty.
        topics = {
            "person": "ERROR (" + status_code + "): " + status_msg,
            "organization": "",
            "location": "",
            "product": "",
            "id": "",
            "event": "",
            "other": "",
            "quantity": "",
        }

    index_count += 1
    return pd.Series(topics)
示例#11
0
def analyzeText(text):
    """Run sentiment, topics and IAB 2.0 categorization over ``text``.

    Three MeaningCloud requests are sent in order: Sentiment Analysis,
    Topics Extraction (entities and concepts) and Deep Categorization with
    the ``IAB_2.0_en`` model. A request that is not successful prints its
    status message and leaves its result empty. A ``ValueError`` stops the
    remaining requests and is reported by printing the exception class.

    Returns:
        ``pd.Series([polarity, entities, concepts, iab2])`` where
        ``entities`` is a comma-separated list of ``form (type)`` items,
        ``concepts`` a comma-separated list of distinct forms (both limited
        to relevance >= 100) and ``iab2`` the code of the first category.

    The module-level ``index_count`` is incremented on every call.
    """
    global index_count
    print("Analyzing text " + str(index_count))

    # Results default to empty strings and are filled in as requests succeed
    polarity, entities, concepts, iab2 = '', '', '', ''

    try:
        # --- Sentiment Analysis API ---
        print("\tGetting sentiment analysis...")
        sentiment_req = meaningcloud.SentimentRequest(license_key,
                                                      lang='en',
                                                      txt=text,
                                                      txtf='markup')
        sentiment_response = meaningcloud.SentimentResponse(
            sentiment_req.sendReq())
        if not sentiment_response.isSuccessful():
            print('Request to sentiment was not succesful: ' +
                  sentiment_response.getStatusMsg())
        else:
            polarity = sentiment_response.getGlobalScoreTag()

        # --- Topics Extraction API ---
        print("\tGetting entities and concepts...")
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key,
                                       txt=text,
                                       lang='en',
                                       topicType='ec',
                                       otherparams={'txtf': 'markup'
                                                    }).sendReq())

        if not topics_response.isSuccessful():
            print('Request to topics was not succesful: ' +
                  topics_response.getStatusMsg())
        else:
            entities_list = topics_response.getEntities()
            if entities_list:
                # only entities with relevance of at least 100 are kept
                entities = ', '.join(
                    topics_response.getTopicForm(item) + ' (' +
                    topics_response.getTypeLastNode(
                        topics_response.getOntoType(item)) + ')'
                    for item in entities_list
                    if int(topics_response.getTopicRelevance(item)) >= 100)

            concepts_list = topics_response.getConcepts()
            if concepts_list:
                # only concepts with relevance of at least 100 are kept;
                # dict.fromkeys drops repeated forms and keeps their order
                distinct_forms = dict.fromkeys(
                    topics_response.getTopicForm(item)
                    for item in concepts_list
                    if int(topics_response.getTopicRelevance(item)) >= 100)
                concepts = ', '.join(distinct_forms)

        # --- Deep Categorization API ---
        print("\tGetting IAB 2.0 classification...")
        deepcat_req = meaningcloud.DeepCategorizationRequest(
            license_key,
            model='IAB_2.0_en',
            txt=text,
            otherparams={'txtf': 'markup'})
        deepcat_response = meaningcloud.DeepCategorizationResponse(
            deepcat_req.sendReq())
        if not deepcat_response.isSuccessful():
            print('Request to Deep Categorization was not succesful: ' +
                  deepcat_response.getStatusMsg())
        else:
            categories = deepcat_response.getCategories()
            if categories:
                # only the first category is reported
                iab2 = ', '.join(
                    deepcat_response.getCategoryCode(cat)
                    for cat in categories[:1])
            else:
                iab2 = ''

    except ValueError as err:
        print("\nException: " + str(type(err)))

    index_count += 1

    return pd.Series([polarity, entities, concepts, iab2])