def testConstruct(self):
    request = self.request
    self.assertEqual(self.URL, request.getUrl())
    self.assertIsNotNone(request.getParams())
    params = request.getParams()
    self.assertEqual('key' in params.keys(), True)
    self.assertEqual(params['key'], self.KEY)
    self.assertIsNotNone(request.getTimeout())
    self.assertEqual(self.TIMEOUT_DEFAULT, request.getTimeout())

    extraHeaders = ["Accept: application/json"]
    request2 = meaningcloud.TopicsRequest(self.KEY, txt=self.text, lang="en", extraheaders=extraHeaders)
    self.assertIsNotNone(request2.sendReq())

    otherparams = {'key2': 'my_key2'}
    request3 = meaningcloud.TopicsRequest(self.KEY, txt=self.text, lang="en",
                                          extraheaders=extraHeaders, otherparams=otherparams)
    self.assertEqual('key2' in request3.getParams().keys(), True)
    self.assertEqual(request3.getParams()['key2'], 'my_key2')

    url = 'https://en.wikipedia.org/wiki/Star_Trek'
    request4 = meaningcloud.TopicsRequest(self.KEY, url=url, lang="en",
                                          extraheaders=extraHeaders, otherparams=otherparams)
    self.assertEqual('url' in request4.getParams().keys(), True)
    self.assertEqual(request4.getParams()['url'], url)

    file = self.RESOURCES_DIR + 'file.txt'
    request5 = meaningcloud.TopicsRequest(self.KEY, doc=file, lang="en",
                                          extraheaders=extraHeaders, otherparams=otherparams)
    # the document is sent as a file upload, not as a regular parameter
    self.assertEqual('doc' in request5.getParams().keys(), False)
    doc = request5._file['doc'].read().decode('utf-8')
    request5._file['doc'].close()
    aux_doc = open(self.RESOURCES_DIR + 'file.txt', 'rb')
    aux_content = aux_doc.read().decode('utf-8')
    aux_doc.close()
    self.assertEqual(aux_content, doc)

    return request
def get_links(self, text: str) -> List[Pair]:
    topics_response = meaningcloud.TopicsResponse(
        meaningcloud.TopicsRequest(
            MEANING_CLOUD_KEYS[randint(0, len(MEANING_CLOUD_KEYS) - 1)],
            txt=text,
            lang='en',
            topicType='e').sendReq())
    if topics_response.isSuccessful():
        entities = topics_response.getEntities()
        links = []
        for entity in entities:
            if 'semld_list' not in entity:
                continue
            found = list(
                filter(lambda x: x[:23] == 'http://en.wikipedia.org',
                       entity['semld_list']))
            if len(found) > 0:
                links.append(
                    Pair(
                        f"http://dbpedia.org/resource/{found[0][found[0].rfind('/') + 1:]}",
                        entity['form'], 'entity'))
        return links
    else:
        if topics_response.getResponse() is None:
            print("\nOh no! The request sent did not return a Json\n")
        else:
            print("\nOh no! There was the following error: " +
                  topics_response.getStatusMsg() + "\n")
        return []
def get_topics(row):
    try:
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest('ed573dcdee15b76ef892775da22bd5d4',
                                       txt=row.sentence,
                                       lang='en',
                                       topicType='a').sendReq())
        sleep(0.5)
        return topics_response
    except Exception:
        print('Error in row {}'.format(row.name))
def extractTopics(text, fibo, relevance):
    entities = ''
    concepts = ''
    if fibo:
        fibo_concepts = ''

    print("\tGetting entities and concepts...")
    topics_req = meaningcloud.TopicsRequest(license_key, txt=text, lang='en', topicType='ec',
                                            otherparams={'txtf': 'markup'})
    if fibo:
        topics_req.addParam('ud', 'FIBO_en')
    topics_response = meaningcloud.TopicsResponse(topics_req.sendReq())

    # If there are no errors in the request, we extract the entities and concepts
    if topics_response.isSuccessful():
        entities_list = topics_response.getEntities()
        formatted_entities = []
        if entities_list:
            for entity in entities_list:
                # we keep only the entities whose relevance is at least the given threshold
                if int(topics_response.getTopicRelevance(entity)) >= relevance:
                    formatted_entities.append(
                        topics_response.getTopicForm(entity) + ' (' +
                        topics_response.getTypeLastNode(topics_response.getOntoType(entity)) + ')')
            entities = ', '.join(formatted_entities)
        else:
            entities = '(none)'

        concepts_list = topics_response.getConcepts()
        formatted_concepts = []
        formatted_fibo_concepts = []
        if concepts_list:
            for concept in concepts_list:
                if fibo and 'dictionary' in concept.keys() and concept['dictionary'] == 'FIBO_en':
                    formatted_fibo_concepts.append(
                        topics_response.getTopicForm(concept) + ' (' +
                        topics_response.getTypeLastNode(topics_response.getOntoType(concept)) + ')')
                # we keep concepts that reach the threshold, multiword concepts that reach
                # half the threshold, or user-defined concepts
                elif (int(topics_response.getTopicRelevance(concept)) >= relevance
                      or (' ' in topics_response.getTopicForm(concept)
                          and int(topics_response.getTopicRelevance(concept)) >= (relevance / 2))
                      or topics_response.isUserDefined(concept)):
                    formatted_concepts.append(
                        topics_response.getTopicForm(concept) + ' (' +
                        topics_response.getTypeLastNode(topics_response.getOntoType(concept)) + ')')
            concepts = ', '.join(formatted_concepts) if formatted_concepts else '(none)'
            fibo_concepts = ', '.join(formatted_fibo_concepts) if formatted_fibo_concepts else '(none)'
        else:
            concepts = "(none)"
            fibo_concepts = "(none)"
    else:
        print("\tOops! Request to topics was not successful: (" +
              topics_response.getStatusCode() + ') ' + topics_response.getStatusMsg())

    return (entities, concepts, fibo_concepts) if fibo else (entities, concepts)
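# A hypothetical usage sketch for extractTopics above; the sample text, the fibo flag,
# and the relevance threshold of 80 are assumptions, not values from the original code,
# and `license_key` must already be defined for the request to be sent.
sample_text = 'The bank issued new bonds in London.'
entities, concepts, fibo_concepts = extractTopics(sample_text, fibo=True, relevance=80)
print(entities)
print(concepts)
print(fibo_concepts)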
def topics():
    try:
        # We are going to make a request to the Topics Extraction API
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key, doc=document, lang='en', topicType='e').sendReq())

        # If there are no errors in the request, we print the output
        if topics_response.isSuccessful():
            # print("\nThe request to 'Topics Extraction' finished successfully!\n")
            entities = topics_response.getEntities()
            if entities:
                print("\t Found entities")
                print("\tEntities detected (" + str(len(entities)) + "):\n")
                print(entities)
                for entity in entities:
                    print("\t\t" + topics_response.getTopicForm(entity) + ' --> ' +
                          topics_response.getTypeLastNode(topics_response.getOntoType(entity)) + ' --> ' +
                          topics_response.getOntoType(entity) + ' --> ' +
                          str(topics_response.getNumberOfAppearances(entity)) + "\n")
                    if "semld_list" in entity.keys():
                        print(entity["semld_list"][0])
            else:
                print("\tNo entities detected!\n")
        else:
            if topics_response.getResponse() is None:
                print("\nOh no! The request sent did not return a Json\n")
            else:
                print("\nOh no! There was the following error: " +
                      topics_response.getStatusMsg() + "\n")
        return (topics_response, entities)
    except ValueError:
        e = sys.exc_info()[0]
        print("\nException: " + str(e))
def get_topic_extraction(self):
    logger.debug('TextAnalysis.get_topic_extraction()')

    if self.topic_extraction is None:
        logger.debug('topic_extraction not set')
        logger.debug('setting topic_extraction')

        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(
                self.license_key,
                txt=self.text,
                lang=self.language,
                topicType='ec',
            ).sendReq())

        # Save the raw and serialized topic extraction
        self.topic_extraction_raw = topics_response.getResults()
        self.topic_extraction = MeaningCloudResponse(self.topic_extraction_raw).topic()

    logger.debug(self.topic_extraction)
    return self.topic_extraction
def extractTopic(text):
    try:
        results = []
        # We are going to make a request to the Topics Extraction API
        topics_response = meaningcloud.TopicsResponse(
            meaningcloud.TopicsRequest(license_key, txt=text, lang='en', topicType='e').sendReq())

        # If there are no errors in the request, we print the output
        if topics_response.isSuccessful():
            # print("\nThe request to 'Topics Extraction' finished successfully!\n")
            entities = topics_response.getEntities()
            if entities:
                # print("\tEntities detected (" + str(len(entities)) + "):\n")
                for entity in entities:
                    # print("\t\t" + topics_response.getTopicForm(entity) + ' --> ' +
                    #       topics_response.getTypeLastNode(topics_response.getOntoType(entity)) + "\n")
                    results.append([
                        topics_response.getTopicForm(entity),
                        topics_response.getTypeLastNode(topics_response.getOntoType(entity))
                    ])
                return results
            # else:
            #     print("\tNo entities detected!\n")
        else:
            if topics_response.getResponse() is None:
                print("\nOh no! The request sent did not return a Json\n")
            else:
                print("\nOh no! There was the following error: " +
                      topics_response.getStatusMsg() + "\n")
    except ValueError:
        e = sys.exc_info()[0]
        print("\nException: " + str(e))
class TopicsRequesTest(unittest.TestCase):

    URL = 'https://api.meaningcloud.com/topics-2.0'
    KEY = 'MY_KEY'
    TIMEOUT_DEFAULT = 60
    RESOURCES_DIR = './resources/'
    text = 'London is big'
    request = meaningcloud.TopicsRequest(KEY, txt=text, topicType="a")

    def testConstruct(self):
        request = self.request
        self.assertEqual(self.URL, request.getUrl())
        self.assertIsNotNone(request.getParams())
        params = request.getParams()
        self.assertEqual('key' in params.keys(), True)
        self.assertEqual(params['key'], self.KEY)
        self.assertIsNotNone(request.getTimeout())
        self.assertEqual(self.TIMEOUT_DEFAULT, request.getTimeout())

        extraHeaders = ["Accept: application/json"]
        request2 = meaningcloud.TopicsRequest(self.KEY, txt=self.text, topicType="a", extraheaders=extraHeaders)
        self.assertIsNotNone(request2.sendReq())

        otherparams = {'key2': 'my_key2'}
        request3 = meaningcloud.TopicsRequest(self.KEY, txt=self.text, topicType="a",
                                              extraheaders=extraHeaders, otherparams=otherparams)
        self.assertEqual('key2' in request3.getParams().keys(), True)
        self.assertEqual(request3.getParams()['key2'], 'my_key2')

        url = 'https://en.wikipedia.org/wiki/Star_Trek'
        request4 = meaningcloud.TopicsRequest(self.KEY, url=url, topicType="a",
                                              extraheaders=extraHeaders, otherparams=otherparams)
        self.assertEqual('url' in request4.getParams().keys(), True)
        self.assertEqual(request4.getParams()['url'], url)

        file = self.RESOURCES_DIR + 'file.txt'
        request5 = meaningcloud.TopicsRequest(self.KEY, doc=file, topicType="a",
                                              extraheaders=extraHeaders, otherparams=otherparams)
        # the document is sent as a file upload, not as a regular parameter
        self.assertEqual('doc' in request5.getParams().keys(), False)
        doc = request5._file['doc'].read().decode('utf-8')
        request5._file['doc'].close()
        aux_doc = open(self.RESOURCES_DIR + 'file.txt', 'rb')
        aux_content = aux_doc.read().decode('utf-8')
        aux_doc.close()
        self.assertEqual(aux_content, doc)

        return request

    def testSendReq(self):
        request = self.request
        requestRq = request.sendReq()
        self.assertIsNotNone(requestRq)
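# A minimal sketch for running the test class above with the standard unittest runner;
# this block is added for illustration and assumes the module already imports unittest
# and meaningcloud, as the test code above requires.
if __name__ == '__main__':
    unittest.main()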
sentence_vectors.append(vector)

"""# **FEATURE EXTRACTION**"""

"""meaning cloud topic extraction"""

# print("Extracting named entities with MeaningCloud...")
# !pip install meaningcloud-python

license_key = 'b59424e48a94e5061bef29cbd29bdacd'

# We are going to make a request to the Topics Extraction API
topics_response = meaningcloud.TopicsResponse(
    meaningcloud.TopicsRequest(license_key, txt=ner_report, lang='en', topicType='e').sendReq())

# If there are no errors in the request, we print the output
if topics_response.isSuccessful():
    # print("\nThe request to 'Topics Extraction' finished successfully!\n")
    entities = topics_response.getEntities()
    important_entities = []
    if entities:
        # print("\tEntities detected (" + str(len(entities)) + "):\n")
        for entity in entities:
            ent = topics_response.getTopicForm(entity).lower()
            # topics_response.getTopicRelevance(entity)
            important_entities.append(ent)
            # keep at most the first 20 entity forms
            if len(important_entities) == 20:
                break
def analyzeText(text, language, threshold, tt, ud):
    global index_count
    print("Extracting topics for text #%s" % str(index_count))

    # this is where we are going to store our results
    topics = {
        "person": [],
        "organization": [],
        "location": [],
        "product": [],
        "id": [],
        "event": [],
        "other": [],
        "quantity": [],
    }

    try:
        # We are going to make a request to the Topics Extraction API
        request = meaningcloud.TopicsRequest(
            license_key,
            txt=text,
            lang=language,
            topicType=tt,
            server=server,
            otherparams={"ud": ud},
        )
        setRequestSource(request)
        response = meaningcloud.TopicsResponse(request.sendReq())

        if response.isSuccessful():
            if "e" in tt:
                entity_list = response.getEntities()
                if entity_list:
                    for entity in entity_list:
                        if int(response.getTopicRelevance(entity)) >= threshold:
                            first_node = response.getTypeFirstNode(response.getOntoType(entity)).lower()
                            form = str(response.getTopicForm(entity))
                            # topics without a matching bucket go into "other"
                            target = topics.get(first_node)
                            insertInList(topics.get("other") if target is None else target, form)
            if "c" in tt:
                concept_list = response.getConcepts()
                if concept_list:
                    for concept in concept_list:
                        if int(response.getTopicRelevance(concept)) >= threshold:
                            first_node = response.getTypeFirstNode(response.getOntoType(concept)).lower()
                            form = str(response.getTopicForm(concept))
                            target = topics.get(first_node)
                            insertInList(topics.get("other") if target is None else target, form)
            if "m" in tt:
                money_expression_list = response.getMoneyExpressions()
                if money_expression_list:
                    for money in money_expression_list:
                        insertInList(topics.get("quantity"), str(response.getTopicForm(money)))
            if "n" in tt:
                quantity_expression_list = response.getQuantityExpressions()
                if quantity_expression_list:
                    for quantity in quantity_expression_list:
                        insertInList(topics.get("quantity"), str(response.getTopicForm(quantity)))
        else:
            if isBlockingErrorType(response.getStatusCode()):
                raise ValueError(
                    "Something went wrong in the MeaningCloud request!: ("
                    + response.getStatusCode() + ") " + response.getStatusMsg()
                )
            else:
                print(
                    "Oops! The request to Topics Extraction for text #" + str(index_count)
                    + " was not successful: (" + response.getStatusCode() + ") "
                    + response.getStatusMsg()
                )
                topics = {
                    "person": "ERROR (" + response.getStatusCode() + "): " + response.getStatusMsg(),
                    "organization": "",
                    "location": "",
                    "product": "",
                    "id": "",
                    "event": "",
                    "other": "",
                    "quantity": "",
                }
    except ValueError as e:
        raise ValueError(str(e))

    index_count += 1
    return pd.Series(topics)
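# A hypothetical usage sketch for the function above, assuming a pandas DataFrame `df`
# with a 'text' column; the column name, language, threshold (100), topic types ('ec')
# and empty user dictionary are all assumptions, not values from the original script.
# Series.apply passes the extra args after each text value, and because analyzeText
# returns a pandas Series, the result expands into one column per topic bucket.
topic_columns = df['text'].apply(analyzeText, args=('en', 100, 'ec', ''))
df = pd.concat([df, topic_columns], axis=1)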
def analyzeText(text):
    global index_count
    print("Analyzing text " + str(index_count))

    # this is where we are going to store our results
    polarity = ''
    entities = ''
    concepts = ''
    iab2 = ''

    try:
        # We are going to make a request to the Sentiment Analysis API
        print("\tGetting sentiment analysis...")
        sentiment_response = meaningcloud.SentimentResponse(
            meaningcloud.SentimentRequest(license_key, lang='en', txt=text, txtf='markup').sendReq())

        if sentiment_response.isSuccessful():
            polarity = sentiment_response.getGlobalScoreTag()
        else:
            print('Request to sentiment was not successful: ' + sentiment_response.getStatusMsg())

        # We are going to make a request to the Topics Extraction API
        print("\tGetting entities and concepts...")
        topics_req = meaningcloud.TopicsRequest(license_key, txt=text, lang='en', topicType='ec',
                                                otherparams={'txtf': 'markup'})
        topics_response = meaningcloud.TopicsResponse(topics_req.sendReq())

        # If there are no errors in the request, we extract the entities and concepts
        if topics_response.isSuccessful():
            entities_list = topics_response.getEntities()
            formatted_entities = []
            if entities_list:
                for entity in entities_list:
                    # we keep only the entities with a relevance of at least 100
                    if int(topics_response.getTopicRelevance(entity)) >= 100:
                        formatted_entities.append(
                            topics_response.getTopicForm(entity) + ' (' +
                            topics_response.getTypeLastNode(topics_response.getOntoType(entity)) + ')')
                entities = ', '.join(formatted_entities)

            concepts_list = topics_response.getConcepts()
            formatted_concepts = []
            if concepts_list:
                for concept in concepts_list:
                    # we keep only the concepts with a relevance of at least 100
                    if int(topics_response.getTopicRelevance(concept)) >= 100:
                        formatted_concepts.append(topics_response.getTopicForm(concept))
                concepts = ', '.join(list(dict.fromkeys(formatted_concepts)))
        else:
            print('Request to topics was not successful: ' + topics_response.getStatusMsg())

        # We are going to make a request to the Deep Categorization API
        print("\tGetting IAB 2.0 classification...")
        deepcat_response = meaningcloud.DeepCategorizationResponse(
            meaningcloud.DeepCategorizationRequest(license_key, model='IAB_2.0_en', txt=text,
                                                   otherparams={'txtf': 'markup'}).sendReq())

        if deepcat_response.isSuccessful():
            categories = deepcat_response.getCategories()
            iab2 = (', '.join(deepcat_response.getCategoryCode(cat)
                              for cat in categories[:1])) if categories else ''
        else:
            print('Request to Deep Categorization was not successful: ' + deepcat_response.getStatusMsg())

    except ValueError:
        e = sys.exc_info()[0]
        print("\nException: " + str(e))

    index_count += 1
    return pd.Series([polarity, entities, concepts, iab2])
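# A minimal, hedged sketch of the request/response pattern shared by the snippets above:
# build a TopicsRequest, wrap sendReq() in a TopicsResponse, check isSuccessful(), then
# read the topics. 'YOUR_LICENSE_KEY' and the sample text are placeholders, not values
# taken from any snippet; every method call used here already appears in the code above.
import meaningcloud

response = meaningcloud.TopicsResponse(
    meaningcloud.TopicsRequest('YOUR_LICENSE_KEY', txt='London is big', lang='en',
                               topicType='e').sendReq())

if response.isSuccessful():
    for entity in response.getEntities() or []:
        # entity form plus the last node of its ontology type
        print(response.getTopicForm(entity) + ' --> ' +
              response.getTypeLastNode(response.getOntoType(entity)))
else:
    print('Request failed: ' + str(response.getStatusMsg()))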