def grouped_category(texts):
    """Categorize each text via AlchemyAPI and tally category frequencies.

    INPUT:
        texts -> iterable of text strings to be analyzed
    OUTPUT:
        statistic -> dict mapping each category to its frequency,
                     like {"soccer": 12, "football": 24}; failed API
                     calls are counted under the "null" key
        category  -> list of the category assigned to each text, in order
                     ("null" for texts whose API call failed)
    """
    alchemyapi = AlchemyAPI()
    statistic = {"null": 0}
    category = []
    for text in texts:
        response = alchemyapi.category('text', text)
        if response['status'] == 'OK':
            # `response` is already a parsed dict; the original
            # json.dumps() -> JSONDecoder().decode() round-trip was redundant.
            label = response.get("category")
            category.append(label)
            # get(label, 0) replaces the explicit `!= None` existence check.
            statistic[label] = statistic.get(label, 0) + 1
        else:
            statistic["null"] += 1
            category.append("null")
    return statistic, category
def extractCategoryFromUrl(self, url):
    """Fetch the AlchemyAPI category for the given url and store it on self.categoryFromUrl."""
    # build a fresh AlchemyAPI client for this request
    alchemy_client = AlchemyAPI()
    # ask the AlchemyAPI server for a categorization of the url content
    response = alchemy_client.category("text", url)
    # guard clause: report the failure and bail out early on error
    if response["status"] != "OK":
        print("Error in text categorization call: ", response["statusInfo"])
        return
    # record the category label and its confidence score from the response
    self.categoryFromUrl.setCategory(response["category"])
    self.categoryFromUrl.setScore(response["score"])
def category(text):
    """Find the category that the input text belongs to.

    INPUT:
        text -> input text to be analyzed
    OUTPUT:
        category string the input belongs to; "null" means the
        AlchemyAPI call failed somehow
    """
    alchemyapi = AlchemyAPI()
    response = alchemyapi.category('text', text)
    if response['status'] == 'OK':
        # `response` is already a parsed dict; the original
        # json.dumps() -> JSONDecoder().decode() round-trip was redundant.
        return response.get("category")
    return "null"
# Smoke tests for the relations / category / feeds endpoints: every supported
# flavor must return status OK, an unsupported flavor must return ERROR.
# NOTE(review): relies on alchemyapi, test_text, test_html and test_url being
# defined earlier in the file (outside this chunk).
response = alchemyapi.relations('text', test_text); assert(response['status'] == 'OK')
response = alchemyapi.relations('html', test_html); assert(response['status'] == 'OK')
response = alchemyapi.relations('url', test_url); assert(response['status'] == 'OK')
response = alchemyapi.relations('random', test_url); assert(response['status'] == 'ERROR')  #invalid flavor
print('Relation tests complete!')
print('')

#Category
print('Checking category . . . ')
response = alchemyapi.category('text', test_text); assert(response['status'] == 'OK')
# html flavor requires a 'url' option to be passed alongside the markup
response = alchemyapi.category('html', test_html, {'url':'test'}); assert(response['status'] == 'OK')
response = alchemyapi.category('url', test_url); assert(response['status'] == 'OK')
response = alchemyapi.category('random', test_url); assert(response['status'] == 'ERROR')  #invalid flavor
print('Category tests complete!')
print('')

#Feeds
print('Checking feeds . . . ')
response = alchemyapi.feeds('text', test_text);
# Tail of the relation-extraction demo followed by the text-categorization
# demo. NOTE(review): the leading `else:` pairs with an `if` that lies before
# this chunk; alchemyapi and demo_text are also defined outside this view.
else:
    print('Error in relation extaction call: ', response['statusInfo'])
print('')
print('')
print('')
print('############################################')
print('# Text Categorization Example #')
print('############################################')
print('')
print('')
print('Processing text: ', demo_text)
print('')
# categorize the demo text and pretty-print the raw response on success
response = alchemyapi.category('text', demo_text)
if response['status'] == 'OK':
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Category ##')
    print('text: ', response['category'])
    print('score: ', response['score'])
    print('')
else:
    print('Error in text categorization call: ', response['statusInfo'])
print('')
print('')
def index(request):
    """Django view: merge newly fetched tweets into a pickled cache, categorize
    the new ones with AlchemyAPI, sort by retweet count, and render the top
    tweets with the index template.

    NOTE(review): GETNEWTWEETS, ONTHEFLY, NUM_STATUSES, MAXTWEETS,
    MIN_RETWEETS, filename and clean() are defined outside this view.
    """
    alchemyapi = AlchemyAPI()
    template = loader.get_template('main/index.html')
    # NOTE(review): hard-coded placeholder Twitter credentials ('CCC' etc.) —
    # presumably replaced with real keys elsewhere; verify before deploying.
    api = twitter.Api(consumer_key='CCC',consumer_secret='SSS', access_token_key='AAA', access_token_secret='TTT')
    # load previously cached statuses from the pickle file
    # NOTE(review): infile is never closed and the pickle file is later
    # reopened in text mode 'w' for dumping — confirm this is intentional.
    infile = open(filename,'r')
    statuses = pickle.load(infile)
    statuses2 = []
    if GETNEWTWEETS:
        statuses2 = api.GetHomeTimeline(NUM_STATUSES)
    countclash = 0
    countpass = 0
    passt = 0
    if not ONTHEFLY:
        # merge: keep only fetched tweets whose id is not already cached
        for item in statuses2:
            passt = 1
            for fitem in statuses:
                if fitem.id == item.id:
                    countclash = countclash + 1
                    passt = 0
            if passt > 0:
                temp = clean(item.text)
                response = alchemyapi.category('text',temp)
                category = "unknown"
                if response['status'] == 'OK':
                    category = response['category']
                    cat_score = response['score']
                else:
                    # NOTE(review): message says "entity extraction" but this
                    # is the category call — likely a copy/paste slip.
                    print('Error in entity extraction call: ', response['statusInfo'])
                # the tweet's lang field is repurposed to carry the category
                item.lang = category
                statuses.append(item)
                countpass = countpass + 1
    #print("%i clash %i pass" % (countclash,countpass))
    # in-place selection sort: order statuses by retweet_count, descending
    ai = -1
    bi = -1
    size = len(statuses)
    for a in statuses:
        ai = ai + 1
        cmax = a.retweet_count
        cmaxindex = ai
        for bi in range(ai,size):
            if statuses[bi].retweet_count > cmax:
                cmaxindex = bi
                cmax = statuses[bi].retweet_count
        if cmaxindex != ai:
            statuses[ai], statuses[cmaxindex] = statuses[cmaxindex], statuses[ai]
    # take up to MAXTWEETS statuses that clear the retweet threshold
    tweet_list = []
    index = 0
    for s in statuses:
        if index < MAXTWEETS:
            if s.retweet_count > MIN_RETWEETS:
                tweet_list.append(s)
                index = index + 1
    newsize = len(statuses)
    if not ONTHEFLY:
        # persist the merged (and sorted) status list back to the cache file
        # del statuses[NUM_STATUSES:newsize]
        f = open(filename,'w')
        pickle.dump(statuses, f)
    context = RequestContext(request, {'tweet_list' : tweet_list})
    return HttpResponse(template.render(context))
def findCategory(text):
    """Run AlchemyAPI text categorization on *text* and return the raw response dict."""
    api_client = AlchemyAPI()
    return api_client.category('text', text)
#Relations print('Checking relations . . . ') response = alchemyapi.relations('text', test_text) assert (response['status'] == 'OK') response = alchemyapi.relations('html', test_html) assert (response['status'] == 'OK') response = alchemyapi.relations('url', test_url) assert (response['status'] == 'OK') response = alchemyapi.relations('random', test_url) assert (response['status'] == 'ERROR') #invalid flavor print('Relation tests complete!') print('') #Category print('Checking category . . . ') response = alchemyapi.category('text', test_text) assert (response['status'] == 'OK') response = alchemyapi.category('html', test_html, {'url': 'test'}) assert (response['status'] == 'OK') response = alchemyapi.category('url', test_url) assert (response['status'] == 'OK') response = alchemyapi.category('random', test_url) assert (response['status'] == 'ERROR') #invalid flavor print('Category tests complete!') print('') #Feeds print('Checking feeds . . . ') response = alchemyapi.feeds('text', test_text) assert (response['status'] == 'ERROR') #only works for html and url content response = alchemyapi.feeds('html', test_html, {'url': 'test'})
# Tail of the relation-extraction demo followed by the text-categorization
# demo (double-quote variant). NOTE(review): the first print is presumably the
# body of an `else:` that lies before this chunk — hence its indentation;
# alchemyapi and demo_text are also defined outside this view.
    print("Error in relation extaction call: ", response["statusInfo"])
print("")
print("")
print("")
print("############################################")
print("# Text Categorization Example #")
print("############################################")
print("")
print("")
print("Processing text: ", demo_text)
print("")
# categorize the demo text and pretty-print the raw response on success
response = alchemyapi.category("text", demo_text)
if response["status"] == "OK":
    print("## Response Object ##")
    print(json.dumps(response, indent=4))
    print("")
    print("## Category ##")
    print("text: ", response["category"])
    print("score: ", response["score"])
    print("")
else:
    print("Error in text categorization call: ", response["statusInfo"])
print("")
# Read the Civic Graph entities/descriptions CSV and split it into tokens.
# (Python 2 syntax: bare `print` statements.)
myText = ""
with open("cg_entities_and_descr.csv", "r") as file:
    data = file.read()
    myText = data.split()
print type(myText)
print myText
# NOTE(review): `lis` and `entities` are never assigned in this chunk — this
# loop raises NameError unless they are defined elsewhere; possibly `myText`
# was intended. Confirm against the rest of the file.
for i,x in enumerate(lis):
    #print the list items
    print
#     #print "line{0} = {1}".format(i,x)
#     #print "{1}".format(i,x)
#     names = x.split(',', 2)[0]
#     entities.append(x)

#categorize entities in Civic Graph based on their descriptions (data = 'text')
# NOTE(review): `i += 1` below is a no-op — the for statement rebinds i each
# iteration.
for i in range(0, len(lis)):
    categ_result = alchemyapi.category('text', entities[i]);
    pp.pprint(categ_result)
    i += 1
# single ad-hoc call passing a URL through the 'text' flavor
categ_result = alchemyapi.category('text', "http://techpresident.com/news/25496/first-post-data-driven");
pp.pprint(categ_result['category'])
    # Tally every extracted entity into the master `entities` dict.
    # NOTE(review): this chunk begins inside an `if` whose header lies before
    # this view; `entities`, `categories`, `pdf_file` and `full_text` are
    # also defined outside it. (Python 2: dict.has_key, print statements.)
    for entity in response['entities']:
        # add each entity to our master list
        if entities.has_key(entity['text']):
            entities[entity['text']] += int(entity['count'])
        else:
            entities[entity['text']] = int(entity['count'])
    print "[*] Retrieved %d entities from %s" % (len(entities), pdf_file)
else:
    print "[!] Error receiving Alchemy response: %s" % response[
        'statusInfo']

# get the category
# count one occurrence of the document's category for this PDF
response = alchemyapi.category('text', full_text)
if response['status'] == 'OK':
    if categories.has_key(response['category']):
        categories[response['category']] += 1
    else:
        categories[response['category']] = 1
    print "[*] Categorized %s as %s" % (pdf_file, response['category'])

# grab the concepts
# NOTE(review): this `if` body continues past the end of this chunk.
response = alchemyapi.concepts('text', full_text)
if response['status'] == 'OK':
class AlchemyPost:
    """Fetch one Tumblr post and run the full battery of AlchemyAPI analyses
    on its HTML content, collecting the results in self.content.

    (Python 2 syntax: `print "*",` statements.)
    """

    def __init__(self, post_tumblr, post_id, consumer_key, consumer_secret, oauth_token, oauth_secret):
        # post_tumblr: blog identifier; post_id: id of the post to analyze
        self.post_tumblr = post_tumblr
        self.post_id = post_id
        self._init_tumblr(consumer_key, consumer_secret, oauth_token, oauth_secret)
        self._init_alchemy()

    def _init_tumblr(self, consumer_key, consumer_secret, oauth_token, oauth_secret):
        # build the authenticated Tumblr REST client
        self._client = pytumblr.TumblrRestClient(consumer_key, consumer_secret, oauth_token, oauth_secret)

    def _init_alchemy(self):
        # AlchemyAPI client plus the dict that accumulates analysis results
        self.alchemyapi = AlchemyAPI()
        self.content = {}

    def analyze_post(self):
        """Download the post content and run every AlchemyAPI analysis on it."""
        self.post = self._get_content_post()
        self._alchemy_entities()
        self._alchemy_keywords()
        self._alchemy_concepts()
        self._alchemy_sentiment()
        self._alchemy_relations()
        self._alchemy_category()
        self._alchemy_feeds()
        self._alchemy_taxonomy()

    def print_content(self):
        # dump the accumulated analysis results as pretty-printed JSON
        print(json.dumps(self.content, indent=4))

    def _get_content_post(self):
        """Return the post rendered as an HTML snippet ('' for unsupported types)."""
        print "*",
        infos = self._get_infos_post()
        self.title = ''
        self.tags = []
        if 'tags' in infos:
            self.tags = infos['tags']
        # only 'text' and 'quote' post types are supported
        if infos['type'] == 'text':
            return self._get_content_text(infos)
        if infos['type'] == 'quote':
            return self._get_content_quote(infos)
        return ''

    def _get_infos_post(self):
        # fetch the post metadata; {} when the post is not found
        infos = self._client.posts(self.post_tumblr, id=self.post_id)
        if 'posts' in infos and len(infos['posts'])>0:
            return infos['posts'][0]
        return {}

    def _get_content_text(self, infos):
        # title + body + tags joined into one HTML string
        content = "<h1>" + str(infos['title']) + "</h1>"
        content += " <br>" + str(infos['body'])
        content += " <br>" + " ".join(infos['tags'])
        return content

    def _get_content_quote(self, infos):
        # quote text + source + tags joined into one HTML string
        content = str(infos['text'])
        content += " <br>" + str(infos['source'])
        content += " <br>" + " ".join(infos['tags'])
        return content

    # Each _alchemy_* helper below calls one AlchemyAPI endpoint on the post
    # HTML, stores the relevant response field(s) in self.content on success,
    # and returns True/False for OK/error.

    def _alchemy_entities(self):
        print ".",
        response = self.alchemyapi.entities('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['entities'] = response['entities']
        return True

    def _alchemy_keywords(self):
        print ".",
        response = self.alchemyapi.keywords('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['keywords'] = response['keywords']
        return True

    def _alchemy_concepts(self):
        print ".",
        response = self.alchemyapi.concepts('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['concepts'] = response['concepts']
        return True

    def _alchemy_sentiment(self):
        print ".",
        response = self.alchemyapi.sentiment('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['sentiment'] = response['docSentiment']
        return True

    def _alchemy_relations(self):
        print ".",
        response = self.alchemyapi.relations('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['relations'] = response['relations']
        return True

    def _alchemy_category(self):
        # stores both the category label and its confidence score
        print ".",
        response = self.alchemyapi.category('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['category'] = response['category']
        self.content['score'] = response['score']
        return True

    def _alchemy_feeds(self):
        print ".",
        response = self.alchemyapi.feeds('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['feeds'] = response['feeds']
        return True

    def _alchemy_taxonomy(self):
        print ".",
        response = self.alchemyapi.taxonomy('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['taxonomy'] = response['taxonomy']
        return True
def user_analysis_sentiments(request):
    """Django view: gather all messages sent by the requested user, run the
    AlchemyAPI analyses (keywords, concepts, language, relations, category,
    taxonomy, combined) on the joined text, and return the results as JSON.

    NOTE(review): each result variable (keywords, concepts, language_id,
    relations, categories, taxonomies) is only bound when its API call
    succeeds — the final dict build raises NameError if any call failed.
    Also nothing is returned for non-GET requests. Confirm intent.
    """
    if request.method == 'GET':
        print request.GET.get('user', '')
        user = request.GET.get('user', '')
        # collect every message text sent by this user into one string
        messages = []
        message = Message.objects.filter(user_send=user.decode("utf8"))
        for m in message:
            messages.append(m.message_text)
        text = ",".join(messages)
        alchemyapi = AlchemyAPI()
        #keywords
        # keyword extraction with sentiment per keyword
        response = alchemyapi.keywords('text', text, {'sentiment': 1})
        if response['status'] == 'OK':
            keywords = []
            for keyword in response['keywords']:
                keyword_text = keyword['text'].encode('utf-8')
                keyword_relevance = keyword['relevance']
                keyword_sentiment = keyword['sentiment']['type']
                key_word = {'keyword_text': keyword_text, 'keyword_relevance': keyword_relevance, 'keyword_sentiment': keyword_sentiment}
                keywords.append(key_word)
        else:
            print('Error in keyword extaction call: ', response['statusInfo'])
        # concept tagging
        response = alchemyapi.concepts('text', text)
        if response['status'] == 'OK':
            concepts = []
            for concept in response['concepts']:
                concept_text = concept['text']
                concept_relevance = concept['relevance']
                concept_entity = {'concept_text': concept_text, 'concept_relevance': concept_relevance}
                concepts.append(concept_entity)
        else:
            print('Error in concept tagging call: ', response['statusInfo'])
        # language detection
        response = alchemyapi.language('text', text)
        if response['status'] == 'OK':
            print(response['wikipedia'])
            language = response['language']
            iso_639_1 = response['iso-639-1']
            native_speakers = response['native-speakers']
            wikipedia = response['wikipedia']
            language_id = {'language': language, 'iso_639_1': iso_639_1, 'native_speakers': native_speakers, 'wikipedia': wikipedia}
        else:
            print('Error in language detection call: ', response['statusInfo'])
        # relation extraction
        # NOTE(review): subject/action/object are each optional in a relation,
        # so a missing part reuses the value from a previous iteration (or
        # raises NameError on the first one) — confirm this is acceptable.
        response = alchemyapi.relations('text', text)
        if response['status'] == 'OK':
            relations = []
            for relation in response['relations']:
                if 'subject' in relation:
                    relation_subject_text = relation['subject']['text'].encode('utf-8')
                if 'action' in relation:
                    relation_action_text = relation['action']['text'].encode('utf-8')
                if 'object' in relation:
                    relation_object_text = relation['object']['text'].encode('utf-8')
                relation_entity = {'relation_subject_text': relation_subject_text, 'relation_action_text': relation_action_text, 'relation_object_text': relation_object_text}
                relations.append(relation_entity)
        else:
            print('Error in relation extaction call: ', response['statusInfo'])
        # text categorization: single category label plus confidence score
        response = alchemyapi.category('text', text)
        if response['status'] == 'OK':
            print('text: ', response['category'])
            category = response['category']
            print('score: ', response['score'])
            score = response['score']
            categories = {'category': category, 'score': score}
        else:
            print('Error in text categorization call: ', response['statusInfo'])
        # taxonomy: hierarchical labels with scores
        response = alchemyapi.taxonomy('text', text)
        if response['status'] == 'OK':
            taxonomies = []
            for category in response['taxonomy']:
                taxonomy_label = category['label']
                taxonomy_score = category['score']
                taxonomy = {'taxonomy_label': taxonomy_label, 'taxonomy_score': taxonomy_score}
                taxonomies.append(taxonomy)
        else:
            print('Error in taxonomy call: ', response['statusInfo'])
        # combined call is only printed for debugging, not included in the reply
        response = alchemyapi.combined('text', text)
        if response['status'] == 'OK':
            print('## Response Object ##')
            print(json.dumps(response, indent=4))
            print('')
        # assemble the JSON payload (user_name is a hard-coded placeholder)
        user = {'user_name': 'LOL', 'keywords': keywords, 'concepts': concepts, 'language_id': language_id, 'relations': relations, 'categories': categories, 'taxonomies': taxonomies}
        return HttpResponse(json.dumps(user), content_type="application/json")
alchemyapi = AlchemyAPI() # API Key: ff8f993db5ee0b907a3e41f19bbd57b8b4cbc24a pp = pprint.PrettyPrinter(indent=4) #Read in data myText = "" with open("cg_entities_and_descr.csv", "r") as file: data = file.read() myText = data.split() print type(myText) print myText #categorize entities in Civic Graph based on their descriptions (data = 'text') for i in range(0, len(lis)): categ_result = alchemyapi.category('text', entities[i]); pp.pprint(categ_result) i += 1 #Test API call # categ_result = alchemyapi.category('text', "http://techpresident.com/news/25496/first-post-data-driven"); # pp.pprint(categ_result['category'])
    # Tally every extracted entity into the master `entities` dict.
    # NOTE(review): this chunk begins inside an `if` whose header lies before
    # this view; `entities`, `categories`, `pdf_file` and `full_text` are
    # also defined outside it. (Python 2: dict.has_key, print statements.)
    for entity in response['entities']:
        # add each entity to our master list
        if entities.has_key(entity['text']):
            entities[entity['text']] += int(entity['count'])
        else:
            entities[entity['text']] = int(entity['count'])
    print "[*] Retrieved %d entities from %s" % (len(entities),pdf_file)
else:
    print "[!] Error receiving Alchemy response: %s" % response['statusInfo']

# get the category
# count one occurrence of the document's category for this PDF
response = alchemyapi.category('text',full_text)
if response['status'] == 'OK':
    if categories.has_key(response['category']):
        categories[response['category']] += 1
    else:
        categories[response['category']] = 1
    print "[*] Categorized %s as %s" % (pdf_file,response['category'])

# grab the concepts
# NOTE(review): this `if` body continues past the end of this chunk.
response = alchemyapi.concepts('text',full_text)
if response['status'] == 'OK':
# Text-categorization demo: banner, then categorize demo_text and dump the
# response. NOTE(review): alchemyapi and demo_text are defined outside this
# chunk, earlier in the file.
print('')
print('')
print('')
print('############################################')
print('# Text Categorization Example #')
print('############################################')
print('')
print('')
print('Processing text: ', demo_text)
print('')
response = alchemyapi.category('text',demo_text)
if response['status'] == 'OK':
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Category ##')
    print('text: ', response['category'])
    print('score: ', response['score'])
    print('')
else:
    print('Error in text categorization call: ', response['statusInfo'])
from alchemyapi import AlchemyAPI
import json
import pprint

#Reads in file containing names of entities in Civic Graph and their associated descriptions
#Attempts to categorize entities in Civic Graph based on descriptions using AlchemyAPI's "category" function
# (Python 2 syntax: bare `print` statements.)

alchemyapi = AlchemyAPI()
# API Key: ff8f993db5ee0b907a3e41f19bbd57b8b4cbc24a
# NOTE(review): credentials committed in source — should be rotated/moved out.
pp = pprint.PrettyPrinter(indent=4)

#Read in data
myText = ""
with open("cg_entities_and_descr.csv", "r") as file:
    data = file.read()
    myText = data.split()
print type(myText)
print myText

#categorize entities in Civic Graph based on their descriptions (data = 'text')
# NOTE(review): `lis` and `entities` are never assigned anywhere in this
# script — the loop below raises NameError as written; possibly `myText` was
# intended. Also `i += 1` is a no-op inside a for-range loop.
for i in range(0, len(lis)):
    categ_result = alchemyapi.category('text', entities[i])
    pp.pprint(categ_result)
    i += 1

#Test API call
# categ_result = alchemyapi.category('text', "http://techpresident.com/news/25496/first-post-data-driven");
# pp.pprint(categ_result['category'])