def Entity_Extraction(self): print " ----------------------------" print "# STARTING ENTITY EXTRACTION:" print " ----------------------------" count = 0 os.system("python templates/alchemyapi.py 32449e7b4f6b65f9ef5cfd84b7128a46440a9402") startTime = datetime.now() # Create the AlchemyAPI Object alchemyapi = AlchemyAPI() for paragraph in self.targeted_paragraphs: response = alchemyapi.entities('text', paragraph, {'sentiment': 1}) if response['status'] == 'OK': print "DOCUMENT-LEVEL RESULTS: " print "ARTICLE TITLE: " , self.article_title[len(self.article_title) - len(self.article_title) + count] print 'ARTICLE URL: ' , self.article_url[len(self.article_url) - len(self.article_url) + count] print "DATA FRAME: " count = count + 1 for entity in response['entities']: entity_text = entity['text'] entity_type = entity['type'] entity_relevance = entity['relevance'] entity_sentiment_type = entity['sentiment']['type'] if 'score' in entity['sentiment']: entity_sentiment_score = entity['sentiment']['score'] df_entity_extraction = pd.DataFrame(data = {'text': [entity_text], 'type': [entity_type], 'relevance': [entity_relevance], 'sentiment': [entity_sentiment_type], 'sentiment_score': [entity_sentiment_score]}) print "***** ENTITY EXTRACTION RESULTS: *****" print df_entity_extraction.T df_transpose = df_entity_extraction.T entity_json_results = df_transpose.to_dict() ####### self.result_entity_extraction.append(entity_json_results) else: pass else: print 'Error in entity extraction call: ', response['statusInfo'] print "----------- Entity Extraction is completed. ---------------" print "Time Elapsed: " , datetime.now() - startTime execution_time = datetime.now() - startTime self.list_time_elapsed.append(execution_time)
def handle(self, *args, **options):
    """Django management-command entry point: backfill an `entities` field
    onto Elasticsearch rss/posting documents that do not have one yet.

    Fetches up to 500 postings (selected European languages, matching
    "merkel"), runs AlchemyAPI entity extraction over title + description,
    and writes the entity texts back via a partial document update.
    """
    es = elasticsearch.Elasticsearch(es_url)
    alchemyapi = AlchemyAPI()
    query = {
        "query": {
            "and": [
                # only documents that have not been annotated yet
                { "missing": { "field": "entities" } },
                { "terms": { "language": ['en', 'de', 'fr', 'it', 'es', 'pt'] } },
                { "match": { "_all": "merkel" }}
                #{ "range": { "published": { "gte" : "now-1d" } } }
            ]
        },
        "size": 500
    }
    res = es.search(index="rss", doc_type="posting", body=query)
    logger.info("%d documents found" % res['hits']['total'])
    for p in res['hits']['hits']:
        #logger.info('Extracting entities for - %s' % p['_id'])
        analyzed_text = p['_source']['title'] + ' ' + p['_source']['description']
        try:
            response = alchemyapi.entities("text", analyzed_text)
            # KeyError here (caught below) means the API response carried no
            # "entities" key, i.e. the extraction call failed
            entities = [ x['text'] for x in response["entities"] ]
            #logger.info("Entities: " + entities)
            es.update(index=p['_index'], doc_type=p['_type'], id=p['_id'],
                      body={"doc": {"entities": entities}})
        except KeyError:
            logger.exception("Problem getting sentiment :( %s" % response)
def convert_to_clean_titles(infile, outfile): alchemyapi = AlchemyAPI() f = open(infile, "r") f2 = codecs.open(outfile, "w+", "utf-8") f3 = codecs.open("Entities.txt", "w+", "utf-8") count = 1 for line in f: line = line.decode("utf-8") response = alchemyapi.entities('text', line, { 'sentiment': 1, 'disambiguate': 1 }) if response['status'] == 'OK': for entity in response['entities']: if "type" in entity.keys: if entity['type'] in [ 'Country', 'Holiday', 'Movie', 'MusicGroup', 'Organization', 'Person', 'PrintMedia', 'Region', 'StateOrCountry', 'TelevisionShow', 'TelevisionStation', 'Money', 'Company', 'GeographicFeature' ]: line = line.replace(entity['text'], entity['text'].title()) print >> f3, entity['text'], entity['type'], entity[ 'sentiment'] print >> f2, line, else: print >> f2, line, print count, line count += 1
def getCategory(demo_text):
    """Return a list of category labels for the first entity AlchemyAPI
    finds in `demo_text`; ["Undefined"] when the call fails or nothing is
    found.
    """
    alchemyapi = AlchemyAPI()
    # normalise to ASCII because the API call chokes on some unicode input
    demo_text = unicode(demo_text).encode("ascii", 'ignore')
    response = alchemyapi.entities('text', demo_text)
    if response['status'] == 'OK':
        #print(json.dumps(response, indent=4))
        if not len(response['entities']):
            return ["Undefined"]
        entity = response['entities'][0]
        #print('text: ', entity['text'].encode('utf-8'))
        #print('type: ', entity['type'])
        #print('relevance: ', entity['relevance'])
        # IDIOM FIX: dict.has_key() is deprecated (removed in Python 3);
        # the `in` operator is the supported spelling.
        if 'disambiguated' in entity and 'subType' in entity['disambiguated']:
            category = entity['disambiguated']['subType']
        else:
            category = [entity['type']]
    else:
        category = ["Undefined"]
    return category
def extract_entities(text):
    """
    find the category that input text belongs to
    INPUT: text -> input text that needs to be analyzed
    OUTPUT: list of entity dicts (text, relevance, type, count); [] when the
    AlchemyAPI call fails or no entity maps to a known category
    """
    alchemyapi = AlchemyAPI()
    entities = []
    response = alchemyapi.entities('text', text, {'sentiment': 0})
    if response['status'] != 'OK':
        print(response['status'])
        return []
    # BUG/IDIOM FIX: the original round-tripped the response through
    # json.dumps + JSONDecoder().decode — a no-op copy, since the SDK
    # already returns a parsed dict.
    for result in response.get("entities"):
        # avoid shadowing the builtin `type`
        entity_type = result.get("type")
        if entity_type in types.combinedTypes:
            # ambiguous entity type: fall back to document-level category
            entity_type = get_category(text)
            if entity_type in ('arts_entertainment', 'gaming', 'recreation'):
                entity_type = "entertainment"
            elif entity_type == 'sports':
                entity_type = "sports"
            elif entity_type in ('computers_internet', 'health', 'science_technology'):
                entity_type = "technology"
            elif entity_type == 'culture_politics':
                entity_type = "politics"
            else:
                continue
        else:
            if entity_type in types.entertainmentTypes:
                entity_type = "entertainment"
            elif entity_type in types.sportsTypes:
                entity_type = "sports"
            elif entity_type in types.politicsTypes:
                entity_type = "politics"
            elif entity_type in types.technologyTypes:
                entity_type = "technology"
            else:
                continue
        entities.append({
            "text": result["text"],
            "relevance": result["relevance"],
            "type": entity_type,
            "count": int(result["count"]),
        })
    return entities
def performEE(url):
    """Extract entities from the page at `url` and return a mapping of
    entity type -> entity text, keeping only entities whose relevance
    exceeds 0.1. Returns an empty dict when the API call fails."""
    response = AlchemyAPI().entities('url', url)
    if response['status'] != 'OK':
        return {}
    return {e['type']: e['text']
            for e in response['entities']
            if float(e['relevance']) > 0.1}
def extract_entities(text, lang):
    """Return a dict mapping each entity's UTF-8-encoded text to its
    converted type label; empty when the AlchemyAPI call fails.

    `lang` is accepted for interface compatibility but not used here.
    """
    response = AlchemyAPI().entities('text', text, {'sentiment': 1})
    if response['status'] != 'OK':
        return {}
    return {e['text'].encode('utf-8'): convert_label(e['type'])
            for e in response['entities']}
def getAlcData(arrOfObj): alchemyapi = AlchemyAPI() #for x in range(0,len(arrOfObj)): for x in range(0, 10): asc = unicodedata.normalize('NFKD', arrOfObj[x].text).encode('ascii','ignore') print x print asc arrOfObj[x].responseEntities = alchemyapi.entities('text',asc, { 'sentiment':1 }) arrOfObj[x].responseKeywords = alchemyapi.keywords('text',asc, { 'sentiment':1 })
def performEE(url):
    """Entity extraction for `url` with disambiguation disabled.

    Prints the raw entity list and each (relevance, text) pair, then returns
    {entity type: entity text} for entities with relevance above 0.1.
    """
    api = AlchemyAPI()
    response = api.entities('url', url, {'disambiguate': 0})
    relatedEntities = {}
    if response['status'] != 'OK':
        return relatedEntities
    found = response['entities']
    print(found)
    for item in found:
        print(item["relevance"] + " " + item["text"])
        if float(item['relevance']) > 0.1:
            relatedEntities[item["type"]] = item["text"]
    return relatedEntities
def ExtractEntity(text):
    """Print text/type/relevance for every entity AlchemyAPI finds in
    `text`; prints the API error info when the call fails."""
    # Create the AlchemyAPI client and request sentiment-tagged entities
    response = AlchemyAPI().entities('text', text, {'sentiment': 1})
    if response['status'] != 'OK':
        print('Error in entity extraction call: ', response['statusInfo'])
        return
    for entity in response['entities']:
        print('text: ', entity['text'].encode('utf-8'))
        print('type: ', entity['type'])
        print('relevance: ', entity['relevance'])
def main(): alchemyapi = AlchemyAPI() text = "I'm wondering if it will find a city name, like Abu Dhabi, but I hope so!" nytimes = "http://www.nytimes.com/" nytimes2 = 'http://www.nytimes.com/2014/02/14/world/asia/on-indian-tea-plantations-low-wages-and-crumbling-homes.html?ref=world' good_types = ['StateOrCounty', 'City', 'Country'] response = alchemyapi.entities('url', nytimes2) #print response #print response['entities'] for e in response['entities']: if e['type'] in good_types: print e['text'], e['type'] find_geocode(e['text'])
def extractNamedEntitiesFromText(self,txt): """method for extracting named entities from given text""" #creating AlchemyAPI object alchemyapi = AlchemyAPI() #requesting json response from AlchemyAPI server response = alchemyapi.entities('text',txt, { 'sentiment':1 }) if response['status'] == 'OK': for entity in response['entities']: #entity object for storing the properties of an entity entityObj = AlchemyStructure.Entity() #sentiment object for storing the sentiment properties related to an entity sentimentObj = AlchemyStructure.Sentiment() #extracting the name of the entity entityObj.setName(entity['text']) #extracting the type of the entity example Organization, Person, FieldTerminology ect entityObj.setType(entity['type']) #extracting the relevance of the entity for the particular type entityObj.setRelevance(entity['relevance']) #extracting the type of the sentiment associated with the entity -> positive, negative or neutral sentimentObj.setType(entity['sentiment']['type']) #extracting the score of the sentiment associated with the entity if entity['sentiment']['type'] == "neutral": sentimentObj.setScore("0") else: sentimentObj.setScore(entity["sentiment"]["score"]) #extracting the frequency of occurrence of the entity in the given text entityObj.setFreq(entity['count']) #setting the sentiment attached with the entity entityObj.setSentiment(sentimentObj) #insert the entity into the list of retrieved entities self.entitiesFromText.append(entityObj) else: print('Error in entity extraction call: ', response['statusInfo'])
def pos_with_entity_replaced_common_words(infile, outfile):
    """For each title line in `infile`, replace entity mentions in the
    corresponding line of common_word_pos.txt with the entity's type, and
    write the result to `outfile`.
    """
    alchemyapi = AlchemyAPI()
    common_word_pos = open("common_word_pos.txt", "r")
    title_data = open(infile, "r+")
    f2 = codecs.open(outfile, "w+", "utf-8")
    # BUG FIX: the original iterated `title_data, common_word_pos` — a tuple
    # of two file objects — not the paired lines; zip() walks both files in
    # lockstep as intended.
    for line1, line2 in zip(title_data, common_word_pos):
        response = alchemyapi.entities('text', line1, {
            'sentiment': 1,
            'disambiguate': 1
        })
        if response['status'] == 'OK':
            for entity in response['entities']:
                # BUG FIX: str.replace returns a new string; the original
                # discarded the result, so no replacement ever took effect.
                line2 = line2.replace(entity['text'], entity['type'])
            print >> f2, line2,
def extractNamedEntitiesFromUrl(self, url): """method for extracting named entities from given url""" # creating AlchemyAPI object alchemyapi = AlchemyAPI() # requesting json response from AlchemyAPI server response = alchemyapi.entities("url", url, {"sentiment": 1}) if response["status"] == "OK": for entity in response["entities"]: # entity object for storing the properties of an entity entityObj = AlchemyStructure.Entity() # sentiment object for storing the sentiment properties related to an entity sentimentObj = AlchemyStructure.Sentiment() # extracting the name of the entity entityObj.setName(entity["text"]) # extracting the type of the entity example Organization, Person, FieldTerminology ect entityObj.setType(entity["type"]) # extracting the relevance of the entity for the particular type entityObj.setRelevance(entity["relevance"]) # extracting the score of the sentiment associated with the entity if entity["sentiment"]["type"] == "neutral": sentimentObj.setScore("0") else: sentimentObj.setScore(entity["sentiment"]["score"]) # extracting the type of the sentiment associated with the entity -> positive, negative or neutral sentimentObj.setType(entity["sentiment"]["type"]) # extracting the frequency of occurrence of the entity in the given text of the url entityObj.setFreq(entity["count"]) # setting the sentiment attached with the entity entityObj.setSentiment(sentimentObj) # insert the entity into the list of retrieved entities self.entitiesFromUrl.append(entityObj) else: print("Error in entity extraction call: ", response["statusInfo"])
def generate_which(inputsentence,sent):
    """Generate "what/which"-style question-answer pairs from a parsed
    sentence.

    `inputsentence` is raw text (sent to AlchemyAPI to type the entities in
    it); `sent` is a token table where, per row, row[1] is the word form,
    row[2] the lemma, row[3] appears to be the lemma used for copula checks,
    row[5] the POS tag, and row[11] the dependency label — TODO confirm the
    exact column scheme against the parser that produced it.

    Returns a list of {'Q': [tokens...], 'A': [answer]} dicts, one per typed
    entity found in the sentence.

    NOTE(review): the source view had whitespace mangled; the nesting below
    is the most plausible reconstruction — verify against the original.
    """
    which=[]
    # map of entity surface text (utf-8) -> AlchemyAPI entity type
    typeoftext={}
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('text', inputsentence)
    if response['status'] == 'OK':
        for entity in response['entities']:
            typeoftext[entity['text'].encode('utf-8')]=entity['type']
    else:
        print >> log ,('Error in entity extraction call: ', response['statusInfo'])
    # build one question per typed entity
    for key in typeoftext:
        which_question=[]
        which_answer=[]
        for (i,row) in enumerate(sent):
            if row[11]=="ROOT":
                if row[3]=="be":
                    # copula root: front the verb itself
                    which_question.insert(0,row[1])
                    if sent[i+1][5]=="VBG":
                        which_question.append(sent[i+1][1])
                        i+=1
                else:
                    # non-copula root: choose auxiliary by the root's tense
                    if row[5]=="VBZ":
                        which_question.insert(0,"does")
                    elif row[5]=="VBD":
                        which_question.insert(0,"did")
                    elif row[5]=="VBG":
                        if which_question:
                            # rotate the last collected token to the front
                            top=which_question.pop()
                            which_question.insert(0,top)
                            which_question.append(row[1])
                            flag=True
                            continue
                        else:
                            which_question.insert(0,"do")
                    # use the lemma of the root verb in the question
                    which_question.append(row[2])
            elif row[1] == key:
                # reached the entity: prefix "what <Type>" and finish
                which_question.insert(0,typeoftext[row[1]])
                which_question.insert(0,"what")
                which_answer=[row[1]]
                which.append({'Q':which_question,'A':which_answer})
                break
            else:
                which_question.append(row[1])
    return which
class Alchemy(object):
    """Thin wrapper around AlchemyAPI for reading Atom/RSS feeds."""

    def __init__(self):
        # one shared AlchemyAPI client for all calls
        self.alchemy_api = AlchemyAPI()

    def processa_html(self, link):
        """Return the cleaned text extracted from the page at `link`."""
        return self.alchemy_api.text('url', link)['text']

    def obtem_titulo(self, link):
        """Return the page title extracted from the page at `link`."""
        return self.alchemy_api.title('url', link)['title']

    def obtem_entidades(self, texto):
        """Return the entities (with sentiment) found in `texto`."""
        return self.alchemy_api.entities('text', texto, {'sentiment': 1})
def entity_topic_extraction(self, myText):
    """Return (entity_list, taxonomy_list) for `myText`.

    entity_list holds (utf-8 text, utf-8 type) tuples; taxonomy_list holds
    utf-8 category labels. Either list is empty when its API call fails.
    """
    api = AlchemyAPI()
    # entities, with sentiment enabled
    entity_response = api.entities('text', myText, {'sentiment': 1})
    entity_list = []
    if entity_response['status'] == 'OK':
        entity_list = [(e['text'].encode('utf-8'), e['type'].encode('utf-8'))
                       for e in entity_response['entities']]
    # taxonomy categories
    taxonomy_response = api.taxonomy('text', myText)
    taxonomy_list = []
    if taxonomy_response['status'] == 'OK':
        taxonomy_list = [c['label'].encode('utf-8')
                         for c in taxonomy_response['taxonomy']]
    return entity_list, taxonomy_list
# Collect the file names directly under out_txt_path (top level only —
# the `break` stops os.walk after the first directory).
txt_name = []
for (dirpath, dirnames, filenames) in walk(out_txt_path):
    txt_name.extend(filenames)
    break
json_data = {}
entity_list = []
keywords_list = []
concept_list = []
# For each .txt file, run entity/keyword/concept extraction and accumulate
# flattened result dicts into the lists above.
for f in txt_name:
    if f[-3:] == "txt":
        full_text_path = out_txt_path + f
        with open(full_text_path, 'r') as current_txt_file:
            # collapse the file to a single line for the API call
            txt_data = current_txt_file.read().replace('\n', '')
            response_entities = alchemyapi.entities('text', txt_data)
            response_keywords = alchemyapi.keywords('text', txt_data)
            response_concepts = alchemyapi.concepts('text', txt_data)
            # NOTE(review): concepts status is never checked, unlike the
            # other two responses
            if response_entities['status'] == 'OK' and response_keywords['status'] == 'OK':
                print "status OK"
                for entity in response_entities["entities"]:
                    dict_temp = {'entity': entity['text'], 'type': entity['type'], 'relevance': entity['relevance']}
                    entity_list.append(dict_temp)
                for keyword in response_keywords["keywords"]:
                    dict_temp = {'keyword': keyword['text'], 'relevance': keyword['relevance']}
                    keywords_list.append(dict_temp)
                for concept in response_concepts['concepts']:
                    # NOTE(review): the source view is truncated here — the
                    # dict literal below is incomplete in this chunk.
                    dict_temp = {'concept': concept['text'],
#!/usr/bin/python # -*- coding: utf-8 -*- # cef5dcb639f382ce8db12ddd1a38a44311bf4d96 from alchemyapi import AlchemyAPI import json import os from os.path import join, getsize alchemyapi = AlchemyAPI() for root, dirs, files in os.walk('data/GOT5'): for name in files: print name with open(join(root, name),'r') as infile: text = infile.read() response = alchemyapi.entities('text', text, {'sentiment': 0}) if response['status'] == 'OK': with open(join(root, name)[:-4]+'-entities.json','w') as outfile: json.dump(response,outfile) else: print 'problem with ',name
print "[*] Parsing %s" % pdf_file pdf_obj = pyPdf.PdfFileReader(open(pdf_file,"rb")) full_text = "" # extract all of the text from each page for page in pdf_obj.pages: full_text += page.extractText() # let the Alchemy API extract entities print "[*] Sending %d bytes to the Alchemy API" % len(full_text) response = alchemyapi.entities('text', full_text, {'sentiment': 0}) if response['status'] == 'OK': # loop through the list of entities for entity in response['entities']: # add each entity to our master list if entities.has_key(entity['text']): entities[entity['text']] += int(entity['count']) else: entities[entity['text']] = int(entity['count']) print "[*] Retrieved %d entities from %s" % (len(entities),pdf_file)
class WatsonMagic:
    """Helpers combining IBM Watson ToneAnalyzer with AlchemyAPI entity
    extraction."""

    SOCIAL_TONES = ['Openness', 'Conscientiousness', 'Extraversion', 'Agreeableness', 'Emotional Range']
    # AlchemyAPI entity type -> plural display label
    ENTITY_MAPPING = {'City':'cities', 'Person':'people', 'JobTitle':'jobs',
                      'Organization':'organizations', 'Company':'companies',
                      'Sport':'sports', 'PrintMedia':'media', 'Country':'countries',
                      'FieldTerminology':'terms', 'StateOrCounty':'places',
                      'Holiday':'holidays'}

    def __init__(self):
        # credentials redacted in source
        self.tone_analyzer = ToneAnalyzerV3Beta(
            username='******',
            password='******',
            version='2016-02-11')
        self.alchemyapi = AlchemyAPI()
        # master tally of entity text -> total mention count
        self.entities = {}

    def extract_type_entities_from_alchemy(self, text):
        """Group entities found in `text` by their type and return the
        groups (lists of (text, entity) tuples) sorted by descending group
        size."""
        types = {}
        response = self.alchemyapi.entities('text', text, {'sentiment': 0})
        if response['status'] == 'OK':
            for entity in response['entities']:
                if entity['type'] in types:
                    types[entity['type']].append((entity['text'],entity))
                else:
                    types[entity['type']] = [(entity['text'],entity)]
            # NOTE(review): reports len(self.entities), but this method fills
            # the local `types`, so the printed count may be misleading
            print("[*] Retrieved {} entities from {}".format(len(self.entities), text))
        else:
            print("[!] Error receiving Alchemy response: %s" % response['statusInfo'])
            time.sleep(1)
        # now accumulate our most common terms and print them out
        sorted_type_keys = sorted(types, key=lambda x: len(types[x]),reverse=True)
        print(sorted_type_keys)
        sorted_types = []
        for k in sorted_type_keys:
            sorted_types.append(types[k])
        return sorted_types
        # types_counter = Counter(types)
        # top_types = types_counter.most_common()
        # print(top_types)
        #return top_types[0:5]

    def extract_most_popular_entitiesfrom_alchemy(self, text):
        """Tally entity mention counts from `text` into self.entities and
        print the 10 most common across all texts seen so far."""
        response = self.alchemyapi.entities('text', text, {'sentiment': 0})
        if response['status'] == 'OK':
            # loop through the list of entities
            for entity in response['entities']:
                # add each entity to our master list
                if entity['text'] in self.entities:
                    self.entities[entity['text']] += int(entity['count'])
                else:
                    self.entities[entity['text']] = int(entity['count'])
            print("[*] Retrieved {} entities from {}".format(len(self.entities), text))
        else:
            # NOTE(review): this print statement was split across lines in
            # the mangled source view; reconstructed here.
            print("[!] Error receiving Alchemy response: %s" % response['statusInfo'])
            time.sleep(1)
        # now accumulate our most common terms and print them out
        entity_counter = Counter(self.entities)
        top_entities = entity_counter.most_common()
        # let's take the top 10 entities UBL mentions
        for top_entity in top_entities[0:10]:
            # most_common returns a tuple (entity,count)
            print("%s => %d" % (top_entity[0], top_entity[1]))

    def get_tone_category_elements(self,category,text):
        """Return the tones list of the first tone category returned by the
        ToneAnalyzer for `text` analyzed under `category`."""
        t = self.tone_analyzer.tone(text=text, tones=category)['document_tone']['tone_categories'][0]['tones']
        return t
# Demo inputs for the AlchemyAPI calls below.
demo_url = 'http://www.npr.org/2013/11/26/247336038/dont-stuff-the-turkey-and-other-tips-from-americas-test-kitchen'
demo_html = '<html><head><title>Python Demo | AlchemyAPI</title></head><body><h1>Did you know that AlchemyAPI works on HTML?</h1><p>Well, you do now.</p></body></html>'
image_url = 'http://demo1.alchemyapi.com/images/vision/football.jpg'
reddit_url = 'http://www.reddit.com/r/worldnews'

#Create the AlchemyAPI Object
alchemyapi = AlchemyAPI()

locations = []
relevance = []
master_locations = []
# Extract entities from the reddit front page, restricting the analyzed
# source text to post titles via an XPath selector.
response = alchemyapi.entities('url', reddit_url, {'sourceText':'xpath', 'xpath':'//*[contains(@class,"title may-blank")]' })
if response['status'] == 'OK':
    for entity in response['entities']:
        # keep only geographic entity types
        if entity['type'] == 'Country' or entity['type'] == 'Region' or entity['type'] == 'City' or entity['type'] == 'StateOrCountry' or entity['type'] == 'Continent':
            currentRelevance = float(entity['relevance'])
            if entity.get('disambiguated'):
                # prefer the disambiguated canonical name when available
                locations.append(entity['disambiguated']['name'])
                relevance.append(currentRelevance)
            else:
                # NOTE(review): the source view is truncated here — the body
                # of this else branch is not visible in this chunk.
def main(): tmpdir = "/tmp/pagekicker" #personal api key saved as api_key.txt parser = argparse.ArgumentParser() parser.add_argument('path', help="target file or directory for NER") parser.add_argument('output', help="target file for output") parser.add_argument('uuid', help="uuid") args = parser.parse_args() in_file = args.path out_file = args.output uuid = args.uuid folder = os.path.join(tmpdir, uuid) print(folder) cwd = os.getcwd() apikey_location = os.path.join(cwd, "api_key.txt") with open(in_file) as f: text = f.read() alchemyapi = AlchemyAPI() # alchemyapi = alchemyapi(api_key='b887e176b6a650093c3d4ca635cd1b470be6584e') # result = alchemyapi.TextGetRankedNamedEntities(text,json) result = alchemyapi.entities('text', text, {'sentiment': 1}) root = ET.fromstring(result) place_list = ['City', 'Continent', 'Country', 'Facility', 'GeographicFeature',\ 'Region', 'StateOrCounty'] People = {} Places = {} Other = {} for entity in root.getiterator('entity'): if entity[0].text == 'Person': People[entity[3].text] = [entity[1].text, entity[2].text] elif entity[0].text in place_list: Places[entity[3].text] = [entity[1].text, entity[2].text] else: Other[entity[3].text] = [entity[1].text, entity[2].text] #print lists ordered by relevance Places_s = sorted(Places, key=Places.get, reverse=True) People_s = sorted(People, key=People.get, reverse=True) Other_s = sorted(Other, key=Other.get, reverse=True) with codecs.open(out_file, mode='w', encoding='utf-8') as o: listwrite(o, People_s) listwrite(o, Places_s) listwrite(o, Other_s) # out_file = os.path.join(folder, 'People') with codecs.open(out_file, mode='w', encoding='utf-8') as o: listwrite(o, People_s) out_file = os.path.join(folder, 'Places') with codecs.open(out_file, mode='w', encoding='utf-8') as o: listwrite(o, Places_s) out_file = os.path.join(folder, 'Other') with codecs.open(out_file, mode='w', encoding='utf-8') as o: listwrite(o, Other_s)
# Fetch a Quora user's activity feed (username from argv[1]) and analyze
# each activity item with AlchemyAPI.
url = "http://quora-api.herokuapp.com/users/" + sys.argv[1] + "/activity"
data = requests.get(url).json()
data = data["activity"]
payload = {}
# count=0
# getDocCount()
for activities in data:
    title = activities["title"]
    summary = activities["summary"]
    print title
    document["title"] = title
    document["summary"] = summary
    # classify the title; mine the HTML summary for entities, keywords and
    # document sentiment
    labels = al.taxonomy("text", title)
    entities = al.entities("html", summary)
    keywords = al.keywords("html", summary)
    sentiment = al.sentiment("html", summary)
    # print labels['taxonomy']
    # count+=1
    payload["entities"] = {}
    payload["keywords"] = []
    payload["sentiment"] = {}
    docNode = createDocNode(document)
    try:
        print "Yo"
        # take the top-ranked taxonomy label
        labels = labels["taxonomy"][0]["label"]
        print "Yo1"
        print labels
        labels = func(labels)
        print labels
        # NOTE(review): the source view is truncated here — the except
        # clause of this try block is not visible in this chunk.
# Fetch a Quora user's activity feed (username from argv[1]) and analyze
# each activity item with AlchemyAPI. (Near-duplicate of the block above,
# differing only in quote style.)
url = "http://quora-api.herokuapp.com/users/" + sys.argv[1] + "/activity"
data = requests.get(url).json()
data = data['activity']
payload = {}
#count=0
#getDocCount()
for activities in data:
    title = activities['title']
    summary = activities['summary']
    print title
    document['title'] = title
    document['summary'] = summary
    # classify the title; mine the HTML summary for entities, keywords and
    # document sentiment
    labels = al.taxonomy("text", title)
    entities = al.entities("html", summary)
    keywords = al.keywords("html", summary)
    sentiment = al.sentiment("html", summary)
    #print labels['taxonomy']
    #count+=1
    payload['entities'] = {}
    payload['keywords'] = []
    payload['sentiment'] = {}
    docNode = createDocNode(document)
    try:
        print "Yo"
        # take the top-ranked taxonomy label
        labels = labels['taxonomy'][0]['label']
        print "Yo1"
        print labels
        labels = func(labels)
        print labels
        # NOTE(review): the source view is truncated here — the except
        # clause of this try block is not visible in this chunk.
class AlchemyPost:
    """Fetch a Tumblr post and run the full suite of AlchemyAPI analyses
    over it, accumulating the results in self.content."""

    def __init__(self, post_tumblr, post_id, consumer_key, consumer_secret, oauth_token, oauth_secret):
        self.post_tumblr = post_tumblr   # blog identifier
        self.post_id = post_id           # id of the post to analyze
        self._init_tumblr(consumer_key, consumer_secret, oauth_token, oauth_secret)
        self._init_alchemy()

    def _init_tumblr(self, consumer_key, consumer_secret, oauth_token, oauth_secret):
        # authenticated Tumblr REST client
        self._client = pytumblr.TumblrRestClient(consumer_key, consumer_secret, oauth_token, oauth_secret)

    def _init_alchemy(self):
        self.alchemyapi = AlchemyAPI()
        # analysis results keyed by analysis kind
        self.content = {}

    def analyze_post(self):
        """Download the post content and run every Alchemy analysis on it."""
        self.post = self._get_content_post()
        self._alchemy_entities()
        self._alchemy_keywords()
        self._alchemy_concepts()
        self._alchemy_sentiment()
        self._alchemy_relations()
        self._alchemy_category()
        self._alchemy_feeds()
        self._alchemy_taxonomy()

    def print_content(self):
        """Pretty-print the accumulated analysis results."""
        print(json.dumps(self.content, indent=4))

    def _get_content_post(self):
        """Return the post rendered as an HTML snippet ('' for unsupported
        post types)."""
        print "*",
        infos = self._get_infos_post()
        self.title = ''
        self.tags = []
        if 'tags' in infos:
            self.tags = infos['tags']
        if infos['type'] == 'text':
            return self._get_content_text(infos)
        if infos['type'] == 'quote':
            return self._get_content_quote(infos)
        return ''

    def _get_infos_post(self):
        """Fetch the raw post dict from Tumblr ({} when not found)."""
        infos = self._client.posts(self.post_tumblr, id=self.post_id)
        if 'posts' in infos and len(infos['posts'])>0:
            return infos['posts'][0]
        return {}

    def _get_content_text(self, infos):
        # render a text post as title + body + tags
        content = "<h1>" + str(infos['title']) + "</h1>"
        content += " <br>" + str(infos['body'])
        content += " <br>" + " ".join(infos['tags'])
        return content

    def _get_content_quote(self, infos):
        # render a quote post as quote text + source + tags
        content = str(infos['text'])
        content += " <br>" + str(infos['source'])
        content += " <br>" + " ".join(infos['tags'])
        return content

    def _alchemy_entities(self):
        """Store entity results in self.content; False on API failure."""
        print ".",
        response = self.alchemyapi.entities('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['entities'] = response['entities']
        return True

    def _alchemy_keywords(self):
        """Store keyword results in self.content; False on API failure.
        (This method body was split across lines in the mangled source
        view; reconstructed here.)"""
        print ".",
        response = self.alchemyapi.keywords('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['keywords'] = response['keywords']
        return True

    def _alchemy_concepts(self):
        """Store concept results in self.content; False on API failure."""
        print ".",
        response = self.alchemyapi.concepts('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['concepts'] = response['concepts']
        return True

    def _alchemy_sentiment(self):
        """Store document sentiment in self.content; False on API failure."""
        print ".",
        response = self.alchemyapi.sentiment('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['sentiment'] = response['docSentiment']
        return True

    def _alchemy_relations(self):
        """Store relation results in self.content; False on API failure."""
        print ".",
        response = self.alchemyapi.relations('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['relations'] = response['relations']
        return True

    def _alchemy_category(self):
        """Store category + score in self.content; False on API failure."""
        print ".",
        response = self.alchemyapi.category('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['category'] = response['category']
        self.content['score'] = response['score']
        return True

    def _alchemy_feeds(self):
        """Store feed-detection results in self.content; False on failure."""
        print ".",
        response = self.alchemyapi.feeds('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['feeds'] = response['feeds']
        return True

    def _alchemy_taxonomy(self):
        """Store taxonomy results in self.content; False on API failure."""
        print ".",
        response = self.alchemyapi.taxonomy('html', self.post)
        if response['status'] != 'OK':
            return False
        self.content['taxonomy'] = response['taxonomy']
        return True
def key_entity(): print "storing keywords" # Create the AlchemyAPI Object alchemyapi = AlchemyAPI() art_keywords = {} art_entities = {} count = 0 graph = Graph("http://*****:*****@localhost:7474/db/data/") article_query = """MATCH (n:article) Return distinct n.url as url""" result = graph.cypher.execute(article_query) keyword_count = 0 entity_count = 0 art_count = 0 for arti in result: if count >= 1000: print "Alchemy limit exceeds" exit() art = arti['url'] article_node = """ MATCH (article:article{url:'"""+art+"""'}) SET article.processed = 'yes' Return article; """ article = graph.cypher.execute(article_node) if art not in art_keywords.keys(): art_keywords[art] = [] response = alchemyapi.keywords('url', art, {'sentiment': 1}) count = count + 1 art_count = art_count + 1 if response['status'] == 'OK': for keyword in response['keywords']: # print('text: ', keyword['text'].encode('utf-8')) key = str(keyword['text'].encode('utf-8')).replace("'","") art_keywords[art].append(key) rel_dict = {} rel_dict['relevance'] = keyword['relevance'] rel_dict['sentiment'] = keyword['sentiment']['type'] if 'score' in keyword['sentiment']: rel_dict['sentiment_score'] = keyword['sentiment']['score'] keyword_node = """ MERGE (keyword:Keywords{text:'"""+key+"""'}) Return keyword; """ at_keywords = graph.cypher.execute(keyword_node) if len(list(graph.match(start_node=article.one,end_node=at_keywords.one, rel_type=("has_keyword",rel_dict)))) == 0: pth = Path(article.one,("has_keyword",rel_dict),at_keywords.one) graph.create(pth) keyword_count = keyword_count + 1 if count >= 1000: print "Alchemy limit exceeds" exit() if art not in art_entities.keys(): art_entities[art] = [] response = alchemyapi.entities('url', art, {'sentiment': 1}) count = count + 1 if response['status'] == 'OK': for entities in response['entities']: # print('text: ', entities['text'].encode('utf-8')) key = str(entities['text'].encode('utf-8')).replace("'","") art_entities[art].append(key) rel_dict = {} rel_dict['type'] = 
entities['type'] rel_dict['relevance'] = entities['relevance'] rel_dict['sentiment'] = entities['sentiment']['type'] if 'score' in entities['sentiment']: rel_dict['sentiment_score'] = entities['sentiment']['score'] entities_node = """ MERGE (entities:Entities{text:'"""+key+"""'}) Return entities; """ if len(list(graph.match(start_node=article.one,end_node=at_entities.one, rel_type=("has_entity",rel_dict)))) == 0: at_entities = graph.cypher.execute(entities_node) pth = Path(article.one,("has_entity",rel_dict),at_entities.one) graph.create(pth) entity_count = entity_count + 1 return {'articles':str(art_count),'keywords':str(keyword_count),'entities':str(entity_count)}
data = file.read() myText = data print "ANALYSIS FOR " + f + " \n" # extract keywords # kr_0 = alchemyapi.keywords('text', test_text) # print "KEYWORDS: \n" # pp.pprint(kr_0) # extract taxonomy # tr_0 = alchemyapi.taxonomy('text', myText) # pp.pprint(tr_0) # extract entities er_0 = alchemyapi.entities('text', myText) print "ENTITIES: \n" for i in range(0, 50): pp.pprint(er_0['entities']) print(er_0['entities'][i]['text']) print(er_0['entities'][i]['relevance']) print(er_0['entities'][i]['type']) print(er_0['entities'][i]['count']) print '\n' # extract categories # cr_0 = alchemyapi.category('text', myText) # pp.pprint(cr_0) # extract concepts
#Create the AlchemyAPI Object alchemyapi = AlchemyAPI() print('') print('') print('############################################') print('# Entity Extraction Example #') print('############################################') print('') print('') print('Processing text: ', demo_text) print('') response = alchemyapi.entities('text',demo_text, { 'sentiment':1 }) if response['status'] == 'OK': print('## Response Object ##') print(json.dumps(response, indent=4)) print('') print('## Entities ##') for entity in response['entities']: print('text: ', entity['text'].encode('utf-8')) print('type: ', entity['type']) print('relevance: ', entity['relevance']) print('sentiment: ', entity['sentiment']['type']) if 'score' in entity['sentiment']: print('sentiment score: ' + entity['sentiment']['score'])
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function from alchemyapi import AlchemyAPI test_text = 'Bob broke my heart, and then made up this silly sentence to test the PHP SDK' test_html = '<html><head><title>The best SDK Test | AlchemyAPI</title></head><body><h1>Hello World!</h1><p>My favorite language is PHP</p></body></html>' test_url = 'http://www.nytimes.com/2013/07/13/us/politics/a-day-of-friction-notable-even-for-a-fractious-congress.html?_r=0' alchemyapi = AlchemyAPI() #Entities print('Checking entities . . . ') response = alchemyapi.entities('text', test_text) assert (response['status'] == 'OK') response = alchemyapi.entities('html', test_html) assert (response['status'] == 'OK') response = alchemyapi.entities('url', test_url) assert (response['status'] == 'OK') response = alchemyapi.entities('random', test_url) assert (response['status'] == 'ERROR') #invalid flavor print('Entity tests complete!') print('') #Keywords print('Checking keywords . . . ') response = alchemyapi.keywords('text', test_text) assert (response['status'] == 'OK') response = alchemyapi.keywords('html', test_html)
# Create the AlchemyAPI Object alchemyapi = AlchemyAPI() print("") print("") print("############################################") print("# Entity Extraction Example #") print("############################################") print("") print("") print("Processing text: ", demo_text) print("") response = alchemyapi.entities("text", demo_text, {"sentiment": 1}) if response["status"] == "OK": print("## Response Object ##") print(json.dumps(response, indent=4)) print("") print("## Entities ##") for entity in response["entities"]: print("text: ", entity["text"].encode("utf-8")) print("type: ", entity["type"]) print("relevance: ", entity["relevance"]) print("sentiment: ", entity["sentiment"]["type"]) if "score" in entity["sentiment"]: print("sentiment score: " + entity["sentiment"]["score"]) print("")
from alchemyapi import AlchemyAPI

alchemyapi = AlchemyAPI()
entities = {}

# Read the whole input document; the same name is rebound from the file
# handle to its contents (the handle itself is never closed).
mytext = open('mytext.txt','r')
mytext = mytext.read()

# Three separate AlchemyAPI calls over the same text.
response1 = alchemyapi.entities("text",mytext)
response = alchemyapi.keywords("text",mytext)
response3 = alchemyapi.relations("text",mytext)

# Grab the top 2 entities and top 6 keywords by position.
# NOTE(review): these fixed indices raise IndexError (or KeyError on an API
# error response) if the service returns fewer results — verify the expected
# input always yields at least this many.
en1 = response1['entities'][0]['text']
en2 = response1['entities'][1]['text']
key1 = response['keywords'][0]['text']
key2 = response['keywords'][1]['text']
key3 = response['keywords'][2]['text']
key4 = response['keywords'][3]['text']
key5 = response['keywords'][4]['text']
key6 = response['keywords'][5]['text']
counter = 0 while(counter != 4): url = ('https://ajax.googleapis.com/ajax/services/search/web' '?v=1.0&q=Google%20wokrers&start='+str(counter)) request = urllib2.Request(url, None, {}) response = urllib2.urlopen(request) # Process the JSON string. results = simplejson.load(response) i = 0 for i in range(0, len(results.get("responseData").get("results")), 1): print results.get("responseData").get("results")[i].get("title") response = alchemyapi.entities('text', title, {'sentiment': 1}) if response['status'] == 'OK': for entity in response['entities']: if(entity['sentiment']['type'] == 'negative'): negative += 1 elif(entity['sentiment']['type'] == 'positive'): positive += 1 else: neutral +=1 else: print('Error in entity extraction call: ', response['statusInfo']) counter += 4 print negative print positive
data = file.read() myText = data print "ANALYSIS FOR " + f + " \n" # extract keywords # kr_0 = alchemyapi.keywords('text', test_text) # print "KEYWORDS: \n" # pp.pprint(kr_0) # extract taxonomy # tr_0 = alchemyapi.taxonomy('text', myText) # pp.pprint(tr_0) # extract entities er_0 = alchemyapi.entities('text', myText) print "ENTITIES: \n" for i in range(0, 50): pp.pprint(er_0['entities']) print(er_0['entities'][i]['text']) print(er_0['entities'][i]['relevance']) print(er_0['entities'][i]['type']) print(er_0['entities'][i]['count']) print '\n' # extract categories # cr_0 = alchemyapi.category('text', myText) # pp.pprint(cr_0) # extract concepts # cr_0 = alchemyapi.concepts('text', myText)
) alchemyapi = AlchemyAPI() print('') print('') print('############################################') print('# Entity Extraction Example #') print('############################################') print('') print('') print('Processing text: ', demo_text) print('') response = alchemyapi.entities('text', demo_text, {'sentiment': 1}) if response['status'] == 'OK': print('## Response Object ##') print(json.dumps(response, indent=4)) print('') print('## Entities ##') for entity in response['entities']: print('text: ', entity['text']) print('type: ', entity['type']) print('relevance: ', entity['relevance']) print( 'sentiment: ', entity['sentiment']['type'] + ' (' + entity['sentiment']['score'] + ')') print('')
def process(query, in_queue, out_queue):
    """
    The worker thread to grab a found Tweet off the queue and calculate the
    sentiment via AlchemyAPI.

    It calculates the document-level sentiment for the entire tweet, and it
    will also attempt to calculate entity-level sentiment if the query
    string is identified as an entity. If the query string is not identified
    as an entity for the tweet, no entity level sentiment will be returned.

    INPUT:
    query -> the query string that was used in the Twitter API search
             (i.e. "Denver Broncos")
    in_queue -> the shared input queue that is filled with the found tweets.
    out_queue -> the shared output queue that is filled with the analyzed
                 tweets.

    OUTPUT:
    None
    """
    #Create the AlchemyAPI object
    alchemyapi = AlchemyAPI()

    # Runs forever; blocks on the queue until the next tweet arrives.
    while True:
        #grab a tweet from the queue
        tweet = in_queue.get()

        #init
        tweet['sentiment'] = {}

        try:
            #calculate the sentiment for the entity
            response = alchemyapi.entities('text',tweet['text'], { 'sentiment': 1 })
            if response['status'] == 'OK':
                for entity in response['entities']:
                    #Check if we've found an entity that matches our query
                    if entity['text'] == query:
                        tweet['sentiment']['entity'] = {}
                        tweet['sentiment']['entity']['type'] = entity['sentiment']['type']

                        #Add the score (it's not returned if type=neutral)
                        if 'score' in entity['sentiment']:
                            tweet['sentiment']['entity']['score'] = entity['sentiment']['score']
                        else:
                            tweet['sentiment']['entity']['score'] = 0

                        #Only 1 entity can possibly match the query, so exit the loop
                        break;

            #calculate the sentiment for the entire tweet
            response = alchemyapi.sentiment('text',tweet['text'])
            if response['status'] == 'OK':
                tweet['sentiment']['doc'] = {}
                tweet['sentiment']['doc']['type'] = response['docSentiment']['type']

                #Add the score (it's not returned if type=neutral)
                if 'score' in response['docSentiment']:
                    tweet['sentiment']['doc']['score'] = response['docSentiment']['score']
                else:
                    tweet['sentiment']['doc']['score'] = 0

            #add the result to the output queue
            out_queue.put(tweet)

        except Exception as e:
            #if there's an error, just move on to the next item in the queue
            print 'Uh oh, this just happened: ', e
            pass

        #signal that the task is complete
        in_queue.task_done()
# NOTE(review): everything down to `return filetext` is the tail of a
# function (presumably main()) whose `def` line lies above this excerpt.
    uuid = args.uuid
    folder = os.path.join(tmpdir, uuid)
    print(folder)
    cwd = os.getcwd()
    # Location of the AlchemyAPI key file; computed but not used in this
    # excerpt — presumably consumed elsewhere.  TODO confirm.
    apikey_location = os.path.join(cwd, "api_key.txt")
    print(in_file)
    with open(in_file) as f:
        filetext = f.read()
    return filetext


# Run the extraction above and analyse the resulting text with AlchemyAPI.
filetext = main()
alchemyapi = AlchemyAPI()
response = alchemyapi.entities('text', filetext, {'sentiment': 1})
if response['status'] == 'OK':
    print(json.dumps(response, indent=4))
    for entity in response['entities']:
        print('text: ', entity['text'].encode('utf-8'))
        print('type: ', entity['type'])
        print('relevance: ', entity['relevance'])
        print('sentiment: ', entity['sentiment']['type'])
        # 'score' is only returned for non-neutral sentiment.
        if 'score' in entity['sentiment']:
            print('sentiment score: ' + entity['sentiment']['score'])
        print('')
else:
    print('Error in entity extraction call: ', response['statusInfo'])
def createGraph(ac_results):
    """Build a NetworkX graph of entities/relations extracted by AlchemyAPI.

    ac_results maps a query string to a list of text snippets.  Each snippet
    is run through the relations and entities endpoints: relation subjects
    and objects become nodes (tagged with their originating query and,
    where available, an entity type and a sentiment score), and
    subject-action-object triples become weighted edges.

    Returns the populated graph.  Python 2 only (iterkeys, print
    statements).  NOTE(review): `topic` in the write_gexf call is a global
    defined elsewhere — confirm it is set before calling this.
    """
    alchemyapi = AlchemyAPI()
    g=nx.Graph()
    # Progress accounting over all snippets across all queries.
    total = 0.0
    i = 0.0
    for key in sorted(ac_results.iterkeys()):
        total += len(ac_results[key])
    for key in sorted(ac_results.iterkeys()):
        print "\nAlchemyAPI is now intepreting all of the "+key+" queries...\n"
        for item in ac_results[key]:
            i +=1.0
            percent_complete = round((i/total)*100.0, 0)
            # print str(i) +" / "+str(total)+" - "+item
            print str(int(i)) +" / "+str(int(total))+" "+str(percent_complete) +"% " + item
            # Two API calls per snippet: relations (with per-relation
            # sentiment) and plain entities.
            response_relations = alchemyapi.relations('text',item, {'entities':1, 'sentiment':1})
            response_entities = alchemyapi.entities('text',item, { 'sentiment':0 })
            if response_relations['status'] == 'OK':
                for relation in response_relations['relations']:
                    # red.publish('chat', "found relation!")
                    if 'subject' in relation:
                        subject = relation['subject']['text']
                        g.add_node(subject, query=key)
                        if 'entities' in relation['subject']:
                            g.node[subject]['type'] = relation['subject']['entities'][0]['type']
                        if 'sentimentFromObject' in relation['subject']:
                            # print relation['subject']['sentimentFromObject']['score']
                            g.node[subject]['sentiment'] = float(relation['subject']['sentimentFromObject']['score'])
                        # A plain 'sentiment' entry overwrites the
                        # sentimentFromObject score when both are present.
                        if 'sentiment' in relation['subject']:
                            # print relation['subject']['sentiment']['score']
                            g.node[subject]['sentiment'] = float(relation['subject']['sentiment']['score'])
                    if 'object' in relation:
                        object_ = relation['object']['text']
                        g.add_node(object_, query=key)
                        if 'entities' in relation['object']:
                            g.node[object_]['type'] = relation['object']['entities'][0]['type']
                        if 'sentimentFromSubject' in relation['object']:
                            # print relation['object']['sentimentFromSubject']['score']
                            g.node[object_]['sentiment'] = float(relation['object']['sentimentFromSubject']['score'] )
                        if 'sentiment' in relation['object']:
                            # print relation['object']['sentiment']['score']
                            g.node[object_]['sentiment'] = float(relation['object']['sentiment']['score'])
                    # Add a weighted subject--object edge for complete
                    # subject/action/object triples; errors are swallowed.
                    try:
                        if all(x in ['subject', 'action', 'object'] for x in relation):
                            n1 = relation['subject']['text']
                            a = relation['action']['text']
                            n2 = relation['object']['text']
                            if g.has_edge(n1,n2):
                                g[n1][n2]['weight'] += 1
                            else:
                                g.add_edge(n1,n2, weight=1, relation=a)
                    except:
                        pass
                    # Also add every entity found in this snippet as a node.
                    try:
                        for entity in response_entities['entities']:
                            g.add_node(entity['text'], type=entity['type'], query=key)
                    except:
                        continue
                # Checkpoint the graph to disk after each snippet's relations.
                nx.write_gexf(g, topic+".gexf")
            else:
                print "AlchemyAPI is not responding."
    return g
#raw_input() tokens = WhitespaceTokenizer().tokenize(at_less.lower()) stopped_tokens = [i for i in tokens if not i in en_stop] stopped_tokens2 = [i for i in stopped_tokens if not i in gen_stop] stemmed_tokens = [p_stemmer.stem(i) for i in stopped_tokens2] topic.append(stemmed_tokens) dictionary = corpora.Dictionary(topic) corpus = [dictionary.doc2bow(text1) for text1 in topic] ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics=2, id2word = dictionary, passes=20) tweet_data['topic'] = ldamodel.print_topics(num_topics=1, num_words=1) print (ldamodel.print_topics(num_topics=1, num_words=1)) #Alchemy Stuff: print cnt cnt=cnt+1 if cnt>0: response = json.loads(json.dumps(alchemyapi.entities('text', trans_text , {'sentiment': 1}))) print 'heya' # size=len(response['entities']) flag=0 ent=[] ent_rele=[] ent_type=[] if response['status'] == 'OK': flag=1 for entity in response['entities']: ent.append(entity['text']) ent_rele.append(entity['relevance']) ent_type.append(entity['type']) else: print('Error in entity extraction call: ', response['statusInfo']) if flag==1:
class GoldenGlobeAnalyzer:
    """Mine a dump of Golden Globes tweets (one JSON tweet per line) for
    hosts, presenters, winners, nominees and audience sentiment.

    Python 2 only (print statements, dict.iteritems, list-returning map).
    Relies on module-level imports (json, re, nltk, operator, AlchemyAPI)
    and an Award class defined elsewhere in this project.
    """

    def __init__(self, jsonFile, category_list = None, debug = False):
        '''Initialize a GGA object and load the tweets contained in the json file'''
        self.debug = debug
        # Stop counting entities for a task once one reaches this many hits.
        self._entity_count_cutoff = 80
        self.tweets = []
        with open(jsonFile, 'r') as f:
            # Py2 map returns a list, so the file is fully read before close.
            self.tweets = map(json.loads, f)
        print "-- read tweets\n"
        self.awards = []
        # NOTE(review): iterating category_list raises TypeError when the
        # default None is used — callers apparently always pass a list.
        for c in category_list:
            self.awards.append(Award(c[0],c[1],c[2]))
        self.hosts = [None,None]
        self.alchemyapi = AlchemyAPI()

    def _get_permutations_internal(self,lst):
        """Return {'A B': 0, ...} for every ordered pair of distinct items."""
        permutations = {}
        for i in range(len(lst)):
            for j in range(len(lst)):
                if i != j:
                    name = lst[i] + " " + lst[j]
                    permutations[name] = 0
        return permutations

    def find_tweets_by_user(self, username):
        '''Find all the tweets by a given user whose user name is :param username'''
        ret = []
        for t in self.tweets:
            if (t['user']['screen_name'].lower() == username.lower()):
                ret.append(t)
        return ret

    def find_tweets_containing(self, keyword):
        """Return every loaded tweet whose text contains keyword (case-sensitive)."""
        ret = []
        for t in self.tweets:
            if keyword in t['text']:
                ret.append(t)
        return ret

    def get_sentiment_of_tweets(self,tweet_lump):
        """Document-level AlchemyAPI sentiment for a blob of tweet text.

        Returns the sentiment dict on success, or 0.0 on error / absence.
        """
        response = self.alchemyapi.sentiment('text',tweet_lump.encode('ascii','ignore'))
        if response['status'] != u'ERROR':
            if 'sentiment' in response:
                return response['sentiment']
            elif 'docSentiment' in response:
                return response['docSentiment']
        return 0.0

    def get_entities(self,tweet_text):
        """NLTK PERSON/ORGANIZATION extraction with award-show boilerplate
        words ('golden', 'globes', ...) filtered out."""
        tweet_text = tweet_text.encode('utf-8')
        tokens = nltk.word_tokenize(tweet_text)
        tagged = nltk.pos_tag(tokens)
        chunks = nltk.chunk.ne_chunk(tagged)
        entity_names = []
        for word_tuple in chunks.pos():
            if word_tuple[1] == 'PERSON' or word_tuple[1] == 'ORGANIZATION':
                if word_tuple[0][0].lower() != "golden" and word_tuple[0][0].lower() != "globes" and word_tuple[0][0].lower() != "goldenglobes" and word_tuple[0][0].lower() != "best":
                    entity_names.append(word_tuple[0])
        return entity_names

    def find_presenters(self):
        """Guess the (up to two) presenters of every award from 'present*'
        tweets; falls back to AlchemyAPI entities when the NLTK-based name
        pairing finds nothing.  Results land in award.presenters.
        """
        blacklist = ""
        for award in self.awards:
            blacklist += award.winner.lower()
        for award in self.awards:
            relevant = []
            cont = True
            for t in self.tweets:
                if re.findall(r"[pP]resent*",t["text"]) and re.findall(award.re,t["text"]):
                    relevant.append(t)
            ent_dict = {}
            for tweet in relevant:
                ents = self.get_entities(tweet["text"])
                for e in ents:
                    entity = e[0]
                    if (entity.lower() not in award.long_name.lower()) and (entity.lower() not in blacklist):
                        if entity in ent_dict:
                            ent_dict[entity] += 1
                            #print ent_dict[e]
                        else:
                            ent_dict[entity] = 1
                        if ent_dict[entity] > 30: # cutoff to improve performance
                            cont = False
                if not cont:
                    break
            sorted_ents = sorted(ent_dict.iteritems(), key=operator.itemgetter(1), reverse = True)
            # Keep up to 8 of the most frequent name fragments and try every
            # ordered pairing of them against the tweets.
            name_parts = []
            for i in range(min(len(sorted_ents),8)):
                name_parts.append(sorted_ents[i][0])
            presenter_idx = 0
            possible_name_combos = self._get_permutations_internal(name_parts)
            cont = True
            for tweet in relevant:
                for name in possible_name_combos:
                    if name in tweet["text"]:
                        possible_name_combos[name] += 1
                        if possible_name_combos[name] > 0:
                            award.presenters[presenter_idx] = name
                            presenter_idx += 1
                            if presenter_idx > 1:
                                cont = False
                                break
                            possible_name_combos[name] -= 1000 # ugly hack but it works
                if not cont:
                    break
            if award.presenters[0] == "None":
                # Fallback: let AlchemyAPI pick entities over all matching
                # tweets for this award.
                for t in self.tweets:
                    if re.findall(award.re,t["text"]):
                        relevant.append(t)
                ent_dict2 = {}
                relevant_text = ""
                for r in relevant:
                    relevant_text += r["text"] + "\n"
                response = self.alchemyapi.entities('text',relevant_text.encode('ascii','ignore'))
                if response['status'] == 'OK':
                    for entity in response['entities']:
                        ent_txt = entity["text"].encode('ascii','ignore')
                        if ent_txt.lower() != "goldenglobes" and ent_txt.lower() != "golden globes":
                            if ent_txt in ent_dict2:
                                ent_dict2[ent_txt] += 1
                            else:
                                ent_dict2[ent_txt] = 1
                    sorted_ents = sorted(ent_dict2.iteritems(), key=operator.itemgetter(1), reverse = True)
                    for s in range(min(len(sorted_ents),2)):
                        award.presenters[s] = sorted_ents[s][0]
                        blacklist += " " + sorted_ents[s][0].lower()
            print "-- " + award.presenters[0] + ((" and " + award.presenters[1] ) if award.presenters[1] != "None" else "") + " presented " + award.long_name
        return

    def print_presenters(self):
        """Print the presenters previously found by find_presenters()."""
        for award in self.awards:
            print award.presenters[0] + ((" and " + award.presenters[1] ) if award.presenters[1] != "None" else "") + " presented " + award.long_name

    def find_hosts(self):
        """Identify the two show hosts from 'host*' tweets via entity counts
        and ordered name pairing; results land in self.hosts."""
        relevant = []
        new_tweets = []
        cont = True
        for t in self.tweets:
            if len(re.findall("host*",t["text"])) > 0:
                relevant.append(t)
            # else:
            #     new_tweets.append(t)
        #self.tweets = new_tweets
        ent_dict = {}
        for tweet in relevant:
            ents = self.get_entities(tweet["text"])
            for e in ents:
                entity = e[0]
                if entity in ent_dict:
                    ent_dict[entity] += 1
                else:
                    ent_dict[entity] = 1
                if ent_dict[entity] > self._entity_count_cutoff: # cutoff to improve performance
                    cont = False
            if not cont:
                break
        sorted_ents = sorted(ent_dict.iteritems(), key=operator.itemgetter(1), reverse = True)
        ## two first names and two last names, so now we just need to figure out which of them belong together
        name_parts = []
        for i in range(4):
            name_parts.append(sorted_ents[i][0])
        host_idx = 0
        possible_name_combos = self._get_permutations_internal(name_parts)
        for tweet in relevant:
            for name in possible_name_combos:
                if name in tweet["text"]:
                    possible_name_combos[name] += 1
                    if possible_name_combos[name] > 10:
                        print "-- " + name + " was a host"
                        self.hosts[host_idx] = name
                        host_idx += 1
                        if host_idx > 1:
                            return
                        possible_name_combos[name] -= 1000 # ugly hack but it works
        return

    def print_hosts(self):
        """Print the hosts previously found by find_hosts()."""
        for h in self.hosts:
            print h + " was a host\n"

    def find_winners(self):
        """Extract winners from 'wins'/'won' tweets, match them to awards by
        regex, and pick the most likely candidate per award.  Consumes the
        matching tweets (they are removed from self.tweets)."""
        tweets_lst = []
        new_tweets = []
        for tweet in self.tweets:
            if ("wins" in tweet["text"] or "won" in tweet["text"]) and "should" not in tweet["text"]:
                # Strip links, leading retweet prefix, @-handles and noise
                # characters from the claim text.
                info = (tweet["text"].partition("http")[0])
                info = info.partition(":")[2]
                if "@" in info:
                    info = info.partition("@")[2]
                info=info.replace("#","")
                info = info.replace('"','')
                tweets_lst.append(info)
            else:
                new_tweets.append(tweet)
        tweets_lst = list(set(tweets_lst))
        self.tweets = new_tweets
        for i in tweets_lst:
            # Split each claim into <winner> wins/won <category>.
            if "wins" in i:
                i = i.partition("wins")
            else:
                i = i.partition("won")
            winner = i[0]
            category = i[2]
            for award in self.awards:
                if re.findall(award.re,category):
                    award.winner_candidates.append(winner)
                    break
        for award in self.awards:
            award.extract_most_likely_winner()
            print award.winner + "won " + award.long_name
        return

    def print_winners(self):
        """Print the winners previously found by find_winners()."""
        for award in self.awards:
            print award.winner + "won " + award.long_name
        print "\n"

    def find_nominees(self):
        """Guess up to five nominees per award from 'nominated/nominee' and
        'should have won' tweets using AlchemyAPI entity counts."""
        blacklist = ""
        for award in self.awards:
            blacklist += award.winner.lower()
        for award in self.awards:
            relevant = []
            cont = True
            for t in self.tweets:
                if re.findall(r"([nN]ominated.*[fF]or)|([nN]ominee)",t["text"]) and re.findall(award.re,t["text"]) and "should" not in t["text"] and "wasn't" not in t["text"]:
                    relevant.append(t)
                elif re.findall(r"\b[sS]hould.*[wW]on\b",t["text"]) and re.findall(award.re,t["text"]):
                    relevant.append(t)
            ent_dict = {}
            relevant_text = ""
            for r in relevant:
                relevant_text += r["text"] + "\n"
            response = self.alchemyapi.entities('text',relevant_text.encode('ascii','ignore'))
            if response['status'] == 'OK':
                for entity in response['entities']:
                    ent_txt = entity["text"].encode('ascii','ignore')
                    if ent_txt.lower() != "goldenglobes" and ent_txt.lower() != "golden globes":
                        if ent_txt in ent_dict:
                            ent_dict[ent_txt] += 1
                        else:
                            ent_dict[ent_txt] = 1
                sorted_ents = sorted(ent_dict.iteritems(), key=operator.itemgetter(1), reverse = True)
                for s in range(min(len(sorted_ents),5)):
                    award.nominees[s] = sorted_ents[s][0]
            print "The nominees for " + award.long_name + " are " + award.nominees[0] + ", " + award.nominees[1] + ", " + award.nominees[2] + ", " + award.nominees[3] + " and " + award.nominees[4]

    def find_popularity_of_winners(self):
        """Score each winner's popularity via tweet sentiment and report the
        most / least popular winner."""
        for award in self.awards:
            if award.winner != "None":
                relevant_text = ""
                for t in self.tweets:
                    if award.winner in t["text"]:
                        relevant_text += award.winner + '\n'
                award.popularity = self.get_sentiment_of_tweets(relevant_text)
        most_popular = self.awards[0]
        least_popular = self.awards[0]
        for award in self.awards:
            if award.popularity > most_popular.popularity:
                most_popular = award
            elif award.popularity < least_popular.popularity:
                least_popular = award
        print "-- The most popular winner was " + most_popular.winner + " and the least popular winner was " + least_popular.winner
        return

    def find_dress_opinions(self):
        """Rank winners' and hosts' dresses by tweet sentiment (positive
        above a 0.15 score threshold, the rest negative)."""
        _all_positive = []
        _all_negative = []
        people = []
        for award in self.awards:
            people.append(award.winner)
        for host in self.hosts:
            people.append(host.encode('ascii','ignore'))
        for person in people:
            if person != "None":
                relevant_txt = ""
                for t in self.tweets:
                    if person in t["text"] and re.findall(r"\b[dD]ress\b",t["text"]):
                        relevant_txt += t["text"]
                sentiment_score = self.get_sentiment_of_tweets(relevant_txt)
                # get_sentiment_of_tweets returns 0.0 (a float) on failure,
                # so non-float means a real sentiment dict came back.
                if type(sentiment_score) is not float:
                    #print person + " : " + str(sentiment_score["score"])
                    if float(sentiment_score["score"]) > 0.15:
                        _all_positive.append([sentiment_score["score"],person])
                    elif sentiment_score != 0.0:
                        _all_negative.append([sentiment_score["score"],person])
        _all_positive = sorted(_all_positive, key = lambda tup : tup[0])
        _all_negative = sorted(_all_negative, key = lambda tup : tup[0])
        print "People liked these people's dresses (in descending order)"
        for a in _all_positive:
            print " - " + a[1]
        print "People didn't like these people's dresses very much (in descending order)"
        for a in _all_negative:
            print " - " + a[1]
from __future__ import print_function
from alchemyapi import AlchemyAPI

# Fixed fixtures covering each input "flavor" the SDK accepts.
test_text = 'Bob broke my heart, and then made up this silly sentence to test the PHP SDK'
test_html = '<html><head><title>The best SDK Test | AlchemyAPI</title></head><body><h1>Hello World!</h1><p>My favorite language is PHP</p></body></html>'
test_url = 'http://www.nytimes.com/2013/07/13/us/politics/a-day-of-friction-notable-even-for-a-fractious-congress.html?_r=0'

alchemyapi = AlchemyAPI()

#Entities
# Smoke-test the entities endpoint for each flavor in turn; an unknown
# flavor must report status 'ERROR'.
print('Checking entities . . . ')
for flavor, payload, expected_status in (
        ('text', test_text, 'OK'),
        ('html', test_html, 'OK'),
        ('url', test_url, 'OK'),
        ('random', test_url, 'ERROR')):  #invalid flavor
    response = alchemyapi.entities(flavor, payload)
    assert response['status'] == expected_status
print('Entity tests complete!')
print('')

#Keywords
print('Checking keywords . . . ')
response = alchemyapi.keywords('text', test_text)
assert response['status'] == 'OK'
def process(query, in_queue, out_queue): """ The worker thread to grab a found Tweet off the queue and calculate the sentiment via AlchemyAPI. It calculates the document-level sentiment for the entire tweet, and it will also attempt to calculate entity-level sentiment if the query string is identified as an entity. If the query string is not identified as an entity for the tweet, no entity level sentiment will be returned. INPUT: query -> the query string that was used in the Twitter API search (i.e. "Denver Broncos") in_queue -> the shared input queue that is filled with the found tweets. out_queue -> the shared output queue that is filled with the analyzed tweets. OUTPUT: None """ #Create the AlchemyAPI object alchemyapi = AlchemyAPI() while True: #grab a tweet from the queue tweet = in_queue.get() #init tweet['sentiment'] = {} try: #calculate the sentiment for the entity response = alchemyapi.entities('text',tweet['text'], { 'sentiment': 1 }) if response['status'] == 'OK': for entity in response['entities']: #Check if we've found an entity that matches our query if entity['text'] == query: tweet['sentiment']['entity'] = {} tweet['sentiment']['entity']['type'] = entity['sentiment']['type'] #Add the score (it's not returned if type=neutral) if 'score' in entity['sentiment']: tweet['sentiment']['entity']['score'] = entity['sentiment']['score'] else: tweet['sentiment']['entity']['score'] = 0 #Only 1 entity can possibly match the query, so exit the loop break #calculate the sentiment for the entire tweet response = alchemyapi.sentiment('text',tweet['text']) if response['status'] == 'OK': tweet['sentiment']['doc'] = {} tweet['sentiment']['doc']['type'] = response['docSentiment']['type'] #Add the score (it's not returned if type=neutral) if 'score' in response['docSentiment']: tweet['sentiment']['doc']['score'] = response['docSentiment']['score'] else: tweet['sentiment']['doc']['score'] = 0 #add the result to the output queue out_queue.put(tweet) except Exception as e: 
#if there's an error, just move on to the next item in the queue print 'Uh oh, this just happened: ', e pass #signal that the task is complete in_queue.task_done()
print("**********")
# Top-10 concepts for the document (`text` and `alchemyapi` are defined
# earlier in this script).
response = alchemyapi.concepts('text', text, {'maxRetrieve': 10})
if response['status'] == "OK":
    #print(json.dumps(response, indent = 4))
    # (concept text, relevance) pairs.
    # NOTE(review): encode('ascii') raises UnicodeEncodeError on non-ASCII
    # concept text — inconsistent with the utf-8 encode used just below.
    keywords = [(concept['text'].encode('ascii'), float(concept['relevance'])) for concept in response['concepts']]
    for concept in response['concepts']:
        print('Concept text:', concept['text'].encode('utf-8'))
        print('Concept relevance:', concept['relevance'])
    print keywords

print("**********")
# Up to 200 entities; list every entity typed as a Person.
response = alchemyapi.entities('text', text, {'maxRetrieve': 200})
if response['status'] == "OK":
    print(json.dumps(response, indent=4))
    for entity in response['entities']:
        print("Entity text", entity['text'])
        print("Entity type", entity['type'])
    persons = [ ent['text'] for ent in response['entities'] if ent['type'] == 'Person' ]
    print("All persons:", persons)
# NOTE(review): everything down to `return filetext` is the tail of a
# function (presumably main()) whose `def` line lies above this excerpt.
    out_file = args.outfile
    uuid = args.uuid
    folder = os.path.join(tmpdir, uuid)
    print(folder)
    cwd = os.getcwd()
    # Location of the AlchemyAPI key file; computed but not used in this
    # excerpt — presumably consumed elsewhere.  TODO confirm.
    apikey_location = os.path.join(cwd, "api_key.txt")
    print(in_file)
    with open(in_file) as f:
        filetext = f.read()
    return filetext


# Run the extraction above and analyse the resulting text with AlchemyAPI.
filetext = main()
alchemyapi = AlchemyAPI()
response = alchemyapi.entities('text', filetext, {'sentiment': 1})
if response['status'] == 'OK':
    print(json.dumps(response, indent=4))
    for entity in response['entities']:
        print('text: ', entity['text'].encode('utf-8'))
        print('type: ', entity['type'])
        print('relevance: ', entity['relevance'])
        print('sentiment: ', entity['sentiment']['type'])
        # 'score' is only returned for non-neutral sentiment.
        if 'score' in entity['sentiment']:
            print('sentiment score: ' + entity['sentiment']['score'])
        print('')
else:
    print('Error in entity extraction call: ', response['statusInfo'])