def populateTweets(self):
    """Fetch tweets near this object's location and derive display tweets,
    popular hashtags, and an average sentiment for the search term.

    Side effects: sets self.lat, self.long, self.radius, self.showcase,
    self.popularHashtags and self.sentiment. Uses the module-level `api`
    Twitter client and an AlchemyAPI client.
    """
    self.lat, self.long, self.radius = self.findCoordinatesRadius()
    geo = str(self.lat) + "," + str(self.long) + "," + str(self.radius) + "km"
    tweets = api.search(q=self.search, lang='en', geocode=geo, rpp=100)

    # Keep the first five tweets as [text, author] pairs for display.
    self.showcase = [[tweet.text, tweet.user.screen_name]
                     for tweet in tweets[0:5]]

    # Flatten every tweet's hashtag entity list and take the 5 most common.
    hashtagsRaw = [tweet.entities['hashtags'] for tweet in tweets]
    hashtagsList = list(itertools.chain.from_iterable(hashtagsRaw))
    hashtags = [hash['text'] for hash in hashtagsList]
    # NOTE: an unused `frequency` dict (built with a quadratic
    # hashtags.count() loop) was removed; Counter does the same work once.
    self.popularHashtags = dict(Counter(hashtags).most_common(5)).keys()

    texts = [tweet.text for tweet in tweets]
    self.sentiment = 0.0
    alchemyapi = AlchemyAPI()
    for text in texts:
        response = alchemyapi.sentiment_targeted('text', text.lower(),
                                                 self.search.lower())
        if response['status'] != 'ERROR' and response['docSentiment']['type'] != 'neutral':
            numeric = float(response['docSentiment']['score'])
            self.sentiment = self.sentiment + (numeric / len(texts))  # computes average sentiment
def sentiment(): twitter = Twython(passwords.Live.twitter_app_key, passwords.Live.twitter_app_secret, oauth_version=2) access_token = twitter.obtain_access_token() twitter = Twython(passwords.Live.twitter_app_key, access_token=access_token) search_results = None try: search_results = twitter.search(q='$' + request.args.get('symbol'), result_type='popular') except TwythonError as e: print e twitter_corpus = "" for tweets in search_results['statuses']: twitter_corpus += tweets['text'].encode('utf-8') #Create the AlchemyAPI Object alchemyapi = AlchemyAPI() response = alchemyapi.sentiment('text', twitter_corpus) sentiment = None if response['status'] == 'OK': sentiment = {"sentiment": response['docSentiment']['type']} if request.args.get('output') == "jsonp": return Response('callback(' + json.dumps(sentiment) + ')', content_type='application/javascript') else: return jsonify(sentiment)
def run_sentiment_analysis(tweets, text_key):
    """Return deep copies of the tweet dicts in `tweets` with
    'sentiment_type' and 'sentiment_score' added from AlchemyAPI.

    tweets   -- iterable of dicts; each must contain `text_key`
    text_key -- key under which the analyzable text is stored
    Tweets without `text_key` or with failing analysis are skipped.
    """
    def print_error(response):
        # This should be replaced with better logging
        print('Error with AlchemyAPI response:')
        # Bug fix: previously printed the outer `sentiment` variable instead
        # of the response passed in.
        print(response, '\n')

    alchemyapi = AlchemyAPI()
    results = []
    for item in tweets:
        if text_key not in item:
            # Assume it's a bad tweet and continue
            print(text_key, 'not found in tweet')
            continue
        # Bug fix: the text key is configurable, so look it up with text_key
        # instead of the hard-coded 'words'.
        sentiment = alchemyapi.sentiment('text', item[text_key])
        try:
            if sentiment['status'].lower() == 'error':
                # Unrecognized language, emoji only, etc...
                print_error(sentiment)
                # Error responses carry no 'docSentiment'; skip this tweet.
                continue
            # Make a deep copy (since it's a nested dictionary)
            new_item = copy.deepcopy(item)
            sentiment_type = sentiment['docSentiment']['type']
            new_item['sentiment_type'] = sentiment_type
            if sentiment_type == 'neutral':
                new_item['sentiment_score'] = 0
            else:
                new_item['sentiment_score'] = sentiment['docSentiment']['score']
            results.append(new_item)
        except Exception as ex:
            print(type(ex).__name__)
            print_error(sentiment)
    return results
def main():
    """Run Alchemy taxonomy analysis over each user's tweets, appending one
    JSON line per user to f_out.

    Supports resuming: users are skipped until LAST_USR (the last user
    handled by a previous run) is seen, since the API has a daily limit.
    """
    alchemyapi = AlchemyAPI()
    user_tweets = read_tweet_text_per_user(f_in)
    cnt = 0
    LAST_USR = "******"   # screen name where the previous run stopped
    UNPROCESSED = False   # becomes True once LAST_USR is reached
    with codecs.open(f_out, 'a', encoding='utf8') as output_file:
        for user1 in user_tweets.iterkeys():
            cnt += 1
            if not UNPROCESSED:
                # Still before the resume point: skip already-processed users.
                if user1 != LAST_USR:
                    continue
                else:
                    UNPROCESSED = True
                    print("Found", LAST_USR, cnt)
            # Progress heartbeat every 100 users.
            if cnt % 100 == 0:
                print(cnt, user1)
            tweets1 = user_tweets[user1]
            BREAK, taxonomy_result1 = alchemy_on_tweets(
                alchemyapi, user1, tweets1)
            # there is the API daily limit so we check when exceeded and continue tomorrow from
            # the last processed users
            if BREAK:
                print("Last processed user: ", user1)
                return
            output_file.write(
                unicode(json.dumps(taxonomy_result1, ensure_ascii=False)) + '\n')
    return
def nlp_process(ids, ids_hash): #instantiate an elasticsearch client es = Elasticsearch() #instantiate an alchemy client alchemyapi = AlchemyAPI() for item in ids: data = ' '.join(ids_hash[item]) lowers = data.lower() alchem_data = [] response = alchemyapi.keywords('text', lowers, {'sentiment': 1}) if response['status'] == 'OK': print('#Success#') for keyword in response['keywords']: al_temp = defaultdict() al_temp['text'] = keyword['text'].encode('utf-8') al_temp['relevance'] = keyword['relevance'] al_temp['sentiment'] = keyword['sentiment']['type'] if 'score' in keyword['sentiment']: al_temp['score'] = keyword['sentiment']['score'] alchem_data.append(al_temp) else: print('Error in keyword extaction call: ', response['statusInfo']) print len(alchem_data) # prepare body for insertion doc = {"business_id": item, "word_freq": alchem_data} exit() template = {"create": {"_index": "alchem", "_type": "doc"}} res = es.index(index="alchem", doc_type='doc', body=doc)
def convert_to_clean_titles(infile, outfile): alchemyapi = AlchemyAPI() f = open(infile, "r") f2 = codecs.open(outfile, "w+", "utf-8") f3 = codecs.open("Entities.txt", "w+", "utf-8") count = 1 for line in f: line = line.decode("utf-8") response = alchemyapi.entities('text', line, { 'sentiment': 1, 'disambiguate': 1 }) if response['status'] == 'OK': for entity in response['entities']: if "type" in entity.keys: if entity['type'] in [ 'Country', 'Holiday', 'Movie', 'MusicGroup', 'Organization', 'Person', 'PrintMedia', 'Region', 'StateOrCountry', 'TelevisionShow', 'TelevisionStation', 'Money', 'Company', 'GeographicFeature' ]: line = line.replace(entity['text'], entity['text'].title()) print >> f3, entity['text'], entity['type'], entity[ 'sentiment'] print >> f2, line, else: print >> f2, line, print count, line count += 1
def handle(self, *args, **options):
    """Find up to 500 'merkel' postings that have no 'entities' field yet and
    write the AlchemyAPI-extracted entity texts back into each document."""
    es = elasticsearch.Elasticsearch(es_url)
    alchemyapi = AlchemyAPI()
    query = {
        "query": {
            "and": [
                {"missing": {"field": "entities"}},
                {"terms": {"language": ['en', 'de', 'fr', 'it', 'es', 'pt']}},
                {"match": {"_all": "merkel"}}
                #{ "range": { "published": { "gte" : "now-1d" } } }
            ]
        },
        "size": 500
    }
    res = es.search(index="rss", doc_type="posting", body=query)
    logger.info("%d documents found" % res['hits']['total'])
    for posting in res['hits']['hits']:
        #logger.info('Extracting entities for - %s' % posting['_id'])
        source = posting['_source']
        combined_text = source['title'] + ' ' + source['description']
        try:
            response = alchemyapi.entities("text", combined_text)
            entity_texts = [entity['text'] for entity in response["entities"]]
            #logger.info("Entities: " + entities)
            es.update(index=posting['_index'], doc_type=posting['_type'],
                      id=posting['_id'],
                      body={"doc": {"entities": entity_texts}})
        except KeyError:
            logger.exception("Problem getting sentiment :( %s" % response)
def get_sentiment(places): twitter_api = get_twitter_api() alchemy_api = AlchemyAPI() sentiments = dict() for place in places: r = twitter_api.GetSearch(term=place, count=10) for tw in r: txt = tw.GetText() response = alchemy_api.sentiment('text', txt) if response['status'] == 'OK': sentiments[txt] = str(response['docSentiment']['type']) ret_list = [] for t, s in sentiments.iteritems(): ret_json = dict() ret_json["tweet"] = t ret_json["sentiment"] = s ret_list.append(ret_json) list_len = 16 if len(ret_list) > list_len: ret_list = random.sample(ret_list, 16) else: for i in xrange(len(ret_list), list_len): ret_list.append({"No Tweet": "neutral"}) print ret_list return ret_list
def sentiment_analysis(text):
    """Return AlchemyAPI's document sentiment score for `text` as a float,
    or None when the response has no usable numeric score."""
    alchemy_api = AlchemyAPI()
    response = alchemy_api.sentiment("text", text)
    try:
        # Convert once (previously the float() conversion ran twice).
        return float(response["docSentiment"]['score'])
    except (KeyError, ValueError):
        # Bug fix: error/neutral responses lack 'docSentiment' or 'score' and
        # previously raised an uncaught KeyError; treat them as "no score".
        return None
def updateCounter(): global use global alchemyapi print use use += 1 if use >= 15: use = 0 alchemyapi = AlchemyAPI(use = use)
def get_sentiment(text):
    """Try each available Alchemy credential in turn and return the document
    sentiment score for `text` ('0' when the response carries no score).
    Falls through (returning None) if every key fails."""
    client = AlchemyAPI()
    for candidate_key in utils.get_random_alchemy_credentials():
        client.apikey = candidate_key
        result = client.sentiment("text", text)
        if 'docSentiment' in result:
            return result['docSentiment'].get('score', '0')
def retrieveReviewSentiment(text):
    """Return the AlchemyAPI document sentiment type for `text`, or the
    API's statusInfo message when the call fails."""
    api_client = AlchemyAPI()
    result = api_client.sentiment("text", text)
    if result["status"] == 'OK':
        return result["docSentiment"]["type"]
    return result['statusInfo']
def main(data): args = argv('@', data['recv']) # look for URL link = geturl(data['recv']) if link and link != "" and not modeCheck('b', data): link = link[0] # look for title badext = ('.cgi', '.pdf') imgext = ('.jpg', '.png', '.gif', '.bmp') if not link[-4:].lower() in badext: if not link[-4:].lower() in imgext: title = gettitle(link) if title: title = html_decode(title) # encode unicode object to byte string if type(title) == unicode: title = title.encode('utf-8', "ignore") title = title.replace('\n', ' ') title = title.replace('\r', ' ') title = title.strip() if len(title) >= 150: title = title[:150] if len(link) > int( data['config']['settings']['maxLinkLen']): # post title + tiny data['api'].say( args['channel'], '^ ' + title + ' ' + maketiny(link) + ' ^') return else: # post title only data['api'].say(args['channel'], '^ ' + title + ' ^') return else: # We've got an image URL. from alchemyapi import AlchemyAPI alchemyapi = AlchemyAPI() response = alchemyapi.imageTagging('url', link) if response['status'] == 'OK' and response['imageKeywords'][0][ 'text'] != 'NO_TAGS': retme = "^ Image of: " for keyword in response['imageKeywords']: retme += "%s(%s%%) " % ( keyword['text'], int( float(keyword['score']) * 100)) if len(link) > int( data['config']['settings']['maxLinkLen']): retme += maketiny(link) + " " retme += "^" data['api'].say(args['channel'], retme) return if len(link) > int(data['config']['settings']['maxLinkLen']): # post tiny only data['api'].say(args['channel'], '^ ' + maketiny(link) + ' ^') return else: # nothing return False
def connect_alchemy(url):
    # to connect with alchemy and tag the content
    """Fetch the readable text of `url` through AlchemyAPI and return the
    list of keyword dicts extracted from it."""
    from alchemyapi import AlchemyAPI
    alchemyapi = AlchemyAPI()
    resp = alchemyapi.text('url', url)
    response = alchemyapi.keywords("text", resp['text'])
    # Bug fix: the keyword list was stored in a misspelled throwaway variable
    # (`keywors`) and the function implicitly returned None; return it.
    return response["keywords"]
def extract_entities(text, lang):
    """Return a dict mapping each AlchemyAPI entity's utf-8-encoded surface
    text to its converted type label; empty dict when the call fails.
    (`lang` is accepted for interface compatibility but not used here.)"""
    alchemyapi = AlchemyAPI()
    response = alchemyapi.entities('text', text, {'sentiment': 1})
    if response['status'] != 'OK':
        return {}
    return {
        entity['text'].encode('utf-8'): convert_label(entity['type'])
        for entity in response['entities']
    }
def GetAlchemyAPIObject(): with open("api_key.txt","r") as aFile: for line in aFile.read().split("\n"): if line != "": api = AlchemyAPI(line) result = api.sentiment("text","test") if result["status"] != "ERROR": return api print "Could not initialize valid, usable AlchemyAPI object. Consider requesting another API key." exit() return None
def update_entities(incremental=True):
    """Populate the 'entities' field of stored stories via AlchemyAPI.
    When `incremental`, only stories that lack an 'entities' field are
    (re)processed; otherwise every story is."""
    alchemy = AlchemyAPI()
    criteria = {'entities': {'$exists': False}} if incremental else {}
    for story in _db.stories.find(criteria):
        found = alchemy.analyze_url(story['unescapedUrl'])['entities']
        logging.debug('%s, %s entities' % (story['title'], len(found)))
        story['entities'] = found
        _db.stories.save(story)
def __init__(self, aws_id, aws_key, es, aws_region='us-west-2',
             sqs_name='new-tweet-notifs'):
    """Wire up AWS SQS/SNS connections, an AlchemyAPI client, the given
    Elasticsearch handle and a 4-worker thread pool.

    aws_id / aws_key -- AWS credentials
    es               -- pre-built Elasticsearch client to reuse
    aws_region       -- region for both the SQS and SNS connections
    sqs_name         -- queue receiving new-tweet notifications
    """
    try:
        #connect with sqs
        self.sqs = boto.sqs.connect_to_region(
            aws_region,
            aws_access_key_id=aws_id,
            aws_secret_access_key=aws_key)
        self.sqs_queue = self.sqs.get_queue(sqs_name)
        self.alc = AlchemyAPI()
        self.sns = boto.sns.connect_to_region(aws_region)
        self.es = es
        self.thread_pool = ThreadPoolExecutor(max_workers=4)
    except Exception as e:
        # NOTE(review): failures are only logged; attributes after the
        # failing call stay unset, and the print below may then raise too.
        print('Could not connect')
        print(e)
    print('Connected to AWS SQS: '+ str(self.sqs))
def pos_with_entity_replaced_common_words(infile, outfile):
    """For each title line in `infile`, detect entities via AlchemyAPI and
    replace their mentions in the corresponding line of
    common_word_pos.txt with the entity type, writing results to `outfile`."""
    alchemyapi = AlchemyAPI()
    common_word_pos = open("common_word_pos.txt", "r")
    title_data = open(infile, "r+")
    f2 = codecs.open(outfile, "w+", "utf-8")
    # Bug fix: `for line1, line2 in title_data, common_word_pos` iterated a
    # 2-tuple of file objects (unpacking each file, not pairing lines);
    # zip pairs the files line by line as intended.
    for line1, line2 in zip(title_data, common_word_pos):
        response = alchemyapi.entities('text', line1, {
            'sentiment': 1,
            'disambiguate': 1
        })
        if response['status'] == 'OK':
            for entity in response['entities']:
                # Bug fix: str.replace returns a new string; the result was
                # previously discarded, so no substitution ever happened.
                line2 = line2.replace(entity['text'], entity['type'])
            print >> f2, line2,
def alchemy_keywords(title):
    """Return the list of keyword strings AlchemyAPI extracts from `title`
    (empty list when the call fails)."""
    response = AlchemyAPI().keywords('text', title.encode('utf8'))
    keywords = []
    # categorize the paragraph
    if response['status'] == 'OK':
        for keyword in response['keywords']:
            # Only the keyword text is used; the truncated relevance score
            # the previous version also unpacked was dead code.
            keywords.append(str(keyword['text']))
    return keywords
def getSoup(): sock = urllib.urlopen('https://en.wikipedia.org/wiki/Motocross') sockRaw = sock.read() soup = BeautifulSoup(sockRaw, "html.parser") soupText = soup.get_text() # use the alchemyAPI to find the keyword/phrases from the texts alchemyapi = AlchemyAPI() response = alchemyapi.keywords('text', soupText, {'maxRetrieve': 10}) if response['status'] == 'OK': print "\nThe Keywords are:" for i in response['keywords']: print "Word: " + i["text"] + ", Relevance: " + i["relevance"] else: print "Something went wrong with Alchemy."
def checkDailyQuotaAndRunAlchemy(commentDb, cruiseLines): with open('data/Alchemy_response_keywords.json', 'rb') as fp: returned_keywords = json.load(fp) with open('data/Alchemy_response_relations.json', 'rb') as fp: returned_relations = json.load(fp) alchemyapi = AlchemyAPI() test = "test if finished Alchemy daily quota" response = alchemyapi.keywords('text', test, {'sentiment': 0}) if response['status'] == 'OK': returned_keywords, returned_relations = runAlchemyApi( cruiseLines, commentDb, returned_keywords, returned_relations, alchemyapi) else: print 'Error in keyword extraction call: ', response['statusInfo'] return returned_keywords, returned_relations
def handle(self, *args, **options):
    """Fetch up to 100 postings from the last day that lack a sentiment
    field and store each one's AlchemyAPI document sentiment back into
    Elasticsearch."""
    es = elasticsearch.Elasticsearch(es_url)
    alchemyapi = AlchemyAPI()
    query = {
        "query": {
            "and": [
                {"missing": {"field": "sentiment"}},
                {"terms": {"language": ['en', 'de', 'fr', 'it', 'es', 'pt']}},
                {"range": {"published": {"gte": "now-1d"}}},
            ]
        },
        "size": 100,
    }
    res = es.search(index="rss-*", doc_type="posting", body=query)
    logger.info("%d documents found" % res['hits']['total'])
    for posting in res['hits']['hits']:
        logger.info('Checking sentiment for - %s' % posting['_id'])
        source = posting['_source']
        combined_text = source['title'] + ' ' + source['description']
        try:
            response = alchemyapi.sentiment("text", combined_text)
            logger.info("Sentiment: " + response["docSentiment"]["type"])
            sentiment = response["docSentiment"]["type"]
            es.update(index=posting['_index'], doc_type=posting['_type'],
                      id=posting['_id'],
                      body={"doc": {"sentiment": sentiment}})
        except KeyError:
            logger.exception("Problem getting sentiment :( %s" % response)
def taxonomy(self, para): response = AlchemyAPI().taxonomy('text', para) roots = [] # roots of categories # categorize the paragraph if response['status'] == 'OK': for category in response['taxonomy']: label, score = category['label'], category['score'][:4] root_label = label.split('/')[1] roots.append(str(root_label)) if self.debug: print "Root: %s \t Score: %s" % (label.ljust(40), score) else: print 'Error in concept tagging call: ', response['statusInfo'] roots = list(set(roots)) # remove duplicates if self.debug: print 'Category roots', roots return roots
def __init__(self, filename, language,
             api_key='bf18ed72384724d86425c8674204039f87352870'):
    """Prepare an AlchemyAPI-backed tweet filter.

    filename -- input file this instance will process
    language -- language code used to build the 'text_<lang>' filter key
    api_key  -- AlchemyAPI key (new, backward-compatible parameter)

    SECURITY NOTE: the default api_key is the previously hard-coded
    credential, kept only for backward compatibility — callers should pass
    their own key / load it from configuration instead.
    """
    self.al = AlchemyAPI(api_key)
    self.filename = filename
    self.language = language
    # Maps output field names to eval-style index paths into a raw tweet dict.
    self.filterKeys = {
        'created_at': u'[\'created_at\']',
        'id': u'[\'id\']',
        'lang': u'[\'lang\']',
        'tweet_urls': u'[\'entities\'][\'urls\']',
        'tweet_hashtags': u'[\'entities\'][\'hashtags\']',
        'user_location': u'[\'user\'][\'location\']',
        'keywords': u'[\'alchemy\'][\'keywords\']',
        'user_screen_name': u'[\'user\'][\'screen_name\']',
        'text': u'[\'text\']',
        'text_' + self.language: u'[\'text\']'
    }
def createTweets(source, num):
    """Generate `num` Markov-chain tweets from `source`; roughly half the
    time, attempt to append a concept hashtag obtained from AlchemyAPI
    (only when it still fits in 140 characters)."""
    #num is number of tweets to create
    words = createDict(source, 2)  #state size of 2 allows for more combinations as tweets are small
    tweets = []
    alchemyAPI = AlchemyAPI()
    for _ in range(0, num):
        #at most 50% chance of using a hashtag
        if randint(0, 1) == 0:
            tweets.append(generateText(words, 2, choice(range(100, 140))))
        else:
            tweet = generateText(words, 2, choice(range(80, 120)))
            response = alchemyAPI.concepts('text', tweet)
            if response['status'] == 'OK':
                hashtag = " #" + response['concepts'][0]['text'].replace(" ", "")
                if len(hashtag) <= 140 - len(tweet):
                    tweet += hashtag
            tweets.append(tweet)
    return tweets
def analysecontent(content):
    """
    Process/Analyse the extracted contents with Alchemy API
    Assumption: api_key.txt with a valid key is available from where this
    program is getting executed.
    """
    print('Processing extracted text with AlchemyAPI...')
    response = AlchemyAPI().keywords('text', content, {'maxRetrieve': 10})
    if response['status'] != 'OK':
        print('Error in keyword extraction call: ', response['statusInfo'])
        return
    print('---------------------------------')
    print('## Keywords ## Relevance')
    for keyword in response['keywords']:
        print("{0}: {1}".format(keyword['text'].encode('utf-8'),
                                keyword['relevance']))
    print('---------------------------------')
def process(in_queue, out_queue): #INPUT: #query -> the query string that was used in the Twitter API search (i.e. "Denver Broncos") #in_queue -> the shared input queue that is filled with the found tweets. #out_queue -> the shared output queue that is filled with the analyzed tweets. #OUTPUT: #None #Create the alchemy api object alchemyapi = AlchemyAPI() while True: #Grab a tweet from the queue tweet = in_queue.get() #Initilise tweet['sentiment'] = {} try: #Calculate the sentiment for the entire tweet response = alchemyapi.sentiment('text', tweet['text']) #Add the score if its not returned neutral if response['status'] == 'OK': tweet['sentiment']['doc'] = {} tweet['sentiment']['doc']['type'] = response['docSentiment'][ 'type'] if 'score' in response['docSentiment']: tweet['sentiment']['doc']['score'] = response[ 'docSentiment']['score'] else: tweet['sentiment']['doc']['score'] = 0 #Add the result to the output queue out_queue.put(tweet) except Exception as e: #If there's an error, just move on to the next item in the queue print 'Error ', e pass #Signal that the task is finished in_queue.task_done()
def get_alch_keys(url):
    """Collect concept/keyword/entity strings for `url` from AlchemyAPI and
    return only those that Crunchbase verifies as companies."""
    #Create the AlchemyAPI Object
    print('-----Create AlchemyAPI')
    alch = AlchemyAPI()
    candidates = set()
    for label, fetch in (('concepts', get_concepts),
                         ('keywords', get_keywords),
                         ('entities', get_entities)):
        print('-----get ' + label)
        candidates |= fetch(url, alch)
    return {key for key in candidates
            if verify_companies_with_crunchbase(key)}
def __init__(self):
    """Set up Twitter credentials, the keyword->category map, the results
    CSV and an AlchemyAPI client for a sentiment-collection run."""
    # Keyword of interest -> coarse category used when labelling results.
    self.KEYWORDS = {
        "firefox": "browser",
        "mozilla": "org",
        "google": "org",
        "chrome": "browser",
        "internet explorer": "browser",
        "microsoft": "org",
        "safari": "browser",
        "apple": "org"
    }
    # Twitter OAuth credentials read from local dot-files via read_api_key.
    self.API_KEY = self.read_api_key(".twitterapikey")
    self.API_SECRET = self.read_api_key(".twitterapisecret")
    self.ACCESS_TOKEN = self.read_api_key(".twitteraccesstoken")
    self.ACCESS_TOKEN_SECRET = self.read_api_key(
        ".twitteraccesstokensecret")
    # NOTE(review): results.csv stays open for the object's lifetime and is
    # not closed in this block — confirm it is closed elsewhere.
    self.f_out = open("results.csv", "w")
    self.alchemy = AlchemyAPI()
    self.sentiment_results = []