def sent_sentiment(sent, analyzer):
    # pos_threshold and neg_threshold are module-level globals
    if analyzer == 'textblob':
        from textblob import TextBlob
        testimonial = TextBlob(sent)
        polarity = testimonial.sentiment.polarity
        if polarity > pos_threshold:
            return 1   # positive
        elif polarity < neg_threshold:
            return -1  # negative
        else:
            return 0   # neutral
    elif analyzer == 'vader':
        from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
        try:
            vs = vaderSentiment(sent)
        except UnicodeError:  # retry with an explicit utf-8 encoding
            vs = vaderSentiment(sent.encode('utf8'))
        if vs['pos'] > vs['neg'] and vs['pos'] > vs['neu']:
            return 1
        elif vs['neg'] > vs['neu']:
            return -1
        else:
            return 0
    else:
        raise ValueError("Got incorrect analyzer type: %s" % analyzer)

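# A minimal sketch (not from any of the snippets in this collection) of the
# same threshold-based classification on the current vaderSentiment 3.x API,
# where the legacy module-level sentiment() function has been replaced by the
# SentimentIntensityAnalyzer class. The +/-0.05 defaults are the cutoffs
# suggested in the VADER documentation.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

_analyzer = SentimentIntensityAnalyzer()

def sent_sentiment_v3(sent, pos_threshold=0.05, neg_threshold=-0.05):
    """Classify `sent` as 1 (positive), -1 (negative) or 0 (neutral)."""
    compound = _analyzer.polarity_scores(sent)['compound']
    if compound > pos_threshold:
        return 1
    elif compound < neg_threshold:
        return -1
    return 0
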
def run_twitter_search(keyword, output_file):
    print(keyword.upper())
    counter = 0
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([str(keyword)])  # define all words we would like to search for
        # tso.set_language('en')  # we want to see English tweets only
        tso.set_include_entities(True)  # include entity information
        tso.set_geocode(45.551279, -92.586955, 530, imperial_metric=True)
        # `ts` is a module-level TwitterSearch client and `df` a module-level
        # pandas DataFrame accumulating results
        for tweet in ts.search_tweets_iterable(tso):
            counter += 1
            search_term = keyword
            username = "******" if tweet['user']['screen_name'] is None else tweet['user']['screen_name']
            text = "NONE" if tweet['text'] is None else tweet['text']
            place = "NONE" if tweet['place'] is None else tweet['place']
            if tweet['coordinates'] is not None:
                lat = tweet['coordinates']['coordinates'][1]
                lng = tweet['coordinates']['coordinates'][0]
            elif tweet['place'] is not None:
                # no exact point: average the place's bounding-box corners
                place_coordinates = tweet['place']['bounding_box']['coordinates']
                sum_lat = 0
                sum_lng = 0
                for pair in place_coordinates[0]:
                    sum_lat += pair[1]
                    sum_lng += pair[0]
                lat = sum_lat / len(place_coordinates[0])
                lng = sum_lng / len(place_coordinates[0])
                place = tweet['place']['full_name']
            else:
                lat = "NONE"
                lng = "NONE"
            location = "NONE" if tweet['user']['location'] is None else tweet['user']['location']
            created_at = "NONE" if tweet['created_at'] is None else tweet['created_at']
            description = "NONE" if tweet['user']['description'] is None else tweet['user']['description']
            verified = "NONE" if tweet['user']['verified'] is None else str(tweet['user']['verified'])
            sentiment_score = vaderSentiment(text.encode('utf-8'))
            compound_sentiment = sentiment_score['compound']
            description_sentiment = vaderSentiment(description.encode('utf-8'))['compound']
            try:
                df.loc[len(df)] = [search_term, username, text, lat, lng,
                                   location, created_at, place, description,
                                   verified, sentiment_score,
                                   compound_sentiment, description_sentiment]
                if (len(df) % 200) == 0:
                    write_to_excel(output_file, 'Sheet1', df)
                    print("_%s %s tweets/%s total" % (counter, keyword.upper(), len(df)))
            except Exception:
                write_to_excel(output_file, str(keyword), df)
            if counter == 10000:
                return
        write_to_excel(output_file, str(keyword), df)
        print("_______%s tweets saved" % (len(df)))
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)

def determineSentiment(track_lyrics):
    # VADER sentiment calculation and database storage
    try:
        track_lyrics = track_lyrics.encode('utf-8')
        sentiment = vaderSentiment(track_lyrics)
    except UnicodeEncodeError:
        track_lyrics = track_lyrics.encode('ascii', 'ignore')
        sentiment = vaderSentiment(track_lyrics)
    neg = sentiment['neg']
    neu = sentiment['neu']
    pos = sentiment['pos']
    return (neg, neu, pos)

def analyzeSentiment(sentences):
    """
    analyzeSentiment(listOfString) -> listOfJSON

    Returns a JSON object wrapped in an array that contains the number of
    positive, neutral and negative tweets, ready to be processed by chartJS.
    """
    neu = 0
    pos = 0
    neg = 0
    # decide whether each sentence carries a negative, neutral or positive sentiment
    for sentence in sentences:
        vs = vaderSentiment(sentence)
        if vs['neg'] > vs['pos']:
            neg += 1
        elif vs['pos'] > vs['neg']:
            pos += 1
        else:
            neu += 1
    # output is in this format so chartJS can use it in the web app
    return_data = [{'value': neg, 'label': 'Negative Tweets', 'color': '#D18177'},
                   {'value': neu, 'label': 'Neutral Tweets', 'color': '#9CBABA'},
                   {'value': pos, 'label': 'Positive Tweets', 'color': '#00688B'}]
    return return_data

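# A brief usage sketch for analyzeSentiment (hypothetical input; assumes the
# same legacy vaderSentiment import the snippet relies on). The return value
# is plain JSON-serializable data ready for chartJS.
import json

chart_data = analyzeSentiment(["I love it", "I hate it", "It exists"])
print(json.dumps(chart_data, indent=2))
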
def process_tweet(tweet):
    tweet = json.loads(tweet)
    tweet_text = tweet.get('text')
    trump = word_in_text('trump', tweet_text)
    clinton = word_in_text('clinton', tweet_text)
    if trump is False and clinton is False:
        return 0
    vs = vaderSentiment(tweet_text.encode('utf-8'))
    # skip tweets that are fully neutral or whose compound score is weak
    if vs.get('neu') == 1 or (-.35 < vs.get('compound') < .35):
        return 0
    # at this point the compound score is either above .35 or below -.35
    pos = vs.get('compound') > .35
    post = {
        'trump': trump,
        'clinton': clinton,
        'vader': vs,
        'positive': pos,
        'text': tweet_text
    }
    posts = db.tweets
    post_id = posts.insert_one(post).inserted_id
    count_tweets(True, 'trump')

def process_item(self, item, spider):
    # at this point item['lenArticle'] still holds the article text;
    # it is replaced by the token count further down
    try:
        item['vaderSentiment'] = vaderSentiment(item['lenArticle'])
    except Exception:
        pass
    success_count = 0
    tech_count = 0
    update_count = 0
    partner_count = 0
    contest_count = 0
    tokens = nltk.word_tokenize(item['lenArticle'])
    ## tokens = sorted(w for w in set(tokens) if len(w) > 1)
    for word in tokens[:]:  # iterate over a copy so removing items is safe
        word_lower = word.lower()
        if word_lower in self.words_to_remove:
            tokens.remove(word)
        if word_lower in self.words_to_rank_success:
            success_count += 1
        if word_lower in self.words_to_rank_tech:
            tech_count += 1
        if word_lower in self.words_to_rank_update:
            update_count += 1
        if word_lower in self.words_to_rank_partner:
            partner_count += 1
        if word_lower in self.words_to_rank_contest:
            contest_count += 1
    item['lenArticle'] = len(tokens)
    fdist = nltk.FreqDist(tokens)
    item['fdist'] = fdist.most_common(50)
    item['topic'] = {'success': success_count, 'technology': tech_count,
                     'update': update_count, 'partner': partner_count,
                     'contest': contest_count}
    return item

def bigrams_unigrams_sentiment(texts):
    all_feats = []
    for text in texts:
        profanity_count = 0
        appreciation_count = 0
        help_count = 0
        for string in appreciation:
            appreciation_count += text.lower().count(string)
        for string in profanity:
            profanity_count += text.lower().count(string)
        for string in seeking_help:
            help_count += text.lower().count(string)
        words = text.split(" ")
        bigrams = []
        for i in xrange(len(words) - 1):
            bigrams.append(words[i] + " " + words[i + 1])
        features = Counter(bigrams)
        features += Counter(words)
        vs = vaderSentiment(text)
        # print text, vs
        features['neg_sentiment'] = vs['neg']
        features['neu_sentiment'] = vs['neu']
        features['pos_sentiment'] = vs['pos']
        features['profanity_count'] = profanity_count
        features['appreciation_count'] = appreciation_count
        features['help_count'] = help_count
        features['text_length'] = len(text)
        all_feats.append(features)
    vectorizer = DictVectorizer(sparse=False)
    return vectorizer.fit_transform(all_feats)

def get_vadersentiment_polarity(text_to_analyze):
    # Here `vaderSentiment` must be the SentimentIntensityAnalyzer class
    # (e.g. `from vaderSentiment.vaderSentiment import
    # SentimentIntensityAnalyzer as vaderSentiment`), unlike the legacy
    # module-level sentiment() function used in the other snippets.
    analyzer = vaderSentiment()
    scores = analyzer.polarity_scores(text_to_analyze)
    neg = scores['neg']
    pos = scores['pos']
    polarity = pos - neg
    return polarity

def parse_dir_contents(self, response):
    # Create an instance of the item; its definition lives in items.py
    item = Website()
    # If a div with id 'maincontent' exists we are on the CSE website,
    # so the first branch can extract from it safely
    if response.xpath('//div[@id="maincontent"]') != []:
        for sel in response.xpath('//div[@id="maincontent"]'):
            item['name'] = sel.xpath('h1/text()').extract()
            temp = []
            for ptag in sel.xpath('p'):
                temp += ptag.xpath('text()').extract()
    else:
        item["name"] = response.xpath('//h1/text()').extract()
        temp = []
        for ptag in response.xpath('//p'):
            temp += ptag.xpath('text()').extract()
    item['description'] = ""
    for i in temp:
        item['description'] += i
    item['lenArticle'] = len(item['description'].split())
    # Compute the sentiment of the current article and add it to the item
    item['vaderSentiment'] = vaderSentiment(item['description'])
    # Take the first text node of the right-hand footer <p> and slice out
    # the date portion
    dateText = response.xpath('//div[@id="footer"]/p[@class="right"]/text()[1]').extract()
    date = dateText[0]
    item['date'] = date[16:]
    yield item

def avg_review_vaderSentiment(data):
    sentiment_totals = defaultdict(int)
    result_categories = set()
    output = []
    for each in data.items():
        results = each[1]['results']
        result_categories.add(results)
        reviews_list = each[1].get('reviews')
        if reviews_list:
            n_reviews = len(reviews_list)
            review_sentiments = [vaderSentiment(x[1].encode('utf8')) for x in reviews_list]
            sentiment_totals[results + '_n_reviews'] += n_reviews
            sentiment_totals[results + '_pos_sentiment'] += sum([i['pos'] for i in review_sentiments])
            sentiment_totals[results + '_neg_sentiment'] += sum([i['neg'] for i in review_sentiments])
            sentiment_totals[results + '_neu_sentiment'] += sum([i['neu'] for i in review_sentiments])
            sentiment_totals[results + '_compound_sentiment'] += sum([i['compound'] for i in review_sentiments])
    for result in result_categories:
        n = float(sentiment_totals[result + '_n_reviews'])
        t = (result,
             sentiment_totals[result + '_n_reviews'],
             float(sentiment_totals[result + '_pos_sentiment']) / n,
             float(sentiment_totals[result + '_neg_sentiment']) / n,
             float(sentiment_totals[result + '_compound_sentiment']) / n)
        output.append(t)
    return output

def max_review_vaderSentiment(data, n, min_length):
    vader_reviews = {}
    for each in data.items():
        reviews_list = each[1].get('reviews')
        if reviews_list:
            review_sentiments = [
                (x[1].encode('utf8'), vaderSentiment(x[1].encode('utf8')))
                for x in reviews_list
                if len(nltk.word_tokenize(x[1])) >= min_length
            ]
            for r in review_sentiments:
                vader_reviews[r[0]] = (r[1]['neg'], r[1]['pos'])
    # note: the [0:n + 1] slice returns n + 1 reviews, not n
    top_neg = sorted(vader_reviews.items(), key=lambda x: x[1][0], reverse=True)[0:n + 1]
    top_pos = sorted(vader_reviews.items(), key=lambda x: x[1][1], reverse=True)[0:n + 1]
    return top_pos, top_neg

def VSPolarity(self, corpus):
    self.result = []
    for sentence in corpus:
        vs = vaderSentiment(sentence)
        aList = [vs["neg"], vs["neu"], vs["pos"]]
        self.result.append(aList)
    return np.array(self.result)

def discretized_vader(text, cutoffs=[-0.150, 0.150]):
    '''Give a discretized VADER sentiment score:
    1 = negative, 2 = neutral, 3 = positive.'''
    score = vaderSentiment(text)['compound']
    if score < cutoffs[0]:
        return 1
    elif score > cutoffs[1]:
        return 3
    return 2

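# Hypothetical usage of discretized_vader: map the discrete classes back to
# labels (the exact class depends on the installed VADER lexicon version).
labels = {1: 'negative', 2: 'neutral', 3: 'positive'}
print(labels[discretized_vader("This movie was great!")])  # expected: 'positive'
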
def getSentiment():
    # Tweet_Corpus, cur and db are module-level globals
    for Tweet_text in Tweet_Corpus:
        vs = str(vaderSentiment(Tweet_text))
        # print Tweet_text
        # print str(vs)
        cur.execute(
            "INSERT INTO sentiment (tweet_text, sentiment) VALUES (%s, %s)",
            (Tweet_text, vs))
        cur.fetchall()  # note: fetching after an INSERT is a no-op at best
        db.commit()

def calculate_Sentiment(item):
    values_list = item[1]
    # open the results file once in append mode instead of reopening per review
    with open("/path/to/sentiment_results_file", "a") as f:
        for review in values_list:
            sentiment_score = vaderSentiment(review[2])['compound']
            f.write("\n Product ID: " + str(item[0]) +
                    " Review: " + str(review[2]) +
                    " User Rating: " + str(review[1]) +
                    " Sentiment Score: " + str(sentiment_score))

def facebookReport(id):
    fb = facebook.GraphAPI(ACCESS_TOKEN)
    d_posts = fb.get_connections(id, 'posts')
    for i in range(10):
        post = d_posts['data'][i]
        postText = post['message']
        vs = vaderSentiment(postText.encode('utf-8'))
        words = postText.split()
        lexicalDiversity = len(set(words)) * 1.0 / len(words)
        print 'Post Text:', removeUnicode(postText)
        print 'Lexical Diversity:', lexicalDiversity
        print 'Sentiment:', vs['compound']
        print '================================================\n'

def get_tweets(q, count):
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    tw = twitter.Twitter(auth=auth)
    print "==============================" * 2
    print "\n\t" + '\033[95m' + str(count) + " Tweets with " + q + '\033[0m' + "\n"
    print "==============================" * 2
    tweets = tw.search.tweets(q=q, count=count, lang='en')
    texts = []
    counter = 0
    neg_tweet = 0
    pos_tweet = 0
    retweet_counter = 0
    total_senti = 0
    for status in tweets["statuses"]:
        counter += 1
        texts.append(status["text"])
        words = []
        for w in removeUnicode(status['text']).split():
            words.append(w)
        vs = vaderSentiment(status["text"].encode('utf-8'))
        if vs['compound'] > 0:
            pos_tweet += 1
        if vs['compound'] < 0:
            neg_tweet += 1
        print "\n" + "=" * 10 + "Tweet # " + str(counter) + "=" * 10 + "\n"
        print removeUnicode(status['text'])
        retweet_counter += status["retweet_count"]
        print "\nRetweet count: %d\n" % (status["retweet_count"])
        print "Lexical Diversity: ", 1.0 * len(set(words)) / len(words)
        print "\nSentiment: " + str(vs['compound'])
        total_senti += vs['compound']
    print "==============================" * 2
    print "\nReport over %d tweets: \n" % counter
    print "Total number of retweets: %d\n" % retweet_counter
    print "Negative tweets: " + str(neg_tweet)
    print "\nPositive tweets: " + str(pos_tweet)
    print "\nOverall sentiment: " + str(total_senti / counter)
    words = []
    for t in texts:
        words += [w for w in removeUnicode(t).split()]
    lexical_diversity = 1.0 * len(set(words)) / len(words)
    print "\nOverall lexical diversity: ", lexical_diversity

def func(std, emojificationParam, text, outFile="out.txt"):
    # accept either a .txt file path or a raw string
    if text[-4:] == '.txt':
        with open(text, "r") as f:
            content = f.read()
    else:
        content = text
    output = open(outFile, "w")
    sentences = re.split(r"[?\.!]", content)
    for sentence in sentences:
        vs = vaderSentiment(sentence)
        text = word_tokenize(sentence)
        pos_tags = pos_tag(text)
        myEpd = epd.epd(0, std)
        # sample candidate emoji positions
        emojipos = []
        for sample in range(0, int(np.random.rand() * emojificationParam * len(text))):
            a = myEpd.getPosition(text)
            if a is not None:
                emojipos += [int(a)]
        # keep at most 3 occurrences of each position with an allowed POS tag
        righttags = []
        for position in emojipos:
            if getTag(pos_tags, position) == True:
                if righttags.count(position) < 3:
                    righttags += [position]
        score = vs["compound"] * 50
        final_emojis = []
        emojis = []
        # collect emojis whose value is within +/-5 of the scaled sentiment
        for k, v in emoji.items():  # index to list of unicode values
            for val in v:
                if val >= score - 5 and val <= score + 5:
                    emojis += [k]
        for index in righttags:
            sample = int(np.floor(np.random.rand() * len(emojis)))
            choice = emojis[sample]
            final_emojis += [(index, choice)]
        text = [unicode(t, "utf-8") for t in text]
        for index, e in final_emojis:
            if index == 0:
                # at sentence start, place the emoji before or after at random
                samp = np.random.rand()
                if samp > .5:
                    text[index] = text[index] + e
                else:
                    text[index] = e + text[index]
            else:
                text[index] = text[index] + e
        s = " ".join(text)
        if len(s) > 2:
            output.write((s + unicode(". ")).encode("utf-8"))
            print (s + unicode(". ")).encode("utf-8")
    output.close()

def process(self, tup):
    id = tup.values[0]
    title = tup.values[1].encode('ascii', 'ignore')
    # the cleaned tweet text emitted by the ParseTweet bolt
    tweet = tup.values[2].encode('ascii', 'ignore')

    # Get the sentiment values for the tweet. vs is a dictionary of the form
    # {'neg': value1, 'neu': value2, 'pos': value3, 'compound': value4}
    vs = vaderSentiment(tweet)

    # Get the existing running count and running sentiment values for this
    # tmdbId, then add one to the counter and fold in the new values from vs.
    # `cur` and `conn` are module-level; a local connection would look like:
    # conn = psycopg2.connect(database="filmzz", user="******", password="******", host="localhost", port="5432")
    # cur = conn.cursor()
    currentCount = 0
    cur.execute("SELECT runningCount from TweetStatistic WHERE tmdbId=%s", [id])
    result = cur.fetchone()
    if result is not None:
        currentCount = result[0]
    # build [neg, neu, pos, compound] explicitly; relying on vs.items()
    # ordering is not safe across Python versions
    currentSentiment = [vs['neg'], vs['neu'], vs['pos'], vs['compound']]
    if currentCount == 0:
        # Insert a new row with the counter, movie title and tmdbId
        currentCount = currentCount + 1
        cur.execute("INSERT INTO TweetStatistic (tmdbId, title, runningCount, runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment) VALUES (%s,%s,%s,%s,%s,%s,%s)",
                    [id, title, currentCount, currentSentiment[0], currentSentiment[1], currentSentiment[2], currentSentiment[3]])
    else:
        cur.execute("SELECT runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment from TweetStatistic WHERE tmdbId=%s", [id])
        result = cur.fetchone()
        currentCount = currentCount + 1
        if result is not None:
            runningNegativeSentiment = currentSentiment[0] + result[0]
            runningNeutralSentiment = currentSentiment[1] + result[1]
            runningPositiveSentiment = currentSentiment[2] + result[2]
            runningCompoundSentiment = currentSentiment[3] + result[3]
            self.log('Updating the statistics for id:%s' % (id))
            # Update the running statistics
            cur.execute("UPDATE TweetStatistic SET runningCount = %s, runningNegativeSentiment = %s, runningNeutralSentiment = %s, runningPositiveSentiment=%s, runningCompoundSentiment = %s WHERE tmdbId = %s",
                        (currentCount, runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment, id))
    conn.commit()

def print_statistics(tweets):
    for item in tweets:
        print "======================"
        print item["text"]
        print "Favorite Count: ", item["favorite_count"]
        print "Retweets: ", item["retweet_count"]
        print "Lexical Diversity", lexical_diversity(item["text"])
        print "Sentiment Analysis:", vaderSentiment(
            item["text"].encode('utf-8'))['compound']  # not 100% sure this'll work
        if item["user"]:
            print "Username: ", item["user"]["screen_name"].encode('utf-8')
            print "Description: ", item["user"]["description"].encode('utf-8')
            print "Location: ", item["user"]["location"].encode('utf-8')

def run_vader_sentiment_analyzer(batch, sentiments, cutoffs=[-0.50, 0.50]):
    '''
    One of the NLP tools used to evaluate sentiment. VADER outputs a
    compound score between -1 and +1, so we must make our own wrapper to
    categorize the scores the way we see fit. Cutoffs for negative,
    neutral, and positive can be optionally specified.
    '''
    for text in batch:
        vs = vaderSentiment(text)
        score = vs['compound']
        if score < cutoffs[0]:
            sentiments['negative'] += 1
        elif score > cutoffs[1]:
            sentiments['positive'] += 1
        else:
            sentiments['neutral'] += 1

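# Hypothetical usage of run_vader_sentiment_analyzer: the caller owns the
# tally dict, which must be pre-initialized with all three keys.
sentiments = {'negative': 0, 'neutral': 0, 'positive': 0}
run_vader_sentiment_analyzer(["I love it!", "I hate it!", "It exists."], sentiments)
print(sentiments)  # e.g. {'negative': 1, 'neutral': 1, 'positive': 1}
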
def sentiment_analysis(tweet):
    """Returns the sentiment of the tweet from -1 to 1"""
    vs = vaderSentiment(tweet)
    pos = vs["pos"]
    neg = vs["neg"]
    neu = vs["neu"]
    # find the highest-scoring sentiment
    highest = max(pos, neg, neu)
    if highest == neg:
        # a dominant negative score is reported as a negative value
        highest *= -1
    elif highest == neu:
        # neutral maps to 0.0; a dominant positive score is returned as-is
        highest = 0.0
    return highest

def compute_vader_sentiments(posts):
    cnt = 0
    sentiments = []
    for post in posts:
        cnt += 1
        data = {}
        data["post"] = post
        vs = vaderSentiment(post)
        # print vs
        if vs["neg"] >= vs["neu"] and vs["neg"] >= vs["pos"]:
            data["label"] = "neg"
        elif vs["pos"] >= vs["neu"] and vs["pos"] >= vs["neg"]:
            data["label"] = "pos"
        else:
            data["label"] = "neutral"
        sentiments.append(data)
    print "sentiments computed for %d posts" % cnt
    return sentiments

def sentiment_classifier(course_code):
    # delete all previous classifications
    Classification.objects.filter(classifier='VaderSentiment').delete()
    # get messages
    sm_objs = LearningRecord.objects.filter(course_code=course_code)
    for sm_obj in sm_objs:
        message = sm_obj.message.encode('utf-8', 'replace')
        sentiment = "Neutral"
        vs = vaderSentiment(message)
        # print vs, message
        # print "\n\t" + str(vs)
        if vs['compound'] > 0:
            sentiment = "Positive"
        elif vs['compound'] < 0:
            sentiment = "Negative"
        # save the classification
        classification_obj = Classification(xapistatement=sm_obj,
                                            classification=sentiment,
                                            classifier='VaderSentiment')
        classification_obj.save()

def get_sentiment(item, source):
    '''Get the overall sentiment of the video's description'''
    if source == 'twitter':
        description = item['tweet']['orig_text']
    elif source == 'facebook':
        if 'description' in item:
            description = item['description']
        else:
            description = ''
    else:
        description = item['items'][0]['snippet']['description']
    description = description.encode('utf-8').strip()
    sent = vaderSentiment(description)
    item.setdefault("sentiment", sent['compound'])
    return item

def on_data(self, data):
    fil = open("meu.txt", "a")  # opened but never written to
    stop = set(nltk.corpus.stopwords.words('english'))
    stop.update(['http', 'https', 'rt'])
    tweet = json.loads(data)
    if 'text' in tweet:
        texto = tweet['text'].encode('utf-8', 'ignore')
        self.numstop -= 1
        texto = self.user + '-' + texto
        self.producer.send(self.instanttopic, texto)
        saveTweet('pos', tweet, self.user)
        saveLocation('pos', tweet, self.user)
        vs = vaderSentiment(str(texto))
        contagemneg = vs['neg']
        contagempos = vs['pos']
        contagemspam = vs['neu']
        filo = open("vader.txt", 'a')  # opened but never written to
    if self.numstop == 0:
        return False
    return True

def baseline(tweets, emoji_maps):
    """Takes cleaned tweets. Returns list of baseline predictions"""
    assigned_emojis = list()
    for tweet in tweets:
        emoji_assigned = False
        # first, try to match any word in the tweet against the emoji tag lists
        for word in tweet.split():
            for emoji_map in emoji_maps:
                tags = emoji_map["tags"]
                for tag in tags.split():
                    if word == tag and emoji_assigned == False:
                        uni = emoji_map["unicode"]
                        print(tag)
                        print(uni)
                        assigned_emojis.append(uni.lower())
                        emoji_assigned = True
        # otherwise fall back to a sentiment-based default emoji
        if emoji_assigned == False:
            vs = vaderSentiment(tweet)
            positive = vs["pos"]
            negative = vs["neg"]
            neutral = vs["neu"]
            compound = vs["compound"]
            if positive > negative:
                uni = u"\U0001f60a"  # smiling face; TODO: how were these emojis picked?
            else:
                uni = u"\U0001f622"  # crying face
            assigned_emojis.append(uni)
            emoji_assigned = True
    return assigned_emojis

def process(self, tup):
    twt = tup.values[0].encode("utf-8", "replace")
    # remove links
    twt = re.sub(r'https?:\S+', "", twt)
    # remove @ symbol
    twt = re.sub(r'@', "", twt)
    # split hashtags by camel case
    pattern = re.compile(r'#\S+')
    for match in re.findall(pattern, twt):
        split = match.replace("#", "")
        split = re.sub(r"([A-Z])", " \\1", split)
        split = re.sub(r"([a-zA-Z])([0-9])", "\\1 \\2", split)
        split = split[1:] + "."
        twt = twt.replace(match, split)
    # run VADER over the normalized text
    vader = vaderSentiment(twt)['compound']
    # emit results for every matching queryId
    for queryId in tup.values[1]:
        storm.emit([queryId, vader, tup.values[0]])

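# A standalone demo (hypothetical input) of the hashtag normalization used in
# process() above, with the same regexes. Note the implicit assumption that a
# hashtag starts with an uppercase letter: split[1:] strips the space inserted
# before the first capital, so "#lowerCase" would lose its first character.
import re

def split_hashtags(twt):
    pattern = re.compile(r'#\S+')
    for match in re.findall(pattern, twt):
        split = match.replace("#", "")
        split = re.sub(r"([A-Z])", " \\1", split)               # space before capitals
        split = re.sub(r"([a-zA-Z])([0-9])", "\\1 \\2", split)  # space before digits
        split = split[1:] + "."
        twt = twt.replace(match, split)
    return twt

print(split_hashtags("Loved it #BestMovie2016"))  # -> Loved it Best Movie 2016.
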
# Search twitter for @CocaCola's most recent tweets
q = 'from:CocaCola'
count = 25
tweets = tw.search.tweets(q=q, count=count, lang='en', result_type='recent')
texts = []

print 'Sentiment Analysis for @CocaCola\'s Most Recent Tweets:'
print '--------------------------------------------------------------------------'

# Sentiment analysis
for status in tweets['statuses']:
    texts.append(status['text'])
    print 'Tweet:'
    print '\t' + status['text'].encode('utf-8')
    vs = vaderSentiment(status['text'].encode('utf-8'))
    print 'Sentiment analysis:'
    print '\t' + str(vs['compound'])
    print '--------------------------------------------------------------------------'

print '\nLexical Analysis for @CocaCola\'s Most Recent Tweets:'
print '--------------------------------------------------------------------------'

# Lexical analysis
for text in texts:
    print 'Tweet:'
    print '\t' + text.encode('utf-8')
    words = []
    for w in text.split():
        words.append(w)
    print 'Lexical diversity:'

def analyze(path, output_path):
    k = open(path, 'r')
    o = open(output_path, 'w')
    # Fox News article boilerplate that should never be scored
    boilerplate = "This material may not be published, broadcast, rewritten, or redistributed."
    for url in k:
        print url
        split_lines = url.split('/')
        date = extract_time_fox_news(url)
        if date != 0:
            result = []
            result.append(url.strip('\n').strip('\r'))
            if date >= 20150101 and ('news' in split_lines or 'politics' in split_lines):
                total_paragraph = 0
                print date
                result.append(str(date))
                sentences = []
                page = requests.get(url.strip("\n"))
                soup = BeautifulSoup(page.content)
                total_score = 0
                temp = soup.find_all('article', {"itemprop": "articleBody"})
                header = soup.find_all('em')
                first_p = soup.find_all('br')
                # lead paragraphs inside <em> tags
                for x in header:
                    start = []
                    for y in x.contents:
                        if exclude_text not in str(y.string):
                            start.append(str(y.string))
                    texts = "".join(start)
                    if texts != " " and texts != "\n" and texts != "" and texts != "\r" and boilerplate not in texts and texts != 'None':
                        print texts
                        sentences.append(texts)
                        zen = vaderSentiment(unicode(texts))
                        print zen
                        total_paragraph += 1
                        if zen['neu'] < float(1):
                            if zen['pos'] > zen['neg']:
                                total_score += zen['pos']
                            else:
                                total_score -= zen['neg']
                # paragraphs that directly follow <br> tags
                for x in first_p:
                    if str(x.next_sibling) != " " and exclude_text not in str(x.next_sibling):
                        texts = str(x.next_sibling)
                        if texts != " " and texts != "\n" and texts != "" and texts != "\r" and boilerplate not in texts and texts != 'None':
                            print texts
                            sentences.append(texts)
                            zen = vaderSentiment(unicode(texts))
                            print zen
                            total_paragraph += 1
                            if zen['neu'] < float(1):
                                if zen['pos'] > zen['neg']:
                                    total_score += zen['pos']
                                else:
                                    total_score -= zen['neg']
                # paragraphs inside the article body
                for x in temp:
                    for y in x.contents:
                        texts = unicode(y.string).strip('\n')
                        if texts != " " and texts != "\n" and texts != "" and texts != "\r" and boilerplate not in texts and texts != 'None':
                            print texts
                            sentences.append(texts)
                            zen = vaderSentiment(unicode(texts))
                            print zen
                            total_paragraph += 1
                            if zen['neu'] < float(1):
                                if zen['pos'] > zen['neg']:
                                    total_score += zen['pos']
                                else:
                                    total_score -= zen['neg']
                # RAKE keyword scores decide whether the article is about
                # Trump (T), Clinton (H) or both (B)
                rake_object = RAKE.Rake('stop-word-list.txt')
                keywords = rake_object.run("\n".join(sentences))
                trump_prob = 0
                hillary_prob = 0
                for x in keywords:
                    if 'trump' in x[0] or 'donald' in x[0]:
                        trump_prob += int(x[1])
                    if 'hillary' in x[0] or 'clinton' in x[0]:
                        hillary_prob += int(x[1])
                print url.strip("\n")
                print "Trump total ", trump_prob
                print "Hillary total ", hillary_prob
                if abs(trump_prob - hillary_prob) <= 2:
                    result.append("B")
                elif trump_prob > hillary_prob:
                    result.append("T")
                else:
                    result.append("H")
                if total_paragraph != 0:
                    print "Total: ", total_score / total_paragraph
                    print "Total paragraph: ", total_paragraph
                    print "\n"
                    result.append(str(total_score / total_paragraph))
                    o.writelines(", ".join(result) + "\n")

from datetime import datetime

from docopt import docopt
from twitterframework import TwitterAPI
from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
import win_unicode_console

if __name__ == '__main__':
    arguments = docopt(__doc__, version='Twitter Stream DB 1.0')
    twitter_fw = TwitterAPI(arguments['<file>'])
    twitter_api = twitter_fw.getAPI()
    keylist = [key for key in arguments['<TwitterKeyword>']]
    rows = []
    win_unicode_console.enable()  # allows printing unicode to the Windows console
    for tweet in twitter_api.GetStreamFilter(follow=None, track=keylist,
                                             locations=None, delimited=None,
                                             stall_warnings=None):
        t_lang = tweet['lang']
        t_text = tweet['text']
        vs = vaderSentiment(t_text)
        for i, searchtext in enumerate(keylist):
            if searchtext in t_text:
                print("'{0}','{1}','{2}','{3}','{4}','{5}'".format(
                    str(datetime.now()), str(t_text), searchtext,
                    str(t_lang), vs['pos'], vs['neg']))

"Warren Beatty has never been so entertaining.", "I won't say that the movie is astounding and I wouldn't claim that \ the movie is too banal either.", "I like to hate Michael Bay films, but I couldn't fault this one", "It's one thing to watch an Uwe Boll film, but another thing entirely \ to pay for it", "The movie was too good", "This movie was actually neither that funny, nor super witty.", "This movie doesn't care about cleverness, wit or any other kind of \ intelligent humor.", "Those who find ugly meanings in beautiful things are corrupt without \ being charming.", "There are slow and repetitive parts, BUT it has just enough spice to \ keep it interesting.", "The script is not fantastic, but the acting is decent and the cinematography \ is EXCELLENT!", "Roger Dodger is one of the most compelling variations on this theme.", "Roger Dodger is one of the least compelling variations on this theme.", "Roger Dodger is at least compelling as a variation on the theme.", "they fall in love with the product", "but then it breaks", "usually around the time the 90 day warranty expires", "the twin towers collapsed today", "However, Mr. Carter solemnly argues, his client carried out the kidnapping \ under orders and in the ''least offensive way possible.''" ] sentences.extend(tricky_sentences) #sid = SentimentIntensityAnalyzer() for sentence in sentences: print sentence, ss = vaderSentiment(sentence) print "\t" + str(ss) print ""
def get_sentiment(sentence):
    # pick the highest-scoring entry from VADER's result dict; return its
    # label ('neg'/'neu'/'pos'), or the raw compound value in the unlikely
    # case that 'compound' scores highest
    sentiments = max(vaderSentiment(sentence).iteritems(),
                     key=operator.itemgetter(1))
    if sentiments[0] != "compound":
        return sentiments[0]
    else:
        return sentiments[1]

def getSentiment(text):
    vs = vaderSentiment(text.encode('utf-8'))
    return vs['compound']

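# A hedged compatibility shim (an assumption, not part of the snippets above):
# newer vaderSentiment releases expose SentimentIntensityAnalyzer instead of
# the module-level sentiment() function these snippets import. This keeps the
# vaderSentiment(text) call sites working on either version.
try:
    from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
except ImportError:
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

    _vader = SentimentIntensityAnalyzer()

    def vaderSentiment(text):
        # the class-based API expects str, not bytes
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return _vader.polarity_scores(text)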