def strong_sentiment(review):
    """Count strongly positive sentences minus strongly negative ones in a review."""
    count = 0
    for x in utils.sentiment(review.review):
        if x['neg'] > 0.5:
            count -= 1
        elif x['pos'] > 0.5:
            count += 1
    return count
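# Hedged sketch (assumption, not the project's actual utils module): strong_sentiment above,
# and sentiment_variance / raw_sentiment further below, all expect utils.sentiment(text) to
# return one polarity dict per sentence. A minimal stand-in built on NLTK's VADER analyzer
# could look like this (requires the 'punkt' and 'vader_lexicon' NLTK data packages):
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

_analyzer = SentimentIntensityAnalyzer()

def sentiment(text):
    """Return one VADER score dict ('neg', 'neu', 'pos', 'compound') per sentence of text."""
    return [_analyzer.polarity_scores(s) for s in nltk.sent_tokenize(text)]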
def df_transform(self, terms):
    # Blank out missing comments (only the Comment column, not the whole row), then de-duplicate.
    self.df.loc[pd.isnull(self.df['Comment']), 'Comment'] = ""
    self.df = self.df.drop_duplicates('Comment')
    self.df['date'] = self.df['date'].apply(lambda x: unix_convert(x))
    self.df['Comment'] = self.df['Comment'].apply(lambda x: clean_text(str(x)))
    self.df['Sentiment_raw'] = self.df.apply(lambda row: sentiment(row['Comment']), axis=1)
    self.df['Sentiment'] = self.df.apply(lambda row: sentiment_new(row['Comment'], terms), axis=1)
    self.df['State'] = self.df.apply(lambda row: state_label(str(row['Locations'])), axis=1)
    self.df = pd.merge(self.df, self.longlat, how='left', on='State')
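# Hedged sketch (assumption): df_transform above relies on project helpers such as
# unix_convert, clean_text, sentiment_new and state_label that are defined elsewhere.
# unix_convert presumably turns a Unix epoch timestamp into a readable date; a minimal
# hypothetical version:
from datetime import datetime, timezone

def unix_convert(ts):
    """Convert a Unix timestamp in seconds to an ISO-style UTC date string."""
    return datetime.fromtimestamp(float(ts), tz=timezone.utc).strftime('%Y-%m-%d')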
def tweet_processor(self, tweet, flag, pipe):
    self.totCount += 1
    self.logger.debug(
        f"Pipe: {pipe} | Processing Tweet ID: {tweet.id} | User ID: {tweet.author.id}")
    # Consider the original Tweet of a retweet.
    if hasattr(tweet, 'retweeted_status'):
        tweet = tweet.retweeted_status
    if tweet.id_str not in self.twt_db.db and tweet.id not in self.history['tweet']:
        self.history['tweet'].add(tweet.id)
        valid, json_tweet = self.check_location(tweet)
        json_tweet = filter_tweet(json_tweet, self.filterKeys)
        if valid:
            if check_relevance(json_tweet['full_text'], self.searchTermsList):
                # Add relevant tweet to database with relevance tags.
                json_tweet['keywords'], json_tweet['hashtags'] = extract_keywords(json_tweet['full_text'])
                json_tweet['sentiment'] = sentiment(json_tweet['full_text'])
                json_tweet['relevance'] = True
                if self.twt_db.save(json_tweet):
                    self.logger.info(f'Pipe: {pipe} | Saving Tweet ID: {json_tweet["id"]} | Database: twt_db')
                    self.twtCount += 1
                    self.validTwtCount += 1
            else:
                json_tweet['relevance'] = False
                # Add tweet to database with normal tags.
                if self.twt_db.save(json_tweet):
                    self.logger.info(f'Pipe: {pipe} | Saving Tweet ID: {json_tweet["id"]} | Database: twt_db')
                    self.twtCount += 1
            self.add_user_to_queue(json_tweet['user'], flag, pipe)
            self.logger.info(f'Pipe: {pipe} | Count: Valid - {self.validTwtCount} | '
                             f'{self.searchState} - {self.twtCount} | Total - {self.totCount}')
            return True
        else:
            return False
    else:
        self.logger.debug(
            f"Pipe: {pipe} | Skipping Tweet ID: {tweet.id} as already processed.")
        return False
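# Hedged sketch (assumption): check_relevance above receives the tweet text and the
# configured search-term list and must return a boolean. A minimal keyword-matching
# version could be:
def check_relevance(text, search_terms):
    """Return True if any search term occurs in the text (case-insensitive)."""
    lowered = text.lower()
    return any(term.lower() in lowered for term in search_terms)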
def sentiment_variance(review):
    """Variance of compound sentiment polarities between sentences in a review."""
    polarities = utils.sentiment(review.review)
    return numpy.var([x['compound'] for x in polarities], dtype=numpy.float64)
def raw_sentiment(review):
    """Sum of compound sentiment polarities across all sentences in a review."""
    return sum(x['compound'] for x in utils.sentiment(review.review))
# TOKENIZE EACH SENTENCE
sentence_preprocess = lambda row: list(
    set(bigrammer[preprocess1(row.sentence)]).intersection(set(row.tokened)))
one_sentences['sentence_tokens'] = one_sentences.apply(
    lambda row: sentence_preprocess(row), axis=1)
print('Tokenized sentences:', round(time() - t, 2), 's')
one_sentences.head()

# DROP 'TOKENED', WE DON'T NEED IT ANYMORE
one_sentences.drop('tokened', axis=1, inplace=True)

# SENTIMENT ANALYSIS
# ----------->>> CAN GO BACK AND ADJUST POLARITY SCORE
# GET polarity of each sentence; let stars skew polarity
one_sentences['sentence_polarity'] = one_sentences.apply(lambda row: sentiment(
    row.sentence, row.stars, PNthresholds, star_importance), axis=1)
print('Analyzed sentiment:', round(time() - t, 2), 's')

# ......tok_sentDF...........FIRST BIG DF TO KEEP AND COME BACK TO
# EXPLODE DATAFRAME INTO KEYWORD-SENTENCE PAIRS
tok_sentDF = pd_explode(one_sentences, 'sentence_tokens', 'token')
print('Exploded into every token-sentence combo:', round(time() - t, 2), 's')

# CREATE WORD-LEVEL SENTIMENT POLARITY TABLE, GROUPING BY KEYWORD
toksP = tok_sentDF[tok_sentDF['sentence_polarity'] > 0].groupby('token').agg({
    'review_id': list,
    'sentence': list,
    'stars': list,
})
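# Hedged sketch (assumption): pd_explode above is a small helper that expands a list-valued
# column into one row per element. With pandas >= 0.25 it can be written on top of
# DataFrame.explode:
import pandas as pd

def pd_explode(df, list_col, new_col):
    """Explode df[list_col] into one row per list element, renaming the column to new_col."""
    return df.explode(list_col).rename(columns={list_col: new_col}).reset_index(drop=True)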
def login():
    prob = ''
    if request.method == 'POST':
        tag = request.form['input']
        prob = sentiment(tag)
    return render_template('index.html', probability=prob)
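# Hedged sketch (assumptions, not shown in the snippet): the login view above needs a Flask
# app, a route decorator, and an index.html template containing a form field named "input";
# sentiment() is assumed to be defined or imported elsewhere in the project.
from flask import Flask, render_template, request

app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def login():
    prob = ''
    if request.method == 'POST':
        prob = sentiment(request.form['input'])
    return render_template('index.html', probability=prob)

if __name__ == '__main__':
    app.run(debug=True)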