Example #1
def strong_sentiment(review):
    """Net count of strongly polarized sentences in a review text."""
    count = 0
    for x in utils.sentiment(review.review):
        if x['neg'] > 0.5:
            count -= 1
        elif x['pos'] > 0.5:
            count += 1

    return count
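A self-contained sketch of the same thresholding idea, assuming utils.sentiment wraps a per-sentence VADER analysis; NLTK's SentimentIntensityAnalyzer stands in here for the project helper:

from nltk.tokenize import sent_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def strong_sentiment_standalone(text, threshold=0.5):
    # Requires the 'punkt' and 'vader_lexicon' NLTK data packages.
    sia = SentimentIntensityAnalyzer()
    count = 0
    for sentence in sent_tokenize(text):
        scores = sia.polarity_scores(sentence)  # {'neg', 'neu', 'pos', 'compound'}
        if scores['neg'] > threshold:
            count -= 1
        elif scores['pos'] > threshold:
            count += 1
    return count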
Example #2
def df_transform(self, terms):
    """Clean the comment DataFrame, then attach sentiment and location columns."""
    # Target the Comment column with .loc; a bare boolean-mask assignment
    # would blank out every column of the matched rows.
    self.df.loc[pd.isnull(self.df['Comment']), 'Comment'] = ""
    self.df = self.df.drop_duplicates('Comment')
    self.df['date'] = self.df['date'].apply(unix_convert)
    self.df['Comment'] = self.df['Comment'].apply(lambda x: clean_text(str(x)))
    self.df['Sentiment_raw'] = self.df.apply(lambda row: sentiment(row['Comment']), axis=1)
    self.df['Sentiment'] = self.df.apply(lambda row: sentiment_new(row['Comment'], terms), axis=1)
    self.df['State'] = self.df.apply(lambda row: state_label(str(row['Locations'])), axis=1)
    self.df = pd.merge(self.df, self.longlat, how='left', on='State')
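A minimal sketch of that null-comment step in isolation, showing why .loc is needed (the toy DataFrame and values are illustrative only):

import pandas as pd

df = pd.DataFrame({'Comment': ['good service', None, 'good service'],
                   'date': [1, 2, 3]})
# Only the matched Comment cells change; df[mask] = "" would wipe whole rows.
df.loc[pd.isnull(df['Comment']), 'Comment'] = ""
df = df.drop_duplicates('Comment')  # then dedupe on the cleaned text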
Example #3
    def tweet_processor(self, tweet, flag, pipe):
        self.totCount += 1
        self.logger.debug(
            f"Pipe: {pipe} | Processing Tweet ID: {tweet.id} | User ID: {tweet.author.id}")

        # Consider the original Tweet of a retweet.
        if hasattr(tweet, 'retweeted_status'):
            tweet = tweet.retweeted_status

        if tweet.id_str not in self.twt_db.db and tweet.id not in self.history['tweet']:
            self.history['tweet'].add(tweet.id)
            valid, json_tweet = self.check_location(tweet)
            json_tweet = filter_tweet(json_tweet, self.filterKeys)

            if valid:
                if check_relevance(json_tweet['full_text'], self.searchTermsList):
                    # Add relevant tweet to database with relevant tags.
                    json_tweet['keywords'], json_tweet['hashtags'] = extract_keywords(json_tweet['full_text'])
                    json_tweet['sentiment'] = sentiment(json_tweet['full_text'])
                    json_tweet['relevance'] = True
                    if self.twt_db.save(json_tweet):
                        self.logger.info(f'Pipe: {pipe} | Saving Tweet ID: {json_tweet["id"]} | Database: twt_db')
                        self.twtCount += 1
                        self.validTwtCount += 1
                else:
                    # Add tweet to database with normal tags.
                    json_tweet['relevance'] = False
                    if self.twt_db.save(json_tweet):
                        self.logger.info(f'Pipe: {pipe} | Saving Tweet ID: {json_tweet["id"]} | Database: twt_db')
                        self.twtCount += 1

                self.add_user_to_queue(json_tweet['user'], flag, pipe)
                self.logger.info(f'Pipe: {pipe} | Count: Valid - {self.validTwtCount} | {self.searchState} - {self.twtCount} | Total - {self.totCount}')
                return True
            else:
                return False
        else:
            self.logger.debug(
                f"Pipe: {pipe} | Skipping Tweet ID: {tweet.id} as already processed.")
            return False
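The retweet-unwrapping step on its own, assuming Tweepy Status objects (a retweet carries the original tweet under retweeted_status):

def unwrap_retweet(tweet):
    # Return the original tweet for retweets, the tweet itself otherwise.
    return getattr(tweet, 'retweeted_status', tweet)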
Example #4
def sentiment_variance(review):
    """variance of compound sentiment polarities between sentences in review"""
    polarities = utils.sentiment(review.review)
    return numpy.var([x['compound'] for x in polarities], dtype=numpy.float64)
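For intuition, a tiny worked example with hand-made polarity dicts standing in for utils.sentiment's output:

import numpy

polarities = [{'compound': 0.8}, {'compound': -0.6}, {'compound': 0.1}]
# Large variance means mixed sentiment within a single review.
numpy.var([x['compound'] for x in polarities], dtype=numpy.float64)  # ~0.3267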
Example #5
def raw_sentiment(review):
    """Sum of compound sentiment polarities across sentences in a review."""
    return sum(x['compound'] for x in utils.sentiment(review.review))
# TOKENIZE EACH SENTENCE
def sentence_preprocess(row):
    # Keep only tokens that survive the bigram pass AND appear in the
    # review-level token list.
    return list(set(bigrammer[preprocess1(row.sentence)]).intersection(set(row.tokened)))

one_sentences['sentence_tokens'] = one_sentences.apply(sentence_preprocess, axis=1)
print('Tokenized sentences:', round(time() - t, 2), 's')
one_sentences.head()

# DROP 'TOKENED', WE DON'T NEED IT ANYMORE
one_sentences.drop('tokened', axis=1, inplace=True)

# SENTIMENT ANALYSIS
# -----------> CAN GO BACK AND ADJUST POLARITY SCORE
# GET polarity of each sentence (let stars skew polarity)
one_sentences['sentence_polarity'] = one_sentences.apply(
    lambda row: sentiment(row.sentence, row.stars, PNthresholds, star_importance),
    axis=1)
print('Analyzed sentiment:', round(time() - t, 2), 's')

# ......tok_sentDF...........FIRST BIG DF TO KEEP AND COME BACK TO
# EXPLODE DATAFRAME INTO KEYWORD-SENTENCE PAIRS
tok_sentDF = pd_explode(one_sentences, 'sentence_tokens', 'token')
print('Exploded into every token-sentence combo:', round(time() - t, 2), 's')

# CREATE WORD-LEVEL SENTIMENT POLARITY TABLE, Grouping by keyword
toksP = tok_sentDF[tok_sentDF['sentence_polarity'] > 0].groupby('token').agg({
    'review_id': list,
    'sentence': list,
    'stars': list,  # assumed completion: the original example is truncated here
})
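pd_explode appears to be a project-specific helper; on pandas 0.25+ the built-in DataFrame.explode does the same token-sentence expansion:

import pandas as pd

df = pd.DataFrame({'sentence': ['a b', 'c'],
                   'sentence_tokens': [['a', 'b'], ['c']]})
# One row per (token, sentence) pair, mirroring pd_explode's assumed behavior.
tok_sentDF = (df.explode('sentence_tokens')
                .rename(columns={'sentence_tokens': 'token'}))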
Example #7
def login():
    prob = ''
    if request.method == 'POST':
        tag = request.form['input']
        prob = sentiment(tag)
    return render_template('index.html', probability=prob)
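A minimal sketch of the surrounding Flask app this view implies; the route, form field, and sentiment() scorer are assumptions, not part of the original snippet:

from flask import Flask, request, render_template

app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def login():
    prob = ''
    if request.method == 'POST':
        tag = request.form['input']
        prob = sentiment(tag)  # project-specific scorer, assumed importable
    return render_template('index.html', probability=prob)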