Example #1
class SentimentScore:
    """Vader sentiment score."""
    analyzer = vader.SentimentIntensityAnalyzer()

    def __init__(self, text: str, score: ScoreDict = None) -> None:
        """Create a new sentiment score."""
        if score is None:
            score = SentimentScore.analyzer.polarity_scores(text)
        self.text = text
        self.score = score

    def __repr__(self) -> str:
        """Return a serialized, machine-readable representation of self."""
        return f"SentimentScore({repr(self.text)}, score={repr(self.score)})"

    def __str__(self) -> str:
        """Return a human-readable string representation of self."""
        return '\n'.join((repr(self), repr(self.score)))

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary including, sentiment scores and bar color."""
        score: Dict[str, Any] = self.score.copy()
        score['color'] = self.color
        score['text'] = self.text
        return score

    @property
    def color(self) -> RGBColor:
        """Return (red, green, blue) indicating sentiment."""
        red = self.score['neg']
        green = self.score['pos']
        blue = self.score['neu']
        return (red, green, blue)
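A minimal usage sketch for the class above (assuming `from nltk.sentiment import vader` with the vader_lexicon resource downloaded, and that ScoreDict, RGBColor, Dict, and Any come from the snippet's original module):

score = SentimentScore("I love this library")
print(score.color)      # (red, green, blue) = (neg, pos, neu) proportions
print(score.to_dict())  # the VADER scores plus 'color' and 'text' keys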
Example #2
def sentiment_analyzer(reviews_collection_text):
    sia = vader.SentimentIntensityAnalyzer()
    with open('data/reviews_%s' % reviews_collection_text, 'r') as f:
        comments = f.readlines()
    pos_comments = [
        comment for comment in comments
        if sia.polarity_scores(comment)['compound'] > 0
    ]
    neg_comments = [
        comment for comment in comments
        if sia.polarity_scores(comment)['compound'] < 0
    ]
    neu_comments = [
        comment for comment in comments
        if comment not in pos_comments and comment not in neg_comments
    ]
    with open('data/reviews_%s_pos' % reviews_collection_text, 'w') as f:
        for pos_comment in pos_comments:
            f.write('%s' % pos_comment)
    with open('data/reviews_%s_neg' % reviews_collection_text, 'w') as f:
        for neg_comment in neg_comments:
            f.write('%s' % neg_comment)
    with open('data/reviews_%s_neu' % reviews_collection_text, 'w') as f:
        for neu_comment in neu_comments:
            f.write('%s' % neu_comment)
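The function reads data/reviews_<name> and splits its lines into three sibling files by compound score. A usage sketch with a hypothetical collection name (assumes the data/ directory and input file exist):

sentiment_analyzer('hotel')
# reads data/reviews_hotel; writes data/reviews_hotel_pos, _neg, and _neu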
Example #3
def extract_sentiment(dataframe):
    """
    This function scores each word token in the tweet, combines
    the scores within a tweet to give an overall sentiment score.
    The scores for each tweet is appended as a new column 'sentiment_score'
    to the supplied dataframe and returned.

    Sentiment is extracted using the `Vader sentiment intensity
    analyzer`. A sentiment indicates the view or opinion
    expressed. This opinion can be positive or negative.
    It is represented as a floating point decimal, where 0
    indicates neutral sentiment, negative values for negative
    sentiments and positive for positive sentiments.

    Args:
        dataframe (pandas.DataFrame):
            The dataframe that holds the twitter data.
            The dataframe must contain the processed_text column.

    Returns:
        pd.DataFrame:
        A dataframe that holds the twitter data. Including a new
        column 'sentiment_score' holding the sum of compounded
        word tokens sentiment scores of processed_text.
    """
    sia = vd.SentimentIntensityAnalyzer()
    dataframe['sentiment_score'] = dataframe['processed_text'].apply(
        lambda x: sum(
            sia.polarity_scores(token)['compound']
            for token in word_tokenize(' '.join(x))
        )
    )
    return dataframe
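A small way to exercise extract_sentiment (assuming pandas, plus nltk with the vader_lexicon and punkt resources and the vd / word_tokenize imports used in the snippet):

import pandas as pd
df = pd.DataFrame({'processed_text': [['great', 'movie'], ['terrible', 'plot']]})
print(extract_sentiment(df)['sentiment_score'])
# the first row sums to a positive total, the second to a negative one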
Example #4
    def _get_title_negativity(self, article_title):

        analyzer = vader.SentimentIntensityAnalyzer()

        scores = analyzer.polarity_scores(str(article_title))

        return scores['neg']
Example #5
def extract_sentiment_stemmed(dataframe):
    """
    Extracts the sentiment score from the stemmed token words
    in the `stemmed` column of the dataframe.

    Args:
        dataframe (pandas.DataFrame):
            a DataFrame that holds the twitter data.
            requires the `stemmed` column.

    Returns:
        pandas.DataFrame:
            a DataFrame that holds the twitter data and
            contains the 'sentiment_score_stemmed' column.

    Raises:
        Exception: unable to process sentiment_score_stemmed when
        `stemmed` column is not found.
    """
    if 'stemmed' not in dataframe.columns:
        raise Exception('Unable to process sentiment_score_stemmed '
                        'when the `stemmed` column is not found.')
    sia = vd.SentimentIntensityAnalyzer()
    dataframe['sentiment_score_stemmed'] = dataframe['stemmed'].apply(
        lambda x: sum(
            sia.polarity_scores(token)['compound']
            for token in word_tokenize(' '.join(x))
        )
    )
    return dataframe
Example #6
def conductSentimentAnalysis(allDict):

    nltk.download('vader_lexicon')
    senti = vader.SentimentIntensityAnalyzer()

    for val in tqdm(list(allDict.values()), desc="Analyze Sentiment"):
        val["sentiment"] = senti.polarity_scores(val["collatedContents"])
    return
Example #7
    def __init__(self):
        self.analyzer = sent.SentimentIntensityAnalyzer()

        self.raw_tweets = None
        self.get_tweets()
        self.raw_tweets = self.raw_tweets[:10000]

        self.train_labels = None
        self.test_labels = None
Example #8
def mapping_func(line):
    # if 'text' in l:
    compound = float(vader.SentimentIntensityAnalyzer() \
                     .polarity_scores(line)['compound'])
    if compound < -.25:
        return (0, 0, 1, 1)
    elif compound > .25:
        return (1, 0, 0, 1)
    else:
        return (0, 1, 0, 1)
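The ±0.25 cutoffs bucket each line into an RGBA color tuple: red for positive, blue for negative, green for anything in between. A quick check (assuming `from nltk.sentiment import vader` and the lexicon installed):

print(mapping_func('I absolutely love this'))  # expected (1, 0, 0, 1): red, positive
print(mapping_func('this is the worst'))       # expected (0, 0, 1, 1): blue, negative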
Example #9
def get_vader_polarity(vader_context):
    '''
    Calculate the sentiment polarity of a comment using VADER sentiment scoring
    
    Output = Sentiment Polarity
    '''
    if vader_context is None:
        return np.nan
    vader_sent = vader.SentimentIntensityAnalyzer()
    return vader_sent.polarity_scores(vader_context)['compound']
Example #10
def nltk_vader(query, category=None, top=None, bottom=None):
    '''
    Core sentiment analysis. Given a query object, uses the NLTK VADER
    SentimentIntensityAnalyzer to score the sentiment of each tweet. If a
    category is given, incorporates supplemental information from a Naive
    Bayes classifier. Assigns an average rating to the query object, and a
    normalized and unnormalized rating to each tweet object.
    '''
    #Original code
    if category == 'movie':
        with open('get_rating/movie_terms.json') as f:
            movie_terms = json.load(f)
    sid = vd.SentimentIntensityAnalyzer()
    avg_rating = 0
    num_valenced = 0
    # best/worst stay None unless some tweet scores above/below zero.
    best = worst = None
    best_score = 0
    worst_score = 0
    X_max = 85
    X_min = 15
    for tweet in query.tweets:
        scores = sid.polarity_scores(tweet.text)
        tweet.rate = scores['compound']
        if category == 'movie':
            for word in tweet.text.lower().split():
                if word in movie_terms:
                    tweet.rate += movie_terms[word]
        tweet.norm_rate = (((tweet.rate * 50 + 50) - X_min) /
                           (X_max - X_min)) * 100
        if tweet.rate > best_score:
            best = tweet
            best_score = tweet.rate
        elif tweet.rate < worst_score:
            worst = tweet
            worst_score = tweet.rate
        avg_rating += tweet.rate
        num_valenced += (tweet.rate != 0)

    if top or bottom:
        tweets_sorted = sorted(query.tweets, key=lambda x: x.norm_rate)
        if top:
            for t in tweets_sorted[-10:]:
                query.top_tweets.append((t.text, t.norm_rate))
        if bottom:
            for t in tweets_sorted[:10]:
                query.bottom_tweets.append((t.text, t.norm_rate))

    if num_valenced != 0:
        query.avg_rate = (avg_rating / float(num_valenced)) * 50 + 50
        X_std = (query.avg_rate - X_min) / (X_max - X_min)
        query.avg_rate = X_std * 100
        query.best = best
        query.worst = worst
Example #11
    def post(self):
        args = parser.parse_args()
        vaderObj = vader.SentimentIntensityAnalyzer()
        res = vaderObj.polarity_scores(args.statement)
        print(res['compound'])
        if res['compound'] < 0:
            return {'res': 'Sentiment is negative'}, 201
        elif res['compound'] > 0:
            return {'res': 'Sentiment is positive'}, 201
        else:
            return {'res': 'Sentiment is neutral'}, 201
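The handler reads a 'statement' argument from a request parser defined elsewhere. A hypothetical setup matching it, assuming Flask-RESTful's reqparse:

from flask_restful import reqparse

parser = reqparse.RequestParser()
parser.add_argument('statement', type=str, required=True)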
Example #12
def combineTopicSentiment(patDF, noteDF, noteCol, docTopics, nTopic):
    # Sentiment analysis via analyze_sentiment (TextBlob) and VADER.
    sid = vader.SentimentIntensityAnalyzer()
    return pd.concat([
        patDF,
        pd.DataFrame(docTopics,
                     columns=["topic_" + str(i) for i in range(nTopic)]),
        noteDF[noteCol].apply(analyze_sentiment),
        noteDF[noteCol].apply(lambda x: analyze_vader_sent(x, sid))
    ], axis=1)
Example #13
    def process(self, document_field):
        '''Add sentiment scores based on VADER.'''
        try:
            senti = vader.SentimentIntensityAnalyzer()
            sentimentscores = senti.polarity_scores(document_field)
            return sentimentscores
        except LookupError:
            from nltk import download
            download('vader_lexicon')
            logger.error(
                "Couldn't find the VADER lexicon; downloaded it.\n"
                "You will have to re-run the processor."
            )
Example #14
def sentiment_cal(title, intro, body):

    sia = vad.SentimentIntensityAnalyzer()
    # Note: the title contributes its 'neg' score, while intro and body
    # contribute their 'compound' scores.
    tscore = float(sia.polarity_scores(title)['neg'])
    iscore = float(sia.polarity_scores(intro)['compound'])
    bscore = float(sia.polarity_scores(body)['compound'])
    score = tscore + iscore + bscore

    return round(score, 2)
Example #15
def determineSentiment(texts):
    """
    Determines the sentiment of a tweet based on the vader module

    :param text:        Tweet as a string
    :return:            Returns a dict of {neg:x, neu:y, pos:z, compound:w}
    """
    sentiment = []
    for text in texts:
        # Adding 1.0 because Naive Bayes doesnt do well with negative values.
        sentiment.append(vader.SentimentIntensityAnalyzer().polarity_scores(
            text)['compound'] + 1.0)
    return sparse.csr_matrix(sentiment, dtype='float').T
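A quick shape check for determineSentiment (assuming `from scipy import sparse` and the vader import used above):

X = determineSentiment(['good', 'bad', 'meh'])
print(X.shape)  # (3, 1): one shifted compound score per tweet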
Example #16
    def getSentiment(self):
        self.tweetSentiment = self.tweetDf[['date', 'tweet']]
        if self.lang == 'pt':
            s = leia.SentimentIntensityAnalyzer()
            self.tweetSentiment['textPolarity'] = self.tweetSentiment.tweet.apply(lambda frase: s.polarity_scores(frase)['compound'])
        elif self.lang == 'en':
            a = vader.SentimentIntensityAnalyzer()
            self.tweetSentiment['textPolarity'] = self.tweetSentiment.tweet.apply(lambda frase: a.polarity_scores(frase)['compound'])
        else:
            raise Exception('Please type \'pt\' for Portuguese or \'en\' for English')

        del self.tweetSentiment['tweet']
Example #17
def get_review_sentiments(positive_reviews, negative_reviews):
    sia = vader.SentimentIntensityAnalyzer()
    positive_compound_scores = [
        sentiment_calculator(sia, positive_review)
        for positive_review in positive_reviews
    ]
    negative_compound_scores = [
        sentiment_calculator(sia, negative_review)
        for negative_review in negative_reviews
    ]
    return {
        'results-on-positive': positive_compound_scores,
        'results-on-negative': negative_compound_scores
    }
Example #18
def sentimentListCalculation(feedbacks):
    pos = {}
    neg = {}
    sia = vader.SentimentIntensityAnalyzer()

    for fd in feedbacks:
        score = sia.polarity_scores(fd)
        if score['compound'] > 0:
            pos[fd] = score['pos']
        else:
            neg[fd] = score['neg']

    return (pos, neg)
Example #19
    def create_ml_features(self):
        '''
        Create features from these textual data
        '''

        for coinDetail in self.detailsList:
            savePath = os.path.join(self.historic_path.format(coinDetail['coinname']), "interpreted/data-{}.csv".format(self.algo_name))

            if not os.path.isfile(savePath):
                path = os.path.join(self.historic_path.format(coinDetail['coinname']), "raw")

                combinedCsv = os.path.join(path, "combined.csv")

                if os.path.isfile(combinedCsv):
                    df = pd.read_csv(combinedCsv, lineterminator='\n')
                    self.logger.info("CSV file read for {}".format(coinDetail['coinname']))

                    df['Tweet'] = cleanData(df['Tweet'])
                    self.logger.info("Tweets Cleaned")
                    
                    df['Time'] = pd.to_datetime(df['Time'], unit='s')
                    df = df.set_index('Time')
                    
                    self.logger.info("Calculating sentiment")
                    analyzer = vader.SentimentIntensityAnalyzer()
                    df['sentiment'] = df['Tweet'].swifter.apply(applyVader, analyzer=analyzer)

                    self.logger.info("Now calculating features")
                    df = applyParallel(df.groupby(pd.Grouper(freq='H')), self.f_add_features)
                    df['Time'] = pd.date_range(df['Time'].iloc[0], df['Time'].iloc[-1], periods=df['Time'].shape[0])
                    self.logger.info("Features Calculated")
                    
                    df['variation_all'] = df['n_bullish_all'].diff()
                    df = df.drop(['n_bullish_all', 'n_bearish_all'], axis=1)
                    df['mean_vader_change_top'] = df['mean_vader_top'].diff()
                    #add botorNot too
                    df = trends_ta(df, 'mean_vader_top')
                    df = trends_ta(df, 'mean_vader_all')
                    df = df.replace(np.inf, 0)
                    df = df.replace(-np.inf, 0)
                    df = df.replace(np.nan, 0)
                    
                    df.to_csv(savePath, index=None)
                    self.logger.info("Added all features. Saved to {}".format(savePath))
                else:
                    self.logger.info("{} does not exists so skipping".format(combinedCsv))
            else:
                self.logger.info("Using the cached file from {}".format(savePath))
Example #20
def plot_trend(res_fname, res_name):
    fname = 'project/reviews_data/{}.html'.format(res_fname)
    df = pd.read_html(fname, header=0)[0]
    df['date'] = pd.to_datetime(df['date'])
    senti = vader.SentimentIntensityAnalyzer()
    df["sentiment"] = df["text"].apply(
        lambda x: senti.polarity_scores(x)["compound"])
    # print("Pearsons coeff", round(pearsonr(df["sentiment"], df["stars"])[0], 2))
    df['year'], df['month'] = df['date'].dt.year, df['date'].dt.month
    df = df[df['year'] >= 2015]
    mean_stars = df.groupby(['year', 'month'], as_index=False).mean()
    mean_final = mean_stars[['year', 'month', 'sentiment', 'stars']]
    mean_final['period'] = mean_final['year'].astype('str').str.cat(
        mean_final['month'].astype('str'), sep='-')

    scaler = StandardScaler()
    mean_final['stars'] = scaler.fit_transform(
        np.expand_dims(mean_final['stars'], axis=1))
    mean_final['sentiment'] = scaler.fit_transform(
        np.expand_dims(mean_final['sentiment'], axis=1))
    f, ax = plt.subplots(figsize=(100, 20))

    x_col = 'period'

    sns.pointplot(ax=ax, x=x_col, y='stars', data=mean_final, color='blue')
    sns.pointplot(ax=ax, x=x_col, y='sentiment', data=mean_final, color='red')

    plt.yticks([])
    plt.ylabel('')
    plt.xlabel('')

    ax.legend(handles=ax.lines[::len(mean_final) + 1],
              labels=["Star Rating", "Sentiment"],
              fontsize=70)

    ax.set_xticklabels(
        [t.get_text().split("T")[0] for t in ax.get_xticklabels()])
    plt.tick_params(labelsize=50)

    plt.gcf().autofmt_xdate()

    plt.title('Trend of Star Rating and Sentiment for ' + res_name,
              fontsize=80)
    plt.savefig("images/" + res_fname)
    print(res_fname, pearsonr(mean_final['stars'], mean_final['sentiment'])[0])
Example #21
def get_sentiment(response):
    processed_response = response.replace('"','')
    if vader.negated(processed_response):
        return 'neg'
    sent = vader.SentimentIntensityAnalyzer()
    score = sent.polarity_scores(processed_response)
    sent_list = ['neg', 'neutral', 'pos']
    neg = score['neg']
    neutral = score['neu']
    pos = score['pos']
    sent_score = [neg, neutral, pos]
    big_score = max(sent_score)
    index = sent_score.index(big_score)
    sentiment = sent_list[index]
    return sentiment
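vader.negated is the module-level negation check shipped with nltk.sentiment.vader; the early return short-circuits any negated response to 'neg'. A rough usage sketch:

print(get_sentiment('"not good"'))     # negation detected, returns 'neg' immediately
print(get_sentiment('great product'))  # falls through to the argmax over neg/neu/pos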
Example #22
    def __sentiment(self):

        sid = vader.SentimentIntensityAnalyzer()
        score = sid.polarity_scores(self.poemOutput.cget("text"))['compound']

        if score >= 0.4:
            print(f"score:{score}, positive")
            self.poemOutput.config(bg="#d9e7d9")
        elif -0.4 < score < 0.4:
            print(f"score:{score}, neutral")
            self.poemOutput.config(bg="#e6e6e6")
        else:
            print(f"score:{score}, negative")
            self.poemOutput.config(bg="#fcddde")

        return None
Example #23
    def __init__(self):
        self.analyzer = sent.SentimentIntensityAnalyzer()

        self.raw_tweets = None

        self.train_labels = None
        self.test_labels = None

        self.nos_train_neu = 0
        self.nos_train_pos = 0
        self.nos_train_neg = 0

        self.nos_test_neu = 0
        self.nos_test_neg = 0
        self.nos_test_pos = 0

        self.get_tweets()
Example #24
def getPosAndNegWords(tweet):
    """
    Finds positive and negative boosted words in the tweet

    :param tweet:   Tweet as a string
    :return:        Returns a list with two floats between 0 and 1. First element represent pos and second repr neg
    """
    pos = 0
    neg = 0
    a = vader.SentimentIntensityAnalyzer()
    tokens = word_tokenize(tweet)
    for word in tokens:
        if a.polarity_scores(word)['pos'] > 0.9:
            pos += 1
        if a.polarity_scores(word)['neg'] > 0.9:
            neg += 1

    n = float(len(tokens))
    return [pos / n, neg / n]
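A hedged check of getPosAndNegWords: a lone strongly valenced token scores 1.0 for its class, so 'love' and 'hate' each count once among the seven tokens here:

print(getPosAndNegWords('I love rainy days but hate traffic'))
# roughly [1/7, 1/7]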
Example #25
def create_sentiment():
    sa = vader.SentimentIntensityAnalyzer()
    import unicodecsv as csv

    comments_with_sentiment = (
        toolz.merge(
            dict(
                id=d["id"],
                time=dt.datetime.utcfromtimestamp(float(d["created_utc"])).strftime("%Y-%m-%d %H:%M:%S"),
                ups=d["ups"],
                contr=d["controversiality"]
            ),
            toolz.keymap(
                lambda x: "vader_" + x,
                sa.polarity_scores(d["body"])
            ),
            dict(zip(
                ["pattern_polarity", "pattern_subjectivity"],
                pattern.sentiment(d["body"])
            ))
        )
        for d in corpus.load_json(
                p=None,  # just do all
                include_is=True,
                include_oos=True,
                #filter_deleted=False
        )
    )

    with open("data-sentiment/sentiment.csv", "w") as o:
        c = comments_with_sentiment.next()
        writer = csv.DictWriter(o, c.keys())
        writer.writeheader()
        writer.writerow(c)
        for c in comments_with_sentiment:
            writer.writerow(c)
Example #26
def get_analysis(score):
    # Map a polarity score to a sentiment label.
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'


df['Analysis'] = df['Polarity'].apply(get_analysis)

df.tail(20)

import nltk.sentiment.vader as vd
from nltk import download
download('vader_lexicon')

# In[8]:

#Calculating Sentiment Score

sia = vd.SentimentIntensityAnalyzer()

from nltk import download
download('punkt')
from nltk.tokenize import word_tokenize
df['sentiment_score'] = df['processed_text'].apply(lambda x: sum(
    [sia.polarity_scores(i)['compound'] for i in word_tokenize(' '.join(x))]))
df[['processed_text', 'sentiment_score']].head(n=10)

# In[9]:

##### Text Visualization / WordCloud #####
Example #27
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row, SparkSession
import datetime
from nltk.corpus import stopwords
from nltk.sentiment import vader
from nltk.corpus import words

import string


printable = set(string.printable)
stop_words = stopwords.words('english')
english_words = words.words()
analyzer = vader.SentimentIntensityAnalyzer()


def analyse(sentence):
    #for sentence in sentences:
    vs = analyzer.polarity_scores(sentence)
    compound = float(vs['compound'])
    score = ""
    #max_val =max([float(vs['neg']),float(vs['pos']),float(vs['neu'])])
    if compound >= 0.05:
        score = "positive"
    elif compound <= -0.05:
        score = "negative"
    else:
        score = "neutral"

    return score

Example #28
    def VaderSentimentIntensityAnalyzer(self, text):
        # VADER sentiment analysis.
        sid = vader.SentimentIntensityAnalyzer()
        ss = sid.polarity_scores(text)
        return ss
Example #29
# Find all concepts whose polarity differs between NLTK and SenticNet.
# The same file is modified for both +ve and -ve by replacing pos <-> neg.
import nltk.sentiment.vader as sentiment

sn = sentiment.SentimentIntensityAnalyzer()  # sentiment analyzer for NLTK
neg_corpus = []

# Stores all the pos/neg concepts in SenticNet and store in an array
with open("neg_word_corpus") as neg_corpus_file:
    for line in neg_corpus_file:
        word = line[:-1]
        neg_corpus.append(word)

collision_file = open('colliding_polarity',
                      'w')  # The file has been renamed to nltk_collisions_N/P

for term in neg_corpus:
    nltk_polarity = sn.polarity_scores(term)
    if nltk_polarity['pos'] > nltk_polarity['neg']:  # opposite polarity
        print(nltk_polarity)
        collision_file.write(term + "<-P\n")  # store the result
Example #30
def calculate_vader(tweet):

    # Missing tweets come through as float NaN and carry no text.
    if isinstance(tweet, float):
        return 0
    sia = vader.SentimentIntensityAnalyzer()
    return sia.polarity_scores(tweet)['compound']
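The float check above guards against NaN cells, since pandas represents missing text as float NaN. A usage sketch (assuming `from nltk.sentiment import vader` and numpy):

import numpy as np
print(calculate_vader(np.nan))              # 0: missing tweet
print(calculate_vader('what a great day'))  # compound score in [-1, 1]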