Example #1
def test_score():
    afinn = Afinn()
    score = afinn.score('bad')
    assert score < 0

    score = afinn.score('')
    assert score == 0.0
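
Afinn.score returns the sum of the AFINN valences of the words it matches, which is why the assertions above hold. A quick sanity check (values assume the bundled English word list, where 'good' is +3 and 'bad' is -3):

from afinn import Afinn

afinn = Afinn()
print(afinn.score('good'))      # 3.0
print(afinn.score('good bad'))  # 0.0, the +3 and -3 valences cancel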
Example #2
        def sentiment_analysis(self, sentences, freqs=True):
            '''
            Determines the sentiment score PER MESSAGE, NOT per sentence.
            When freqs=True, the counts are returned as frequencies.
            '''
            afinn = Afinn()
            sentiment_summary = defaultdict(int)

            for sentence in sentences:
                sentiment = afinn.score(sentence)
                if sentiment == 0.0:
                    sentiment_summary['neutral'] += 1
                elif sentiment > 0.0:
                    sentiment_summary['positive'] += 1
                else:
                    sentiment_summary['negative'] += 1

            if freqs:
                sentiment_summary['neutral'] /= len(sentences)
                sentiment_summary['positive'] /= len(sentences)
                sentiment_summary['negative'] /= len(sentences)

            return sentiment_summary
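
A hedged usage sketch for the method above, assuming it lives on a class called Analyzer here (the host class is not shown in the snippet):

analyzer = Analyzer()  # hypothetical host class
summary = analyzer.sentiment_analysis(['Great movie!', 'Terrible plot.', 'It exists.'])
# With freqs=True (the default) the counts are divided by the number of
# sentences, e.g. {'positive': 0.33..., 'negative': 0.33..., 'neutral': 0.33...}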
Example #3
def insert_tweets(connection, tweets):
    """
    Inserts tweets into a database connection
    :param connection: database connection
    :param tweets: list of tweets
    :return: None
    """
    cursor = connection.cursor()
    analysis = Afinn()
    for _, tweet in tweets.iterrows():

        sql = ("INSERT INTO Handle(HandleId, Username)\n" "VALUES(?, ?);")
        values = (tweet['user_id'], tweet['screen_name'])
        try:
            cursor.execute(sql, values)
        except Exception:
            pass  # duplicate entry; skip

        sentiment = analysis.score(tweet['text'])
        stamp = tweet['timestamp'].to_pydatetime()

        sql = (
            "INSERT INTO Tweet(TweetId, Post, Sentiment, Stamp, NumLikes, NumRetweets, HandleId)\n"
            "VALUES(?, ?, ?, ?, ?, ?, ?);")
        values = (tweet['tweet_id'], tweet['text'], sentiment, stamp,
                  tweet['likes'], tweet['retweets'], tweet['user_id'])
        try:
            cursor.execute(sql, values)
        except Exception:
            pass  # duplicate entry; skip
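
The INSERT statements assume Handle and Tweet tables already exist. A minimal sqlite3 schema sketch that would accept them (hypothetical: the column types are guessed from the bound values, and the original schema is not shown):

import sqlite3

connection = sqlite3.connect('tweets.db')
connection.executescript("""
    CREATE TABLE IF NOT EXISTS Handle(
        HandleId INTEGER PRIMARY KEY,
        Username TEXT);
    CREATE TABLE IF NOT EXISTS Tweet(
        TweetId INTEGER PRIMARY KEY,
        Post TEXT,
        Sentiment REAL,
        Stamp TIMESTAMP,
        NumLikes INTEGER,
        NumRetweets INTEGER,
        HandleId INTEGER REFERENCES Handle(HandleId));
""")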
Example #4
    def sentiment_analysis(self, spider):
        print("in sent-anal")
        print(self.base_dir)
        afinn = Afinn()

        with open("../SCORES.txt", 'w') as outFile:
            outFile.write("SENTIMENT ANALYSIS OF THE ROOT URLs (Folders)\n"
                          "--------------------------------------------- ")

        # in each root dir (root URL)
        for d in os.listdir(self.base_dir):
            d = self.base_dir + '/' + d
            if os.path.isdir(d):
                output = ""
                dept_score = 0
                num_words = 0

                # with each file, read in and add/avg score
                for f in os.listdir(d):
                    f = d + '/' + f
                    if os.path.isfile(f):
                        with open(f, 'r') as inFile:
                            file_contents = inFile.read().replace('\n', '')
                            dept_score += afinn.score(file_contents)
                            num_words += len(file_contents.split())

                with open("../SCORES.txt", 'a') as outFile:
                    dept = d.split('/')[-1]
                    output += "\n\nDepartment       : " + dept + \
                                "\nScore            : " + str(dept_score) + \
                                "\n - Total words   : " + str(num_words) + \
                                "\n - Score per word: " + str(round(dept_score/num_words, 7))
                    outFile.write(output)
                    print("Analyzing " + dept + " and saving to...")
                    print(str(outFile))
Example #5
def test_score_with_wordlist():
    afinn = Afinn()
    score = afinn.score('Rather good.')
    assert score > 0

    score = afinn.score('Rather GOOD.')
    assert score > 0
Example #6
def main():

    conf = SparkConf().setAppName(APP_NAME).setMaster(SPARK_MASTER)
    sc = SparkContext(conf=conf)
    sc.setLogLevel('ERROR')
    ssc = StreamingContext(sc, BATCH_SIZE)
    ssc.checkpoint(CHECKPOINT_DIR)

    afinn = Afinn()

    topic = KafkaUtils.createDirectStream(
        ssc, KAFKA_TOPIC, {"metadata.broker.list": KAFKA_BROKERS})

    parsed = topic.map(lambda v: json.loads(v[1])) \
        .filter(lambda tweet: 'text' in tweet and len(tweet['text']) > 0) \
        .filter(lambda tweet: 'timestamp_ms' in tweet and len(tweet['timestamp_ms']) > 0) \
        .filter(lambda tweet: 'entities' in tweet and len(tweet['entities']['hashtags']) > 0 ) \
        .map(lambda t: (t['entities']['hashtags'][0]['text'].lower(), afinn.score(t['text'])))

    addFun = lambda a, b: (a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3])
    invFun = lambda a, b: (a[0] - b[0], a[1] - b[1], a[2] - b[2], a[3] - b[3])

    tags = parsed.map(lambda t: (t[0], scoreToList(t[1]))) \
                 .reduceByKeyAndWindow(addFun, invFun, WINDOW_SIZE, FREQUENCY) \
                 .transform(lambda rdd: rdd.sortBy(lambda a: -a[1][0]))

    saveTag = parsed.saveAsTextFiles(BATCH_LAYER)

    tags.pprint()

    tags.foreachRDD(
        lambda rdd: rdd.foreachPartition(lambda p: storeToRedis(p)))

    ssc.start()
    ssc.awaitTermination()
Example #7
    def __init__(self, with_translation_support=False):
        self.sp_classifier = SentimentClassifier()
        self.af_classifier = Afinn(language='es')
        self.vader_classifier = SentimentIntensityAnalyzer()
        self._download_polyglot_languages()
        if with_translation_support:
            self.translator = translate.Client()
Example #8
def test_split():
    afinn = Afinn()
    words = afinn.split('Hello, World')
    assert words == ['Hello', 'World']

    words = afinn.split(u('Hell\xf8, \xc5rld'))
    assert words == [u('Hell\xf8'), u('\xc5rld')]
Example #9
def compare_years():
    limit = 16493

    years = [year for year in range(1997, 2020) if year not in [2005, 2008]]
    entropies = []
    dict_size = []
    sentiment = []
    afinn = Afinn()
    for year in tqdm(years):
        words = get_words_by_year(soup, year, exclude_names)
        fdist = nltk.FreqDist(words)
        shannon_entropy = get_shannon_entropy(fdist)
        entropies.append(shannon_entropy)
        dict_size.append(len(set(words)))
        sentiment.append(afinn.score(' '.join(words)))

    # plt.figure()
    # plt.xlabel('Year')
    # plt.ylabel('Shannon Entropy')
    # plt.plot(years, entropies, 'o')
    # plt.show()
    plt.figure()
    plt.xlabel('Year')
    plt.ylabel('Sentiment')
    plt.plot(years, sentiment, 'o')
    plt.show()
Example #10
def scatter_selfishness_sentiment(coach_names, player_names, lexicon):
    names = coach_names + player_names
    afinn = Afinn()

    fig, ax = plt.subplots()
    plt.ylabel('Sentiment')
    plt.xlabel('Selfishness')
    # plt.title('')
    hist_color = 'lightgreen'
    plot_form = '--g'
    histlabel = 'Coaches'
    plotlabel = 'Coaches Normal Approx. \n'

    # results = len(names)*[None]
    selfishness, sentiments = len(names) * [0], len(names) * [0]
    for i, name in enumerate(tqdm(names)):
        words = get_interviews_from(soup, name, all_together=True)
        his_selfishness = sum([lexicon[word] for word in words]) / len(words)
        his_sentiment = afinn.score(' '.join(words)) / len(words)
        # results[i] = (name, selfishness, sentiment)
        selfishness[i] = his_selfishness
        sentiments[i] = his_sentiment

    ax.scatter(selfishness, sentiments)
    for name, x, y in zip(names, selfishness, sentiments):
        ax.annotate(name, (x, y),
                    xytext=(0, 10),
                    textcoords='offset points',
                    ha='center')
    # handles, labels = ax.get_legend_handles_labels()
    # plt.legend(reversed(handles), reversed(labels), loc='upper right')
    # plt.tight_layout()
    # plt.savefig('figures/selfishness_histogram2.png')
    plt.show()
Example #11
def process_sentiment(tweet):
    total = 0
    _min = 0
    _max = 0
    pos = 0
    neg = 0
    neut = 0

    afinn = Afinn(emoticons=True)
    sc = afinn.score(tweet['text'])

    total += sc

    if sc < _min:
        _min = sc
    if sc > _max:
        _max = sc
    if sc < 0:
        neg += 1
    elif sc > 0:
        pos += 1
    elif sc == 0:
        neut += 1

    return (total, _min, _max, neg, neut, pos)
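
The returned tuple (total, min, max, neg, neut, pos) is shaped for downstream aggregation. A hypothetical combiner, not part of the original code, could fold the per-tweet tuples into overall statistics:

def combine(a, b):
    # Element-wise fold of two (total, min, max, neg, neut, pos) tuples.
    return (a[0] + b[0], min(a[1], b[1]), max(a[2], b[2]),
            a[3] + b[3], a[4] + b[4], a[5] + b[5])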
Example #12
    def index(tokens):
        afinn = Afinn()
        #print(len(tokens))
        dictionary = {}
        postingCount = 0
        dictionaryCount = 0
        for token in tokens:
            if token[0] not in dictionary:
                postings_list = []
                # postings_list = {} | USED WITH addToFreqList
                dictionary[token[0]] = []
                dictionary[token[0]].append(postings_list)
                dictionary[token[0]].append(1)
                dictionary[token[0]].append(afinn.score(token[0]))
                dictionaryCount = dictionaryCount + 1
            else:
                postings_list = dictionary[token[0]][0]
                if (token[1] not in postings_list):
                    dictionary[token[0]][1] = dictionary[token[0]][1] + 1
            # length = len(postings_list)
            Indexer.addToList(postings_list, token[1])
            # if(length != len(postings_list)):
            #     postingCount = postingCount + 1

        return dictionary
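
Indexer.addToList is not shown in the snippet. A plausible stand-in (hypothetical: it records each document id once per postings list) would be:

class Indexer:
    @staticmethod
    def addToList(postings_list, doc_id):
        # Append the document id only if it is not already present.
        if doc_id not in postings_list:
            postings_list.append(doc_id)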
Example #13
def calculate_department_scores():
    os.chdir("../output_500_itemcount")
    afinn = Afinn()
    afinn.setup_from_file(os.path.join(afinn.data_dir(), 'AFINN-111.txt'),
                          word_boundary=False)

    # dictionary where the keys are departments, values are lists of tuples containing a url, doc score and doc length
    # for each document in the department
    department_scores = {}
    # loop through documents and accumulate scores for each department
    for f in glob.glob("*.jsonl"):
        with jsonlines.open(f) as reader:
            for obj in reader:
                dep = format(obj['field'])
                url = format(obj['url'])
                title = format(obj['title'])
                text = format(obj['text'])

                doc_score = get_score(title, text, afinn)
                doc_length = len(title + " " + text)
                doc_tuple = (url, doc_score, doc_length)
                if dep not in department_scores:  # check whether the department is already in the dict
                    department_scores[dep] = [doc_tuple]
                else:
                    department_scores[dep].append(doc_tuple)

    return department_scores
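
The get_score helper is not shown. A minimal stand-in consistent with its call site (hypothetical: the real helper may weight the title and body differently):

def get_score(title, text, afinn):
    # Score the title and body together as one document.
    return afinn.score(title + " " + text)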
Example #14
    def sentiment(self):

        af = Afinn()

        self.df['sentiment'] = [
            af.score(' '.join(tweet)) for tweet in self.tokens
        ]
Example #15
def afinn_model():
    # start = time.time()
    print("Loading Dataset")
    dataset = pd.read_csv(r'../../data/raw/movie_reviews.csv')

    # Get reviews and sentiments
    print("Get reviews and sentiment")
    reviews = np.array(dataset['review'])
    sentiments = np.array(dataset['sentiment'])

    # Extract the test data
    test_reviews = reviews[49900:]
    test_sentiments = sentiments[49900:]

    # Normalize
    print("Normalize")
    norm_test_reviews = pp.normalize_text(test_reviews)
    print("Normalize Done")
    sample_review_ids = [3, 5]

    # Load Model
    afn = Afinn(emoticons=True)

    # Make a prediction with model
    for review, sentiment in zip(test_reviews[sample_review_ids],
                                 test_sentiments[sample_review_ids]):
        print('REVIEW:', review)
        print('Actual Sentiment:', sentiment)
        print('Predicted Sentiment polarity:', afn.score(review))
        print('-' * 60)

    return 0
Example #16
def Tweets(index_file):
    afinn = Afinn(emoticons=True)
    if index_file in cache:
        return cache[index_file]
    else:
        index = json.load(open(index_file, 'r'))
        tweets = {}
        for tweet_id, tweet in index.items():
            raw_tweet = open("dataset/" + tweet['text'], 'r',
                             encoding='utf-8').read()
            raw_tweet = json.loads(raw_tweet)
            tweet['id'] = tweet_id
            tweet['filename'] = tweet['text']
            tweet['text'] = raw_tweet['text'].lower()
            tweet['rt'] = raw_tweet['retweet_count']
            tweet['fav'] = raw_tweet['favorite_count']
            tweet['user_desc'] = raw_tweet['user']['description'].lower()
            tweet['followers'] = raw_tweet['user']['followers_count']
            tweet['userid'] = raw_tweet['user']['id']
            tweets[tweet_id] = tweet
            tweet['vision'] = getImageVision(tweet['photo'])
            tweet['afinn'] = afinn.score(tweet['text'])
            #tweet['emotion'] = getImageEmotion(tweet['photo'])
        cache[index_file] = tweets
        return tweets
Example #17
def calculate_matrix(name_list, sentence_list, align_rate):
    '''
    Function to calculate the co-occurrence matrix and sentiment matrix among all the top characters
    :param name_list: the list of names of the top characters in the novel.
    :param sentence_list: the list of sentences in the novel.
    :param align_rate: the sentiment alignment rate to align the sentiment score between characters due to the writing style of
    the author. Every co-occurrence will lead to an increase or decrease of one unit of align_rate.
    :return: the co-occurrence matrix and sentiment matrix.
    '''

    # calculate a sentiment score for each sentence in the novel
    afinn = Afinn()
    sentiment_score = [afinn.score(x) for x in sentence_list]
    # calculate occurrence matrix and sentiment matrix among the top characters
    name_vect = CountVectorizer(vocabulary=name_list, binary=True)
    occurrence_each_sentence = name_vect.fit_transform(sentence_list).toarray()
    cooccurrence_matrix = np.dot(occurrence_each_sentence.T,
                                 occurrence_each_sentence)
    sentiment_matrix = np.dot(occurrence_each_sentence.T,
                              (occurrence_each_sentence.T * sentiment_score).T)
    sentiment_matrix += align_rate * cooccurrence_matrix
    cooccurrence_matrix = np.tril(cooccurrence_matrix)
    sentiment_matrix = np.tril(sentiment_matrix)
    # diagonals of the matrices are set to be 0 (co-occurrence of name itself is meaningless)
    shape = cooccurrence_matrix.shape[0]
    cooccurrence_matrix[[range(shape)], [range(shape)]] = 0
    sentiment_matrix[[range(shape)], [range(shape)]] = 0

    return cooccurrence_matrix, sentiment_matrix
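
A hedged usage sketch with hypothetical inputs (CountVectorizer lowercases text by default, so the names in name_list should be lowercase):

sentences = ['Elizabeth smiled at Darcy.',
             'Darcy admired Elizabeth.',
             'Jane was delighted.']
cooc, senti = calculate_matrix(['elizabeth', 'darcy', 'jane'],
                               sentences, align_rate=0.5)
# cooc[1][0] counts the sentences that mention both Darcy and Elizabeth;
# senti[1][0] accumulates their sentiment plus the alignment term.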
Example #18
def test_split():
    afinn = Afinn()
    words = afinn.split('Hello, World')
    assert words == ['Hello', 'World']

    words = afinn.split(u('Hell\xf8, \xc5rld'))
    assert words == [u('Hell\xf8'), u('\xc5rld')]
Example #19
    def sentiment(document):
        afinn = Afinn()
        sentiment = 0
        tokens = Tokenizer.tokenize(document)
        for token in tokens:
            sentiment += afinn.score(token)
        return sentiment
Example #20
def test_score_with_wordlist():
    afinn = Afinn()
    score = afinn.score_with_wordlist('Rather good.')
    assert score > 0

    score = afinn.score_with_wordlist('Rather GOOD.')
    assert score > 0
Example #21
def test_score():
    afinn = Afinn()
    score = afinn.score('bad')
    assert score < 0

    score = afinn.score('')
    assert score == 0.0
Example #22
def get_sentence_level_sent(text):
    afinn = Afinn(emoticons=True)
    sents = sent_tokenize(text)
    scores = []
    for s in sents:
        scores.append(afinn.score(s))
    return scores
Example #23
def afinn_analysis(news_df):
    """
    initialize afinn sentiment analyzer and do afinn analysis on our data
    """
    af = Afinn()

    # compute sentiment scores (polarity) and labels
    sentiment_scores = [af.score(article) for article in news_df['text']]
    sentiment_category = ['positive' if score > 0
                          else 'negative' if score < 0
                          else 'neutral'
                          for score in sentiment_scores]

    # sentiment statistics per news category
    df = pd.DataFrame([list(news_df['label']), sentiment_scores, sentiment_category]).T
    df.columns = ['label', 'sentiment_score', 'sentiment_category']
    df['sentiment_score'] = df.sentiment_score.astype('float')
    print(df.groupby(by=['label']).describe())
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
    sp = sns.stripplot(x='label', y="sentiment_score", 
                       hue='label', data=df, ax=ax1)
    bp = sns.boxplot(x='label', y="sentiment_score", 
                     hue='label', data=df, palette="Set2", ax=ax2)
    fc = sns.catplot(x="label", hue="sentiment_category", 
                    data=df, kind="count", 
                    palette={"negative": "#FE2020", 
                             "positive": "#BADD07", 
                             "neutral": "#68BFF5"})
    t = f.suptitle('Visualizing News Sentiment', fontsize=14)
    return df
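
A hedged input sketch: the function expects a DataFrame with 'text' and 'label' columns (the sample rows are hypothetical):

news_df = pd.DataFrame({
    'text': ['Markets rallied strongly today.',
             'The storm caused terrible damage.'],
    'label': ['business', 'weather'],
})
result = afinn_analysis(news_df)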
Example #24
def test_words_and_emoticons():
    afinn = Afinn(emoticons=True)

    score = afinn.score(':-)')
    assert score > 0

    score = afinn.score('BAD BAD BAD :-)')
    assert score < 0
Example #25
def afinn_sentiment(corpus):
    af = Afinn()
    af_scores = []

    for tweet in corpus['cleaned_tweets']:
        af_scores.append(af.score(tweet))

    return af_scores
Example #26
def test_words_and_emoticons():
    afinn = Afinn(emoticons=True)

    score = afinn.score(':-)')
    assert score > 0

    score = afinn.score('BAD BAD BAD :-)')
    assert score < 0
Example #27
def sentiment(body):
    """
    calculate sentiment using Afinn model
    :type body:  unicode str
    :rtype: int
    """
    afinn = Afinn()
    return afinn.score(body)
Example #28
    def sentiment_afinn(self):
        af = Afinn()
        afinn_scores = [af.score(article) for article in self.feedbackcomment]
        afinn_category = [
            'positive' if score > 0 else 'negative' if score < 0 else 'neutral'
            for score in afinn_scores
        ]
        return afinn_scores, afinn_category
Example #29
def sent_proc(titles):

    af = Afinn()  # Load library

    # afinn sentiment scores
    sentiment_scores_af = [af.score(article) for article in titles]

    return np.array(sentiment_scores_af)
Example #30
def test_score_language():
    afinn = Afinn(language='en')
    score = afinn.score('bad')
    assert score < 0

    afinn = Afinn('en')
    score = afinn.score('bad')
    assert score < 0
Example #31
    def afinnSentiScore(self, doc, emoticons=True):
        """
        Returns a float score: greater than zero indicates positive
        sentiment, less than zero indicates negative sentiment.
        """

        afinn = Afinn(emoticons=emoticons)
        result = afinn.score(doc)
        return result
Example #32
def get_sentiment_score(words):
    text = get_clean_words(
        words)  #set text to the string produced by get_clean_words
    word_list = text.split()  #split the string into a list of words
    num_words = len(word_list)  #get the number of words in the list
    afinn = Afinn()  #initiate Afinn
    score = afinn.score(text)  #find the Afinn score for the text string
    avg = score / num_words  #find the average Afinn score per word
    return avg
Example #33
    def __init__(self, name="Combo Average", a=1, b=1, c=1, d=1):
        Model.__init__(self, name=name)
        self.afinn = Afinn(emoticons=True)
        self.vader = SentimentIntensityAnalyzer()

        self.a = a
        self.b = b
        self.c = c
        self.d = d
Example #34
def word_analyze(review):
    # compute sentiment scores (polarity) and labels
    import numpy as np
    from afinn import Afinn
    af = Afinn()
    #     data = [word_tokenize(w)['newword'] for w in review]
    #     return [print(w) for w in data for i in w]
    data = word_tokenize(review)['newword']
    return [af.score(w) for w in data]
Example #35
def sent_list(textl):
    afinn = Afinn(language='en')
    sent_vec = []
    for t in textl:
        t_vec = []
        for s in t:
            t_vec.append(afinn.score(s))
        sent_vec.append(sum(t_vec))
    return sent_vec
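
Note that textl is a list of lists: each element is itself a sequence of sentences, and the function returns one summed score per text. A quick hedged check (expected values assume the English AFINN list):

afinn_scores = sent_list([['I love it.', 'Great!'], ['Bad.']])
print(afinn_scores)  # roughly [6.0, -3.0]: love +3, great +3, bad -3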
Example #36
def analyze_text(input_text, analyzer):
    af = Afinn()
    analyser = SentimentIntensityAnalyzer()
    if analyzer == 'VADER':
        result = analyser.polarity_scores(input_text)
    elif analyzer == 'TextBlob':
        result = TextBlob(input_text).sentiment.polarity
    elif analyzer == 'Afinn':
        result = af.score(input_text)
    else:
        raise ValueError('unknown analyzer: ' + analyzer)
    return result
Example #37
def test_danish():
    afinn = Afinn(language='da')
    score = afinn.score('bedrageri')
    assert score < 0

    score = afinn.score(u('besv\xe6r'))
    assert score < 0

    score = afinn.score(u('D\xc5RLIG!!!'))
    assert score < 0
Example #38
    def run(self):
        asyncio.set_event_loop(asyncio.new_event_loop())
        self.afinn = Afinn(emoticons=True)

        self.twintConfig = twint.Config()
        self.configTwint()
        self.createLog()

        self.totalTweets = self.calculateTotalTweets()
        self.sentimentScore = self.calculateSentimentScore()
Example #39
    def extract(self):
        """
        Extracts location and sentiment from an article.
        """

        afinn = Afinn()
        target = self.title + " " + self.summary
        target = target.encode("ascii", "ignore").decode("ascii")

        self.sentiment = afinn.score(target)
        self.countries = extract_countries(target)
Example #40
def afinn_sentiment(filename):
    from afinn import Afinn
    afinn = Afinn()
    with open(filename, "r") as myfile:
        text = myfile.read().replace('\n', ' ')
        sentences = tokenize.sent_tokenize(text)
        sentiments = []
        for sentence in sentences:
            sentsent = afinn.score(sentence)
            sentiments.append(sentsent)
        return sentiments
Example #41
def test_data():
    """Test data files for format."""
    afinn = Afinn()
    filenames = listdir(afinn.data_dir())
    for filename in filenames:
        if not filename.endswith('.txt'):
            continue
        full_filename = join(afinn.data_dir(), filename)
        with io.open(full_filename, encoding='UTF-8') as fid:
            for line in fid:
                # There should be the phrase and the score
                # and nothing more
                assert len(line.split('\t')) == 2

                # The score should be interpretable as an int
                phrase, score = line.split('\t')
                assert type(int(score)) == int
Example #42
class SentimentBolt(storm.BasicBolt):

    def __init__(self):
        self.afinn = Afinn(language='da', emoticons=True)

    def process(self, tup):
        #storm.logInfo(tup.values[0])
        score = self.afinn.score(tup.values[0])
        #storm.logInfo(str(score))
        storm.emit([str(score)])
Example #43
def test_emoticon():
    afinn = Afinn()
    afinn.setup_from_file(join(afinn.data_dir(), 'AFINN-emoticon-8.txt'),
                          word_boundary=False)
    score = afinn.score(':-)')
    assert score > 0

    score = afinn.score('This is a :-) smiley')
    assert score > 0

    score = afinn.score('Just so XOXO.')
    assert score > 0
Example #44
def test_emoticon_upper_case():
    afinn = Afinn()
    afinn.setup_from_file(join(afinn.data_dir(), 'AFINN-emoticon-8.txt'),
                          word_boundary=False)

    score = afinn.score(':d')
    assert score == 0

    # TODO
    score = afinn.score(':D')
    # assert score > 0

    score = afinn.score('It is so: :D')
Example #45
def test_score_with_pattern():
    afinn = Afinn(language='da')
    score = afinn.score('ikke god')
    assert score < 0

    score = afinn.score('ikke god.')
    assert score < 0

    score = afinn.score('IKKE GOD-')
    assert score < 0

    score = afinn.score('ikke   god')
    assert score < 0

    score = afinn.score('En tv-succes sidste gang.')
    assert score > 0

    score = afinn.score('')
    assert score == 0.0
Example #46
# extract data for model evaluation
test_reviews = reviews[35000:]
test_sentiments = sentiments[35000:]
sample_review_ids = [7626, 3533, 13010]

# normalize dataset
norm_test_reviews = tn.normalize_corpus(test_reviews)


# # Sentiment Analysis with AFINN

# In[3]:

from afinn import Afinn

afn = Afinn(emoticons=True) 


# ## Predict sentiment for sample reviews

# In[4]:

for review, sentiment in zip(test_reviews[sample_review_ids], test_sentiments[sample_review_ids]):
    print('REVIEW:', review)
    print('Actual Sentiment:', sentiment)
    print('Predicted Sentiment polarity:', afn.score(review))
    print('-'*60)


# ## Predict sentiment for test dataset
Example #47
# test AFINN

from afinn import Afinn
af = Afinn()
af_emoticon = Afinn(emoticons=True)
text1 = 'This is utterly excellent!'
text2 = ':/'
text3 = 'This is hardly excellent'
print(text1, ":", af.score(text1))  # score = 3.0
print("find_all:", af.find_all(text1))  # ['excellent']
print(text2, ":", af_emoticon.score(text2))  # score = -2.0
print(text3, ":", af.score(text3))
print("find_all:", af.find_all(text3))  # ['excellent']
print("the", af.score("the"))
Example #48
    def __init__(self):
        self.afinn = Afinn(language='da', emoticons=True)
Example #49
def scoreUpdate(tweet):
    sentimentScore = Afinn()
    return sentimentScore.score(tweet)
Example #50
from afinn import Afinn
from nltk.corpus import stopwords
import re


eng_stopwords=stopwords.words("english")
#domain_spec_stopwords=["press","foundations","trends","vol","editor","workshop","international","journal","research","paper","proceedings","conference","wokshop","acm","icml","sigkdd","ieee","pages","springer"]
#eng_stopwords=eng_stopwords+domain_spec_stopwords
    #normal_stopwords=[a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your]
with open(r"full_stopwords.txt","r") as f:
     comp_st=[]
     for i in f.readlines():
         comp_st.append(i[:-1])
comp_st=[i for i in comp_st if i!='']
eng_stopwords=eng_stopwords+comp_st
afinn_model=Afinn()

with open(r"most_recent_ultimate_tweets.txt","r") as f:
    tweets_wo_hashtag=[]
    for i in f.readlines():
        tweets_wo_hashtag.append(i[:-1])
        
with open(r"best_hashtags_expanded.txt","r") as f:
     hashtags_alone=[]
     for i in f.readlines():
         hashtags_alone.append(i[:-1])

with open(r"AFINN-emoticon-8.txt","r") as f:
    ls_of_emoticons=[]
    for i in f.readlines():
        ls_of_emoticons.append(i.split()[0])
Example #51
for token in spacy_text[1]:
    if token.ent_type_ != "":
        print(token, token.ent_type_)


# ## Sentiment Analysis

# There are also packages available that are more focused on particular tasks, such as sentiment analysis.
# One that is easy to use, for example, is `AFINN` --> http://neuro.compute.dtu.dk/wiki/AFINN
#
# In essence it is a word list, but you can also install it directly with `pip install afinn`

# In[29]:

from afinn import Afinn
afinn = Afinn()


# In[30]:

afinn.score('This is utterly excellent!')


# In[31]:

for text in spacy_text:
    for sentence in text.sents:
        print(afinn.score(sentence.text), sentence)


# ## Search Reddit for threads about Egyptian Airline crash
Example #52
def test_find_all():
    afinn = Afinn()
    words = afinn.find_all("It is so bad")
    assert words == ['bad']
Example #53
        temp.append(float(item))
    if sum(temp) == 0:
        out = [0.0, 0.0, 0.0]
    else:
        for item in temp:
            out.append(item / sum(temp))
    return out


mentionMapper = mapMention('adData/analysis/ranked/mention.json')
featureList = ['Length', 'URL', 'Hashtag', 'Username', 'Sentiment', 'Readability',
               'ParseDepth', 'HeadCount', 'POS_N', 'POS_V', 'POS_A', '!', '?',
               'Verified', 'FollowerCount']

# happy_log_probs, sad_log_probs = utilities.readSentimentList('twitter_sentiment_list.csv')
afinn = Afinn()
posFile = open('adData/analysis/groups/totalGroup/group0.pos', 'r')
negFile = open('adData/analysis/groups/totalGroup/group0.neg', 'r')
posParseLengthFile = open('adData/analysis/groups/totalGroup/parserLength0.pos', 'r')
negParseLengthFile = open('adData/analysis/groups/totalGroup/parserLength0.neg', 'r')
posHeadCountFile = open('adData/analysis/groups/totalGroup/parserHeadCount0.pos', 'r')
negHeadCountFile = open('adData/analysis/groups/totalGroup/parserHeadCount0.neg', 'r')
posPOSCountFile = open('adData/analysis/groups/totalGroup/parserPOSCount0.pos', 'r')
negPOSCountFile = open('adData/analysis/groups/totalGroup/parserPOSCount0.neg', 'r')

ids = []
contents = []
scores = []
days = []
time = []
labels = []
Example #54
def test_unicode():
    afinn = Afinn()
    score = afinn.score(u('na\xefve'))
    assert score < 0
Example #55
from afinn import Afinn
from builtins import str
afinn = Afinn()
score = afinn.score('this site is not very good')
print(score)
Example #56
test_reviews = np.array(test_data['review'])
test_sentiments = np.array(test_data['sentiment'])


sample_docs = [100, 5817, 7626, 7356, 1008, 7155, 3533, 13010]
sample_data = [(test_reviews[index],
                test_sentiments[index])
                  for index in sample_docs]


sample_data        


from afinn import Afinn
afn = Afinn(emoticons=True)
print(afn.score('I really hated the plot of this movie'))

print(afn.score('I really hated the plot of this movie :('))


import nltk
from nltk.corpus import sentiwordnet as swn

# senti_synsets returns an iterator in modern NLTK, so materialize it before indexing
good = list(swn.senti_synsets('good', 'n'))[0]
print('Positive Polarity Score:', good.pos_score())
print('Negative Polarity Score:', good.neg_score())
print('Objective Score:', good.obj_score())

from normalization import normalize_accented_characters, html_parser, strip_html
Example #57
def test_score_with_wordlist_empty():
    afinn = Afinn()
    score = afinn.score('')
    assert score == 0.0