Exemplo n.º 1
0
def return_sentiment(term_vec):
    """Score every entry of ``term_vec`` with ``sentiment.sentiment``.

    Args:
        term_vec: Iterable whose items are each passed, one at a time, to
            ``sentiment.sentiment``.

    Returns:
        A list holding the result of ``sentiment.sentiment`` for each item,
        in input order (an empty list for empty input).
    """
    # One result per input item, collected in order.
    return [sentiment.sentiment(term) for term in term_vec]
Exemplo n.º 2
0
def get_average_sentiment(datafr):
    """Return the mean valence and arousal over all sentences of a dataframe.

    Every non-missing entry of the ``newcontent`` column is cleaned (URLs,
    newline and tab characters removed) and split into sentences on ".".
    Each non-empty sentence is split into words on single spaces and scored
    with ``sentiment.sentiment``; the ``'valence'`` and ``'arousal'`` values
    of those scores are averaged.

    Args:
        datafr: Object exposing a ``newcontent`` column of text whose
            ``dropna()`` result can be iterated (presumably a pandas
            DataFrame of emails -- confirm against callers).

    Returns:
        ``[average_valence, average_arousal]``. When no non-empty sentence
        is found the result is ``[0.0, 0.0]`` rather than a
        ``ZeroDivisionError``.
        NOTE(review): 0.0 is used as the "nothing to score" placeholder --
        confirm this suits downstream consumers.
    """
    sentences = []
    for email in datafr.newcontent.dropna():
        # URLs are stripped before newlines/tabs so that a URL still ends at
        # the whitespace character that followed it in the original text.
        text = re.sub(r'http\S+', '', email)
        text = text.replace('\n', '').replace('\t', '')
        # Splitting on "." leaves empty strings (e.g. after a trailing
        # period); those are not sentences and are skipped.
        sentences.extend(part for part in text.split(".") if part)

    if not sentences:
        # Nothing to average: avoid dividing by zero below.
        return [0.0, 0.0]

    #  Calculate valence and arousal for each sentence using Dr. Healey's
    #  package, which uses the ANEW and Happiness sentiment dictionaries
    sentiments = [sentiment.sentiment(sentence.split(" ")) for sentence in sentences]

    total_valence = sum(score['valence'] for score in sentiments)
    total_arousal = sum(score['arousal'] for score in sentiments)
    return [total_valence / len(sentiments), total_arousal / len(sentiments)]
Exemplo n.º 3
0
    # Keep only the terms of entry i that appear in neither stop-word list.
    term_list = [
        term
        for term in term_vec_NEW[i]
        if not (term in stop_words or term in stop_words_2)
    ]

    # Replace the entry with its filtered version.
    term_vec_NEW[i] = term_list

# -----------------------------------------------------------------------
## Term Sentiment

temp_sent = []
# One row per entry of term_vec_NEW, two score columns.
sentiments = np.zeros((len(term_vec_NEW), 2))
for i in range(len(term_vec_NEW)):
    # NOTE(review): the two scores are recovered by parsing the text form of
    # the value returned by sentiment.sentiment, so column 0 / column 1 follow
    # the order in which that text lists its entries -- confirm which score
    # is which before relying on the column meaning.
    temp_sent = (
        str(sentiment.sentiment(term_vec_NEW[i]))
        .replace('{', '')
        .replace('}', '')
        .replace("'", '')
        .split(',')
    )
    # Split the first two "key: value" fields into [key, value] pairs.
    temp_sent[0], temp_sent[1] = temp_sent[0].split(':'), temp_sent[1].split(':')

    # The value strings are converted to floats on assignment into the array.
    sentiments[i, 0] = temp_sent[0][1]
    sentiments[i, 1] = temp_sent[1][1]

# Mean of each score column over all entries.
print(sentiments[:, 0].mean())
print(sentiments[:, 1].mean())

cluster_sentiment = cluster_sentiment_fn(sentiments, clusters_LSA)
'''    
#document # for the given cluster
#document # for cluster 0

os.chdir(
    "C:/Users/Savannah Hampton/Documents/Master of Science in Analytics/Text Analytics/ted-talks"
)

from sentiment_module import sentiment

splittedlist = []
for line in newtedtalks['transcript']:
    splittedlist.append(line.split())

for line in splittedlist:
    sentiment.exist(line)
    sentiment.sentiment(line)

for i in range(0, len(splittedlist)):
    print(sentiment.sentiment(splittedlist[i]))

for i in range(2200, 2467):
    print(sentiment.sentiment(splittedlist[i]))

#highest valence: 8.13: An electrifying acoustic guitar performance
#average valence: 5.32
#highest arousal: 5.32: An electrifying acoustic guitar performance
#average arousal: 4.51

import pandas as pd
sentiment = pd.read_csv('Sentiment Analysis.csv')
print(sentiment)
Exemplo n.º 5
0
# Lemmatization
from textblob import Word

# Replace every item of each title with its lemma.
# NOTE(review): assumes each title is already a sequence of word tokens; a
# plain string would be processed character by character -- confirm upstream.
stem_df['title'] = stem_df['title'].apply(
    lambda tokens: [Word(token).lemmatize() for token in tokens]
)
    
# Sentiment

# Switch the working directory first (the path below is machine-specific).
import os
os.chdir('C:/Users/Debosmita/Documents/TextMining')

# Import the sentiment module.
from sentiment_module import sentiment

# Print the sentiment of each row's title, stopping as soon as more than
# 5000 rows have been printed. `count` ends up holding the number printed.
count = 0
for count, (index, row) in enumerate(stem_df.iterrows(), start=1):
    print(sentiment.sentiment(row['title']))
    if count > 5000:
        break











Exemplo n.º 6
0
# Collect every non-empty sentence fragment of each split email.
for email in sep_email:
    fragments = email[0]
    # Drop empty fragments in place (slice assignment), so the lists held by
    # sep_email are cleaned as before, but in a single pass instead of
    # repeated element deletion.
    fragments[:] = [fragment for fragment in fragments if len(fragment) != 0]
    sentences.extend(fragments)

#  Calculate valence and arousal for each sentence using Dr. Healey's package
#  which uses the ANEW and Happiness sentiment dictionaries
sentiments = [sentiment.sentiment(sentence.split(" ")) for sentence in sentences]

''' visualizing '''
# Keep only sentences whose valence and arousal are both non-zero. The two
# scores are filtered together: filtering each list on its own and zipping
# afterwards would shift the lists against each other whenever just one score
# of a sentence is zero, pairing values that belong to different sentences.
sentiments_overall = [
    (sent["valence"], sent["arousal"])
    for sent in sentiments
    if sent["valence"] != 0 and sent["arousal"] != 0
]
# Per-score lists, index-aligned with sentiments_overall.
valences = [valence for valence, _ in sentiments_overall]
arousals = [arousal for _, arousal in sentiments_overall]

# high arousal, high valence --> orange #fdae61: 0
# low arousal, high valence --> green #abdda4: 1
# low arousal, low valence --> blue #2b83ba: 2
# high arousal, low valence --> red #d7191c: 3 
color = []
for sent in sentiments_overall: