def test_tokenizer(self):
    # French sentiment() has to run the French tokenizer, which splits the
    # elided pronoun from the verb ("t'aime" => "t' aime").
    affirmative = fr.sentiment("je t'aime")
    negated = fr.sentiment("je ne t'aime pas")
    # Element 0 of the score must be positive for the plain sentence
    # and negative for the negated one.
    self.assertTrue(affirmative[0] > 0)
    self.assertTrue(negated[0] < 0)
    # The first assessment must list exactly the words that were scored,
    # i.e. "aime" without the "t'" prefix glued to it.
    self.assertTrue(affirmative.assessments[0][0] == ["aime"])
    self.assertTrue(negated.assessments[0][0] == ["ne", "aime"])
def emotion_ts_from_text(tier, nlp):
    """Build a sentiment time series from the labelled intervals of *tier*.

    Every item of *tier* is unpacked with ``get_interval`` into a label and
    a ``[start, stop]`` pair.  The label is scored with ``sentiment``
    (placeholder labels score 0 for both values) and that score is repeated
    at every sample point generated for the interval.

    Sample points begin at the interval midpoint and advance by
    ``(start + stop) / 10.0`` for as long as they stay below ``stop``.

    Args:
        tier: iterable of objects accepted by ``get_interval``.
        nlp: unused; kept so existing callers keep working.

    Returns:
        A tuple ``(x, y, z)`` of equally long lists: sample positions,
        polarity values and subjectivity values.
    """
    # Labels that carry no text to analyse; they are scored as neutral.
    placeholders = ("#", "", " ", "***", "*")

    x, y, z = [], [], []
    for item in tier:
        # The third value returned by get_interval is not needed here.
        label, (start, stop), _ = get_interval(item)

        if label in placeholders:
            polarity = 0
            subjectivity = 0
        else:
            polarity_and_subjectivity = sentiment(label)
            polarity = polarity_and_subjectivity[0]
            subjectivity = polarity_and_subjectivity[1]

        # NOTE(review): the step is derived from start + stop, so the number
        # of samples depends on the absolute position of the interval, not
        # on its length.  (stop - start) / 10.0 may have been intended --
        # confirm with the callers before changing it.
        step = (start + stop) / 10.0
        moment = (start + stop) / 2.0
        while moment < stop:
            x.append(moment)
            y.append(polarity)
            z.append(subjectivity)
            if step <= 0:
                # A non-positive step can never reach `stop`; without this
                # guard the loop would append forever.  Keep the single
                # midpoint sample and move on to the next interval.
                break
            moment = moment + step
    return x, y, z
def test_sentiment(self):
    # A positive adjective must score above zero, a negative one below zero.
    self.assertTrue(fr.sentiment("fabuleux")[0] > 0)
    self.assertTrue(fr.sentiment("terrible")[0] < 0)
    # Measure the accuracy of the sentiment analysis on the scores given
    # for 1,500 book reviews.
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    corpus = os.path.join(PATH, "corpora", "polarity-fr-amazon.csv")
    # A review counts as positive when its integer score is above zero.
    reviews = [(review, int(score) > 0) for review, score in Datasheet.load(corpus)]
    accuracy, precision, recall, f1 = test(fr.positive, reviews)
    self.assertTrue(accuracy > 0.75)
    self.assertTrue(precision > 0.76)
    self.assertTrue(recall > 0.73)
    self.assertTrue(f1 > 0.75)
    print("pattern.fr.sentiment()")
# Per-row step: the first row (i == 0) is kept as the header; every other
# row has its text cleaned, scored and filed under its weighted polarity.
# NOTE(review): `row`, `body`, `mood` and `i` come from code outside this
# fragment -- confirm the enclosing loop matches this layout.
if i == 0:
    header = row
else:
    # Replace every punctuation and whitespace character with a plain space
    # so the text can be split on " ".  string.punctuation already contains
    # the apostrophe, so no separate pass is needed for it.
    for c in string.punctuation + string.whitespace:
        body = body.replace(c, ' ')
    words = clean_body([n.lower() for n in body.split(" ")])
    comment = " ".join(words)
    polarity, subjectivity = sentiment(comment)
    # NOTE(review): `vote` is not read anywhere in the visible code.
    vote = polarity + subjectivity
    # Weight the polarity by (subjectivity + 1.0000001); the weighted value
    # is the key under which the row is grouped.
    subjectivity = subjectivity + 1.0000001
    polarity = polarity * subjectivity
    try:
        mood[polarity].append(row)
    except KeyError:
        # First row seen with this weighted polarity.
        mood[polarity] = [row]
    # NOTE(review): this appends the list to itself (doubling it); it looks
    # like a separate accumulator was intended -- verify before relying on it.
    words.extend(words)
# Was `i =+ 1`, which assigns +1 instead of incrementing.  The header test
# above behaves the same, but `i` now really counts the processed rows.
i += 1
import sys

import pandas as pd
from pattern.fr import parse, sentiment, ngrams, pprint
from pattern.metrics import readability
from pattern.web import URL, plaintext

# Semicolon-separated tweet dump; the column names are supplied here.
store = 'tweets.csv'
df = pd.read_csv(store, sep=';', names=['brand', 'id', 'username', 'date', 'tweet', 'source'])

# One frame per brand being compared.
brand1 = df[df.brand == 'SNCB']
brand2 = df[df.brand == 'SNCF']


def _brand_totals(tweets):
    # Sum element 0 of the sentiment score (computed on the plaintext
    # version of each tweet) and the readability of each raw tweet.
    sentiment_total = 0.0
    readability_total = 0.0
    for text in tweets:
        sentiment_total += sentiment(plaintext(text))[0]
        readability_total += readability(text)
    return sentiment_total, readability_total


b1_sent, b1_read = _brand_totals(brand1.tweet)
b2_sent, b2_read = _brand_totals(brand2.tweet)

# Only the sentiment totals are reported; the readability totals are
# computed but not printed.
print('SNCB: %f' % b1_sent)
print('SNCF: %f' % b2_sent)