def test_readability(self):
    # A jargon-heavy sentence is expected to score in the "difficult"
    # range, i.e. strictly below 0.30.
    jargon = "The Australian platypus is seemingly a hybrid of a mammal and reptilian creature"
    self.assertTrue(metrics.readability(jargon) < 0.30)
    # A Dr. Seuss passage is expected to score in the "easy" range,
    # i.e. strictly above 0.70.
    # NOTE: the quoted fragments are joined with no whitespace between them.
    seuss = (
        "'I know some good games we could play,' said the cat."
        "'I know some new tricks,' said the cat in the hat."
        "'A lot of good tricks. I will show them to you.'"
        "'Your mother will not mind at all if I do.'"
    )
    self.assertTrue(metrics.readability(seuss) > 0.70)
def test_readability(self):
    # Dense technical wording must land in the "difficult" range (< 0.30).
    hard_text = "The Australian platypus is seemingly a hybrid of a mammal and reptilian creature"
    hard_score = metrics.readability(hard_text)
    self.assertTrue(hard_score < 0.30)
    # Dr. Seuss must land in the "easy" range (> 0.70).
    # The quoted lines are glued together without any separator.
    easy_text = "".join((
        "'I know some good games we could play,' said the cat.",
        "'I know some new tricks,' said the cat in the hat.",
        "'A lot of good tricks. I will show them to you.'",
        "'Your mother will not mind at all if I do.'",
    ))
    easy_score = metrics.readability(easy_text)
    self.assertTrue(easy_score > 0.70)
    # Progress marker naming the function this test covers.
    print("pattern.metrics.readability()")
import sys

import pandas as pd
from pattern.fr import parse, sentiment, ngrams, pprint
from pattern.metrics import readability
from pattern.web import URL, plaintext


def _score_tweets(tweets):
    # Walk the tweets once and return (sentiment_total, readability_total),
    # both accumulated as floats starting from 0.0.
    sentiment_total = 0.0
    readability_total = 0.0
    for text in tweets:
        # First component of sentiment()'s result, computed on the
        # plaintext() version of the tweet (presumably the polarity --
        # confirm against the pattern documentation).
        sentiment_total += sentiment(plaintext(text))[0]
        # readability() receives the tweet as stored, not the plaintext()
        # output.
        readability_total += readability(text)
    return sentiment_total, readability_total


# Semicolon-separated input; the column names are supplied through names=.
store = 'tweets.csv'
df = pd.read_csv(
    store,
    sep=';',
    names=['brand', 'id', 'username', 'date', 'tweet', 'source'],
)

# One sub-frame per brand, selected by exact match on the 'brand' column.
brand1 = df[df['brand'] == 'SNCB']
brand2 = df[df['brand'] == 'SNCF']

b1_sent, b1_read = _score_tweets(brand1['tweet'])
b2_sent, b2_read = _score_tweets(brand2['tweet'])

# Only the summed sentiment values are reported; the readability totals are
# computed but not printed here.
print('SNCB: %f' % b1_sent)
print('SNCF: %f' % b2_sent)