示例#1
0
 def test_readability(self):
     # Dense, jargon-heavy prose is expected to land in the "difficult"
     # range, i.e. a score below 0.30.
     jargon = "The Australian platypus is seemingly a hybrid of a mammal and reptilian creature"
     self.assertTrue(metrics.readability(jargon) < 0.30)
     # Simple Dr. Seuss prose is expected to land in the "easy" range,
     # i.e. a score above 0.70. The adjacent literals are joined with no
     # separator between the quoted sentences.
     seuss = (
         "'I know some good games we could play,' said the cat."
         "'I know some new tricks,' said the cat in the hat."
         "'A lot of good tricks. I will show them to you.'"
         "'Your mother will not mind at all if I do.'"
     )
     self.assertTrue(metrics.readability(seuss) > 0.70)
示例#2
0
 def test_readability(self):
     # Assert that technical jargon is in the "difficult" range (< 0.30).
     s = "The Australian platypus is seemingly a hybrid of a mammal and reptilian creature"
     v = metrics.readability(s)
     self.assertTrue(v < 0.30)
     # Assert that Dr. Seuss is in the "easy" range (> 0.70).
     # The sentences are concatenated without any separator between them.
     s = (
         "'I know some good games we could play,' said the cat."
         + "'I know some new tricks,' said the cat in the hat."
         + "'A lot of good tricks. I will show them to you.'"
         + "'Your mother will not mind at all if I do.'"
     )
     v = metrics.readability(s)
     self.assertTrue(v > 0.70)
     # Progress marker naming the function under test. A parenthesized
     # single-argument print is valid both as the Python 2 statement and as
     # the Python 3 function, so the test module parses on either version.
     print("pattern.metrics.readability()")
示例#3
0
import pandas as pd
import sys

# Path of the semicolon-separated tweet dump loaded below.
store = 'tweets.csv'

# Column names are supplied explicitly rather than read from the file.
df = pd.read_csv(
    store,
    sep=';',
    names=['brand', 'id', 'username', 'date', 'tweet', 'source'],
)

# One sub-frame per brand, selected by exact match on the 'brand' column.
brand1 = df[df['brand'] == 'SNCB']
brand2 = df[df['brand'] == 'SNCF']

from pattern.fr import parse, sentiment, ngrams, pprint
from pattern.web import URL, plaintext
from pattern.metrics import readability

def _score_tweets(tweets):
    """Return ``(sentiment_total, readability_total)`` summed over *tweets*.

    For each tweet, the first element of ``sentiment(plaintext(tweet))`` is
    added to the sentiment total (presumably the polarity score -- confirm
    against the pattern documentation), and ``readability(tweet)`` on the
    raw tweet text is added to the readability total.
    """
    sent_total = 0.0
    read_total = 0.0
    for tweet in tweets:
        sent_total += sentiment(plaintext(tweet))[0]
        read_total += readability(tweet)
    return sent_total, read_total


# Totals (not averages) per brand. The readability totals are kept under
# their original names even though only the sentiment totals are printed
# in this part of the script.
b1_sent, b1_read = _score_tweets(brand1.tweet)
b2_sent, b2_read = _score_tweets(brand2.tweet)

# Parenthesized single-argument print is valid on both Python 2 and 3.
print('SNCB: %f' % b1_sent)
print('SNCF: %f' % b2_sent)