예제 #1
0
    def on_status(self, status):
        """Classify an incoming status and answer negative ones with a joke.

        Statuses flagged as retweeted, carrying a reply id, or having a
        ``retweeted_status`` attribute are ignored.  For every other status
        the text is converted to features with
        ``tweet_features.get_tweet_features`` and classified.  When the mood
        is ``'negative'`` and ``jokes`` is not empty, one joke is popped, the
        remaining jokes are written to ``jokes.pickle`` and every chunk of
        the popped joke is passed to ``api.update_status``.  Finally the
        wrapped status text, its author/date/source and the classification
        are printed.

        Any exception raised while handling the status is printed and
        swallowed, so the method always returns ``None``.
        """
        try:
            # Same short-circuit order as the original chain of "not ... and".
            skip = (
                status.retweeted
                or status.in_reply_to_status_id
                or status.in_reply_to_user_id
                or hasattr(status, 'retweeted_status')
            )
            if skip:
                return

            features = tweet_features.get_tweet_features(status.text, worstfeaturesfilter)
            print(features)
            mood = classifier.classify(features)

            if mood == 'negative':
                if jokes:
                    # TODO: respect word boundaries
                    jokechunks = jokes.pop()
                    # Persist the remaining jokes before posting.  The file
                    # is opened in binary mode (what pickle expects on every
                    # platform) and closed deterministically by the "with".
                    with open('jokes.pickle', 'wb') as jokes_file:
                        pickle.dump(jokes, jokes_file)

                    for chunk in jokechunks:
                        api.update_status(chunk)
                else:
                    print("we're out of jokes!!")

            print(self.status_wrapper.fill(status.text))
            print('\n %s  %s  via %s\n' % (
                status.author.screen_name, status.created_at, status.source))
            print("mood classified as: %s" % mood)
            classifier.explain(features)
        except Exception as e:
            # Deliberately best-effort: the original intent was to survive
            # unicode errors while printing to the console, but every
            # exception raised above ends up here.  Report it and carry on
            # so one bad status does not break the application.
            print(e)
예제 #2
0
            tweets.append((tweet.text, label))
        elif label == "negative":
            tweets.append((tweet.text, label))
        labelcount[label] += 1

# Randomise the order of the labelled tweets once; shuffling a second time
# before the split (as the code used to) adds nothing.
random.shuffle(tweets)
print(labelcount)


# Split into training and test sets: the first 80% of the shuffled tweets
# train the classifier, the remainder is held out for testing.
num_train = int(0.8 * len(tweets))

# Alternative feature extractor:
# fvecs = [(tweet_features.make_tweet_dict(t),s) for (t,s) in tweets]
# The names t and s are kept as-is: under Python 2 they leak into module scope.
fvecs = [(tweet_features.get_tweet_features(t, set()), s) for (t, s) in tweets]
v_train = fvecs[:num_train]
# Alternative: train on everything.
# v_train = fvecs
v_test = fvecs[num_train:]


# Extract best word features: count how often each truthy feature occurs,
# overall (word_fd) and per label (label_word_fd).
# NOTE(review): the counts are gathered over fvecs, i.e. training AND test
# vectors, not just v_train -- confirm this is intended.
word_fd = FreqDist()
label_word_fd = ConditionalFreqDist()

for (feats, label) in fvecs:
    for key in feats:
        if feats[key]:
            # Assumes NLTK's FreqDist/ConditionalFreqDist: "+= 1" is
            # equivalent to the old .inc(key) and still works on NLTK 3,
            # where .inc() was removed.
            word_fd[key] += 1
            label_word_fd[label][key] += 1
예제 #3
0
        if label == 'positive':
            tweets.append((tweet.text, label))
        elif label == 'negative':
            tweets.append((tweet.text, label))
        labelcount[label] += 1

# Randomise the order of the labelled tweets once; shuffling a second time
# before the split (as the code used to) adds nothing.
random.shuffle(tweets)
print(labelcount)

# Split into training and test sets: the first 80% of the shuffled tweets
# train the classifier, the remainder is held out for testing.
num_train = int(0.8 * len(tweets))

# Alternative feature extractor:
# fvecs = [(tweet_features.make_tweet_dict(t),s) for (t,s) in tweets]
# The names t and s are kept as-is: under Python 2 they leak into module scope.
fvecs = [(tweet_features.get_tweet_features(t, set()), s) for (t, s) in tweets]
v_train = fvecs[:num_train]
# Alternative: train on everything.
# v_train = fvecs
v_test = fvecs[num_train:]

# Extract best word features: count how often each truthy feature occurs,
# overall (word_fd) and per label (label_word_fd).
# NOTE(review): the counts are gathered over fvecs, i.e. training AND test
# vectors, not just v_train -- confirm this is intended.
word_fd = FreqDist()
label_word_fd = ConditionalFreqDist()

for (feats, label) in fvecs:
    for key in feats:
        if feats[key]:
            # Assumes NLTK's FreqDist/ConditionalFreqDist: "+= 1" is
            # equivalent to the old .inc(key) and still works on NLTK 3,
            # where .inc() was removed.
            word_fd[key] += 1
            label_word_fd[label][key] += 1

# Total number of feature occurrences counted under the 'positive' label.
pos_word_count = label_word_fd['positive'].N()