def on_status(self, status):
    """Classify an incoming status and post a joke when its mood is negative.

    Retweets and replies are skipped.  For every other status the features
    returned by ``tweet_features.get_tweet_features`` are printed and handed
    to ``classifier.classify``.  When the result is ``'negative'`` one entry
    is popped from the module-level ``jokes`` collection, the remaining jokes
    are pickled to ``jokes.pickle``, and every chunk of the popped entry is
    sent through ``api.update_status``.  Afterwards the wrapped status text,
    its author / creation time / source, the mood and the classifier's
    explanation are written to the console.

    Args:
        status: status object; the attributes read here are ``retweeted``,
            ``in_reply_to_status_id``, ``in_reply_to_user_id``, ``text``,
            ``author.screen_name``, ``created_at`` and ``source``.

    Returns:
        None, on every path.  Exceptions raised while handling the status
        are printed and swallowed rather than propagated.
    """
    try:
        # Guard clause: ignore retweets and replies (same checks, same order
        # and short-circuiting as a single negated "and" chain).
        if (status.retweeted
                or status.in_reply_to_status_id
                or status.in_reply_to_user_id
                or hasattr(status, 'retweeted_status')):
            return

        features = tweet_features.get_tweet_features(
            status.text, worstfeaturesfilter)
        print(features)
        mood = classifier.classify(features)

        if mood == 'negative':
            if jokes:
                # TODO: respect word boundaries
                jokechunks = jokes.pop()
                # Persist the shrunken joke list before posting.  The file is
                # closed deterministically by the context manager.  Text mode
                # is kept as-is; NOTE(review): confirm it matches the mode
                # used wherever jokes.pickle is loaded.
                # NOTE: the joke is removed and saved *before* it is posted,
                # so a failing update_status call loses that joke.
                with open('jokes.pickle', 'w') as jokes_file:
                    pickle.dump(jokes, jokes_file)
                for chunk in jokechunks:
                    api.update_status(chunk)
            else:
                print("we're out of jokes!!")

        print(self.status_wrapper.fill(status.text))
        print('\n %s %s via %s\n' % (status.author.screen_name,
                                     status.created_at,
                                     status.source))
        print("mood classified as: %s" % mood)
        classifier.explain(features)
    except Exception as e:
        # Deliberate best-effort handler: originally added for unicode errors
        # while printing to the console, but it also swallows classifier,
        # pickle and API failures so that one bad status does not break the
        # application.  The error is reported, never re-raised.
        print(e)
tweets.append((tweet.text, label)) elif label == "negative": tweets.append((tweet.text, label)) labelcount[label] += 1 random.shuffle(tweets) print labelcount # split in to training and test sets random.shuffle(tweets) num_train = int(0.8 * len(tweets)) # fvecs = [(tweet_features.make_tweet_dict(t),s) for (t,s) in tweets] fvecs = [(tweet_features.get_tweet_features(t, set()), s) for (t, s) in tweets] v_train = fvecs[0:num_train] # v_train = fvecs v_test = fvecs[num_train : len(tweets)] # extract best word features word_fd = FreqDist() label_word_fd = ConditionalFreqDist() for (feats, label) in fvecs: for key in feats: if feats[key]: word_fd.inc(key) label_word_fd[label].inc(key)
if label == 'positive': tweets.append((tweet.text, label)) elif label == 'negative': tweets.append((tweet.text, label)) labelcount[label] += 1 random.shuffle(tweets) print labelcount # split in to training and test sets random.shuffle(tweets) num_train = int(0.8 * len(tweets)) #fvecs = [(tweet_features.make_tweet_dict(t),s) for (t,s) in tweets] fvecs = [(tweet_features.get_tweet_features(t, set()), s) for (t, s) in tweets] v_train = fvecs[0:num_train] #v_train = fvecs v_test = fvecs[num_train:len(tweets)] #extract best word features word_fd = FreqDist() label_word_fd = ConditionalFreqDist() for (feats, label) in fvecs: for key in feats: if feats[key]: word_fd.inc(key) label_word_fd[label].inc(key) pos_word_count = label_word_fd['positive'].N()