def main():
    """Train a Maxent classifier on the rated tweets in 'validation_set.pkl'.

    The pickle is iterated as (tweet, rating) pairs. Every rating is printed;
    rows whose rating cannot be converted with float() are skipped. The
    remaining ratings are mapped to 'positive' (> 0), 'negative' (< 0) or
    'neutral' (anything else). The 1000 most common tokens of the kept tweets
    become the classifier's feature list.

    NOTE(review): this file defines another function that is also named
    `main`; whichever definition comes later rebinds the name.

    Returns:
        The Classifier instance with `feature_list` and `classifier` set.
    """
    me = Classifier()

    # pickle.load can execute arbitrary code; only load trusted files.
    # The context manager closes the handle the original left open.
    with open('validation_set.pkl', 'rb') as handle:
        feature_set = pickle.load(handle)

    feature_set_labels = []
    for tweet, rating in feature_set:
        print(rating)
        try:
            score = float(rating)
        except (TypeError, ValueError):
            # Rating is not numeric: leave this row out of training.
            continue
        if score > 0:
            label = 'positive'
        elif score < 0:
            label = 'negative'
        else:
            label = 'neutral'
        feature_set_labels.append((tweet, label))

    feature_counter = Counter()
    feature_counter.update(chain.from_iterable(
        word_tokenize(process_tweet(tweet))
        for tweet, _ in feature_set_labels))
    me.feature_list = [feat for feat, _ in feature_counter.most_common(1000)]

    # Train on the derived labels. The original iterated the raw
    # `feature_set` here, so the labels computed above were never used and
    # the rows that should have been skipped were trained on anyway.
    ts = [(me.extract_features(tweet), label)
          for tweet, label in feature_set_labels]

    print('training Maxent')
    me.classifier = MaxentClassifier.train(ts)
    return me
def main():
    """Train a Maxent classifier on the tweets in 'undersampled_emoticon.pkl'.

    The pickle is iterated as (tweet, label) pairs. The 1000 most common
    tokens across all tweets become the classifier's feature list, and every
    pair is turned into a (features, label) training example.

    Returns:
        The Classifier instance with `feature_list` and `classifier` set.
    """
    me = Classifier()

    # pickle.load can execute arbitrary code; only load trusted files.
    # The context manager closes the handle the original left open.
    with open('undersampled_emoticon.pkl', 'rb') as handle:
        feature_set = pickle.load(handle)

    feature_counter = Counter()
    feature_counter.update(chain.from_iterable(
        word_tokenize(process_tweet(tweet)) for tweet, _ in feature_set))
    me.feature_list = [feat for feat, _ in feature_counter.most_common(1000)]

    ts = [(me.extract_features(tweet), label) for tweet, label in feature_set]

    # NOTE(review): the message mentions CG, but no algorithm argument is
    # passed to train(), so the library default is used. Confirm whether CG
    # was intended.
    print('training Maxent, algorithm CG')
    me.classifier = MaxentClassifier.train(ts)
    return me