def get_optimal_polarity_classifier():
    """Build, train and return the polarity classifier with the chosen settings.

    Loads the pickled tweets for dataset 3, derives polarity targets from
    them, and constructs an ``SVM`` with fixed vectorizer and TF-IDF option
    dictionaries. The ``'PC2'`` feature set is then selected, fed with the
    Google sentiment values for the same dataset, and the classifier is
    trained on it.

    Returns:
        The trained ``SVM`` instance.
    """
    raw_tweets = utils.get_pickles(3)
    polarity_tweets, polarity_targets = utils.make_polarity_targets(raw_tweets)

    # NOTE(review): these look like scikit-learn CountVectorizer /
    # TfidfTransformer keyword arguments -- confirm against the SVM wrapper.
    vectorizer_settings = dict(ngram_range=(1, 1), max_df=0.5)
    tfidf_settings = dict(sublinear_tf=False, use_idf=True, smooth_idf=True)

    classifier = SVM(
        polarity_tweets,
        polarity_targets,
        vectorizer_settings,
        tfidf_settings,
    )
    classifier.set_feature_set(
        'PC2',
        features.get_google_sentiment_values(3),
    )
    classifier.train_on_feature_set()
    return classifier
def perform_grid_search_on_featureset_SA_and_PA():
    """Grid-search text features for every classifier on both tasks.

    Loads the tweets and sentiment values of dataset 3, preprocesses the
    tweets (link-class removal, lower-casing, special-character removal),
    and then, for the subjectivity task (feature set ``"SA"``) followed by
    the polarity task (feature set ``"PA"``), runs
    ``grid_search_on_text_features`` on an ``SVM``, an ``NB`` and an ``ME``
    classifier in that order, using only the training split.

    Returns:
        None. Results are whatever ``grid_search_on_text_features`` produces;
        the ``file_postfix`` argument suggests they are written to files
        (TODO confirm).
    """
    datasetnr = 3
    tweets = utils.get_pickles(datasetnr)
    sentimentvalues = feat_utils.get_sentiment_values(datasetnr)

    # Preprocessing order matters: each step consumes the previous output.
    tweets = preprocessing.remove_link_classes(tweets)
    tweets = preprocessing.lower_case(tweets)
    tweets = preprocessing.remove_specialchars_round2(tweets)

    # (split function, feature-set tag, result-file postfix) per task.
    tasks = (
        (
            utils.make_subjectivity_train_and_test_and_targets,
            "SA",
            "subjectivity",
        ),
        (
            utils.make_polarity_train_and_test_and_targets,
            "PA",
            "polarity",
        ),
    )
    classifier_classes = (SVM, NB, ME)

    for make_split, feature_set, postfix in tasks:
        # Only the training split is used for the grid search; the remaining
        # values are unpacked so a wrong-arity return still fails loudly.
        (
            train_tweets,
            train_targets,
            _test_tweets,
            _test_targets,
            _train_sentimentvalues,
            _test_sentimentvalues,
        ) = make_split(tweets, sentimentvalues)

        for classifier_class in classifier_classes:
            clf = classifier_class(train_tweets, train_targets, None)
            clf.set_feature_set(feature_set, None)
            clf.grid_search_on_text_features(file_postfix=postfix)