def supperclassify(train_set, train_label, test_set, test_label): '''Different methods''' train_voted = voting(train_set) aux = train_voted == train_label correct = sum(aux.astype(int)) _accuracy = (correct * 100) / len(train_label) _precision, _recall, _f1score, _support = ut.get_measures_for_each_class( train_label, train_voted) print 'Estimator VOTING' print 'Average Accuracy:\t', _accuracy print 'Average Precision:\t', _precision print 'Average Recall:\t', _recall print 'Average F1 Measure:\t', _f1score print '\n' lambdas = weighted_voting_getlambdas(train_set, train_label) results = weighted_voting(test_set, lambdas) aux = results == test_label correct = sum(aux.astype(int)) _accuracy = (correct * 100) / len(test_label) _precision, _recall, _f1score, _support = ut.get_measures_for_each_class( test_label, results) print 'Estimator W_VOTING' print 'Average Accuracy:\t', _accuracy print 'Average Precision:\t', _precision print 'Average Recall:\t', _recall print 'Average F1 Measure:\t', _f1score rf = clf.classifier_randomForest(train_set, train_label) results = clf.evaluateResults(rf, test_set, test_label, estimator_name='RF') lr = clf.logistic_regression(train_set, train_label) results = clf.evaluateResults(lr, test_set, test_label, estimator_name='LR') svm = clf.classifier_svm(train_set, train_label) results = clf.evaluateResults(svm, test_set, test_label, estimator_name='SVM') rbf = clf.rbf_classifier(train_set, train_label) results = clf.evaluateResults(rbf, test_set, test_label, estimator_name='RBF')
def supperclassify(train_set, train_label, test_set, test_label):
    # NOTE(review): this is a DUPLICATE definition of supperclassify --
    # at import time it silently replaces the earlier, near-identical
    # version. Decide which copy is canonical and delete the other.
    '''Different methods'''
    # --- Plain voting, scored against the TRAINING labels (unlike the
    # estimators below, which are scored on the test set -- confirm). ---
    train_voted = voting(train_set)
    aux = train_voted == train_label
    correct = sum(aux.astype(int))
    # Python 2 integer division: the accuracy percentage is truncated.
    _accuracy = (correct * 100) / len(train_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(train_label, train_voted)
    print 'Estimator VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score
    print '\n'
    # --- Weighted voting: lambdas fitted on the training data, scored on
    # the test data. ---
    lambdas = weighted_voting_getlambdas(train_set, train_label)
    results = weighted_voting(test_set, lambdas)
    aux = results == test_label
    correct = sum(aux.astype(int))
    _accuracy = (correct * 100) / len(test_label)
    _precision, _recall, _f1score, _support = ut.get_measures_for_each_class(test_label, results)
    print 'Estimator W_VOTING'
    print 'Average Accuracy:\t', _accuracy
    print 'Average Precision:\t', _precision
    print 'Average Recall:\t', _recall
    print 'Average F1 Measure:\t', _f1score
    # --- Individual classifiers; evaluateResults prints its own report
    # and `results` is overwritten on every call. ---
    rf = clf.classifier_randomForest(train_set, train_label)
    results = clf.evaluateResults(rf, test_set, test_label, estimator_name='RF')
    lr = clf.logistic_regression(train_set, train_label)
    results = clf.evaluateResults(lr, test_set, test_label, estimator_name='LR')
    svm = clf.classifier_svm(train_set, train_label)
    results = clf.evaluateResults(svm, test_set, test_label, estimator_name='SVM')
    rbf = clf.rbf_classifier(train_set, train_label)
    results = clf.evaluateResults(rbf, test_set, test_label, estimator_name='RBF')
    # import pdb; pdb.set_trace()
# Script-level fragment: partition tweets into train/test sets, build a
# TF-IDF bag-of-words on the training tweets, train several classifiers
# and evaluate them. `tweets`, `labels`, `train`, `test` and
# `results_folder` are defined earlier in the file (not visible here).
tweets = np.array(tweets)
labels = np.array(labels)
# Extract tweet partition
# NOTE(review): redundant -- immediately overwritten by the unpacking
# assignment on the next statement.
train_tweets = tweets[np.array(train)]
train_tweets, test_tweets, train_labels, test_labels = tweets[train], tweets[test], labels[train], labels[test]
print len(test_tweets)
print len(train_tweets)
# Flatten the training tweets into a single 1-D array for vectorization.
train_tweets = np.hstack(train_tweets)
dictionary, tweets_features, vectorizer = bow.bow(train_tweets, vec="tfidf")
'''
Training different classifiers.
'''
svm = clf.classifier_svm(tweets_features, train_labels)
rf = clf.classifier_randomForest(tweets_features, train_labels)
ada = clf.adaboost(tweets_features, train_labels)
lr = clf.logistic_regression(tweets_features, train_labels)
'''
Test the different classifiers with the test tweets.
'''
# Project the test tweets into the training vocabulary's feature space.
pred = vectorizer.transform(test_tweets)
pred = pred.toarray()
_results, _accuracyLR, _precisionLR, _recallLR, _f_measureLR = clf.evaluateResults(lr, pred, test_labels, estimator_name='Logistic regression', file_name=results_folder)
_results, _accuracyRF, _precisionRF, _recallRF, _f_measureRF = clf.evaluateResults(rf, pred, test_labels,
    # NOTE(review): this call is truncated in the visible source -- the
    # remaining arguments (estimator_name/file_name?) continue beyond
    # this chunk.
# Script-level fragment (appears to be an alternative/later revision of
# the pipeline above): vectorize the training tweets and train an SVM;
# the other classifiers and one-vs-all experiments are commented out.
# Flatten the training tweets into a single 1-D array for vectorization.
train_tweets = np.hstack(train_tweets)
dictionary, tweets_features, vectorizer = bow.bow(train_tweets, vec="tfidf")
# dictionary, tweets_features, vectorizer = bow.bow(train_tweets, vec="count")
# print dictionary
'''Dimsionality reduction'''
# LDA
# lda = clf.lda(tweets_features, train_labels)
# print tweets_features.shape
''' Training different classifiers. '''
# forest = clf.classifier_randomForest(tweets_features, train_labels)
# Only the SVM is active; the alternatives below are kept disabled.
svm = clf.classifier_svm(tweets_features, train_labels)
# mlp = clf.multilayer_perceptron(tweets_features, train_labels)
# ada = clf.adaboost(tweets_features, train_labels)
# lr = clf.logistic_regression(tweets_features, train_labels)
# ONE VS ALL CLASSIFIER WITH DIFFERENT ESTIMATORS.
# estimator = clf.svm.SVC(random_state=0)
# oneVSall_svm = clf.onevsall(tweets_features, train_labels, estimator)
#
# estimator = clf.MLP()
# oneVSall_mlp = clf.onevsall(tweets_features, train_labels, estimator)
#
# estimator = clf.RandomForestClassifier(n_estimators=50)
# oneVSall_rf = clf.onevsall(tweets_features, train_labels, estimator)
# NOTE(review): the triple-quoted string below is not closed within this
# chunk -- it continues beyond the visible source.
''' Test the different classifiers with the test tweets.