def test_svm_based3(self):
    '''
    Run the SVM-based comparison on the sentiment dataset.

    Loads the dataset with ``sentiment_reader.toNumpy()`` and passes it,
    unmodified, to ``self.compare_svm_based``.
    '''
    # A parenthesised single-argument print produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    print("Compare Sentiment with SVM")

    dataset = sentiment_reader.toNumpy()

    # NOTE(review): an earlier revision unpacked ``dataset`` here and sliced
    # the test split to its first 1000 rows, but those locals were never
    # used -- the full ``dataset`` was what got passed on.  The dead code was
    # removed; if truncating the test split was actually intended, a
    # truncated dataset has to be built and passed explicitly.

    # The three returned values are only unpacked, not inspected; unpacking
    # fails if ``compare_svm_based`` does not return exactly three items.
    acc_matrix, f1_matrix, auc_matrix = self.compare_svm_based(dataset)
def test_ratio(self):
    '''
    Compare several competing methods while changing the ratio of the
    positive class in the test set.

    A binary-class dataset is used for ease of interpretation.  For every
    ratio ``r`` in ``np.arange(0.05, 1.0, 0.05)`` a new test set is drawn
    from the first 1000 test examples, the MLA model is refit with the
    label distribution of that test set, and ``r`` together with the
    resulting accuracy is printed.
    '''
    # Other readers tried here previously: rcv1_binary_reader, snippet_reader.
    dataset = sentiment_reader.toNumpy()
    X_train, y_train, X_test, y_test = dataset

    # Only the first 1000 test examples serve as the pool from which the
    # re-proportioned test sets are generated.
    test_set_original = (X_test[:1000], y_test[:1000])

    # The base classifier is fit before being wrapped (LinearSVC was an
    # alternative tried here previously).
    clf = SVMLight()
    clf.fit(X_train, y_train)
    mla = MLA(clf, verbose=1)

    for r in np.arange(0.05, 1.0, 0.05):
        # Generate a new test set with the desired positive proportion.
        X_test_new, y_test_new = SetGen.with_pos_ratio(
            test_set_original, r, pos_label=1)

        # The distribution passed to ``mla.fit`` is derived from the labels
        # of the generated test set.
        dist_dict = DE.arrayToDistDict(y_test_new)
        mla.fit(X_train, y_train, dist_dict)

        y_pred = mla.predict(X_test_new)
        cm = confusion_matrix(y_test_new, y_pred)
        acc = self.accuracy(cm)

        # Single formatted string: identical output ("r acc") whether print
        # is the Python 2 statement or the Python 3 function.
        print("%s %s" % (r, acc))
from data_io import sentiment_reader
from data_io import domain_reader
from data_io import snippet_reader
from sklearn.svm import LinearSVC

# Baseline run: fit a LinearSVC on the sentiment training split and print
# the value returned by ``score`` on the test split.
X_train, y_train, X_test, y_test = sentiment_reader.toNumpy()

clf = LinearSVC()
clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))
def _test_svm_based4(self):
    '''
    Run the SVM-based comparison on the domain dataset.

    Loads the dataset with ``domain_reader.toNumpy()`` and passes it to
    ``self.compare_svm_based``.
    '''
    # NOTE(review): the leading underscore presumably keeps the test runner
    # from collecting this method -- confirm before re-enabling it.
    print("Compare Domain with SVM")

    # The banner announces the domain dataset, but the previous revision
    # loaded ``sentiment_reader`` here, making this an exact duplicate of
    # the sentiment comparison.
    # NOTE(review): assumes ``domain_reader`` exposes ``toNumpy()`` like the
    # other readers in ``data_io`` -- confirm.
    dataset = domain_reader.toNumpy()

    # The three returned values are only unpacked, not inspected; unpacking
    # fails if ``compare_svm_based`` does not return exactly three items.
    acc_matrix, f1_matrix, auc_matrix = self.compare_svm_based(dataset)