import time
import logging
from sklearn.svm import SVC

# assumption: `log` is a module-level logger; the original snippet does not show its setup
log = logging.getLogger(__name__)


def classify(features, results, test_features, test_results, C, gamma):
    """Train an RBF-kernel SVC and return the test error rate in percent."""
    cli = "%s@%s" % (C, gamma)
    st = time.time()
    log.info("Classifier begins")

    classifier = SVC(C=C, gamma=gamma, kernel="rbf")
    classifier.fit(features, results)
    st2 = time.time()
    prediction = classifier.predict(test_features)
    log.info("id: %s Training time: %s, Prediction time: %s"
             % (cli, st2 - st, time.time() - st2))

    # count misclassified test samples
    error = 0
    for index, value in enumerate(prediction):
        if test_results[index] != value:
            error += 1
    return (error / float(len(test_results))) * 100
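# --- Illustrative usage (not from the original source) ----------------------
# A minimal sketch of how classify() above might be driven over a small grid of
# C/gamma values, using scikit-learn's bundled digits data and a plain 80/20
# split; the dataset choice and the grid values are assumptions for this example.
from sklearn.datasets import load_digits

digits = load_digits()
X, y = digits.data, digits.target
n_train = int(0.8 * X.shape[0])

for C in (1, 10, 100):
    for gamma in (0.001, 0.01, 0.1):
        err = classify(X[:n_train], y[:n_train], X[n_train:], y[n_train:], C, gamma)
        print "C=%s, gamma=%s -> error rate: %.2f%%" % (C, gamma, err)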
import sklearn.metrics
from sklearn.datasets import load_svmlight_file
# SparseSVC is assumed to be the sparse SVC alias from older scikit-learn
# releases (sklearn.svm.sparse.SVC); the original snippet does not show the import.


def classify(train_file, test_file):
    """
    Train a model and test it.

    train_file: file that the model is trained on
    test_file: file that is used to test the model
    """
    X_train, y_train = load_svmlight_file(train_file)
    X_test, y_test = load_svmlight_file(test_file, X_train.shape[1])
    # X_train = X_train.todense()
    # X_test = X_test.todense()

    clf = SparseSVC(kernel="linear", C=0.2)
    # clf = LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)

    print sklearn.metrics.classification_report(y_test, y_predict)
    print sklearn.metrics.confusion_matrix(y_test, y_predict)
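# Illustrative call of classify() above; the file names are hypothetical and
# both files are expected to be in svmlight/libsvm format.
if __name__ == "__main__":
    classify("train.svmlight", "test.svmlight")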
# X_den_train, X_den_test = X_den[train_index], X_den[test_index]

# feed models
clf_mNB.fit(X_train_train, y_train_train)
clf_kNN.fit(X_train_train, y_train_train)
clf_ridge.fit(X_train_train, y_train_train)
clf_lSVC.fit(X_train_train, y_train_train)
clf_SVC.fit(X_train_train, y_train_train)

# get prediction for this fold run
pred_mNB = clf_mNB.predict(X_train_test)
pred_kNN = clf_kNN.predict(X_train_test)
pred_ridge = clf_ridge.predict(X_train_test)
pred_lSVC = clf_lSVC.predict(X_train_test)
pred_SVC = clf_SVC.predict(X_train_test)

# update z array for each model
z_mNB = np.append(z_mNB, pred_mNB, axis=None)
z_kNN = np.append(z_kNN, pred_kNN, axis=None)
z_ridge = np.append(z_ridge, pred_ridge, axis=None)
z_lSVC = np.append(z_lSVC, pred_lSVC, axis=None)
z_SVC = np.append(z_SVC, pred_SVC, axis=None)

# putting z's from each model into one 2d matrix;
# this is the (feature) input for level 1, similar to X.
# In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
z = np.array([z_mNB, z_kNN, z_ridge, z_lSVC, z_SVC], dtype=np.int32)
z = z.transpose()
X_den_train, X_den_test = X_den[train_index], X_den[test_index]

# feed models
clf_mNB.fit(X_train, y_train)
clf_kNN.fit(X_train, y_train)
clf_ridge.fit(X_train, y_train)
clf_lSVC.fit(X_train, y_train)
clf_SVC.fit(X_train, y_train)

# get prediction for this fold run
pred_mNB = clf_mNB.predict(X_test)
pred_kNN = clf_kNN.predict(X_test)
pred_ridge = clf_ridge.predict(X_test)
pred_lSVC = clf_lSVC.predict(X_test)
pred_SVC = clf_SVC.predict(X_test)

# update z array for each model
z_mNB = np.append(z_mNB, pred_mNB, axis=None)
z_kNN = np.append(z_kNN, pred_kNN, axis=None)
z_ridge = np.append(z_ridge, pred_ridge, axis=None)
z_lSVC = np.append(z_lSVC, pred_lSVC, axis=None)
z_SVC = np.append(z_SVC, pred_SVC, axis=None)

# putting z's from each model into one 2d matrix;
# this is the (feature) input for level 1, similar to X.
# In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
z = np.array([z_mNB, z_kNN, z_ridge, z_lSVC, z_SVC], dtype=np.int32)
z = z.transpose()
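# --- Level-1 sketch (not shown in the original snippet) ----------------------
# Per the comments above, z stacks the level-0 predictions column-wise as the
# level-1 feature matrix while y stays unchanged. The choice of logistic
# regression as the level-1 learner below is an assumption, and it assumes the
# concatenated fold predictions in z line up row-for-row with y.
from sklearn.linear_model import LogisticRegression

clf_level1 = LogisticRegression()
clf_level1.fit(z, y)                 # z: stacked level-0 predictions, y: original labels
pred_level1 = clf_level1.predict(z)  # illustrative only; a held-out set would be used in practice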
# Initialize variables for counting the averages
f1_all = []
f5_all = []
acc_all = []
pre_all = []
rec_all = []

# Test for 10 rounds using the results from 10-fold cross validation
for train_index, test_index in kf:

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # fit and predict
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)

    # print y_test
    # print pred
    # print type(pred)

    # output tree into graph
    # out = StringIO()
    # out = export_graphviz(clf, out_file=out)

    # metrics
    #
    # Original
    f1_score = metrics.f1_score(y_test, pred)
    f5_score = metrics.fbeta_score(y_test, pred, beta=0.5)
    # zero_one_score is the accuracy score in older scikit-learn releases
    acc_score = metrics.zero_one_score(y_test, pred)
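    # --- Sketch of the remaining bookkeeping (the original snippet is truncated
    # here, so the precision/recall calls and the final averaging below are
    # assumptions, kept in the spirit of the *_all accumulators defined above) ---
    pre_score = metrics.precision_score(y_test, pred)
    rec_score = metrics.recall_score(y_test, pred)

    f1_all.append(f1_score)
    f5_all.append(f5_score)
    acc_all.append(acc_score)
    pre_all.append(pre_score)
    rec_all.append(rec_score)

print "average f1:        %f" % (sum(f1_all) / len(f1_all))
print "average f0.5:      %f" % (sum(f5_all) / len(f5_all))
print "average accuracy:  %f" % (sum(acc_all) / len(acc_all))
print "average precision: %f" % (sum(pre_all) / len(pre_all))
print "average recall:    %f" % (sum(rec_all) / len(rec_all))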