def clasificar_OVO(X, y, df, trainInputs, trainOutputs, testInputs, testOutputs, graphname):
    """Train a One-vs-One decision-tree classifier and report its CCR.

    Parameters
    ----------
    X, y : full dataset used for 10-fold cross-validation of the base tree.
    df : unused here; kept for interface compatibility with callers.
    trainInputs, trainOutputs : training split for the OvO classifier.
    testInputs, testOutputs : held-out split used for the reported test CCR.
    graphname : label printed as a header for this run.

    Returns
    -------
    float : test-set accuracy (CCR) of the fitted OvO classifier.
    """
    print("\n[" + str(graphname) + "]")
    clfBase = DecisionTreeClassifier()
    # Bug fix: the 10-fold CV scores were computed but silently discarded;
    # report them so the cross-validation work is not wasted.
    scores = cross_val_score(clfBase, X, y, cv=10)
    print("\tCV accuracy = %.2f%% (+/- %.2f%%)" % (scores.mean() * 100, scores.std() * 100))
    clf = OneVsOneClassifier(clfBase)
    clf = clf.fit(trainInputs, trainOutputs)
    precisionTrain = clf.score(trainInputs, trainOutputs)
    precisionTest = clf.score(testInputs, testOutputs)
    print("\tCCR train = %.2f%% | CCR test = %.2f%%" % (precisionTrain * 100, precisionTest * 100))
    prediccion_test = clf.predict(testInputs)
    print(prediccion_test)
    print(testOutputs)
    return precisionTest
def predict(X_train, X_test, y_train, y_test, k, method_name):
    """Fit OvO and OvR k-nearest-neighbour classifiers, print their test
    accuracies, and emit confusion-matrix / ROC plots."""
    print('Start knn predicting...')
    labels = y_train.values.ravel()
    base_knn = neighbors.KNeighborsClassifier(
        n_neighbors=k, weights='distance', algorithm='auto', leaf_size=30,
        p=2, metric='minkowski', metric_params=None, n_jobs=-1)
    ovo_clf = OneVsOneClassifier(base_knn)
    ovo_clf.fit(X_train, labels)
    print('Accuracy score of knn_ovo: ' + '%.3f' % ovo_clf.score(X_test, y_test))
    ovr_clf = OneVsRestClassifier(base_knn)
    ovr_clf.fit(X_train, labels)
    print('Accuracy score of knn_ovr: ' + '%.3f' % ovr_clf.score(X_test, y_test))
    # Plot order preserved: OvR confusion matrix, OvO confusion matrix, OvR ROC.
    plot.plot_conf_matrix(X_test, y_test, ovr_clf, method_name + '_ovr')
    plot.plot_conf_matrix(X_test, y_test, ovo_clf, method_name + '_ovo')
    plot.plot_roc(X_train, X_test, y_train, y_test, ovr_clf, method_name + '_ovr')
def evaluateOneVsOne(X, Y, printReport=False):
    """Train a One-vs-One LinearSVC on an 80/20 split and evaluate it.

    Parameters
    ----------
    X, Y : features and labels; split with a fixed random_state for repeatability.
    printReport : when True, print timing, accuracy and per-class prediction counts.

    Returns
    -------
    list : [test accuracy, number of samples predicted as class 1, fitted classifier].
    """
    time = datetime.datetime.now()
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    clf = OneVsOneClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, Y_train)
    # Bug fix: the original overwrote Y_test with clf.predict(X_test) and then
    # returned clf.score(X_test, Y_test) -- scoring the model against its own
    # predictions, which is always 1.0. Keep predictions in a separate name.
    score = clf.score(X_test, Y_test)
    if printReport:
        print('Training time:' + str(datetime.datetime.now() - time))
        print('Evaluation result: OutputCode: ' + str(score))
    predictions = clf.predict(X_test)
    if printReport:
        print('0: ' + str((predictions == 0).sum()))
        print('1: ' + str((predictions == 1).sum()))
        print('2: ' + str((predictions == 2).sum()))
    return [score, (predictions == 1).sum(), clf]
def multi(x, y):
    """Compare One-vs-Rest and One-vs-One logistic regression accuracy.

    Bug fix: the original scored on undefined globals ``xt``/``yt`` (NameError
    unless the caller happened to define them); a held-out split is now taken
    from (x, y) instead. Unused imports (cross_val_score, accuracy_score) were
    removed.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.cross_validation import train_test_split
    from sklearn.multiclass import OneVsRestClassifier
    from sklearn.multiclass import OneVsOneClassifier
    # Hold out a test partition so the reported accuracies are out-of-sample.
    x_tr, xt, y_tr, yt = train_test_split(x, y, test_size=0.25, random_state=0)
    OVR = OneVsRestClassifier(LogisticRegression()).fit(x_tr, y_tr)
    OVO = OneVsOneClassifier(LogisticRegression()).fit(x_tr, y_tr)
    print('One vs rest accuracy: %.3f' % OVR.score(xt, yt))
    print('One vs one accuracy: %.3f' % OVO.score(xt, yt))
def svcOvO(X, Y, *args):
    """Fit a One-vs-One SVC on the training partition and evaluate it.

    Returns a (predictions, score, model) tuple for the held-out data.
    """
    X_test, Y_test, X_train, Y_train = get_input_output(X, Y, *args)
    # Build and train the pairwise (one-vs-one) classifier.
    clf = OneVsOneClassifier(SVC(random_state=seed, gamma='scale'))
    clf.fit(X_train, Y_train)
    # Evaluate on the test partition.
    accuracy = clf.score(X_test, Y_test)
    preds = clf.predict(X_test)
    return (preds, accuracy, clf)
def predict(X_train, X_test, y_train, y_test):
    """Fit OvO and OvR wrappers around a random forest and print test accuracy."""
    print('Start RandomForest predicting...')
    labels = y_train.values.ravel()
    forest = RandomForestClassifier()
    ovo_clf = OneVsOneClassifier(forest)
    ovo_clf.fit(X_train, labels)
    print('Accuracy score of ran_ovo: ' + '%.3f' % ovo_clf.score(X_test, y_test))
    ovr_clf = OneVsRestClassifier(forest)
    ovr_clf.fit(X_train, labels)
    print('Accuracy score of ran_ovr: ' + '%.3f' % ovr_clf.score(X_test, y_test))
def predict(X_train, X_test, y_train, y_test, method_name):
    """Fit OvO and OvR RBF-kernel SVMs, print accuracies, and plot diagnostics."""
    print('Start SVM predicting...')
    labels = y_train.values.ravel()
    ovo_clf = OneVsOneClassifier(SVC(kernel='rbf', probability=True))
    ovo_clf.fit(X_train, labels)
    print('Accuracy score of svm_ovo: ' + '%.3f' % ovo_clf.score(X_test, y_test))
    ovr_clf = OneVsRestClassifier(SVC(kernel='rbf', probability=True))
    ovr_clf.fit(X_train, labels)
    print('Accuracy score of svm_ovr: ' + '%.3f' % ovr_clf.score(X_test, y_test))
    # Plot order preserved: OvO confusion matrix, OvR confusion matrix, OvR ROC.
    plot.plot_conf_matrix(X_test, y_test, ovo_clf, method_name + '_ovo')
    plot.plot_conf_matrix(X_test, y_test, ovr_clf, method_name + '_ovr')
    plot.plot_roc(X_train, X_test, y_train, y_test, ovr_clf, method_name + '_ovr')
def predict(X_train, X_test, y_train, y_test, method_name):
    """Fit OvO and OvR XGBoost wrappers, print accuracies, and plot diagnostics."""
    print('Start XGBoost predicting...')
    labels = y_train.values.ravel()
    ovo_clf = OneVsOneClassifier(XGBClassifier())
    ovo_clf.fit(X_train, labels)
    print('Accuracy score of xgb_ovo: ' + '%.3f' % ovo_clf.score(X_test, y_test))
    ovr_clf = OneVsRestClassifier(XGBClassifier())
    ovr_clf.fit(X_train, labels)
    print('Accuracy score of xgb_ovr: ' + '%.3f' % ovr_clf.score(X_test, y_test))
    # Plot order preserved: OvO confusion matrix, OvR confusion matrix, OvR ROC.
    plot.plot_conf_matrix(X_test, y_test, ovo_clf, method_name + '_ovo')
    plot.plot_conf_matrix(X_test, y_test, ovr_clf, method_name + '_ovr')
    plot.plot_roc(X_train, X_test, y_train, y_test, ovr_clf, method_name + '_ovr')
def OneVsOne_LinearSVC(X_train, y_train, X_test, y_test, PCA):
    """Train a One-vs-One LinearSVC, report accuracy/timing, and pickle the model.

    Parameters
    ----------
    X_train, y_train, X_test, y_test : train/test splits.
    PCA : flag choosing the output directory (0 -> "models/", else "modelsPCA/").
    """
    C = 1
    start = timeit.default_timer()
    model1 = OneVsOneClassifier(LinearSVC(C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()
    start2 = timeit.default_timer()
    accuracy1 = model1.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print('One VS One SVM accuracy :> Kernel|Linear:' + str(accuracy1 * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))
    filename = 'OneVsOne_LinearSVC.pkl'
    # Bug fix: pickle.dump(model, open(...)) leaked the file handle; a context
    # manager guarantees the pickle is flushed and the file closed.
    folder = "models/" if PCA == 0 else "modelsPCA/"
    with open(folder + filename, 'wb') as f:
        pickle.dump(model1, f)
def OneVsOne_ploy(X_train, y_train, X_test, y_test, PCA):
    """Train a One-vs-One degree-2 polynomial SVC, report accuracy/timing, and
    pickle the model.

    PCA : flag choosing the output directory (0 -> "models/", else "modelsPCA/").
    """
    C = 1
    start = timeit.default_timer()
    model = OneVsOneClassifier(SVC(kernel='poly', degree=2, C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()
    start2 = timeit.default_timer()
    accuracy = model.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print('One VS One SVM accuracy Kernel == Poly: ' + str(accuracy * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))
    filename = 'OneVsOne_ploy.pkl'
    # Bug fix: pickle.dump(model, open(...)) leaked the file handle; use `with`
    # so the file is closed deterministically.
    folder = "models/" if PCA == 0 else "modelsPCA/"
    with open(folder + filename, 'wb') as f:
        pickle.dump(model, f)
def OneVsOne_rbf(X_train, y_train, X_test, y_test, PCA):
    """Train a One-vs-One RBF-kernel SVC, report accuracy/timing, and pickle
    the model.

    NOTE(review): C=.000001 is an extremely strong regularizer -- confirm it is
    intentional rather than a leftover experiment.
    PCA : flag choosing the output directory (0 -> "models/", else "modelsPCA/").
    """
    C = .000001
    start = timeit.default_timer()
    model = OneVsOneClassifier(SVC(kernel='rbf', gamma=0.4, C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()
    start2 = timeit.default_timer()
    accuracy = model.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print('One VS One SVM accuracy Kernel == rbf Gaussian : ' + str(accuracy * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))
    filename = 'OneVsOne_rbf.pkl'
    # Bug fix: pickle.dump(model, open(...)) leaked the file handle; use `with`
    # so the file is closed deterministically.
    folder = "models/" if PCA == 0 else "modelsPCA/"
    with open(folder + filename, 'wb') as f:
        pickle.dump(model, f)
def OneVsOnelinear(X_train, y_train, X_test, y_test, PCA):
    """Train a One-vs-One linear-kernel SVC, report accuracy/timing, and pickle
    the model.

    PCA : flag choosing the output directory (0 -> "models/", else "modelsPCA/").
    """
    C = 1
    start = timeit.default_timer()
    model = OneVsOneClassifier(SVC(kernel='linear', C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()
    start2 = timeit.default_timer()
    accuracy = model.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print('One VS One SVM accuracy :> Kernel|Linear:' + str(accuracy * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))
    filename = 'OneVsOnelinear.pkl'
    # Bug fix: pickle.dump(model, open(...)) leaked the file handle; use `with`
    # so the file is closed deterministically. (Commented-out manual accuracy
    # computation removed as dead code.)
    folder = "models/" if PCA == 0 else "modelsPCA/"
    with open(folder + filename, 'wb') as f:
        pickle.dump(model, f)
def multiclass_SVC(X, y):
    """Compare One-vs-Rest and One-vs-One LinearSVC accuracy on a 65/35 split.

    Returns a (one_vs_rest_score, one_vs_one_score) tuple of test accuracies.
    """
    from sklearn.svm import LinearSVC
    # NOTE(review): sklearn.cross_validation is the long-deprecated predecessor
    # of sklearn.model_selection; this only runs on old sklearn versions.
    from sklearn import cross_validation
    # first move: split data
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.35)
    # one-vs-rest implementation
    from sklearn.multiclass import OneVsRestClassifier
    ovr = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)
    # one-vs-one implementation (original comment said "one-vs-all", which is
    # actually a synonym for the one-vs-rest scheme above)
    from sklearn.multiclass import OneVsOneClassifier
    ovo = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)
    one_vs_rest = ovr.score(X_test, y_test)
    one_vs_one = ovo.score(X_test, y_test)
    return one_vs_rest, one_vs_one
# Output the hitrate and the confusion matrix for each model print("SVM: ") print(svm.score(X_train, y_train)) print(svm.score(X_test, y_test)) #print(confusion_matrix(pred, y_test)) svm2 = OneVsOneClassifier(LinearSVC(C=100.)) svm2.fit(X_train, y_train) # Make an array of predictions on the test set pred = svm2.predict(X_test) # Output the hitrate and the confusion matrix for each model print("LinearSVC: ") print(svm2.score(X_train, y_train)) print(svm2.score(X_test, y_test)) from sklearn.neighbors import KNeighborsClassifier neigh = (KNeighborsClassifier(n_neighbors=2)) neigh.fit(X_train, y_train) pred = neigh.predict(X_test) print("knn: ") print(neigh.score(X_train,y_train)) print(neigh.score(X_test,y_test)) from sklearn.ensemble import RandomForestClassifier clf = (RandomForestClassifier(n_estimators=5,max_depth=None,min_samples_split=5, random_state=15)) clf = clf.fit(X_train, y_train)
# Fit the OvR classifier on the first two features only (for 2-D plotting).
ovr.fit(X_train[:,:2], y_train)
print("ovr.score:",ovr.score(X_test[:,:2],y_test))
#ovr.score: 0.6
################################################################################
# Logistic regression's built-in multiclass support (labelled "OVO" here).
# NOTE(review): multi_class='multinomial' is softmax regression, not a true
# one-vs-one scheme -- confirm the intended comparison.
from sklearn.linear_model import LogisticRegression
log_reg_ovo = LogisticRegression(multi_class='multinomial', solver='newton-cg')
log_reg_ovo.fit(X_train[:,:2], y_train)
print("log_reg_ovo.score:",log_reg_ovo.score(X_test[:,:2],y_test))
#log_reg_ovo.score: 0.8
# Decision boundary of the multinomial logistic-regression model.
plot_decision_boundary(log_reg_ovo, axis=[4, 8, 1.5, 4.5])
plt.scatter(X[y==0, 0], X[y==0, 1], color='g', label='y==0')
plt.scatter(X[y==1, 0], X[y==1, 1], color='b', label='y==1')
plt.scatter(X[y==2, 0], X[y==2, 1], color='r', label='y==2')
plt.legend()
plt.show()
# OneVsOneClassifier can wrap any binary classifier into a multiclass one.
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
ovo = OneVsOneClassifier(lr)
ovo.fit(X_train[:,:2], y_train)
print("ovo.score:",ovo.score(X_test[:,:2],y_test))
#ovo.score: 0.6333333333333333
def main(): # load data # training data data = pd.read_csv(os.path.join(os.path.dirname(__file__), 'data', 'usps', 'zip.train'), header=None, delimiter=' ').iloc[:, :-1] y_train = data.pop(0).values X_train = data.values # test data data = pd.read_csv(os.path.join(os.path.dirname(__file__), 'data', 'usps', 'zip.test'), header=None, delimiter=' ') y_test = data.pop(0).values X_test = data.values pca = PCA(n_components=.95) pca.fit(X_train) X_train = pca.transform(X_train) X_test = pca.transform(X_test) svm_errs = [] with tqdm(desc="Problem 1", total=len(C_VALS)) as pbar: for C in C_VALS: svm = SVC(C=C, kernel='linear', decision_function_shape='ovo') svm.fit(X_train, y_train) pbar.update(1) svm_errs.append(1 - svm.score(X_test, y_test)) lr = OVO(LR(solver='lbfgs', max_iter=5000)) lr.fit(X_train, y_train) lr_score = lr.score(X_test, y_test) err_plot([svm_errs], ["SVM"], lr=1. - lr_score, title="One vs. One Linear SVM", out='hw7/ovo_linear_svm.pdf') ovo_svm_errs = [] with tqdm(desc="Problem 2", total=len(C_VALS)) as pbar: for C in C_VALS: svm = OVO(SVC(C=C, kernel='poly', degree=3, gamma='auto')) svm.fit(X_train, y_train) pbar.update(1) ovo_svm_errs.append(1 - svm.score(X_test, y_test)) err_plot([ovo_svm_errs], ["OvO SVM"], lr=1. - lr_score, title="One vs. One Cubic SVM", out='hw7/ovo_cubic_svm.pdf') ovr_svm_errs = [] with tqdm(desc="Problem 3", total=len(C_VALS)) as pbar: for C in C_VALS: svm = OVR(SVC(C=C, kernel='poly', degree=3, gamma='auto')) svm.fit(X_train, y_train) pbar.update(1) ovr_svm_errs.append(1 - svm.score(X_test, y_test)) err_plot([ovo_svm_errs, ovr_svm_errs], ["OvO SVM", "OvR SVM"], lr=1. - lr_score, title="One vs. 
Rest Cubic SVM/OvO Cubic", out='hw7/ovr_cubic_svm.pdf') n = 5 # ensuring that we have at least n neighbors for all classes in the # sample while True: index = np.random.choice(X_train.shape[0], 100, replace=False) X_sample = X_train[index] y_sample = y_train[index] # can use a list comprehension to check if all([ len(X_sample[y_sample == y_i]) >= n for y_i in np.unique(y_sample) ]): break dists = [] for X_i, y_i in zip(X_sample, y_sample): X_cls = X_sample[y_sample == y_i] nbrs = NearestNeighbors(n_neighbors=n) nbrs.fit(X_cls) try: distances, _ = nbrs.kneighbors(X_i.reshape(1, -1)) except ValueError as err: raise err # nee to use reshape b/c single sample dists.append(distances[-1]) global SIGMA SIGMA = np.mean(dists) ovo_gauss_svm_errs = [] with tqdm(desc="Problem 4 (SVM)", total=len(C_VALS), file=sys.stdout) as pbar: for C in C_VALS: svm = OVO(SVC(C=C, kernel='rbf', gamma=1. / (2. * SIGMA**2))) # svm = SVC(C=C, kernel='rbf', gamma=1. / (2. * SIGMA ** 2), # decision_function_shape='ovo') svm.fit(X_train, y_train) score = svm.score(X_test, y_test) pbar.update(1) ovo_gauss_svm_errs.append(1 - score) knn_errs = [] with tqdm(desc="Problem 4 (kNN)", total=len(np.arange(3, 11)), file=sys.stdout) as pbar: for k in np.arange(3, 11): knn = KNeighborsClassifier(n_neighbors=k, weights=gaussian) knn.fit(X_train, y_train) pbar.update(1) knn_errs.append((k, 1 - knn.score(X_test, y_test))) err_plot([ovo_gauss_svm_errs], ["OvO SVM"], knn=knn_errs, title="One vs. One Gaussian SVM with kNN", out='hw7/ovo_gaussian_svm_knn.pdf') ovr_gauss_svm_errs = [] with tqdm(desc="Problem 5", total=len(C_VALS), file=sys.stdout) as pbar: for C in C_VALS: svm = OVR(SVC(C=C, kernel='rbf', gamma=1. / (2. * SIGMA**2))) # svm = SVC(C=C, kernel='rbf', gamma=1. / (2. * SIGMA ** 2), # decision_function_shape='ovr') svm.fit(X_train, y_train) score = svm.score(X_test, y_test) pbar.update(1) ovr_gauss_svm_errs.append(1 - score) err_plot([ovr_gauss_svm_errs], ["OvR SVM"], knn=knn_errs, title="One vs. 
Rest Gaussian SVM with kNN", out='hw7/ovr_gaussian_svm_knn.pdf') err_plot([ svm_errs, ovo_svm_errs, ovr_svm_errs, ovo_gauss_svm_errs, ovr_gauss_svm_errs ], [ "Linear SVM", "OvO Cubic SVM", "OvR Cubic SVM", "OvO Gaussian SVM", "OvR Gaussian SVM" ], lr=1. - lr_score, knn=knn_errs, title="Multiclass SVM Kernels", out='hw7/all_svm_knn.pdf') min_idx = np.argmin(svm_errs) min_lin_err = svm_errs[min_idx] min_lin_c = np.log2(C_VALS[min_idx]) print("Min Linear SVM Error = {0:.4f}".format(min_lin_err)) print("Min Linear SVM log2(C) = {0}".format(min_lin_c)) print("LR Error = {0:.4f}".format(1. - lr_score)) min_idx = np.argmin(ovo_svm_errs) min_lin_err = ovo_svm_errs[min_idx] min_lin_c = np.log2(C_VALS[min_idx]) print("Min OvO Cubic SVM Error = {0:.4f}".format(min_lin_err)) print("Min OvO Cubic SVM log2(C) = {0}".format(min_lin_c)) min_idx = np.argmin(ovr_svm_errs) min_lin_err = ovr_svm_errs[min_idx] min_lin_c = np.log2(C_VALS[min_idx]) print("Min OvR Cubic SVM Error = {0:.4f}".format(min_lin_err)) print("Min OvR Cubic SVM log2(C) = {0}".format(min_lin_c)) min_idx = np.argmin(knn_errs) min_lin_k, min_lin_err = knn_errs[min_idx] print("Min kNN Error = {0:.4f}".format(min_lin_err)) print("Min kNN log2(C) = {0}".format(min_lin_k)) min_idx = np.argmin(ovo_gauss_svm_errs) min_lin_err = ovo_gauss_svm_errs[min_idx] min_lin_c = np.log2(C_VALS[min_idx]) print("Min OvO Gaussian SVM Error = {0:.4f}".format(min_lin_err)) print("Min OvO Gaussian SVM log2(C) = {0}".format(min_lin_c)) min_idx = np.argmin(ovr_gauss_svm_errs) min_lin_err = ovr_gauss_svm_errs[min_idx] min_lin_c = np.log2(C_VALS[min_idx]) print("Min OvR Gaussian SVM Error = {0:.4f}".format(min_lin_err)) print("Min OvR Gaussian SVM log2(C) = {0}".format(min_lin_c)) print("sigma = {0:.4f}".format(SIGMA))
# dividing X, y into train and test data X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) log.write(str(len(X_train)) + "\n" + str(len(X_test)) + "\n") svm_model_ovr_rbf = OneVsRestClassifier(SVC(kernel='rbf', C=1)).fit(X_train, y_train) svm_model_ovr_rbf_predictions = svm_model_ovr_rbf.predict(X_test) # model accuracy for X_test accuracy = svm_model_ovr_rbf.score(X_test, y_test) # creating a confusion matrix # conf_matrix = confusion_matrix(y_test, svm_predictions) log.write("Accuracy svm_model_ovr_rbf: " + str(accuracy) + "\n") # log.write(str(conf_matrix)) svm_model_ovo_linear = OneVsOneClassifier(SVC(kernel='linear', C=1)).fit(X_train, y_train) svm_model_linear_predictions = svm_model_ovo_linear.predict(X_test) accuracy = svm_model_ovo_linear.score(X_test, y_test) log.write("Accuracy svm_model_ovo_linear: " + str(accuracy) + "\n") # creating a confusion matrix # conf_matrix = confusion_matrix(y_test, svm_predictions) # log_reg_model_ovr = OneVsRestClassifier(linear_model.SGDClassifier(max_iter = 1000, tol = 1e-3)).fit(X_train, y_train) # accuracy = log_reg_model_ovr.score(X_test, y_test) # log.write("Accuracy log_reg_model_ovr: " + str(accuracy) + "\n")
# -*- coding: utf-8 -*- """ Created on Tue Jun 25 12:45:17 2019 @author: mfatemeh """ import numpy as np from sklearn.datasets import load_digits dataset = load_digits() X = dataset.data y = dataset.target from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) from sklearn.linear_model import Perceptron from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier ovsr = OneVsRestClassifier(Perceptron()) ovso = OneVsOneClassifier(Perceptron()) ovsr.fit(X_train, y_train) ovsr.score(X_test, y_test) ovso.fit(X_train, y_train) ovso.score(X_test, y_test)
# First column is the label; remaining columns are features.
tr_feat = tr[:,1:]
ts_feat = ts[:,1:]
tr_label = tr[:,0]
ts_label = ts[:,0]
# use sklearn C-Support Vector Classification
# NOTE(review): the estimator actually used below is LinearSVC, not SVC.
## == one-vs-one == ##
# The multiclass support is handled in a one-vs-one scheme
# train
ovo_clf = OneVsOneClassifier(LinearSVC())
ovo_clf.fit(tr_feat, tr_label)
# predict
ovo_pred = ovo_clf.predict(ts_feat)
# Classification error = 1 - accuracy.
ovo_err = 1- ovo_clf.score(ts_feat, ts_label)
# confusion matrix
#
#array([[159,   7],
#       [  5, 161]])
ovo_cmat = metrics.confusion_matrix(ts_label, ovo_pred)
# Per-class misclassification rate: 1 - (diagonal / row totals).
pred_total = np.sum(ovo_cmat,axis = 1)
ovo_mis = 1- np.diag(ovo_cmat).astype(float) / pred_total
print("one vs. one svm - classification err: %s \n"%(ovo_err))
print("confusion matrix: \n %s"%(ovo_cmat))
print("class misclassification rate : \n %s"%(ovo_mis))
## == one-vs-rest == ##
# The multiclass support is handled in a one-vs-rest scheme
# train
ovr_clf = OneVsRestClassifier(LinearSVC())
boundary(pipe2, [4, 8.5, 1.5, 4.5]) plt.scatter(X[y == 0, 0], X[y == 0, 1]) plt.scatter(X[y == 1, 0], X[y == 1, 1]) plt.scatter(X[y == 2, 0], X[y == 2, 1]) # 任意模型多分类器-------------------------------------------------------------------------------------------------------- X = iris.data y = iris.target # OneVsRestClassifier ovr = OneVsRestClassifier(log_reg) ovr.fit(X_train, y_train) print("ovr =", ovr.score(X_test, y_test)) # OneVsOneClassifier ovo = OneVsOneClassifier(log_reg) ovo.fit(X_train, y_train) print("ovo =", ovo.score(X_test, y_test)) plt.show() ''' 参数含义: 1.penalty:字符串,指定了正则化策略。默认为"l2" (1)如果为"l2",则优化的目标函数为:0.5*||w||^2_2+C*L(w),C>0, L(w)为极大似然函数。 (2)如果为"l1",则优化的目标函数为||w||_1+C*L(w),C>0, L(w)为极大似然函数。 2.dual:布尔值。默认为False。如果等于True,则求解其对偶形式。 只有在penalty="l2"并且solver="liblinear"时才有对偶形式。如果为False,则求解原始形式。 当n_samples > n_features,偏向于dual=False。 3.tol:阈值。判断迭代是否收敛或者是否满足精度的要求。 4.C:float,默认为1.0.指定了正则化项系数的倒数。必须是一个正的浮点数。他的值越小,正则化项就越大。 5.fit_intercept:bool值。默认为True。如果为False,就不会计算b值。
# TEST APPRENTISSAGE MULTICLASSE ################################################################################ # ONE VS ONE #Ici j'utilise les données multiclasses print("ovo:\n") ovo = OneVsOneClassifier(SVC(C=10, gamma=0.01, kernel="rbf")) ovo.fit(datax, datay) p = ovo.predict(datatestx) print(classification_report(datatesty, p)) accuracytrain = [] accuracytest = [] for i in range(0, 10): accuracytrain.append(ovo.score(datax, datay) * 100) accuracytest.append(ovo.score(datatestx, datatesty) * 100) print("Erreur moyenne : train %f, test %f" % (np.mean(accuracytrain), np.mean(accuracytest))) s #ONE VS REST print("ovr:\n") ovr = OneVsRestClassifier(SVC(C=10, gamma=0.01, kernel="rbf")) ovr.fit(datax, datay) p = ovr.predict(datatestx) print(classification_report(datatesty, p)) accuracytrain = [] accuracytest = [] for i in range(0, 10): accuracytrain.append(ovr.score(datax, datay) * 100)
svm_train = OneVsOneClassifier(svm_train_original) print("svm(One vs One):") else: svm_train = OneVsRestClassifier(svm_train_original) print("svm(One vs Rest):") #LSI svm_train.fit(train_LSI_array, train_data.target) test_result = svm_train.predict(test_LSI_array) LSI_precision = precision_score(test_data.target, test_result, average='weighted') LSI_recall = recall_score(test_data.target, test_result, average='weighted') LSI_confusionMatrix = confusion_matrix(test_data.target, test_result) LSI_accuracy = svm_train.score(test_LSI_array, test_data.target) #NMF svm_train.fit(train_NMF_array, train_data.target) test_result = svm_train.predict(test_NMF_array) NMF_precision = precision_score(test_data.target, test_result, average='weighted') NMF_recall = recall_score(test_data.target, test_result, average='weighted') NMF_confusionMatrix = confusion_matrix(test_data.target, test_result) NMF_accuracy = svm_train.score(test_NMF_array, test_data.target) print("accuracy with LSI is ", LSI_accuracy) print("precision with LSI is ", LSI_precision)
Y_laban = pickle.load( open( "Y_Laban", "r" ) ) X, y = np.array(X), np.array(y) baseClf = AdaBoostClassifier() clf = OneVsOneClassifier(baseClf) from sklearn import cross_validation n=1 rs = cross_validation.ShuffleSplit(len(y), n_iter=n, test_size=.1, random_state=0) res = [] resMixed = [] resLaban = [] for train_index, test_index in rs: X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] Y_laban_train, Y_laban_test = Y_laban[train_index], Y_laban[test_index] clf.fit(X_train, y_train)#, sample_weight) r = clf.score(X_test, y_test) res.append(r) labanClf, selectedIndices = labanUtil.getMultiTaskclassifier(X_train, Y_laban_train) X_train_transformed = transform(X_train, selectedIndices) #X_train_laban = [] X_train_mixed = [] for x in X_train_transformed: labans = labanClf.predict(x) newVec = np.concatenate((x, labans)) X_train_mixed.append(newVec) #X_train_laban.append(labans) #X_train_laban=np.array(X_train_laban) X_test_transformed = transform(X_test, selectedIndices) X_test_laban = []
recall_score(y_train_5, y_train_pred_forest) # 10 классов---------------------------------------------------------------------- sgd_clf.fit(X_train, y_train) sgd_clf.predict([X_train[25001]]) sgd_clf.score(X_train, y_train) some_digit_scores = sgd_clf.decision_function([some_digit]) np.argmax(some_digit_scores) from sklearn.multiclass import OneVsOneClassifier ovo_clf = OneVsOneClassifier(SGDClassifier(random_state=42)) ovo_clf.fit(X_train, y_train) ovo_clf.predict([some_digit]) ovo_clf.score(X_train, y_train) forest_clf.fit(X_train, y_train) forest_clf.predict([some_digit]) forest_clf.predict_proba([some_digit]) forest_clf.score(X_train, y_train) sgd_clf.score(X_test, y_test) ovo_clf.score(X_test, y_test) forest_clf.score(X_test, y_test) cross_val_score(sgd_clf, X_train, y_train, cv=3, scoring='accuracy') cross_val_score(ovo_clf, X_train, y_train, cv=3, scoring='accuracy') cross_val_score(forest_clf, X_train, y_train, cv=3, scoring='accuracy') from sklearn.preprocessing import StandardScaler
print("################ Classificação OVO ###########################") print("### Menu: ###") print("1- All numbers;") print("2- Choose numbers;") op = input("Pick an option: ") while (finish == False): if (op == "1"): start_time = time.time() OVO = OneVsOneClassifier(LogisticRegressionCV()) OVO.fit(x_train, y_train) predictionsOVO = OVO.predict(x_test) scoreOVO = OVO.score(x_test, y_test) print(scoreOVO) cmOVO = metrics.confusion_matrix(y_test, predictionsOVO) plt.figure(figsize=(9, 9)) sns.heatmap(cmOVO, annot=True, fmt=".3f", linewidths=.5, square=True, cmap='Blues_r') plt.ylabel('Actual label') plt.xlabel('Predicted label') all_sample_title = 'Accuracy Score: {0}'.format(scoreOVO) plt.title(all_sample_title, size=15)
# Project the test TF-IDF matrix into the SVD-reduced space fitted upstream.
tfidf_test_reduced = svd.transform(tfidf_test)
svm_test_data = tfidf_test_reduced
svm_test_tag = test.target
#for i in test.target:
#    if(i < 4):
#        svm_test_tag.append(-1)
#    else:
#        svm_test_tag.append(1)
# One-vs-One wrapper (OVOC) around a linear SVC with C=100.
svc = SVC(kernel='linear',C = 100)
svc_ovoc=OVOC(svc)
svc_ovoc.fit(svm_train_data, svm_train_tag)
svc_ovoc_predict=svc_ovoc.predict(svm_test_data)
#precision, recall, thresholds = precision_recall_curve(svm_test_tag, svc_ovoc_predict)
#BernoulliNB(alpha=1.0, binarize=0.5, class_prior=None, fit_prior=True)
# Weighted precision/recall account for class imbalance across topics.
score=svc_ovoc.score(svm_test_data,svm_test_tag)
precision = precision_score(svm_test_tag, svc_ovoc_predict, average = 'weighted')
recall = recall_score(svm_test_tag, svc_ovoc_predict, average = 'weighted')
print "1 VS 1 SVC"
print "confusion matrix:","\n",confusion_matrix(svm_test_tag, svc_ovoc_predict)
print "score=",score
print "precision=", precision
print "recall=", recall
print '\n'
# One-vs-Rest wrapper (OVRC) around an RBF SVC for comparison.
svc = SVC(kernel='rbf',C = 100)
svc_ovrc=OVRC(svc)
svc_ovrc.fit(svm_train_data, svm_train_tag)
svc_ovrc_predict=svc_ovrc.predict(svm_test_data)
#precision, recall, thresholds = precision_recall_curve(svm_test_tag, svc_ovoc_predict)
#BernoulliNB(alpha=1.0, binarize=0.5, class_prior=None, fit_prior=True)
#y = np.array([0, 0, 1, 1, 2, 2, 3, 3]) print('start at %s' % startTime) print('start training...') clf = OneVsOneClassifier(LinearSVC(random_state = 0)) #clf = OneVsRestClassifier(LinearSVC(random_state = 0)) clf = clf.fit(X_train, y_train) print(clf.get_params()) #joblib.dump(clf, modelPath) # save the trained model #lists =[[5, -1], [-2, -6], [2,1], [-2, 5]] #test = np.array(lists) #test_label = np.array([3, 2, 0, 1]) print("start predicting...") #clf = joblib.load(modelPath) # load the model score = clf.score(X_test, y_test) print('accuracy is {0}'.format(score)) #============================================================================== # count = 0 # predictions = clf.predict(X_test) # lens = len(predictions) # for i in xrange(lens): # if predictions[i] == y_test[i]: # count +=1 # print('accuracy is %f' % (float(count) / lens )) #============================================================================== endTime = time.ctime() end = time.time() print("start at %s, end at %s" % (startTime, endTime)) print("consume ", (end - start))
print(log_reg2.score(X_test, y_test))
plot_decision_boundary(log_reg2, axis=[4, 8.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.scatter(X[y == 2, 0], X[y == 2, 1])
plt.show()
# Use the full iris dataset (all features)
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
# OvR (LogisticRegression's default scheme)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
print(log_reg.score(X_test, y_test))
# OvO (labelled so here; multinomial is softmax regression)
log_reg2 = LogisticRegression(multi_class="multinomial", solver="newton-cg")
log_reg2.fit(X_train, y_train)
print(log_reg2.score(X_test, y_test))
# sklearn's generic OvR and OvO wrappers around any binary classifier
ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
print(ovr.score(X_test, y_test))
ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
print(ovo.score(X_test, y_test))
all_sample_title = 'Accuracy Score: {0}'.format(scoreOVA) plt.title(all_sample_title, size = 15); ''' finish_time_OVA = time.time() - start_time_OVA ################################################################################################### ################################# OVO ############################################################# start_time_OVO = time.time() OVO = OneVsOneClassifier(LogisticRegressionCV()) OVO.fit(x_train_thres, y_train_thres) predictionsOVO = OVO.predict(x_test_thres) scoreOVO = OVO.score(x_test_thres, y_test_thres) ''' cmOVO = metrics.confusion_matrix(y_test_thres, predictionsOVO) plt.figure(figsize=(9,9)) sns.heatmap(cmOVO, annot=True, fmt=".3f", linewidths=.5, square = True, cmap = 'Blues_r'); plt.ylabel('Actual label'); plt.xlabel('Predicted label'); all_sample_title = 'Accuracy Score: {0}'.format(scoreOVO) plt.title(all_sample_title, size = 15); ''' finish_time_OVO = time.time() - start_time_OVO ################################################################################################### ################################# DICOTOMIA #######################################################
print(">>>> Loading finished")
# Build a bag-of-clusters histogram per sequence: one row per sample, one
# column per k-means cluster.
feature_vec = np.zeros((len(data), kmeans.n_clusters))
for i in range(len(data)):
    mydata = data[i]
    # mydata = pca.transform(mydata)
    feature_seq = kmeans.predict(mydata)
    # Bug fix: the original did `feature_vec[i][feature_seq[j]] += 1` while
    # iterating `for j in feature_seq`, i.e. it used a cluster label as a
    # POSITION into feature_seq instead of as the histogram bin itself.
    for cluster_id in feature_seq:
        feature_vec[i][cluster_id] += 1
feature_vec = normalize(feature_vec)
train_x, test_x, train_y, test_y = \
    train_test_split(feature_vec, all_y, test_size = 1-train_ratio)
print(feature_vec.shape)
print(">>>> Data prepared")
# for alpha_ in [0.1, 0.01, 0.02, 0.03, 0.05, 0.008, 0.009, 0.006, 0.005]:
for alpha_ in [0.0001]:
    # One-vs-One SGD classifier; n_jobs=4 parallelizes the pairwise fits.
    clf = OneVsOneClassifier(linear_model.SGDClassifier(alpha = alpha_, n_iter=150000, shuffle=True), n_jobs=4)
    clf.fit(train_x, train_y)
    print("  alpha", alpha_)
    print("  train score", clf.score(train_x, train_y))
    print("  test score", clf.score(test_x, test_y))
    print(clf)
    pred_y = clf.predict(test_x)
    print(test_x[:2,:5])
    print(pred_y)
# For each candidate slip value, build the dataset, fit an OvO RBF-SVM,
# and record its held-out score.
for slip in slips:
    train = []
    label = []
    for category in label_dict.keys():
        X, Y = allSamples(path, category, label_dict, order, window, slip)
        if train == []:
            train = X
            label = Y
        else:
            # Stack samples/labels from every category into one dataset.
            train = np.concatenate((train, X), 0)
            label = np.concatenate((label, Y), 0)
    #train = dim_reduction_PCA(train,0.999)
    X_train, X_test, Y_train, Y_test = train_test_split(train, label, test_size=0.4, random_state=42)
    C = 1.0
    multiclassifier = OneVsOneClassifier(svm.SVC(kernel="rbf",gamma=0.7,C=C)).fit(X_train, Y_train)
    score.append(multiclassifier.score(X_test, Y_test))
    # svc = svm.SVC(kernel='linear', C=C).fit(X_train, Y_train)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X_train, Y_train)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X_train, Y_train)
# print score
# plt.figure("Score-order")
# plt.plot(slips,score)
# plt.show()
#Use validate data to test the model.
# NOTE(review): `multiclassifier` below is whichever model the LAST loop
# iteration produced -- confirm that is intended.
path = "./"
label_dict = {"test":0}
X_test,Y_test = allSamples(path,"test",label_dict,order,window,slip)
#print X_test,Y_test
print multiclassifier.predict(X_test)
def main(params_file, output_dir, output_code, datasets, norm_type, labels_file, spca_file, **kwargs):
    """Load per-dataset H5 feature vectors, summarize them per cluster label,
    and train/evaluate a stack of classifiers on the sPCA-projected data.

    Visible pipeline:
      1. Load, zero-impute, and normalize each dataset's feature matrices.
      2. Merge the per-dataset matrices key-by-key into one dict.
      3. Write per-cluster mean/std/n CSVs for every feature block.
      4. Fit a RandomForest on the full sPCA table; save feature importances.
      5. On the labeled subset: soft VotingClassifier, grid search,
         repeated-resplit accuracy, permutation test, and a one-vs-one
         wrapper, with accuracy reports and CSV outputs.
      6. Pairwise label-vs-label random forests for per-pair importances.

    Args:
        params_file: forwarded to ld.load_h5_data for each dataset.
        output_dir: NOTE(review) — not used anywhere in this body; verify.
        output_code: NOTE(review) — not used anywhere in this body; verify.
        datasets: iterable of dataset-descriptor dicts (keys read below:
            "fv_h5_file", "metadata_file", "dendrite_type", etc.).
        norm_type: normalization mode forwarded to normalize_ds.
        labels_file: CSV of cluster labels indexed by specimen id
            (label column is named '0'; -1 appears to mean "unlabeled").
        spca_file: CSV of sPCA-projected features indexed by specimen id.
        **kwargs: accepted but ignored.
    """
    # --- 1. Load data from each dataset --------------------------------
    data_objects = []
    specimen_ids_list = []
    # Zeros are treated as missing values and replaced by column means,
    # transforming in place (copy=False).
    imp = SimpleImputer(
        missing_values=0,
        strategy='mean',
        copy=False,
    )
    for ds in datasets:
        if len(ds["limit_to_cortical_layers"]) == 0:
            limit_to_cortical_layers = None
        else:
            limit_to_cortical_layers = ds["limit_to_cortical_layers"]

        data_for_spca, specimen_ids = ld.load_h5_data(
            h5_fv_file=ds["fv_h5_file"],
            metadata_file=ds["metadata_file"],
            dendrite_type=ds["dendrite_type"],
            need_structure=not ds["allow_missing_structure"],
            include_dend_type_null=ds["allow_missing_dendrite"],
            limit_to_cortical_layers=limit_to_cortical_layers,
            id_file=ds["id_file"],
            params_file=params_file)
        for l, m in data_for_spca.items():
            if type(m) == np.ndarray:
                nu_m = np.nan_to_num(m)
                # p = column index of the last non-zero entry anywhere.
                p = np.nonzero(nu_m[:, :])[1]
                p = max(p)
                # NOTE(review): [:, :p] EXCLUDES column p itself (the last
                # non-zero column); possibly `:p + 1` was intended — verify.
                nu_m = nu_m[:, :p]
                print(l)
                print(p)
                nu_m = imp.fit_transform(nu_m)
                data_for_spca[l] = normalize_ds(nu_m, norm_type)
        data_objects.append(data_for_spca)
        specimen_ids_list.append(specimen_ids)

    specimen_ids = np.hstack(specimen_ids_list)

    # --- 2. Merge the datasets key-by-key -------------------------------
    data_for_spca = {}
    for i, do in enumerate(data_objects):
        for k in do:
            if k not in data_for_spca:
                data_for_spca[k] = do[k]
            else:
                # equal_ar_size reconciles widths so the blocks can stack.
                data_for_spca[k], do[k] = equal_ar_size(
                    data_for_spca[k], do[k], k, i)
                data_for_spca[k] = np.vstack([data_for_spca[k], do[k]])

    ##Outlier Elim?
    #specimen_ids, data_for_spca = outlierElim(specimen_ids, data_for_spca)

    df_s = pd.read_csv(spca_file, index_col=0)
    first_key = list(data_for_spca.keys())[0]
    if len(specimen_ids) != data_for_spca[first_key].shape[0]:
        logging.error(
            "Mismatch of specimen id dimension ({:d}) and data dimension ({:d})"
            .format(len(specimen_ids), data_for_spca[first_key].shape[0]))

    labels = pd.read_csv(labels_file, index_col=0)
    print(labels)
    print(labels.values)
    uni_labels = np.unique(labels.values)
    ids_list = labels.index.values
    if labels.shape[0] == ids_list.shape[0]:
        print("Same Ids loaded... \n Proceeding")
        logging.info("Proceeding with %d cells", len(specimen_ids))

    # --- 3. Per-cluster summary CSVs ------------------------------------
    for p in data_for_spca:
        labels_means = pd.DataFrame()
        arr_data = data_for_spca[p]
        for x in uni_labels:
            indx = np.where(labels['0'] == x)[0]
            row, col = arr_data[indx].shape
            # Sample count for this cluster, repeated once per column.
            n_co = np.full(col, row)
            mean = pd.Series(data=np.mean(arr_data[indx], axis=0),
                             name=('Cluster ' + str(x) + ' mean'))
            std = pd.Series(data=np.std(arr_data[indx], axis=0),
                            name=('Cluster ' + str(x) + ' std'))
            n = pd.Series(data=n_co, name=('Cluster ' + str(x) + ' n'))
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # this code requires pandas < 2 as written.
            labels_means = labels_means.append(mean, ignore_index=True)
            labels_means = labels_means.append(std, ignore_index=True)
            labels_means = labels_means.append(n, ignore_index=True)
        # NOTE(review): output_fld is not defined in this function —
        # presumably a module-level global; verify.
        labels_means.to_csv(output_fld + p + '_cluster_mean.csv')

    # --- 4. Random forest on the full sPCA table ------------------------
    train_df, test_df, labels_2, _ = train_test_split(df_s, labels)
    rf = RandomForestClassifier(n_estimators=500, oob_score=True,
                                random_state=0)
    #per = multiclass.OneVsOneClassifier(RandomForestClassifier(n_estimators=500, oob_score=True,
    #       random_state=0), n_jobs=-1).fit(train_df.values, labels.to_numpy().flatten())
    rf.fit(train_df.values, labels_2.to_numpy().flatten())
    logging.info("OOB score: {:f}".format(rf.oob_score_))
    pred_labels = rf.predict(test_df.values)
    feat_import = rf.feature_importances_
    print(rf.oob_score_)
    logging.debug("Saving results")
    #pd.DataFrame(pred_labels, index=test_df.index.values).to_csv('rf_predictions.csv')
    pd.DataFrame(feat_import).to_csv('rf_feat_importance.csv')

    ### Now compute for labeled data (label > -1 means "has a cluster label")
    train_ind = np.where(labels['0'] > -1)[0]
    labeled = labels.iloc[train_ind]
    labeled_df_s = df_s.iloc[train_ind]
    train_df, test_df, labels_2, labels_3 = train_test_split(
        labeled_df_s, labeled)
    clf1 = LogisticRegression(random_state=1, max_iter=1000)
    clf2 = RandomForestClassifier(n_estimators=500, random_state=1)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                        ('gnb', clf3)],
                            voting='soft',
                            n_jobs=-1)
    eclf.fit(train_df, np.ravel(labels_2.values))
    fit_score = eclf.score(test_df, np.ravel(labels_3.values))
    print(fit_score)

    # Grid-search the voting ensemble's hyper-parameters.
    params = {
        'lr__C': np.linspace(1.0, 1000.0, 10),
        'rf__n_estimators': np.linspace(20, 1000, 10, dtype=np.int64)
    }
    grid = GridSearchCV(estimator=eclf,
                        param_grid=params,
                        cv=5,
                        n_jobs=-1,
                        verbose=1)
    grid.fit(train_df, np.ravel(labels_2.values))
    fit_score = grid.score(test_df, np.ravel(labels_3.values))
    print("grid search params")
    print(fit_score)
    grid_CV = grid.best_estimator_

    # --- 5. Accuracy over 15 random resplits ----------------------------
    full_acc = np.arange(15, dtype=np.float64)
    PARAMS = grid.best_estimator_  # NOTE(review): assigned but never used.
    for i, a in enumerate(full_acc):
        # `a` is unused; full_acc only supplies the iteration count/slots.
        train_df, test_df, labels_2, labels_3 = train_test_split(
            labeled_df_s, labeled, test_size=0.6, train_size=0.28)
        clf1 = LogisticRegression(random_state=1, max_iter=1000)
        clf2 = RandomForestClassifier(n_estimators=500, random_state=1)
        clf3 = GaussianNB()
        eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                            ('gnb', clf3)],
                                voting='soft',
                                n_jobs=-1)
        eclf.fit(train_df, np.ravel(labels_2.values))
        full_acc[i] = eclf.score(test_df, np.ravel(labels_3.values))
    np.savetxt('full_acc.csv', full_acc, delimiter=",", fmt='%12.5f')

    # Permutation test for the significance of the grid-searched model.
    _, _, pvalue = permutation_test_score(grid_CV, train_df,
                                          np.ravel(labels_2.values),
                                          n_jobs=-1)
    print("pvalue: " + str(pvalue))

    # One-vs-one wrapper around the grid-search object itself.
    fclf = OneVsOneClassifier(grid, n_jobs=-1)
    fclf.fit(train_df, np.ravel(labels_2.values))
    fit_score = fclf.score(test_df, np.ravel(labels_3.values))
    y_pred = fclf.predict(test_df)
    print(fit_score)
    # NOTE(review): sklearn's classification_report signature is
    # (y_true, y_pred); passing the prediction first swaps
    # precision/recall in the report — verify intent.
    print(metrics.classification_report(y_pred, np.ravel(labels_3.values)))
    pred_labels = fclf.predict(df_s.values)
    pd.DataFrame(pred_labels,
                 index=df_s.index.values).to_csv('full_predictions.csv')

    # --- 6. Pairwise label-vs-label forests -----------------------------
    # First row is a placeholder (0 followed by NaNs) sized to match the
    # importance vectors stacked below it.
    feat_import_by_label = np.hstack((0, np.full(feat_import.shape[0],
                                                 np.nan)))
    for i in permutations(uni_labels, 2):
        indx_1 = np.where((labels['0'] == i[0]))[0]
        indx_2 = np.where((labels['0'] == i[1]))[0]
        indx = np.hstack((indx_1, indx_2))
        if indx.shape[0] >= 100:  # skip pairs with too few samples
            print(indx.shape[0])
            df_s_temp = df_s.iloc[indx]
            labels_s_temp = labels.iloc[indx]
            train_df, test_df, labels_2, _ = train_test_split(
                df_s_temp,
                labels_s_temp)
            rf = RandomForestClassifier(n_estimators=500, oob_score=True,
                                        random_state=0)
            #per = multiclass.OneVsOneClassifier(RandomForestClassifier(n_estimators=500, oob_score=True,
            #       random_state=0), n_jobs=-1).fit(train_df.values, labels.to_numpy().flatten())
            rf.fit(train_df.values, labels_2.to_numpy().flatten())
            logging.info("OOB score: {:f}".format(rf.oob_score_))
            pred_labels = rf.predict(test_df.values)
            feat_import = rf.feature_importances_
            print(str(i) + ' ' + str(rf.oob_score_))
            logging.debug("Saving results")
            feat_import_by_label = np.vstack(
                (feat_import_by_label,
                 np.hstack((str(i), np.ravel(feat_import)))))
            del rf
    pd.DataFrame(feat_import_by_label).to_csv(output_fld +
                                              'label_rf_feat_importance.csv')
    logging.info("Done.")
# Oversample the (already under-sampled) training set with SMOTE.
train_data_resampled, train_label_resampled = smote.fit_sample(
    rus_data, rus_label)

# Train (comment translated from Japanese "学習する"): one-vs-one
# ensemble of random forests.
clf = OneVsOneClassifier(
    RandomForestClassifier(random_state=0, n_estimators=500))
clf.fit(train_data_resampled, train_label_resampled)

# Evaluate (translated from Japanese "評価する").
# Predict once and reuse the result everywhere below; the original called
# clf.predict(test_data) again for every single metric, which is pure
# redundant work for an already-fitted (deterministic) classifier.
predict = clf.predict(test_data)
rate_sum = 0
for i in range(len(test_label)):
    t = int(test_label.iloc[i])
    p = int(predict[i])
    # Smaller-over-larger label ratio as a percentage (100 = exact match).
    # NOTE(review): divides by zero if both labels can be 0 — verify range.
    rate_sum += int(min(t, p) / max(t, p) * 100)
print(rate_sum / len(test_label))
print('Train score: {:.4f}'.format(clf.score(train_data, train_label)))
print('Test score: {:.4f}'.format(clf.score(test_data, test_label)))
print('Confusion matrix:\n{}'.format(
    confusion_matrix(test_label, predict)))
print('Accuracy score: {:.4f}'.format(
    accuracy_score(test_label, predict)))
print('Precision score: {}'.format(
    precision_score(test_label, predict, average=None)))
print('Recall score: {}'.format(
    recall_score(test_label, predict, average=None)))
print('f1 score: {}'.format(
    f1_score(test_label, predict, average=None)))
# 学習する clf = OneVsOneClassifier(RandomForestClassifier(n_estimators=500)) clf.fit(train_data_resampled, train_label_resampled) # 評価する predict = clf.predict(test_data) rate_sum = 0 #for i in range(len(test_label)): # t = int(test_label.iloc[i]) # p = int(predict[i]) # rate_sum += int(min(t, p) / max(t, p) * 100) #print(rate_sum / len(test_label)) print('{:.4f}'.format(clf.score(train_data_resampled, train_label_resampled))) #Train score print('{:.4f}'.format(clf.score(test_data, test_label))) #Test score confusion = confusion_matrix(test_label, clf.predict(test_data)) confusion0 = ' '.join(map(str, confusion[0])) confusion1 = ' '.join(map(str, confusion[1])) confusion2 = ' '.join(map(str, confusion[2])) print('%s %s %s ' % (confusion0, confusion1, confusion2)) print('{:.4f}'.format(accuracy_score(test_label, clf.predict(test_data)))) precision = ' '.join( map(str, (precision_score(test_label, clf.predict(test_data), average=None)))) recall_score = ' '.join( map(str, (recall_score(test_label, clf.predict(test_data), average=None)))) f1_score = ' '.join(
# Earlier one-vs-rest logistic experiment, kept for reference:
# print("\n ,1versus rest logistic, ")
# print(" %f" % linOnevOne.score(X_train, y_train))
# print(", %f" % linOnevOne.score(X_test, y_test))
# # for x in range(split + ((len(y)-split)//5), split + ((len(y)-split)//3)):
# #     print(x, y[x], "vs ", linOnevOne.predict((X.loc[x]).values.reshape(1,-1)))
# sys.stdout.flush()

# Base estimator: two hidden layers of 100 units each
# ("add a second layer of 10 to get perfect").
base_mlp = MLPClassifier(hidden_layer_sizes=(100, 100), random_state=4,
                         max_iter=5000)
onevOne = OneVsOneClassifier(base_mlp)
onevOne.fit(X_train, y_train)

# Report train/test accuracy in the same CSV-ish layout as the other models.
mlp_train_acc = onevOne.score(X_train, y_train)
mlp_test_acc = onevOne.score(X_test, y_test)
print("\n ,1versus1 MLP, ")
print(" %f" % mlp_train_acc)
print(", %f" % mlp_test_acc)
# for x in range(split + ((len(y)-split)//5), split + ((len(y)-split)//3)):
#     print(x, y[x], "vs ", onevOne.predict((X.loc[x]).values.reshape(1,-1)))
sys.stdout.flush()

# Earlier linear-SVM variant, kept for reference:
# #class sklearn.svm.SVR(kernel='rbf', degree=3, gamma='auto', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=-1)
# sv1 = SVC(kernel = 'linear')
# onevOne = OneVsOneClassifier(sv1)
# onevOne.fit(X_train, y_train)
# print("\n ,1versus1 SVM, ")
# print("%f" % onevOne.score(X_train, y_train))
# print(", %f" % onevOne.score(X_test, y_test))
# # for x in range(split + ((len(y)-split)//5), split + ((len(y)-split)//3)):
# #     print(x, y[x], "vs ", sv1.predict((X.loc[x]).values.reshape(1,-1)))
# Fit the vectorizer on the training documents only (fitting on
# train + test together was tried and abandoned):
#v.fit(doc_train + doc_test)
v.fit(doc_train)
doc_train = v.transform(doc_train)
doc_test = v.transform(doc_test)
v = None  # drop the vectorizer so its vocabulary can be collected
print(time.process_time() - pt)
print("Training data shape ", doc_train.shape)

print("Training...", end=" ")
sys.stdout.flush()
pt = time.process_time()
# Primal LinearSVC wrapped one-vs-one; alternatives tried earlier:
#n_estimators = 27
m = OneVsOneClassifier(svm.LinearSVC(dual=False))
#m = OneVsOneClassifier(BaggingClassifier(svm.SVC(kernel="poly",degree=2), max_samples=1.0 / n_estimators, n_estimators=n_estimators, bootstrap=False, n_jobs=4, verbose=2))
#m = svm.SVC(kernel="poly",degree=2)
#m = MultinomialNB()
#scores = cross_validation.cross_val_score(m, doc_train, y_train, cv=5, n_jobs=5, verbose=1)
#print(scores)
#print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print(m.fit(doc_train, y_train))
print(time.process_time() - pt)

print("Testing... ")
pt = time.process_time()
train_pct = m.score(doc_train, y_train) * 100.0
test_pct = m.score(doc_test, y_test) * 100.0
print(train_pct)
print(test_pct)
print("Testing took {} seconds.".format(time.process_time() - pt))
# NOTE(review): this chunk begins mid-expression — the brace below closes a
# dict literal (presumably `params3 = {...`) whose opening lies above this view.
}

# Human-readable names, paired 1:1 with `classifiers` below.
names = [
    "Twin SVM", "Twin SVM with RBF Kernel",
    "Twin SVM RBF Kernel with fuzzy membership"
]
classifiers = [
    TwinSVMClassifier(**params1),
    TwinSVMClassifier(**params2),
    TwinSVMClassifier(**params3),
]

# Synthetic 3-class, 2-feature problem, perturbed with uniform noise and
# then standardized.
X, y = make_classification(n_samples=1000,
                           n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1,
                           n_classes=3)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)  # NOTE(review): assigned but never read here.
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

# Wrap each binary TwinSVM in a one-vs-one meta-classifier and print its
# held-out accuracy. NOTE(review): `name` is never printed next to the
# score, so the output lines are hard to attribute — verify intent.
for name, clf in zip(names, classifiers):
    clf = OneVsOneClassifier(clf).fit(X_train, y_train)  # or OneVsRestClassifier
    score = clf.score(X_test, y_test)
    print(score)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier

# Iris has three classes, so logistic regression needs a multiclass strategy.
iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

## Strategy 1: multiclass handling built into LogisticRegression itself.
log_reg = LogisticRegression()  # default scheme is one-vs-rest
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)  # accuracy, discarded (notebook-style cell)

# Multinomial (softmax) formulation; newton-cg supports it.
log_reg2 = LogisticRegression(multi_class='multinomial', solver='newton-cg')
log_reg2.fit(X_train, y_train)
log_reg2.score(X_test, y_test)

## Strategy 2: explicit meta-estimator wrappers around the base model.
ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
ovr.score(X_test, y_test)

ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
ovo.score(X_test, y_test)