Пример #1
0
def clasificar_OVO(X, y, df, trainInputs, trainOutputs, testInputs, testOutputs, graphname):
	"""Fit a one-vs-one decision-tree classifier and report train/test CCR.

	Prints the correct-classification rates and the test predictions, then
	returns the accuracy on the test partition.
	"""
	print("\n[" + str(graphname) + "]")
	base_estimator = DecisionTreeClassifier()
	# 10-fold CV of the base estimator (computed as in the original; not printed).
	cv_scores = cross_val_score(base_estimator, X, y, cv=10)
	ovo_model = OneVsOneClassifier(base_estimator).fit(trainInputs, trainOutputs)
	ccr_train = ovo_model.score(trainInputs, trainOutputs)
	ccr_test = ovo_model.score(testInputs, testOutputs)
	print("\tCCR train = %.2f%% | CCR test = %.2f%%" % (ccr_train*100, ccr_test*100))
	test_predictions = ovo_model.predict(testInputs)
	print(test_predictions)
	print(testOutputs)
	return ccr_test
def predict(X_train, X_test, y_train, y_test, k, method_name):
    """Train OvO and OvR k-nearest-neighbour classifiers, print their test
    accuracies, and plot confusion matrices plus an ROC curve for the OvR model.
    """
    print('Start knn predicting...')

    base_knn = neighbors.KNeighborsClassifier(
        n_neighbors=k, weights='distance', algorithm='auto', leaf_size=30,
        p=2, metric='minkowski', metric_params=None, n_jobs=-1)

    # y_train is a DataFrame/Series; fit expects a flat label array.
    flat_labels = y_train.values.ravel()

    ovo_model = OneVsOneClassifier(base_knn)
    ovo_model.fit(X_train, flat_labels)
    print('Accuracy score of knn_ovo: ' +
          '%.3f' % ovo_model.score(X_test, y_test))

    ovr_model = OneVsRestClassifier(base_knn)
    ovr_model.fit(X_train, flat_labels)
    print('Accuracy score of knn_ovr: ' +
          '%.3f' % ovr_model.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, ovr_model, method_name + '_ovr')
    plot.plot_conf_matrix(X_test, y_test, ovo_model, method_name + '_ovo')
    plot.plot_roc(X_train, X_test, y_train, y_test, ovr_model,
                  method_name + '_ovr')
Пример #3
0
 def evaluateOneVsOne(X, Y, printReport=False):
     time = datetime.datetime.now()
     X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                         Y,
                                                         test_size=0.2,
                                                         random_state=42)
     clf = OneVsOneClassifier(LinearSVC(random_state=0))
     clf.fit(X_train, Y_train)
     if printReport:
         print 'Training time:' + str(datetime.datetime.now() - time)
         print 'Evaluation result: OutputCode: ' + str(
             clf.score(X_test, Y_test))
     Y_test = clf.predict(X_test)
     if printReport:
         print '0: ' + str((Y_test == 0).sum())
         print '1: ' + str((Y_test == 1).sum())
         print '2: ' + str((Y_test == 2).sum())
     return [clf.score(X_test, Y_test), (Y_test == 1).sum(), clf]
Пример #4
0
def multi(x,y):
	from sklearn.linear_model import LogisticRegression
	from sklearn.cross_validation import train_test_split
	from sklearn.cross_validation import cross_val_score
	from sklearn.metrics import accuracy_score
	from sklearn.multiclass import OneVsRestClassifier
	from sklearn.multiclass import OneVsOneClassifier
	OVR = OneVsRestClassifier(LogisticRegression()).fit(x,y)
	OVO = OneVsOneClassifier(LogisticRegression()).fit(x,y)
	print 'One vs rest accuracy: %.3f' % OVR.score(xt,yt)
	print 'One vs one accuracy: %.3f' % OVO.score(xt,yt)
Пример #5
0
def svcOvO(X, Y, *args):
    """Fit a one-vs-one SVC and return (test predictions, test accuracy, model)."""
    X_test, Y_test, X_train, Y_train = get_input_output(X, Y, *args)

    # Build and train the classifier.
    classifier = OneVsOneClassifier(SVC(random_state=seed, gamma='scale'))
    classifier.fit(X_train, Y_train)

    # Evaluate on the held-out data (score first, then predictions, as before).
    test_accuracy = classifier.score(X_test, Y_test)
    test_predictions = classifier.predict(X_test)

    return (test_predictions, test_accuracy, classifier)
def predict(X_train, X_test, y_train, y_test):
    """Train OvO and OvR random-forest classifiers and print their accuracies."""
    print('Start RandomForest predicting...')

    forest = RandomForestClassifier()
    # Flatten the label frame once and reuse it for both wrappers.
    flat_labels = y_train.values.ravel()

    ovo_model = OneVsOneClassifier(forest)
    ovo_model.fit(X_train, flat_labels)
    print('Accuracy score of ran_ovo: ' +
          '%.3f' % ovo_model.score(X_test, y_test))

    ovr_model = OneVsRestClassifier(forest)
    ovr_model.fit(X_train, flat_labels)
    print('Accuracy score of ran_ovr: ' +
          '%.3f' % ovr_model.score(X_test, y_test))
Пример #7
0
def predict(X_train, X_test, y_train, y_test, method_name):
    """Train OvO and OvR RBF-kernel SVMs, print their test accuracies, and
    plot confusion matrices plus an ROC curve for the OvR model.
    """
    print('Start SVM predicting...')

    flat_labels = y_train.values.ravel()

    ovo_clf = OneVsOneClassifier(SVC(kernel='rbf', probability=True))
    ovo_clf.fit(X_train, flat_labels)
    print('Accuracy score of svm_ovo: ' + '%.3f' %
          ovo_clf.score(X_test, y_test))

    ovr_clf = OneVsRestClassifier(SVC(kernel='rbf', probability=True))
    ovr_clf.fit(X_train, flat_labels)
    print('Accuracy score of svm_ovr: ' + '%.3f' %
          ovr_clf.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, ovo_clf, method_name+'_ovo')
    plot.plot_conf_matrix(X_test, y_test, ovr_clf, method_name+'_ovr')
    plot.plot_roc(X_train, X_test, y_train, y_test,
                  ovr_clf, method_name+'_ovr')
def predict(X_train, X_test, y_train, y_test, method_name):
    """Train OvO and OvR XGBoost classifiers, print their test accuracies,
    and plot confusion matrices plus an ROC curve for the OvR model.
    """
    print('Start XGBoost predicting...')

    flat_labels = y_train.values.ravel()

    ovo_clf = OneVsOneClassifier(XGBClassifier())
    ovo_clf.fit(X_train, flat_labels)
    print('Accuracy score of xgb_ovo: ' +
          '%.3f' % ovo_clf.score(X_test, y_test))

    ovr_clf = OneVsRestClassifier(XGBClassifier())
    ovr_clf.fit(X_train, flat_labels)
    print('Accuracy score of xgb_ovr: ' +
          '%.3f' % ovr_clf.score(X_test, y_test))

    plot.plot_conf_matrix(X_test, y_test, ovo_clf, method_name + '_ovo')
    plot.plot_conf_matrix(X_test, y_test, ovr_clf, method_name + '_ovr')
    plot.plot_roc(X_train, X_test, y_train, y_test, ovr_clf,
                  method_name + '_ovr')
def OneVsOne_LinearSVC(X_train, y_train, X_test, y_test, PCA):
    """Train a one-vs-one LinearSVC, print accuracy and timings, and pickle
    the fitted model.

    PCA selects the output directory: 0 -> models/, otherwise modelsPCA/.
    """
    C = 1
    start = timeit.default_timer()
    model1 = OneVsOneClassifier(LinearSVC(C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()

    start2 = timeit.default_timer()
    accuracy1 = model1.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print('One VS One SVM accuracy :> Kernel|Linear:' + str(accuracy1 * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))

    filename = 'OneVsOne_LinearSVC.pkl'
    # FIX: use a context manager so the pickle file handle is always closed
    # (the original open(...) was never closed).
    out_dir = "models/" if PCA == 0 else "modelsPCA/"
    with open(out_dir + filename, 'wb') as fh:
        pickle.dump(model1, fh)
def OneVsOne_ploy(X_train, y_train, X_test, y_test, PCA):
    """Train a one-vs-one degree-2 polynomial SVC, print accuracy and
    timings, and pickle the fitted model.

    PCA selects the output directory: 0 -> models/, otherwise modelsPCA/.
    """
    C = 1
    start = timeit.default_timer()
    model = OneVsOneClassifier(SVC(kernel='poly', degree=2,
                                   C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()

    start2 = timeit.default_timer()
    accuracy = model.score(X_test, y_test)
    stop2 = timeit.default_timer()

    print('One VS One SVM accuracy Kernel == Poly: ' + str(accuracy * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))
    filename = 'OneVsOne_ploy.pkl'
    # FIX: context manager closes the pickle file (the original leaked the handle).
    out_dir = "models/" if PCA == 0 else "modelsPCA/"
    with open(out_dir + filename, 'wb') as fh:
        pickle.dump(model, fh)
def OneVsOne_rbf(X_train, y_train, X_test, y_test, PCA):
    """Train a one-vs-one RBF-kernel SVC, print accuracy and timings, and
    pickle the fitted model.

    PCA selects the output directory: 0 -> models/, otherwise modelsPCA/.
    """
    # NOTE(review): C is extremely small here (heavy regularisation); kept
    # as in the original — confirm it is intentional.
    C = .000001
    start = timeit.default_timer()
    model = OneVsOneClassifier(SVC(kernel='rbf', gamma=0.4,
                                   C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()

    start2 = timeit.default_timer()
    accuracy = model.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print(
        'One VS One SVM accuracy Kernel == rbf Gaussian : ' +
        str(accuracy * 100), " Time Trainig : " + str(stop - start),
        " Time Testing : " + str(stop2 - start2))

    filename = 'OneVsOne_rbf.pkl'
    # FIX: context manager closes the pickle file (the original leaked the handle).
    out_dir = "models/" if PCA == 0 else "modelsPCA/"
    with open(out_dir + filename, 'wb') as fh:
        pickle.dump(model, fh)
def OneVsOnelinear(X_train, y_train, X_test, y_test, PCA):
    """Train a one-vs-one linear-kernel SVC, print accuracy and timings,
    and pickle the fitted model.

    PCA selects the output directory: 0 -> models/, otherwise modelsPCA/.
    """
    C = 1
    start = timeit.default_timer()
    model = OneVsOneClassifier(SVC(kernel='linear', C=C)).fit(X_train, y_train)
    stop = timeit.default_timer()

    start2 = timeit.default_timer()
    accuracy = model.score(X_test, y_test)
    stop2 = timeit.default_timer()
    print('One VS One SVM accuracy :> Kernel|Linear:' + str(accuracy * 100),
          " Time Trainig : " + str(stop - start),
          " Time Testing : " + str(stop2 - start2))

    filename = 'OneVsOnelinear.pkl'
    # FIX: context manager closes the pickle file (the original leaked the handle).
    out_dir = "models/" if PCA == 0 else "modelsPCA/"
    with open(out_dir + filename, 'wb') as fh:
        pickle.dump(model, fh)
Пример #13
0
def multiclass_SVC(X, y):
    """Compare one-vs-rest and one-vs-one LinearSVC on a random 65/35 split.

    Returns (one_vs_rest_accuracy, one_vs_one_accuracy).
    """

    from sklearn.svm import LinearSVC

    # NOTE(review): sklearn.cross_validation is the pre-0.18 module name;
    # modern scikit-learn exposes train_test_split from sklearn.model_selection.
    from sklearn import cross_validation

    # first move: split data
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.35)

    # one-vs-rest implementation
    from sklearn.multiclass import OneVsRestClassifier

    ovr = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)

    # one-vs-one implementation (the original comment mislabeled this "one-vs-all")
    from sklearn.multiclass import OneVsOneClassifier

    ovo = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)

    one_vs_rest = ovr.score(X_test, y_test)
    one_vs_one = ovo.score(X_test, y_test)

    return one_vs_rest, one_vs_one
Пример #14
0
# Output the hitrate and the confusion matrix for each model
print("SVM: ")
print(svm.score(X_train, y_train))
print(svm.score(X_test, y_test))
#print(confusion_matrix(pred, y_test)) 


svm2 = OneVsOneClassifier(LinearSVC(C=100.))
svm2.fit(X_train, y_train)

# Make an array of predictions on the test set
pred = svm2.predict(X_test)

# Output the hitrate and the confusion matrix for each model
print("LinearSVC: ")
print(svm2.score(X_train, y_train))
print(svm2.score(X_test, y_test))


from sklearn.neighbors import KNeighborsClassifier
neigh = (KNeighborsClassifier(n_neighbors=2))
neigh.fit(X_train, y_train) 
pred = neigh.predict(X_test)
print("knn: ")
print(neigh.score(X_train,y_train))
print(neigh.score(X_test,y_test))


from sklearn.ensemble import RandomForestClassifier
clf = (RandomForestClassifier(n_estimators=5,max_depth=None,min_samples_split=5, random_state=15))
clf = clf.fit(X_train, y_train)
ovr.fit(X_train[:,:2], y_train)
print("ovr.score:",ovr.score(X_test[:,:2],y_test))
#ovr.score: 0.6


################################################################################


#逻辑线性回归支持多分类- OVO
from sklearn.linear_model import LogisticRegression
log_reg_ovo = LogisticRegression(multi_class='multinomial', solver='newton-cg')
log_reg_ovo.fit(X_train[:,:2], y_train)
print("log_reg_ovo.score:",log_reg_ovo.score(X_test[:,:2],y_test))
#log_reg_ovo.score: 0.8

# 逻辑线性回归OVO多分类的决策边界
plot_decision_boundary(log_reg_ovo, axis=[4, 8, 1.5, 4.5])
plt.scatter(X[y==0, 0], X[y==0, 1], color='g', label='y==0')
plt.scatter(X[y==1, 0], X[y==1, 1], color='b', label='y==1')
plt.scatter(X[y==2, 0], X[y==2, 1], color='r', label='y==2')
plt.legend()
plt.show()

#OVO类,可以将任意二分类转换为多分类
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
ovo = OneVsOneClassifier(lr)
ovo.fit(X_train[:,:2], y_train)
print("ovo.score:",ovo.score(X_test[:,:2],y_test))
#ovo.score: 0.6333333333333333
Пример #16
0
def main():
    """Run the USPS digit experiments: linear/cubic/Gaussian SVMs (OvO and
    OvR), a logistic-regression baseline, and Gaussian-weighted kNN; plot
    error curves and print the best error and C for each model family.
    """
    # --- load training data (last column dropped: trailing delimiter) ---
    data = pd.read_csv(os.path.join(os.path.dirname(__file__), 'data', 'usps',
                                    'zip.train'),
                       header=None,
                       delimiter=' ').iloc[:, :-1]
    y_train = data.pop(0).values
    X_train = data.values

    # --- load test data ---
    data = pd.read_csv(os.path.join(os.path.dirname(__file__), 'data', 'usps',
                                    'zip.test'),
                       header=None,
                       delimiter=' ')
    y_test = data.pop(0).values
    X_test = data.values

    # Keep enough principal components for 95% of the variance.
    pca = PCA(n_components=.95)
    pca.fit(X_train)

    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)

    svm_errs = []
    with tqdm(desc="Problem 1", total=len(C_VALS)) as pbar:
        for C in C_VALS:
            svm = SVC(C=C, kernel='linear', decision_function_shape='ovo')
            svm.fit(X_train, y_train)
            pbar.update(1)

            svm_errs.append(1 - svm.score(X_test, y_test))

    # Logistic-regression baseline, wrapped one-vs-one.
    lr = OVO(LR(solver='lbfgs', max_iter=5000))
    lr.fit(X_train, y_train)
    lr_score = lr.score(X_test, y_test)
    err_plot([svm_errs], ["SVM"],
             lr=1. - lr_score,
             title="One vs. One Linear SVM",
             out='hw7/ovo_linear_svm.pdf')

    ovo_svm_errs = []
    with tqdm(desc="Problem 2", total=len(C_VALS)) as pbar:
        for C in C_VALS:
            svm = OVO(SVC(C=C, kernel='poly', degree=3, gamma='auto'))
            svm.fit(X_train, y_train)
            pbar.update(1)

            ovo_svm_errs.append(1 - svm.score(X_test, y_test))

    err_plot([ovo_svm_errs], ["OvO SVM"],
             lr=1. - lr_score,
             title="One vs. One Cubic SVM",
             out='hw7/ovo_cubic_svm.pdf')

    ovr_svm_errs = []
    with tqdm(desc="Problem 3", total=len(C_VALS)) as pbar:
        for C in C_VALS:
            svm = OVR(SVC(C=C, kernel='poly', degree=3, gamma='auto'))
            svm.fit(X_train, y_train)
            pbar.update(1)

            ovr_svm_errs.append(1 - svm.score(X_test, y_test))

    err_plot([ovo_svm_errs, ovr_svm_errs], ["OvO SVM", "OvR SVM"],
             lr=1. - lr_score,
             title="One vs. Rest Cubic SVM/OvO Cubic",
             out='hw7/ovr_cubic_svm.pdf')

    n = 5
    # Resample until every class in the 100-point sample has at least n
    # members, so the n-nearest-neighbour distances below are well defined.
    while True:
        index = np.random.choice(X_train.shape[0], 100, replace=False)

        X_sample = X_train[index]
        y_sample = y_train[index]

        if all(
                len(X_sample[y_sample == y_i]) >= n
                for y_i in np.unique(y_sample)
        ):
            break

    # Mean distance to the n-th same-class neighbour; used below as the
    # Gaussian bandwidth SIGMA.
    dists = []
    for X_i, y_i in zip(X_sample, y_sample):
        X_cls = X_sample[y_sample == y_i]
        nbrs = NearestNeighbors(n_neighbors=n)
        nbrs.fit(X_cls)
        # reshape(1, -1) because kneighbors expects a 2-D array of samples.
        # (The original wrapped this in `except ValueError as err: raise err`,
        # which was a no-op and has been removed.)
        distances, _ = nbrs.kneighbors(X_i.reshape(1, -1))
        dists.append(distances[-1])

    global SIGMA
    SIGMA = np.mean(dists)

    ovo_gauss_svm_errs = []
    with tqdm(desc="Problem 4 (SVM)", total=len(C_VALS),
              file=sys.stdout) as pbar:
        for C in C_VALS:
            svm = OVO(SVC(C=C, kernel='rbf', gamma=1. / (2. * SIGMA**2)))
            svm.fit(X_train, y_train)
            score = svm.score(X_test, y_test)
            pbar.update(1)

            ovo_gauss_svm_errs.append(1 - score)

    knn_errs = []
    with tqdm(desc="Problem 4 (kNN)",
              total=len(np.arange(3, 11)),
              file=sys.stdout) as pbar:
        for k in np.arange(3, 11):
            knn = KNeighborsClassifier(n_neighbors=k, weights=gaussian)
            knn.fit(X_train, y_train)
            pbar.update(1)

            # Store (k, error) pairs so both can be reported later.
            knn_errs.append((k, 1 - knn.score(X_test, y_test)))

    err_plot([ovo_gauss_svm_errs], ["OvO SVM"],
             knn=knn_errs,
             title="One vs. One Gaussian SVM with kNN",
             out='hw7/ovo_gaussian_svm_knn.pdf')

    ovr_gauss_svm_errs = []
    with tqdm(desc="Problem 5", total=len(C_VALS), file=sys.stdout) as pbar:
        for C in C_VALS:
            svm = OVR(SVC(C=C, kernel='rbf', gamma=1. / (2. * SIGMA**2)))
            svm.fit(X_train, y_train)
            score = svm.score(X_test, y_test)
            pbar.update(1)

            ovr_gauss_svm_errs.append(1 - score)

    err_plot([ovr_gauss_svm_errs], ["OvR SVM"],
             knn=knn_errs,
             title="One vs. Rest Gaussian SVM with kNN",
             out='hw7/ovr_gaussian_svm_knn.pdf')

    err_plot([
        svm_errs, ovo_svm_errs, ovr_svm_errs, ovo_gauss_svm_errs,
        ovr_gauss_svm_errs
    ], [
        "Linear SVM", "OvO Cubic SVM", "OvR Cubic SVM", "OvO Gaussian SVM",
        "OvR Gaussian SVM"
    ],
             lr=1. - lr_score,
             knn=knn_errs,
             title="Multiclass SVM Kernels",
             out='hw7/all_svm_knn.pdf')

    min_idx = np.argmin(svm_errs)
    min_lin_err = svm_errs[min_idx]
    min_lin_c = np.log2(C_VALS[min_idx])
    print("Min Linear SVM Error = {0:.4f}".format(min_lin_err))
    print("Min Linear SVM log2(C) = {0}".format(min_lin_c))
    print("LR Error = {0:.4f}".format(1. - lr_score))

    min_idx = np.argmin(ovo_svm_errs)
    min_lin_err = ovo_svm_errs[min_idx]
    min_lin_c = np.log2(C_VALS[min_idx])
    print("Min OvO Cubic SVM Error = {0:.4f}".format(min_lin_err))
    print("Min OvO Cubic SVM log2(C) = {0}".format(min_lin_c))

    min_idx = np.argmin(ovr_svm_errs)
    min_lin_err = ovr_svm_errs[min_idx]
    min_lin_c = np.log2(C_VALS[min_idx])
    print("Min OvR Cubic SVM Error = {0:.4f}".format(min_lin_err))
    print("Min OvR Cubic SVM log2(C) = {0}".format(min_lin_c))

    # BUG FIX: knn_errs holds (k, err) pairs; np.argmin over the raw list
    # flattens the 2-column array and returns a flat index, which then
    # mis-indexes knn_errs. Take the argmin of the error column only.
    min_idx = np.argmin([err for _, err in knn_errs])
    min_lin_k, min_lin_err = knn_errs[min_idx]
    print("Min kNN Error = {0:.4f}".format(min_lin_err))
    print("Min kNN log2(C) = {0}".format(min_lin_k))

    min_idx = np.argmin(ovo_gauss_svm_errs)
    min_lin_err = ovo_gauss_svm_errs[min_idx]
    min_lin_c = np.log2(C_VALS[min_idx])
    print("Min OvO Gaussian SVM Error = {0:.4f}".format(min_lin_err))
    print("Min OvO Gaussian SVM log2(C) = {0}".format(min_lin_c))

    min_idx = np.argmin(ovr_gauss_svm_errs)
    min_lin_err = ovr_gauss_svm_errs[min_idx]
    min_lin_c = np.log2(C_VALS[min_idx])
    print("Min OvR Gaussian SVM Error = {0:.4f}".format(min_lin_err))
    print("Min OvR Gaussian SVM log2(C) = {0}".format(min_lin_c))

    print("sigma = {0:.4f}".format(SIGMA))
Пример #17
0
# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Record the split sizes in the log file.
log.write(str(len(X_train)) + "\n" + str(len(X_test)) + "\n")

# One-vs-rest SVM with an RBF kernel.
svm_model_ovr_rbf = OneVsRestClassifier(SVC(kernel='rbf',
                                            C=1)).fit(X_train, y_train)
svm_model_ovr_rbf_predictions = svm_model_ovr_rbf.predict(X_test)
# model accuracy for X_test
accuracy = svm_model_ovr_rbf.score(X_test, y_test)

# creating a confusion matrix
# conf_matrix = confusion_matrix(y_test, svm_predictions)

log.write("Accuracy svm_model_ovr_rbf: " + str(accuracy) + "\n")
# log.write(str(conf_matrix))

# One-vs-one SVM with a linear kernel.
svm_model_ovo_linear = OneVsOneClassifier(SVC(kernel='linear',
                                              C=1)).fit(X_train, y_train)
svm_model_linear_predictions = svm_model_ovo_linear.predict(X_test)
accuracy = svm_model_ovo_linear.score(X_test, y_test)
log.write("Accuracy svm_model_ovo_linear: " + str(accuracy) + "\n")

# creating a confusion matrix
# conf_matrix = confusion_matrix(y_test, svm_predictions)

# log_reg_model_ovr = OneVsRestClassifier(linear_model.SGDClassifier(max_iter = 1000, tol = 1e-3)).fit(X_train, y_train)
# accuracy = log_reg_model_ovr.score(X_test, y_test)
# log.write("Accuracy log_reg_model_ovr: " + str(accuracy) + "\n")
Пример #18
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 25 12:45:17 2019

@author: mfatemeh
"""

import numpy as np

# Load the 8x8 handwritten-digits dataset bundled with scikit-learn.
from sklearn.datasets import load_digits
dataset = load_digits()
X = dataset.data
y = dataset.target

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

from sklearn.linear_model import Perceptron
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier

# Wrap a perceptron in one-vs-rest and one-vs-one multiclass strategies.
ovsr = OneVsRestClassifier(Perceptron())
ovso = OneVsOneClassifier(Perceptron())

ovsr.fit(X_train, y_train)
# NOTE(review): the scores below are computed but never printed or stored.
ovsr.score(X_test, y_test)

ovso.fit(X_train, y_train)
ovso.score(X_test, y_test)
# Split the first column off as labels; the remaining columns are features.
# NOTE(review): `tr` and `ts` come from earlier (unseen) code — presumably
# numpy arrays with the class label in column 0; verify before running.
tr_feat = tr[:,1:]
ts_feat = ts[:,1:]
tr_label = tr[:,0]
ts_label = ts[:,0]

# use sklearn C-Support Vector Classification
## == one-vs-one == ##
# The multiclass support is handled in a one-vs-one scheme
# train 
ovo_clf = OneVsOneClassifier(LinearSVC())
ovo_clf.fit(tr_feat, tr_label)

# predict
ovo_pred = ovo_clf.predict(ts_feat)
ovo_err = 1- ovo_clf.score(ts_feat, ts_label)

# confusion matrix
#
#array([[159,   7],
#       [  5, 161]])
ovo_cmat = metrics.confusion_matrix(ts_label, ovo_pred) 
pred_total = np.sum(ovo_cmat,axis = 1)
# Per-class misclassification rate: 1 - diagonal / row total.
ovo_mis = 1- np.diag(ovo_cmat).astype(float) / pred_total
print("one vs. one svm - classification err: %s \n"%(ovo_err))
print("confusion matrix: \n %s"%(ovo_cmat))
print("class misclassification rate : \n %s"%(ovo_mis))
## == one-vs-rest == ##
# The multiclass support is handled in a one-vs-rest scheme
# train 
ovr_clf = OneVsRestClassifier(LinearSVC())
Пример #20
0
# Draw the decision boundary of pipe2 and scatter the three iris classes.
boundary(pipe2, [4, 8.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.scatter(X[y == 2, 0], X[y == 2, 1])

# Generic multiclass wrappers usable with any binary model ---------------------
X = iris.data
y = iris.target
# OneVsRestClassifier
ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
print("ovr =", ovr.score(X_test, y_test))
# OneVsOneClassifier
ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
print("ovo =", ovo.score(X_test, y_test))

plt.show()
'''
参数含义:
1.penalty:字符串,指定了正则化策略。默认为"l2"
    (1)如果为"l2",则优化的目标函数为:0.5*||w||^2_2+C*L(w),C>0,
        L(w)为极大似然函数。
    (2)如果为"l1",则优化的目标函数为||w||_1+C*L(w),C>0,
        L(w)为极大似然函数。
2.dual:布尔值。默认为False。如果等于True,则求解其对偶形式。
  只有在penalty="l2"并且solver="liblinear"时才有对偶形式。如果为False,则求解原始形式。
  当n_samples > n_features,偏向于dual=False。
3.tol:阈值。判断迭代是否收敛或者是否满足精度的要求。
4.C:float,默认为1.0.指定了正则化项系数的倒数。必须是一个正的浮点数。他的值越小,正则化项就越大。
5.fit_intercept:bool值。默认为True。如果为False,就不会计算b值。
# MULTICLASS LEARNING TEST
################################################################################

# ONE VS ONE

# Multiclass data is used here.
print("ovo:\n")
ovo = OneVsOneClassifier(SVC(C=10, gamma=0.01, kernel="rbf"))
ovo.fit(datax, datay)
p = ovo.predict(datatestx)
print(classification_report(datatesty, p))

accuracytrain = []
accuracytest = []
for i in range(0, 10):
    accuracytrain.append(ovo.score(datax, datay) * 100)
    accuracytest.append(ovo.score(datatestx, datatesty) * 100)
print("Erreur moyenne  : train %f, test %f" %
      (np.mean(accuracytrain), np.mean(accuracytest)))
# BUG FIX: removed a stray lone `s` statement that was here — it referenced
# an undefined name and raised NameError at runtime.

# ONE VS REST
print("ovr:\n")
ovr = OneVsRestClassifier(SVC(C=10, gamma=0.01, kernel="rbf"))
ovr.fit(datax, datay)
p = ovr.predict(datatestx)
print(classification_report(datatesty, p))

accuracytrain = []
accuracytest = []
for i in range(0, 10):
    accuracytrain.append(ovr.score(datax, datay) * 100)
Пример #22
0
        svm_train = OneVsOneClassifier(svm_train_original)
        print("svm(One vs One):")
    else:
        svm_train = OneVsRestClassifier(svm_train_original)
        print("svm(One vs Rest):")
    #LSI
    svm_train.fit(train_LSI_array, train_data.target)
    test_result = svm_train.predict(test_LSI_array)
    LSI_precision = precision_score(test_data.target,
                                    test_result,
                                    average='weighted')
    LSI_recall = recall_score(test_data.target,
                              test_result,
                              average='weighted')
    LSI_confusionMatrix = confusion_matrix(test_data.target, test_result)
    LSI_accuracy = svm_train.score(test_LSI_array, test_data.target)

    #NMF
    svm_train.fit(train_NMF_array, train_data.target)
    test_result = svm_train.predict(test_NMF_array)
    NMF_precision = precision_score(test_data.target,
                                    test_result,
                                    average='weighted')
    NMF_recall = recall_score(test_data.target,
                              test_result,
                              average='weighted')
    NMF_confusionMatrix = confusion_matrix(test_data.target, test_result)
    NMF_accuracy = svm_train.score(test_NMF_array, test_data.target)

    print("accuracy with LSI is ", LSI_accuracy)
    print("precision with LSI is ", LSI_precision)
Пример #23
0
# Load pickled Laban annotations and evaluate an OvO AdaBoost classifier
# under a single shuffle split.
Y_laban = pickle.load( open( "Y_Laban", "r" ) )
X, y = np.array(X), np.array(y)
baseClf = AdaBoostClassifier()
clf = OneVsOneClassifier(baseClf)
from sklearn import cross_validation
n=1
rs = cross_validation.ShuffleSplit(len(y), n_iter=n, test_size=.1, random_state=0)
res = []
resMixed = []
resLaban = []
for train_index, test_index in rs:
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    Y_laban_train, Y_laban_test = Y_laban[train_index], Y_laban[test_index] 
    clf.fit(X_train, y_train)#, sample_weight)
    r = clf.score(X_test, y_test)
    res.append(r)
    
    # Train a multi-task Laban classifier and build "mixed" feature vectors:
    # the selected features concatenated with predicted Laban qualities.
    labanClf, selectedIndices = labanUtil.getMultiTaskclassifier(X_train, Y_laban_train)
    X_train_transformed = transform(X_train, selectedIndices)
    #X_train_laban = []
    X_train_mixed = []
    for x in X_train_transformed:
        labans = labanClf.predict(x)
        newVec = np.concatenate((x, labans))
        X_train_mixed.append(newVec)
        #X_train_laban.append(labans)
    #X_train_laban=np.array(X_train_laban)
    
    X_test_transformed = transform(X_test, selectedIndices)
    X_test_laban = []
Пример #24
0
recall_score(y_train_5, y_train_pred_forest)

#  10 classes ---------------------------------------------------------------------
sgd_clf.fit(X_train, y_train)
sgd_clf.predict([X_train[25001]])
sgd_clf.score(X_train, y_train)

# Per-class decision scores; the predicted class is the argmax.
some_digit_scores = sgd_clf.decision_function([some_digit])
np.argmax(some_digit_scores)

from sklearn.multiclass import OneVsOneClassifier
ovo_clf = OneVsOneClassifier(SGDClassifier(random_state=42))
ovo_clf.fit(X_train, y_train)
ovo_clf.predict([some_digit])
ovo_clf.score(X_train, y_train)

forest_clf.fit(X_train, y_train)
forest_clf.predict([some_digit])
forest_clf.predict_proba([some_digit])
forest_clf.score(X_train, y_train)

# Test-set accuracies for the three models (values not stored).
sgd_clf.score(X_test, y_test)
ovo_clf.score(X_test, y_test)
forest_clf.score(X_test, y_test)

# 3-fold cross-validated accuracies.
cross_val_score(sgd_clf, X_train, y_train, cv=3, scoring='accuracy')
cross_val_score(ovo_clf, X_train, y_train, cv=3, scoring='accuracy')
cross_val_score(forest_clf, X_train, y_train, cv=3, scoring='accuracy')

from sklearn.preprocessing import StandardScaler
Пример #25
0
# Interactive menu: classify all digits (or a chosen subset) with an OvO
# logistic-regression classifier, then show the confusion matrix heatmap.
print("################ Classificação OVO ###########################")
print("### Menu: ###")
print("1- All numbers;")
print("2- Choose numbers;")
op = input("Pick an option:  ")

# NOTE(review): `finish` is defined elsewhere; nothing visible here sets it
# to True, so termination relies on later (unseen) code.
while (finish == False):

    if (op == "1"):

        start_time = time.time()
        OVO = OneVsOneClassifier(LogisticRegressionCV())
        OVO.fit(x_train, y_train)

        predictionsOVO = OVO.predict(x_test)
        scoreOVO = OVO.score(x_test, y_test)
        print(scoreOVO)

        # Confusion-matrix heatmap annotated with the accuracy score.
        cmOVO = metrics.confusion_matrix(y_test, predictionsOVO)
        plt.figure(figsize=(9, 9))
        sns.heatmap(cmOVO,
                    annot=True,
                    fmt=".3f",
                    linewidths=.5,
                    square=True,
                    cmap='Blues_r')
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')
        all_sample_title = 'Accuracy Score: {0}'.format(scoreOVO)
        plt.title(all_sample_title, size=15)
Пример #26
0
# Reduce the TF-IDF test matrix with the (already fitted) SVD, then compare
# a one-vs-one linear SVC with a one-vs-rest RBF SVC (Python 2 snippet).
tfidf_test_reduced = svd.transform(tfidf_test)
svm_test_data = tfidf_test_reduced
svm_test_tag = test.target
#for i in test.target:
#    if(i < 4):  
#        svm_test_tag.append(-1)
#    else:
#        svm_test_tag.append(1)
        
svc = SVC(kernel='linear',C = 100)
svc_ovoc=OVOC(svc)
svc_ovoc.fit(svm_train_data, svm_train_tag)
svc_ovoc_predict=svc_ovoc.predict(svm_test_data)
#precision, recall, thresholds = precision_recall_curve(svm_test_tag, svc_ovoc_predict)
#BernoulliNB(alpha=1.0, binarize=0.5, class_prior=None, fit_prior=True)
score=svc_ovoc.score(svm_test_data,svm_test_tag)
# Weighted averaging accounts for class imbalance in the multi-class report.
precision = precision_score(svm_test_tag, svc_ovoc_predict, average = 'weighted')
recall = recall_score(svm_test_tag, svc_ovoc_predict, average = 'weighted')
print "1 VS 1 SVC"
print "confusion matrix:","\n",confusion_matrix(svm_test_tag, svc_ovoc_predict)
print "score=",score
print "precision=", precision
print "recall=", recall
print '\n'

svc = SVC(kernel='rbf',C = 100)
svc_ovrc=OVRC(svc)
svc_ovrc.fit(svm_train_data, svm_train_tag)
svc_ovrc_predict=svc_ovrc.predict(svm_test_data)
#precision, recall, thresholds = precision_recall_curve(svm_test_tag, svc_ovoc_predict)
#BernoulliNB(alpha=1.0, binarize=0.5, class_prior=None, fit_prior=True)
Пример #27
0
#y = np.array([0, 0, 1, 1, 2, 2, 3, 3])
# Train a one-vs-one LinearSVC and report test accuracy with wall-clock
# timing bookkeeping (startTime/start come from earlier, unseen code).
print('start at %s' % startTime)
print('start training...')
clf = OneVsOneClassifier(LinearSVC(random_state = 0))
#clf = OneVsRestClassifier(LinearSVC(random_state = 0))
clf = clf.fit(X_train, y_train)
print(clf.get_params())
#joblib.dump(clf, modelPath)   # save the trained model

#lists =[[5, -1], [-2, -6], [2,1], [-2, 5]] 
#test = np.array(lists)
#test_label = np.array([3, 2, 0, 1])
print("start predicting...")

#clf = joblib.load(modelPath)   # load the model
score = clf.score(X_test, y_test)
print('accuracy is {0}'.format(score))
#==============================================================================
# count = 0
# predictions = clf.predict(X_test)
# lens = len(predictions)
# for i in xrange(lens):
#     if predictions[i] == y_test[i]:
#         count +=1
# print('accuracy is %f' % (float(count) / lens ))
#==============================================================================

endTime = time.ctime()
end = time.time()
print("start at %s, end at %s" % (startTime, endTime))
print("consume ", (end - start))
# NOTE(review): log_reg2 is used here before its (re)definition below —
# this snippet appears to be pasted out of order; verify before running.
print(log_reg2.score(X_test, y_test))
plot_decision_boundary(log_reg2, axis=[4, 8.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.scatter(X[y == 2, 0], X[y == 2, 1])
plt.show()

# Use the full iris dataset.
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# OvR
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
print(log_reg.score(X_test, y_test))

# OvO
log_reg2 = LogisticRegression(multi_class="multinomial", solver="newton-cg")
log_reg2.fit(X_train, y_train)
print(log_reg2.score(X_test, y_test))

# sklearn's generic OvR / OvO wrapper classes.
ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
print(ovr.score(X_test, y_test))

ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
print(ovo.score(X_test, y_test))
Пример #29
0
all_sample_title = 'Accuracy Score: {0}'.format(scoreOVA)
plt.title(all_sample_title, size = 15);
'''

finish_time_OVA = time.time() - start_time_OVA

###################################################################################################
################################# OVO #############################################################

start_time_OVO = time.time()

OVO = OneVsOneClassifier(LogisticRegressionCV())
OVO.fit(x_train_thres, y_train_thres)

predictionsOVO = OVO.predict(x_test_thres)
scoreOVO = OVO.score(x_test_thres, y_test_thres)
'''
cmOVO = metrics.confusion_matrix(y_test_thres, predictionsOVO)
plt.figure(figsize=(9,9))
sns.heatmap(cmOVO, annot=True, fmt=".3f", linewidths=.5, square = True, cmap = 'Blues_r');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Accuracy Score: {0}'.format(scoreOVO)
plt.title(all_sample_title, size = 15);
'''

finish_time_OVO = time.time() - start_time_OVO

###################################################################################################
################################# DICOTOMIA #######################################################
Пример #30
0
print(">>>> Loading finished")

# Build a bag-of-clusters histogram for every sequence in `data`: each row i
# counts how often each k-means cluster appears in sequence i.
feature_vec = np.zeros((len(data), kmeans.n_clusters))
for i in range(len(data)):
    mydata = data[i]
    # mydata = pca.transform(mydata)
    feature_seq = kmeans.predict(mydata)
    # BUG FIX: the original did `feature_vec[i][feature_seq[j]] += 1` while
    # iterating the VALUES j of feature_seq, re-indexing the sequence with
    # cluster ids and incrementing the wrong bins. Count each predicted
    # cluster id directly.
    for cluster_id in feature_seq:
        feature_vec[i][cluster_id] += 1

feature_vec = normalize(feature_vec)

train_x, test_x, train_y, test_y = \
    train_test_split(feature_vec, all_y, test_size = 1-train_ratio)

print(feature_vec.shape)
print(">>>> Data prepared")

# for alpha_ in [0.1, 0.01, 0.02, 0.03, 0.05, 0.008, 0.009, 0.006, 0.005]:
for alpha_ in [0.0001]:
    clf = OneVsOneClassifier(linear_model.SGDClassifier(alpha = alpha_, n_iter=150000, shuffle=True), n_jobs=4)
    clf.fit(train_x, train_y)
    print("       alpha", alpha_)
    print("       train score", clf.score(train_x, train_y))
    print("       test score",  clf.score(test_x, test_y))
    print(clf)

pred_y = clf.predict(test_x)
print(test_x[:2,:5])
print(pred_y)
Пример #31
0
# For each sliding-window shift (`slip`), assemble a dataset from every
# category, train an OvO RBF SVM and record its test accuracy.
for slip in slips:
    train = []
    label = []
    for category in label_dict.keys():
        X, Y = allSamples(path, category, label_dict, order, window, slip)
        # First category initialises the arrays; later ones are concatenated.
        if train == []:
            train = X
            label = Y
        else:
            train = np.concatenate((train, X), 0)
            label = np.concatenate((label, Y), 0)
    #train = dim_reduction_PCA(train,0.999)
    X_train, X_test, Y_train, Y_test = train_test_split(train, label, test_size=0.4, random_state=42)
    C = 1.0
    multiclassifier = OneVsOneClassifier(svm.SVC(kernel="rbf",gamma=0.7,C=C)).fit(X_train, Y_train)
    score.append(multiclassifier.score(X_test, Y_test))
# svc = svm.SVC(kernel='linear', C=C).fit(X_train, Y_train)
# rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X_train, Y_train)
# poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X_train, Y_train)

# print score
# plt.figure("Score-order")
# plt.plot(slips,score)
# plt.show()

#Use validate data to test the model.
path = "./"
label_dict = {"test":0}
X_test,Y_test = allSamples(path,"test",label_dict,order,window,slip)
#print X_test,Y_test
print multiclassifier.predict(X_test)
Пример #32
0
def main(params_file, output_dir, output_code, datasets, norm_type,
         labels_file, spca_file, **kwargs):
    """Train and evaluate classifiers mapping sPCA features to cluster labels.

    Loads feature vectors from one or more H5 datasets, imputes and
    normalizes them, writes per-cluster mean/std/count summaries, then fits
    a random forest, a grid-searched soft-voting ensemble, and a one-vs-one
    wrapper around the grid search, saving predictions and feature
    importances to CSV files.

    Parameters
    ----------
    params_file : feature-vector parameter file forwarded to the loader.
    output_dir : output directory. NOTE(review): the body writes CSVs via an
        `output_fld` name not defined in this function — presumably a
        module-level global or a typo for `output_dir`; confirm.
    output_code : unused in the visible body.
    datasets : iterable of dataset-descriptor dicts (fv_h5_file,
        metadata_file, dendrite_type, ...).
    norm_type : normalization mode forwarded to `normalize_ds`.
    labels_file : CSV of cluster labels indexed by specimen id, label
        column named '0'.
    spca_file : CSV of sPCA-projected features indexed by specimen id.
    **kwargs : ignored.
    """

    # Load data from each dataset
    data_objects = []
    specimen_ids_list = []
    # Impute zeros (treated as missing values) with the column mean, in place.
    imp = SimpleImputer(
        missing_values=0,
        strategy='mean',
        copy=False,
    )

    for ds in datasets:
        if len(ds["limit_to_cortical_layers"]) == 0:
            limit_to_cortical_layers = None
        else:
            limit_to_cortical_layers = ds["limit_to_cortical_layers"]

        data_for_spca, specimen_ids = ld.load_h5_data(
            h5_fv_file=ds["fv_h5_file"],
            metadata_file=ds["metadata_file"],
            dendrite_type=ds["dendrite_type"],
            need_structure=not ds["allow_missing_structure"],
            include_dend_type_null=ds["allow_missing_dendrite"],
            limit_to_cortical_layers=limit_to_cortical_layers,
            id_file=ds["id_file"],
            params_file=params_file)
        for l, m in data_for_spca.items():
            if type(m) == np.ndarray:

                # Replace NaNs, then trim trailing columns: p is the largest
                # column index containing any nonzero entry.
                nu_m = np.nan_to_num(m)
                p = np.nonzero(nu_m[:, :])[1]
                p = max(p)
                nu_m = nu_m[:, :p]
                print(l)
                print(p)
                nu_m = imp.fit_transform(nu_m)

                data_for_spca[l] = normalize_ds(nu_m, norm_type)

        data_objects.append(data_for_spca)
        specimen_ids_list.append(specimen_ids)
    specimen_ids = np.hstack(specimen_ids_list)

    # Merge the per-dataset dicts: for keys shared across datasets, equalize
    # array widths and stack the rows.
    data_for_spca = {}
    for i, do in enumerate(data_objects):
        for k in do:
            if k not in data_for_spca:
                data_for_spca[k] = do[k]
            else:
                data_for_spca[k], do[k] = equal_ar_size(
                    data_for_spca[k], do[k], k, i)
                data_for_spca[k] = np.vstack([data_for_spca[k], do[k]])

    ##Outlier Elim?
    #specimen_ids, data_for_spca = outlierElim(specimen_ids, data_for_spca)
    df_s = pd.read_csv(spca_file, index_col=0)

    # Sanity check: id vector and stacked data must agree in row count.
    first_key = list(data_for_spca.keys())[0]
    if len(specimen_ids) != data_for_spca[first_key].shape[0]:
        logging.error(
            "Mismatch of specimen id dimension ({:d}) and data dimension ({:d})"
            .format(len(specimen_ids), data_for_spca[first_key].shape[0]))
    labels = pd.read_csv(labels_file, index_col=0)
    print(labels)
    print(labels.values)
    uni_labels = np.unique(labels.values)
    ids_list = labels.index.values

    # NOTE(review): this compares the label table's row count with its own
    # index length, so it is always true — it was presumably meant to be
    # checked against `specimen_ids`; confirm.
    if labels.shape[0] == ids_list.shape[0]:
        print("Same Ids loaded... Proceeding")
        logging.info("Proceeding with %d cells", len(specimen_ids))
        # Write per-cluster mean/std/sample-count rows for every data key.
        for p in data_for_spca:
            labels_means = pd.DataFrame()
            arr_data = data_for_spca[p]
            for x in uni_labels:
                indx = np.where(labels['0'] == x)[0]
                row, col = arr_data[indx].shape
                n_co = np.full(col, row)
                mean = pd.Series(data=np.mean(arr_data[indx], axis=0),
                                 name=('Cluster ' + str(x) + ' mean'))
                std = pd.Series(data=np.std(arr_data[indx], axis=0),
                                name=('Cluster ' + str(x) + ' std'))
                n = pd.Series(data=n_co, name=('Cluster ' + str(x) + ' n'))
                # NOTE(review): DataFrame.append was deprecated in pandas 1.4
                # and removed in 2.0; pd.concat is the replacement.
                labels_means = labels_means.append(mean, ignore_index=True)
                labels_means = labels_means.append(std, ignore_index=True)
                labels_means = labels_means.append(n, ignore_index=True)
            # NOTE(review): `output_fld` is not defined in this function —
            # likely a module-level global or a typo for `output_dir`.
            labels_means.to_csv(output_fld + p + '_cluster_mean.csv')

        train_df, test_df, labels_2, _ = train_test_split(df_s, labels)

        # Baseline random forest on all (labeled and unlabeled) cells.
        rf = RandomForestClassifier(n_estimators=500,
                                    oob_score=True,
                                    random_state=0)
        #per = multiclass.OneVsOneClassifier(RandomForestClassifier(n_estimators=500, oob_score=True,
        # random_state=0), n_jobs=-1).fit(train_df.values, labels.to_numpy().flatten())
        rf.fit(train_df.values, labels_2.to_numpy().flatten())

        logging.info("OOB score: {:f}".format(rf.oob_score_))
        pred_labels = rf.predict(test_df.values)
        feat_import = rf.feature_importances_
        print(rf.oob_score_)
        logging.debug("Saving results")
        #pd.DataFrame(pred_labels, index=test_df.index.values).to_csv('rf_predictions.csv')
        pd.DataFrame(feat_import).to_csv('rf_feat_importance.csv')
        ### Now compute for labeled data
        # Restrict to cells with an assigned (non-negative) cluster label.
        train_ind = np.where(labels['0'] > -1)[0]
        labeled = labels.iloc[train_ind]
        labeled_df_s = df_s.iloc[train_ind]
        train_df, test_df, labels_2, labels_3 = train_test_split(
            labeled_df_s, labeled)

        # Soft-voting ensemble: logistic regression + random forest + GNB.
        clf1 = LogisticRegression(random_state=1, max_iter=1000)
        clf2 = RandomForestClassifier(n_estimators=500, random_state=1)
        clf3 = GaussianNB()
        eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                            ('gnb', clf3)],
                                voting='soft',
                                n_jobs=-1)
        eclf.fit(train_df, np.ravel(labels_2.values))
        fit_score = eclf.score(test_df, np.ravel(labels_3.values))
        print(fit_score)
        # Grid-search the LR regularization strength and RF ensemble size.
        params = {
            'lr__C': np.linspace(1.0, 1000.0, 10),
            'rf__n_estimators': np.linspace(20, 1000, 10, dtype=np.int64)
        }

        grid = GridSearchCV(estimator=eclf,
                            param_grid=params,
                            cv=5,
                            n_jobs=-1,
                            verbose=1)
        grid.fit(train_df, np.ravel(labels_2.values))
        fit_score = grid.score(test_df, np.ravel(labels_3.values))
        print("grid search params")
        print(fit_score)
        grid_CV = grid.best_estimator_
        # Re-fit the ensemble on 15 random splits to estimate accuracy spread;
        # full_acc doubles as both loop counter source and result buffer.
        full_acc = np.arange(15, dtype=np.float64)
        PARAMS = grid.best_estimator_
        for i, a in enumerate(full_acc):
            train_df, test_df, labels_2, labels_3 = train_test_split(
                labeled_df_s, labeled, test_size=0.6, train_size=0.28)
            clf1 = LogisticRegression(random_state=1, max_iter=1000)
            clf2 = RandomForestClassifier(n_estimators=500, random_state=1)
            clf3 = GaussianNB()
            eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                                ('gnb', clf3)],
                                    voting='soft',
                                    n_jobs=-1)
            eclf.fit(train_df, np.ravel(labels_2.values))
            full_acc[i] = eclf.score(test_df, np.ravel(labels_3.values))

        np.savetxt('full_acc.csv', full_acc, delimiter=",", fmt='%12.5f')
        # Permutation test for the significance of the tuned model's score.
        _, _, pvalue = permutation_test_score(grid_CV,
                                              train_df,
                                              np.ravel(labels_2.values),
                                              n_jobs=-1)
        print("pvalue: " + str(pvalue))
        # One-vs-one reduction wrapped around the grid-search object itself.
        fclf = OneVsOneClassifier(grid, n_jobs=-1)
        fclf.fit(train_df, np.ravel(labels_2.values))
        fit_score = fclf.score(test_df, np.ravel(labels_3.values))
        y_pred = fclf.predict(test_df)
        print(fit_score)
        # NOTE(review): arguments are passed (y_pred, y_true); sklearn expects
        # (y_true, y_pred), so the report's precision/recall are transposed.
        print(metrics.classification_report(y_pred, np.ravel(labels_3.values)))
        pred_labels = fclf.predict(df_s.values)
        pd.DataFrame(pred_labels,
                     index=df_s.index.values).to_csv('full_predictions.csv')

    # Pairwise (label a vs label b) random-forest feature importances.
    # NOTE(review): `feat_import` is only assigned inside the branch above;
    # if that branch is skipped this line raises NameError.
    feat_import_by_label = np.hstack((0, np.full(feat_import.shape[0],
                                                 np.nan)))
    for i in permutations(uni_labels, 2):
        indx_1 = np.where((labels['0'] == i[0]))[0]
        indx_2 = np.where((labels['0'] == i[1]))[0]
        indx = np.hstack((indx_1, indx_2))
        # Only compare label pairs with at least 100 member cells combined.
        if indx.shape[0] >= 100:
            print(indx.shape[0])
            df_s_temp = df_s.iloc[indx]
            labels_s_temp = labels.iloc[indx]
            train_df, test_df, labels_2, _ = train_test_split(
                df_s_temp, labels_s_temp)

            rf = RandomForestClassifier(n_estimators=500,
                                        oob_score=True,
                                        random_state=0)
            #per = multiclass.OneVsOneClassifier(RandomForestClassifier(n_estimators=500, oob_score=True,
            # random_state=0), n_jobs=-1).fit(train_df.values, labels.to_numpy().flatten())
            rf.fit(train_df.values, labels_2.to_numpy().flatten())
            logging.info("OOB score: {:f}".format(rf.oob_score_))
            pred_labels = rf.predict(test_df.values)
            feat_import = rf.feature_importances_
            print(str(i) + ' ' + str(rf.oob_score_))
            logging.debug("Saving results")
            feat_import_by_label = np.vstack(
                (feat_import_by_label,
                 np.hstack((str(i), np.ravel(feat_import)))))
            del rf
    # NOTE(review): `output_fld` again — see the note above; confirm it is
    # defined at module level or switch to `output_dir`.
    pd.DataFrame(feat_import_by_label).to_csv(output_fld +
                                              'label_rf_feat_importance.csv')

    logging.info("Done.")
# Oversample the (previously undersampled) training data with SMOTE.
train_data_resampled, train_label_resampled = smote.fit_sample(
    rus_data, rus_label)

# Train: one-vs-one random forest on the resampled training set.
clf = OneVsOneClassifier(
    RandomForestClassifier(random_state=0, n_estimators=500))
clf.fit(train_data_resampled, train_label_resampled)

# Evaluate: predict once and reuse the result for every metric below
# (the original recomputed clf.predict(test_data) for each print).
predict = clf.predict(test_data)
rate_sum = 0

# Ad-hoc "closeness" rate: ratio of the smaller to the larger of the true
# and predicted labels, in percent, averaged over the test set.
for i in range(len(test_label)):
    t = int(test_label.iloc[i])
    p = int(predict[i])
    rate_sum += int(min(t, p) / max(t, p) * 100)
print(rate_sum / len(test_label))

# NOTE(review): train_data/train_label are not defined in this fragment —
# presumably the pre-resampling split from earlier in the file; confirm.
print('Train score: {:.4f}'.format(clf.score(train_data, train_label)))
print('Test score: {:.4f}'.format(clf.score(test_data, test_label)))
print('Confusion matrix:\n{}'.format(confusion_matrix(test_label, predict)))
print('Accuracy score: {:.4f}'.format(accuracy_score(test_label, predict)))
print('Precision score: {}'.format(
    precision_score(test_label, predict, average=None)))
print('Recall score: {}'.format(
    recall_score(test_label, predict, average=None)))
print('f1 score: {}'.format(f1_score(test_label, predict, average=None)))
# Train: one-vs-one random forest (no fixed random_state this time) on the
# resampled training data.
clf = OneVsOneClassifier(RandomForestClassifier(n_estimators=500))
clf.fit(train_data_resampled, train_label_resampled)

# Evaluate on the held-out test set.
predict = clf.predict(test_data)
rate_sum = 0

#for i in range(len(test_label)):
# t = int(test_label.iloc[i])
# p = int(predict[i])
# rate_sum += int(min(t, p) / max(t, p) * 100)
#print(rate_sum / len(test_label))

print('{:.4f}'.format(clf.score(train_data_resampled,
                                train_label_resampled)))  #Train score
print('{:.4f}'.format(clf.score(test_data, test_label)))  #Test score

# Print the confusion matrix as three space-separated rows (assumes exactly
# three classes, since only rows 0-2 are formatted).
confusion = confusion_matrix(test_label, clf.predict(test_data))
confusion0 = ' '.join(map(str, confusion[0]))
confusion1 = ' '.join(map(str, confusion[1]))
confusion2 = ' '.join(map(str, confusion[2]))
print('%s %s %s ' % (confusion0, confusion1, confusion2))
print('{:.4f}'.format(accuracy_score(test_label, clf.predict(test_data))))

precision = ' '.join(
    map(str,
        (precision_score(test_label, clf.predict(test_data), average=None))))
# NOTE(review): this assignment shadows sklearn's recall_score function with
# a string (the truncated line below does the same for f1_score), breaking
# any later calls to those functions.
recall_score = ' '.join(
    map(str, (recall_score(test_label, clf.predict(test_data), average=None))))
f1_score = ' '.join(
Пример #35
0
# (A one-vs-rest logistic-regression variant was evaluated here and left
# disabled.)

# Multiclass MLP via pairwise (one-vs-one) reduction. A second hidden layer
# of 100 units was needed to fit the training set fully.
mlpMC = MLPClassifier(hidden_layer_sizes=(100, 100),
                      random_state=4,
                      max_iter=5000)
onevOne = OneVsOneClassifier(mlpMC)
onevOne.fit(X_train, y_train)

# Report train and test accuracy in the file's CSV-ish log format.
print("\n	,1versus1 MLP, ")
print(" %f" % onevOne.score(X_train, y_train))
print(",  %f" % onevOne.score(X_test, y_test))

sys.stdout.flush()

# (A linear-kernel SVC one-vs-one variant was also tried and left disabled.)
Пример #36
0
#v.fit(doc_train + doc_test)
v.fit(doc_train)
doc_train = v.transform(doc_train)
doc_test = v.transform(doc_test)
v = None
print(time.process_time() - pt)


print("Training data shape ", doc_train.shape)
print("Training...", end=" ")
sys.stdout.flush()
pt = time.process_time()
#n_estimators = 27
m = OneVsOneClassifier(svm.LinearSVC(dual=False))
#m = OneVsOneClassifier(BaggingClassifier(svm.SVC(kernel="poly",degree=2), max_samples=1.0 / n_estimators, n_estimators=n_estimators, bootstrap=False, n_jobs=4, verbose=2))
#m = svm.SVC(kernel="poly",degree=2)
#m = MultinomialNB()
#scores = cross_validation.cross_val_score(m, doc_train, y_train, cv=5, n_jobs=5, verbose=1)
#print(scores)
#print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print(m.fit(doc_train, y_train))
print(time.process_time() - pt)


print("Testing... ")
pt = time.process_time()
print(m.score(doc_train, y_train)*100.0)
print(m.score(doc_test, y_test)*100.0)
print("Testing took {} seconds.".format(time.process_time() - pt))

Пример #37
0
}

# Three Twin-SVM variants to compare, paired with display names.
names = [
    "Twin SVM", "Twin SVM with RBF Kernel",
    "Twin SVM RBF Kernel with fuzzy membership"
]
classifiers = [
    TwinSVMClassifier(**params1),
    TwinSVMClassifier(**params2),
    TwinSVMClassifier(**params3),
]

# Synthetic three-class toy problem, jittered with uniform noise so it is
# not perfectly separable.
X, y = make_classification(n_samples=1000,
                           n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1,
                           n_classes=3)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

# Standardize, hold out 40% for testing, then score each variant wrapped
# in a one-vs-one multiclass reduction.
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
for name, base in zip(names, classifiers):
    clf = OneVsOneClassifier(base).fit(X_train,
                                       y_train)  # or OneVsRestClassifier
    score = clf.score(X_test, y_test)
    print(score)
Пример #38
0
from sklearn import datasets

# Load the three-class iris dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

## OvO and OvR, approach 1: multiclass handling built into LogisticRegression
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression()  # one-vs-rest by default
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)

log_reg2 = LogisticRegression(multi_class='multinomial',
                              solver='newton-cg')  # multinomial variant
log_reg2.fit(X_train, y_train)
log_reg2.score(X_test, y_test)

## OvO and OvR, approach 2: generic meta-estimator wrappers
from sklearn.multiclass import OneVsRestClassifier
ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
ovr.score(X_test, y_test)

from sklearn.multiclass import OneVsOneClassifier
ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
ovo.score(X_test, y_test)