def clasificar_ECOC(X, y, df, trainInputs, trainOutputs, testInputs, testOutputs, graphname):
    """Fit an output-code classifier and report train/test scores.

    An ``OutputCodeClassifier`` wrapping a ``DecisionTreeClassifier`` is fitted
    on the training split. The train and test scores are printed as
    percentages (labelled "CCR"), followed by the test predictions and the
    expected test labels.

    Args:
        X: Unused; kept so existing callers keep working.
        y: Unused; kept so existing callers keep working.
        df: Unused; kept so existing callers keep working.
        trainInputs: Training samples passed to ``fit`` and ``score``.
        trainOutputs: Training labels.
        testInputs: Test samples passed to ``score`` and ``predict``.
        testOutputs: Test labels.
        graphname: Label printed between square brackets as a header.

    Returns:
        The value of ``clf.score(testInputs, testOutputs)``.
    """
    print("\n[" + str(graphname) + "]")

    # The previous version built an RBF kernel here that was never handed to
    # any estimator; it has been dropped (it also required ``RBF`` in scope).
    clf = OutputCodeClassifier(estimator=DecisionTreeClassifier())
    clf = clf.fit(trainInputs, trainOutputs)

    precisionTrain = clf.score(trainInputs, trainOutputs)
    precisionTest = clf.score(testInputs, testOutputs)
    print("\tCCR train = %.2f%% | CCR test = %.2f%%" % (precisionTrain*100, precisionTest*100))

    # Print predictions and ground truth one after the other for eyeballing.
    prediccion_test = clf.predict(testInputs)
    print(prediccion_test)
    print(testOutputs)

    return precisionTest
def evaluateOutputCode(X, Y, printReport=False):
    """Train an output-code classifier on a hold-out split and evaluate it.

    The data is split 80/20 (``test_size=0.2``, ``random_state=42``) and an
    ``OutputCodeClassifier`` over ``LinearSVC`` (``code_size=2``) is fitted on
    the training part.

    Args:
        X: Samples.
        Y: Labels.
        printReport: When true, print the elapsed time, the test score and
            how many test samples were predicted as 0, 1 and 2.

    Returns:
        list: ``[score, n_pred_1, clf]`` where ``score`` is
        ``clf.score`` on the held-out split against the true labels,
        ``n_pred_1`` is the number of test samples predicted as class 1, and
        ``clf`` is the fitted classifier.
    """
    started = datetime.datetime.now()
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    clf = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0)
    clf.fit(X_train, Y_train)
    elapsed = datetime.datetime.now() - started

    # Score against the TRUE held-out labels. The previous version overwrote
    # Y_test with the predictions before the final score() call, so the
    # returned accuracy compared predictions with themselves (always 1.0).
    test_score = clf.score(X_test, Y_test)
    predicted = clf.predict(X_test)

    if printReport:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Training time:' + str(elapsed))
        # NOTE(review): the label says "OneVsOne" although the model is an
        # output-code classifier; text kept as-is in case output is parsed.
        print('Evaluation result: OneVsOne: ' + str(test_score))
        print('0: ' + str((predicted == 0).sum()))
        print('1: ' + str((predicted == 1).sum()))
        print('2: ' + str((predicted == 2).sum()))

    return [test_score, (predicted == 1).sum(), clf]
def OutputCodeClassifier(data, label, pred_data, pred_last):
    """Fit an output-code classifier and predict labels for ``pred_data``.

    All four inputs are converted with ``np.array``. An sklearn
    ``OutputCodeClassifier`` over ``LinearSVC`` (``code_size=2``) is fitted on
    ``data``/``label``. The function prints the training score, the number of
    predictions that differ from ``pred_last``, and the score on
    ``pred_data``/``pred_last``.

    Output recorded by the original author from a past run::

        0.76473194506
        Number of mislabeled points out of a total 841 points : 211
        0.749108204518

    Original author's note: needs normalization.

    Args:
        data: Training samples.
        label: Training labels.
        pred_data: Samples to predict.
        pred_last: Expected labels for ``pred_data``.

    Returns:
        The predictions for ``pred_data``.
    """
    # Aliased on import: this function has the same name as the sklearn
    # class, and an un-aliased local import would rebind that name inside the
    # body, which is easy to misread.
    from sklearn.multiclass import OutputCodeClassifier as _SklearnOutputCode
    from sklearn.svm import LinearSVC

    data = np.array(data)
    pred_data = np.array(pred_data)
    label = np.array(label)
    pred_last = np.array(pred_last)

    clf = _SklearnOutputCode(LinearSVC(random_state=0), code_size=2, random_state=0)
    clf.fit(data, label)
    # Single-argument print(...) works on both Python 2 and Python 3; the
    # previous bare ``print x`` statements were a SyntaxError on Python 3.
    print(clf.score(data, label))

    pred_result = clf.predict(pred_data)
    print("Number of mislabeled points out of a total %d points : %d" %
          (pred_data.shape[0], (pred_last != pred_result).sum()))
    print(clf.score(pred_data, pred_last))
    return pred_result
def ECOC():
    """Run the error-correcting-output-codes benchmark over every dataset.

    For each entry of the module-level ``lista_datasets`` the ARFF file
    ``./datasets/<entry>`` is loaded into a DataFrame. Every column except
    ``class`` is used as input and the factorized ``class`` column as target.
    The data is split 75/25 (no fixed seed) and an ``OutputCodeClassifier``
    over a 5-nearest-neighbours estimator is fitted. The test score is
    printed, followed by a separator line. Nothing is returned.
    """
    print('Aplicando metodo multiclase ERROR CORRECTING OUTPUT CODES')

    for nombre in lista_datasets:
        etiqueta = str(nombre)
        print('Base de datos: ' + etiqueta)

        cargado = arff.loadarff('./datasets/' + etiqueta)
        df = pd.DataFrame(cargado[0])

        # Boolean column mask: everything that is not the target column.
        no_es_clase = df.columns != 'class'
        entradas = df.iloc[:, no_es_clase]
        salidas = pd.factorize(df['class'])[0]

        X_train, X_test, Y_train, Y_test = train_test_split(
            entradas, salidas, test_size=0.25)

        base = KNeighborsClassifier(n_neighbors=5)
        clf = OutputCodeClassifier(base, code_size=2, random_state=0)
        clf.fit(X_train, Y_train)

        print('Porcentaje de bien clasificados ERROR CORRECTING OUTPUT CODES')
        print(clf.score(X_test, Y_test))
        print('--------------------------')
# Test
# Keep only the test samples whose label is 0, 1, 7 or 8.
threshold_test = np.where(
    (y_test == 0) | (y_test == 1) | (y_test == 7) | (y_test == 8)
)
y_test_thres = y_test[threshold_test]
x_test_thres = x_test[threshold_test]

# ---------------------------------------------------------------------------
# Training a classifier (4 numbers)
# ---------------------------------------------------------------------------
num_iter = 5

start_time_OCC = time.time()
# Output-code classifier over a Perceptron capped at num_iter iterations.
OCC = OutputCodeClassifier(Perceptron(max_iter=num_iter, random_state=0))
OCC.fit(x_train_thres, y_train_thres)

predictionsOCC = OCC.predict(x_test_thres)
scoreOCC = OCC.score(x_test_thres, y_test_thres)

# Confusion matrix of the filtered test set, drawn as an annotated heatmap.
cmOCC = metrics.confusion_matrix(y_test_thres, predictionsOCC)
plt.figure(figsize=(9, 9))
sns.heatmap(
    cmOCC,
    annot=True,
    fmt=".3f",
    linewidths=.5,
    square=True,
    cmap='Blues_r',
)
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
all_sample_title = 'OCC - Accuracy Score: {0}'.format(scoreOCC)
plt.title(all_sample_title, size=15)
plt.show()
# Output-code classifier demo on the iris dataset: fit on the full data,
# print the predictions, the accuracy and the fitted sub-models.
import warnings

from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC

# NOTE(review): this silences every warning for the whole process, not just
# the ones raised while fitting the model.
warnings.filterwarnings('ignore')

iris = datasets.load_iris()
X, y = iris.data, iris.target
print('样本数量:%d, 特征数量:%d' % X.shape)

# Build the model object.
# code_size: determines how many sub-models are used in the end; the actual
# number of sub-models = code_size * number of labels.
clf = OutputCodeClassifier(LinearSVC(random_state=0), code_size=30, random_state=0)
clf.fit(X, y)

# Print the predicted values. The predictions are computed once and reused
# for the accuracy below instead of calling predict() a second time.
predictions = clf.predict(X)
print(predictions)
print('准确率:%.3f' % accuracy_score(y, predictions))
print(clf.score(X, y))

# Print the model attributes: each fitted sub-model, numbered from 1.
for k, item in enumerate(clf.estimators_, start=1):
    print('第%d个模型:' % k, end='')
    print(item)
print(clf.classes_)
# Train an output-code classifier on the car data and print its test score.
import numpy
import pandas
from numpy import genfromtxt
from sklearn import datasets
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# Added: OutputCodeClassifier is used below but was not among the imports
# visible in this script, so the script would stop with a NameError.
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

# Rows [0, TRAIN_ROWS) are used for training, the remainder for testing.
TRAIN_ROWS = 760

# Only the first six columns of the CSV are read as features.
my_data = genfromtxt('cartrain.csv', delimiter=',', dtype=int, usecols=(0, 1, 2, 3, 4, 5))
mytarget = genfromtxt('target.csv', delimiter=',', dtype=int)

trainx = my_data[0:TRAIN_ROWS]
trainy = mytarget[0:TRAIN_ROWS]
testx = my_data[TRAIN_ROWS:]
testy = mytarget[TRAIN_ROWS:]

model = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=1)
model.fit(trainx, trainy)
print(model.score(testx, testy))