def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.
    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.
    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
        being your prediction of labels on training and test data, from 0 to 9.
    '''
    if mode == 'ovr':
        clf = multiclass.OneVsRestClassifier(svm.LinearSVC(random_state=12345))
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    elif mode == 'ovo':
        clf = multiclass.OneVsOneClassifier(svm.LinearSVC(random_state=12345))
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    else:
        clf = svm.LinearSVC(multi_class='crammer_singer')
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    return y_pred_train, y_pred_test
def multiclass_example():
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    random_state = 0
    clf = svm.LinearSVC(random_state=random_state)
    # --------------------
    ovr_clf = multiclass.OneVsRestClassifier(clf)
    ovr_clf.fit(X, y)
    pred = ovr_clf.predict(X)
    print('Prediction (ovr) =\n', pred)
    # --------------------
    ovo_clf = multiclass.OneVsOneClassifier(clf)
    ovo_clf.fit(X, y)
    pred = ovo_clf.predict(X)
    print('Prediction (ovo) =\n', pred)
    # --------------------
    oc_clf = multiclass.OutputCodeClassifier(clf, code_size=2,
                                             random_state=random_state)
    oc_clf.fit(X, y)
    pred = oc_clf.predict(X)
    print('Prediction (oc) =\n', pred)
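# Minimal sketch of the setup assumed by multiclass_example() above; the import
# line names the standard scikit-learn modules these snippets rely on, and the
# call itself is illustrative only.
from sklearn import datasets, multiclass, svm

if __name__ == '__main__':
    multiclass_example()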
def multiClass(trainx, trainy, testx, testy):
    """
    Handle multiclass problems by combining binary classifiers.
    :param trainx: training-set examples
    :param trainy: training-set labels
    :param testx: test-set examples
    :param testy: test-set labels
    """
    Unvs1 = multiclass.OneVsOneClassifier(svm.LinearSVC(random_state=0))
    UnvsAll = multiclass.OneVsRestClassifier(svm.LinearSVC(random_state=0))

    Unvs1.fit(trainx, trainy)
    err_Unvs1_train = round(1 - Unvs1.score(trainx, trainy), 3)
    err_Unvs1_test = round(1 - Unvs1.score(testx, testy), 3)

    UnvsAll.fit(trainx, trainy)
    err_UnvsAll_train = round(1 - UnvsAll.score(trainx, trainy), 3)
    err_UnvsAll_test = round(1 - UnvsAll.score(testx, testy), 3)

    print("Err_1vs1 : train %f, test %f\n" % (err_Unvs1_train, err_Unvs1_test))
    print("Err_1vsAll : train %f, test %f\n" % (err_UnvsAll_train, err_UnvsAll_test))
    print("========== Prediction UnvsUn ==============")
    print(Unvs1.predict(testx).shape)
    print("========= Prediction UnvsRest =============")
    print(UnvsAll.predict(testx).shape)
def SVM_FullConnected(pretrained_model, C, d_, batch_size=141, epoches=32):
    start_time = time.time()
    # Build the training-data input pipeline
    train_ds = ReadImage.getKmeansDataSet(batch_size)
    print('Training pipeline built')
    iterator = train_ds.make_initializable_iterator()
    data_element = iterator.get_next()
    sess = tf.Session()
    sess.run(iterator.initializer)
    train_label = []
    # Incremental learning would not need the full feature set in memory
    train_data = []
    for i in range(epoches):
        cur_train_image, cur_train_label = sess.run(data_element)
        # The SVM consumes the mean-pooled output vectors of the pretrained model
        cur_train_data = pretrained_model.predict(cur_train_image)
        train_data.extend(cur_train_data)
        train_label.extend(cur_train_label)
    print('Training data extracted. Training data shape: %d %d' % np.shape(train_data))
    sess.close()
    train_data = np.asarray(train_data)
    train_label = np.asarray(train_label)

    # Load the test data
    test_image, test_label = ReadImage.getTestDateSet('bow')
    print('Test data collected')
    test_data = pretrained_model.predict(test_image)
    print('Test data extracted, shape: %d %d' % np.shape(test_data))
    end_time1 = time.time()
    print('Feature extraction time: %.8ss' % (end_time1 - start_time))

    # Reduce the dimensionality of the training and test data
    # (the plain PCA model is superseded by the RBF-kernel PCA below)
    # pca_model = PCA(n_components=d_)
    pca_model = KernelPCA(n_components=d_, kernel='rbf')
    pca_model.fit(X=train_data)
    train_data_d_ = pca_model.transform(train_data)
    print('Reduced training-set shape: %d %d' % np.shape(train_data_d_))
    test_data_d_ = pca_model.transform(test_data)
    print('Reduced test-set shape: %d %d' % np.shape(test_data_d_))
    d_time = time.time()
    print('Dimensionality-reduction time: %.8ss' % (d_time - end_time1))

    # Feed the reduced training and test data into the SVM
    svc_classifier = svm.SVC(C=C, kernel='rbf', random_state=0, gamma='auto')
    model = multiclass.OneVsOneClassifier(svc_classifier, n_jobs=-1)
    clf = model.fit(train_data_d_, train_label)
    end_time2 = time.time()
    print('Training finished in %.8ss' % (end_time2 - d_time))
    train_accuracy = clf.score(train_data_d_, train_label)
    end_time3 = time.time()
    print('train_accuracy: %f, time: %.8s s' % (train_accuracy, end_time3 - end_time2))
    test_accuracy = clf.score(test_data_d_, test_label)
    end_time4 = time.time()
    print('test_accuracy: %f, time: %.8s s' % (test_accuracy, end_time4 - end_time3))
    print('Done, total time: %.8s s' % (end_time4 - start_time))
    f = open('./BoW/txt_record/svmFCRecord.txt', 'a+')
    f.write('Penalty parameter C: %f \n' % C)
    # f.write('Reduced dimension: %d, method: RBF-kernel PCA\n' % d_)
    f.write('one vs one classifier')
    f.write('train accuracy: %.4f  test accuracy: %.4f\n\r' % (train_accuracy, test_accuracy))
    f.close()
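# Sketch: the manual KernelPCA -> one-vs-one RBF SVC steps of SVM_FullConnected
# expressed as a scikit-learn Pipeline. The helper name make_kpca_ovo_svc and
# the commented-out call are illustrative assumptions, not part of the original.
from sklearn.decomposition import KernelPCA
from sklearn.multiclass import OneVsOneClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

def make_kpca_ovo_svc(d_, C):
    return Pipeline([
        ('kpca', KernelPCA(n_components=d_, kernel='rbf')),
        ('ovo_svc', OneVsOneClassifier(
            SVC(C=C, kernel='rbf', gamma='auto', random_state=0), n_jobs=-1)),
    ])

# pipe = make_kpca_ovo_svc(d_=64, C=10.0)
# pipe.fit(train_data, train_label); print(pipe.score(test_data, test_label))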
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.
    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.
    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
        being your prediction of labels on training and test data, from 0 to 9.
    '''
    if mode == 'ovr':
        model = multiclass.OneVsRestClassifier(
            svm.LinearSVC(random_state=12345))
        # print("start training")
        start = time.time()
        model.fit(X_train, y_train)
        total = time.time() - start
        # print("elapsed time {:3.2f}, start predicting".format(total))
        result = (model.predict(X_train), model.predict(X_test))
        return result
    elif mode == 'ovo':
        model = multiclass.OneVsOneClassifier(
            svm.LinearSVC(random_state=12345))
        # print("start training")
        start = time.time()
        model.fit(X_train, y_train)
        total = time.time() - start
        # print("elapsed time {:3.2f}, start predicting".format(total))
        # print("start predicting")
        result = (model.predict(X_train), model.predict(X_test))
        return result
    else:
        model = svm.LinearSVC(multi_class='crammer_singer', random_state=12345)
        # print("start training")
        start = time.time()
        model.fit(X_train, y_train)
        total = time.time() - start
        # print("elapsed time {:3.2f} start predicting".format(total))
        # print("start predicting")
        result = (model.predict(X_train), model.predict(X_test))
        return result
def learnSVM(C, train_code, train_label, test_code, test_label):
    n = np.shape(train_code)[1]  # feature dimension (number of clusters)
    print('Starting SVM training')
    svc_classifier = svm.SVC(C=C, kernel='rbf', random_state=0, gamma='auto')
    model = multiclass.OneVsOneClassifier(svc_classifier, n_jobs=-1)
    clf = model.fit(train_code, train_label)
    joblib.dump(clf, filename='./BoW/result/svm_' + str(n) + '_' + str(C) + '_.pkl')
    print('Training finished')
    train_accuracy = clf.score(train_code, train_label)
    print('train_accuracy:%f' % train_accuracy)
    test_accuracy = clf.score(test_code, test_label)
    print('test_accuracy:%f' % test_accuracy)
    print('Training finished')
    f = open('./BoW/txt_record/svmRecord.txt', 'a+')
    f.write('Feature dimension: %d \n' % n)
    f.write('Penalty parameter C: %f \n' % C)
    f.write('train accuracy: %.4f  test accuracy: %.4f\n\r' % (train_accuracy, test_accuracy))
    f.close()
def build(self, input_model, model_calibrator_id, model_calibrator_params):
    """Build a model calibrator using the specified id"""
    if model_calibrator_id == 'sklearn_CalibratedClassifierCV':
        params = model_calibrator_params
        params['base_estimator'] = input_model
        return calibration.CalibratedClassifierCV(**params)
    elif model_calibrator_id == 'sklearn_GridSearchCV':
        params = model_calibrator_params
        params['estimator'] = input_model
        return model_selection.GridSearchCV(**params)
    elif model_calibrator_id == 'sklearn_OneVsRestClassifier':
        params = model_calibrator_params
        params['estimator'] = input_model
        return multiclass.OneVsRestClassifier(**params)
    elif model_calibrator_id == 'sklearn_OneVsOneClassifier':
        params = model_calibrator_params
        params['estimator'] = input_model
        return multiclass.OneVsOneClassifier(**params)
    return None
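# Hypothetical call into build(); the ModelCalibratorBuilder name and the
# parameter values are illustrative assumptions, only the calibrator id string
# and the 'estimator' key come from the method above.
from sklearn import svm

builder = ModelCalibratorBuilder()
ovo = builder.build(
    input_model=svm.LinearSVC(random_state=0),
    model_calibrator_id='sklearn_OneVsOneClassifier',
    model_calibrator_params={'n_jobs': -1},
)
# ovo is now a multiclass.OneVsOneClassifier wrapping the LinearSVC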
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    """
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.
    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.
    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
        being your prediction of labels on training and test data, from 0 to 9.
    """
    y_pred_train = None
    y_pred_test = None
    # using random_state=12345 for reproducibility
    svm_model = svm.LinearSVC(random_state=12345)
    if mode == 'ovr':
        ovr_model = multiclass.OneVsRestClassifier(svm_model)
        ovr_model.fit(X_train, y_train)
        y_pred_train = ovr_model.predict(X_train)
        y_pred_test = ovr_model.predict(X_test)
    elif mode == 'ovo':
        ovo_model = multiclass.OneVsOneClassifier(svm_model)
        ovo_model.fit(X_train, y_train)
        y_pred_train = ovo_model.predict(X_train)
        y_pred_test = ovo_model.predict(X_test)
    elif mode == 'crammer':
        # using random_state=12345 for reproducibility
        crammer_singer_model = svm.LinearSVC(multi_class='crammer_singer',
                                             random_state=12345)
        crammer_singer_model.fit(X_train, y_train)
        y_pred_train = crammer_singer_model.predict(X_train)
        y_pred_test = crammer_singer_model.predict(X_test)
    else:
        print("Invalid mode. Mode should be 'ovr', 'ovo' or 'crammer'.")
    return y_pred_train, y_pred_test
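# Illustrative driver for the sklearn_multiclass_prediction() helpers above;
# the digits dataset and the 80/20 split are assumptions, not part of the
# original snippets.
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

digits = datasets.load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.2, random_state=0)
for mode in ('ovr', 'ovo', 'crammer'):
    y_pred_train, y_pred_test = sklearn_multiclass_prediction(
        mode, X_train, y_train, X_test)
    print(mode, 'test accuracy:', accuracy_score(y_test, y_pred_test))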
def svmClassification(n_cluster, C):
    start_time = time.time()
    wholelabel = readCsv('label', 'wholelabel', n_cluster)
    train_data = np.asarray(readCsv('data', 'train', n_cluster))
    print(np.shape(train_data))
    train_label_str = readCsv('label', 'train', n_cluster)
    train_data = str2float(train_data)
    train_label_int = getHistogramlabel(train_label_str, wholelabel)
    print('Training data loaded')
    test_data = np.asarray(readCsv('data', 'test', n_cluster))
    print(np.shape(test_data))
    test_data = str2float(test_data)
    test_label_str = readCsv('label', 'test', n_cluster)
    test_label_int = getHistogramlabel(test_label_str, wholelabel)
    print('Test data loaded')
    svc_classifier = svm.SVC(C=C, kernel='rbf', gamma='scale')
    model = multiclass.OneVsOneClassifier(svc_classifier, n_jobs=-1)
    print('Starting training')
    clf = model.fit(train_data, train_label_int)
    print('Training finished')
    train_accuracy = clf.score(train_data, train_label_int)
    test_accuracy = clf.score(test_data, test_label_int)
    print('train_accuracy:%f' % train_accuracy)
    print('test_accuracy:%f' % test_accuracy)
    end_time = time.time()
    f = open('../result/svm/svmRecord.txt', 'a+')
    f.write('Feature dimension: %d \n' % n_cluster)
    f.write('Penalty parameter C: %d \n' % C)
    f.write('train accuracy: %.4f  test accuracy: %.4f\n' % (train_accuracy, test_accuracy))
    f.write('Elapsed time: %.8s \r\n' % (end_time - start_time))
    f.close()
    print('Elapsed time: %.8s' % (end_time - start_time))
    print('Done')
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.
    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.
    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
        being your prediction of labels on training and test data, from 0 to 9.
    '''
    # X_train dimension: (5000, 784); y_train dimension: (5000,)
    if mode == "ovr":
        clf = svm.LinearSVC(multi_class="ovr", random_state=12345)
        ovr_classifier = multiclass.OneVsRestClassifier(clf)
        ovr_classifier.fit(X_train, y_train)
        y_pred_train = ovr_classifier.predict(X_train)
        y_pred_test = ovr_classifier.predict(X_test)
    if mode == "ovo":
        clf = svm.LinearSVC(random_state=12345)
        ovo_classifier = multiclass.OneVsOneClassifier(clf)
        ovo_classifier.fit(X_train, y_train)
        y_pred_train = ovo_classifier.predict(X_train)
        y_pred_test = ovo_classifier.predict(X_test)
    if mode == "crammer":
        clf = svm.LinearSVC(multi_class="crammer_singer", random_state=12345)
        clf.fit(X_train, y_train)
        y_pred_train = clf.predict(X_train)
        y_pred_test = clf.predict(X_test)
    return y_pred_train, y_pred_test
def sklearn_multiclass_prediction(mode, X_train, y_train, X_test):
    '''
    Use Scikit Learn built-in functions multiclass.OneVsRestClassifier
    and multiclass.OneVsOneClassifier to perform multiclass classification.
    Arguments:
        mode: one of 'ovr', 'ovo' or 'crammer'.
        X_train, X_test: numpy ndarray of training and test features.
        y_train: labels of training data, from 0 to 9.
    Returns:
        y_pred_train, y_pred_test: a tuple of 2 numpy ndarrays,
        being your prediction of labels on training and test data, from 0 to 9.
    '''
    clf = None
    estimator = svm.LinearSVC(random_state=12345, verbose=False)
    if mode == 'ovr':
        clf = multiclass.OneVsRestClassifier(estimator=estimator, n_jobs=-1)
    elif mode == 'ovo':
        clf = multiclass.OneVsOneClassifier(estimator=estimator, n_jobs=-1)
    elif mode == 'crammer':
        clf = svm.LinearSVC(random_state=12345, multi_class='crammer_singer')
    else:
        print("Invalid mode {:s}".format(mode))
        return -1
    # Fit the model with given data
    clf.fit(X_train, y_train)
    # Predict the training data using the model
    y_pred_train = clf.predict(X_train)
    # Predict the testing data using the model
    y_pred_test = clf.predict(X_test)
    return y_pred_train, y_pred_test
def train_model(folders):
    """
    Takes a list of folders from which to draw data files to train the model.
    Parses sentences in a similar way to when testing, by iteratively looking
    at target nodes in the remaining subtrees of the sentence. For each pair,
    the algorithm derives a list of features and a correct construction
    action. Once these are all found, it uses them to generate a model, which
    is returned.
    ========== INCOMPLETE ==========
    """
    raw_features = []
    classifications = []
    for filepath in data_file_paths_for_folders(folders):
        for sentence in dt.parsed_sents(filepath):
            T = flattened_node_list(sentence)
            i = 0
            no_construction = True
            while len(T) >= 1:
                if i == len(T) - 1:
                    if no_construction:
                        break
                    no_construction = True
                    i = 0
                else:
                    target_features = get_contextual_features(T, i)
                    target_classification = get_classification(T, i, sentence)
                    raw_features.append(target_features)
                    classifications.append(target_classification)
                    construction(T, i, target_classification)
                    if target_classification != SHIFT:
                        no_construction = False
                    i += 1
    vectorizer = DictVectorizer()
    feature_matrix = vectorizer.fit_transform(raw_features)
    feature_names = vectorizer.get_feature_names()
    model = multiclass.OneVsOneClassifier(svm.LinearSVC())
    model.fit(feature_matrix, classifications)
    return vectorizer, model
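# Toy illustration of the DictVectorizer + OneVsOneClassifier(LinearSVC) step
# at the end of train_model(); the feature dicts and action labels here are
# invented for the example, the real ones come from get_contextual_features().
from sklearn import multiclass, svm
from sklearn.feature_extraction import DictVectorizer

toy_features = [{'word': 'the', 'pos': 'DT'},
                {'word': 'cat', 'pos': 'NN'},
                {'word': 'sat', 'pos': 'VBD'}]
toy_actions = ['SHIFT', 'LEFT', 'RIGHT']
vec = DictVectorizer()
X = vec.fit_transform(toy_features)
clf = multiclass.OneVsOneClassifier(svm.LinearSVC()).fit(X, toy_actions)
print(clf.predict(vec.transform([{'word': 'cat', 'pos': 'NN'}])))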
    n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto',
    verbose=0, random_state=None, copy_x=True, n_jobs=-1)
all_ask_cluster_model.fit(all_ask_prices_nm)
all_ask_labels = all_ask_cluster_model.predict(all_ask_prices_nm)

# Classifying on the basis of clusters
print("Classifying...")
bid_cluster_classifier_ada = multiclass.OneVsOneClassifier(
    estimator=ensemble.AdaBoostClassifier(base_estimator=None, n_estimators=50,
                                          learning_rate=1.0, algorithm='SAMME.R',
                                          random_state=None),
    n_jobs=-1)
bid_cluster_classifier_ada.fit(trainFeatures, all_bid_labels)
print("Bid accuracy with AdaBoost: ",
      bid_cluster_classifier_ada.score(trainFeatures, all_bid_labels))

ask_cluster_classifier_ada = multiclass.OneVsOneClassifier(
    estimator=ensemble.AdaBoostClassifier(base_estimator=None, n_estimators=50,
                                          learning_rate=1.0, algorithm='SAMME.R',
                                          random_state=None),
    n_jobs=-1)
ask_cluster_classifier_ada.fit(trainFeatures, all_ask_labels)
print("Ask accuracy with AdaBoost Classifier: ",
      ask_cluster_classifier_ada.score(trainFeatures, all_ask_labels))

ada = {'bid': bid_cluster_classifier_ada, 'ask': ask_cluster_classifier_ada}
for ix, row in test_table.iterrows():
    X = (np.array(row[featureColumns])).flatten('F')
    testX[index, :] = X
    index = index + 1

print("Classifier for Clusters...")
bid_cluster_classifier = multiclass.OneVsOneClassifier(
    estimator=ensemble.RandomForestClassifier(n_estimators=30, criterion='gini',
                                              max_depth=None, min_samples_split=2,
                                              min_samples_leaf=1,
                                              min_weight_fraction_leaf=0.0,
                                              max_features='auto',
                                              max_leaf_nodes=None, bootstrap=True,
                                              oob_score=False, n_jobs=1,
                                              random_state=None, verbose=0,
                                              warm_start=False, class_weight=None),
    n_jobs=-1)
bid_cluster_classifier.fit(trainX, all_bid_labels)
print("Bid accuracy with Random Forest: ",
      bid_cluster_classifier.score(trainX, all_bid_labels))

ask_cluster_classifier = multiclass.OneVsOneClassifier(
    estimator=ensemble.RandomForestClassifier(n_estimators=30, criterion='gini',
                   float(sl[7]), float(sl[8]), float(sl[9]),
                   float(sl[10]), float(sl[11])]])
    data = np.concatenate((data, d))
    target = np.append(target, sl[0])
data = data[1:, :]
file_in.close()

# define classifiers
kNeighborsClassifier = neighbors.KNeighborsClassifier()
nearestCentroid = neighbors.NearestCentroid()
gaussianNB = naive_bayes.GaussianNB()
multinomialNB = naive_bayes.MultinomialNB()
bernoulliNB = naive_bayes.BernoulliNB()
linearSVC = svm.LinearSVC()
oneVsRestClassifier = multiclass.OneVsRestClassifier(linearSVC)
oneVsOneClassifier = multiclass.OneVsOneClassifier(linearSVC)
ridgeClassifier = linear_model.RidgeClassifier()
logisticRegression = linear_model.LogisticRegression()
decisionTreeClassifier = tree.DecisionTreeClassifier()
extraTreeClassifier = tree.ExtraTreeClassifier()
extraTreesClassifier = ensemble.ExtraTreesClassifier()
adaBoost = ensemble.AdaBoostClassifier()
randomForest = ensemble.RandomForestClassifier()
baggingClassifier = ensemble.BaggingClassifier()
gradientBoostingClassifier = ensemble.GradientBoostingClassifier()

classifiers = [
    kNeighborsClassifier, nearestCentroid, gaussianNB, multinomialNB,
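# Hypothetical evaluation loop over the classifier list begun above (the list
# is truncated in this excerpt); scoring each estimator with 5-fold
# cross-validation is an assumption about what the original script did with
# `classifiers`, `data` and `target`.
from sklearn.model_selection import cross_val_score

for clf in classifiers:
    scores = cross_val_score(clf, data, target, cv=5)
    print(type(clf).__name__, scores.mean())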
cnt = 0
for i, v in enumerate(word_vectors.index2word):
    if v.startswith("a"):
        x_train.append(word_vectors.vectors[i])
        y_value = 5
        if v in target_author:
            y_value = target_author[v]
            cnt = cnt + 1
        y_train.append(y_value)  # append a scalar so the label array stays 1-D

x = np.array(x_train)
y = np.array(y_train)
x_train = x[:int((0.8 * len(x)))]
y_train = y[:int((0.8 * len(x)))]
x_test = x[int(0.8 * len(x)):]
y_test = y[int(0.8 * len(x)):]

print("Training:\n")
model = multiclass.OneVsOneClassifier(svm.LinearSVC())
model.fit(x_train, y_train)

print("Saving model to model.pkl\n")
joblib.dump(model, 'model.pkl')

print("Accuracy on test\n")
y_test_predicted = model.predict(x_test)
print(metrics.accuracy_score(y_test, y_test_predicted))

print("Accuracy on train\n")
y_train_predicted = model.predict(x_train)
print(metrics.accuracy_score(y_train, y_train_predicted))
def generate_classifier(self):
    self.classifer = sklearn_multiclass.OneVsOneClassifier(
        sklearn_svm.SVC(kernel=self.kernel, gamma=self.gamma, C=self.constant))
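# Hypothetical use of generate_classifier(); the SVMWrapper name, constructor
# arguments and data arrays are illustrative assumptions. Only the
# kernel/gamma/constant attributes and the one-vs-one SVC construction come
# from the method above.
wrapper = SVMWrapper(kernel='rbf', gamma=0.05, constant=1.0)
wrapper.generate_classifier()
wrapper.classifer.fit(X_train, y_train)   # attribute name spelled as in the snippet
print(wrapper.classifer.score(X_test, y_test))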
       [  7.,   6., 414.,   2.,  52.,   0.,  51.,   0.,   1.,   0.],
       [  7.,   2.,   3., 407.,  11.,   0.,   4.,   0.,   0.,   0.],
       [  0.,   0.,  26.,   7., 367.,   0.,  19.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0., 432.,   0.,  48.,   0.,   5.],
       [ 64.,   6.,  43.,  43.,  54.,   0., 347.,   0.,   3.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   7.,   0., 410.,   0.,   6.],
       [ 23.,   2.,  14.,  20.,  15.,  46.,  26.,   6., 495.,   2.],
       [  0.,   0.,   0.,   0.,   0.,  14.,   0.,  36.,   0., 487.]])

# ## sklearn svm

# In[ ]:

# Multi class using SK learn 1 vs 1
# Initialize classifier
classifier = multiclass.OneVsOneClassifier(svm.SVC(kernel='rbf', C=1, gamma=0.05))

# In[ ]:

Classifiers = []

# In[ ]:

# Train
Yi = np.ones(2250).astype(float)
# numCLF = 0
st = time.time()
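# Sketch of fitting the wrapped classifier defined in the notebook cell above
# and recomputing a confusion matrix with scikit-learn; trainX/trainY/testX/
# testY are assumed names for the notebook's data arrays.
from sklearn.metrics import confusion_matrix

classifier.fit(trainX, trainY)
y_pred = classifier.predict(testX)
print(confusion_matrix(testY, y_pred))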
def skl_ovo(X_train, y_train, X_test):
    model = multiclass.OneVsOneClassifier(svm.LinearSVC()).fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    return y_pred_train, y_pred_test
remaining_x = np.genfromtxt(
    './Datasets/ismir04_genre/RemainingFeaturesISMIR.csv', delimiter=',')
X = np.hstack((X, remaining_x))
scale = MinMaxScaler((-1, 1))
scaled_x = scale.fit_transform(X)

ovo_feature_sets = np.load('./Results/Experiment2/ISMIR/ind_ovo_fs.npy')
ovo_fs_hs = ovo_feature_sets[:, 0]
ovo_fs_cs = ovo_feature_sets[:, 1]
ovo_fs_dfa = ovo_feature_sets[:, 2]

clf = svm.SVC(kernel='rbf', C=3, gamma=0.02)
ovo_clf = multiclass.OneVsOneClassifier(clf)
ovo_clf_hs = ovo_multi_features.OneVsOneClassifier(clf, ovo_fs_hs)
ovo_clf_cs = ovo_multi_features.OneVsOneClassifier(clf, ovo_fs_cs)
ovo_clf_dfa = ovo_multi_features.OneVsOneClassifier(clf, ovo_fs_dfa)

cv = len(np.unique(y))
dataset_size = len(y)
scores_array_length = cv * 10
no_fs_time = np.empty(scores_array_length)
hs_time = np.empty(scores_array_length)
cs_time = np.empty(scores_array_length)
dfa_time = np.empty(scores_array_length)
no_fs_score = np.empty(scores_array_length)
hs_score = np.empty(scores_array_length)
    min_samples_leaf=3, oob_score=True, bootstrap=True, random_state=4)
_RFC.fit(Xconcat.drop('y', axis=1), Xconcat.y)
_RFC.score(Xconcat.drop('y', axis=1), Xconcat.y)
_RFC.oob_score_
_RFC.score(testConcat.drop('y', axis=1), testConcat.y)
_RFC.feature_importances_

### Multi-class tests ###
_SVC = svm.SVC(class_weight='balanced')  # 'auto' was renamed to 'balanced' in scikit-learn

# One versus one
_OVO = multiclass.OneVsOneClassifier(_SVC, n_jobs=-1)
_OVO.fit(X.get_values(), y.get_values())
_OVO.score(Xtest.get_values(), ytest.get_values())

# One versus rest
_OVR = multiclass.OneVsRestClassifier(_SVC, n_jobs=-1)
_OVR.fit(X.get_values(), y.get_values())
_OVR.score(Xtest.get_values(), ytest.get_values())

# ECOC
for i in np.arange(1, 10):
    _ECOC = multiclass.OutputCodeClassifier(_SVC, code_size=i, n_jobs=-1)
    _ECOC.fit(X.get_values(), y.get_values())
    _ECOC.score(Xtest.get_values(), ytest.get_values())

# Random Forest using the initial values, without SVM
def svm(data, kernel, classification, weighted=False, plot=False, onePlot=False):
    # list of labels
    labels = ["1", "2", "3", "5", "6", "7"]
    # initialize lists for confusion matrices, accuracies, and best parameters
    confusions = []
    accuracies = []
    bestParams = []
    plots = []
    timePerFold = []
    # set the proper classification
    if classification == 'ovo':
        # if weighted option is chosen, balance the dataset so that the weights
        # are inversely proportional to frequency
        if weighted == False:
            svc = multiclass.OneVsOneClassifier(SVC(kernel=kernel))
        else:
            svc = multiclass.OneVsOneClassifier(
                SVC(kernel=kernel, class_weight='balanced'))
        # initialize parameters for GridSearchCV
        params = {
            "estimator__C": [0.01, 1, 10, 100, 500, 1000],
            'estimator__gamma': [0.01, 1, 10]
        }
        c = "estimator__C"
        gamma = 'estimator__gamma'
        degree = 'estimator__degree'
    elif classification == 'ovr':
        svc = SVC(kernel=kernel)
        # initialize parameters for GridSearchCV
        params = {"C": [0.01, 1, 10, 100, 500, 1000], "gamma": [0.01, 1, 10]}
        c = "C"
        gamma = 'gamma'
        degree = 'degree'
    else:
        print('Invalid Classifier Type')
        return
    # if we have a polynomial kernel we want to reduce the penalty parameter
    # to reduce training time
    if kernel == 'poly':
        params[c] = [0.001, 0.01, 0.1]
        params[degree] = [2, 3, 4]
    elif kernel == 'sigmoid':
        params[gamma].append(0.001)
        params[gamma].append(0.0001)
    # split the data into features and labels
    data = data.values
    x = data[:, 0:9]
    y = data[:, 9:]
    # normalize the data
    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    # perform k-fold cross validation, in this case we're using 5 folds
    kf = KFold(n_splits=5, random_state=1, shuffle=True)
    foldNumber = 0
    totalTime = 0
    for train, test in kf.split(data):
        # increase which fold we're on
        foldNumber += 1
        # get training and test splits
        x_train, x_test = x[train], x[test]
        y_train, y_test = y[train], y[test]
        y_train, y_test = y_train.ravel(), y_test.ravel()
        # perform Grid Search on the training sets
        clf = GridSearchCV(svc, cv=5, param_grid=params, iid=True)
        # start time for the training
        t0 = time.time()
        # fit the model
        clf.fit(x_train, y_train)
        # end time for the training
        t1 = time.time()
        timePerFold.append(t1 - t0)
        totalTime += t1 - t0
        # test the model
        y_pred = clf.predict(x_test)
        # save the best parameters for this fold
        bestParams.append(clf.best_params_)
        # plot the grid search results, save for later
        if (plot and kernel != 'poly') or onePlot:
            plots.append(
                plotGridSearch(clf.cv_results_, params[c], params[gamma], "C",
                               "Gamma", foldNumber, kernel, classification))
        elif plot and kernel == 'poly':
            plots.append(
                plotGS3D(clf.cv_results_, params[c], params[gamma],
                         params[degree], foldNumber, classification))
        # append confusion matrix and accuracy to respective list
        accuracies.append(accuracy_score(y_test, y_pred))
        confusions.append(confusion_matrix(y_test, y_pred))
    # get mean accuracy
    meanAccuracy = np.mean(accuracies) * 100
    # add weighted to kernel name if applicable
    if weighted:
        kernel += "_Weighted"
    # save all the plots as a pdf
    if plot:
        # if we are on the polynomial kernel, flatten the list of lists
        if kernel == 'poly' or kernel == 'poly_Weighted':
            plots = [plot for subplot in plots for plot in subplot]
        file = pdf.PdfPages(kernel.title() + "_Kernel_" +
                            classification.upper() + "_Classification.pdf")
        for fig in plots:
            file.savefig(fig, bbox_inches='tight')
            plt.close(fig)
        file.close()
        # get averaged confusion matrix
        confusions = [
            pd.DataFrame(data=c, columns=labels, index=labels)
            for c in confusions
        ]
        concatCM = pd.concat(confusions)
        cm_total = concatCM.groupby(concatCM.index)
        cm_average = cm_total.mean()
        # plot average confusion matrix
        plotConfusionMatrix(cm_average, kernel, classification)
    elif onePlot:
        plots[1].show()
    # print some useful information
    print("-" * 300)
    print("Classification: ", classification)
    print("Kernel: ", kernel)
    print("Mean Accuracy: ", meanAccuracy)
    print("Time it took to train: ", totalTime)
    print("Time per Fold", timePerFold)
    print("Best Parameters per Fold: ", bestParams)
    print()
    return totalTime, meanAccuracy
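# Sketch of why the grid above prefixes SVC parameters with `estimator__`: when
# the SVC is wrapped in OneVsOneClassifier, GridSearchCV reaches the inner
# parameters through the wrapper. The grid values and the iris data here are
# illustrative only.
from sklearn import datasets, multiclass
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X_iris, y_iris = datasets.load_iris(return_X_y=True)
ovo = multiclass.OneVsOneClassifier(SVC(kernel='rbf'))
grid = GridSearchCV(ovo,
                    param_grid={'estimator__C': [0.1, 1, 10],
                                'estimator__gamma': [0.01, 0.1, 1]},
                    cv=3)
grid.fit(X_iris, y_iris)
print(grid.best_params_)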
ovr = multiclass.OneVsRestClassifier(gbc)
y_pred = ovr.fit(X_train, y_train).predict(X_test)
for i in range(0, len(y_pred)):
    if y_pred[i] == y_test[i]:
        n = n + 1
    box[y_test[i] - 1, y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
print("One vs. Rest: ", n / 0.72)
for i in range(0, 6):
    for j in range(0, 6):
        print('{:5.0f} '.format(box[i, j]), end='')
    print()

n = 0
box = np.zeros([6, 6])
ovo = multiclass.OneVsOneClassifier(gbc)
y_pred = ovo.fit(X_train, y_train).predict(X_test)
for i in range(0, len(y_pred)):
    if y_pred[i] == y_test[i]:
        n = n + 1
    box[y_test[i] - 1, y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
print("One vs. One: ", n / 0.72)
for i in range(0, 6):
    for j in range(0, 6):
        print('{:5.0f} '.format(box[i, j]), end='')
    print()

for k in range(30, 36, 6):
    box = np.zeros([6, 6])
    accuracy = np.zeros(100)
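# The per-class box bookkeeping above can also be obtained directly from
# scikit-learn; a brief sketch reusing the snippet's y_test / y_pred arrays
# (labels 1..6, as implied by the box indexing).
from sklearn import metrics

print(metrics.accuracy_score(y_test, y_pred) * 100)
print(metrics.confusion_matrix(y_test, y_pred, labels=[1, 2, 3, 4, 5, 6]))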
def oneVOne(x, y, x_test):
    result = mcl.OneVsOneClassifier(LinearSVC()).fit(x, y)
    preds = result.predict(x_test)
    return preds
ovr = multiclass.OneVsRestClassifier(gnb)
y_pred = ovr.fit(X_train, y_train).predict(X_test)
for i in range(0, len(y_pred)):
    if y_pred[i] == y_test[i]:
        n = n + 1
    box[y_test[i] - 1, y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
print("One vs. Rest: ", n / 0.72)
for i in range(0, 6):
    for j in range(0, 6):
        print('{:5.0f} '.format(box[i, j]), end='')
    print()

n = 0
box = np.zeros([6, 6])
ovo = multiclass.OneVsOneClassifier(gnb)
y_pred = ovo.fit(X_train, y_train).predict(X_test)
for i in range(0, len(y_pred)):
    if y_pred[i] == y_test[i]:
        n = n + 1
    box[y_test[i] - 1, y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1
print("One vs. One: ", n / 0.72)
for i in range(0, 6):
    for j in range(0, 6):
        print('{:5.0f} '.format(box[i, j]), end='')
    print()

for k in range(60, 66, 6):
    box = np.zeros([6, 6])
    accuracy = np.zeros(100)
    class_weight=None, epsilon=0.1, eta0=0.0, fit_intercept=True,
    l1_ratio=0.15, learning_rate='optimal', loss='squared_hinge', n_iter=1000,
    n_jobs=1, penalty='l1', power_t=0.5, random_state=None, shuffle=True,
    verbose=0, warm_start=False)

multiOvO = multiclass.OneVsOneClassifier(sgdBase)

# sgd + adaboost
numBoost = 1000
# multiOvO.fit(trainXInput, trainYInput)
boostEps, boostAlp, boostModel = adaBoost(multiOvO, numBoost, trainXInput,
                                          trainYInput)

# prediction
nClasses = trainingTargets.shape[1]
boundaryVal = 0.33

# defined output
baseFileName = '../multiOvOSgd'
outBoost = range(100, numBoost + 1, 100)
for i in range(len(outBoost)):