def predict(transformed_data, args, trn_label, tst_label):
    """Leave-one-subject-out image-stimulus classification (Python 2).

    For each subject, train a linear NuSVC on the concatenated data of all
    other subjects and test on the held-out subject.
    Returns a per-subject accuracy vector of length nsubjs.
    """
    print 'imgpred',
    sys.stdout.flush()
    # transformed_data: ndim x nsample x nsubjs (shared feature space)
    (ndim, nsample, nsubjs) = transformed_data.shape
    accu = np.zeros(shape=nsubjs)
    tst_data = np.zeros(shape=(ndim, nsample))
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))
    # image stimulus prediction
    for tst_subj in range(nsubjs):
        tst_data = transformed_data[:, :, tst_subj]
        # all subjects except the test subject (py2: range returns a list)
        trn_subj = range(nsubjs)
        trn_subj.remove(tst_subj)
        # stack training subjects side by side along the sample axis
        for m in range(nsubjs - 1):
            trn_data[:, m * nsample:(m + 1) * nsample] = transformed_data[:, :, trn_subj[m]]
        # scikit-learn svm for classification
        #clf = NuSVC(nu=0.5, kernel = 'linear')
        clf = NuSVC(nu=0.5, kernel='linear')
        clf.fit(trn_data.T, trn_label)
        pred_label = clf.predict(tst_data.T)
        # fraction of correctly predicted labels for this subject
        accu[tst_subj] = sum(pred_label == tst_label) / float(len(pred_label))
    return accu
def fd_svm_time_prior(train, test, ytrain, ytest, seq, k):
    """Train a NuSVC on sliding-window-summed sequences and score with a
    temporal prior: once k consecutive non-zero predictions occur, all later
    predictions are forced to 1.

    Returns the fraction of (post-prior) predictions that match test_y.
    NOTE(review): mutates the `train` and `test` lists passed in.
    """
    # each window element becomes the sum of the next `seq` elements
    for i in range(len(train) - seq + 1):
        for j in range(1, seq):
            train[i] = train[i] + train[i + j]
    # drop the trailing elements that have no complete window
    train = train[:-seq + 1]
    train = np.array(train).astype('float64')
    # labels aligned to window ends — assumes ytrain[seq-1:] matches the
    # summed windows; TODO confirm intended alignment
    train_y = np.array(ytrain[seq - 1:]).astype('float64')
    for i in range(len(test) - seq + 1):
        for j in range(1, seq):
            test[i] = test[i] + test[i + j]
    test = test[:-seq + 1]
    test = np.array(test).astype('float64')
    test_y = np.array(ytest[seq - 1:]).astype('float64')
    clf = NuSVC()
    clf.fit(train, train_y)
    predict_y = clf.predict(test)
    # return clf.predict(test)
    predict_y = list(predict_y)
    # temporal prior: after the first run of k non-zero predictions,
    # force every later prediction to 1
    for i in range(len(predict_y) - k + 1):
        if 0 in set(predict_y[i:i + k]):
            continue
        else:
            for j in range(i + k, len(predict_y)):
                predict_y[j] = 1
            break
    # convert predictions in place into per-sample correctness flags
    for i in range(len(predict_y)):
        if predict_y[i] == test_y[i]:
            predict_y[i] = 1
        else:
            predict_y[i] = 0
    return np.average(predict_y)
class RbfSVM:
    """RBF-kernel NuSVC text classifier over tf-idf n-gram features (Python 2)."""

    def __init__(self):
        # NOTE(review): gamma=0.0 is only accepted by old sklearn versions;
        # modern releases require a positive value or 'scale'/'auto'.
        self.clf = NuSVC(nu=0.7, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, verbose=False, max_iter=-1)
        # tokens of 3+ letters only
        self.pattern ='(?u)\\b[A-Za-z]{3,}'
        self.tfidf = TfidfVectorizer(sublinear_tf=False, use_idf=True, smooth_idf=True, stop_words='english', token_pattern=self.pattern, ngram_range=(1, 3))

    def train(self,fileName):
        """Fit the vectorizer and classifier from a TSV of (cat, message)."""
        print "RbfSVM Classifier is being trained"
        table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
        X_train = self.tfidf.fit_transform(table.message)
        Y_train = []
        for item in table.cat:
            Y_train.append(int(item))
        self.clf.fit(X_train, Y_train)
        print "RbfSVM Classifier has been trained"

    def classify(self,cFileName, rFileName):
        """Classify messages from cFileName and write labels to rFileName."""
        table = pandas.read_table(cFileName, names=["message"])
        X_test = self.tfidf.transform(table.message)
        print "Data have been classified"
        with open(rFileName,'w') as f:
            for item in self.clf.predict(X_test).astype(str):
                f.write(item+'\n')

    def validate(self,fileName):
        """Print precision of predictions against labels in a TSV file."""
        table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
        X_validate = self.tfidf.transform(table.message)
        Y_validated = self.clf.predict(X_validate).astype(str)
        totalNum = len(table.cat)
        errorCount = 0
        for i in range(0,totalNum):
            if int(table.cat[i])!=int(Y_validated[i]):
                errorCount += 1
        print "Data have been validated! Precision={}".format((totalNum-errorCount)/float(totalNum))
def nusvc_model(x_train, y_train, x_val, y_val, x_test, testid):
    """Train a linear NuSVC on standardized features and predict the test set.

    Prints the balanced accuracy on the validation split.
    Returns (test predictions, testid).
    """
    scaler = StandardScaler()
    x_stand_train = scaler.fit_transform(x_train)
    x_stand_val = scaler.transform(x_val)
    x_stand_test = scaler.transform(x_test)
    #nus = [0.05,0.1,0.2,0.3,0.4,0.5,0.6]
    #for i in range(len(nus)):
    clf = NuSVC(nu=0.3, kernel="linear", probability=True, decision_function_shape="ovo", gamma="scale", class_weight="balanced")
    # BUG FIX: the original fit/predicted on the raw matrices, leaving the
    # standardized x_stand_* arrays computed but unused.
    clf.fit(x_stand_train, y_train)
    y_pred_val = clf.predict(x_stand_val)
    BMAC = balanced_accuracy_score(y_val, y_pred_val)
    print("BMAC of this model: ", BMAC)
    print("\n")
    print("=" * 30)
    y_pred = clf.predict(x_stand_test)
    return y_pred, testid
def predict_loo(transformed_data, args, trn_label ,tst_label):
    """Leave-one-out image-stimulus prediction for the subject args.loo
    (Python 2). Trains a linear NuSVC on all other subjects and returns the
    accuracy on the held-out subject.
    """
    print 'imgpred loo',
    print args.loo,
    sys.stdout.flush()
    # transformed_data: ndim x nsample x nsubjs
    (ndim, nsample , nsubjs) = transformed_data.shape
    loo = args.loo
    # indices of the training subjects (py2: range returns a list)
    loo_idx = range(nsubjs)
    loo_idx.remove(loo)

    #tst_data = np.zeros(shape = (ndim,nsample))
    trn_data = np.zeros(shape = (ndim,(nsubjs-1)*nsample))

    # image stimulus prediction
    # tst_data : ndim x nsample
    tst_data = transformed_data[:,:,loo]

    # stack training subjects side by side along the sample axis
    for m in range(len(loo_idx)):
        trn_data[:,m*nsample:(m+1)*nsample] = transformed_data[:,:,loo_idx[m]]

    # scikit-learn svm for classification
    clf = NuSVC(nu=0.5, kernel = 'linear')
    clf.fit(trn_data.T, trn_label)
    pred_label = clf.predict(tst_data.T)

    accu = sum(pred_label == tst_label)/float(len(pred_label))
    return accu
def cross_validation(type):
    """Run stratified 8-fold cross-validation with the classifier named by
    `type` ('NuSMV', 'LinearSMV', or anything else -> decision tree) over the
    tf-idf features, returning (mean weighted F1, mean accuracy).

    NOTE: parameter name `type` shadows the builtin but is kept for
    interface compatibility.
    """
    f1 = 0
    acc = 0
    skf = StratifiedKFold(n_splits=8)
    df_x, df_y, model = tfidf([], [])
    df_x = model
    # BUG FIX: the original used `is` (identity) to compare string literals,
    # which is implementation-dependent; use == for equality.
    if type == 'NuSMV':
        clf = NuSVC()
    elif type == 'LinearSMV':
        clf = LinearSVC()
    else:
        clf = DecisionTreeClassifier()
    for train_index, test_index in skf.split(df_x, df_y):
        x_train, x_test = df_x[train_index], df_x[test_index]
        y_train, y_test = df_y[train_index], df_y[test_index]
        clf.fit(x_train, y_train)
        prediction = clf.predict(x_test)
        # print(classification_report(y_test, prediction))
        f1 += f1_score(y_test, prediction, average='weighted')
        acc += accuracy_score(y_test, prediction)
    return f1 / 8, acc / 8
def Nusvc(X, y, xtest):
    """Fit a Nu-SVM on (X, y) and return its predictions for xtest."""
    print("Nu Support-Vector-Machine")
    from sklearn.svm import NuSVC
    model = NuSVC(random_state=0, gamma='auto')
    model.fit(X, y)
    return model.predict(xtest)
def predict(transformed_data, args, trn_label ,tst_label):
    """Leave-one-subject-out image-stimulus classification (Python 2).

    Trains a linear NuSVC on all subjects except one and tests on the
    held-out subject; returns a per-subject accuracy vector.
    """
    print 'imgpred',
    sys.stdout.flush()
    # transformed_data: ndim x nsample x nsubjs
    (ndim, nsample , nsubjs) = transformed_data.shape

    accu = np.zeros(shape=nsubjs)

    tst_data = np.zeros(shape = (ndim,nsample))
    trn_data = np.zeros(shape = (ndim,(nsubjs-1)*nsample))
    # image stimulus prediction
    for tst_subj in range(nsubjs):
        tst_data = transformed_data[:,:,tst_subj]

        # training subjects are everyone but tst_subj (py2 list range)
        trn_subj = range(nsubjs)
        trn_subj.remove(tst_subj)

        for m in range(nsubjs-1):
            trn_data[:,m*nsample:(m+1)*nsample] = transformed_data[:,:,trn_subj[m]]

        # scikit-learn svm for classification
        #clf = NuSVC(nu=0.5, kernel = 'linear')
        clf = NuSVC(nu=0.5, kernel = 'linear')
        clf.fit(trn_data.T, trn_label)
        pred_label = clf.predict(tst_data.T)

        accu[tst_subj] = sum(pred_label == tst_label)/float(len(pred_label))
    return accu
def svm_support_vector_nu_classification(self, nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0, shrinking=True, probability=False, max_iter=1):
    """
    Nu-Support Vector Classification.
    Similar to SVC but uses a parameter to control the number of support vectors.

    Fits a NuSVC on the instance's private training data and stores the
    fitted model in self.__model; returns None (the original docstring's
    ":return: probability, conf_matrix" did not match the code).

    :param nu: Fraction of training errors and a lower bound of the fraction of support vectors. Must be between (0,1]
    :param kernel: Kernel type to use in the algorithm, inputs: linear, poly, rbf, sigmoid, precomputed
    :param degree: Degree of the polynomial kernel function (only works if kernel = poly).
    :param gamma: Kernel coefficient for rbf, poly and sigmoid.
    :param max_iter: Max limit of iterations, -1 for no limit.
        NOTE(review): the default of 1 stops after a single iteration, which
        almost never converges — presumably -1 was intended; confirm.
    :param probability: Whether or not enable probabilities, it will slow down the computation.
    :param shrinking: whether or not to use shrinking heuristic.
    :param coef0: independent kernel parameter that only works with poly and sigmoid.
    """
    model = NuSVC(nu=nu, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, shrinking=shrinking, probability=probability, max_iter=max_iter)
    model.fit(self.__x_train, self.__y_train)
    self.__model = model
def predict_loo(transformed_data, args, trn_label, tst_label):
    """Leave-one-out image-stimulus prediction for subject args.loo
    (Python 2 print syntax). Returns accuracy on the held-out subject.
    """
    print 'imgpred loo',
    print args.loo,
    sys.stdout.flush()
    # transformed_data: ndim x nsample x nsubjs
    (ndim, nsample, nsubjs) = transformed_data.shape
    loo = args.loo
    # training subjects are everyone but `loo` (py2 list range)
    loo_idx = range(nsubjs)
    loo_idx.remove(loo)
    #tst_data = np.zeros(shape = (ndim,nsample))
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))
    # image stimulus prediction
    # tst_data : ndim x nsample
    tst_data = transformed_data[:, :, loo]
    # stack the training subjects along the sample axis
    for m in range(len(loo_idx)):
        trn_data[:, m * nsample:(m + 1) * nsample] = transformed_data[:, :, loo_idx[m]]
    # scikit-learn svm for classification
    clf = NuSVC(nu=0.5, kernel='linear')
    clf.fit(trn_data.T, trn_label)
    pred_label = clf.predict(tst_data.T)
    accu = sum(pred_label == tst_label) / float(len(pred_label))
    return accu
class svm():
    """Thin wrapper around NuSVC that L2-normalizes inputs before fit/predict."""

    def __init__(self):
        # self.clf = SVC(kernel='rbf')
        self.clf = NuSVC()

    def train(self, inputs):
        """Fit the classifier.

        Parameters:
            inputs: An array of Input objects containing input vectors along
                with their corresponding labels.
        """
        X = []
        Y = []
        for data in inputs:
            # normalize each vector to unit length
            X.append((data.x/np.linalg.norm(data.x)))
            Y.append(data.y)
        # Fit model
        self.clf.fit(X, Y)

    def predict(self, input):
        """Predict a label for either an Input object or a raw vector.

        Parameters:
            input: An Input object (uses input.x) or a bare vector.
        """
        # BUG FIX: the original computed input.x before the isinstance check,
        # raising AttributeError for raw-vector inputs and making the else
        # branch unreachable. Normalize inside the appropriate branch.
        if isinstance(input, Input):
            x = input.x / np.linalg.norm(input.x)
        else:
            x = input / np.linalg.norm(input)
        return self.clf.predict(x)
def nu(newX, y, newDev, devLabel):
    """Fit a NuSVC on (newX, y) and evaluate it on the dev split.

    Returns a boolean array marking the dev samples whose prediction
    differs from devLabel (the misclassification mask).
    """
    clNu = NuSVC(gamma='scale')
    clNu.fit(newX, y)
    nuResult = clNu.predict(newDev)
    finalResult = nuResult != devLabel
    # BUG FIX: the original computed the mask but fell off the end,
    # implicitly returning None.
    return finalResult
def svm_nu(training, labels, test, real):
    """Fit NuSVC models with several kernels and print test accuracy for each."""
    for kernel_name in ('rbf', 'poly', 'linear'):
        model = NuSVC(kernel=kernel_name, nu=0.38, degree=3, gamma=0.00005, coef0=1)
        model.fit(training, labels)
        accuracy = model.score(test, real)
        print("kernel: ", kernel_name, ", accuracy: ", accuracy)
def fit(self, X, Y, W):
    """Fit a NuSVC configured from this object's hyperparameters.

    X: feature matrix; Y: labels (flattened); W: optional per-sample
    weights (flattened), passed as sklearn's sample_weight when not None.
    Returns the fitted model wrapped in NuSVMClassifier.
    """
    clf = NuSVC(nu=self.nu, kernel=self.kernel, degree=self.degree, gamma=self.gamma, coef0=self.coef0, shrinking=self.shrinking, probability=self.probability, tol=self.tol, cache_size=self.cache_size, max_iter=self.max_iter)
    if W is not None:
        # third positional argument of NuSVC.fit is sample_weight
        return NuSVMClassifier(clf.fit(X, Y.reshape(-1), W.reshape(-1)))
    return NuSVMClassifier(clf.fit(X, Y.reshape(-1)))
def optimize_clf(nf, optimize=1): acc_list = [ ] #array with accuracies for each pair within each LOOVC fold def nf_select(nf): #fselector = mvpa2.FixedNElementTailSelector(np.round(nf), tail='upper',mode='select', sort=False) #sbfs = mvpa2.SensitivityBasedFeatureSelection(mvpa2.OneWayAnova(), fselector, enable_ca=['sensitivities'], auto_train=True) if (optimize >= 1): not_test_ds = ds[ds.chunks != chunk] val_ds = not_test_ds[not_test_ds.chunks == val_chunk] train_ds = not_test_ds[not_test_ds.chunks != val_chunk] #sbfs.train(train_ds) #train_ds = sbfs(train_ds) #val_ds = sbfs(val_ds) return train_ds, val_ds elif (optimize == 0): train_ds = ds[ds.chunks != chunk] test_ds = ds[ds.chunks == chunk] #sbfs.train(train_ds) #train_ds = sbfs(train_ds) #test_ds = sbfs(test_ds) return train_ds, test_ds train_ds, not_train_ds = nf_select(nf) for y in range(0, len(pair_list2)): def mask(y, train_ds, test_ds): stim_mask1 = (train_ds.targets == pair_list2[y][0]) | ( train_ds.targets == pair_list2[y][1]) stim_mask2 = (not_train_ds.targets == pair_list2[y][0]) | ( not_train_ds.targets == pair_list2[y][1]) ds_temp_train = train_ds[stim_mask1] ds_temp_not_train = not_train_ds[stim_mask2] return ds_temp_train, ds_temp_not_train ds_temp_train, ds_temp_not_train = mask( y, train_ds, not_train_ds) #clf = mvpa2.LinearNuSVMC(nu=0.5)#defines a classifier, linear SVM in this case clf = NuSVC(nu=0.5, max_iter=2000) #clf = SKLLearnerAdapter(knn) #clf = SKLLearnerAdapter(linear_model.SGDClassifier()) #clf.train(ds_temp_train) clf.fit(ds_temp_train.samples, ds_temp_train.targets) #predictions = clf.predict(ds_temp_not_train) predictions = clf.predict(ds_temp_not_train.samples) labels = ds_temp_not_train.targets bool_vec = predictions == labels acc_list.append( sum(bool_vec) / float(len(bool_vec))) #array with accuracies for each pair if (optimize == 1): #print len(acc_list) #print np.mean(acc_list) return 1 - np.mean(acc_list) else: #print np.mean(acc_list), 'for chunk:', chunk return acc_list
def SVM_nuSVC(self):
    """Fit a NuSVC on self.X_train / self.y_train and return the fitted model."""
    # BUG FIX: the original passed kernel=b'rbf' (a bytes literal), which
    # modern scikit-learn rejects; kernel must be a str.
    # NOTE(review): decision_function_shape=None is deprecated in recent
    # sklearn ('ovr'/'ovo' expected) — kept for behavior compatibility.
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=None, random_state=None)
    print('nuSVC Classifier is fitting...')
    clf.fit(self.X_train, self.y_train)
    return clf
def NuSVM(x, y):
    """Search nu over 0.1 .. 1.0 and return the first NuSVC that fits (x, y).

    Infeasible nu values raise ValueError inside sklearn; those are reported
    and skipped. Returns None if no nu is feasible.
    """
    for tenths in range(1, 11, 1):
        nu = tenths / 10
        nusvc = NuSVC(nu=nu)
        try:
            nusvc.fit(x, y)
            return nusvc
        except ValueError:
            print("nu {} not feasible".format(nu))
def svc(x_train, y_train, x_test, y_test): clf = NuSVC() # class clf.fit(x_train, y_train) # training the svc model result = clf.predict(x_test) # predict the target of testing samples predict_list = result.tolist() cnt_true = 0 for i in range(len(y_test)): if int(predict_list[i]) == int(y_test[i]): cnt_true += 1 print float(cnt_true) / float(len(y_test))
def fit(self, X, y, sample_weight=None):
    """Fit the NuSVC, dispatching to the oneDAL backend for the kernels it
    supports and falling back to stock scikit-learn otherwise.

    Returns self (sklearn estimator convention).
    """
    if self.kernel in ['linear', 'rbf', 'poly', 'sigmoid']:
        # accelerated path
        logging.info("sklearn.svm.NuSVC.fit: " + get_patch_message("onedal"))
        self._onedal_fit(X, y, sample_weight)
    else:
        # e.g. 'precomputed' — fall back to the reference implementation
        logging.info("sklearn.svm.NuSVC.fit: " + get_patch_message("sklearn"))
        sklearn_NuSVC.fit(self, X, y, sample_weight)
    return self
def train_test_SVM(self, X_train, y_train, X_test, y_test):
    """Fit a default NuSVC on the training split, predict the test split,
    and print the prediction shape plus the confusion matrix."""
    print('Training SVM Classifier')
    model = NuSVC()
    model.fit(X_train, y_train)
    print('Testing SVM Classifier')
    predictions = model.predict(X_test)
    print(predictions.shape)
    matrix = confusion_matrix(y_test, predictions)
    print(matrix)
def update_event(self, input_called=-1):
    """Node-graph event handler: when triggered on input 0, build a NuSVC,
    apply optional params from input 1, fit on inputs 2/3, and emit the
    fitted classifier on output 1 before firing exec output 0."""
    if input_called == 0:
        clf = NuSVC()
        # idiom fix: compare against None with `is not`, not `!=`
        if self.input(1) is not None:
            clf.set_params(**self.input(1))
        X = self.input(2)
        y = self.input(3)
        clf.fit(X, y)
        self.set_output_val(1, clf)
        self.exec_output(0)
def _test_nu_svc(self, num_classes, backend="torch", extra_config=None):
    """Convert a fitted NuSVC with hummingbird and check the converted
    model's predictions match sklearn's on random data."""
    # BUG FIX: avoid the mutable default argument `extra_config={}`
    if extra_config is None:
        extra_config = {}
    model = NuSVC()
    np.random.seed(0)  # deterministic fixture
    X = np.random.rand(100, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)
    torch_model = hummingbird.ml.convert(model, backend, X, extra_config=extra_config)
    self.assertTrue(torch_model is not None)
    np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)
def NonLinearSupportVectorMachine(x_train, y_train, x_cv, y_cv):
    """Fit a non-linear (default RBF) NuSVC on the training split and
    return the fitted classifier; the cv arguments are accepted for
    interface compatibility but not used."""
    model = NuSVC(probability=False)
    model.fit(x_train, y_train)
    return model
def testing():
    """Sweep a NuSVC hyperparameter index 1..9 (via get_kwargs), average
    validation accuracy over 20 random splits each, and plot the curve
    (Python 2: xrange).
    """
    plot_x = range(1, 10)
    plot_y = []
    for i in xrange(1,10):
        vals = []
        # average over 20 random train/validation splits
        for _ in xrange(20):
            train_data, validation_data, train_labels, validation_labels = split_data()
            clf = NuSVC(**get_kwargs(i))
            clf.fit(train_data, train_labels)
            vals.append(check_fit(clf.predict(validation_data), validation_labels))
        plot_y.append(np.mean(vals))
    plot_results(plot_x, plot_y)
def fit_model_7(self,toWrite=False):
    """Fit a linear-kernel NuSVC across the CV folds, printing the log-loss
    for each fold, and optionally pickle the last fitted model."""
    model = NuSVC(probability=True,kernel='linear')
    for data in self.cv_data:
        X_train, X_test, Y_train, Y_test = data
        model.fit(X_train,Y_train)
        pred = model.predict_proba(X_test)[:,1]
        print("Model 7 score %f" % (logloss(Y_test,pred),))
    if toWrite:
        # BUG FIX: pickle needs a binary file handle ('wb', not 'w');
        # use a context manager so the file is closed on error too.
        with open('model7/model.pkl','wb') as f2:
            pickle.dump(model,f2)
def sigmoidNuSVC():
    """Grid-search gamma in 1..199 for a sigmoid-kernel NuSVC on the
    validation split, refit with the best gamma, and record the test score
    in guideToGraph (Python 2; uses module-level data globals)."""
    maxRandomPerformance = []
    for gamma in xrange(1,200):
        clf = NuSVC(kernel="sigmoid",gamma=gamma)
        clf.fit(trainData, trainLabel)
        maxRandomPerformance.append(clf.score(validationData, validationLabel))
    # best index + 1 maps back to the gamma value (grid starts at 1)
    gammaValue = maxRandomPerformance.index(max(maxRandomPerformance)) + 1
    clfFinal = NuSVC(kernel='sigmoid', gamma=gammaValue)
    clfFinal.fit(trainData,trainLabel)
    score = clfFinal.score(testData,testLabel)
    guideToGraph['Sigmoid Nu-SVC'] = score
def sigmoidNuSVC():
    """Duplicate of the sigmoid-kernel gamma sweep: pick the gamma in 1..199
    with the best validation score, refit, and record the test score
    (Python 2; uses module-level data globals)."""
    maxRandomPerformance = []
    for gamma in xrange(1, 200):
        clf = NuSVC(kernel="sigmoid", gamma=gamma)
        clf.fit(trainData, trainLabel)
        maxRandomPerformance.append(clf.score(validationData, validationLabel))
    # best index + 1 maps back to the gamma value (grid starts at 1)
    gammaValue = maxRandomPerformance.index(max(maxRandomPerformance)) + 1
    clfFinal = NuSVC(kernel='sigmoid', gamma=gammaValue)
    clfFinal.fit(trainData, trainLabel)
    score = clfFinal.score(testData, testLabel)
    guideToGraph['Sigmoid Nu-SVC'] = score
def polyNuSVC():
    """Grid-search the polynomial degree in 1..199 on the validation split,
    refit with the best degree, and record the test score in guideToGraph
    (Python 2; uses module-level data globals)."""
    maxRandomPerformance = []
    for deg in xrange(1,200):
        clf = NuSVC(kernel="poly",degree=deg)
        clf.fit(trainData, trainLabel)
        maxRandomPerformance.append(clf.score(validationData, validationLabel))
    # best index + 1 maps back to the degree value (grid starts at 1)
    bestDegree = maxRandomPerformance.index(max(maxRandomPerformance)) + 1
    # BUG FIX: the search tuned `degree`, but the original built the final
    # model with gamma=<best degree> and the default degree.
    clfFinal = NuSVC(kernel='poly', degree=bestDegree)
    clfFinal.fit(trainData,trainLabel)
    score = clfFinal.score(testData,testLabel)
    guideToGraph['Polynomial Nu-SVC'] = score
def fit(self, X, Y, W):
    """Fit a NuSVC configured from this object's hyperparameters.

    X: feature matrix; Y: labels (flattened); W: weight matrix — weights are
    used as sklearn's sample_weight only when W has at least one column.
    Returns the fitted model wrapped in NuSVMClassifier.
    """
    clf = NuSVC(nu=self.nu, kernel=self.kernel, degree=self.degree, gamma=self.gamma, coef0=self.coef0, shrinking=self.shrinking, probability=self.probability, tol=self.tol, cache_size=self.cache_size, max_iter=self.max_iter)
    if W.shape[1] > 0:
        # third positional argument of NuSVC.fit is sample_weight
        return NuSVMClassifier(clf.fit(X, Y.reshape(-1), W.reshape(-1)))
    return NuSVMClassifier(clf.fit(X, Y.reshape(-1)))
def buildModel(self, train_data, train_labels):
    """Instantiate the classifier selected by self.classifierName, fit it on
    the given data, and return the fitted model. Unknown names fall back to
    self.classifierType()."""
    #print("%s: " % self.classifierName)
    if self.classifierName == "SVM with sigmoid kernel":
        model = NuSVC(kernel='sigmoid')
    elif self.classifierName == "SVM with polinomial kernel":
        # note: the comparison string is intentionally spelled 'polinomial'
        # to match callers
        model = NuSVC(kernel='poly')
    # removed a duplicated, unreachable second "sigmoid" branch
    elif self.classifierName == "SVM with RBF kernel":
        model = NuSVC()
    else:
        model = self.classifierType()
    model.fit(train_data, train_labels)
    return model
def svm(train_feature, train_label):
    """Fit an RBF NuSVC with probability estimates, print the training AUC,
    and return (training accuracy, positive-class probabilities)."""
    model = NuSVC(kernel='rbf', gamma='scale', probability=True)
    model.fit(train_feature, train_label)
    probabilities = model.predict_proba(train_feature)
    positive_scores = [row[1] for row in probabilities]
    print ('The auc is: {}'.format(roc_auc_score(train_label, positive_scores)))
    return model.score(train_feature, train_label), positive_scores
def polyNuSVC():
    """Duplicate of the polynomial-degree sweep: pick the degree in 1..199
    with the best validation score, refit, and record the test score
    (Python 2; uses module-level data globals)."""
    maxRandomPerformance = []
    for deg in xrange(1, 200):
        clf = NuSVC(kernel="poly", degree=deg)
        clf.fit(trainData, trainLabel)
        maxRandomPerformance.append(clf.score(validationData, validationLabel))
    # best index + 1 maps back to the degree value (grid starts at 1)
    bestDegree = maxRandomPerformance.index(max(maxRandomPerformance)) + 1
    # BUG FIX: the search tuned `degree`, but the original built the final
    # model with gamma=<best degree> and the default degree.
    clfFinal = NuSVC(kernel='poly', degree=bestDegree)
    clfFinal.fit(trainData, trainLabel)
    score = clfFinal.score(testData, testLabel)
    guideToGraph['Polynomial Nu-SVC'] = score
def nusvc_classifier(dir_models, ticket, x, x_test, y, y_test):
    """Train a NuSVC(nu=0.8), print a classification report, persist the
    model under dir_models with an incrementing index, and return the
    test-set accuracy."""
    print('getting model...NuSVC')
    clf = NuSVC(nu=0.8)
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    # idiom fix: the original named this local `id`, shadowing the builtin
    model_index = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_nusvc_' + str(model_index) + '.pkl')
    return clf.score(x_test, y_test)
class NuSVM(object):
    """Wrapper that trains a probabilistic NuSVC and delegates metric
    reporting to the project's responseTraining helper."""
    #building
    def __init__(self, dataset, target, kernel, nu, degree, gamma, validation):
        # training data and hyperparameters are stored; fitting happens
        # in trainingMethod
        self.dataset = dataset
        self.target = target
        self.kernel = kernel
        self.validation = validation
        self.nu = nu
        self.degree = degree
        self.gamma = gamma

    def trainingMethod(self, kindDataSet):
        """Fit the NuSVC and compute performance metrics.

        kindDataSet == 1 selects the single-label metric path; anything
        else uses the multilabel path.
        """
        self.model = NuSVC(kernel=self.kernel, degree=self.degree, gamma=self.gamma, nu=self.nu, probability=True)
        self.NuSVMAlgorithm = self.model.fit(self.dataset, self.target)
        params = "kernel:%s-degree:%f-gamma:%f-nu:%f-probability:True" % ( self.kernel, self.degree, self.gamma, self.nu)
        self.performanceData = responseTraining.responseTraining( self.NuSVMAlgorithm, 'NuSVM', params, self.validation)
        if kindDataSet == 1:
            self.performanceData.estimatedMetricsPerformance( self.dataset, self.target)
        else:
            self.performanceData.estimatedMetricsPerformanceMultilabels( self.dataset, self.target)
def fd_svm_time(train, test, ytrain, ytest, seq):
    """Train a NuSVC on sliding-window-summed sequences of length `seq`
    and return its accuracy on the equivalently windowed test data.

    NOTE(review): mutates the `train` and `test` lists passed in.
    """
    # each window element becomes the sum of the next `seq` elements
    for i in range(len(train) - seq + 1):
        for j in range(1, seq):
            train[i] = train[i] + train[i + j]
    # drop trailing elements without a complete window
    train = train[:-seq + 1]
    train = np.array(train).astype('float64')
    # labels aligned to window ends — assumes ytrain[seq-1:] matches the
    # summed windows; TODO confirm intended alignment
    train_y = np.array(ytrain[seq - 1:]).astype('float64')
    for i in range(len(test) - seq + 1):
        for j in range(1, seq):
            test[i] = test[i] + test[i + j]
    test = test[:-seq + 1]
    test = np.array(test).astype('float64')
    test_y = np.array(ytest[seq - 1:]).astype('float64')
    clf = NuSVC()
    clf.fit(train, train_y)
    return clf.score(test, test_y)
def NuSVCMethod(trainData, testData, trainLabel, testLabel):
    """Train a default NuSVC and report its test accuracy plus elapsed time
    in an info dict {'name', 'accuracy', 'time', 'remark'}."""
    info = {'name': 'NuSVCMethod', 'accuracy': 0, 'time': 0, 'remark': ''}
    startTime = time.time()
    from sklearn.svm import NuSVC
    model = NuSVC()
    model.fit(trainData, trainLabel)
    predictions = model.predict(testData)
    score = accuracy_score(testLabel, predictions)
    # timing window deliberately includes fit, predict, and scoring
    info['time'] = time.time() - startTime
    info['accuracy'] = score
    return info
def nu_support_vector_machines(corpus, documents_training, documents_test, words_features, kernel, nu):
    """
    Another implementation of Support Vector Machines algorithm.

    Trains a NuSVC on the training documents' feature vectors and returns
    (original category indices, estimated category indices) for the test set.

    :param corpus: corpus object understood by util_classify
    :param documents_training: iterable of (id, category, annotations)
    :param documents_test: iterable of (id, category, annotations)
    :param words_features: vocabulary used to vectorize documents
    :param kernel: NuSVC kernel name
    :param nu: NuSVC nu parameter
    :return: (original_categories, estimated_categories) as index lists
    """
    print
    print "----- nu-Support Vector Machines algorithm ------"
    print "Creating Training Vectors..."
    categories = util_classify.get_categories(corpus)
    # NOTE(review): these two lists are built but never used; the second
    # loop below rebuilds equivalent data.
    array_vector_training = []
    array_categories = []
    for (id, original_category, annotations) in documents_training:
        array_vector_training.append(util_classify.transform_document_in_vector(annotations, words_features, corpus))
        array_categories.append(util_classify.get_categories(corpus).index(original_category))
    print "Training the algorithm..."
    classifier = NuSVC(nu=nu, kernel=kernel)
    X_train_features = []
    y_train_categories = []
    # Train all
    for (id, original_category, annotations) in documents_training:
        X_train_features.append(util_classify.transform_document_in_vector(annotations, words_features, corpus))
        y_train_categories.append(original_category)
    classifier.fit(np.array(X_train_features), np.array(y_train_categories))
    print "Calculating metrics..."
    estimated_categories = []
    original_categories = []
    for (id, cat_original, annotations) in documents_test:
        # predict returns an array — presumably categories.index works on
        # its single element via equality; verify against util_classify
        cat_estimated = classifier.predict(np.array((util_classify.transform_document_in_vector(annotations, words_features, corpus))))
        estimated_categories.append(categories.index(cat_estimated))
        original_categories.append(categories.index(cat_original))
    return original_categories, estimated_categories
def predict(self):
    """
    Trains a scikit-learn NuSVC (https://scikit-learn.org) on this
    object's training split, predicts labels for the test features,
    scores them against the stored y_test via
    OneHotPredictor.get_accuracy, caches the result in self.acc,
    and returns the accuracy.
    """
    algorithm = NuSVC()
    algorithm.fit(self.X_train, self.y_train)
    y_pred = list(algorithm.predict(self.X_test))
    self.acc = OneHotPredictor.get_accuracy(y_pred, self.y_test)
    return self.acc
def svm_models(x_train, y_train):
    """Fit three SVM variants (RBF SVC, RBF NuSVC, LinearSVC) on the
    training data, print each training accuracy, and return the three
    fitted classifiers in that order."""
    from sklearn.svm import SVC
    from sklearn.svm import NuSVC
    from sklearn.svm import LinearSVC
    rbf_svc = SVC(kernel='rbf', random_state=0)
    rbf_svc.fit(x_train, y_train)
    nu_svc = NuSVC(kernel='rbf', random_state=0)
    nu_svc.fit(x_train, y_train)
    linear_svc = LinearSVC(dual=False)
    linear_svc.fit(x_train, y_train)
    print('SVC training accuracy: ', rbf_svc.score(x_train, y_train))
    print('NuSVC training accuracy: ', nu_svc.score(x_train, y_train))
    print('LinearSVC training accuracy: ', linear_svc.score(x_train, y_train))
    return rbf_svc, nu_svc, linear_svc
def _train(self, X_matrix, y, **kwargs):
    """Train a NuSVC model on the given data.

    Parameters:
        X_matrix (numpy.array): feature matrix built from the training data
        y (numpy.array): label vector corresponding to the feature rows
        **kwargs: forwarded verbatim to the NuSVC constructor

    Returns:
        sklearn.model: the fitted sklearn NuSVC model
    """
    from sklearn.svm import NuSVC
    model = NuSVC(**kwargs)
    model.fit(X_matrix, y)
    return model
def prediction(parent_text, child_text, type):
    """Classify a parent/child text pair as "Attack" or "Support".

    The pair is vectorized with tfidf; a classifier chosen by `type`
    ('NuSMV' -> NuSVC, 'LinearSMV' -> LinearSVC, otherwise decision tree)
    is fit on the remaining rows and predicts the first row.
    NOTE: parameter name `type` shadows the builtin but is kept for
    interface compatibility.
    """
    pair = parent_text + " STOP " + child_text
    df_x, df_y, model = tfidf([pair], [])
    # BUG FIX: the original compared strings (and the int 0 below) with
    # `is`, which tests identity, not equality; use == instead.
    if type == 'NuSMV':
        clf = NuSVC()
    elif type == 'LinearSMV':
        clf = LinearSVC()
    else:
        clf = DecisionTreeClassifier()
    clf.fit(model[1:], df_y)
    prediction = clf.predict(model[0]).tolist()
    if prediction[0] == 0:
        return "Attack"
    else:
        return "Support"
class Classifier:
    """Objective(0)/subjective(1) sentence classifier over tf-idf features.

    Builds count + tf-idf representations of the shuffled corpus at
    construction time; call one of multinomialNB/linearSVC/nuSVC to choose
    and fit the underlying model before predicting.
    """

    def __init__(self, objective_data, subjective_data):
        OBJECTIVE = 0
        SUBJECTIVE = 1
        self.objective_data = objective_data
        self.subjective_data = subjective_data
        self.text = objective_data + subjective_data
        self.labels = [OBJECTIVE for i in objective_data] + [SUBJECTIVE for i in subjective_data]
        # list() so shuffle works on Python 3, where zip is an iterator
        tuple_list = list(zip(self.text, self.labels))
        random.shuffle(tuple_list)
        self.text = [x for x, y in tuple_list]
        # BUG FIX: the original stored the shuffled labels only in
        # self.label and then trained on the *unshuffled* self.labels,
        # misaligning texts and labels. Keep both attributes in sync.
        self.labels = [y for x, y in tuple_list]
        self.label = self.labels
        self.count_vectorizer = CountVectorizer(stop_words="english", min_df=3)
        # count vectorizer and specific classifier that will be used
        self.counts = self.count_vectorizer.fit_transform(self.text)
        self.classifier = None
        self.tf_transformer = TfidfTransformer(use_idf=True)
        self.frequencies = self.tf_transformer.fit_transform(self.counts)

    def multinomialNB(self):
        """Fit a multinomial naive Bayes model on the tf-idf features."""
        self.classifier = MultinomialNB(alpha=.001)
        self.classifier.fit(self.frequencies, self.labels)

    def predict(self, examples):
        """Vectorize `examples` with the fitted transformers and return
        the current classifier's predictions."""
        example_counts = self.count_vectorizer.transform(examples)
        example_tf = self.tf_transformer.transform(example_counts)
        predictions = self.classifier.predict(example_tf)
        return predictions

    def linearSVC(self):
        """Fit a linear SVM on the tf-idf features."""
        self.classifier = LinearSVC()
        self.classifier.fit(self.frequencies, self.labels)

    def nuSVC(self):
        """Fit a NuSVC on the tf-idf features."""
        self.classifier = NuSVC()
        self.classifier.fit(self.frequencies, self.labels)

    def accurracy(self, text, labels):
        """Return the fraction of `text` items predicted as `labels`."""
        prediction = self.predict(text)
        accurracy = 0
        for i in range(len(prediction)):
            if prediction[i] == labels[i]:
                accurracy += 1
        return accurracy / float(len(prediction))

    def f1(self, text, actual):
        """Return the F1 score of predictions on `text` against `actual`."""
        prediction = self.predict(text)
        return f1_score(actual, prediction)
def test_nusvc():
    """Fit a default NuSVC on the module-level Titanic-style data and write
    (PassengerId, Survived) predictions to CLF.csv (Python 2: binary-mode
    csv file and print statement)."""
    # print '==== NuSVC ===='
    # print 'Training...'
    clf = NuSVC()
    clf = clf.fit( train_data, train_labels )
    # print 'Predicting...'
    output = clf.predict(test_data).astype(int)
    # py2 csv convention: open in binary mode
    predictions_file = open("CLF.csv", "wb")
    open_file_object = csv.writer(predictions_file)
    open_file_object.writerow(["PassengerId","Survived"])
    open_file_object.writerows(zip(test_id, output))
    predictions_file.close()
    # print 'Done.'
    print 'NuSVC : '
def svmClassifier():
    """Sweep a parameter 1..199 and, for each value, print test scores of
    RBF (gamma=deg), linear, polynomial (gamma=deg) and sigmoid (gamma=deg)
    NuSVC models (Python 2; uses module-level data globals).

    NOTE(review): the linear model inside the loop does not depend on deg,
    so it is refit identically 199 times.
    """
    for deg in xrange(1,200):
        print deg
        print "RBF Nu-SVC"
        clf = NuSVC(gamma=deg)
        clf.fit(trainData, trainLabel)
        print(clf.score(testData,testLabel))
        print "LINEAR Nu-SVC"
        clf = NuSVC(kernel="linear")
        clf.fit(trainData, trainLabel)
        print(clf.score(testData,testLabel))
        print "POLYNOMIAL Nu-SVC"
        clf = NuSVC(kernel="poly",gamma=deg)
        clf.fit(trainData, trainLabel)
        print(clf.score(testData,testLabel))
        print "SIGMOID Nu-SVC"
        clf = NuSVC(kernel="sigmoid",gamma=deg)
        clf.fit(trainData, trainLabel)
        print(clf.score(testData,testLabel))
def runClassifier(classifier, trainData,trainLabel, testData, testLabel, bestParameters):
    """Refit the classifier named in classifier[0] with its tuned
    hyperparameters from bestParameters, score it on the test split, and
    return (test score - classifier[1]).

    classifier: (name, baseline score) pair; the returned delta is the
    test score minus that baseline.
    Returns None for unrecognized names.
    """
    if classifier[0] == 'KNN':
        neighTest = KNeighborsClassifier(n_neighbors=int(bestParameters['KNN'][0]), algorithm='auto', p=2,weights=bestParameters['KNN'][1])
        neighTest.fit(trainData, trainLabel)
        scoreTest = neighTest.score(testData, testLabel)
        return scoreTest - classifier[1]
    elif classifier[0] == 'Random Forests':
        neighTest = RandomForestClassifier(n_estimators = int(bestParameters['Random Forests'][0]),criterion=bestParameters['Random Forests'][1])
        neighTest.fit(trainData, trainLabel)
        scoreTest = neighTest.score(testData, testLabel)
        return scoreTest - classifier[1]
    elif classifier[0] == 'Linear Nu-SVC':
        # linear NuSVC has no tuned hyperparameter in bestParameters
        clf = NuSVC(kernel="linear")
        clf.fit(trainData, trainLabel)
        scoreTest = clf.score(testData, testLabel)
        return scoreTest - classifier[1]
    elif classifier[0] == 'RBF Nu-SVC':
        clfFinal = NuSVC(gamma = bestParameters['RBF Nu-SVC'])
        clfFinal.fit(trainData,trainLabel)
        score = clfFinal.score(testData,testLabel)
        return score - classifier[1]
    elif classifier[0] == 'Gradient Boosting':
        neighTest = GradientBoostingClassifier(n_estimators = int(bestParameters['Gradient Boosting'][0]),loss='deviance')
        neighTest.fit(trainData, trainLabel)
        scoreTest = neighTest.score(testData, testLabel)
        return scoreTest - classifier[1]
    elif classifier[0] == 'Multinomial Naive Bayes':
        clfTest = MultinomialNB(alpha = bestParameters['Multinomial Naive Bayes'], fit_prior=True)
        clfTest.fit(trainData, trainLabel)
        scoreTest = clfTest.score(testData, testLabel)
        return scoreTest - classifier[1]
    elif classifier[0] == 'Decision (IG)':
        clf = tree.DecisionTreeClassifier(criterion='entropy')
        clf.fit(trainData, trainLabel)
        scoreTest = clf.score(testData, testLabel)
        return scoreTest - classifier[1]
# Script fragment (Kaggle Amazon access challenge style): evaluates a
# logistic-regression model on an RFE-selected feature subset, then trains
# SVC/NuSVC variants and writes submission CSVs.
# Relies on earlier-defined globals: ACTION, pred, lr, rfe, model_mat_train,
# model_mat_test, test_data, auc_score — TODO confirm against full file.
auc_score(ACTION[27000:], pred[:, 1])
# refit logistic regression on all rows, restricted to RFE-selected columns
lr.fit(model_mat_train[:, np.where(rfe.support_)[0]], ACTION)
pred = lr.predict_proba(model_mat_test[:, np.where(rfe.support_)[0]])
pd.DataFrame({"Id": test_data.index, "Action": pred[:, 1]}).to_csv(
    "../lr2_submission.csv", header=True, index=False
)
## svms
# holdout split: first 27000 rows train, remainder validate
svc = SVC(C=1.0, kernel="rbf", probability=True, class_weight="auto", verbose=2)
svc.fit(model_mat_train[:27000, np.where(rfe.support_)[0]], ACTION[:27000])
pred = svc.predict_proba(model_mat_train[27000:, np.where(rfe.support_)[0]])
auc_score(ACTION[27000:], pred[:, 1])
# NuSVC on the RFE-selected columns
nusvc = NuSVC(nu=0.11, kernel="rbf", degree=3, probability=True, cache_size=1024, verbose=2)
nusvc.fit(model_mat_train[:27000, np.where(rfe.support_)[0]], ACTION[:27000])
svc_pred = nusvc.predict_proba(model_mat_train[27000:, np.where(rfe.support_)[0]])
auc_score(ACTION[27000:], svc_pred[:, 1])
# NuSVC on the full feature matrix for comparison
nusvc = NuSVC(nu=0.11, kernel="rbf", degree=3, probability=True, cache_size=1024, verbose=2)
nusvc.fit(model_mat_train[:27000], ACTION[:27000])
svc_pred = nusvc.predict_proba(model_mat_train[27000:])
auc_score(ACTION[27000:], svc_pred[:, 1])
# final fit on all rows (selected columns) and submission file
nusvc.fit(model_mat_train[:, np.where(rfe.support_)[0]], ACTION)
svc_pred = nusvc.predict_proba(model_mat_test[:, np.where(rfe.support_)[0]])
pd.DataFrame({"Id": test_data.index, "Action": svc_pred[:, 1]}).to_csv(
    "../nusvc_submission.csv", header=True, index=False
)
## random forest
def test_full():
    """Fit a default NuSVC on the full module-level training set and print
    its training-set fit quality via check_fit (Python 2 print)."""
    clf = NuSVC()
    clf.fit(train, trainLabels)
    print check_fit(clf.predict(train), trainLabels)
#posterior[m] = knc.predict_proba(X_test) print "Error-Correcting Output Code: ", np.mean(accuracy)/0.72, np.std(accuracy)/0.72 print k for i in range(0,6): for j in range(0,6): print '{:5.2f} '.format(box[i,j]/100.0), print #end GBC(n_estimators=60, max_depth=3) nusvc = NuSVC(nu=0.66,degree=1) n=0 box = np.zeros([6,6]) y_pred = nusvc.fit(X_train, y_train).predict(X_test) for i in range(0,len(y_pred)): if y_pred[i] == y_test[i]: n = n+1 box[y_test[i]-1,y_pred[i]-1] = box[y_test[i]-1,y_pred[i]-1] + 1 print "NuSVC, nu=0.66, degree=1: ",n/0.72 for i in range(0,6): for j in range(0,6): print '{:5.0f} '.format(box[i,j]), print n=0 nusvc = NuSVC(nu=0.22,degree=1) box = np.zeros([6,6]) ovr = multiclass.OneVsRestClassifier(nusvc) y_pred = ovr.fit(X_train, y_train).predict(X_test)
def handle_data(context, data):
    """Quantopian/zipline bar handler: builds percent-change feature windows
    from price history, trains four classifiers per stock, and trades only
    when all four agree and the 50/200-day moving averages confirm the
    direction.
    """
    prices = history(bar_count = context.historical_bars, frequency='1d', field='price')

    for stock in context.stocks:
        try:
            # create moving averages for 50 and 200 days to filter the results that we want
            # to get out of the nueral network.
            ma1 = data[stock].mavg(50)
            ma2 = data[stock].mavg(200)

            start_bar = context.feature_window
            price_list = prices[stock].tolist()

            X = []
            y = []

            bar = start_bar

            # feature creation
            # this is where I build out the Neural Network that
            # learns from the history of the stocks.
            while bar < len(price_list)-1:
                try:
                    end_price = price_list[bar+1]
                    begin_price = price_list[bar]

                    # window of the feature_window prices preceding `bar`
                    pricing_list = []
                    xx = 0
                    for _ in range(context.feature_window):
                        price = price_list[bar-(context.feature_window-xx)]
                        pricing_list.append(price)
                        xx += 1

                    # features: rounded percent change between consecutive prices
                    features = np.around(np.diff(pricing_list) / pricing_list[:-1] * 100.0, 1)
                    # print(features)

                    # label: did the next bar close higher (+1) or not (-1)?
                    if end_price > begin_price:
                        label = 1
                    else:
                        label = -1

                    bar += 1
                    X.append(features)
                    y.append(label)

                except Exception as e:
                    # best-effort: skip bad bars but keep advancing
                    bar += 1
                    print(('feature creation',str(e)))

            clf1 = RandomForestClassifier()
            clf2 = LinearSVC()
            clf3 = NuSVC()
            clf4 = LogisticRegression()

            # current feature row is scaled together with the training
            # rows, then split back off
            last_prices = price_list[-context.feature_window:]
            current_features = np.around(np.diff(last_prices) / last_prices[:-1] * 100.0, 1)

            X.append(current_features)
            X = preprocessing.scale(X)

            current_features = X[-1]
            X = X[:-1]

            clf1.fit(X,y)
            clf2.fit(X,y)
            clf3.fit(X,y)
            clf4.fit(X,y)

            p1 = clf1.predict(current_features)[0]
            p2 = clf2.predict(current_features)[0]
            p3 = clf3.predict(current_features)[0]
            p4 = clf4.predict(current_features)[0]

            # trade only on unanimous agreement of the four models
            if Counter([p1,p2,p3,p4]).most_common(1)[0][1] >= 4:
                p = Counter([p1,p2,p3,p4]).most_common(1)[0][0]
            else:
                p = 0

            print(('Prediction',p))

            # moving-average filter: long only in an uptrend, short only
            # in a downtrend
            if p == 1 and ma1 > ma2:
                order_target_percent(stock,0.33)
            elif p == -1 and ma1 < ma2:
                order_target_percent(stock,-0.33)

        except Exception as e:
            print(str(e))

    # records the values from the last stock processed in the loop
    record('ma1',ma1)
    record('ma2',ma2)
    record('Leverage',context.account.leverage)
def linearNuSVC():
    """Train a linear-kernel NuSVC on the module-level training set and
    store its validation accuracy in guideToGraph under 'Linear Nu-SVC'."""
    model = NuSVC(kernel="linear")
    model.fit(trainData, trainLabel)
    validation_score = model.score(validationData, validationLabel)
    guideToGraph['Linear Nu-SVC'] = validation_score
# NOTE(review): whitespace-mangled chunk; the layout below is reconstructed.
# Leave-one-subject-out classification on shared-response-model (SRM) data:
# for each held-out subject, train on all other subjects and test on them.
accuracy = np.zeros((subjects,))
cm = [None] * subjects
for subject in range(subjects):
    # Concatenate the subjects' data for training into one matrix
    train_subjects = list(range(subjects))
    train_subjects.remove(subject)
    # Number of time points (columns) per subject.
    TRs = image_data_shared[0].shape[1]
    # NOTE(review): assumes len(train_labels) == TRs * (subjects - 1) — confirm.
    train_data = np.zeros((image_data_shared[0].shape[0], len(train_labels)))
    for train_subject in range(len(train_subjects)):
        start_index = train_subject*TRs
        end_index = start_index+TRs
        train_data[:, start_index:end_index] = image_data_shared[train_subjects[train_subject]]
    # Train a Nu-SVM classifier using scikit learn
    classifier = NuSVC(nu=0.5, kernel='linear')
    classifier = classifier.fit(train_data.T, train_labels)
    # Predict on the test data
    predicted_labels = classifier.predict(image_data_shared[subject].T)
    # Fraction of correctly predicted labels for this held-out subject.
    accuracy[subject] = sum(predicted_labels == test_labels)/float(len(predicted_labels))
    # Create a confusion matrix to see the accuracy of each class
    cm[subject] = confusion_matrix(test_labels, predicted_labels)
    # Normalize the confusion matrix so each row (true class) sums to 1.
    cm[subject] = cm[subject].astype('float') / cm[subject].sum(axis=1)[:, np.newaxis]
# Plot and print the results
plot_confusion_matrix(cm, title="Confusion matrices for different test subjects with Probabilistic SRM")
print("SRM: The average accuracy among all subjects is {0:f} +/- {1:f}".format(np.mean(accuracy), np.std(accuracy)))
# NOTE(review): whitespace-mangled chunk; layout reconstructed.
# Evaluate the already-fitted kitchen-domain linear SVM on each domain's
# PCA test features, then start the non-linear (NuSVC) book-domain runs.
# NOTE(review): .score() returns accuracy, although the messages say "error".
print "Cross-domain error for kitchen-DVD (trained PCA kitchen data to predict test PCA DVD data)", kitchenSVC.score( DVD_test_matrix.todense(), test_label )
print "Cross-domain error for kitchen-electronics (trained PCA kitchen data to predict test PCA electroincs data)", kitchenSVC.score( electronics_test_matrix.todense(), test_label )
print "In-domain error for kitchen-kitchen (trained PCA kitchen data to predict test PCA kitchen data)", kitchenSVC.score( kitchen_test_matrix.todense(), test_label )
print "----PCA + NON-linear SVM"
# NuSVC results book->all
book_clf = NuSVC()
book_clf.fit(book_train_new_fit, train_label)
print "In-domain error for book-book (trained PCA book data to predict test PCA book data)", book_clf.score( book_test_new_fit, test_label )
print "Cross-domain error for book-DVD (trained PCA book data to predict test PCA DVD data)", book_clf.score( DVD_test_new_fit, test_label )
print "Cross-domain error for book-electronics (trained PCA book data to predict test PCA electroincs data)", book_clf.score( electronics_test_new_fit, test_label )
print "Cross-domain error for book-kitchen (trained PCA book data to predict test PCA kitchen data)", book_clf.score( kitchen_test_new_fit, test_label )
# NuSVC results DVD->all
DVD_clf = NuSVC()
class Learner:
    """SVM-based trading learner.

    Maintains a sliding window of price returns, periodically (re)fits a
    NuSVC on batches of the last `recurrence` returns, and emits a trading
    decision via tanh of the SVM's decision function.

    NOTE(review): this chunk was whitespace-mangled; statement nesting was
    reconstructed and should be confirmed against the original file.
    """
    #@input recurrence: Dimensionality of the feature-space
    #       with dimension corresponding to the last n returns
    #       this is a positive integer
    #@input realy_recurrent: default=False
    #       if true: the last decision is also a dimension in the
    #       feature space
    #@input label_par: parameter used for labeling, 'r' for returns,
    #       'p' for prices
    def __init__(self, recurrence=30, w_size=20, hybrid=False):
        self.learner = NuSVC()
        # size of each training batch
        self.batch_size = w_size * (recurrence)
        # size of the sliding window for the Sharpe ratio
        self.window_size = 5 * self.batch_size
        # true if part of a hybrid learner
        self.hybrid = hybrid
        # the data matrix of a single batch
        # Data-Vector = r_1, ... r_n
        # with r_n := r_n - r_n-1
        self.returns = list()
        # training data for the experimental approach
        self.train_dat = list()
        self.labels = list()
        self.decisions = list()
        self.recurrence = recurrence
        self.last_decision = 0
        self.ready = False
        self.tstep = 0
        self.prices = list()
        return

    def predict(self, new_price, old_price, tstep=0):
        """Record the newest price/return and return a trading decision.

        Returns 0 until the SVM has been trained at least once; afterwards
        returns tanh(decision_function) over the last `recurrence` returns.
        """
        # default decision value
        decision = 0
        # add price to the sliding window
        self.prices.append(new_price)
        if(len(self.prices) > self.window_size):
            self.prices.pop(0)
        latest_return = new_price - old_price
        # add next label (labels the previously stored feature vector)
        if(self.tstep > self.recurrence):
            self.labels.append(self.label_returns(latest_return))
        # increment timer
        self.tstep += 1
        # add latest return to history
        self.returns.append(latest_return)
        if(self.tstep > self.window_size):
            if(len(self.returns) > self.window_size):
                self.returns.pop(0)
        # if batch is full, start training
        if(self.tstep%self.batch_size == 0 and self.tstep != 0):
            self.train()
        # disabled this, normally for predicting prices, but performance is
        # worse, so this is actually dead code
        # setup x-vector
        if(self.tstep > self.recurrence):
            x = self.returns[len(self.returns)-self.recurrence-1:len(self.returns)-1]
            # set up training matrix (column vector of the last returns)
            x = np.array(x)
            x = x.reshape((len(x),1))
            self.train_dat.append(x)
            x = np.transpose(x)
            # create decision only if svm is trained
            if(self.ready):
                decision = np.tanh(self.learner.decision_function(x))
                decision = decision[0]
        # if the system is truly recurrent (uses the last decision in the
        # input vector), remember the decision
        self.last_decision = decision
        return decision

    # Refit the SVM on the accumulated batch. NOTE(review): despite the
    # original comment mentioning partial_fit(), this performs a full fit().
    def train(self):
        # setup training matrix: one row per stored feature vector
        train_dat = np.zeros((len(self.labels),self.recurrence))
        for i in range(len(train_dat)):
            train_dat[i][:] = np.transpose(self.train_dat[i])
        #np.transpose(train_dat)
        self.learner.fit(train_dat, self.labels)
        # clear the training-related structures
        self.labels = list()
        self.train_dat = list()
        self.ready = True
        return

    # labeling function using the complete vector
    # very simple, since it only detects trends depending on the mean:
    # +1 if the batch mean return is at or above the overall mean, else -1
    def label_set(self,return_list):
        mu_current = np.mean(return_list)
        mu_total = np.mean(self.returns)
        if(mu_current >= mu_total):
            return 1
        else:
            return -1

    # label a single return by its sign: +1 for positive, otherwise -1
    def label_returns(self,next_return):
        if next_return > 0:
            return 1
        else:
            return -1
def fit_nusvc(X_train, y_train, nu, kernel, gamma=0.1, degree=4, coef0=1): print "Training, nu = ", nu start = time.time() clf = NuSVC(nu=nu, kernel=kernel, degree=degree, coef0=coef0) clf.fit(X_train, y_train) return clf, time.time() - start
# NOTE(review): fragment begins mid-`if`; the matching `if` header is in the
# preceding chunk. Layout reconstructed from a whitespace-mangled source;
# nesting of the prints should be confirmed against the original file.
    X_test = sample2
    y_test = labels[272:,i]
else:
    # fall-back split: first 172 rows train, remainder test
    X_train = training
    y_train = labels[:172,i]
    X_test = sampletest
    y_test = labels[172:,i]
#best case: 67, 1
posterior = np.empty([100,72,6])
# Grid search over nu (j/100 for j in 1..66) and degree (fixed at 1),
# repeating each fit 10 times and accumulating per-sample correctness.
for j in range(1,67):
    for k in range(1,2):
        box = np.zeros([6,6])
        accuracy = np.zeros(72)
        for m in range(0,10):
            nsvc = NuSVC(nu=j/100.0, degree=k)
            nsvc.fit(X_train, y_train)
            y_pred = nsvc.predict(X_test)
            n=0
            for i in range(0,len(y_pred)):
                if y_pred[i] == y_test[i]:
                    #print i, y_pred[i], y_test[i]
                    n = n+1
                    # per-sample correct count across the 10 repeated fits
                    accuracy[i] = accuracy[i]+1
                # confusion counts indexed (true-1, pred-1);
                # NOTE(review): assumed to count every sample — confirm nesting
                box[y_test[i]-1,y_pred[i]-1] = box[y_test[i]-1,y_pred[i]-1] + 1
            #posterior[m] = knc.predict_proba(X_test)
        #print j, k, np.mean(accuracy)/0.72, np.std(accuracy)/0.72
        # mean correct counts per contiguous class-index group, then overall
        print j, k, sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0
'''
means = np.empty([72,6])
stds = np.empty([72,6])
# NOTE(review): fragment begins inside a triple-quoted (disabled) block whose
# opening quotes are in the preceding chunk; layout reconstructed from a
# whitespace-mangled source and should be confirmed against the original.
            degree=4)
svc_new.fit(train_x_reduced, train_y_practice)
print svc_new.score(test_x_reduced, test_y_practice)
"""
"""
parameters = {'degree':(1, 3, 6)}
svclass = NuSVC(kernel='poly', probability=True, gamma=0, nu=.5852, tol=.00001)
clf = GridSearchCV(svclass, parameters, cv=10)
clf.fit(train_x_reduced, train_y_practice)
print "SVC"
print clf.best_estimator_
print clf.best_score_
print clf.best_params_
"""
# Active model: polynomial-kernel NuSVC scored on the practice split.
svc_new = NuSVC(kernel='poly', probability=True, gamma=0, nu=.5852, tol=.00001)
svc_new.fit(train_x_reduced, train_y_practice)
print svc_new.score(test_x_reduced, test_y_practice)
print 'Predicting'
# Re-select the top-k features against the leaderboard labels, then
# transform both splits with the new selector.
estimator = SelectKBest(score_func=f_classif, k=components)
estimator.fit(train_x, train_y_leaderboard)
train_x_reduced = estimator.transform(train_x)
test_x_reduced = estimator.transform(test_x)
print train_x.shape
print train_x_reduced.shape
#svc_new = SVC(probability=True, C=.000001, kernel='poly', gamma=4,
#              degree=4)
# Refit the same NuSVC configuration on the leaderboard labels.
svc_new = NuSVC(kernel='poly', probability=True, gamma=0, nu=.5852, tol=.00001)
svc_new.fit(train_x_reduced, train_y_leaderboard)
# NOTE(review): fragment begins inside a `with open(...)`/csv block whose
# header is in the preceding chunk, and ends mid-call. Layout reconstructed
# from a whitespace-mangled source; confirm against the original file.
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in spamreader:
        #print len(row)
        # keep only complete rows (11 fields) with no missing '?' values;
        # columns 1..9 are the features, column 10 is the integer class label
        if len(row) == 11 and "?" not in row :
            x.append(row[1:10])
            y.append(int(row[10]))
# convert the string feature rows to lists of ints, one row at a time
for i in x:
    for j in i:
        temp.append(int(j))
    z.append(temp)
    temp = []
########################################################################
#NuSVM classifier
from sklearn.svm import NuSVC
clf = NuSVC()
# train on rows 1..199, evaluate on rows 201..697
clf.fit(z[1:200], y[1:200])
valid = clf.predict(z[201:698])
for i in valid:
    if i != y[count+201]:
        mis+=1
    count+=1
print("NuSVM misclassification rate is")
print(float(float(mis)/498) * 100)
#########################################################################
#Random Forest
from sklearn.ensemble import RandomForestClassifier
mis = 0
count=0
clf1 = RandomForestClassifier(n_estimators=10)
clf1.fit(z[1:200], y[1:200])
RandomForestClassifier(n_estimators=10, max_depth=None,
# NOTE(review): fragment; Xtrain/Ytrain and the *shape variables are defined
# in the preceding chunk. Layout reconstructed from a whitespace-mangled source.
# Class labels 1/2/3 are assigned to fixed index ranges of each split.
Ytrain[500:750] = 3
Yval = np.ones((valshape[0]))
Yval_predict = np.ones((valshape[0]))
Yval[0:150] = 1
Yval[150:300] = 2
Yval[300:450] = 3
Ytest = np.ones((testshape[0]))
Ytest_predict = np.ones((testshape[0]))
Ytest[0:100] = 1
Ytest[100:200] = 2
Ytest[200:300] = 3
clf = NuSVC()
clf.fit(Xtrain, Ytrain)
# NOTE(review): the bare expression below looks like pasted REPL echo — it
# constructs a second NuSVC that is never assigned or used (and 'polynomial'
# is not a valid sklearn kernel name); the fitted `clf` above uses defaults.
NuSVC(cache_size=2000, class_weight=None, coef0=0.0,
      decision_function_shape=None, degree=3, gamma='auto',
      kernel='polynomial', max_iter=-1, nu=0.5, probability=False,
      random_state=None, shrinking=True, tol=0.00001, verbose=False)
# predict each split one sample at a time
for i in range(trainshape[0]):
    Ytrain_predict[i] = clf.predict([Xtrain[i, :]])
for i in range(valshape[0]):
    Yval_predict[i] = clf.predict([Xval[i, :]])
for i in range(testshape[0]):
    Ytest_predict[i] = clf.predict([Xtest[i, :]])
Y = np.concatenate((Ytrain, Yval, Ytest))
def final_run():
    """Fit a default NuSVC on the module-level training data and write out
    its predictions for the module-level test set via write_out()."""
    model = NuSVC()
    model.fit(train, trainLabels)
    write_out(model.predict(test))