def test_svc_invalid_break_ties_param(SVCClass):
    """Check that ``break_ties=True`` is rejected for one-vs-one decisions.

    Args:
        SVCClass: Estimator class under test; it must accept the keyword
            arguments used below and return the fitted estimator from
            ``fit``.

    ``predict`` is expected to raise ``ValueError`` with a message matching
    "break_ties must be False".
    """
    X, y = make_blobs(random_state=42)

    svm = SVCClass(
        kernel="linear",
        decision_function_shape='ovo',
        break_ties=True,
        random_state=42,
    ).fit(X, y)

    # Predict on the feature matrix rather than the label vector: ``y`` is not
    # a valid sample matrix, so a ValueError could otherwise come from input
    # validation instead of the break_ties check this test is about.
    with pytest.raises(ValueError, match="break_ties must be False"):
        svm.predict(X)
def testLinear(self):
    """Train a linear SVM on ``testSet.txt`` and print one prediction.

    Prints ``ws`` (first value returned by ``SVMLib.fit``), a separator line,
    and then the prediction for the first sample followed by its label.
    """
    # Load the data.
    dataArr, labelArr = self.loadDataSet('data/dataset2svm/testSet.txt')
    svm = SVMLib()
    # Train a linear classifier.
    ws, b = svm.fit(dataArr, labelArr, 0.6, 0.001, 40)
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print(ws)
    dataMat = mat(dataArr)
    # The first printed value is the classification result, the second is the
    # actual label.  The SVM is a binary classifier whose result is -1 or 1,
    # so the training labels must be -1 or 1 as well.
    print('-----------------')
    print('%s %s' % (svm.predict(dataMat[0], ws, b), labelArr[0]))
def testMultiLinear(self):
    """Train a linear SVM on ``horseColicTest.txt`` and print one prediction.

    Prints ``ws`` (first value returned by ``SVMLib.fit``), a separator line,
    and then the prediction for the fourth sample followed by its label.
    """
    # Load the data.
    dataArr, labelArr = self.loadMultiDataSet('data/dataset2svm/horseColicTest.txt')
    svm = SVMLib()
    # Train a linear classifier.
    ws, b = svm.fit(dataArr, labelArr, 0.6, 0.001, 40)
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print(ws)
    dataMat = mat(dataArr)
    # The first printed value is the classification result, the second is the
    # actual label.  The SVM is a binary classifier whose result is -1 or 1,
    # so the training labels must be -1 or 1 as well.
    print('-----------------')
    # Classify the 4th sample with the SVM: greater than 0 means 1, less than
    # 0 means -1.
    print('%s %s' % (svm.predict(dataMat[3], ws, b), labelArr[3]))
def leave_one_out_cv(gram_matrix, labels, alg = 'SVM'):
    """Run leave-one-out cross-validation on a Gram matrix.

    Args:
        gram_matrix: Square array indexed as ``gram_matrix[rows][:, cols]``.
        labels: Array of labels, indexable by integer index arrays.
        alg: ``'SVM'`` (``SVC`` with a precomputed kernel) or ``'kNN'``
            (``KNeighborsClassifier`` with default settings).

    Returns:
        ``(preds, scores)``: the predicted label and the score of every
        held-out sample, in split order.

    Raises:
        ValueError: If ``alg`` is not one of the supported names.  The
            original code failed later with an unbound ``score`` instead.
    """
    if alg not in ('SVM', 'kNN'):
        raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))

    scores = []
    preds = []
    loo = sklearn.cross_validation.LeaveOneOut(len(labels))
    for train_index, test_index in loo:
        # Rows select the samples of the fold; columns always select the
        # training samples.
        X_train = gram_matrix[train_index][:, train_index]
        X_test = gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]

        if alg == 'SVM':
            model = sklearn.svm.SVC(kernel = 'precomputed')
        else:
            model = sklearn.neighbors.KNeighborsClassifier()
        model.fit(X_train, y_train)
        preds += model.predict(X_test).tolist()
        scores.append(model.score(X_test, y_test))

    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("Mean accuracy: %f" % (np.mean(scores)))
    print("Stdv: %f" % (np.std(scores)))
    return preds, scores
def k_fold_cv(gram_matrix, labels, folds = 10, alg = 'SVM', shuffle = True):
    """Run K-fold cross-validation on a Gram matrix.

    Args:
        gram_matrix: Square array indexed as ``gram_matrix[rows][:, cols]``.
        labels: Array of labels, indexable by integer index arrays.
        folds: Number of folds passed to ``KFold``.
        alg: ``'SVM'`` (``SVC`` with a precomputed kernel) or ``'kNN'``
            (``KNeighborsClassifier`` with default settings).
        shuffle: Passed to ``KFold``; the random state is drawn with
            ``random.randint(0, 100)`` on every call.

    Returns:
        ``(preds, scores)``: predictions concatenated over all folds and one
        score per fold.

    Raises:
        ValueError: If ``alg`` is not one of the supported names.  The
            original code failed later with an unbound ``score`` instead.
    """
    # The leftover pdb.set_trace() breakpoint that halted every call is gone.
    if alg not in ('SVM', 'kNN'):
        raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))

    scores = []
    preds = []
    splitter = sklearn.cross_validation.KFold(len(labels), folds, shuffle = shuffle, random_state = random.randint(0,100))
    for train_index, test_index in splitter:
        # Rows select the samples of the fold; columns always select the
        # training samples.
        X_train = gram_matrix[train_index][:, train_index]
        X_test = gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]

        if alg == 'SVM':
            model = sklearn.svm.SVC(kernel = 'precomputed')
        else:
            model = sklearn.neighbors.KNeighborsClassifier()
        model.fit(X_train, y_train)
        preds += model.predict(X_test).tolist()
        scores.append(model.score(X_test, y_test))

    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("Mean accuracy: %f" % (np.mean(scores)))
    print("Stdv: %f" % (np.std(scores)))
    return preds, scores
def run_model(train_data, train_labels, test_data, test_labels):
    '''
    Fit an SVC with default settings on the training set, predict labels for
    the test set and collect performance metrics for the run.

    Method modified from code available at:
    https://www.kaggle.com/c/word2vec-nlp-tutorial/details/part-1-for-beginners-bag-of-words

    Args:
        train_data: Training samples passed to ``SVC.fit``.
        train_labels: Training set labels.
        test_data: Samples to predict labels for.
        test_labels: Labels compared against the predictions.
    Returns:
        pred_labels: The labels predicted by the SVC for ``test_data``.
        perform_results: Result of ``performance_metrics.get_perform_metrics``
            with the ``'vector'`` and ``'alg'`` entries added.
    '''
    # Train the support vector classifier.
    classifier = SVC()
    classifier.fit(train_data, train_labels)

    # Score the trained model against the held-out labels.
    pred_labels = classifier.predict(test_data)
    run_metrics = performance_metrics.get_perform_metrics(test_labels, pred_labels)

    # Tag the metrics with information identifying this run.
    run_metrics['vector'] = 'Bag_of_Words'
    run_metrics['alg'] = 'Support_Vector_Machine'
    return pred_labels, run_metrics
def svm_iterkernel(train_data, train_labels, test_data, test_labels, op_name_dir):
    """Fit an SVC per kernel, report accuracies and return the predictions.

    Args:
        train_data: Training samples passed to ``SVC.fit``.
        train_labels: Training labels; their unique values drive the
            per-label report.
        test_data: Samples to predict.
        test_labels: Array of true labels for ``test_data``.
        op_name_dir: Path of a log file opened in append mode, or one of
            ``None`` / ``'None'`` / ``'none'`` to disable logging.

    Returns:
        dict mapping kernel name to the prediction array for ``test_data``.
    """
    label_set = np.unique(train_labels)
    # The original test ``op_name_dir != ('None' or 'none')`` reduces to
    # ``!= 'None'``, so 'none' opened a file literally named "none".
    write_log = op_name_dir not in (None, 'None', 'none')
    fo = open(op_name_dir, 'a') if write_log else None
    predict_list = {}
    try:
        for kernel in ['linear']:  # , 'poly', 'rbf']:
            t0 = time.time()
            svm = SVC(C=1., kernel=kernel, cache_size=10240)
            svm.fit(train_data, train_labels)
            prediction = svm.predict(test_data)
            predict_list[kernel] = prediction

            pred_acc_tot = (float(np.sum(prediction == test_labels))) / len(test_labels)
            # Measure once so the console and the log report the same time.
            elapsed = time.time() - t0
            acc_text = str(round(pred_acc_tot * 100))
            # Single-argument print() emits the same text under Python 2 and 3.
            print('%s %s %s' % (elapsed, ',kernel = ' + kernel, ',pred acc = ' + acc_text))
            if write_log:
                fo.write('time=' + str(elapsed) + 'sec,kernel=' + kernel + ',pred acc=' + acc_text + '\n')

            for lab_unq in label_set:
                # Share of the test samples carrying this label that were
                # predicted as this label.
                hits = (prediction == lab_unq) & (test_labels == lab_unq)
                pred_acc = float(hits.sum()) / (len(test_labels[test_labels == lab_unq]))
                line = 'pred_' + str(lab_unq) + ',' + str(round(pred_acc * 100))
                print(line)
                if write_log:
                    fo.write(line + '\n')
    finally:
        # Close the log even when fitting or scoring raises.
        if fo is not None:
            fo.close()
    return predict_list
def get_error(svm, X, y):
    """Return the fraction of samples for which ``svm`` predicts the wrong label.

    Each sample ``X[i]`` is passed to ``svm.predict`` on its own and the first
    element of the result is compared with ``y[i]``.

    Args:
        svm: Object with a ``predict`` method.
        X: Indexable collection of samples.
        y: Array of labels; ``y.shape[0]`` is the number of samples.

    Returns:
        float: number of mismatches divided by the number of samples.
    """
    total = y.shape[0]
    mistakes = sum(
        1 for idx in range(total) if y[idx] != svm.predict(X[idx])[0]
    )
    return mistakes * 1. / total
def plotSVM(svm,n,title):
    """Draw the decision regions of ``svm`` in cell ``n`` of a 2x2 subplot grid.

    Reads the module-level mesh ``xx``/``yy`` and the data ``X``/``Y``: the
    classifier is evaluated on every mesh point, the result is drawn as filled
    contours, and the data points are scattered on top.

    Args:
        svm: Classifier with a ``predict`` method.
        n: Subplot index passed to ``plt.subplot(2, 2, n)``.
        title: Title of the subplot.
    """
    plt.subplot(2, 2, n)

    # One row per mesh point, columns are the two coordinates.
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    regions = svm.predict(mesh_points).reshape(xx.shape)

    plt.contourf(xx, yy, regions, cmap=plt.cm.Paired, alpha=0.8)
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
    plt.title(title)
def test_accuracy(svm,x,y):
    """Count the validation samples that ``svm`` misclassifies.

    Despite the name, the return value is the number of misclassified
    samples, not an accuracy ratio.

    Args:
        svm: Classifier with a ``predict`` method.
        x: Validation inputs, passed to ``svm.predict`` in one call.
        y: Array of true labels; flattened with ``ravel()``.

    Returns:
        int: number of positions where prediction and label differ.
    """
    hypothesis = svm.predict(x)
    flat_y = y.ravel()
    # enumerate() replaces the Python 2-only xrange() index loop.  Indexing
    # ``hypothesis`` keeps the IndexError when it is shorter than the labels.
    return sum(
        1 for i, label in enumerate(flat_y) if not (hypothesis[i] == label)
    )
def increment_svm(svm, L_ids, baseline_accuracy):
    """Pick the best augmentation of the labelled set with self-labelled samples.

    Uses the module-level ``X``, ``y`` and ``instance_ids``.  The unlabelled
    samples (ids not in ``L_ids``) are ranked by ``svm.decision_function``;
    the 500 lowest and 500 highest are labelled with ``svm.predict``.  Two
    stratified shuffle splits each add 10% of those samples to the labelled
    set, a ``LinearSVC`` is cross-validated (5 folds) on every augmented set,
    and the split with the highest mean accuracy wins.

    Args:
        svm: Fitted classifier with ``decision_function`` and ``predict``.
        L_ids: Ids (row indices into ``X``/``y``) of the labelled samples.
        baseline_accuracy: Unused; kept for interface compatibility.

    Returns:
        ``(best_L_prime_ids, best_accuracy)``: ids of the best augmented
        labelled set and its mean cross-validation accuracy.
    """
    L = X[L_ids]
    y_l = y[L_ids]
    U_ids = np.array(list((set(instance_ids) - set(L_ids))))
    U = X[U_ids]

    # Both ends of the decision-function ordering.
    ordered_indices = np.argsort(svm.decision_function(U))
    smallest_indices = ordered_indices[:500]
    largest_indices = ordered_indices[-500:]
    high_confidence_unlabeled = scipy.sparse.vstack([U[smallest_indices], U[largest_indices]])
    high_confidence_ids = np.concatenate([U_ids[smallest_indices], U_ids[largest_indices]])
    high_confidence_predicted_labels = svm.predict(high_confidence_unlabeled)

    splits = sklearn.cross_validation.StratifiedShuffleSplit(high_confidence_predicted_labels, n_iter=2, test_size=0.9)
    saved_L_prime_ids = []
    saved_cv_accuracies = []
    # Only the "train" side of each split is used to augment the labelled
    # set; the values previously derived from the other side (and the true
    # labels of the candidates) were never read and are no longer computed.
    for augment_indices, _unused_indices in splits:
        augment = high_confidence_unlabeled[augment_indices]
        augment_ids = high_confidence_ids[augment_indices]
        augment_labels = high_confidence_predicted_labels[augment_indices]

        L_prime = scipy.sparse.vstack([L, augment])
        y_l_prime = np.concatenate([y_l, augment_labels])
        saved_L_prime_ids.append(np.concatenate([L_ids, augment_ids]))

        svm_prime = sklearn.svm.LinearSVC(penalty='l2', C=10, dual=False)
        accuracy = sklearn.cross_validation.cross_val_score(svm_prime, L_prime, y_l_prime, cv=5, n_jobs=7).mean()
        saved_cv_accuracies.append(accuracy)

    best_index = np.argmax(saved_cv_accuracies)
    return saved_L_prime_ids[best_index], saved_cv_accuracies[best_index]
def predict_embedded_attributes_labels(data_mat, svms):
    """
    Predict class labels with every attribute classifier for each row of data_mat.

    Entries of ``svms`` that are ``None`` are skipped and leave a row of
    zeros in the result.

    @return: Matrix of shape (number of classifiers, number of rows in
             data_mat); each column holds the labels of one feature vector.
    """
    num_attributes = len(svms)
    num_examples = data_mat.shape[0]
    A = np.zeros(shape=(num_attributes, num_examples))
    log.d("Classifying {} examples...".format(num_examples))

    # The return format of predict() was changed in scikit-learn 0.15, so
    # every other version gets its result transposed.
    legacy_sklearn = sklearn.__version__ == '0.14.1'

    for att_idx, svm in enumerate(svms):
        log.update_progress(att_idx + 1, num_attributes)
        if svm is None:
            continue
        labels = svm.predict(data_mat)
        A[att_idx] = labels if legacy_sklearn else labels.T

    print("")
    return A
def test_svc_ovr_tie_breaking(SVCClass):
    """Test if predict breaks ties in OVR mode.

    Without tie breaking the predictions must differ somewhere from the
    argmax of the decision function; with tie breaking they must agree
    everywhere on the evaluation grid.

    Related issue: https://github.com/scikit-learn/scikit-learn/issues/8277
    """
    X, y = make_blobs(random_state=27)

    # 1000 x 1000 grid spanning the range of both features.
    xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 1000)
    ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 1000)
    xx, yy = np.meshgrid(xs, ys)
    grid = np.c_[xx.ravel(), yy.ravel()]

    def fit_and_evaluate(break_ties):
        """Fit a linear OVR model and return (predictions, decision values)."""
        model = SVCClass(kernel="linear", decision_function_shape='ovr',
                         break_ties=break_ties, random_state=42).fit(X, y)
        return model.predict(grid), model.decision_function(grid)

    pred, dv = fit_and_evaluate(False)
    assert not np.all(pred == np.argmax(dv, axis=1))

    pred, dv = fit_and_evaluate(True)
    assert np.all(pred == np.argmax(dv, axis=1))
def hw1q18():
    """Homework 1, question 18: RBF-kernel SVC for class 0 vs. rest over C.

    Uses the module-level ``X_train``/``Y_train``/``X_test``/``Y_test``.  For
    each C in (0.001, 0.01, 0.1, 1, 10) an ``SVC`` with ``gamma=100`` is
    fitted on the binary target ``Y_train == 0``; the number of support
    vectors, the predicted class counts and the number of test errors are
    printed, and a three-panel figure (training data, test predictions,
    support vectors) is shown.
    """
    def count_line(labels):
        """Format the number of positive (1) and negative (0) labels."""
        return "n(+) = %s n(-) = %s" % (np.count_nonzero(labels == 1), np.count_nonzero(labels == 0))

    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("----------------------------------------")
    print(" Homework 1 Question 18 ")
    print("----------------------------------------")
    Y_train_0 = (Y_train == 0).astype(int)
    Y_test_0 = (Y_test == 0).astype(int)
    print("in the training set:")
    print(count_line(Y_train_0))
    print("in the test set:")
    print(count_line(Y_test_0))
    for C in (0.001, 0.01, 0.1, 1, 10):
        svm = sklearn.svm.SVC(C=C, kernel="rbf", gamma=100, tol=1e-7, shrinking=True, verbose=False)
        svm.fit(X_train, Y_train_0)
        print("----------------------------------------")
        print("C = %s" % (C,))
        support = svm.support_
        print("nSV = %s" % (len(support),))
        Y_predict = svm.predict(X_test)
        print("in the prediction:")
        print(count_line(Y_predict))
        print("E_out = %s" % (np.count_nonzero(Y_test_0 != Y_predict),))
        print("")

        plt.figure()
        plt.suptitle("C =" + str(C))
        plt.subplot(311)
        plt.title("Training data: green +, red -")
        plot_01(X_train, Y_train_0)
        # A boolean hides the tick labels on every matplotlib version; the
        # string "off" is treated as truthy by current releases.
        plt.tick_params(axis="x", labelbottom=False)
        plt.subplot(312)
        plt.title("Prediction on test data: green +, red -")
        plot_01(X_test, Y_predict)
        plt.tick_params(axis="x", labelbottom=False)
        plt.subplot(313)
        plt.title("Support vectors: blue")
        plt.plot(X_train[:, 0], X_train[:, 1], "r.")
        plt.plot(X_train[support, 0], X_train[support, 1], "b.")
        plt.show()
def testSVM(svm,zero,one):
    """Count correct and wrong predictions for two groups of samples.

    Every sample in ``zero`` is expected to be predicted as 0 and every
    sample in ``one`` as 1.  The two totals are printed.

    Args:
        svm: Classifier with a ``predict`` method; the first element of its
            result is compared with the expected label.
        zero: Iterable of samples whose expected label is 0.
        one: Iterable of samples whose expected label is 1.
    """
    numcorrect = 0
    numwrong = 0
    for correct, testing in ((0, zero), (1, one)):
        for d in testing:
            # The pdb.set_trace() breakpoint that stopped on every sample
            # has been removed.
            r = svm.predict(d)[0]
            if r == correct:
                numcorrect += 1
            else:
                numwrong += 1
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("Correct %s" % (numcorrect,))
    print("Wrong %s" % (numwrong,))
def runSVM(self):
    """
    Fit and score the SVM on every cross-validation split in ``self.kf``.

    For each ``(train, test)`` pair the model stored under
    ``self.models["SVM"]`` is fitted on the fold's training data and the
    accuracy of its predictions on the fold's test data is printed.
    """
    for train, test in self.kf:
        svm = self.models["SVM"]
        train_set, train_labels = self.getCurrFoldTrainData(train)
        test_set, test_labels = self.getCurrFoldTestData(test)
        svm.fit(train_set, train_labels)
        preds = svm.predict(test_set)
        acc = self.getAccuracy(test_labels, preds)
        # Single-argument print() emits the same text under Python 2 and 3.
        print("(SVM) Percent correct is %s" % (acc,))
def test_svm(svm, testing_dict, name):
    """Print how many samples ``svm`` classifies correctly and the accuracy.

    Args:
        svm: Classifier with a ``predict`` method; the first element of its
            result is compared with the expected label.
        testing_dict: Mapping from expected label to an iterable of samples.
        name: Prefix printed in front of each report line.

    Each report line starts with a newline and ends without one.  When there
    are no samples at all the accuracy is reported as 0.00% instead of
    raising ZeroDivisionError.
    """
    num_correct = 0
    num_wrong = 0
    for correct, testing in testing_dict.items():
        for test in testing:
            if svm.predict(test)[0] == correct:
                num_correct += 1
            else:
                num_wrong += 1

    print("\n{1} - Correct:{0}".format(num_correct, name), end="")
    print("\n{1} - Wrong:{0}".format(num_wrong, name), end="")

    total = num_correct + num_wrong
    accuracy = float(num_correct)/total*100 if total else 0.0
    print("\n{1} - Accuracy:{0:.2f}%".format(round(accuracy,2), name), end="")
def plotSVM(svm, n, title):
    """Draw the decision regions of ``svm`` in cell ``n`` of a 2x2 subplot grid.

    Reads the module-level mesh ``xx``/``yy``, the three training sets
    ``training_0``/``training_1``/``training_2`` (and their ``training_2d_*``
    counterparts, used only for sizing the colour lists) and ``plot_num``.
    Samples from index ``plot_num`` onwards are scattered in green, red and
    blue on top of the filled contours of the predictions.

    Args:
        svm: Classifier with a ``predict`` method.
        n: Subplot index passed to ``plt.subplot(2, 2, n)``.
        title: Title of the subplot.
    """
    points = np.array(
        training_0[plot_num:] + training_1[plot_num:] + training_2[plot_num:]
    )
    point_colors = np.array(
        ["g" for _ in training_2d_0][plot_num:]
        + ["r" for _ in training_2d_1][plot_num:]
        + ["b" for _ in training_2d_2][plot_num:]
    )

    plt.subplot(2, 2, n)

    # One row per mesh point, columns are the two coordinates.
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    regions = svm.predict(mesh_points).reshape(xx.shape)

    plt.contourf(xx, yy, regions, cmap = plt.cm.Paired, alpha = 0.8)
    plt.scatter(points[:, 0], points[:, 1], c = point_colors, cmap = plt.cm.Paired)
    plt.title(title)
def repare():
    """Re-predict and store the rating of every unchecked, incorrect row.

    Selects the rows of table ``help`` with ``checked is NULL AND correct = 0``,
    predicts a rating for each one with the model from ``Helper.train_svm()``
    and writes it back, marking the row ``checked = 1, correct = 2``.  Columns
    0, 1 and 2 of a row are used as user id, movie id and rating.
    """
    conn = pymysql.connect(host='localhost', port=3306, user='******', passwd='', db='small_rekomendacyjny')
    try:
        cur = conn.cursor()
        cur.execute("SELECT * FROM help WHERE checked is NULL AND correct = 0")
        svm = Helper.train_svm()
        for row in cur:
            print(row)
            vector = [Helper.prepare_vector(mov_id=row[1], us_id=row[0], rat=float(row[2]))]
            repared = float(svm.predict(vector)[0])
            new_curr = conn.cursor()
            print('stara: ' + str(float(row[2])) + ' poprawiona: ' + str(repared))
            # The statement used to read "correct = 2, where ..."; the comma
            # before WHERE made it invalid SQL.
            new_curr.execute(
                "UPDATE help SET rating=%s, checked = 1, correct = 2 WHERE user_id = %s AND movie_id = %s",
                [repared, row[0], row[1]])
            conn.commit()
    finally:
        # Release the connection even when a query or prediction fails.
        conn.close()
def hog_svm_logo_cls_rm():
    """Classify each test image with the saved HOG+SVM model and remove logos.

    Images whose logo crop is classified as 1 are passed to ``logo_clean``;
    all other images are simply displayed.
    """
    # sample_logos, image_paths = load_delogo_dataset("tests", (373, 54))  # load data from the image directory
    sample_logos, image_paths = load_delogo_dataset("cls_rm_tests", (373, 54))  # load data from the image directory
    descriptor = get_hog()
    svm = joblib.load("save/logo_svm_cls.pkl")
    logo_png = "ilogor.png"

    for logo_crop, image_path in zip(sample_logos, image_paths):
        features = hog_extractor(descriptor, logo_crop)  # HOG feature extraction
        predicted = svm.predict([features])  # SVM classifier
        print("svm 分类:", predicted)
        if predicted[0] != 1:
            Image.open(image_path).show()
            print("没有logo水印,pass")
            continue
        print("识别到logo水印,消去水印...")
        logo_clean(image_path, logo_png, savp="xout")  # remove the core watermark

    print("over!")
def trainTest():
    """Train three classifiers on the 2010 data and evaluate them on 2011.

    Gaussian naive Bayes, a linear SVC and logistic regression are fitted on
    ``read_tac('2010')`` and scored with ``accuracy`` on ``read_tac("2011")``.
    The accuracies and the confusion matrix of the logistic regression are
    printed and appended to the module-level file handle ``fh``.
    """
    data2010, labels2010 = read_tac('2010')
    data2011, labels2011 = read_tac("2011")

    # classifiers
    gnb = naive_bayes.GaussianNB()
    # Bound to ``svc``: assigning to ``svm`` made that name local to the
    # function, so ``svm.SVC`` raised UnboundLocalError before any training.
    svc = svm.SVC(kernel = "linear")
    logReg = linear_model.LogisticRegression()

    gnb.fit(data2010, labels2010)
    svc.fit(data2010, labels2010)
    logReg.fit(data2010, labels2010)

    gnbPrediction = gnb.predict(data2011)
    svmPrediction = svc.predict(data2011)
    logRegPrediction = logReg.predict(data2011)

    gnbAccuracy = accuracy(labels2011, gnbPrediction)
    svmAccuracy = accuracy(labels2011, svmPrediction)
    logRegAccuracy = accuracy(labels2011, logRegPrediction)

    confusionMatrix = metrics.confusion_matrix(labels2011, logRegPrediction)

    report = [
        ("Gaussian Naive Bayes: ", gnbAccuracy),
        ("Support Vector Machine: ", svmAccuracy),
        ("Logistic Regression: ", logRegAccuracy),
    ]

    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("Results:")
    for heading, value in report:
        print(heading)
        print(value)
    print(confusionMatrix)

    fh.write("Results:" + "\n")
    for heading, value in report:
        fh.write(heading + "\n")
        # str() so that a numeric accuracy can be concatenated with "\n".
        fh.write(str(value) + "\n")
    for i in confusionMatrix:
        fh.write(str(i))
        fh.write("\n")
    fh.write("-------------------------------------------------\n")
    fh.write("\n\n")
def test1 ():
    """Compare ``SVM453X`` with scikit-learn's linear SVC on a toy problem.

    Prints the parameters learned by both models and the fraction of the six
    training points on which their predictions agree.
    """
    # Toy problem: six 2-D points, three per class.
    X = np.array([ [1,1], [2,1], [1,2], [2,3], [1,4], [2,4] ])
    y = np.array([-1,-1,-1,1,1,1])

    # Model under test.
    own_model = SVM453X()
    own_model.fit(X, y)
    print(own_model.w, own_model.b)

    # Reference: sklearn with C=1e15 to approximate a hard margin.
    reference = sklearn.svm.SVC(kernel='linear', C=1e15)
    reference.fit(X, y)
    print(reference.coef_, reference.intercept_)

    agreement = np.mean(own_model.predict(X) == reference.predict(X))
    print("Acc={}".format(agreement))
def main():
    """Train an OpenCV linear SVM on HOG features and report test accuracy.

    Loads the train/test images with their labels, resizes them, maps the
    string labels to integers, extracts HOG features, trains a C-SVC with a
    linear kernel (C=100) and passes the integer predictions for the test set
    to ``countAccuracy``.
    """
    hog = cv2.HOGDescriptor()

    # GET IMAGES AND THEIR LABEL
    print("Loading pictures...")
    train_img, train_labels = get_images("train", "train_labels.csv")
    test_img, test_labels = get_images("test", "test_labels.csv")
    print("Loaded...")

    # RESIZE
    print("Resizing images...")
    train_img = resize_images(train_img)
    test_img = resize_images(test_img)
    print("Resized...")

    # MAP LABEL TO INT
    train_labels = list(map(strToNumberLabels, train_labels))
    test_labels = list(map(strToNumberLabels, test_labels))

    # EXTRACT FEATURES
    print("Before hog extraction")
    features_train = hog_compute(hog, train_img)
    features_test = hog_compute(hog, test_img)
    print("passed hog extraction")

    trainingDataMat = np.array(features_train)
    labelsMat = np.array(train_labels)
    sample_data = np.array(features_test, np.float32)

    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.setTermCriteria((cv2.TERM_CRITERIA_COUNT, 100, 1.e-10))
    svm.setC(100)
    #svm.setGamma(0.1)

    # Train once, after all parameters are set.  The model used to be
    # trained a first time before setC(100) and then trained again.
    print("Training model...")
    svm.train(trainingDataMat, cv2.ml.ROW_SAMPLE, labelsMat)

    response = svm.predict(sample_data)
    # response[1] holds one row per sample; its first entry is the label.
    final = [int(y[0]) for y in response[1]]
    countAccuracy(final, test_labels)
def hw1q16():
    """Homework 1, question 16: polynomial-kernel SVC, one class vs. rest.

    Uses the module-level ``X_train``/``Y_train``.  For each class in
    (0, 2, 4, 6, 8) an ``SVC`` with a degree-2 polynomial kernel is fitted on
    the binary target ``Y_train == idx``; the sum of absolute dual
    coefficients, the intercept and the number of training errors are
    printed, and a three-panel figure (training data, predictions, support
    vectors) is shown.
    """
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("----------------------------------------")
    print(" Homework 1 Question 16 ")
    print("----------------------------------------")
    # polynomial kernel: (coef0 + gamma * x1.T * x2) ** degree
    for idx in (0, 2, 4, 6, 8):
        svm = sklearn.svm.SVC(
            C=0.01, kernel="poly", degree=2, gamma=1, coef0=1, tol=1e-4, shrinking=True, verbose=False
        )
        Y_train_i = (Y_train == idx).astype(int)
        svm.fit(X_train, Y_train_i)
        Y_predict_i = svm.predict(X_train)
        support = svm.support_
        coef = svm.dual_coef_[0]
        b = svm.intercept_[0]
        E_in = np.count_nonzero(Y_train_i != Y_predict_i)
        print("For class %d:" % (idx))
        print("sum(alpha) = %s" % (np.sum(np.abs(coef)),))
        print("b = %s" % (b,))
        print("E_in = %s" % (E_in,))

        plt.figure()
        # plt.suptitle('%d vs rest' % (idx))
        plt.subplot(311)
        plt.title("Training data: green +, red -")
        plot_01(X_train, Y_train_i)
        # A boolean hides the tick labels on every matplotlib version; the
        # string "off" is treated as truthy by current releases.
        plt.tick_params(axis="x", labelbottom=False)
        plt.subplot(312)
        plt.title("Prediction: green +, red -")
        plot_01(X_train, Y_predict_i)
        plt.tick_params(axis="x", labelbottom=False)
        plt.subplot(313)
        plt.title("Support vectors: blue")
        plt.plot(X_train[:, 0], X_train[:, 1], "r.")
        plt.plot(X_train[support, 0], X_train[support, 1], "b.")
        plt.show()
def OpenCvSVM(X,y):
    """Train an OpenCV linear C-SVC on an 80/20 split and predict the test part.

    The features are converted to float32 and standardised with a
    ``StandardScaler`` fitted on the training part only.

    Args:
        X: Samples, one row per sample.
        y: Integer class labels.

    Returns:
        ``(y_pred, y_test)``: float64 array of predicted labels and the int32
        array of true labels for the test part.
    """
    X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.2,random_state=1)
    X_train=np.array(X_train,dtype=np.float32)
    X_test=np.array(X_test,dtype=np.float32)
    # The training labels are the ones handed to OpenCV, so they get the
    # int32 conversion too (it used to be applied to y_test only).
    y_train=np.array(y_train,dtype=np.int32)
    y_test=np.array(y_test,dtype=np.int32)

    sc=StandardScaler()
    sc.fit(X_train)
    X_train=sc.transform(X_train)
    X_test=sc.transform(X_test)

    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train)

    y_predict = svm.predict(X_test)
    # y_predict[1] holds one row per sample; its first entry is the label.
    y_pred=np.array([row[0] for row in y_predict[1]],dtype=np.float64)
    return y_pred,y_test
def classify(self,X):
    """Classify a sample with the model stored for its position bin.

    The last two entries of ``X`` are a position that ``pos_to_xybin`` maps
    to a grid cell; the remaining entries are the features passed to that
    cell's classifier.

    Returns:
        dict mapping the two labels in ``self.labels`` to 1/0 for the
        predicted label, or to 0.5 each when the guess matches neither.
    """
    x,y = X[-2:]
    xbin,ybin = pos_to_xybin(x,y)
    cell = self.svms[xbin][ybin]
    try:
        guess = cell.predict(np.ravel(X[:-2]))
    except Exception:
        # Fall back to the stored value itself when prediction fails.
        # NOTE(review): presumably some cells hold a fixed label instead of
        # a classifier - confirm.  ``except Exception`` (rather than a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        guess = cell
    if guess == self.labels[0]:
        return {self.labels[0]: 1, self.labels[1]: 0}
    elif guess == self.labels[1]:
        return {self.labels[0]: 0, self.labels[1]: 1}
    else:
        return {self.labels[0]: .5, self.labels[1]: .5}
def find_better_hour(thetime, theday, theweather, thenbhd, svm_pred, kde_pred):
    """Find the hour between 8 and 17 with the best KDE score.

    For every hour the inputs are run through ``process`` and scored with the
    module-level ``svm`` and ``kde``.  Only hours for which the SVM predicts
    -1 are candidates; among them the one with the highest KDE score (above
    -100) wins.

    Args:
        thetime: The hour originally chosen; compared with the best hour.
        theday, theweather, thenbhd: Passed through to ``process``.
        svm_pred, kde_pred: Unused; kept for interface compatibility.

    Returns:
        -1 when no hour qualifies, the best hour itself when it equals
        ``thetime``, otherwise the best hour formatted as ``"<hour>:00"``.
    """
    best_hour = -1
    best_kde = -100
    for hour in range(8, 18):
        proc = process(theday, hour, theweather, thenbhd)
        this_svm = svm.predict(proc)[0]
        this_kde = kde.score_samples(proc)[0]
        if this_svm == -1 and this_kde > best_kde:
            best_hour = hour
            best_kde = this_kde
    # NOTE(review): if thetime arrives as a string this equality never holds
    # against the integer best_hour - confirm against the caller.
    if best_hour == -1 or best_hour == thetime:
        return best_hour
    # Report the winning hour; the loop variable ``hour`` used here before is
    # always 17 once the loop has finished.
    return str(best_hour) + ':00'
def svm_test():
    """Fit a linear SVC on four 2-D points and plot labels vs. predictions.

    Prints the predictions for the training points and the intercept, then
    shows a two-panel figure: the true labels on top, the predictions (with
    class 0 in red and class 1 in green circles) below.
    """
    X_train = np.array([[0, 0], [1, 0], [0, 2], [-2, 0]])
    Y_train = np.array([1, 1, 0, 0])
    svm = sklearn.svm.SVC(C=100000, kernel='linear', shrinking=False, verbose=False)
    svm.fit(X_train, Y_train)
    Y_predict = svm.predict(X_train)
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print(Y_predict)
    b = svm.intercept_[0]
    print(b)

    plt.figure()
    plt.suptitle('svm test')
    plt.subplot(211)
    plot_01(X_train, Y_train)
    plt.subplot(212)
    plot_01(X_train, Y_predict)
    plt.plot(X_train[Y_predict == 0, 0], X_train[Y_predict == 0, 1], 'ro')
    plt.plot(X_train[Y_predict == 1, 0], X_train[Y_predict == 1, 1], 'go')
    plt.show()
def hw1q16():
    """Homework 1, question 16: polynomial-kernel SVC, one class vs. rest.

    Uses the module-level ``X_train``/``Y_train``.  For each class in
    (0, 2, 4, 6, 8) an ``SVC`` with a degree-2 polynomial kernel is fitted on
    the binary target ``Y_train == idx``; the sum of absolute dual
    coefficients, the intercept and the number of training errors are
    printed, and a three-panel figure (training data, predictions, support
    vectors) is shown.
    """
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print('----------------------------------------')
    print(' Homework 1 Question 16 ')
    print('----------------------------------------')
    # polynomial kernel: (coef0 + gamma * x1.T * x2) ** degree
    for idx in (0, 2, 4, 6, 8):
        svm = sklearn.svm.SVC(C=0.01, kernel='poly', degree=2, gamma=1, coef0=1, tol=1e-4, shrinking=True, verbose=False)
        Y_train_i = (Y_train == idx).astype(int)
        svm.fit(X_train, Y_train_i)
        Y_predict_i = svm.predict(X_train)
        support = svm.support_
        coef = svm.dual_coef_[0]
        b = svm.intercept_[0]
        E_in = np.count_nonzero(Y_train_i != Y_predict_i)
        print('For class %d:' % (idx))
        print('sum(alpha) = %s' % (np.sum(np.abs(coef)),))
        print('b = %s' % (b,))
        print('E_in = %s' % (E_in,))

        plt.figure()
        # plt.suptitle('%d vs rest' % (idx))
        plt.subplot(311)
        plt.title('Training data: green +, red -')
        plot_01(X_train, Y_train_i)
        # A boolean hides the tick labels on every matplotlib version; the
        # string 'off' is treated as truthy by current releases.
        plt.tick_params(axis='x', labelbottom=False)
        plt.subplot(312)
        plt.title('Prediction: green +, red -')
        plot_01(X_train, Y_predict_i)
        plt.tick_params(axis='x', labelbottom=False)
        plt.subplot(313)
        plt.title('Support vectors: blue')
        plt.plot(X_train[:, 0], X_train[:, 1], 'r.')
        plt.plot(X_train[support, 0], X_train[support, 1], 'b.')
        plt.show()
def hw1q19():
    """Homework 1, question 19: RBF-kernel SVC for class 0 vs. rest over gamma.

    Uses the module-level ``X_train``/``Y_train``/``X_test``/``Y_test``.  For
    each gamma in (1, 10, 100, 1000, 10000) an ``SVC`` with ``C=0.1`` is
    fitted on the binary target ``Y_train == 0``; the predicted class counts
    and the number of test errors are printed.
    """
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print('----------------------------------------')
    print(' Homework 1 Question 19 ')
    print('----------------------------------------')
    Y_train_0 = (Y_train == 0).astype(int)
    Y_test_0 = (Y_test == 0).astype(int)
    for gamma in (1, 10, 100, 1000, 10000):
        svm = sklearn.svm.SVC(C=0.1, kernel='rbf', gamma=gamma, tol=1e-7, shrinking=True, verbose=False)
        svm.fit(X_train, Y_train_0)
        print('----------------------------------------')
        print('gamma = %s' % (gamma,))
        Y_predict_0 = svm.predict(X_test)
        print('in the prediction:')
        print('n(+) = %s n(-) = %s' % (np.count_nonzero(Y_predict_0 == 1), np.count_nonzero(Y_predict_0 == 0)))
        print('E_out = %s' % (np.count_nonzero(Y_test_0 != Y_predict_0),))
        print('')
def hw1q19():
    """Homework 1, question 19: RBF-kernel SVC for class 0 vs. rest over gamma.

    Uses the module-level ``X_train``/``Y_train``/``X_test``/``Y_test``.  For
    each gamma in (1, 10, 100, 1000, 10000) an ``SVC`` with ``C=0.1`` is
    fitted on the binary target ``Y_train == 0``; the predicted class counts
    and the number of test errors are printed.
    """
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("----------------------------------------")
    print(" Homework 1 Question 19 ")
    print("----------------------------------------")
    Y_train_0 = (Y_train == 0).astype(int)
    Y_test_0 = (Y_test == 0).astype(int)
    for gamma in (1, 10, 100, 1000, 10000):
        svm = sklearn.svm.SVC(C=0.1, kernel="rbf", gamma=gamma, tol=1e-7, shrinking=True, verbose=False)
        svm.fit(X_train, Y_train_0)
        print("----------------------------------------")
        print("gamma = %s" % (gamma,))
        Y_predict_0 = svm.predict(X_test)
        print("in the prediction:")
        print("n(+) = %s n(-) = %s" % (np.count_nonzero(Y_predict_0 == 1), np.count_nonzero(Y_predict_0 == 0)))
        print("E_out = %s" % (np.count_nonzero(Y_test_0 != Y_predict_0),))
        print("")
def plot_decision_boundary(X, y, clf, test_ind = None, resolution = 0.02):
    '''
    Plot the decision boundary of clf together with the data points.

    X: 2D array, size [batch, features], features = 2
    y: class label of every row of X (at most three classes, one marker each)
    clf: fitted classifier with a predict method
    test_ind: optional indices of the rows of X to highlight as test samples
    resolution: step of the evaluation grid
    '''
    markers = ('s', 'x', 'v')  # markers for plot
    colors = ('red', 'green', 'blue', 'gray')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    x1min, x1max = X[:, 0].min(), X[:, 0].max()
    x2min, x2max = X[:, 1].min(), X[:, 1].max()
    xx, yy = np.meshgrid(np.arange(x1min, x1max, resolution),
                         np.arange(x2min, x2max, resolution))
    grid_point = np.c_[xx.ravel(), yy.ravel()]  # [samples, features]

    # Use the classifier that was passed in; the body used to call a global
    # ``svm`` and ignore ``clf``.
    z = clf.predict(grid_point).reshape(xx.shape)
    plt.contour(xx, yy, z, alpha = 0.4, cmap = cmap)
    plt.xlim(x1min, x1max)
    plt.ylim(x2min, x2max)

    # plot data points, one class at a time
    for idx, c1 in enumerate(classes):
        plt.scatter(
            x = X[y == c1, 0],
            y = X[y == c1, 1],
            c = cmap(idx),  # use index of class to get the colour from cmap
            alpha = 0.4,
            edgecolor = 'black',
            marker = markers[idx],
        )

    # highlight test samples
    if test_ind is not None:
        # ``if test_ind:`` is ambiguous for arrays; test the size instead.
        test_ind = np.asarray(test_ind)
        if test_ind.size:
            # ``x_test`` was never defined in this function; take the test
            # rows from X.
            x_test = X[test_ind]
            plt.scatter(
                x = x_test[:, 0],
                y = x_test[:, 1],
                facecolors = 'none',  # hollow markers (replaces the invalid c='')
                alpha = 1.0,  # marker opacity
                marker = 'o',
                edgecolor = 'black',
                linewidths = 2,
                s = 55  # size of marker
            )
def repare_one(svc, movie_id, user_id, rating):
    """Blend one rating with the model's prediction and store the result.

    The stored value is ``round(prediction + rating) / 2.0``; the row of
    table ``help`` identified by ``user_id`` and ``movie_id`` is updated and
    marked ``checked = 1, correct = 2``.

    Args:
        svc: Fitted model with a ``predict`` method.
        movie_id: Movie id of the row to update.
        user_id: User id of the row to update.
        rating: Current rating (numeric).
    """
    conn = pymysql.connect(host='localhost', port=3306, user='******', passwd='', db='small_rekomendacyjny')
    try:
        vector = [Helper.prepare_vector(mov_id=movie_id, us_id=user_id, rat=rating)]
        print(vector)
        repared = svc.predict(vector)
        print(repared)
        repared = float(repared[0])
        # NOTE(review): rounding happens before halving, which yields values
        # in steps of 0.5 - presumably intended; confirm.
        new_one = round(repared + rating) / 2.0
        new_curr = conn.cursor()
        print('stara: ' + str(rating) + ' poprawiona: ' + str(new_one))
        new_curr.execute(
            "UPDATE help SET rating=%s, checked = 1, correct = 2 WHERE user_id = %s AND movie_id = %s",
            [new_one, user_id, movie_id])
        conn.commit()
    finally:
        # Release the connection even when prediction or the update fails.
        conn.close()
def trainTestSvm(data,labels,lblvec,B,L,T,split = 0.8,initializer = np.zeros,use_bias = True,kernel = None):
    """Train a Pegasos SVM on two classes and report its test accuracy.

    Args:
        data, labels, lblvec: Passed to ``getData``, which returns the
            samples of the two classes to separate.
        B, L, T: Passed to ``pegasos_solver.train`` (``L`` and ``T`` also to
            ``kernelTrain``).  NOTE(review): presumably batch size,
            regularisation and iteration count - confirm against the solver.
        split: Fraction of the shuffled data used for training.
        initializer: Callable taking a shape; builds the initial weights and
            bias.
        use_bias: When False, ``None`` is passed as the bias.
        kernel: ``None`` for primal training, otherwise the kernel passed to
            ``kernelTrain``/``predictKernel``.

    Returns:
        float: accuracy on the test part.  Accuracy, TPR and FPR are printed;
        a rate whose denominator is zero is reported as NaN.
    """
    # sectioning data into only two labels and shuffling them
    data1,data2 = getData(data,labels,lblvec)
    label1 = np.ones([data1.shape[0],1])*1
    label2 = np.ones([data2.shape[0],1])*-1
    data12 = np.concatenate((data1,data2))
    label12 = np.concatenate((label1,label2))
    perm = np.random.permutation(data12.shape[0])
    data12 = data12[perm]
    label12 = label12[perm]

    # split into training and testing datasets
    sp = int(split * data12.shape[0])
    train,trainlbl,test,testlbl = data12[:sp],label12[:sp],data12[sp:],label12[sp:]

    # create and train the svm on the training set
    svm = pegasos_solver()
    bias = initializer([1]) if use_bias else None
    svm.init(train,trainlbl,initializer([1,data12.shape[1]]),bias)

    # train and test the svm either with primal subgradient descent or mercer kernels
    if kernel is None:
        svm.train(B,L,T)
        tres = svm.predict(test)
    else:
        svm.kernelTrain(L,T,kernel)
        tres = svm.predictKernel(kernel,test)

    # Confusion counts: positive predictions are > 0, positive labels are > 0.
    tp,fp,tn,fn = 0,0,0,0
    for i,j in zip(tres,testlbl):
        if i > 0:
            if j > 0:
                tp += 1
            else:
                fp += 1
        elif j < 0:
            tn += 1
        else:
            fn += 1

    accuracy = (tp + tn)/(tres.shape[0])
    # A random split can leave the test part without positives or without
    # negatives; report NaN instead of raising ZeroDivisionError.
    tpr = tp/(tp + fn) if (tp + fn) else float('nan')
    fpr = fp/(tn + fp) if (tn + fp) else float('nan')
    print("Accuracy",accuracy,"TPR",tpr, "FPR",fpr)
    return accuracy
def svm_test():
    """Fit a linear SVC on four 2-D points and plot labels vs. predictions.

    Prints the predictions for the training points and the intercept, then
    shows a two-panel figure: the true labels on top, the predictions (with
    class 0 in red and class 1 in green circles) below.
    """
    X_train = np.array([[0, 0], [1, 0], [0, 2], [-2, 0]])
    Y_train = np.array([1, 1, 0, 0])
    svm = sklearn.svm.SVC(C=100000, kernel="linear", shrinking=False, verbose=False)
    svm.fit(X_train, Y_train)
    Y_predict = svm.predict(X_train)
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print(Y_predict)
    b = svm.intercept_[0]
    print(b)

    plt.figure()
    plt.suptitle("svm test")
    plt.subplot(211)
    plot_01(X_train, Y_train)
    plt.subplot(212)
    plot_01(X_train, Y_predict)
    plt.plot(X_train[Y_predict == 0, 0], X_train[Y_predict == 0, 1], "ro")
    plt.plot(X_train[Y_predict == 1, 0], X_train[Y_predict == 1, 1], "go")
    plt.show()
def predict(self, svm, test):
    """
    Return the SVM's prediction for a landmark encoding.

    NOTE: The SVM must have been trained beforehand.

    Input
    -----
    svm:
        SVM object used for detection.
    test:
        Landmark array handed to the SVM for prediction.

    Output
    ------
    prediction:
        Whatever ``svm.predict(test)`` returns.

    TODO: This can be the same for both cv2 and sklearn
    """
    return svm.predict(test)
def plot_decision_boundary(svm,X,border_size):
    """Colour the decision regions of ``svm`` over the range of ``X``.

    The classifier is evaluated on a 100 x 100 grid spanning the minimum and
    maximum of the first two columns of ``X``; the result is drawn as filled
    contours and the axes are widened by ``border_size`` on every side.

    Args:
        svm: Classifier whose ``predict`` accepts an array with one row per
            sample and returns one numeric label per row.
        X: 2-D array; columns 0 and 1 are the plotted features.
        border_size: Margin added around the data range.
    """
    min_x0 = X[:,0].min()
    max_x0 = X[:,0].max()
    min_x1 = X[:,1].min()
    max_x1 = X[:,1].max()

    num_samples = 100
    xx0,xx1 = np.meshgrid(np.linspace(min_x0,max_x0,num_samples),
                          np.linspace(min_x1,max_x1,num_samples))

    # One batched call replaces 10,000 single-sample predicts made through a
    # Python 2-only xrange() loop with 1-D inputs.
    grid = np.c_[xx0.ravel(), xx1.ravel()]
    hypotheses = np.asarray(svm.predict(grid), dtype=float)
    hypotheses = np.reshape(hypotheses, (num_samples,num_samples))

    plt.contourf(xx0,xx1,hypotheses, cmap=plt.cm.Paired, alpha=0.8)
    plt.axis([min_x0 - border_size,max_x0 + border_size,min_x1 - border_size,max_x1 + border_size])
def LinearSVC(X_train, y_train, X_test, y_test):
    """Fit a linear SVC and return test predictions plus a CV score.

    Args:
        X_train, y_train: Data the classifier is fitted on.
        X_test, y_test: Data that is predicted and, separately, handed to
            ``cross_val_score``.

    Returns:
        ``(y_pred, score)``: predictions for ``X_test`` and the mean of
        ``cross_val_score`` computed on ``(X_test, y_test)``.
    """
    from sklearn import svm
    from sklearn.model_selection import cross_val_score

    settings = dict(
        C=1.0,
        class_weight=None,
        dual=True,
        fit_intercept=True,
        intercept_scaling=1,
        loss='squared_hinge',
        max_iter=1000,
        multi_class='ovr',
        penalty='l2',
        random_state=None,
        tol=0.0001,
        verbose=0,
    )
    fitted = svm.LinearSVC(**settings).fit(X_train, y_train)

    y_pred = fitted.predict(X_test)
    cv_mean = cross_val_score(fitted, X_test, y_test).mean()
    return y_pred, cv_mean
def results():
    """Render the results page.

    For a POST request the submitted time, weekday, weather and neighbourhood
    are processed and scored with the module-level ``svm`` and ``kde`` (the
    KDE score is offset by a per-weather constant), alternatives are looked
    up with the ``find_better_*`` helpers and everything is handed to the
    template.  Any other request renders the bare template.
    """
    if request.method != 'POST':
        return render_template('results.html')

    result = request.form
    thetime = result['time']
    theday = result['weekday']
    theweather = result['weather']
    thenbhd = result['nbhd']

    processed = process(theday, thetime, theweather, thenbhd)

    # Offset added to the KDE score for each weather condition.
    weather_scale = {
        'Clear or Partly Cloudy': 0,
        'Fog/Smog/Smoke': 5.25,
        'Snowing': 4.79,
        'Raining': 1.2
    }

    svm_pred = svm.predict(processed)[0]
    kde_pred = kde.score_samples(processed)[0] + weather_scale[theweather]

    scoring_args = (thetime, theday, theweather, thenbhd, svm_pred, kde_pred)
    better_day = find_better_day(*scoring_args)
    better_hour = find_better_hour(*scoring_args)
    better_nbhd = find_better_nbhd(*scoring_args)

    return render_template(
        "results.html",
        result=result,
        svm_pred=svm_pred,
        kde_pred=kde_pred,
        processed=processed,
        better_day=better_day,
        better_hour=better_hour,
        better_nbhd=better_nbhd,
    )
def scores_ovo_student(self, X): ''' Compute class scores for OVO. Arguments: X: Features to predict. Returns: scores: a numpy ndarray with scores. ''' pred = [] for x in X: scores = np.zeros(len(self.labels)) for (l1, l2), svm in self.binary_svm.items(): p = svm.predict([x]) if p: scores[l1] += 1 else: scores[l2] += 1 pred.append(scores) return np.array(pred)
def predict(feature):
    """Classify the test images with the KNN and SVM models and print reports.

    Features and true labels come from ``calc_features_and_labels`` for the
    ``manmade_test`` and ``natural_test`` folders under the current working
    directory; every feature vector is classified individually by the
    module-level ``knn`` and ``svm``.

    Args:
        feature: Passed through to ``calc_features_and_labels``.
    """
    manmade_test = os.getcwd() + "/Images/manmade_test/"
    natural_test = os.getcwd() + "/Images/natural_test/"
    index, responses = calc_features_and_labels(manmade_test, natural_test, feature)

    results_knn = []
    results_svm = []
    for feat in index:
        results_knn.append(knn.predict([feat])[0])
        results_svm.append(svm.predict([feat])[0])

    target_names = ['manmade', 'natural']
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print('KNN Classifier')
    print(classification_report(responses, results_knn, target_names=target_names))
    print('SVM Classifier')
    print(classification_report(responses, results_svm, target_names=target_names))
def main(args):
    """Train an SVM and a random forest and print their accuracy on raw data.

    The models are fitted on the feature files loaded from
    ``args.feature_dir`` and evaluated on the features generated from the CSV
    at ``args.raw_data_file``.  In both arrays column 0 is the label and the
    remaining columns are the feature vector.
    """
    # Evaluation set, generated from the raw CSV.
    raw_frame = pandas.read_csv(args.raw_data_file)
    eval_table = extract_features.generate_features(raw_frame).values
    eval_labels, eval_vectors = eval_table[:, 0], eval_table[:, 1:]

    # Training set, loaded from the feature directory.
    train_table = mkf.load_files(args.feature_dir)
    train_labels, train_vectors = train_table[:, 0], train_table[:, 1:]

    svm = mkf.SVM(500)
    forest = RandomForestClassifier(max_depth=3)
    svm.fit(train_vectors, train_labels)
    forest.fit(train_vectors, train_labels)

    svm_res = svm.predict(eval_vectors)
    forest_res = forest.predict(eval_vectors)

    print("Accuracy of svm: {}".format(accuracy_score(svm_res, eval_labels)))
    print("Accuracy of random forest: {}".format(
        accuracy_score(forest_res, eval_labels)))
class predict:
    """Script-style namespace: trains an SVC on ``sample30DataAllInt.csv``.

    Everything below runs once, when the class body is executed.  The
    duration ``diff = endtime - starttime`` is added as a feature, the
    identifier and time columns plus the target ``Status`` are dropped, an
    SVC is fitted on a train split and dumped to ``svm.pkl``.  All
    intermediate values stay available as class attributes.
    """
    input_file = pd.read_csv('sample30DataAllInt.csv')
    dataframe = pd.DataFrame(input_file)
    dataframe['diff'] = dataframe['endtime'] - dataframe['starttime']
    # One drop call instead of five chained ones.
    dataframe = dataframe.drop(['station', 'starttime', 'endtime', 'Status', 'channel'], axis=1)
    print(dataframe)

    y = input_file['Status']
    X_train, X_test, y_train, y_test = train_test_split(dataframe, y, random_state=1)

    # The right-hand ``svm`` is the module-level name; from here on the class
    # attribute ``svm`` is the fitted estimator.
    svm = svm.SVC()
    svm = svm.fit(X_train, y_train)
    y_predict = svm.predict(X_test)
    joblib.dump(svm, 'svm.pkl')
    print(y_predict)

    # Not used by the code below.
    s = [[460, 34567, 876545678765, 60]]
    # Keep the score: it used to be computed and thrown away.
    accuracy = accuracy_score(y_test, y_predict)
def hw1q20():
    """Homework 1, question 20: pick gamma by repeated hold-out validation.

    Uses the module-level ``X_train``/``Y_train``.  In each of 10 runs the
    training data is shuffled, 1000 samples are held out for validation, an
    RBF ``SVC`` (``C=0.1``) is fitted for every gamma in
    (1, 10, 100, 1000, 10000) on the rest, and the gamma with the fewest
    validation errors gets one vote (ties go to the first).  The vote counts
    are printed at the end.
    """
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print("----------------------------------------")
    print(" Homework 1 Question 20 ")
    print("----------------------------------------")
    Y_train_0 = (Y_train == 0).astype(int)
    C = 0.1
    m = len(Y_train_0)
    gammas = [1, 10, 100, 1000, 10000]
    counts = [0] * len(gammas)
    for nrun in range(10):
        print("run %s" % (nrun,))
        # generate a random order of m indices
        arr = np.arange(m)
        np.random.shuffle(arr)
        # pick 1000 for cross validation
        X_curval_0 = X_train[arr[:1000]]
        Y_curval_0 = Y_train_0[arr[:1000]]
        X_curtrain_0 = X_train[arr[1000:]]
        Y_curtrain_0 = Y_train_0[arr[1000:]]

        E_vals = []
        for gamma in gammas:
            svm = sklearn.svm.SVC(C=C, kernel="rbf", gamma=gamma, tol=1e-3, shrinking=True, verbose=False)
            svm.fit(X_curtrain_0, Y_curtrain_0)
            Y_curpredict_0 = svm.predict(X_curval_0)
            E_vals.append(np.count_nonzero(Y_curval_0 != Y_curpredict_0))
        counts[np.argmin(E_vals)] += 1
    for gamma, picked in zip(gammas, counts):
        print("gamma %s got picked %s times" % (gamma, picked))
def hw1q20():
    """Homework 1, question 20: pick gamma by repeated hold-out validation.

    Uses the module-level ``X_train``/``Y_train``.  In each of 10 runs the
    training data is shuffled, 1000 samples are held out for validation, an
    RBF ``SVC`` (``C=0.1``) is fitted for every gamma in
    (1, 10, 100, 1000, 10000) on the rest, and the gamma with the fewest
    validation errors gets one vote (ties go to the first).  The vote counts
    are printed at the end.
    """
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print('----------------------------------------')
    print(' Homework 1 Question 20 ')
    print('----------------------------------------')
    Y_train_0 = (Y_train == 0).astype(int)
    C = 0.1
    m = len(Y_train_0)
    gammas = [1, 10, 100, 1000, 10000]
    counts = [0] * len(gammas)
    for nrun in range(10):
        print('run %s' % (nrun,))
        # generate a random order of m indices
        arr = np.arange(m)
        np.random.shuffle(arr)
        # pick 1000 for cross validation
        X_curval_0 = X_train[arr[:1000]]
        Y_curval_0 = Y_train_0[arr[:1000]]
        X_curtrain_0 = X_train[arr[1000:]]
        Y_curtrain_0 = Y_train_0[arr[1000:]]

        E_vals = []
        for gamma in gammas:
            svm = sklearn.svm.SVC(C=C, kernel='rbf', gamma=gamma, tol=1e-3, shrinking=True, verbose=False)
            svm.fit(X_curtrain_0, Y_curtrain_0)
            Y_curpredict_0 = svm.predict(X_curval_0)
            E_vals.append(np.count_nonzero(Y_curval_0 != Y_curpredict_0))
        counts[np.argmin(E_vals)] += 1
    for gamma, picked in zip(gammas, counts):
        print('gamma %s got picked %s times' % (gamma, picked))
def test(path):
    """Classify every frame of the video at *path* and print the distinct labels.

    Each frame is resized to 250x250, converted to grayscale and float32, and
    passed to ``cv2.PCACompute``; the second value it returns (the
    eigenvectors, per OpenCV's documentation) is flattened into one feature
    vector per frame.  All vectors are then classified with the module-level
    ``svm`` and each distinct predicted label is printed once.

    Fixes over the previous version:
    * the video is opened from the ``path`` argument (it used to read an
      unrelated name, ``path_video``, and ignore the argument);
    * the read loop stops when ``cap.read()`` reports no frame instead of
      crashing in ``cv2.resize`` at end of stream;
    * the capture is always released.
    """
    cap = cv2.VideoCapture(path)
    testing = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed: end of the video or a read failure.
                break
            res = cv2.resize(frame, (250, 250))
            gray_image = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
            xarr = np.squeeze(np.array(gray_image).astype(np.float32))
            _mean, vectors = cv2.PCACompute(xarr)
            testing.append(np.array(vectors).ravel())
    finally:
        cap.release()

    if not testing:
        # Nothing could be read, so there is nothing to classify.
        return

    logos = svm.predict(testing)
    for logo in set(logos):
        print(logo)
def test(path):
    """Print the set of labels the module-level ``svm`` assigns to a video's frames.

    For every frame read from *path*: resize to 250x250, convert to
    grayscale, cast to float32, run ``cv2.PCACompute`` and flatten its second
    return value into a feature vector.  The collected vectors are classified
    in one ``svm.predict`` call and each distinct label is printed once.

    Changes from the earlier implementation: the ``path`` argument is now the
    source that is opened (previously ``path_video`` was used and the
    argument ignored), reading stops cleanly when no more frames are
    available, and the capture handle is released even if processing fails.
    """
    cap = cv2.VideoCapture(path)
    testing = []
    try:
        ret, frame = cap.read()
        while ret:
            res = cv2.resize(frame, (250, 250))
            gray_image = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
            xarr = np.squeeze(np.array(gray_image).astype(np.float32))
            _m, v = cv2.PCACompute(xarr)
            flat_arr = np.array(v).ravel()
            testing.append(flat_arr)
            # ret turns falsy once the stream is exhausted (or unreadable).
            ret, frame = cap.read()
    finally:
        cap.release()

    if testing:
        logos = svm.predict(testing)
        uniqlogos = list(set(logos))
        for i in uniqlogos:
            print(i)
def rbfSVM():
    """Fit, tune and evaluate an RBF-kernel SVC on a synthetic XOR data set.

    Steps: seed NumPy's global RNG, draw 200 2-D Gaussian points labelled
    +1/-1 by the XOR of their coordinate signs, split 70/30, grid-search
    ``C`` and ``gamma`` with a 5-split stratified shuffle split, print the
    best setting, refit an SVC with it, then hand the predictions to
    ``plot_decision_regions`` and ``evaluateSVM``.  Returns nothing.
    """
    # generate fake data
    np.random.seed(0)
    X_xor = np.random.randn(200, 2)
    quadrant_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
    y_xor = np.where(quadrant_xor, 1, -1)

    # 70% training and 30% test
    X_train, X_test, y_train, y_test = train_test_split(
        X_xor, y_xor, test_size=0.3, random_state=109)

    # find best C and gamma parms
    search_space = {
        "gamma": np.logspace(-9, 3, 13),
        "C": np.logspace(-2, 10, 13),
    }
    splitter = StratifiedShuffleSplit(n_splits=5, test_size=0.2,
                                      random_state=42)
    search = GridSearchCV(SVC(), param_grid=search_space, cv=splitter)
    search.fit(X_train, y_train)
    print("The best parameters are %s with a score of %0.2f"
          % (search.best_params_, search.best_score_))

    # create classifier from the winning parameters
    best = search.best_params_
    classifier = SVC(kernel='rbf', random_state=0,
                     gamma=best['gamma'], C=best['C'])
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    plot_decision_regions(X_train, X_test, y_train, y_pred,
                          classifier=classifier)

    # evaluate accuracy
    evaluateSVM(y_test, y_pred)
def predict(df, hate=10, threshold=0.6):
    '''Predict the hatefulness of the comments in *df*.

    *df* is anything ``pd.DataFrame`` accepts and must contain a "tweet"
    column.  The tweets are cleaned with ``clean_tweets``, vectorised with
    the module-level ``vectorizer`` and classified with the module-level
    ``svm``.

    Parameters:
    - hate: maximum number of hateful comments to return in first_hate.
    - threshold: minimum "proba" a comment needs to count as hateful.

    Returns a list of 4 elements, in this order:
    - first_hate: up to `hate` hateful comments as a list of
      {"tweet", "proba"} dicts, highest "proba" first
    - count_hate: number of comments predicted 1 with proba >= threshold
    - count_comments: total number of comments scored
    - percentage: 100 * count_hate / count_comments, rounded to 2 decimals
      (0.0 when there are no comments)
    '''
    test = pd.DataFrame(df)
    test_tweet = clean_tweets(test["tweet"])
    test["clean_tweet"] = test_tweet

    x_test_vec = vectorizer.transform(test_tweet)
    y_pred_svm = svm.predict(x_test_vec)
    test["prediction"] = y_pred_svm

    # NOTE(review): this keeps column 0 of predict_proba.  For scikit-learn
    # estimators that is the probability of the first entry of
    # ``svm.classes_`` -- confirm that this is the "hate" class, since the
    # filter below pairs it with prediction == 1.
    test["proba"] = [row[0] for row in svm.predict_proba(x_test_vec)]

    test_sort = test.sort_values(by=["proba"], ascending=False)
    hateful_comments = test_sort[(test_sort["prediction"] == 1)
                                 & (test_sort["proba"] >= threshold)]

    count_hate = len(hateful_comments)
    count_comments = len(y_pred_svm)
    # float() keeps the ratio fractional under Python 2 as well; the guard
    # avoids a ZeroDivisionError when no comments were supplied.
    if count_comments:
        hate_ratio = float(count_hate) / count_comments
    else:
        hate_ratio = 0.0
    percentage = round(hate_ratio * 100, 2)

    first_hate = hateful_comments[["tweet", "proba"]][:hate]
    return [
        first_hate.to_dict('records'), count_hate, count_comments, percentage
    ]
def main(argv):
    """Load the conversation data, then train and evaluate an SVM on 10 splits.

    The ``reader`` module loads labels, data files and the bad-word list;
    ``text_mining.tokenize_words`` turns the conversations into tokens;
    ``get_splits`` produces per-split train/test data.  For each of the 10
    splits an SVM is built with ``get_svm``, the test tokens are transformed
    with the module-level ``tfidf_vect`` and the predictions are passed to
    ``evaluator.evalute``.  ``evaluator.average()`` is called at the end.
    *argv* is accepted but not used.
    """
    # Populate the reader's state before querying it.
    reader.read_labels()
    reader.read_data_files()
    reader.read_other_data_file()

    convo_ids = reader.get_all_convos()
    harrass_convos = reader.get_harrassment_convos()
    bad_words = reader.read_bad_words()

    labels, tokens, true_class = text_mining.tokenize_words(
        convo_ids,
        reader.conversation_text,
        reader.conversation_labels,
        set(bad_words),
    )

    # get_splits returns six parallel per-split sequences; two are unused here.
    (_train_labels, train_tokens, _test_labels,
     test_tokens, test_true_class, train_true_labels) = get_splits(
        convo_ids, harrass_convos, labels, tokens, true_class)

    for fold in range(10):
        classifier = get_svm(train_tokens[fold], train_true_labels[fold])
        fold_features = tfidf_vect.transform(test_tokens[fold])
        fold_predictions = classifier.predict(fold_features)
        evaluator.evalute(test_true_class[fold], fold_predictions)

    evaluator.average()
def test_and_print_svm_regression(test_x, test_t, svm):
    """Print the average error and accuracy of a regressor on a rated test set.

    ``svm.predict(test_x)`` is rounded to the nearest whole number and
    clamped to the range [1.0, 5.0].  The clamped value is used both for the
    absolute error against ``test_t[i]`` and for the exact-match accuracy.
    (Previously accuracy compared the *unclamped* rounded value, so a
    prediction of e.g. 5.6 against a target of 5 contributed zero error but
    was still counted as wrong.)

    Prints two lines, "Average Error: ..." and "Accuracy: ...", and returns
    nothing.  The prints are single-argument calls, giving the same output
    on Python 2 and Python 3.
    """
    test_size = len(test_t)
    predictions = svm.predict(test_x)

    num_correct = 0
    value = 0
    for i in range(test_size):
        rating = round(predictions[i], 0)
        # Keep the prediction inside the valid 1..5 range.
        if rating > 5.0:
            rating = 5.0
        if rating < 1.0:
            rating = 1.0
        value += abs(rating - test_t[i])
        if rating == test_t[i]:
            num_correct += 1

    print("Average Error: " + str(value / test_size))  # avg error
    print("Accuracy: " + str(float(num_correct) / test_size))  # accuracy
def score(x, y, svm, interval):
    """Return the accuracy of *svm* on samples ``interval..len(y)-1``.

    Parameters:
        x: indexable collection of feature vectors.
        y: indexable collection of true labels; 1 marks a positive, any
           other value a negative.
        svm: object whose ``predict([sample])`` returns a one-element
             sequence holding the predicted label.
        interval: index of the first sample to evaluate.

    Returns:
        0.0 when no negative was classified correctly and every positive
        was (which includes the case of no samples at all); otherwise
        ``(true positives + true negatives) / number of samples`` as a float.

    The evaluation now uses the ``y`` and ``interval`` arguments; it
    previously read the unrelated module-level names ``resultY`` and
    ``dataSplit``.
    """
    tp = 0.0
    tn = 0.0
    totalp = 0.0
    totaln = 0.0

    for i in range(interval, len(y)):
        # Index the single prediction explicitly instead of calling int()
        # on the whole returned sequence.
        guess = int(svm.predict([x[i]])[0])
        if y[i] == 1:
            totalp += 1
            if guess == 1:
                tp += 1
        else:
            totaln += 1
            if y[i] == guess:
                tn += 1

    if tn == 0.0 and tp == totalp:
        return 0.0
    return float((tp + tn) / (totaln + totalp))
def predict(filename):
    """Print bayes/svm predictions next to the actual result for each CSV row.

    The CSV's first row is a header and is skipped.  In every other row,
    column 0 is team A, column 1 team B, column 2 the date, column 3 the
    actual result and columns 4+ are attributes.  For each row the row of
    team B on the same date is looked up; the attributes of both rows are
    concatenated and passed, as ``{'attributes': {index: value}}``, to the
    module-level ``bayes`` and ``svm`` predictors.

    Rows whose opponent has no row for that date are skipped (previously the
    opponent data of an earlier row was silently reused).  A failure while
    predicting or printing one row skips that row only.
    """
    with open(filename, "rt") as data_file:
        reader = csv.reader(data_file)
        next(reader, None)  # drop the header; tolerate an empty file
        results = list(reader)

    # (team, date) -> row.  Later rows overwrite earlier ones, matching the
    # old linear scan, which kept the last matching row.
    rows_by_team_and_date = {(row[0], row[2]): row for row in results}

    for result in results:
        teamB = result[1]
        date = result[2]
        teamB_data = rows_by_team_and_date.get((teamB, date))
        if teamB_data is None:
            continue

        data = result[4:] + teamB_data[4:]
        try:
            print("bayes: " + bayes.predict({'attributes': dict(enumerate(data))}))
            print("svm: " + svm.predict({'attributes': dict(enumerate(data))}))
            print("actual: " + result[3])
        except Exception:
            # Deliberate best effort: one bad row must not stop the rest.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
def testSVM(title, svm, zero, one, two):
    """Report how many samples of classes 0, 1 and 2 *svm* labels correctly.

    Parameters:
        title: heading printed (and written) before the figures.
        svm: object whose ``predict(sample)[0]`` is the predicted class.
        zero, one, two: iterables of samples whose true class is 0, 1 and 2.

    Prints the title, the correct/wrong counts and the integer (floored)
    percentage of correct predictions.  When the module-level
    ``write_results`` flag is truthy, the same report is appended to
    ``results.txt``.  Returns nothing.

    The percentage is 0 when no samples were given (this used to raise
    ZeroDivisionError); ``//`` keeps the Python 2 integer result on
    Python 3 too.  The prints are single-argument calls with the same
    output on both versions.
    """
    numcorrect = 0
    numwrong = 0
    for correct, testing in ((0, zero), (1, one), (2, two)):
        for d in testing:
            if svm.predict(d)[0] == correct:
                numcorrect += 1
            else:
                numwrong += 1

    total = numcorrect + numwrong
    percent = numcorrect * 100 // total if total else 0

    print(title)
    print("Correct %s" % (numcorrect,))
    print("Wrong %s" % (numwrong,))
    print("%s %% \n" % (percent,))

    if write_results:
        # ``with`` closes the file even if one of the writes fails.
        with open('results.txt', 'a') as f:
            f.write(title + "\n")
            f.write("Correct: " + str(numcorrect) + "\n")
            f.write("Wrong: " + str(numwrong) + "\n")
            f.write(str(percent) + '%' + "\n\n")
def get_hard_negatives(svm, negative_set):
    """Collect descriptors of negative sub-windows that *svm* labels "pos".

    For every image in *negative_set* a random 10% of the windows returned
    by ``getSubWindows`` is cropped and described with ``train_image``.  All
    descriptors are classified in a single ``svm.predict`` call, and those
    predicted as "pos" are returned as a list.  Progress is printed along
    the way.
    """
    sample_fraction = 0.1
    descriptors = []

    print("Getting hard features")
    for image_number, image in enumerate(negative_set):
        print(image_number, image.shape)

        windows = getSubWindows(image)
        windows = random.sample(windows, int(len(windows) * sample_fraction))

        # These results are only needed by the alternative HOG path that is
        # currently disabled; the calls are kept exactly as before.
        gradientImage = computeCenteredGradient(
            image, (image.shape[0], image.shape[1]))
        integralHistogram = getIntegralHistogram(gradientImage)

        print(len(windows))
        for window in windows:
            top_left, bottom_right = window[0], window[1]
            # Create sub image
            crop = np.array(image[top_left[0]:bottom_right[0],
                                  top_left[1]:bottom_right[1]])
            # Save the descriptor of the sub image.
            descriptors.append(train_image(crop))

    print("Testing on ", len(descriptors), "negative inputs")
    svm_result = svm.predict(descriptors)
    print("Result size:", len(svm_result))

    # A negative window classified as "pos" is a hard negative.
    return [descriptors[position]
            for position, label in enumerate(svm_result)
            if label == "pos"]
def runEnsemble(self):
    """Run the weighted ensemble over every fold and print its accuracy.

    For each (train, test) pair in ``self.kf`` the KNN, KMEANS, SVM and GMM
    models from ``self.models`` are fitted on the fold's training data (first
    reduced with ``self.subsetData`` when the module-level ``increase`` flag
    is truthy), their test-set predictions are combined by
    ``self.weightPredictions`` and the accuracy from ``self.getAccuracy`` is
    printed.  Returns nothing.

    The print is a single-argument call, so the output is identical under
    Python 2 and Python 3.
    """
    model_names = ("KNN", "KMEANS", "SVM", "GMM")

    for train, test in self.kf:
        # Extract models (the same objects are refitted on every fold)
        models = [self.models[name] for name in model_names]

        # Set up training and test data
        train_set, train_labels = self.getCurrFoldTrainData(train)
        test_set, test_labels = self.getCurrFoldTestData(test)
        if increase:
            train_set, train_labels = self.subsetData(train_set, train_labels)

        # Fit the models
        for model in models:
            model.fit(train_set, train_labels)

        # Generate predictions, then weight them (KNN, KMEANS, SVM, GMM order)
        predictions = [model.predict(test_set) for model in models]
        preds = self.weightPredictions(*predictions)

        acc = self.getAccuracy(test_labels, preds)
        print("(ENSEMBLE) Percent correct is %s" % (acc,))
def run(attrib_idx):
    """Fit a linear SVM for one attribute and save its weight vector.

    Loads the dict stored in ``principal_directions/wspace_att_<idx>.npy``,
    takes ``results['dlatents']`` as inputs and the row-wise argmax of
    ``results[attrib_idx]`` as class targets, fits an
    ``sklearn.svm.LinearSVC`` and saves the first row of ``coef_`` to
    ``principal_directions/direction_<idx>``.  Returns nothing.

    Parameters:
        attrib_idx: integer attribute index; used both in the file names and
            as the key into the loaded dict.
    """
    # The file holds a pickled Python dict, which NumPy >= 1.16.3 refuses to
    # load unless allow_pickle=True is passed.
    # SECURITY: unpickling can execute arbitrary code -- only load files
    # produced by a trusted source.
    results = np.load(
        "principal_directions/wspace_att_%d.npy" % attrib_idx,
        allow_pickle=True,
    ).item()

    # Pruning of low-confidence samples is currently disabled, so every
    # sample is kept.
    pruned_indices = list(range(results['latents'].shape[0]))

    # Fit SVM to the remaining samples.
    svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
    space = 'dlatents'
    svm_inputs = results[space][pruned_indices]
    svm = sklearn.svm.LinearSVC(C=1.0, dual=False, max_iter=10000)
    svm.fit(svm_inputs, svm_targets)

    # First row of the coefficient matrix.
    # NOTE(review): this is the full direction only if the targets are
    # binary -- confirm.
    w = svm.coef_[0]
    np.save("principal_directions/direction_%d" % attrib_idx, w)
#Split the date and labels X_train, X_test, y_train, y_test = train_test_split(df_data, df_labels, test_size=0.2, random_state=1377) print('ya') #Selecting paramaters #param = {'criterion':['gini','entropy'], 'splitter':['best','random'], 'min_samples_split':[2,5,10,15], 'min_weight_fraction_leaf':[0,.5], 'min_impurity_decrease':[0,2,3,5]} #Make the models #clf = GridSearchCV(tree.DecisionTreeClassifier(random_state=420), param) clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=420) #Train the model start = time() clf.fit(X_train, y_train) print('yeet with a time of:', time() - start, 'seconds') #Esting the model y_pred = clf.predict(X_test) #Evaluation print("Accuracy Score:", accuracy_score(y_test, y_pred)) #Saveing the model #pkl_filename = "DecisionTree.pkl" #with open(pkl_filename, 'wb') as file: # pickle.dump(clf, file) #print('Model saved')
# see http://cs.nyu.edu/~rostami/presentations/L1_vs_L2.pdf for some more info.

"""
Note : "Choosing between scaling and standardizing is a confusing choice, you have to dive deeper in your data and learner that you are going to use to reach the decision. For starters, you can try both the methods and check cross validation score for making a choice."
"""

#==============================================================================
# Support Vector Machine
#==============================================================================
# NOTE: this assignment rebinds the name `svm` from whatever it referred to
# before (the object providing `SVC`) to the classifier instance, so
# `svm.SVC` is no longer reachable after this statement.
svm = svm.SVC(
    C=1.0,  # let's look at all the param's I should be thinking about.
    cache_size=10,
    class_weight=None,
    coef0=0.0,
    # NOTE(review): recent scikit-learn releases may reject None here
    # ('ovo' / 'ovr' expected) -- confirm against the installed version.
    decision_function_shape=None,
    degree=3,
    gamma=100,
    kernel='rbf',
    max_iter=-1,
    probability=True,
    random_state=None,
    shrinking=True,
    tol=0.001,
    verbose=1)

# Fit on the scaled training features; the targets are flattened to 1-D.
svm.fit(X_train_scale, Y_train.values.ravel())

# Single-argument print: same output on Python 2 and Python 3 (the double
# space after "=" reproduces the separator the print statement inserted).
print("Accuracy_score of SVM with standardized data =  %s %s"
      % (accuracy_score(Y_test, svm.predict(X_test_scale)), '-' * 60))
plt.show()

#%%
# RBF SVM on the first 10000 rows of Xs4: print the mean cross-validation
# score, then fit on the same rows.
svm = sklearn.svm.SVC(C = 1.0, gamma = numpy.power(10.0, -4.0))#C = numpy.power(10.0, 1.0)
print(numpy.mean(sklearn.cross_validation.cross_val_score(svm, Xs4[:10000], labels4[:10000])))

svm.fit(Xs4[:10000], labels4[:10000])

#%%
import sklearn.linear_model

sgdsvm = sklearn.linear_model.SGDClassifier()
#print numpy.mean(sklearn.cross_validation.cross_val_score(svm, Xs3[idxs], labels3[idxs]))

# 10000 row indices drawn uniformly at random, with replacement.  (The
# previous line wrapped this call in an unclosed `range(`, which was a
# syntax error.)
idxs = numpy.random.randint(0, len(Xs3), 10000)
sgdsvm.fit(Xs3[idxs], labels3[idxs])

#%%
# Count the rows of Xs3 where the prediction differs from the label.
# NOTE(review): `svm` was fitted on Xs4 above but is evaluated on Xs3 here --
# confirm this is intended.
out = svm.predict(Xs3[0:10000]) - labels3[0:10000]
print(numpy.count_nonzero(out))

#%%
for i, (index, row) in enumerate(df.iterrows()):#df.loc[87:88]
    im = skimage.io.imread(row['f'], as_grey = True)

    # Crop both dimensions down to a multiple of 8; `//` keeps the slice
    # bounds integral on Python 3 as well.
    im = im[:(im.shape[0] // 8) * 8, :(im.shape[1] // 8) * 8]

    hogs = hog.run(im)

    # Normalise with the module-level mean / std.
    hogs -= mean
    hogs /= std

    # One GMM label per HOG cell, reshaped back to the cell grid.
    labels = gmm.predict(hogs.reshape((-1, hogs.shape[-1]))).reshape((hogs.shape[0], hogs.shape[1]))

    boxes = labels2boxes(labels, 20, b = 15, padding_mode = 'reflect')