def train_and_test(svm, datasets):
    testset = datasets['testset']
    finaltrainset = datasets['finaltrainset']

    svm.train(finaltrainset)
    outputs, costs = svm.test(testset)

    # Map class ids back to label strings
    id_to_class = {}
    for label, class_id in testset.class_to_id.items():  # iteritems() is Python 2 only
        id_to_class[class_id] = label

    lbl = datasets['ground_truth']  # ground truth
    auto_lbl = np.array([int(id_to_class[output[0]]) for output in outputs])  # predicted labels

    # Pad both label vectors with background (class 0) entries
    len_bg = testset.metadata['len_bg']
    lbl = np.append(lbl, [0] * len_bg)
    auto_lbl = np.append(auto_lbl, [0] * len_bg)

    dice, jaccard, precision, recall = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)
    dice = dice[~np.isnan(dice)]  # drop labels absent from both masks
    return dice.mean()
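# For reference, a minimal sketch of the per-label Dice computation that a helper like
# compute_eval_multilabel_metrics presumably performs (the helper itself is not shown
# in this snippet, so the name, signature, and NaN convention below are assumptions):
import numpy as np

def dice_per_label(pred, truth, num_labels):
    """Per-label Dice: 2|A ∩ B| / (|A| + |B|); NaN where a label is absent from both."""
    scores = np.full(num_labels, np.nan)
    for k in range(num_labels):
        a, b = (pred == k), (truth == k)
        denom = a.sum() + b.sum()
        if denom > 0:
            scores[k] = 2.0 * np.logical_and(a, b).sum() / denom
    return scores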
# requires: import sys; import statistics as stats; from sklearn.model_selection import KFold
def main():
    svm = SVM(C=100000, fraction_split=0.7)
    svm.load_dataset()

    accuracies = []
    f1s = []

    num_splits = int(sys.argv[2])
    kf = KFold(n_splits=num_splits)

    split_num = 1
    for training_indices, testing_indices in kf.split(svm.dataset):
        print("Split {}/{}".format(split_num, num_splits))
        svm.update_train_and_test_sets(training_indices, testing_indices)
        svm.train()
        accuracy, f1 = svm.test()
        accuracies.append(accuracy)
        f1s.append(f1)
        split_num += 1

    return stats.mean(accuracies), stats.mean(f1s)
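# The same k-fold pattern with scikit-learn's own estimator, as a self-contained
# sketch (the dataset and the C value are arbitrary choices for illustration):
import statistics as stats
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
kf = KFold(n_splits=5, shuffle=True, random_state=0)
accuracies, f1s = [], []
for train_idx, test_idx in kf.split(X):
    clf = SVC(C=100000, kernel="linear").fit(X[train_idx], y[train_idx])
    pred = clf.predict(X[test_idx])
    accuracies.append(accuracy_score(y[test_idx], pred))
    f1s.append(f1_score(y[test_idx], pred, average="macro"))
print(stats.mean(accuracies), stats.mean(f1s))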
def train(target, labels):
    # The same cells are used for training and for the accuracy check below
    # (no held-out split, despite what the original comment claimed)
    train_cells = target

    ###### Now training ########################
    deskewed = [list(map(deskew, row)) for row in train_cells]  # map() must be materialized in Python 3
    hogdata = [list(map(hog, row)) for row in deskewed]
    trainData = np.float32(hogdata).reshape(-1, bin_n * 4)  # 4 cells x bin_n bins (= 64 when bin_n == 16)
    print(trainData.shape)

    # responses is an n x 1 array, i.e. [[1],[2],[3]...]
    responses = np.float32(labels)[:, np.newaxis]

    # cv2.SVM() is the OpenCV 2.x API; cv2.ml.SVM_create() is the 3.x+ equivalent
    svm = cv2.SVM()
    svm.train(trainData, responses, params=svm_params)
    #svm.save('svm_data.dat')

    ###### Now testing ########################
    deskewed = [list(map(deskew, row)) for row in train_cells]
    hogdata = [list(map(hog, row)) for row in deskewed]
    testData = np.float32(hogdata).reshape(-1, bin_n * 4)
    result = svm.predict_all(testData)

    ###### Check accuracy ########################
    mask = (result == responses).astype(np.uint8)
    correct = np.count_nonzero(mask)
    print(correct * 100.0 / result.size)
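# The snippet above assumes deskew and hog helpers; these follow the versions in
# OpenCV's digits-OCR tutorial, assuming 20x20 pixel cells and bin_n = 16:
SZ = 20      # cell side length in pixels
bin_n = 16   # number of orientation bins

def deskew(img):
    """Straighten a digit using its image moments."""
    m = cv2.moments(img)
    if abs(m['mu02']) < 1e-2:
        return img.copy()
    skew = m['mu11'] / m['mu02']
    M = np.float32([[1, skew, -0.5 * SZ * skew], [0, 1, 0]])
    return cv2.warpAffine(img, M, (SZ, SZ), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)

def hog(img):
    """Histogram of oriented gradients: a bin_n-bin histogram per image quadrant."""
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    bins = np.int32(bin_n * ang / (2 * np.pi))
    bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
    mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
    hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
    return np.hstack(hists)   # 4 * bin_n = 64 values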
def trainTestSvm(data, labels, lblvec, B, L, T, split=0.8, initializer=np.zeros, use_bias=True, kernel=None):
    # Section the data into only two labels and shuffle them
    data1, data2 = getData(data, labels, lblvec)
    label1 = np.ones([data1.shape[0], 1])
    label2 = np.ones([data2.shape[0], 1]) * -1
    data12 = np.concatenate((data1, data2))
    label12 = np.concatenate((label1, label2))
    perm = np.random.permutation(data12.shape[0])
    data12 = data12[perm]
    label12 = label12[perm]

    # Split into training and testing sets
    sp = int(split * data12.shape[0])
    train, trainlbl, test, testlbl = data12[:sp], label12[:sp], data12[sp:], label12[sp:]

    # Create and train the SVM on the training set
    svm = pegasos_solver()
    bias = initializer([1]) if use_bias else None
    svm.init(train, trainlbl, initializer([1, data12.shape[1]]), bias)

    # Train and test the SVM with either primal subgradient descent or a Mercer kernel
    if kernel is None:
        errs = svm.train(B, L, T)
        tres = svm.predict(test)
    else:
        svm.kernelTrain(L, T, kernel)
        tres = svm.predictKernel(kernel, test)

    # Tally the confusion-matrix counts on the test set
    tp, fp, tn, fn = 0, 0, 0, 0
    for i, j in zip(tres, testlbl):
        if i > 0:
            if j > 0:
                tp += 1
            else:
                fp += 1
        else:
            if j < 0:
                tn += 1
            else:
                fn += 1
    print("Accuracy", (tp + tn) / tres.shape[0], "TPR", tp / (tp + fn), "FPR", fp / (tn + fp))
    return (tp + tn) / tres.shape[0]
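# For context, the primal update a Pegasos solver performs (Shalev-Shwartz et al.):
# at step t with learning rate eta_t = 1/(lam*t), shrink the weights and add the
# average of the margin violators in the mini-batch. A minimal sketch, assuming B, L,
# and T in the signature above are the batch size, regularizer lambda, and iteration count:
import numpy as np

def pegasos_train(X, y, lam, T, batch_size):
    """Mini-batch primal subgradient descent for a linear SVM (no bias term)."""
    n, d = X.shape
    w = np.zeros(d)
    for t in range(1, T + 1):
        idx = np.random.choice(n, batch_size, replace=False)
        eta = 1.0 / (lam * t)
        margins = y[idx] * (X[idx] @ w)
        violators = idx[margins < 1]          # points on the wrong side of the margin
        w = (1 - eta * lam) * w
        if len(violators) > 0:
            w += (eta / batch_size) * (y[violators][:, None] * X[violators]).sum(axis=0)
    return w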
# Record the start time
start = time.time()

# Load the training file
print('load TrainData')
X_train, y_train = loadData('./mnist/mnist_train.csv')

# Load the test file
print('load TestData')
X_test, y_test = loadData('./mnist/mnist_test.csv')

print('Init SVM classifier')
svm = SVM(X_train[0:6000], y_train[0:6000])
print('start to train')
svm.train()
print('start to test')
svm.test(X_test[0:400], y_test[0:400])

# Baseline with scikit-learn for comparison:
# clf = svm.SVC()
# clf.fit(X_train[0:6000], y_train[0:6000])
# print(clf.score(X_test[0:400], y_test[0:400]))

# Record the end time
end = time.time()
print('run time:', end - start)
# Inside the per-image loop: map the file/folder name to an emotion label
if 'sad' in j:
    L.append(1)
if 'angry' in j:
    L.append(2)
if 'surprise' in j:
    L.append(3)
if 'natural' in j:
    L.append(4)
if 'fear' in j:
    L.append(5)
if 'disgust' in j:
    L.append(6)
h = hog.compute(resized_image)
Big.append(h)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)

BN = np.array(Big, np.float32)
LN = np.array(L, np.int32)  # np.int is removed in NumPy 1.24+; OpenCV expects int32 labels
print(BN.shape)
print(LN.shape)

svm.train(BN, cv2.ml.ROW_SAMPLE, LN)
svm.save('emotions.dat')

#filename = 'SVModel.sav'
#pickle.dump(svm, open(filename, 'wb'))
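# The hog object above is assumed to be a cv2.HOGDescriptor; a sketch of a typical
# configuration for fixed-size face crops (the 64x64 window, cell sizes, and file
# name below are illustrative assumptions, not values from the original script):
import cv2

win_size = (64, 64)        # must match the size of resized_image
block_size = (16, 16)
block_stride = (8, 8)
cell_size = (8, 8)
nbins = 9
hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)

resized_image = cv2.resize(cv2.imread('face.jpg', cv2.IMREAD_GRAYSCALE), win_size)
h = hog.compute(resized_image)   # flat float32 vector of HOG features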
if __name__ == '__main__':
    train_img, train_labels = load_images("train", "train_labels.csv")
    test_img, test_labels = load_images("test", "test_labels.csv")

    train_labels_int = encode_labels(train_labels)
    test_labels_int = encode_labels(test_labels)

    train_img = resize_images(train_img, (128, 128))
    test_img = resize_images(test_img, (128, 128))

    features_train = extract_features(train_img)
    features_test = extract_features(test_img)

    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.setTermCriteria((cv2.TERM_CRITERIA_COUNT, 100, 1.e-10))
    svm.setC(100)
    svm.setGamma(0.1)

    # OpenCV expects float32 samples and int32 responses
    svm.train(np.array(features_train, np.float32), cv2.ml.ROW_SAMPLE,
              np.array(train_labels_int, np.int32))

    predicted = svm.predict(np.array(features_test, np.float32))
    result = [int(p[0]) for p in predicted[1]]  # predict() returns (retval, results)
    print("Accuracy: " + str(count_accuracy(result, test_labels_int)))
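# count_accuracy is defined elsewhere in the project; a plausible minimal version,
# assuming it simply returns the fraction of predictions that match the ground truth:
def count_accuracy(predicted, truth):
    """Fraction of predicted labels equal to the corresponding true labels."""
    correct = sum(1 for p, t in zip(predicted, truth) if p == t)
    return correct / len(truth)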
import dlib

fl = open("/home/shubham/Documents/project/Final_project/data_functioning/data_images.txt", "r")
lst_data = []
lst_label = []
lst_label_name = []
for line in fl:
    if line != "":
        str_arr, label = tuple(line.split(";"))
        label, label_id = tuple(label.split(":"))
        arr_com = list(map(float, str_arr.split(",")))
        lst_data.append(arr_com)
        lst_label_name.append(label)
        lst_label.append(int(label_id))

x = dlib.vectors()
y = dlib.array()  # dlib expects the labels as a flat array of doubles, not as vectors
for i in range(len(lst_label)):
    x.append(dlib.vector(lst_data[i]))
    y.append(float(lst_label[i]))

# Note: svm_c_trainer_linear is a binary trainer; dlib expects +1/-1 labels
svm = dlib.svm_c_trainer_linear()
svm.be_verbose()
svm.set_c(10)
classifier = svm.train(x, y)
print("Prediction for first sample: " + lst_label_name[int(classifier(x[0])) - 1])
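# dlib's svm_c_trainer_linear returns a decision function that outputs a real score.
# A self-contained toy example of the intended binary usage pattern (the points and
# labels here are made up for illustration):
import dlib

x = dlib.vectors()
y = dlib.array()
x.append(dlib.vector([1.0, 1.0]));   y.append(+1)   # labels must be +1 / -1
x.append(dlib.vector([-1.0, -1.0])); y.append(-1)

trainer = dlib.svm_c_trainer_linear()
trainer.set_c(10)
decision = trainer.train(x, y)
print("positive" if decision(x[0]) > 0 else "negative")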
def main():
    # Path to the input (training) data file
    LOAD_INPUT_DATA_PATH = "/Users/panzer5/github/sample/python/scikit/svm/ex4_data/train.csv"
    # Output path for the trained model data
    SAVE_TRAINED_DATA_PATH = '/Users/panzer5/github/sample/python/scikit/svm/ex4_data/train.learn'
    # Path to the test data file
    LOAD_TEST_DATA_PATH = "/Users/panzer5/github/sample/python/scikit/svm/ex4_data/test.csv"
    # Output path for the graph image
    SAVE_GRAPH_IMG_PATH = '/Users/panzer5/github/sample/python/scikit/svm/ex4_data/graph.png'
    # Column names of the explanatory variables
    NAME_X = ["x1", "x2"]
    # Column name of the target variable
    NAME_Y = "x3"
    # SVM hyperparameters
    GAMMA = 0.1
    C = 1
    KERNEL = "rbf"
    # Class values and the colors assigned to them when plotting
    CLASS_DATAS = [0, 1, 2]
    CLASS_COLORS = ["blue", "red", "green"]

    svm = SVM()

    # Build the trained model
    svm.train(load_input_data_path=LOAD_INPUT_DATA_PATH,
              save_trained_data_path=SAVE_TRAINED_DATA_PATH,
              name_x=NAME_X, name_y=NAME_Y,
              gamma=GAMMA, C=C, kernel=KERNEL)

    # Validate the trained model
    svm.test(load_trained_data_path=SAVE_TRAINED_DATA_PATH,
             load_test_data_path=LOAD_TEST_DATA_PATH,
             name_x=NAME_X, name_y=NAME_Y)

    # Feed unseen data into the model and predict
    """
    svm.test(
        load_trained_data_path=SAVE_TRAINED_DATA_PATH,
        load_input_data_path=LOAD_INPUT_DATA_PATH,
        name_x=NAME_X, name_y=NAME_Y)
    """

    # Plot the data and visualize the decision boundary
    svm.plot2d(
        load_input_data_path=LOAD_INPUT_DATA_PATH,
        load_trained_data_path=SAVE_TRAINED_DATA_PATH,
        save_graph_img_path=SAVE_GRAPH_IMG_PATH,
        class_datas=CLASS_DATAS,
        class_colors=CLASS_COLORS,
        name_x=NAME_X, name_y=NAME_Y,
        x1_name="x1", x2_name="x2",
        fig_size_x=10, fig_size_y=10,
        lim_font_size=25,
    )
        pred_valid = self.svm.predict(np.asarray(v_x, dtype=np.float32))
        logloss = 0.0
        for i, id in enumerate(v_ids):
            tmp_y = [0., 0., 0.]
            tmp_y[v_y[i]] = 1.  # one-hot ground truth
            norm_v_probs = [0., 0., 0.]
            norm_v_probs[pred_valid[i]] = 1.0  # hard prediction as a degenerate distribution
            # Clip the probabilities away from 0 and 1 so the log stays finite.
            # (The original used np.min(p, 1-1e-15), which passes the bound as the
            # axis argument; np.clip is the intended operation.)
            norm_v_probs = np.clip(norm_v_probs, 1e-15, 1 - 1e-15)
            logloss += np.sum(np.asarray(tmp_y) * np.log(norm_v_probs))
        logloss = -logloss / len(v_ids)
        print('SVM logloss (valid): ', logloss)
        return tr_ids, pred_tr, tr_y

    def test(self, test_x):
        pred_y = self.svm.predict(np.asarray(test_x, dtype=np.float32))
        ind_pred = []
        for p in pred_y:
            tmp = [0, 0, 0]
            tmp[p] = 1.
            ind_pred.append(tmp)
        return ind_pred

if __name__ == '__main__':
    svm = SVM()
    svm.train()
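# The same quantity via scikit-learn, as a sanity check; this assumes three classes
# and hard, one-hot (clipped) predictions exactly like the loop above:
import numpy as np
from sklearn.metrics import log_loss

y_true = [0, 2, 1]                    # ground-truth class indices
y_pred = np.full((3, 3), 1e-15)       # degenerate "distributions" from hard predictions
y_pred[np.arange(3), [0, 1, 1]] = 1 - 1e-15
print(log_loss(y_true, y_pred, labels=[0, 1, 2]))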
def main():
    # Output path for the trained model data
    SAVE_TRAINED_DATA_PATH = 'C:/github/sample/python/scikit/svm/ex6_data/train.learn'
    # Test image (a "2" drawn in a paint program and saved)
    LOAD_TEST_IMG_PATH = 'C:/github/sample/python/scikit/svm/ex6_data/test_2.png'
    # SVM hyperparameters
    GAMMA = 0.1
    C = 1
    KERNEL = "linear"  # rbf trains poorly on the handwritten-digit images

    svm = SVM()

    # Load the training data (the Digits dataset)
    digits_dataset = datasets.load_digits()

    # Explanatory variables: 8x8 handwritten-digit images as 2-D arrays
    X = digits_dataset.images
    print("X1:", X[1])

    # Target variable: the digit (0-9)
    y = digits_dataset.target

    # Flatten each 8x8 image to a 1-D vector (-1 lets NumPy infer the row count)
    X = X.reshape((-1, 64))

    # Split into training and validation data (80% train, 20% test, shuffled)
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, shuffle=True)
    print("train_X size:", train_X.shape)
    print("train_y size:", train_y.shape)
    print("test_X size:", test_X.shape)
    print("test_y size:", test_y.shape)

    # Build the trained model
    svm.train(save_trained_data_path=SAVE_TRAINED_DATA_PATH,
              train_X=train_X, train_y=train_y,
              gamma=GAMMA, C=C, kernel=KERNEL)

    # Validate the trained model
    svm.test(load_trained_data_path=SAVE_TRAINED_DATA_PATH,
             test_X=test_X, test_y=test_y)

    # Predict with the trained model:
    # load an arbitrary handwritten-digit image with OpenCV,
    # convert to grayscale, invert black/white,
    # resize to 8x8, flatten to a 1-D array,
    # and clamp the values to 0-16 before feeding it to the model
    test_img = cv2.imread(LOAD_TEST_IMG_PATH)
    test_gray = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)
    test_gray = cv2.bitwise_not(test_gray)
    test_gray = cv2.resize(test_gray, (8, 8))
    test_gray = test_gray.reshape(-1, 64)
    # Note: clipping saturates bright pixels at 16; scaling (test_gray / 255 * 16)
    # would match the Digits 0-16 value range more closely.
    test_gray = np.clip(test_gray, 0, 16)

    predict_y = svm.predict(load_trained_data_path=SAVE_TRAINED_DATA_PATH, test_X=test_gray)
    print("test_X:", test_gray)
    print("predict_y:", predict_y)
test = []
play = [1, 2, 3]
test.append(play)

print(type(hist))

# OpenCV expects float32 samples and int32 labels
samples = np.float32(test)
labels = np.array(labels, np.int32)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_RBF)
svm.setGamma(5.383)
svm.setC(2.67)
svm.train(samples, cv2.ml.ROW_SAMPLE, labels)
def main():
    parser = argparse.ArgumentParser(description='Assignment 4')
    parser.add_argument('-tr', dest="train_data", default="train_examples.tsv",
                        help='File name of the training data.')
    parser.add_argument('-d', dest="dev_data", default="dev_examples.tsv",
                        help='File name of the development data.')
    parser.add_argument('-t', dest="test_data", default="test_examples.tsv",
                        help='File name of the test data')
    parser.add_argument('-w', dest="write_fname", default=None,
                        help='File name of the output.')
    parser.add_argument('-b', dest="binning", default=False,
                        help='Whether you want to bin the features or not')
    parser.add_argument('-c', dest="classifier", default=None,
                        help='Classifier already trained')
    args = parser.parse_args()

    global write_fname
    write_fname = args.write_fname

    # 1.1 and 1.2
    nb = build_classifier("nb")
    sys.stdout = open(write_fname, 'a+')
    print("Training different Naïve Bayes classifiers: ")
    sys.stdout = sys.__stdout__

    print("Training the model...")
    nb = train(nb, args.train_data, args.binning)
    print("Evaluating the model on the development set...")
    dev_acc, dev_cm = evaluate(nb, args.dev_data, args.binning)
    print_results(write_fname, 'development', 'All', dev_acc, dev_cm)
    print("Evaluating the model on the test set...")
    test_acc, test_cm = evaluate(nb, args.test_data, args.binning)
    print_results(write_fname, 'test', 'All', test_acc, test_cm)

    print("Selecting best set of features...")
    selected_features, best = feat_selection(nb, args.train_data, args.dev_data,
                                             args.test_data, args.binning)
    sys.stdout = open(args.write_fname, 'a+')
    print('Best set of features: {}'.format(best[1]))
    sys.stdout = sys.__stdout__

    print("Predicting with the best NB classifier...")
    best_nb = build_classifier("nb")
    # Rebuild the feature dicts, keeping only the selected features.
    # (Rebinding the loop variable, as the original did, does not modify the list.)
    fct_train, texts_train = build_features(args.train_data, "word_features", args.binning)
    fct_train = [(select_features(fd, selected_features), label) for fd, label in fct_train]
    best_nb = best_nb.train(fct_train)
    fct_dev, texts_dev = build_features(args.dev_data, "word_features", args.binning)
    fct_dev = [(select_features(fd, selected_features), label) for fd, label in fct_dev]
    fct_test, texts_test = build_features(args.test_data, "word_features", args.binning)
    fct_test = [(select_features(fd, selected_features), label) for fd, label in fct_test]
    accuracy, cm = select_evaluate(best_nb, fct_test)
    print_results(write_fname, 'test', best[1], accuracy, cm)

    sys.stdout = open("nb-word_features-{}_features.txt".format(best[1]), 'a+')
    print('Predictions of the best NB classifier.:')
    print(best_nb.classify_many([feat[0] for feat in fct_test]))
    sys.stdout = sys.__stdout__

    # 2.1
    nb_sk = build_classifier("nb_sk")
    print("Training BernoulliNB classifier...")
    sys.stdout = open(write_fname, 'a+')
    print("Training BernoulliNB classifier: ")
    sys.stdout = sys.__stdout__
    nb_sk = nb_sk.train(fct_train)
    accuracy, cm = select_evaluate(nb_sk, fct_dev)
    print_results(write_fname, 'dev', best[1], accuracy, cm)
    accuracy, cm = select_evaluate(nb_sk, fct_test)
    print_results(write_fname, 'test', best[1], accuracy, cm)

    dt_sk = build_classifier("dt_sk")
    print("Training Decision Tree classifier...")
    sys.stdout = open(write_fname, 'a+')
    print("Training Decision Tree classifier: ")
    sys.stdout = sys.__stdout__
    dt_sk = dt_sk.train(fct_train)
    accuracy, cm = select_evaluate(dt_sk, fct_dev)
    print_results(write_fname, 'dev', best[1], accuracy, cm)
    accuracy, cm = select_evaluate(dt_sk, fct_test)
    print_results(write_fname, 'test', best[1], accuracy, cm)

    # 2.2.1
    print("Generating w2vec features...")
    train_w2vec_feats = build_w2vec(args.train_data)
    dev_w2vec_feats = build_w2vec(args.dev_data)
    test_w2vec_feats = build_w2vec(args.test_data)

    # 2.2.2
    svm = build_classifier("svm")
    print("Training SVM classifier with word features...")
    sys.stdout = open(write_fname, 'a+')
    print("Training SVM classifier with word features: ")
    sys.stdout = sys.__stdout__
    svm = svm.train(fct_train)
    accuracy, cm = select_evaluate(svm, fct_dev)
    print_results(write_fname, 'dev', best[1], accuracy, cm)
    accuracy, cm = select_evaluate(svm, fct_test)
    print_results(write_fname, 'test', best[1], accuracy, cm)

    svm_w2v = build_classifier("svm")
    print("Training SVM classifier with w2vec features...")
    sys.stdout = open(write_fname, 'a+')
    print("Training SVM classifier with w2vec features: ")
    sys.stdout = sys.__stdout__
    svm_w2v = svm_w2v.train(train_w2vec_feats)
    accuracy, cm = select_evaluate(svm_w2v, dev_w2vec_feats)
    print_results(write_fname, 'dev', 'All', accuracy, cm)
    accuracy, cm = select_evaluate(svm_w2v, test_w2vec_feats)
    print_results(write_fname, 'test', 'All', accuracy, cm)
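# build_classifier is defined elsewhere; a plausible sketch using NLTK's classifier
# wrappers, which share the .train() API the code above relies on (the mapping of
# names to estimators is an assumption, not taken from the original project):
from nltk.classify import NaiveBayesClassifier, SklearnClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

def build_classifier(name):
    """Return an (untrained) classifier exposing .train(labeled_featuresets)."""
    if name == "nb":
        return NaiveBayesClassifier
    if name == "nb_sk":
        return SklearnClassifier(BernoulliNB())
    if name == "dt_sk":
        return SklearnClassifier(DecisionTreeClassifier())
    if name == "svm":
        return SklearnClassifier(SVC())
    raise ValueError("unknown classifier: " + name)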
# Extract features
for i in glob.glob(manmade_src_dir_training + "*.jpg"):
    h = read_src_images(i)
    if h is not None:  # same guard as the natural-image loop below
        traindata.extend(h)
        trainlabels.append(1)  # man-made label

for i in glob.glob(natural_src_dir_training + "*.jpg"):
    h = read_src_images(i)
    if h is not None:
        traindata.extend(h)
        trainlabels.append(0)  # natural label

# Create and train the SVM
svm = cv2.ml.SVM_create()
svm.train(np.array(traindata), cv2.ml.ROW_SAMPLE, np.array(trainlabels))

results = []
labelMAN = []
labelNAT = []
compute(manmade_src_dir_test, results)
compute(natural_src_dir_test, results)

# Prepare labels
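# compute is defined elsewhere; it presumably extracts the same features from each
# test image and appends the SVM's prediction. A sketch under that assumption:
def compute(src_dir, results):
    """Predict the class of every image in src_dir and append it to results."""
    for path in glob.glob(src_dir + "*.jpg"):
        h = read_src_images(path)
        if h is not None:
            sample = np.array(h, np.float32).reshape(1, -1)
            _, pred = svm.predict(sample)   # cv2 returns (retval, results)
            results.append(int(pred[0][0]))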
        # Computing g(x) = sum_j alpha_j * y_j * K(x, x_j) + b
        # ('sum' shadowed the Python builtin, so it is renamed here)
        gx = 0
        for j in range(len(alpha)):
            gx += alpha[j] * labels[j] * self.calcK(data, dataSet[j])
        return gx + b

if __name__ == '__main__':
    clf = svm.SVC(kernel='linear', C=1.0)  # reference sklearn model
    dataSet, labels = loadDataSet()
    clf.fit(dataSet, labels)  # train the sklearn SVC
    w = clf.coef_[0]
    print("w = ", end='')
    print(w)
    print("b = ", end='')
    print(clf.intercept_)

    svm = SVM()  # note: this rebinds 'svm', shadowing the sklearn module imported above
    weights, b = svm.train(dataSet, labels)
    # weights, b = svm.train(dataSet[:80], labels[:80])
    print(weights, b)

    for i, x in enumerate(dataSet[:80]):
        result = svm.predict(x)
        print(int(labels[i]) == result)
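# The dual decision function computed above is g(x) = sum_j alpha_j * y_j * K(x, x_j) + b.
# A vectorized NumPy equivalent of the loop, assuming a precomputed kernel column k
# with k[j] = K(x, x_j):
import numpy as np

def decision_value(alpha, y, k, b):
    """g(x) = sum_j alpha_j * y_j * K(x, x_j) + b."""
    return np.dot(alpha * y, k) + b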
def main():
    # Output path for the trained model data
    SAVE_TRAINED_DATA_PATH = '/Users/panzer5/github/sample/python/scikit/svm/ex5_data/train.learn'
    # Output path for the graph image
    SAVE_GRAPH_IMG_PATH = '/Users/panzer5/github/sample/python/scikit/svm/ex5_data/graph_x2_x3.png'
    # SVM hyperparameters
    GAMMA = 0.1
    C = 1
    KERNEL = "rbf"
    # Class values and the colors assigned to them when plotting
    CLASS_DATAS = [0, 1, 2]
    CLASS_COLORS = ["blue", "red", "green"]

    svm = SVM()

    # Load the training data (the Iris dataset)
    iris_dataset = load_iris()

    # Explanatory variables (training data)
    X = iris_dataset.data
    # Target variable: iris species ('setosa'=0, 'versicolor'=1, 'virginica'=2)
    y = iris_dataset.target

    X1 = np.vstack((X[:, :1]))   # sepal length
    X2 = np.vstack((X[:, 1:2]))  # sepal width
    X3 = np.vstack((X[:, 2:3]))  # petal length
    X4 = np.vstack((X[:, 3:4]))  # petal width

    # Choose which explanatory variables to train on
    X = np.hstack((X1, X2, X3, X4))
    #X = np.hstack((X1, X2))
    #X = np.hstack((X2, X3))
    #X = np.hstack((X3, X4))

    # Split into training and validation data (90% train, 10% test, shuffled)
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1, shuffle=True)
    print("train_X size:", train_X.shape)
    print("train_y size:", train_y.shape)
    print("test_X size:", test_X.shape)
    print("test_y size:", test_y.shape)

    # Build the trained model
    svm.train(save_trained_data_path=SAVE_TRAINED_DATA_PATH,
              train_X=train_X, train_y=train_y,
              gamma=GAMMA, C=C, kernel=KERNEL)

    # Validate the trained model
    svm.test(load_trained_data_path=SAVE_TRAINED_DATA_PATH,
             test_X=test_X, test_y=test_y)

    # Predict with the trained model
    predict_y = svm.predict(load_trained_data_path=SAVE_TRAINED_DATA_PATH, test_X=test_X)
    print("test_X:", test_X)
    print("predict_y:", predict_y)

    """
    # Plot the data and visualize the decision boundary
    # (only usable when the model was trained on two explanatory variables)
    svm.plot2d(load_trained_data_path=SAVE_TRAINED_DATA_PATH,
               save_graph_img_path=SAVE_GRAPH_IMG_PATH,
               train_X=train_X, train_y=train_y,
               class_datas=CLASS_DATAS,
               class_colors=CLASS_COLORS,
               x1_name="x2", x2_name="x3",
               fig_size_x=10, fig_size_y=10,
               lim_font_size=25,
               )
    """