def mainTest(X_train, X_test, y_train, y_test, k):
    """Compare SVM accuracy before and after PCA dimensionality reduction.

    Trains and classifies on the raw features first, then projects both
    sets down to M components and repeats.
    """
    print("--Test 1--")
    M = 3  # target dimensionality after PCA

    # PCA Work.
    # Fit the components on the TRAINING data only and reuse them for the
    # test set.  The original fit a second PCA on X_test (comp_2), which
    # projects train and test into different subspaces and invalidates the
    # comparison (train/test leakage of inconsistent bases).
    print("\nTraining data:")
    comp_1 = pca.pca(X_train, M)
    X_train_t = pca.transform(X_train, comp_1)
    print("\nTesting data:")
    X_test_t = pca.transform(X_test, comp_1)  # was: comp_2 = pca.pca(X_test, M)

    # Baseline: SVM on the original feature space.
    print("\nBefore PCA - Dim ", len(X_train[0]))
    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)
    printResults(info[1], y_test, info[0])

    # SVM on the reduced feature space.
    print("After PCA - Dim ", M)
    classifier = svm.train(X_train_t, y_train, k, C=None)
    info = svm.classify(classifier, X_test_t, return_sums=True)
    printResults(info[1], y_test, info[0])
def train_model(images, mask_list, k_size, probability):
    """Extract feature vectors from every image and train the SVM on them."""
    logging.info('Calculating, normalizing feature vectors for %d image(s)',
                 len(images))
    feature_vectors = []
    ground_truths = []
    for img in images:
        feature_vectors.append(
            calculate_features(img.image, img.fov_mask, mask_list, k_size))
        ground_truths.append(img.truth)
    logging.info('Training model with %d image(s)', len(images))
    # Train SVM, lengthy process.
    svm.train(feature_vectors, ground_truths, probability)
def main():
    """Train a soft-margin SVM on a random non-separable 2-D dataset, plot results."""
    n_samples = 350
    random.seed(2)

    # Two uniform coordinates in [-10, 10), sampled without replacement.
    X = np.empty([n_samples, 2])
    for col in (0, 1):
        X[:, col] = np.matrix(
            (random.sample(range(-10000, 10000), n_samples))) / float(1000)

    # Label each point with the (not separable) target function.
    y = np.empty([n_samples, 1])
    for row in range(X.shape[0]):
        y[row] = func2(X[row, :])

    # Plot the raw data with the true decision surface.
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # Train the SVM; change c to trade off hard/soft margins.
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.1)

    # Report accuracy and plot the learned decision boundary.
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
def main():
    """Train a gaussian-kernel SVM on random 2-D data labelled by func, and plot."""
    n = 100

    # Uniform coordinates in [-10, 10).
    X = np.empty([n, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), n))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), n))) / float(1000)
    # preprocessing.scale(X)

    # Linearly separable labelling via func.
    y = np.empty([n, 1])
    for idx in range(n):
        y[idx] = func(X[idx, ])

    # Plot data and the true decision surface.
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # Effectively hard-margin (huge c) SVM with a gaussian kernel.
    w, w0, support_vectors_idx = svm.train(
        X, y, c=999999999999999, eps=10, type='gaussian')
    # w, w0, support_vectors_idx = svm.train(X, y, c=999999999999999, eps=10, type='polynomial')

    # Report accuracy and plot the learned boundary with support vectors.
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
def main():
    """Train an SVM on the credit-approval dataset and print its accuracy."""
    mapping = {'a': 0, 'b': 1, '?': -1}
    data = np.genfromtxt('Data/credits.data', skip_header=True, delimiter=',',
                         usecols=[0, 1, 2, 7, 10, 15],
                         converters={0: lambda s: mapping[s]})

    # Drop rows whose first field is unknown ('?') or whose second is NaN.
    keep = [i for i in range(len(data))
            if data[i][0] != -1 and not math.isnan(data[i][1])]
    data = data[keep]

    # Split each record into features (all but last field) and label.
    X = np.empty([len(data), 5])
    y = np.empty([len(data), 1])
    for i in range(len(data)):
        for j in range(len(data[i]) - 1):
            X[i, j] = data[i][j]
        y[i] = data[i][5]
    # preprocessing.scale(X[:,1])

    # Train on columns 1..4 only (column 0 is the converted categorical flag).
    w, w0, support_vectors_idx = svm.train(X[:, range(1, 5)], y, c=10, eps=1)

    predicted_labels = svm.classify_all(X[:, range(1, 5)], w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
def save_classifier():
    """Train the classifier and persist it to disk as a pickle."""
    model = train(x_train_points, train_data.labels)
    with open('./data/classifier.pkl', 'wb') as out:
        pickle.dump(model, out)
def train(data, attributes, labels, k):
    # Build an ensemble of k pruned ID3 trees on bootstrap samples, then
    # train an SVM on the trees' per-row predictions (stacking-style).
    trees = []
    for i in range(k):
        # NOTE(review): `new_data = data` aliases, it does not copy — the
        # assignments to .raw_data/.attributes below mutate the caller's
        # `data` object on every iteration. Confirm this is intended.
        new_data = data
        # Bootstrap sample of 100 rows drawn with replacement.
        sample = data.raw_data[np.random.choice(data.raw_data.shape[0], 100, replace=True)]
        new_data.raw_data = sample
        # Random feature subset: 50 column ids (duplicates possible).
        numbers = np.random.randint(1, data.raw_data.shape[1] - 1, size=50)
        features = copy.deepcopy(attributes)
        for attr in attributes:
            if int(attr) not in numbers:
                del features[attr]
        new_data.attributes = features
        tree = dt.id3(new_data, features, labels)
        pruned = dt.pruning_tree(tree, 1)
        trees.append(pruned)
        # Result is unused — presumably kept for debugging.
        err, depth = dt.report_error(new_data, pruned)
    # Column 0 holds the original label; columns 1..k hold each tree's vote.
    transformed_data = np.zeros((data.raw_data.shape[0], k + 1))
    labels = []  # NOTE(review): shadows the `labels` parameter from here on
    for row, test in enumerate(data.raw_data):
        transformed_data[row, 0] = test[0]
        for col, tree in enumerate(trees, 1):
            label = dt.predict(data, test, tree)
            transformed_data[row, col] = int(label)
            labels.append(int(label))
        # A constant 1 is appended after each row's votes — presumably a
        # bias term; verify against svm.train's expectations.
        labels.append(1)
    lbls = transformed_data[:, 0]
    w, a, lab = svm.train(transformed_data, lbls, k)
    return a, lab
def main():
    """Train and cross-validate an SVM on the dummy-encoded car dataset."""
    data = pandas.read_csv("Data/car.data", sep=",", header=0, index_col=False)
    data = pandas.get_dummies(data)
    # .values replaces the deprecated DataFrame.as_matrix() (removed in
    # pandas 1.0) and returns the same underlying ndarray.
    arr = data.values

    # Keep only rows whose first column is a valid +/-1 label.
    use = [k for k in range(arr.shape[0]) if (arr[k, 0] == -1 or arr[k, 0] == 1)]
    arr = arr[use]
    X = arr[:, range(1, 22)]
    y = arr[:, 0]

    # normalize
    # X = preprocessing.scale(X)

    # Shuffle samples and labels with the same permutation.
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    # Train a soft-margin linear SVM and report training accuracy.
    w, w0, support_vectors_idx = svm.train(X, y, c=99, eps=0.00001)
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    # 10-fold cross-validation: linear, gaussian, then polynomial kernels.
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001)
    print(kfold)
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001, type="gaussian")
    print(kfold)
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001, type="polynomial")
    print(kfold)
def train(train_x, train_y):
    """Train every classifier, printing how long each one takes.

    Returns the fitted models as [tree, svm, knn, neural-net, boosted-tree].
    """
    def timed(name, trainer):
        # Fit one model and report its wall-clock training time.
        started = time.time()
        model = trainer(train_x, train_y)
        print(name + ' - Training Time: ', round(time.time() - started, 3), 's')
        return model

    return [
        timed('Decision Tree', tree.train),
        timed('SVM', svm.train),
        timed('k-NN', knn.train),
        timed('Neural Network', nn.train),
        timed('Boosted Tree', boost.train),
    ]
def main():
    """Soft/hard-margin SVM demo on a non-separable random 2-D dataset."""
    sample_count = 350
    random.seed(2)

    # Two independent uniform coordinates in [-10, 10).
    X = np.empty([sample_count, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), sample_count))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), sample_count))) / float(1000)

    # Non-separable labelling via func2.
    y = np.empty([sample_count, 1])
    for row in range(X.shape[0]):
        y[row] = func2(X[row, :])

    # True decision surface over the raw data.
    axes = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=axes)
    plt.show()

    # c trades off hard vs. soft margins.
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.1)

    # Accuracy plus the learned boundary with support vectors highlighted.
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    axes = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=axes)
    plt.show()
def train(folds_x, folds_y):
    """Fit each classifier on the given fold and return the fitted models."""
    trainers = (tree.train, svm.train, knn.train, nn.train, boost.train)
    return [fit(folds_x, folds_y) for fit in trainers]
def main():
    # Pipeline: pull train/test event features from Postgres, build SVM
    # feature vectors, train, then classify the test users.  (Python 2.)
    print 'program start:', datetime.datetime.now()

    #Define our connection string
    conn_string = "host='52.74.79.13' dbname='sammy' user='******' password='******'"

    # print the connection string we will use to connect
    print "Connecting to database\n ->%s" % (conn_string)

    # get a connection, if a connect cannot be made an exception will be
    # raised here
    conn = psycopg2.connect(conn_string)

    # conn.cursor will return a cursor object, you can use this cursor to
    # perform queries
    cursor = conn.cursor()
    print "Connected!\n"

    # Training rows: per-user event features joined with video metadata and
    # the user's grade label, ordered so features can be built sequentially.
    cursor.execute(
        "select vi.vid, tf.*, vi.duration, tl.grade from train_features tf inner join \
        video_info vi on tf.video_id = vi.video_id \
        inner join train_label tl on tl.user_id = tf.user_id \
        order by user_id, event_time;")
    # where tf.user_id in ('ff930d24cbdeb11e6dde8ceb0da5ac64', 'eee1df0fff33a37873990992bed20e82') \
    records = cursor.fetchall()
    print('fetch train data done, ', datetime.datetime.now())
    svm_trainset = createFeatures(records, True)

    # Test rows: same shape but without the grade label.
    cursor.execute(
        "select vi.vid, tf.*, vi.duration from test_features tf inner join \
        video_info vi on tf.video_id = vi.video_id \
        order by user_id, event_time;")
    # where tf.user_id in ('a74fe6d4812fa93a1afa1a6a334ebdda', '4ab9d6eadf7510198f468d10fc29f689', '55654c092cd47b64ec9860f6a9cf3b40') \
    records = cursor.fetchall()
    print('fetch test data done, ', datetime.datetime.now())
    svm_testset = createFeatures(records, False)

    # Train on the labelled set, then classify the unlabelled users.
    svm.train(svm_trainset['featureList'], svm_trainset['labelList'])
    svm.classify(svm_testset['featureList'], svm_testset['userList'])
    print('program finish', datetime.datetime.now())
def main():
    """Tune the SVM regularization constant, train, and print test metrics."""
    data = load_data()
    train, test = split(data)

    # Pick C on the full dataset, then fit on the training split only.
    best_c = svm.optimize_regularization(data)
    print()
    print("Best C: %.5f" % best_c)
    theta = svm.train(train, c=best_c)

    # Evaluate on the held-out split and report each metric.
    result = svm.testing(test, theta)
    for template, key in (("Total error: %.5f", 'er'),
                          ("Precision: %.5f", 'pre'),
                          ("Recall: %.5f", 'rec'),
                          ("F1-metric: %.5f", 'f1')):
        print(template % result[key])
def main():
    """Train a linear SVM on 2-class MNIST, reporting accuracy per snapshot."""
    # Get training and testing data.
    data = sp.io.loadmat('../data/mnist2.mat')

    n, p = np.shape(data['xtrain'])
    w0 = np.zeros(p)   # all-zero initial weight vector
    rounds = 200       # number of training snapshots
    reg = 0.1          # regularization strength

    # Each round retrains from w0 with t*n iterations, then reports
    # train/test accuracy for the resulting weights.
    for t in range(1, rounds + 1):
        w = svm.train(w0, data['xtrain'], data['ytrain'], t * n, reg)
        print('Train Accuracy: {0}, Test Accuracy: {1}'.format(
            accuracy(data['xtrain'], data['ytrain'], w),
            accuracy(data['xtest'], data['ytest'], w)))
def main():
    # Run the perceptron and SVM on the Adult dataset and compare test
    # accuracy; plot accuracy as a function of the SVM cost C.  (Python 2.)
    parser = argparse.ArgumentParser(description='Run SVM and Perceptron algorithms of Adult Data Set.')
    parser.add_argument('Training_filename', help='Training file')
    parser.add_argument('Test_filepath', help = 'Test File')
    args = parser.parse_args()
    # NOTE(review): the "dev" set is the training file itself — C is tuned
    # on training data, not a held-out split.  Confirm this is intended.
    dev = args.Training_filename
    test = args.Test_filepath

    print "Loading the data files...\n"
    X, Y = matrixbuild(args.Training_filename)
    DX, DY = matrixbuild(dev)
    TX, TY = matrixbuild(test)

    # ---- Perceptron: train for 100 epochs, evaluate on the test set ----
    print "Training for the perceptron.\n"
    perc_weights = perceptron1.gradienttrain(X, Y, 100)
    print "\n\nChecking accuracy on the test set.\n"
    perc_accuracy = perceptron1.classify(TX, TY, perc_weights)
    print ("The accuracy of the perceptron on the test set was %s%%\n" % perc_accuracy)

    #-------------Run the SVM algorithm------------#
    # find_c(dev_matrix, dev_classes, runs_each, learn)
    print "Finding best c from the dev set...\n"
    c, c_accuracy, c_list = svm.find_c(DX, DY, 20, 0.5)
    # train(data_matrix, real_classes, runs, learn, cost)
    print ("\n\nTraining for the SVM with C = %f\n" % c)
    acc, svm_weights, b = svm.train(X, Y, 100, 0.5, c, "train")
    # classify(test_matrix, test_class, weights, b)
    print "\n\nChecking accuracy on the test set.\n"
    svm_accuracy = svm.classify(TX, TY, svm_weights, b)
    print ("The accuracy of the SVM on the test set was %s%%" % svm_accuracy)

    # Visualize how test accuracy varied with the cost parameter.
    plt.plot(c_list, c_accuracy)
    plt.xlabel("Cost value")
    plt.ylabel("Accuracy")
    plt.title("C vs. Accuracy")
    plt.show()
def main():
    """Train an SVM on the dummy-encoded credit dataset and cross-validate."""
    data = pandas.read_csv('Data/credits.data', sep=',', header=0, index_col=False)
    data = pandas.get_dummies(data)
    # .values replaces the deprecated DataFrame.as_matrix() (removed in
    # pandas 1.0) and yields the same ndarray.
    arr = data.values

    # Feature columns: everything except column 6, which is the label.
    # list(range(...)) keeps the concatenation working on both Python 2
    # (where the original added two range lists) and Python 3.
    X = arr[:, list(range(0, 6)) + list(range(7, 47))]
    y = arr[:, 6]

    # Shuffle samples and labels with one permutation.
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    # Train a nearly hard-margin (huge c, tiny eps) linear SVM.
    # w,w0, support_vectors_idx = svm.train(X[:,[0,1,2,3,4,5,6,7]],y,c=999, eps=0.000001)
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.000000001)

    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    # 10-fold cross-validation with a softer margin.
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001)
    print(kfold)
def main():
    """Linear SVM demo with scaling, shuffling and 10-fold cross-validation."""
    m = 150
    random.seed(2)

    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    # Fix: preprocessing.scale returns a scaled COPY; the original called it
    # without assigning the result, so X was never actually scaled.
    X = preprocessing.scale(X)

    # Linearly separable labelling via func.
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, ])

    # Shuffle samples and labels with one permutation.
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    # Plot data and the true decision surface.
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # Train a nearly hard-margin linear SVM.
    w, w0, support_vectors_idx = svm.train(X, y, c=9999, eps=0.000001)

    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    # 10-fold cross-validation with an even harder margin.
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=999999999, eps=0.000001)
    print(kfold)

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
def test(thrsh, k, i, da):
    """Train/classify one configuration and print its colour-coded accuracy.

    da holds (X_train, X_test, y_train, y_test); returns
    (accuracy, predictions, decision sums).
    """
    print(i, end=" ")

    # Train and predict values.
    classifier = svm.train(da[0], da[2], k, threshold=thrsh)
    result = svm.classify(classifier, da[1], return_sums=True)
    y_pred = result[0]
    sums = result[1]

    # Colour the printed accuracy: green = good, blue = poor, red = broken.
    accuracy = 1 - np.mean(y_pred != da[3].T)
    if accuracy > .5:
        shade = 'green'
    elif accuracy > .01:
        shade = 'blue'
    else:
        shade = 'red'
    print(colored("{:.2f}\t".format(accuracy), shade), end=" ")

    # Line break after every fourth column.
    if i % 4 == 0:
        print()
    return accuracy, y_pred, sums
def main():
    """Linear SVM with scaled, shuffled 2-D data plus 10-fold cross-validation."""
    m = 150
    random.seed(2)

    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    # Fix: the original discarded preprocessing.scale's return value, which
    # leaves X unscaled — scale() does not modify its argument in place.
    X = preprocessing.scale(X)

    # Linearly separable labelling.
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, ])

    # Shuffle with a single shared permutation.
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    # Plot data and decision surface.
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # Train SVM.
    w, w0, support_vectors_idx = svm.train(X, y, c=9999, eps=0.000001)

    # Plot result.
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=999999999, eps=0.000001)
    print(kfold)
    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
def train(model, case_type, number=1):
    """Train `model` ('svm'|'cnn'|'lstm'|'keras_text_cnn') `number` times on
    the `case_type` corpus, then print and log per-run and average accuracy.

    Fixes: the running total is now divided by the run count for EVERY model
    (previously the 'cnn' and 'lstm' branches logged the raw sum as the
    average), and the 'aveage accuract' typo in the output is corrected.
    """
    runs = int(number)
    test_accuracy = []

    if model == 'svm':
        import svm
        x_train, y_train, x_test, y_test = svm.loadText(case_type)
        for _ in range(runs):
            test_accuracy.append(svm.train(x_train, y_train, x_test, y_test))
    elif model == 'cnn':
        import cnn
        train_data, test_data, train_label, test_label, vocab = cnn.get_data(case_type, mode='sequence')
        for _ in range(runs):
            test_accuracy.append(cnn.train_model(case_type, train_data, test_data, train_label, test_label, vocab))
    elif model == 'lstm':
        import lstm
        train_data, test_data, train_label, test_label, vocab = lstm.get_data(case_type, mode='sequence')
        for _ in range(runs):
            test_accuracy.append(lstm.train_model(case_type, train_data, test_data, train_label, test_label, vocab))
    elif model == 'keras_text_cnn':
        import keras_text_cnn as text_cnn
        train_data, test_data, train_label, test_label, vocab = text_cnn.get_data(case_type, mode='sequence')
        for _ in range(runs):
            test_accuracy.append(text_cnn.train_model(case_type, train_data, test_data, train_label, test_label, vocab))

    # Single, uniform average computation for every model branch.
    average_accuracy = sum(test_accuracy) / runs
    print("average accuracy: " + str(average_accuracy))

    # Append the per-run accuracies and their average to the case log.
    with open(file='D:/judgement_prediction/judgement_prediction/' + case_type + '/information.txt',
              mode="a", encoding='utf-8') as target_file:
        target_file.write(case_type)
        for acc in test_accuracy:
            target_file.write(str(acc) + ' ')
        target_file.write(',average:' + str(average_accuracy) + '\n')
import numpy as np
import svm
import kernel as k

# Sanity check 1: the AND gate (linearly separable with a bias).
classifier = svm.train([[1, 1], [1, -1], [-1, 1], [-1, -1]],
                       [1, -1, -1, -1], k.linear)
# should be [0,0,0,1,1,0,0,0]
print("classified: " + str(
    svm.classify(classifier, [[-1, -1], [1, -1], [-1, 1], [1, 1],
                              [1, 1], [1, -1], [-1, -1], [-1, 1]])))
print("\n\n\n\n\n")

# Sanity check 2: points on the x-axis labelled by the sign of x.
X = np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0],
              [-1.0, 0.0], [-2.0, 0.0], [-3.0, 0.0]])
y = np.array([[1.0], [1.0], [1.0], [-1.0], [-1.0], [-1.0]])
classifier = svm.train(X, y, k.linear)
print("classified: " + str(svm.classify(classifier, X)))
    return numpy.array(features)  # NOTE(review): tail of a function whose def is above this chunk


def kim_tfidf_ngrams(filename):
    # NOTE(review): uni_features / bi_features are not defined in this
    # function nor visibly at module level — looks like an unfinished stub.
    return uni_features, bi_features


def many_sentiment(filename):
    # Sentiment-count features for each review in the file.
    return sentiment.get_sentiment_counts(filename)


if __name__ == "__main__":
    # Build training features: POS-based (kim, zhang) plus sentiment counts.
    train_file = '/home/ak/Courses/cs73/project/dataset/small_train.txt'
    kim = kim_pos(train_file)  # 5 features
    zhang = zhang_pos(train_file)  # 7 features
    sent = many_sentiment(train_file)  # 2 features
    X_train = numpy.hstack((kim, zhang, sent))
    t_train = svm.compile_targets(train_file)
    model = svm.train(X_train, t_train)

    # Same feature layout for the test split, then evaluate.
    test_file = '/home/ak/Courses/cs73/project/dataset/small_test.txt'
    kim = kim_pos(test_file)  # 5 features
    zhang = zhang_pos(test_file)  # 7 features
    sent = many_sentiment(test_file)  # 2 features
    X_test = numpy.hstack((kim, zhang, sent))
    t_test = svm.compile_targets(test_file)
    y_pred = svm.test(model, X_test)
    metrics.run_classification_metrics(t_test, y_pred)
def main(args):
    """Train word2vec embeddings on the log, then fit the SVM on normal traces."""
    embedding_model = encode.train(args.log_name, min_count=1)
    svm.train(args.log_name, args.normal_traces, embedding_model)
cnn_test = cnndata['vin_testing'] cnn_test_extracted = [cnn_test[vin] for vin in tstset] cnn_recordTest = cnndata['record_testing'] cnn_rTdata = np.asarray(map(lambda x: x['data'], cnn_recordTest)) cnn_rt_length = len(cnn_rTdata) cnn_rT_data = cnn_rTdata.reshape(cnn_rt_length, 576) cnn_rT_label = np.asarray(map(lambda x: x['label'], cnn_recordTest)).reshape(cnn_rt_length, 2) #print "training set" #print trset #print "testing set" #print tstset svm_tst = svm.train(svm_tr_set_feature, svm_tr_set_label, svm_tst_set_feature, svm_tst_set_label, modelFolder, svm_th) cnn_tst = cnn.train(cnn_train, cnn_test, cnn_rT_data, cnn_rT_label, modelFolder, cnn_th) svm_th = max(svm_th, svm_tst) cnn_th = max(cnn_th, cnn_tst) print "=========testing phase=========" s = svm.classify(svm_tst_set_feature, modelFolder) c = cnn.classify("trained/" + modelFolder + "/cnnmodel.ckpt", cnn_test_extracted) #print "svm prediction: " #print s #print "cnn prediction" #print c compound = zip(s, c)
x[ix] = oldval - h fxmh = f(x) x[ix] = oldval grad_numerical = (fxph - fxmh) / (2 * h) grad_analytic = analytic_grad[ix] rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic)) print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error)) #现在我们对加入了正则项的梯度进行检验 loss, grad = svm.svm_loss_naive(w,x_dev,y_dev,0.0) f = lambda w:svm.svm_loss_naive(w,x_dev,y_dev,0.0)[0] grad_numerical = grad_check_sparse(f,w,grad) # 模型进行测试 svm = LinearSVM() #创建对象,此时W为空 tic = time.time() loss_hist = svm.train(x_train,y_train,learning_rate = 1e-7,reg = 2.5e4,num_iters = 1500,verbose = True) #此时svm对象中有W toc = time.time() print('that took %fs' % (toc -tic)) plt.plot(loss_hist) plt.xlabel('iteration number') plt.ylabel('loss value') plt.show() #训练完成之后,将参数保存,使用参数进行预测,计算准确率 y_train_pred = svm.predict(x_train) print('training accuracy: %f'%(np.mean(y_train==y_train_pred))) y_val_pred = svm.predict(x_val) print('validation accuracy: %f'%(np.mean(y_val==y_val_pred))) ''' #在拿到一组数据时一般分为训练集,开发集(验证集),测试集。训练和测试集都知道是干吗的,验证集在除了做验证训练结果外 # 还可以做超参数调优,寻找最优模型。遍历每一种参数组合,训练SVM模型,然后在验证集上测试,寻找验证集上准确率最高的模型
def bootstrapping(B, X, y, C):
    """Estimate classifier quality with B bootstrap resamples.  (Python 2.)

    Trains an SVM on each in-bag sample (drawn with replacement) and
    evaluates on the out-of-bag points.

    Returns (accuracy, error, recall, precision, specificity), each a
    length-B array; `error` is 1 - accuracy per resample.

    Fix: removed the unreachable `return bs_err` that followed the real
    return, together with the never-used `bs_err` array it referenced.
    """
    accuracy = np.zeros(B)
    precision = np.zeros(B)
    recall = np.zeros(B)
    specificity = np.zeros(B)
    n, d = X.shape
    for b in range(B):
        # In-bag indices (with replacement); out-of-bag points are the test set.
        train_samples = list(np.random.randint(0, n, n))
        test_samples = list(set(range(n)) - set(train_samples))

        # Train the model on the bootstrap sample.
        theta = svm.train(X[train_samples], y[train_samples], C)
        testSet = X[test_samples]
        testLabels = y[test_samples]
        n2, d2 = testSet.shape

        # Confusion-matrix counts for labels in {-1, +1}.
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        for j in xrange(n2):
            # Extract the test point and test label, then score it.
            test_point = testSet[j, :].T
            test_label = testLabels[j]
            testResult = svm.test(theta, test_point)
            if testResult == 1 and test_label == 1:
                tp += 1
            if testResult == 1 and test_label == -1:
                fp += 1
            if testResult == -1 and test_label == 1:
                fn += 1
            if testResult == -1 and test_label == -1:
                tn += 1

        # Guard every ratio against an empty denominator.
        try:
            accuracy[b] = float(tp + tn) / float(fn + fp + tp + tn)
        except ZeroDivisionError:
            accuracy[b] = 0.0
        try:
            recall[b] = float(tp) / float(tp + fn)
        except ZeroDivisionError:
            recall[b] = 0.0
        try:
            precision[b] = float(tp) / float(tp + fp)
        except ZeroDivisionError:
            precision[b] = 0.0
        try:
            specificity[b] = float(tn) / float(tn + fp)
        except ZeroDivisionError:
            specificity[b] = 0.0
    error = np.ones(B)
    error -= accuracy
    return accuracy, error, recall, precision, specificity
word = email[i] count= int(email[i+1]) arr.append(count) i+=2 x[itr] = arr if(len(arr)>max_len): max_len=len(arr) line = f.readline() total-=1 if(total==0): break f.close() trained = svm.train(lebel,x,'-t 0') label_test ={} x_test ={} ftest = open('../data/test','r') line = ftest.readline() itr =0 max_len =0 while(line): itr=itr+1 email = line.split(' ') if(email[1]=='ham'): label_test[itr] =-1
train_label_1 = y_1[0:s1] train_data_2 = x_2[0:s2] train_label_2 = y_2[0:s2] test_data_1 = x_1[s1:] test_label_1 = y_1[s1:] test_data_2 = x_2[s2:] test_label_2 = y_2[s2:] #generate traing data x = np.concatenate((train_data_1,train_data_2), axis=0) y = np.concatenate((train_label_1,train_label_2), axis=0) #traning a model svm_rbf = svm.train(x,y,"rbf") svm_linear = svm.train(x,y,"linear") w,mean = linear.train(x,y) #generate test data test_data = np.concatenate((test_data_1,test_data_2), axis=0) test_label = np.concatenate((test_label_1,test_label_2), axis=0) #prediction svm_rbf_label = svm.test(test_data,svm_rbf) linear_label = linear.test(test_data,w,mean) svm_linear_label = svm.test(test_data,svm_linear) #get result svm_rbf_error = error_rate(test_label,svm_rbf_label) linear_error = error_rate(test_label,linear_label)
grad_numerical=grad_check_sparse(f,w,grad) tic=time.time() loss_naive,grad_naive=svm.svm_loss_naive(w,x_dev,y_dev,0.00001) toc=time.time() print('naive loss: %e computed in %f s' % (loss_naive,toc-tic)) tic=time.time() loss_vectorized,grad_vectorized=svm.svm_loss_vectorized(w,x_dev,y_dev,0.00001) toc=time.time() print('vectoried loss: %e computed in %f s' % (loss_vectorized,toc-tic)) print('difference: %f ' % (loss_naive-loss_vectorized)) svm=LinearSVM() tic=time.time() loss_hist=svm.train(x_train,y_train,learning_rate=1e-7, reg=5e4,num_iters=1500,verbose=True) toc=time.time() print('that took %f s' % (toc-tic)) y_train_pred=svm.predict(x_train) print('training accuracy: %f ' % (np.mean(y_train==y_train_pred))) y_val_pred=svm.predict(x_val) print('validation accuracy : %f '% (np.mean(y_val==y_val_pred))) learning_rates=[1.4e-7,1.5e-7,1.6e-7] regularization_strengths=[(1+i*0.1)*1e4 for i in range(-3,3)]+[(2+0.1*i)*1e4 for i in range(-3,3)] results={} best_val=-1 best_svm=None for learning in learning_rates: for regularization in regularization_strengths:
globals.test_feature_vec[1].extend([subject] * len(histograms)) print('Time:', timer, '\n', file = globals.file) # Print print('Done!\n') # Print print('Training Support Vector Machine Model\n') # Train SVM Model print('Training %s SVM Models\n' % arguments.descriptor, file = globals.file) # SVM Model SVM = svm.train(gama = 0.001, descriptor_name = arguments.descriptor, model_name = 'SVM') # Print print('Done!\n') # Print print('Testing Support Vector Machine Model\n') # Test SVM Model print('Testing %s SVM Model\n' % arguments.descriptor, file = globals.file) # SVM Model SVM_predict = svm.test(model = SVM, descriptor_name = arguments.descriptor, model_name = 'SVM')
# Compare the network, SVM and perceptron models on the same 60/40 split,
# reporting precision, recall and F1 for each.
data = main()
train_x, train_y, test_x, test_y = split(data, 0.4)

# Network model: tune c, train, evaluate.
c = nw.get_c(train_x, train_y)
w1, w2 = nw.train(train_x, train_y, c)
pnw, rnw = nw.test(test_x, test_y, w1, w2)
enw = 2 * pnw * rnw / (pnw + rnw)
print("nw:")
print("\tF1 %.3f " % enw)
print("\tprecision %.3f, recall %.3f" % (pnw, rnw))

# SVM: tune c, train, evaluate.
c = svm.get_c(train_x, train_y)
tsvm = svm.train(train_x, train_y, c)
psvm, rsvm = svm.test(test_x, test_y, tsvm)
# Fix: F1 = 2PR / (P + R); the original divided by (psvm + psvm),
# which skewed the reported SVM F1 score.
esvm = 2 * psvm * rsvm / (psvm + rsvm)
print("svm:")
print("\tF1 %.3f " % esvm)
print("\tprecision %.3f, recall %.3f" % (psvm, rsvm))

# Perceptron: train, evaluate.
tp = perceptrone.train(train_x, train_y)
pp, rp = perceptrone.test(test_x, test_y, tp)
ep = 2 * pp * rp / (pp + rp)
print("lp:")
print("\tF1 %.3f " % ep)
print("\tprecision %.3f, recall %.3f" % (pp, rp))
train_y = [1.0 if x[1] == 'M' else -1.0 for x in temp_data[:b]] test_x = [numpy.array([float(i) for i in x[2:]]) for x in temp_data[b:]] test_y = [1.0 if x[1] == 'M' else -1.0 for x in temp_data[b:]] return train_x, train_y, test_x, test_y def main(): f = open('wdbc.data') lines = f.readlines() data = [x for x in lines] return data data = main() train_x, train_y, test_x, test_y = split(data, 0.4) c = svm.get_c(train_x, train_y) tsvm = svm.train(train_x, train_y, c) psvm, rsvm = svm.test(test_x, test_y, tsvm) esvm = 2 * psvm * rsvm / (psvm + psvm) print("svm:") print("\tF1 %.3f " %esvm) print("\tprecision %.3f, recall %.3f" %(psvm, rsvm)) tp = perceptrone.train(train_x, train_y) pp, rp = perceptrone.test(test_x, test_y, tp) ep = 2 * pp * rp / (pp + rp) print("lp:") print("\tF1 %.3f " %ep) print("\tprecision %.3f, recall %.3f" %(pp, rp))
def cross_validation(X, y, foldcount, C):
    # Evaluate an SVM with k-fold cross-validation.  Returns per-fold
    # (accuracy, error, recall, precision, specificity) arrays, where
    # error = 1 - accuracy.  Labels are assumed to be in {-1, +1}.  (Python 2.)
    accuracy = np.zeros(foldcount)
    precision = np.zeros(foldcount)
    recall = np.zeros(foldcount)
    specificity = np.zeros(foldcount)
    n, d = X.shape
    # extract k folds from the data
    split = cross_validation_split(y, foldcount)
    # running k fold x validation
    for j in xrange(foldcount):
        # breaking up the folds into train and test
        trainInd = []
        testInd = split[j]
        for i in xrange(foldcount):
            if j == i:
                continue
            trainInd += split[i]
        # construct the training and testing sets
        trainSet = X[trainInd]
        trainLabels = y[trainInd]
        testSet = X[testInd]
        testLabels = y[testInd]
        # train the model
        theta = svm.train(trainSet, trainLabels, C)
        # Matt is terrible
        # NOTE(review): this reuses (shadows) the `n` from X.shape above.
        n = len(testInd)
        # getting information on the statistical results
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        for i in xrange(n):
            # extract the test point and test label
            test_point = testSet[i]
            test_label = testLabels[i]
            # count if the test was good or not
            # test the model
            testResult = svm.test(theta, test_point)
            if testResult == 1 and test_label == 1:
                tp += 1
            if testResult == 1 and test_label == -1:
                fp += 1
            if testResult == -1 and test_label == 1:
                fn += 1
            if testResult == -1 and test_label == -1:
                tn += 1
        # making sure there are no zero denominators
        # probably unnecessary but just in case
        #print 'tp, tn, fp, fn'
        #print tp, tn, fp, fn
        #print ''
        try:
            accuracy[j] = float(tp + tn) / float(fn + fp + tp + tn)
        except ZeroDivisionError:
            accuracy[j] = 0.0
        try:
            recall[j] = float(tp) / float(tp + fn)
        except ZeroDivisionError:
            recall[j] = 0.0
        try:
            precision[j] = float(tp) / float(tp + fp)
        except ZeroDivisionError:
            precision[j] = 0.0
        try:
            specificity[j] = float(tn) / float(tn + fp)
        except ZeroDivisionError:
            specificity[j] = 0.0
    error = np.ones(foldcount)
    error -= accuracy
    return accuracy, error, recall, precision, specificity
for tr, tst in kf: cv += 1 print "cross validation fold %d" % (cv) trvin = vinlist[tr] tstvin = vinlist[tst] svmtrain = filter(lambda x: x['vin'] in trvin, svmdata) svmtest = filter(lambda x: x['vin'] in tstvin, svmdata) cnntrain = {} cnntest = {} for k in cnndata.keys(): if (k in trvin): cnntrain[k] = cnndata[k] if (k in tstvin): cnntest[k] = cnndata[k] svm.train(svmtrain) cnn.train(cnntrain) svmclassify = svm.classify(svmtest) svmres = svmclassify['detail'] svmacc = svmclassify['accuracy'] cnnclassify = cnn.classify(cnntest) cnnres = cnnclassify['detail'] cnnacc = cnnclassify['accuracy'] print "standalone classifier accuracy: svm -- %f , cnn -- %f" % (svmacc, cnnacc) pred = {} for each in svmres: vin = each['vin'] svm_proba = each['proba_predicted']
def main(): train_file = '/home/ak/Courses/cs73/project/dataset/small_train.txt' test_file = '/home/ak/Courses/cs73/project/dataset/small_test.txt' sent_included = False train_feats = [] test_feats = [] if 'k' in sys.argv: kim_train, kim_test = kim_features(train_file, test_file) train_feats.append(kim_train) test_feats.append(kim_test) if not sent_included: train_feats.append(many_sentiment(train_file)) test_feats.append(many_sentiment(test_file)) sent_included = True if 'o' in sys.argv: train_feats.append(omahony_features(train_file)) test_feats.append(omahony_features(test_file)) if not sent_included: train_feats.append(many_sentiment(train_file)) test_feats.append(many_sentiment(test_file)) sent_included = True if 'l' in sys.argv: train_feats.append(liu_features(train_file)) test_feats.append(liu_features(test_file)) if not sent_included: train_feats.append(many_sentiment(train_file)) test_feats.append(many_sentiment(test_file)) sent_included = True if 'z' in sys.argv: train_feats.append(zhang_features(train_file)) test_feats.append(zhang_features(test_file)) sent_included = True if not sent_included: train_feats.append(many_sentiment(train_file)) test_feats.append(many_sentiment(test_file)) sent_included = True if 't' in sys.argv: tfidf_train, tfidf_test = tfidf_ngrams(train_file, test_file, with_lsi=False) train_feats.append(tfidf_train) test_feats.append(tfidf_test) if 's' in sys.argv: train_feats.append(many_sentiment(train_file)) test_feats.append(many_sentiment(test_file)) if 'tl' in sys.argv: tfidf_train, tfidf_test = tfidf_ngrams(train_file, test_file, with_lsi=True) train_feats.append(tfidf_train) test_feats.append(tfidf_test) if 'bp' in sys.argv: train_feats.append(kim_pos(train_file)) test_feats.append(kim_pos(test_file)) X_train = None X_test = None if len(train_feats) > 1: X_train = scipy.sparse.hstack(train_feats) X_test = scipy.sparse.hstack(test_feats) else: X_train = train_feats[0] X_test = test_feats[0] svm.normalize(X_train) 
svm.normalize(X_test) # Classification # SV t_train_thresh = svm.compile_targets(train_file) t_test_thresh = svm.compile_targets(test_file) clf = ExtraTreesClassifier() X_new = clf.fit(X_train.toarray(), t_train_thresh).transform(X_train) if clf.feature_importances_.shape[0] < 500: for i in xrange(clf.feature_importances_.shape[0]): print i, clf.feature_importances_[i] '''bsvm = SVC(kernel="linear") selector = RFECV(bsvm, step=10) selector.fit(X_train, t_train_thresh) print selector.support_ print selector.ranking_ raw_input()''' class_model = None y_pred = None if 'rf' not in sys.argv: class_model = svm.train(X_train, t_train_thresh) y_pred = svm.test(class_model, X_test) else: class_model = rfc.train(X_train.todense(), t_train_thresh) y_pred = rfc.test(class_model, X_test.todense()) metrics.run_classification_metrics(t_test_thresh, y_pred) print # Regression # SVR t_train = svr.compile_targets(train_file) t_test = svr.compile_targets(test_file) if 'rf' not in sys.argv: reg_model = svr.train(X_train, t_train) y_pred = svr.test(reg_model, X_test) else: reg_model = rfr.train(X_train.todense(), t_train) y_pred = rfr.test(reg_model, X_test.todense()) #for i in xrange(X_test.shape[0]): # print y_pred[i], t_train[i] metrics.run_regression_metrics(t_test, y_pred) show_regression(y_pred, t_test)