import time

import numpy as np
import matplotlib.pyplot as plt

import misc  # local helper module providing viz_weights_categorical


def _test_svm():
    import svm

    # Global variables
    global X_train
    global X_val
    global X_test
    global y_train
    global y_val
    global y_test

    # Train the SVM; use a separate name so the svm module stays importable
    model = svm.LinSVM()
    tic = time.time()
    losses = model.train_sgd(X_train, y_train, eta=1e-7, reg=5e4,
                             epochs=1500, s=250, verbose=True)
    toc = time.time()
    print "that took %fs" % (toc - tic)

    # Plot the training error
    plt.plot(losses)
    plt.xlabel("Iteration number")
    plt.ylabel("Loss value")
    plt.show()

    # Get the training and validation accuracy
    y_train_pred = model.predict(X_train)
    print "training accuracy: %f" % (np.mean(y_train == y_train_pred),)
    y_val_pred = model.predict(X_val)
    print "validation accuracy: %f" % (np.mean(y_val == y_val_pred),)

    # Visualize the weights
    classes = [str(i) for i in range(model.W.shape[0])]
    misc.viz_weights_categorical(model.W, classes, (28, 28))
def testBayesError(self):
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    data = numpy.load(dataDir + "toyData.npz")
    gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

    sampleSize = 100
    trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
    testX, testY = X[sampleSize:, :], y[sampleSize:]

    # We form a test set from the grid points
    gridX = numpy.zeros((gridPoints.shape[0]**2, 2))
    for m in range(gridPoints.shape[0]):
        gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
        gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

    Cs = 2**numpy.arange(-5, 5, dtype=float)
    gammas = 2**numpy.arange(-5, 5, dtype=float)

    bestError = 1
    for C in Cs:
        for gamma in gammas:
            svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
            svm.learnModel(trainX, trainY)
            predY, decisionsY = svm.predict(gridX, True)
            decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
            error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

            predY, decisionsY = svm.predict(testX, True)
            error2 = Evaluator.binaryError(testY, predY)
            print(error, error2)

            # keep the parameters with the lowest Bayes error
            if error < bestError:
                bestError = error
                bestC = C
                bestGamma = gamma

    # Retrain with the best parameters and rebuild the decision grid before plotting
    svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
    svm.learnModel(trainX, trainY)
    predY, decisionsY = svm.predict(gridX, True)
    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")

    plt.figure(0)
    plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
    plt.colorbar()

    plt.figure(1)
    plt.scatter(X[y == 1, 0], X[y == 1, 1], c='r', label="+1")
    plt.scatter(X[y == -1, 0], X[y == -1, 1], c='b', label="-1")
    plt.legend()
    plt.show()
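# The grid construction above can be checked against numpy.meshgrid, which
# builds the same Cartesian product without the explicit loop. A minimal
# sketch (the linspace stands in for the loaded gridPoints; nothing here is
# from the original module):
import numpy

gridPoints = numpy.linspace(-1, 1, 5)
xx, yy = numpy.meshgrid(gridPoints, gridPoints)
# row-major flattening reproduces the loop: block m has column 0 = gridPoints
# and column 1 = gridPoints[m]
gridX = numpy.column_stack([xx.ravel(), yy.ravel()])
assert gridX.shape == (gridPoints.shape[0] ** 2, 2)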
import time

import flask
from flask import request, jsonify


def predict():
    if not request.json:
        flask.abort(400)

    data = {"success": False}
    imgbase64 = request.json['imgbase64']
    model_typ = request.json['model']
    data['model'] = model_typ

    start = time.process_time()
    if model_typ == 'cnn':
        lp, roi, rmb = cnn.predict(cnn_model, str(imgbase64))
    else:
        lp, roi, rmb = svm.predict(svm_model, str(imgbase64))
    if lp is not None:
        data['success'] = True
        data['lp'] = lp
        data['roi'] = roi
        data['rmb'] = rmb
    end = time.process_time() - start
    data['time'] = end

    return jsonify(data)
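# A minimal client sketch for the endpoint above, assuming it is exposed as
# POST /predict on a local Flask server (the URL, port and file name are
# assumptions, not part of the original service):
import base64
import requests

with open("plate.jpg", "rb") as f:
    imgbase64 = base64.b64encode(f.read()).decode("ascii")

resp = requests.post("http://localhost:5000/predict",
                     json={"imgbase64": imgbase64, "model": "svm"})
print(resp.json())  # e.g. {"success": true, "lp": ..., "roi": ..., "rmb": ..., "time": ...}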
def do(m, dimension, n_components, FIX_INVERTED=True, FIX_RIGHT_LEFT=True, SAVE=True, n_components_min=0):
    # m = 1000
    # dimension = 256
    (images, y) = pre_process.extract(m, dimension, FIX_INVERTED, FIX_RIGHT_LEFT, SAVE)

    # n_components = 100
    # images_reduced = pca.fit_transform(m, dimension, images, n_components, SAVE, n_components_min)
    # (pred, svm_score) = svm.predict(m, dimension, images_reduced, y, SAVE)
    (pred, svm_score) = svm.predict(m, dimension, images, y, SAVE)

    kappa_score_train = quadratic_weighted_kappa(pred[:m/2], y[:m/2], min_rating=0, max_rating=4)
    kappa_score_test = quadratic_weighted_kappa(pred[m/2:], y[m/2:], min_rating=0, max_rating=4)
    kappa_score_all = quadratic_weighted_kappa(pred, y, min_rating=0, max_rating=4)

    print "kappa score for train: ", kappa_score_train
    print "kappa score for test: ", kappa_score_test
    print "kappa score for all data: ", kappa_score_all
    print "svm score: ", svm_score
def prediction(self):
    if self.lst is None:
        self.lst = []
    if self.extractor.get_blacklist():
        self.blacklist = True
    else:
        self.lst = self.extractor.get_features()
    print(self.myqueue.qsize())

    if self.blacklist:
        # result = [0, 0]
        result = 0
    else:
        data = list(self.lst)
        svm_prdt = svm.predict(data)
        # kclus_prdt = kclus.predict(data)
        # result = [svm_prdt[0], kclus_prdt[0]]
        result = svm_prdt[0]
    return result
def predict_historical(data, days_ahead):
    '''
    Dummy pyspark function. Use as a wrapper and add all predictions here.

    data: dict object of the form:
        {
            symbol:
            date:
            open:
            high:
            low:
            close:
            volume:
        }
    These values are all unicode strings! Parse to int/float as needed.
    '''
    x = data["close"]
    symbol = data["symbol"]
    prediction = svm.predict(symbol, days_ahead, x)
    return prediction
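# A hypothetical call to predict_historical, using the field names from its
# docstring (the values below are made up, and the surrounding svm module is
# assumed to be importable):
record = {
    "symbol": u"AAPL",
    "date": u"2017-11-07",
    "open": u"173.91",
    "high": u"175.25",
    "low": u"173.60",
    "close": u"174.81",
    "volume": u"24361485",
}
print(predict_historical(record, days_ahead=5))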
f = lambda w: svm.svm_loss_naive(w, x_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, w, grad)

# Test the model
svm = LinearSVM()  # create the object; W is empty at this point
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)  # the svm object now holds W
toc = time.time()
print('that took %fs' % (toc - tic))

plt.plot(loss_hist)
plt.xlabel('iteration number')
plt.ylabel('loss value')
plt.show()

# After training, keep the parameters and use them to predict and compute the accuracy
y_train_pred = svm.predict(x_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred)))
y_val_pred = svm.predict(x_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred)))

'''
# A dataset is usually split into a training set, a development (validation) set and a test set.
# Everyone knows what the training and test sets are for; besides validating training results,
# the validation set can also be used for hyperparameter tuning to find the best model: iterate
# over every parameter combination, train an SVM model, then test it on the validation set and
# keep the model with the highest validation accuracy.
# Cross-validation is very time-consuming: the code below loops 18 times in total and ran for
# over ten minutes at full load on a 9700K CPU, so decide for yourself whether to run it.
# Hyperparameter tuning (cross-validation)
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# list-comprehension shorthand for a for loop, 12 values
regularization_strengths = [(1 + i * 0.1) * 1e4 for i in range(-3, 3)] + [(2 + i * 0.1) * 1e4 for i in range(-3, 3)]
results = {}  # dictionary
best_val = -1
best_svm = None
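# The commented-out block above sets up a grid search whose loop body is not
# shown. A plausible sketch of that loop, reusing the same LinearSVM API and
# the variable names from the disabled setup (an assumption, not the original
# code, and it only runs once that setup is re-enabled):
for lr in learning_rates:
    for reg in regularization_strengths:
        model = LinearSVM()
        model.train(x_train, y_train, learning_rate=lr, reg=reg, num_iters=1500)
        train_acc = np.mean(y_train == model.predict(x_train))
        val_acc = np.mean(y_val == model.predict(x_val))
        results[(lr, reg)] = (train_acc, val_acc)
        # keep the model with the best validation accuracy
        if val_acc > best_val:
            best_val = val_acc
            best_svm = model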
    X, _ = preprocess_data(train_data, None)
    clf = MultinomialNB()
    scores = []
    for metric in metrics:
        scores.append(cross_val_score(clf, X, y, cv=10, scoring=metric).mean())
    return scores


################################

if __name__ == "__main__":
    train_data = pd.read_csv(dataset_path + 'train_set.csv', sep="\t")
    test_data = pd.read_csv(dataset_path + 'test_set.csv', sep="\t")
    print "my_method: Loaded data."

    le = preprocessing.LabelEncoder()
    le.fit(train_data["Category"])
    y = le.transform(train_data["Category"])
    X, Test = preprocess_data(train_data, test_data)

    # Prediction:
    import svm
    Test_pred = le.inverse_transform(svm.predict(X, y, Test))

    predFile = open("./testSet_categories.csv", "w+")
    predFile.write("Id,Category\n")
    for i in range(len(Test_pred)):
        predFile.write(str(test_data['Id'][i]) + ',' + Test_pred[i] + '\n')
    predFile.close()
        nh = FSIZE * h // w
        x = 0
        y = (FSIZE - nh) // 2
    else:
        nh = FSIZE
        nw = FSIZE * w // h
        y = 0
        x = (FSIZE - nw) // 2
    if nh < 1 or nw < 1:
        continue
    nz = cv2.resize(nz, (nw, nh))
    pz = np.zeros((FSIZE, FSIZE))
    # nz = np.pad(nz, ((1, 1), (2, 2)), "constant")
    pz[y:y + nh, x:x + nw] = nz
    pre = svm.predict(pz)
    py = minc[1]
    if lastY is not None and py - lastY > LINE_H:
        pres.append([])
    layer = pres[-1]
    lastY = py
    layer.append(pre)

'''
import os
path = "/home/hal/Downloads/Final+Project/dataset/"
name = path + "%s.png" % 0
k = 0
while os.path.exists(name):
    k += 1
    name = path + "%s.png" % k
'''
#########################################
# Do classification to check the accuracy using the test set
#########################################
if silence == 0:
    print("Calculating SVM %s kernel and decision matrix" % kernel)

if cheat_test:
    # for handcrafted data, reduce algorithm complexity by reducing the number of support vectors
    supports = supports[0:size_limit, :]
    alpha_vector = alpha_vector[:, 0:size_limit]

decision, vote = svm.get_decision(X_test, kernel, coef, degree, alpha_vector,
                                  supports, intercept, num_classifiers)

if silence == 0:
    print("Classifying test set...")
y_manual = svm.predict(X_test, clf, class_type, classes, num_classifiers, decision, vote)

#########################################
# Compare model accuracies (computed vs Python library output)
#########################################
y_pred = clf.predict(X_test)
print("Accuracy from predict function: %f" % accuracy_score(y_test, y_pred))
print("Accuracy from manual calculation: %f" % accuracy_score(y_test, y_manual))

sensitivity = sum(sum((y_manual.T == y_test) & (y_test == 1))) / sum(y_test)
specificity = sum(sum((y_manual.T == y_test) & (y_test == 0))) / (len(y_test) - sum(y_test))
print("Sensitivity: %f" % sensitivity)
print("Specificity: %f" % specificity)
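# The manual-vs-library comparison above can be reproduced end to end with
# scikit-learn alone. A self-contained sketch (dataset and parameters are made
# up for illustration, and this is the standard SVC API rather than the local
# svm module): recompute the binary RBF decision function
# f(x) = sum_i dual_i * K(sv_i, x) + b from the fitted attributes and check it
# against clf.decision_function.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

gamma = 0.1
clf = SVC(kernel="rbf", gamma=gamma).fit(X_train, y_train)

sv = clf.support_vectors_  # (n_SV, n_features)
# Gaussian kernel between every support vector and every test point
K = np.exp(-gamma * ((sv[:, None, :] - X_test[None, :, :]) ** 2).sum(-1))
manual = (clf.dual_coef_ @ K + clf.intercept_).ravel()

assert np.allclose(manual, clf.decision_function(X_test))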
def training_loop(epochs, grad_fn, loss_fn, w, x, y, batch_size,
                  x_test=None, y_test=None, max_steps=None, m=50, eta=0.01, **params):
    """
    :param epochs: number of epochs to run
    :param grad_fn: gradient oracle of the loss (defines the optimization step)
    :param loss_fn: loss function used for the histories
    :param w: svm weights
    :param x: x input (here mnist images)
    :param y: target classes
    :param batch_size: size of a minibatch
    :param x_test: x test
    :param y_test: y test
    :param params: parameters for the optimization algorithms
    :return: the training histories
    """
    # We know the batch size; count how many batches it takes to cover all the data
    n_batch = (x.shape[0] // batch_size)
    # useful for shuffling the data at each epoch
    idx = np.arange(x.shape[0])

    # Training histories
    loss_records = []
    valloss_records = []
    valaccuracy_records = []
    accuracy_records = []
    parameters = None
    t = 1
    counter = 0

    wtilde = copy(w)
    for e in range(epochs):
        # shuffle the indices
        np.random.shuffle(idx)
        # We randomly generate batches
        x_batches = np.array_split(x[idx], n_batch)
        y_batches = np.array_split(y[idx], n_batch)
        # Build a list of elements of the form [(batch_x, batch_y), ...]
        batchs = list(zip(x_batches, y_batches))

        # Compute mu, the full gradient at the snapshot point.
        # The batches define the gradient functions we will use: each \Psi_i is
        # given by (batchi_x, batchi_y). grad_fn() takes the current weights,
        # the x input and the expected outputs; **params is nothing special,
        # just Python plumbing.
        mu = 0
        for i in range(n_batch):
            batch_x = batchs[i][0]
            batch_y = batchs[i][1]
            # each gradient of \Psi_i
            mu += grad_fn(w, batch_x, batch_y, **params)
        # the average
        mu /= n_batch
        # End of computing mu

        for t in range(m):
            it = np.random.randint(n_batch)
            batch_x = batchs[it][0]
            batch_y = batchs[it][1]
            grad_1 = grad_fn(w, batch_x, batch_y, **params)
            grad_2 = grad_fn(wtilde, batch_x, batch_y, **params)
            w = svrg_update(w, grad_1 - grad_2, eta, mu)
            accuracy_records.append((predict(w, batch_x) == batch_y).mean())
            loss_records.append(loss_fn(w, batch_x, batch_y, **params))
            print(accuracy_records[-1])

        # if we provided a test set, evaluate the model on it
        if x_test is not None and y_test is not None:
            valloss_records.append(loss_fn(w, x_test, y_test, **params))
            valaccuracy_records.append((predict(w, x_test) == y_test).mean())

        # option 1
        wtilde = copy(w)

    # return the histories
    return loss_records, valloss_records, accuracy_records, valaccuracy_records
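# For reference, an svrg_update consistent with how training_loop calls it; a
# sketch of the standard SVRG step, not necessarily the module's actual
# implementation: w <- w - eta * (grad_Psi_i(w) - grad_Psi_i(wtilde) + mu).
def svrg_update(w, grad_diff, eta, mu):
    # grad_diff is grad_Psi_i(w) - grad_Psi_i(wtilde);
    # mu is the full gradient at the snapshot wtilde
    return w - eta * (grad_diff + mu)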
""" Created on Tue Nov 7 17:23:15 2017 @author: Xie Yang """ import corpusProcess import prepareVector import pattern import svm import configparser config = configparser.ConfigParser() config.read('config.conf', encoding="utf8") fileName = config['input']['fileName'] fn = fileName.split('.')[0] filePath = config['input']['filePath'] mode = config['mode']['mode'] if mode == 'svm': corpusProcess.process(fileName, filePath) prepareVector.toVector(fileName, filePath) svm.predict("model/svmclf.clf", filePath, fileName) elif mode == 'pattern': pattern.match(fileName, filePath) elif mode == 'rf': pass elif mode == 'ensemble': pass else: print("请输入正确的模式!")
dataArr, labelArr = svm.loadDataSet("testSet.txt")
print("dataArr.shape:", np.shape(dataArr), "labelArr.shape:", np.shape(labelArr))

# b, alphas = svm.smoSimple(dataArr, labelArr, 0.6, 0.001, 40)
b, alphas = svm.smoP(dataArr, labelArr, 0.6, 0.001, 40)
print("b=", b)
# print("---")
print("alpha.shape:", alphas.shape)
print(alphas[alphas > 0])  # support vectors
# for i in range(len(dataArr)):
#     if alphas[i] > 0.0:
#         print(dataArr[i], labelArr[i])

w = svm.calcWs(alphas, dataArr, labelArr)
# svm.plot_sv(dataArr, labelArr, w, b, alphas)

i = 5
y_ = svm.predict(dataArr[i], w, b)
print("y_:{}, y:{}".format(y_, labelArr[i]))

svm.eval(dataArr, labelArr, w, b)
svm.testRbf(1)
svm.testDigits(kTup=('rbf', 5))
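# svm.predict above presumably applies the linear decision rule obtained from
# calcWs; a stand-alone sketch of that rule (an assumption about the module's
# behavior, not its actual code):
import numpy as np

def predict_linear(x, w, b):
    # decision value f(x) = x.w + b; the predicted class is its sign
    score = np.dot(np.asarray(x, dtype=float),
                   np.asarray(w, dtype=float).ravel())
    score += float(np.asarray(b, dtype=float).ravel()[0])
    return 1 if score > 0 else -1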
# Each line of the test file appears to hold a label followed by (word, count)
# pairs: "<id> <ham|spam> <word1> <count1> <word2> <count2> ..."
label_test = {}
x_test = {}
ftest = open('../data/test', 'r')
line = ftest.readline()
itr = 0
max_len = 0
while line:
    itr = itr + 1
    email = line.split(' ')
    if email[1] == 'ham':
        label_test[itr] = -1
    else:
        label_test[itr] = 1
    i = 2
    arr = []
    while i < len(email) - 1:
        word = email[i]
        count = int(email[i + 1])
        arr.append(count)
        i += 2
    x_test[itr] = arr
    line = ftest.readline()
ftest.close()

[prediction, accuracy, values] = svm.predict(label_test, x_test, trained)
if len(sys.argv) == 2:
    DATA_PATH = sys.argv[1]
    if not path.exists(DATA_PATH):
        print("\nError: There is no picture at this path\n")
        exit()
    if not utils.check_image_format(DATA_PATH):
        print("\nError: File extension has to be one of these: png, jpg, jpeg, pgm\n")
        exit()
    FACE = cv2.imread(DATA_PATH, 0)
    PREDICTION_NAME = predict(FACE)
    print("This is a picture of \"%s\"" % PREDICTION_NAME)
    exit()
elif len(sys.argv) > 2:
    print("\nError: Specify only one picture at a time\n")
    exit()

SKIP_FRAME = 2       # the fixed skip frame
FRAME_SKIP_RATE = 0  # skip SKIP_FRAME frames every other frame
SCALE_FACTOR = 2     # used to resize the captured frame for face detection for faster processing speed
CURRENT_ROTATION_MAP = utils.get_rotation_map(0)

WEBCAM = cv2.VideoCapture(0)
RET, FRAME = WEBCAM.read()  # get first frame
FRAME_SCALE = (int(FRAME.shape[1] / SCALE_FACTOR),
               int(FRAME.shape[0] / SCALE_FACTOR))  # (y, x)

CROPPED_FACE = []
        # for f in faces:
        #     x, y, w, h = [v * SCALE_FACTOR for v in f]  # scale the bounding box back to original frame size
        #     cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0))
        #     cv2.putText(frame, "DumbAss", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0))

        if len(faces):
            for f in faces:
                # Crop out the face
                x, y, w, h = [v for v in f]  # scale the bounding box back to original frame size
                cropped_face = rotated_frame[y:y + h, x:x + w]  # img[y: y + h, x: x + w]
                cropped_face = cv2.resize(cropped_face, DISPLAY_FACE_DIM, interpolation=cv2.INTER_AREA)

                # Name Prediction
                face_to_predict = cv2.resize(cropped_face, FACE_DIM, interpolation=cv2.INTER_AREA)
                face_to_predict = cv2.cvtColor(face_to_predict, cv2.COLOR_BGR2GRAY)
                name_to_display = svm.predict(clf, pca, face_to_predict, face_profile_names)

                # Display frame
                cv2.rectangle(rotated_frame, (x, y), (x + w, y + h), (0, 255, 0))
                cv2.putText(rotated_frame, name_to_display, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0))

            # rotate the frame back and trim the black paddings
            processed_frame = ut.trim(ut.rotate_image(rotated_frame, rotation * (-1)), frame_scale)

            # reset the optimized rotation map
            current_rotation_map = get_rotation_map(rotation)

            faceFound = True
            break