def VisualizeBoundary(X, y, xlim, ylim, model, gamma, dataSetExample):
    pd.PlotData(X, y, xlim, ylim, showplot=False)
    x1plot = np.linspace(np.amin(X[:, 0]), np.amax(X[:, 0]), 100)
    x2plot = np.linspace(np.amin(X[:, 1]), np.amax(X[:, 1]), 100)
    X1, X2 = np.meshgrid(x1plot, x2plot)
    m, n = X1.shape
    vals = np.zeros_like(X1)
    for i in range(m):
        this_X = np.require(np.vstack((X1[:, i], X2[:, i])).T, requirements='C')
        # http://scikit-learn.org/stable/modules/generated/sklearn.svm.libsvm.fit.html
        # http://scikit-learn.org/stable/modules/generated/sklearn.svm.libsvm.predict.html
        vals[:, i] = libsvm.predict(this_X, support=model[0], SV=model[1],
                                    nSV=model[2], sv_coef=model[3],
                                    intercept=model[4], probA=model[5],
                                    probB=model[6], kernel='rbf', gamma=gamma)
    plot.contour(X1, X2, vals, colors='red')
    plot.title('SVM (Gaussian Kernel) Decision Boundary (Example Dataset %d)'
               % dataSetExample)
    plot.show()
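# A minimal usage sketch for VisualizeBoundary (an assumption, not from the
# original source): ex6data2.mat, C=1.0, sigma=0.1, and the xlim/ylim values
# are illustrative; xlim/ylim are simply forwarded to pd.PlotData.
from scipy.io import loadmat

data = loadmat('ex6data2.mat')
X = np.require(data['X'], dtype=np.float64, requirements='C')
y = np.require(data['y'].flatten(), dtype=np.float64)
sigma = 0.1
gamma = 1.0 / (2.0 * sigma ** 2)
model = libsvm.fit(X, y, kernel='rbf', C=1.0, gamma=gamma)
VisualizeBoundary(X, y, (0, 1), (0.4, 1.0), model, gamma, dataSetExample=2)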
def predict(self, X):
    svm_type = LIBSVM_IMPL.index(self.impl)
    if not self.__X:
        print("You should train the model first!!!")
        sys.exit(3)
    else:
        gram_matr_predict_new = self.string_kernel(X, self.__X)
        gram_matr_predict_new = np.asarray(gram_matr_predict_new,
                                           dtype=np.float64, order="C")
        return libsvm.predict(
            gram_matr_predict_new, self.support_, self.support_vectors_,
            self.n_support_, self.dual_coef_, self._intercept_, self._label,
            self.probA_, self.probB_, svm_type=svm_type, kernel=self.kernel,
            C=self.C, nu=self.nu, probability=self.probability,
            degree=self.degree, shrinking=self.shrinking, tol=self.tol,
            cache_size=self.cache_size, coef0=self.coef0, gamma=self._gamma,
            epsilon=self.epsilon,
        )
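# For context, the same Gram-matrix workflow through the public API (a sketch,
# not from the original class): SVC(kernel='precomputed') takes the
# train-vs-train Gram at fit time and the test-vs-train Gram at predict time,
# which is exactly what predict() above assembles. toy_string_kernel is a
# made-up stand-in for self.string_kernel.
import numpy as np
from sklearn.svm import SVC

def toy_string_kernel(A, B):
    # histogram intersection of character counts (a valid PSD kernel)
    return np.array([[sum(min(a.count(c), b.count(c)) for c in set(a + b))
                      for b in B] for a in A], dtype=np.float64)

docs_train = ['spam spam', 'ham ham', 'spam ham eggs']
y_train = [1, 0, 1]
clf = SVC(kernel='precomputed')
clf.fit(toy_string_kernel(docs_train, docs_train), y_train)
print(clf.predict(toy_string_kernel(['spam', 'ham'], docs_train)))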
def _dense_predict(self, X):
    kernel = self.kernel
    if callable(self.kernel):
        if X.shape[1] != self.shape_fit_[0]:
            raise ValueError("X.shape[1] = %d should be equal to %d, "
                             "the number of samples at training time"
                             % (X.shape[1], self.shape_fit_[0]))
        # compute the Gram matrix into X first, then switch the kernel name;
        # the flattened original overwrote the Gram with the 'precomputed' string
        gram = self.kernel(X, self.__Xfit)
        if sp.issparse(gram):
            gram = gram.toarray()
        X = np.asarray(gram, dtype=np.float64, order='C')
        kernel = 'precomputed'
    if X.ndim == 1:
        X = check_array(X, order='C', accept_large_sparse=False)
    svm_type = 0  # for SVC
    return libsvm.predict(
        X, self.support_, self.support_vectors_, self.n_support_,
        self._dual_coef_, self._intercept_, self.probA_, self.probB_,
        svm_type=svm_type, kernel=kernel, degree=self.degree,
        coef0=self.coef0, gamma=self._gamma, cache_size=self.cache_size)
def modelEvaluate(self, x_test, y_test, path):
    start = datetime.datetime.now()
    # note: DataFrame.as_matrix() was removed in newer pandas; .to_numpy()
    # is the modern replacement
    pred_y = libsvm.predict(
        x_test.as_matrix().copy(order='C').astype(np.float64), *self.model)
    end = datetime.datetime.now()
    print(self.name + ' predict time is ' + str(end - start))
    print('crosstab:{0}'.format(pd.crosstab(y_test, pred_y)))
    print('accuracy_score:{0}'.format(accuracy_score(y_test, pred_y)))
    print('0precision_score:{0}'.format(precision_score(y_test, pred_y, pos_label=0)))
    print('0recall_score:{0}'.format(recall_score(y_test, pred_y, pos_label=0)))
    print('1precision_score:{0}'.format(precision_score(y_test, pred_y, pos_label=1)))
    print('1recall_score:{0}'.format(recall_score(y_test, pred_y, pos_label=1)))
    with open(path + 'log.txt', 'a') as fp:
        fp.write('crosstab:{0}\n'.format(pd.crosstab(y_test, pred_y)))
        fp.write('accuracy_score:{0}\n'.format(accuracy_score(y_test, pred_y)))
        fp.write('0precision_score:{0}\n'.format(precision_score(y_test, pred_y, pos_label=0)))
        fp.write('0recall_score:{0}\n'.format(recall_score(y_test, pred_y, pos_label=0)))
        fp.write('1precision_score:{0}\n'.format(precision_score(y_test, pred_y, pos_label=1)))
        fp.write('1recall_score:{0}\n'.format(recall_score(y_test, pred_y, pos_label=1)))
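# A more compact way to get the same per-class precision/recall that
# modelEvaluate prints above (a sketch; reuses y_test and pred_y from it):
from sklearn.metrics import classification_report

print(classification_report(y_test, pred_y, labels=[0, 1]))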
def dataset3_params(X, y, Xval, yval):
    all_C = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    all_sigma = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_C = all_C[0]
    best_sigma = all_sigma[0]
    previous_err = 1000.0
    for C in all_C:
        for sigma in all_sigma:
            gamma = 1.0 / (2.0 * sigma ** 2)
            model = libsvm.fit(X, y, kernel='rbf', C=C, gamma=gamma)
            predictions = libsvm.predict(
                Xval, support=model[0], SV=model[1], nSV=model[2],
                sv_coef=model[3], intercept=model[4], label=model[5],
                probA=model[6], probB=model[7], kernel='rbf', gamma=gamma)
            err = np.mean(predictions != yval)
            if err < previous_err:
                best_C = C
                best_sigma = sigma
                previous_err = err
    return best_C, best_sigma
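# A plausible driver for dataset3_params (an assumption: ex6data3.mat holding
# the usual X/y/Xval/yval arrays from the Coursera ML exercise):
from scipy.io import loadmat

data = loadmat('ex6data3.mat')
X = np.require(data['X'], dtype=np.float64, requirements='C')
y = np.require(data['y'].flatten(), dtype=np.float64)
Xval = np.require(data['Xval'], dtype=np.float64, requirements='C')
yval = np.require(data['yval'].flatten(), dtype=np.float64)
best_C, best_sigma = dataset3_params(X, y, Xval, yval)
model = libsvm.fit(X, y, kernel='rbf', C=best_C,
                   gamma=1.0 / (2.0 * best_sigma ** 2))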
def main():
    train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()
    # The "-1" makes reshape flatten the remaining dimensions
    train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
    test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T
    train_x = train_x_flatten / 255.
    test_x = test_x_flatten / 255.
    layers_dims = [12288, 20, 7, 5, 1]
    parameters = L_layer_model(train_x, train_y, layers_dims,
                               num_iterations=1000, print_cost=True)
    predictions_train = predict(train_x, train_y, parameters)
    pred_test = predict(test_x, test_y, parameters)
    print(pred_test)
    print(predictions_train)
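# What the reshape(m, -1).T flattening above does, on a toy array: two
# 2x2x3 "images" become a (12, 2) matrix with one flattened image per column.
import numpy as np

toy = np.arange(2 * 2 * 2 * 3).reshape(2, 2, 2, 3)
flat = toy.reshape(toy.shape[0], -1).T
print(flat.shape)  # (12, 2)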
def classify(line):
    word_id_list = get_vsm.get_vsm(line)
    tmp_dict = {}  # temporary dict: key is the feature id, value is the feature value
    if len(word_id_list) > 0:
        for item in word_id_list:
            tmp_dict[item[0]] = item[1]
        y = []
        x = []
        x.append(tmp_dict)
        # liblinear Python interface; '-b 1' requests probability estimates
        p_labs, p_acc, p_vals = predict(y, x, model_, '-b 1')
        print(p_labs, p_vals)
        return get_max_prob_classes(p_vals[0])
    else:
        return -1
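# get_max_prob_classes is not shown above; a minimal sketch of what it
# presumably does (an assumption): return the label whose '-b 1' probability
# is highest. liblinear orders the probabilities to match model_.get_labels().
def get_max_prob_classes(probs):
    labels = model_.get_labels()
    return labels[probs.index(max(probs))]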
def visualize_boundary(X, y, model, gamma):
    plot_data(X, y, show=False)
    x1plot = np.linspace(np.amin(X[:, 0]), np.amax(X[:, 0]), 100)
    x2plot = np.linspace(np.amin(X[:, 1]), np.amax(X[:, 1]), 100)
    X1, X2 = np.meshgrid(x1plot, x2plot)
    predictions = np.zeros_like(X1)
    for i in range(X1.shape[1]):
        currentX = np.require(np.vstack((X1[:, i], X2[:, i])).T,
                              requirements='C_CONTIGUOUS')
        predictions[:, i] = libsvm.predict(
            currentX, support=model[0], SV=model[1], nSV=model[2],
            sv_coef=model[3], intercept=model[4], label=model[5],
            probA=model[6], probB=model[7], kernel='rbf', gamma=gamma)
    # predictions are 0/1 labels, so a single contour at 0.5 draws the
    # boundary (the original passed [0.0, 0.0], which matplotlib rejects
    # as non-increasing levels)
    plot.contour(X1, X2, predictions, levels=[0.5])
    plot.show()
def DataSet3Params(X, y, Xval, yval):
    C = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    sigma = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    previousError = 2000
    for currentC in C:
        for currentSigma in sigma:
            gamma = 1 / (2 * currentSigma ** 2)
            # Train
            model = libsvm.fit(X, y, kernel='rbf', C=currentC, gamma=gamma)
            predictions = libsvm.predict(
                Xval, support=model[0], SV=model[1], nSV=model[2],
                sv_coef=model[3], intercept=model[4], probA=model[5],
                probB=model[6], kernel='rbf', gamma=gamma)
            currentError = np.mean(predictions != yval)
            if currentError < previousError:
                bestC = currentC
                bestSigma = currentSigma
                previousError = currentError
    return bestC, bestSigma
def decision_function(self, X):
    # the original called a bare predict(X); as a method, self.predict(X)
    # is the likely intent
    return self.predict(X)
print('Word indices:\n%s' % word_indices)
features = email_features(word_indices, vocabulary)
print('Length of feature vector: %d' % len(features))
print('Number of non-zero entries: %d' % sum(features > 0))

# train SVM for spam classification
data = loadmat('../../machine-learning-ex6/ex6/spamTrain.mat')
X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
y = np.require(data['y'].flatten(), dtype=np.float64)
C = 0.1
model = libsvm.fit(X, y, kernel='linear', C=C)
predictions = libsvm.predict(
    X,
    support=model[0], SV=model[1], nSV=model[2], sv_coef=model[3],
    intercept=model[4], label=model[5], probA=model[6], probB=model[7],
    kernel='linear',
)
accuracy = 100 * np.mean(predictions == y)
print('Training set accuracy: %0.2f %%' % accuracy)

# load test set
data = loadmat('../../machine-learning-ex6/ex6/spamTest.mat')
Xtest = np.require(data['Xtest'], dtype=np.float64, requirements='C_CONTIGUOUS')
ytest = np.require(data['ytest'].flatten(), dtype=np.float64)

print('Evaluating the trained Linear SVM on a test set ...')
# the snippet was cut off mid-call; completed to mirror the training-set call
predictions = libsvm.predict(
    Xtest,
    support=model[0], SV=model[1], nSV=model[2], sv_coef=model[3],
    intercept=model[4], label=model[5], probA=model[6], probB=model[7],
    kernel='linear',
)
accuracy = 100 * np.mean(predictions == ytest)
print('Test set accuracy: %0.2f %%' % accuracy)
# The original training and test labels are normalised here
for j in range(len(X)):
    X[j] = normaliseData(X[j])
for j in range(len(data)):
    data[j] = normaliseData(data[j])

# The SVM model is trained here using libsvm's built-in fit. The kernel type
# is polynomial with degree 4; the parameters were chosen for the most
# optimal fit on this data.
[support, sv, nsv, coeff, intercept, proba, probb, fit_status] = libsvm.fit(
    X, Y, svm_type=0, kernel='poly', degree=4, gamma=0.093, coef0=0,
    tol=0.001, C=1, nu=0.5, max_iter=-1, random_seed=0)
m = [support, sv, nsv, coeff, intercept, proba, probb]
save_model('model.pkl', m)
[support_, sv_, nsv_, coeff_, intercept_, proba_, probb_] = load_model('model.pkl')

# Predictions are made on the test dataset using the hyperparameters trained
# on the training dataset
dec_values = libsvm.predict(data, support_, sv_, nsv_, coeff_, intercept_,
                            proba_, probb_, svm_type=0, kernel='poly',
                            degree=4, gamma=0.093, coef0=0)
dec_values = dec_values.astype(int)  # astype returns a copy; assign it back

# Predictions are written to a csv file named result.csv
with open('result.csv', 'w') as csvfile:
    spamwriter = csv.writer(csvfile)
    for j in range(len(dec_values)):
        spamwriter.writerow([int(dec_values[j])])
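# save_model/load_model are not defined in the snippet above; a minimal
# pickle-based sketch of what they presumably look like (an assumption):
import pickle

def save_model(path, model):
    with open(path, 'wb') as f:
        pickle.dump(model, f)

def load_model(path):
    with open(path, 'rb') as f:
        return pickle.load(f)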
def fitSLR(x, y):
    n = len(x)
    denominator = 0
    numerator = 0
    for i in range(n):
        numerator += (x[i] - np.mean(x)) * (y[i] - np.mean(y))
        denominator += (x[i] - np.mean(x)) ** 2
    print("numerator:", numerator)
    print("denominator:", denominator)
    b1 = numerator / float(denominator)
    b0 = np.mean(y) - b1 * np.mean(x)  # intercept is mean(y) - b1*mean(x), not mean(y)/mean(x)
    return b0, b1

def predict(x, b0, b1):
    return b0 + x * b1

x = [1, 3, 2, 1, 3]
y = [14, 24, 18, 17, 27]
b0, b1 = fitSLR(x, y)
print("intercept:", b0, "slope:", b1)

x_test = 6
y_test = predict(x_test, b0, b1)
print("y_test:", y_test)
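# Cross-check fitSLR against NumPy's least-squares fit on the same data;
# np.polyfit(x, y, 1) returns [slope, intercept], so expect 5.0 and 10.0.
import numpy as np

slope, intercept = np.polyfit([1, 3, 2, 1, 3], [14, 24, 18, 17, 27], 1)
print("slope:", slope, "intercept:", intercept)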
def predict(self, input):
    input = np.array(input)
    result = libsvm.predict(input, *self.model)
    return result
# raw strings avoid backslash-escape surprises in Windows paths; the
# assignment to svmtrain is reconstructed from the svmtrain['X'] uses below
svmtrain = loadmat(
    r'D:\ML\ML\CSR ML\WEEK#7\Machine Learning Assignment#6\Python\spamTrain.mat')
X = np.require(svmtrain['X'], dtype=np.float64, requirements='C')
# print(X.flags)
y = np.require(svmtrain['y'].flatten(), dtype=np.float64)

print('Training Linear SVM (Spam Classification)')
print('(this may take 1 to 2 minutes) ...')
C = 0.1
model = libsvm.fit(X, y, kernel='linear', C=C)
p = libsvm.predict(X, support=model[0], SV=model[1], nSV=model[2],
                   sv_coef=model[3], intercept=model[4], probA=model[5],
                   probB=model[6], kernel='linear')
accuracyTrain = np.mean(p == y) * 100
print('Training accuracy', accuracyTrain)

# Test Spam Classification
svmtest = loadmat(
    r'D:\ML\ML\CSR ML\WEEK#7\Machine Learning Assignment#6\Python\spamTest.mat')
Xtest = np.require(svmtest['Xtest'], dtype=np.float64, requirements='C')
ytest = np.require(svmtest['ytest'].flatten(), dtype=np.float64)
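# The snippet ends after loading the test set; a plausible continuation that
# mirrors the training-set call above (an assumption, not the original code):
ptest = libsvm.predict(Xtest, support=model[0], SV=model[1], nSV=model[2],
                       sv_coef=model[3], intercept=model[4], probA=model[5],
                       probB=model[6], kernel='linear')
accuracyTest = np.mean(ptest == ytest) * 100
print('Test accuracy', accuracyTest)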
print('Word indices:\n%s' % word_indices)
features = email_features(word_indices, vocabulary)
print('Length of feature vector: %d' % len(features))
print('Number of non-zero entries: %d' % sum(features > 0))

# train SVM for spam classification
data = loadmat('../../octave/mlclass-ex6/spamTrain.mat')
X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
y = np.require(data['y'].flatten(), dtype=np.float64)
C = 0.1
model = libsvm.fit(X, y, kernel='linear', C=C)
predictions = libsvm.predict(
    X,
    support=model[0], SV=model[1], nSV=model[2], sv_coef=model[3],
    intercept=model[4], label=model[5], probA=model[6], probB=model[7],
    kernel='linear',
)
accuracy = 100 * np.mean(predictions == y)
print('Training set accuracy: %0.2f %%' % accuracy)

# load test set
data = loadmat('../../octave/mlclass-ex6/spamTest.mat')
Xtest = np.require(data['Xtest'], dtype=np.float64, requirements='C_CONTIGUOUS')
ytest = np.require(data['ytest'].flatten(), dtype=np.float64)

print('Evaluating the trained Linear SVM on a test set ...')
# the snippet was cut off mid-call; completed to mirror the training-set call
predictions = libsvm.predict(
    Xtest,
    support=model[0], SV=model[1], nSV=model[2], sv_coef=model[3],
    intercept=model[4], label=model[5], probA=model[6], probB=model[7],
    kernel='linear',
)
accuracy = 100 * np.mean(predictions == ytest)
print('Test set accuracy: %0.2f %%' % accuracy)
def classify(features, model):
    # caution: fit_transform here re-learns the feature-to-column mapping from
    # the sample being classified, so it only matches the training columns if
    # every dict carries the same keys
    vec = DictVectorizer()
    X = vec.fit_transform(features)
    prediction = libsvm.predict(X.toarray(), *model)
    return int(prediction[0])
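# A safer variant of classify() above (a sketch): fit the DictVectorizer once
# on training dicts and reuse it, so the column order at predict time matches
# training. train_features here is illustrative toy data.
from sklearn.feature_extraction import DictVectorizer

train_features = [{'len': 3.0, 'caps': 1.0}, {'len': 7.0, 'caps': 0.0}]
vec = DictVectorizer()
X_train = vec.fit_transform(train_features)   # learn the feature -> column map
# ... train the libsvm model on X_train.toarray() ...
X_new = vec.transform([{'len': 5.0, 'caps': 1.0}])  # same columns at predict time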