def get_2d_intuition_data(n_samples=100, seed=10, L=10, C=1., **kwargs):
    """Return toy data whose L highest-|decision value| points have flipped labels.

    A linear SVM is trained on the clean toy data (keeping every training
    point, ``remove_zero=False``).  The weighted sum of its support vectors
    is used to score every sample with ``|w . x + b|``; the ``L`` samples
    with the largest score get their label multiplied by -1.

    Args:
        n_samples: forwarded to ``get_toy_data``.
        seed: forwarded to ``get_toy_data``.
        L: number of labels to flip.
        C: second positional argument of ``svm.SVMTrainer``.
        **kwargs: accepted for call-site compatibility; never read.

    Returns:
        ``(X, y_p, flip_pnts)``: the samples, a copy of the labels with the
        selected entries negated, and the rows of ``X`` that were flipped.
    """
    X, y = get_toy_data(n_samples=n_samples, seed=seed)
    predictor = svm.SVMTrainer('linear', C).train(X, y, remove_zero=False)

    # w = sum_i alpha_i * label_i * sv_i, built exactly as a broadcasted
    # product followed by a column-wise sum.
    signed_alpha = predictor._weights * predictor._support_vector_labels
    scaled_vectors = signed_alpha.reshape(-1, 1) * predictor._support_vectors
    weight = scaled_vectors.sum(axis=0)

    # Largest absolute decision values first; keep the top L indices.
    dist = np.abs(np.dot(X, weight.T) + predictor._bias)
    flip_inds = np.argsort(dist)[::-1][:L]

    y_p = y.copy()
    y_p[flip_inds] *= -1
    flip_pnts = X[flip_inds]
    return X, y_p, flip_pnts
def get_adv_data(n_samples=100, seed=16, C=1., R=25, beta1=0.1, beta2=0.1, L=10, **kwargs):
    """Search R random label-flip candidates and return the most damaging one.

    A linear SVM is trained on the clean toy data.  In each of ``R`` rounds a
    second predictor is built from the same support vectors but with weights
    and bias drawn uniformly from ``[-C, C]``.  Every sample then gets the
    score ``alpha / C - beta1 * s - beta2 * q`` where ``s`` and ``q`` are the
    normalised, label-scaled scores of the trained and the random predictor.
    The ``L`` samples with the smallest score have their label flipped, a new
    SVM is trained on the tainted labels, and its error against the clean
    labels is recorded.  The round with the highest error wins.

    Args:
        n_samples: forwarded to ``get_toy_data``.
        seed: forwarded to ``get_toy_data`` only.  The random predictors use
            NumPy's global random state, which this function does not seed.
        C: SVM trainer parameter; also the half-width of the uniform range
            the random weights and bias are drawn from.
        R: number of random rounds.  With ``R == 0`` the history is empty and
            ``np.max`` raises ``ValueError``.
        beta1: weight of the trained predictor's score ``s``.
        beta2: weight of the random predictor's score ``q``.
        L: number of labels flipped per round.
        **kwargs: accepted for call-site compatibility; never read.

    Returns:
        ``(X, y_p, flip_pnts)`` for the round with the largest recorded error:
        the samples, the tainted label vector and the flipped rows of ``X``.
    """
    X, y = get_toy_data(n_samples=n_samples, seed=seed)
    trainer = svm.SVMTrainer('linear', C)
    predictor = trainer.train(X, y, remove_zero=False)
    alpha = predictor._weights

    # Scores of the clean model, scaled by the labels and normalised.
    s = predictor.score(X)
    s *= predictor._support_vector_labels
    s = normalize(s)
    print('training error on untainted data is ', calc_error(s, y))

    error_hist = []
    yp_hist = []
    flip_pnts_hist = []
    for _ in range(R):
        # Random predictor that shares the trained model's support vectors.
        alpha_rnd = np.random.uniform(-C, C, size=alpha.shape)
        b_rnd = np.random.uniform(-C, C)
        predictor_rnd = svm.SVMPredictor(
            weights=alpha_rnd,
            support_vectors=predictor._support_vectors,
            support_vector_labels=predictor._support_vector_labels,
            bias=b_rnd,
            sigma=predictor._sigma,
            kernel=predictor._kernel)
        print('training error of random svm is ',
              calc_error(predictor_rnd.predict(X), y))

        q = predictor_rnd.score(X)
        q *= predictor._support_vector_labels
        q = normalize(q)

        # Flip the L samples with the smallest combined score.
        v = alpha / C - beta1 * s - beta2 * q
        flip_inds = np.argsort(v, axis=0)[0:L]
        y_p = y.copy()
        y_p[flip_inds] *= -1

        # Evaluate the retrained model once and reuse the value for both the
        # log line and the history (the original computed it twice).
        predictor_new = trainer.train(X, y_p)
        error = calc_error(predictor_new.predict(X), y)
        print('training error on tainted data is ', error)

        error_hist.append(error)
        yp_hist.append(y_p)
        flip_pnts_hist.append(X[flip_inds])

    print(np.max(error_hist), error_hist)
    best = np.argmax(error_hist)
    return X, yp_hist[best], flip_pnts_hist[best]
# from lz import *
import logging
import numpy as np
import matplotlib.pyplot as plt

# Only errors are logged; this is set before the project modules are imported.
logging.root.setLevel(logging.ERROR)
import svm
import data

# Selects which experiment below is executed.
exp = 'proc'

if exp == 'proc':
    # Train on 'proc-train' (column 0 = label, remaining columns = features),
    # predict on 'proc-test' and write the predictions to 'y-pred'.
    C = 1.0
    kernel = 'rbf'
    proc_train = np.loadtxt('proc-train', delimiter=' ')
    X = proc_train[:, 1:]
    y = proc_train[:, 0]
    X_test = np.loadtxt('proc-test', delimiter=' ')

    trainer = svm.SVMTrainer(kernel, C)
    predictor = trainer.train(X, y, remove_zero=True)
    y_pred = predictor.predict(X_test)
    np.savetxt('y-pred', y_pred, delimiter=' ')
elif exp == 'sonar':
    # Train on a split of the sonar data, report the validation error, then
    # apply L random label flips to the training split.
    n_samples = 208
    n_features = 60
    C = 1.0
    L = 208 // 10
    kernel = 'rbf'

    X, y = data.get_sonar_data()
    X, y, X_val, y_val = data.split_train_test(X, y)

    trainer = svm.SVMTrainer(kernel, C)
    predictor = trainer.train(X, y, remove_zero=True)
    print(predictor.error(X_val, y_val))

    X, y, _ = data.apply_rand_flip(X, y, L)
import numpy as np
import pandas as pd
import svm

# Load 'wdbc.data' as a header-less comma-separated table.  Column 1 holds
# the class label ('B' or another value); columns 2.. are used as features.
data = pd.read_table('wdbc.data', sep=',', header=None)

# `DataFrame.ix` was removed in pandas 1.0.  The columns are the default
# 0..n-1 range here (header=None), so positional `.iloc` selects exactly
# what `.ix` used to select.
X = np.array(data.iloc[:, 2:])
y = data.iloc[:, 1]

# Map labels to the {+1, -1} targets used by the SVM: 'B' -> +1, else -1.
y = np.where(y == 'B', 1.0, -1.0).ravel()

n_samples, n_features = X.shape

c = 0.1
trainer = svm.SVMTrainer(svm.Kernel.linear(), c)
predictor = trainer.train(X, y)

# First sample reshaped to a single-row matrix.
test = X[0].reshape(1, n_features)

# Predict every training sample one row at a time.
predictions = [
    predictor.predict(sample.reshape(1, n_features)) for sample in X
]
import matplotlib.pyplot as plt
import svm
from kernels import Kernel

# NOTE(review): `np`, `datasets` and `train_test_split` are used below but
# are not imported in the visible part of this script - confirm they are
# imported earlier in the file.

# Two-class problem from iris: class 0 becomes -1, class 1 stays +1 and
# class 2 is dropped.  Only the first two feature columns are kept.
iris = datasets.load_iris()
y = iris.target
y = np.where(y == 0, -1, y)

# Plain boolean mask.  The original wrapped it in a list (`[y != 2]`), which
# NumPy no longer interprets as a mask: recent versions treat it as a
# (1, n) array index and raise IndexError on the 1-D `y`.
sel = y != 2
y = y[sel]
X = iris.data[:, :2]
X = X[sel]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

C = 0.1
trainer = svm.SVMTrainer(kernel=Kernel.linear(), c=C)
model = trainer.train(X_train, y_train)

# Evaluation grid covering the data range with a margin of 1 on every side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
h = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
xxyy = np.stack((xx.ravel(), yy.ravel()), axis=-1)

# The model is queried one grid point at a time.
result = [model.predict(point) for point in xxyy]
Z = np.array(result).reshape(xx.shape)
result.append(predictor.predict(point)) Z = np.array(result).reshape(xx.shape) plt.contourf(xx, yy, Z, cmap=cm.Paired, levels=[-0.001, 0.001], extend='both', alpha=0.5) plt.scatter(flatten(X[:, 0]), flatten(X[:, 1]), c=flatten(y), cmap=cm.Paired) plt.xlim(x_min, x_max) plt.ylim(y_min, y_max) plt.savefig(filename) num_samples=100 num_features=2 grid_size=20 # Random data samples = np.matrix(np.random.normal(size=num_samples * num_features) .reshape(num_samples, num_features)) labels = 2 * (samples.sum(axis=1) > 0) - 1.0 # Train and predict trainer = svm.SVMTrainer(svm.Kernel.gaussian(0.5), 0.1) predictor = trainer.train(samples, labels) plot(predictor, samples, labels, grid_size, "svm-demo.pdf")
def test_on_dataset(dataset='sonar', kernel='rbf', n_samples=208, n_features=60):
    """Plot 5-fold accuracy against the number of flipped labels for four C values.

    For every ``C`` in ``[1, 10, 100, 1000]`` one axes is added to a shared
    figure.  For each flip count ``L`` the data is loaded and split into five
    folds; each fold is held out once while the other four are concatenated,
    passed through ``data.apply_rand_flip`` and used to train three models:
    a plain ``SVMTrainer`` and two with ``ln_robust=True`` (``mu=0.1`` and
    ``mu=0.5 - 1e-4``).  The mean held-out accuracy of each model is drawn as
    one curve over an x axis running from 0 to 40.

    Args:
        dataset: name passed to ``data.get_train_data``; also the figure title.
        kernel: kernel argument passed to ``svm.SVMTrainer``.
        n_samples: passed to ``data.get_train_data``; the flip counts are
            derived from it.
        n_features: passed to ``data.get_train_data``.

    Returns:
        None.  The curves are drawn on a new matplotlib figure and the plain
        model's accuracies are printed once per ``C``.
    """
    n_folds = 5
    # Extra SVMTrainer keyword arguments of the three compared models, in the
    # order they are trained and plotted.
    trainer_options = (
        {},
        {'ln_robust': True, 'mu': 0.1},
        {'ln_robust': True, 'mu': 0.5 - 1e-4},
    )
    flip_counts = [0, n_samples//10, n_samples//5,
                   int(n_samples//3.3333), int(n_samples//2.5)]

    fig = plt.figure(figsize=(8, 3))
    for l, C in enumerate([1, 10, 100, 1000], start=1):
        # One narrow axes per C value, laid out left to right.
        ax = fig.add_axes([0.25*l-0.20, 0.3, 0.18, 0.4])
        curves = [[] for _ in trainer_options]

        for L in flip_counts:
            X_, y_ = data.get_train_data(dataset, n_samples, n_features)
            X_c, y_c, = data.split_train_test(X_, y_)

            totals = [0 for _ in trainer_options]
            for i in range(n_folds):
                # Fold i is held out; the remaining folds form the training set.
                X_val, y_val = np.array(X_c[i]), np.array(y_c[i])
                X = np.array([row for j in range(n_folds) if j != i
                              for row in X_c[j]])
                y = np.array([lab for j in range(n_folds) if j != i
                              for lab in y_c[j]])
                X, y, flip_pnts = data.apply_rand_flip(X, y, L)

                for m, options in enumerate(trainer_options):
                    trainer = svm.SVMTrainer(kernel, C, **options)
                    predictor = trainer.train(X, y, remove_zero=True)
                    totals[m] += (1-predictor.error(X_val, y_val))

            for curve, total in zip(curves, totals):
                curve.append(total / n_folds)

        acc_1, acc_2, acc_3 = (np.array(curve) for curve in curves)
        flip_ratio = np.linspace(0, 40, acc_1.shape[0])
        plt.ylim((0.3, 1))
        print(acc_1)

        ax.plot(flip_ratio, acc_1, color="blue", label='mu=0')
        ax.plot(flip_ratio, acc_2, color="red", label='mu=0.1')
        ax.plot(flip_ratio, acc_3, color="black", label='mu=0.5')
        ax.set_xlabel('% flipped labels')
        ax.set_ylabel('test acc')
        ax.set_title("C = %d" % C)
        for side in ('right', 'top', 'left', 'bottom'):
            ax.spines[side].set_color('black')
        ax.patch.set_facecolor("white")
        ax.grid(color='r', linestyle='--', linewidth=1, alpha=0.3)

    fig.suptitle(dataset, fontsize=12)
num_samples, num_features)) y = np.ravel(2 * (X.sum(axis=1) > 0) - 1.0) linearly_separable = (X, y) names = [ "SVM Linear c = 1", "SVM RBF c = 1", "SVM Linear c = 100", "SVM RBF c = 100" ] datasets = [ make_moons(noise=0.1, random_state=0), make_circles(noise=0.1, factor=0.5, random_state=1), linearly_separable, ] classifiers = [ svm.SVMTrainer(kernel=linear(), c=1), svm.SVMTrainer(kernel=radial_basis(gamma=1), c=1), svm.SVMTrainer(kernel=linear(), c=100), svm.SVMTrainer(kernel=radial_basis(gamma=1), c=100), ] print(linear()) figure = plt.figure(figsize=(27, 9)) i = 1 for ds_cnt, ds in enumerate(datasets): X, y = ds ## modify labels to correspond to SVM categories {-1, 1} y[y == 0] = -1 X = StandardScaler().fit_transform(X)
import svm
import numpy as np
import cvxopt
import plotFigure

# Minimal demo: fit a Gaussian-kernel SVM to four labelled 2-D points, write
# the plot produced by plotFigure.plot to 'TEST.pdf' and print the prediction
# for the point (0, 1).

X = np.array([[1, 2], [2, 2], [0, 0], [-2, 3]])
Y = np.array([-1., -1., 1., 1.])

trainer = svm.SVMTrainer(svm.Kernel.gaussian(5), c=200)
predictor = trainer.train(X, Y)

plotFigure.plot(predictor, X, Y, grid_size=100, filename='TEST.pdf')

# print() call form: the Python 2 print statement is a SyntaxError on
# Python 3, while this single-argument form prints the same on both.
print(predictor.predict(np.array([0, 1])))