def plot_svm():
    """Plot one random linearly separable dataset with three separators.

    Draws the target function (black dash-dot), the SVM hypothesis (blue)
    and a PLA fit (red dashed); support vectors are drawn with larger
    markers.  Also prints the SVM bias term two ways as a sanity check.

    NOTE(review): reads a module-level `bounds` — confirm it is defined
    before this is called.
    """
    n = 10
    X, y, f = random_linearly_separable_data(n, bounds)
    w_svm, vectors = linsep_svm(X, y)

    # Marker sizes: enlarge the support vectors so they stand out.
    sizes = 20.0 * np.ones(n)
    sizes[vectors] = 60

    perceptron = PLA(bounds=bounds)
    perceptron.fit(X, y)
    w_pla = perceptron.weights

    # Bias check: the averaged b should agree with the per-support-vector
    # values y_i - w . x_i.
    print("Average b:", w_svm[0])
    each_b = y[vectors] - np.dot(X[vectors], w_svm[1:])
    print("Individual b's:", each_b)

    x_f, y_f = weights_to_mxb_2D(f, bounds)
    x_w, y_w = weights_to_mxb_2D(w_svm, bounds)
    x_p, y_p = weights_to_mxb_2D(w_pla, bounds)

    colours = np.where(y == 1, 'r', 'b')
    plt.scatter(X[:, 0], X[:, 1], sizes, c=colours)
    plt.plot(x_f, y_f, 'k-.')
    plt.plot(x_w, y_w, 'b')
    plt.plot(x_p, y_p, 'r--')
    plt.xlim([-1, 1])
    plt.ylim([-1, 1])
    plt.grid()
    plt.show()
def test_linearly_separable():
    """Generated data must be classified perfectly by the returned weights.

    Checks dimensions 2 through 4 on a fresh random dataset each.
    TODO: maybe stick a PLA solver in here too?
    """
    n = 100
    for dim in range(2, 5):
        box = datagen.unit_bounds(dim)
        X, y, weights = datagen.random_linearly_separable_data(n, box)
        # Prepend the constant-1 feature so the bias weight participates.
        augmented = np.column_stack([np.ones((n, 1)), X])
        assert np.all(np.sign(augmented.dot(weights)) == y)
def linsep_logistic(num_points=100, num_experiments=100, tol=0.01, eta=0.01, max_iter=1000):
    """Run logistic gradient descent over many random separable datasets.

    For each experiment, generates a fresh dataset, fits by gradient
    descent, and records the epoch count and randomized out-of-sample
    cross-entropy.  Prints per-experiment progress.

    Returns:
        (mean epochs, mean E_out) across all experiments.

    NOTE(review): reads a module-level `bounds`.
    """
    epochs = np.zeros(num_experiments)
    errors = np.zeros(num_experiments)
    for trial in range(num_experiments):
        X, y, f = random_linearly_separable_data(num_points, bounds)
        weights, n_epochs = logistic_gradient_descent(X, y, tol, eta, max_iter)
        e_out = cross_entropy_randomized_Eout(f, weights, bounds)
        epochs[trial] = n_epochs
        errors[trial] = e_out
        print(trial + 1, n_epochs, e_out)
    return epochs.mean(), errors.mean()
def test(num_points=100, tol=0.01, eta=0.01, max_iter=2000):
    """Fit logistic regression on one random dataset and visualize it.

    Prints the iteration count and randomized out-of-sample cross-entropy,
    then plots the data (blue = +1, red = -1) with the learned line (blue)
    and the target line (black).

    NOTE(review): reads a module-level `bounds`.
    """
    X, y, f = random_linearly_separable_data(num_points, bounds)
    w, n_iter = logistic_gradient_descent(X, y, tol, eta, max_iter)
    print(n_iter, cross_entropy_randomized_Eout(f, w, bounds))

    positives = X[np.where(y == 1)]
    negatives = X[np.where(y == -1)]

    # Convert each weight vector w0 + w1*x + w2*y = 0 into a plottable
    # line y = -(w1*x + w0)/w2 over the x-range of the bounding box.
    xs = np.array(bounds[:2])
    learned_line = -xs * w[1] / w[2] - w[0] / w[2]
    target_line = -xs * f[1] / f[2] - f[0] / f[2]

    plt.plot(bounds[:2], learned_line, c='b')
    plt.plot(bounds[:2], target_line, c='k')
    plt.scatter(positives[:, 0], positives[:, 1], c='b', marker='o')
    plt.scatter(negatives[:, 0], negatives[:, 1], c='r', marker='o')
    plt.xlim(bounds[:2])
    plt.ylim(bounds[2:4])
    plt.show()
def answers():
    """Compare SVM against PLA out-of-sample over many random trials.

    Runs 1000 experiments of 10 points each; for every trial fits both an
    SVM and a perceptron on the same data and compares their randomized
    out-of-sample errors.  Prints the win rate of SVM over PLA and the
    average support-vector count.

    NOTE(review): reads a module-level `bounds`.
    """
    n = 10
    trials = 1000
    svm_wins = 0.0
    vector_total = 0.0
    for _ in range(trials):
        X, y, f = random_linearly_separable_data(n, bounds)
        w_svm, vectors = linsep_svm(X, y)

        learner = PLA(bounds=bounds)
        learner.fit(X, y)

        e_svm = linear_randomized_Eout(f, w_svm, bounds)
        e_pla = linear_randomized_Eout(f, learner.weights, bounds)
        if e_svm < e_pla:
            svm_wins += 1
        vector_total += len(vectors)

    print("Number of points used: ", n)
    print("Proportion of times SVM beats PLA:", svm_wins / trials)
    print("Average number of support vectors:", vector_total / trials)