def rbf_answers(k=9, gamma=1.5):
    """Compares a hard-margin RBF-kernel SVM against a regular RBF network."""
    num_experiments = 500
    num_points = 100
    bounds = [-1, 1, -1, 1]
    num_kernel_beat_reg = 0.0
    not_linsep = 0.0
    num_rbf_linsep = 0.0
    sum_E_in_svm = 0.0
    sum_E_out_svm = 0.0
    sum_E_in_rbf = 0.0
    sum_E_out_rbf = 0.0
    for experiment in range(num_experiments):
        # print(experiment + 1, "/", num_experiments)
        X = random_plane_points(num_points, bounds)
        y = target_fn(X)

        # Hard-margin kernel SVM: a huge C effectively forbids margin violations.
        svm_clf = svm.SVC(kernel='rbf', gamma=gamma, C=10**10)
        svm_clf.fit(X, y)

        # Regular RBF network with k centers.
        rbf_clf = rbfClassifier(k, gamma, bounds, bias=True)
        rbf_clf.fit(X, y)

        E_in_svm = np.where(svm_clf.predict(X) != y, 1.0, 0.0).mean()
        E_in_rbf = np.where(rbf_clf.predict(X) != y, 1.0, 0.0).mean()

        X_test = random_plane_points(10000, bounds)
        y_test = target_fn(X_test)
        E_out_svm = np.where(svm_clf.predict(X_test) != y_test, 1.0, 0.0).mean()
        E_out_rbf = np.where(rbf_clf.predict(X_test) != y_test, 1.0, 0.0).mean()

        # Statistics
        if E_in_svm != 0:
            # Throw out runs the kernel SVM cannot separate.
            not_linsep += 1
            continue
        if E_in_rbf == 0:
            num_rbf_linsep += 1
        if E_out_svm < E_out_rbf:
            num_kernel_beat_reg += 1
        sum_E_in_svm += E_in_svm
        sum_E_out_svm += E_out_svm
        sum_E_in_rbf += E_in_rbf
        sum_E_out_rbf += E_out_rbf

    good_runs = num_experiments - not_linsep
    print("k =", k, "gamma =", gamma)
    print("Not linearly separable: ", not_linsep/num_experiments)
    print("Kernel beats regular: ", num_kernel_beat_reg/good_runs)
    print("Regular linearly separable: ", num_rbf_linsep/good_runs)
    print("Average value of E_in_svm: ", sum_E_in_svm/good_runs)
    print("Average value of E_out_svm: ", sum_E_out_svm/good_runs)
    print("Average value of E_in_rbf: ", sum_E_in_rbf/good_runs)
    print("Average value of E_out_rbf: ", sum_E_out_rbf/good_runs)
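# rbfClassifier is used above but not defined in this section. For reference,
# a minimal sketch of what it could look like, assuming the usual regular-RBF
# recipe: k-means centers (see cluster_centers below) feeding Gaussian
# features into a pseudo-inverse linear fit. The class name and constructor
# signature mirror the call above; the internals are an assumption, not
# necessarily the repo's actual implementation.
class rbfClassifierSketch:
    def __init__(self, k, gamma, bounds, bias=True):
        self.k, self.gamma, self.bounds, self.bias = k, gamma, bounds, bias

    def _features(self, X):
        # Phi[n, j] = exp(-gamma * ||x_n - mu_j||^2), plus optional bias column.
        d2 = ((X[:, None, :] - self.mu[None, :, :]) ** 2).sum(axis=2)
        Phi = np.exp(-self.gamma * d2)
        if self.bias:
            Phi = np.hstack([np.ones((len(X), 1)), Phi])
        return Phi

    def fit(self, X, y):
        self.mu = cluster_centers(X, self.k, self.bounds)
        self.weights = np.linalg.pinv(self._features(X)).dot(y)

    def predict(self, X):
        return np.sign(self._features(X).dot(self.weights))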
def test_random_gens():
    for dimension in range(2, 5):
        bounds = datagen.unit_bounds(dimension)
        random_points = datagen.random_plane_points(4, bounds)
        assert np.array_equal(random_points.shape, (4, dimension))
        random_line = datagen.random_hyperplane(bounds)
        assert len(random_line) == dimension + 1
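# The bounds convention assumed throughout appears to be a flat
# [min_1, max_1, ..., min_d, max_d] list (rbf_answers uses [-1, 1, -1, 1]
# in 2D). Under that assumption, datagen.unit_bounds could be as simple as
# this sketch; the actual implementation is not shown in this section.
def unit_bounds_sketch(dimension):
    # [-1, 1] repeated once per dimension, flattened.
    return [-1, 1] * dimension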
def test_all_dichotomies(num_points, max_iter, dichotomies, pla):
    """Returns True if PLA realizes every dichotomy within max_iter updates."""
    X = random_plane_points(num_points, pla.bounds)
    passed = True
    for dichotomy in dichotomies:
        pla.fit(X, dichotomy, maxiter=max_iter)
        if pla.num_iters == max_iter:
            # print(dichotomy, np.sign(np.dot(pla.X, pla.weights)), pla.num_iters)
            passed = False
            break
    return passed
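# A hedged usage sketch: enumerate all 2**num_points dichotomies with
# itertools and check that the perceptron can realize each one. `pla` is
# assumed to expose the fit/num_iters/bounds interface used above. With at
# most d + 1 points in general position in d dimensions, every dichotomy
# should be separable, so this should return True.
import itertools

def all_dichotomies_separable(pla, num_points=3, max_iter=1000):
    dichotomies = [np.array(d) for d in itertools.product([-1, 1], repeat=num_points)]
    return test_all_dichotomies(num_points, max_iter, dichotomies, pla)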
def cluster_centers(X, k, bounds):
    """Returns mu for k-means on X."""
    mu = random_plane_points(k, bounds)
    # n_clusters must match the rows of the explicit init array, and n_init=1
    # because a fixed initialization leaves nothing to restart over.
    kmeans = sklearn.cluster.KMeans(n_clusters=k, init=mu, n_init=1)
    kmeans.fit(X)
    # Restart with fresh random centers if a mean has no associated points.
    if len(set(kmeans.labels_)) < k:
        return cluster_centers(X, k, bounds)
    return kmeans.cluster_centers_
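# A hedged usage sketch for cluster_centers: draw 100 points in the 2D unit
# square and extract nine centers. Bounds follow the flat
# [xmin, xmax, ymin, ymax] convention assumed elsewhere in this file.
def demo_cluster_centers():
    X = random_plane_points(100, [-1, 1, -1, 1])
    mu = cluster_centers(X, 9, [-1, 1, -1, 1])
    assert mu.shape == (9, 2)
    return mu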
def cross_entropy_randomized_Eout(f, g, bounds, N=10000):
    """Computes the cross-entropy out-of-sample error for a linear model.

    The cross-entropy error is similar to the linear_Eout error, but instead
    of simply taking the proportion of wrong labels, we use cross entropy to
    get an error.

    Args:
        f, g: two lines defined by weights (with bias term)
        bounds: bounds used to generate random points
        N: number of points to generate

    Returns:
        Cross-entropy error from g to f.
    """
    X = np.hstack([np.ones((N, 1)), random_plane_points(N, bounds)])
    labels_f = np.sign(np.dot(X, f))
    return cross_entropy_error(X, labels_f, g)
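# cross_entropy_error is used above but not defined in this section. A minimal
# sketch consistent with the standard logistic-regression error,
# E(w) = mean over n of ln(1 + exp(-y_n * w.x_n)); this is an assumed
# implementation, not necessarily the repo's.
def cross_entropy_error_sketch(X, y, w):
    # X already includes the bias column, matching the hstack above.
    return np.log(1 + np.exp(-y * X.dot(w))).mean()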
def linear_randomized_Eout(f, g, bounds, N=10000):
    """Computes the out-of-sample error for two linear weight vectors.

    This function computes the out-of-sample error E_out by generating N
    random points in the given bounds and finding the proportion of labels
    that differ between the two lines defined by f and g.

    Args:
        f, g: two lines defined by weights (with bias term)
        bounds: bounds used to generate random points
        N: number of points to generate

    Returns:
        Proportion of generated points whose labels differ from g to f.
    """
    X = np.hstack([np.ones((N, 1)), random_plane_points(N, bounds)])
    labels_f = np.sign(np.dot(X, f))
    labels_g = np.sign(np.dot(X, g))
    return np.where(labels_f == labels_g, 0.0, 1.0).mean()
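# A hedged end-to-end usage sketch: compare a random target line f with a
# slightly perturbed hypothesis g. random_hyperplane is assumed to return
# d + 1 weights including the bias, as test_random_gens above checks; the
# perturbation below is purely illustrative.
def demo_linear_Eout():
    bounds = [-1, 1, -1, 1]
    f = np.asarray(datagen.random_hyperplane(bounds))
    g = f + 0.1 * np.random.randn(len(f))  # hypothetical nearby hypothesis
    return linear_randomized_Eout(f, g, bounds)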