def test_kmp_precomputed_dictionary():
    """KMPClassifier with metric='precomputed': fit/predict on explicit kernels."""
    n_samples = mult_dense.shape[0]
    cv = ShuffleSplit(n_samples, n_iterations=1, test_fraction=0.2,
                      random_state=0)
    train, test = list(cv)[0]
    X_tr, y_tr = mult_dense[train], mult_target[train]
    X_te, y_te = mult_dense[test], mult_target[test]

    components = select_components(X_tr, y_tr, n_components=0.3,
                                   random_state=0)
    K_tr = pairwise_kernels(X_tr, components)

    clf = KMPClassifier(metric="precomputed")
    clf.fit(K_tr, y_tr)

    # Training accuracy against the same precomputed kernel.
    train_acc = np.mean(clf.predict(K_tr) == y_tr)
    assert_true(train_acc >= 0.75)

    # Test accuracy: kernel between test points and the selected components.
    K_te = pairwise_kernels(X_te, components)
    test_acc = np.mean(clf.predict(K_te) == y_te)
    assert_true(test_acc >= 0.63)
def test_kmp_validation():
    """Validation-set scoring and epsilon-triggered early stopping."""
    rng = check_random_state(0)
    perm = rng.permutation(200)
    X, y = bin_dense[perm], bin_target[perm]

    X_train, y_train = X[:100], y[:100]
    X_test, y_test = X[100:150], y[100:150]
    X_val, y_val = X[150:], y[150:]

    kmp = KMPClassifier(n_nonzero_coefs=1.0,
                        n_components=0.5,
                        n_refit=5,
                        estimator=Ridge(alpha=1.0),
                        metric="linear",
                        X_val=X_val, y_val=y_val,
                        random_state=0)
    kmp.fit(X_train, y_train)
    assert_almost_equal(kmp.validation_scores_[-1], 0.56, decimal=2)
    n_scores = len(kmp.validation_scores_)

    # With a positive epsilon the fit should stop early, so fewer
    # validation scores are recorded than in the first run.
    kmp.epsilon = 0.001
    kmp.fit(X_train, y_train)
    assert_true(kmp.validation_scores_.shape[0] < n_scores)
def test_kmp_fit_multiclass():
    """Multiclass training accuracy per kernel (values are regression-pinned)."""
    expected = (("rbf", 0.796), ("linear", 0.803), ("poly", 0.836))
    for metric, acc in expected:
        kmp = KMPClassifier(n_nonzero_coefs=4.0 / 5,
                            n_components=0.5,
                            n_refit=10,
                            metric=metric,
                            random_state=0)
        kmp.fit(mult_dense, mult_target)
        y_pred = kmp.predict(mult_dense)
        assert_almost_equal(np.mean(mult_target == y_pred), acc, decimal=2)
def test_kmp_init_components():
    """User-supplied initial components get pruned when n_components < 1."""
    rng = check_random_state(0)
    perm = rng.permutation(200)
    components = bin_dense[perm[:20]]

    kmp = KMPClassifier(init_components=components,
                        n_components=0.5,
                        n_refit=0,
                        metric="linear",
                        random_state=0)
    kmp.fit(bin_dense, bin_target)

    # Fewer components should be retained than were supplied.
    assert_true(kmp.components_.shape[0] < components.shape[0])
def test_kmp_fit_binary_backfitting():
    """Binary classification with per-step refitting (n_refit=1)."""
    for metric, acc in (("rbf", 0.723), ("linear", 0.954), ("poly", 0.724)):
        kmp = KMPClassifier(n_nonzero_coefs=1.0,
                            n_components=0.5,
                            n_refit=1,
                            metric=metric,
                            random_state=0)
        kmp.fit(bin_dense, bin_target)
        # NOTE(review): sibling tests read components_.shape[0] as the
        # component count; shape[1] here compares the second axis against
        # n_samples / 2 — confirm the intended axis is correct.
        assert_equal(kmp.components_.shape[1], bin_dense.shape[0] / 2)
        y_pred = kmp.predict(bin_dense)
        assert_almost_equal(np.mean(bin_target == y_pred), acc, decimal=2)
def test_kmp_squared_loss():
    """Explicit loss='squared' matches the default loss's decision values."""
    kmp = KMPClassifier(n_nonzero_coefs=4.0 / 5,
                        n_components=0.5,
                        n_refit=5,
                        estimator=Ridge(alpha=1.0),
                        metric="linear",
                        random_state=0)
    kmp.fit(bin_dense, bin_target)
    df_default = kmp.decision_function(bin_dense)

    # Refit with the squared loss selected explicitly.
    kmp.loss = "squared"
    kmp.fit(bin_dense, bin_target)
    df_squared = kmp.decision_function(bin_dense)

    assert_array_almost_equal(df_default, df_squared)
def fit_kmp(K_train, y_train, K_test, y_test, opts, random_state):
    """Fit a KMPClassifier on a precomputed kernel.

    The test kernel/labels double as the validation set; most
    hyper-parameters come from the ``opts`` namespace.
    """
    model = KMPClassifier(n_nonzero_coefs=opts.n_nonzero_coefs,
                          n_refit=opts.n_refit,
                          estimator=Ridge(alpha=opts.alpha),
                          X_val=K_test, y_val=y_test,
                          metric="precomputed",
                          scale=True,
                          scale_y=opts.scale_y,
                          check_duplicates=opts.check_duplicates,
                          n_validate=opts.n_validate,
                          epsilon=opts.epsilon,
                          verbose=1,
                          random_state=random_state,
                          n_jobs=-1)
    model.fit(K_train, y_train)
    return model
def fit_kmp(X_train, y_train, X_test, y_test, class_distrib, opts, random_state):
    """Select components from the training set, then fit a KMPClassifier.

    The test split doubles as the validation set; kernel parameters and
    stopping criteria come from the ``opts`` namespace.
    """
    components = select_components(X_train, y_train,
                                   n_components=opts.n_components,
                                   class_distrib=class_distrib,
                                   random_state=random_state)
    model = KMPClassifier(n_nonzero_coefs=opts.n_nonzero_coefs,
                          init_components=components,
                          n_refit=opts.n_refit,
                          estimator=Ridge(alpha=opts.alpha),
                          X_val=X_test, y_val=y_test,
                          metric=opts.metric,
                          gamma=opts.gamma,
                          degree=opts.degree,
                          coef0=opts.coef0,
                          scale=opts.scale,
                          n_validate=opts.n_validate,
                          epsilon=opts.epsilon,
                          verbose=1,
                          random_state=random_state,
                          n_jobs=-1)
    model.fit(X_train, y_train)
    return model
# Command-line flags: argv[1] toggles color, argv[2] toggles the decision
# surface; each falls back to its default when absent or not an integer.
# (Narrowed from a bare `except:`, which also swallowed SystemExit and
# KeyboardInterrupt.)
try:
    color = int(sys.argv[1])
except (IndexError, ValueError):
    color = True

try:
    surface = int(sys.argv[2])
except (IndexError, ValueError):
    surface = False

# Build a non-linearly-separable toy problem and split it.
X1, y1, X2, y2 = gen_non_lin_separable_data()
X_train, y_train = split_train(X1, y1, X2, y2)
X_test, y_test = split_test(X1, y1, X2, y2)

clf = KMPClassifier(n_nonzero_coefs=0.3,
                    n_components=1.0,
                    metric="rbf",
                    gamma=0.1,
                    n_refit=1,
                    estimator=Ridge(alpha=0.01),
                    random_state=random_state)
clf.fit(X_train, y_train)

y_predict = clf.predict(X_test)
correct = np.sum(y_predict == y_test)
# Parenthesized form is valid in both Python 2 and Python 3.
print("%d out of %d predictions correct" % (correct, len(y_predict)))

plot_contour(X_train, clf, color, surface)