def test_kmp_precomputed_dictionary():
    """Fit KMPClassifier on precomputed kernels against a component dictionary.

    One shuffle split (``test_fraction=0.2``) is drawn from ``mult_dense``.
    Components are selected from the training part, the kernels between the
    samples and those components are computed up front, and the classifier
    is created with ``metric="precomputed"``.  Training accuracy must reach
    0.75 and held-out accuracy 0.63.
    """
    splitter = ShuffleSplit(mult_dense.shape[0],
                            n_iterations=1,
                            test_fraction=0.2,
                            random_state=0)
    # Only one iteration was requested, so the first split is the only one.
    train_idx, test_idx = list(splitter)[0]

    X_tr = mult_dense[train_idx]
    y_tr = mult_target[train_idx]
    X_te = mult_dense[test_idx]
    y_te = mult_target[test_idx]

    dictionary = select_components(X_tr, y_tr,
                                   n_components=0.3,
                                   random_state=0)

    # Train on the kernel between training samples and the dictionary.
    K_tr = pairwise_kernels(X_tr, dictionary)
    model = KMPClassifier(metric="precomputed")
    model.fit(K_tr, y_tr)
    assert_true(np.mean(model.predict(K_tr) == y_tr) >= 0.75)

    # Held-out samples are compared against the same dictionary.
    K_te = pairwise_kernels(X_te, dictionary)
    assert_true(np.mean(model.predict(K_te) == y_te) >= 0.63)
def fit_kmp(X_train, y_train, X_test, y_test, class_distrib, opts, random_state):
    """Select components, build a KMPClassifier from ``opts`` and fit it.

    Parameters
    ----------
    X_train, y_train
        Training data; used both for component selection and for fitting.
    X_test, y_test
        Passed to the classifier as ``X_val`` / ``y_val``.
    class_distrib
        Forwarded to ``select_components``.
    opts
        Options object; the attributes read here are ``n_components``,
        ``n_nonzero_coefs``, ``n_refit``, ``alpha``, ``metric``, ``gamma``,
        ``degree``, ``coef0``, ``scale``, ``n_validate`` and ``epsilon``.
    random_state
        Forwarded to both ``select_components`` and the classifier.

    Returns
    -------
    The fitted ``KMPClassifier`` instance.
    """
    dictionary = select_components(
        X_train, y_train,
        n_components=opts.n_components,
        class_distrib=class_distrib,
        random_state=random_state)

    # score_func is not passed; an earlier ``score_func=f1_score`` override
    # was commented out in this call.
    params = dict(
        n_nonzero_coefs=opts.n_nonzero_coefs,
        init_components=dictionary,
        n_refit=opts.n_refit,
        estimator=Ridge(alpha=opts.alpha),
        X_val=X_test,
        y_val=y_test,
        metric=opts.metric,
        gamma=opts.gamma,
        degree=opts.degree,
        coef0=opts.coef0,
        scale=opts.scale,
        n_validate=opts.n_validate,
        epsilon=opts.epsilon,
        verbose=1,
        random_state=random_state,
        n_jobs=-1,
    )

    model = KMPClassifier(**params)
    model.fit(X_train, y_train)
    return model
proportion_train=0.75, random_state=random_state) except KeyError: raise ValueError("Wrong dataset name!") print "X_train", X_train.shape print "X_test", X_test.shape # PCA view print "Computing PCA..." pca = RandomizedPCA(n_components=300) X_train_pca = pca.fit_transform(X_train) X_test_pca = pca.transform(X_test) components_pca = select_components(X_train_pca, y_train, n_components=opts.n_components, class_distrib="balanced") print "Computing kernels (PCA view)..." K_pca_train = pairwise_kernels(X_train_pca, components_pca, metric="rbf", gamma=0.1) K_pca_test = pairwise_kernels(X_test_pca, components_pca, metric="rbf", gamma=0.1) # Regular view components = select_components(X_train, y_train, n_components=opts.n_components, class_distrib="balanced") print "Computing kernels (regular view)..." K_train = pairwise_kernels(X_train, components, metric="rbf", gamma=0.1)
clf_s = [] clf_kg = [] clf_kb = [] clf_ks = [] j = 0 for X_tr, y_tr, X_te, y_te in split_data(X_train, y_train, X_test, y_test, opts.n_folds, opts.cvtype, opts.force_cv): print "Fold", j # selected from datasets print "Selected components" components = select_components(X_tr, y_tr, opts.n_components, class_distrib=class_distrib, random_state=j) clf_s.append(fit_kmp(X_tr, y_tr, X_te, y_te, components, opt_dict, opts.regression, random_state=j)) # k-means global print "Global k-means" components = create_kmeans_comp(X_tr, y_tr, n_components=opts.n_components, class_distrib="global", random_state=j) clf_kg.append(fit_kmp(X_tr, y_tr, X_te, y_te, components, opt_dict, opts.regression, random_state=j)) if not opts.regression: # k-means balanced print "Balanced k-means"
def test_kmp_select_components_stratified():
    """Check the number of components returned by stratified selection.

    Requests half of ``mult_dense`` (``n_components=0.5``) with
    ``class_distrib="stratified"`` and compares the number of returned rows
    with half the sample count minus one.
    """
    components = select_components(mult_dense, mult_target,
                                   n_components=0.5,
                                   class_distrib="stratified",
                                   random_state=0)

    # Floor division keeps the expected count an integer whether or not true
    # division is in effect (Python 3 / ``from __future__ import division``);
    # on integers under classic division it equals the former ``/``.
    # NOTE(review): the ``- 1`` presumably comes from per-class rounding in
    # the stratified selection -- confirm against select_components.
    expected = mult_dense.shape[0] // 2 - 1
    assert_equal(components.shape[0], expected)
dataset, opts, random_state = parse_kmp() try: X_train, y_train, X_test, y_test = load_dataset(dataset, proportion_train=0.75, random_state=random_state) except KeyError: raise ValueError("Wrong dataset name!") print "X_train", X_train.shape print "X_test", X_test.shape class_distrib = "random" if opts.regression else "balanced" components = select_components(X_train, y_train, n_components=opts.n_components, class_distrib=class_distrib) print "Computing linear kernels..." linear_train = pairwise_kernels(X_train, components, metric="linear") linear_test = pairwise_kernels(X_test, components, metric="linear") print "Computing rbf kernels..." rbf_train = pairwise_kernels(X_train, components, metric="rbf", gamma=opts.gamma) rbf_test = pairwise_kernels(X_test, components, metric="rbf", gamma=opts.gamma) print "Computing polynomial kernels..." poly_train = pairwise_kernels(X_train, components, metric="poly", degree=opts.degree)