start = time.time() y_pred = clf.predict(X_test) return np.mean(y_test == y_pred), time.time() - start try: dataset = sys.argv[1] except: dataset = "usps0" try: kernel = sys.argv[2] except: kernel = "rbf" try: X_train, y_train, X_test, y_test = load_dataset(dataset) except KeyError: raise ValueError("Wrong dataset name!") Nu = np.linspace(0.01, 0.15, 10) res = [fit_nusvc(X_train, y_train, nu=nu, kernel=kernel) for nu in Nu] clfs, train_times = zip(*res) res = [predict(clf, X_test, y_test) for clf in clfs] accuracies, test_times = zip(*res) n_samples = X_train.shape[0] pl.figure() pl.plot(Nu, [np.mean(clf.n_support_) for clf in clfs]) pl.xlabel("nu")
def parse_kmp(n_nonzero_coefs=200, n_components=0.5, metric="rbf", gamma=0.1,
              degree=4, coef0=1.0, epsilon=0.0, n_validate=5, n_refit=5,
              scale=False, scale_y=False, check_duplicates=False,
              force_cv=False, cvtype="random", bars=False, savefig="",
              regression=False):
    """Parse the command line and load the requested dataset.

    Every keyword argument is the default value of the command-line option
    of the same name (``n_nonzero_coefs`` backs the short option ``-n``).
    ``--n_folds`` has no matching argument and always defaults to 5.

    The first positional command-line argument is the dataset name; when it
    is absent, "usps" is used. The name is handed to ``load_dataset``.

    Parameters
    ----------
    n_nonzero_coefs, n_components, gamma, coef0, epsilon : float
        Defaults for options parsed with optparse type "float".
    degree, n_validate, n_refit : int
        Defaults for options parsed with optparse type "int".
    metric, cvtype, savefig : str
        Defaults for options parsed with optparse type "str".
    scale, scale_y, check_duplicates, force_cv, bars, regression : bool
        Defaults for the ``store_true`` flags. ``regression`` used to take
        its default from ``scale``; it now has its own default so that
        passing ``scale=True`` no longer switches ``--regression`` on.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, opts, args)`` where the first
        four items come from ``load_dataset`` and ``opts`` / ``args`` are
        the values returned by ``OptionParser.parse_args``.

    Raises
    ------
    ValueError
        If ``load_dataset`` raises ``KeyError`` for the dataset name.
    """
    # (flag, dest, default, optparse type); a type of None marks a
    # ``store_true`` switch. The order is the order the options are
    # registered in, which is also the order shown by ``--help``.
    option_table = (
        ("--n_folds", "n_folds", 5, "int"),
        ("-n", "n_nonzero_coefs", n_nonzero_coefs, "float"),
        ("--n_components", "n_components", n_components, "float"),
        ("--metric", "metric", metric, "str"),
        ("--gamma", "gamma", gamma, "float"),
        ("--degree", "degree", degree, "int"),
        ("--coef0", "coef0", coef0, "float"),
        ("--epsilon", "epsilon", epsilon, "float"),
        ("--n_validate", "n_validate", n_validate, "int"),
        ("--n_refit", "n_refit", n_refit, "int"),
        ("--scale", "scale", scale, None),
        ("--scale_y", "scale_y", scale_y, None),
        ("--check_duplicates", "check_duplicates", check_duplicates, None),
        ("--regression", "regression", regression, None),
        ("--force_cv", "force_cv", force_cv, None),
        ("--cvtype", "cvtype", cvtype, "str"),
        ("--bars", "bars", bars, None),
        ("--savefig", "savefig", savefig, "str"),
    )

    op = OptionParser()
    for flag, dest, default, opt_type in option_table:
        if opt_type is None:
            op.add_option(flag, action="store_true", default=default,
                          dest=dest)
        else:
            op.add_option(flag, action="store", default=default, dest=dest,
                          type=opt_type)

    opts, args = op.parse_args()

    # Fall back to the default dataset only when no positional argument was
    # given, instead of hiding arbitrary errors behind a bare ``except``.
    dataset = args[0] if args else "usps"

    # Keep the ``try`` limited to the lookup so that an unrelated KeyError
    # is not misreported as a bad dataset name.
    try:
        X_train, y_train, X_test, y_test = load_dataset(dataset)
    except KeyError:
        raise ValueError("Wrong dataset name!")

    # Single-argument parenthesised print: same output as the former
    # ``print "X_train", X_train.shape`` under Python 2, and valid Python 3.
    print("X_train %s" % (X_train.shape,))
    if X_test is not None:
        print("X_test %s" % (X_test.shape,))

    return X_train, y_train, X_test, y_test, opts, args
scale=True, scale_y=opts.scale_y, check_duplicates=opts.check_duplicates, n_validate=opts.n_validate, epsilon=opts.epsilon, verbose=1, random_state=random_state, n_jobs=-1) clf.fit(K_train, y_train) return clf dataset, opts, random_state = parse_kmp() try: X_train, y_train, X_test, y_test = load_dataset(dataset, proportion_train=0.75, random_state=random_state) except KeyError: raise ValueError("Wrong dataset name!") print "X_train", X_train.shape print "X_test", X_test.shape # PCA view print "Computing PCA..." pca = RandomizedPCA(n_components=300) X_train_pca = pca.fit_transform(X_train) X_test_pca = pca.transform(X_test) components_pca = select_components(X_train_pca, y_train, n_components=opts.n_components,