Exemplo n.º 1
0
    def test_fit_predict_strings(self):
        """ GENSVM_GRID: Test fit and predict with string targets

        The integer iris targets are mapped to their class-name strings
        before the grid search is fitted; every label predicted on the
        held-out split must then be one of those class names.
        """
        iris = load_iris()
        class_names = iris.target_names
        # Indexing the names array with the integer targets yields one
        # string label per sample.
        string_targets = class_names[iris.target]
        X_train, X_test, y_train, _ = train_test_split(
            iris.data, string_targets
        )

        param_grid = {
            "p": [1, 1.5, 2.0],
            "kappa": [-0.9, 1.0],
            "lmd": [0.1, 1.0],
            "epsilon": [0.01, 0.002],
            "gamma": [1.0, 2.0],
            "weights": ["unit", "group"],
        }

        search = GenSVMGridSearchCV(param_grid)
        search.fit(X_train, y_train)
        predicted = search.predict(X_test)

        # Predictions may cover only part of the classes, but never a label
        # outside the known class names.
        self.assertTrue(set(predicted).issubset(set(class_names)))
Exemplo n.º 2
0
    def test_gridsearch_stratified(self):
        """ GENSVM_GRID: Error on using shufflesplit

        Constructing the grid search with either a ShuffleSplit or a
        StratifiedShuffleSplit instance as ``cv`` is expected to raise
        ValueError.
        """
        X, y = load_iris(return_X_y=True)

        # Both splitters get the same settings; each must be rejected at
        # construction time.
        for splitter_cls in (ShuffleSplit, StratifiedShuffleSplit):
            splitter = splitter_cls(
                n_splits=5, test_size=0.2, random_state=42
            )
            with self.assertRaises(ValueError):
                GenSVMGridSearchCV(param_grid="tiny", verbose=1, cv=splitter)
Exemplo n.º 3
0
 def test_invalid_y(self):
     """ GENSVM_GRID: Check raises for invalid y type

     Fitting with real-valued random targets is expected to raise a
     ValueError whose args carry the exact "continuous" label-type
     message.
     """
     grid = {"lmd": [1e-4, 100, 10000], "kernel": ["rbf"]}
     search = GenSVMGridSearchCV(grid)
     features = np.random.random((20, 4))
     # Uniform random floats are not usable as class labels.
     targets = np.random.random((20, ))
     with self.assertRaises(ValueError) as caught:
         search.fit(features, targets)
     expected_args = ("Label type not allowed for GenSVM: 'continuous'", )
     self.assertEqual(caught.exception.args, expected_args)
Exemplo n.º 4
0
    def test_params_rbf_kernel(self):
        """ GENSVM_GRID: Test best params with RBF kernel

        Fits a small grid that uses the "rbf" kernel, checks that the fitted
        search exposes ``best_params_``, and then makes sure that predicting
        on the held-out split runs without raising.
        """
        features, labels = load_iris(return_X_y=True)
        features = maxabs_scale(features)
        X_train, X_test, y_train, _ = train_test_split(features, labels)

        search = GenSVMGridSearchCV(
            {"lmd": [1e-4, 100, 10000], "kernel": ["rbf"]}
        )
        search.fit(X_train, y_train)

        self.assertTrue(hasattr(search, "best_params_"))

        # The predictions themselves are not inspected; the call only has to
        # succeed. The training data is handed over through ``trainX``
        # (presumably required for prediction with a nonlinear kernel --
        # confirm against the predict() documentation).
        search.predict(X_test, trainX=X_train)
Exemplo n.º 5
0
    def test_refit(self):
        """ GENSVM_GRID: Test refit

        Fits a grid over three regularization values on a random split of
        the scaled iris data and checks that the smallest value is reported
        as the best parameter setting.
        """
        # we use the fact that large regularization parameters usually don't
        # give a good fit.
        X, y = load_iris(return_X_y=True)
        X = maxabs_scale(X)
        # Only the training half of the split is used by this test.
        X_train, _, y_train, _ = train_test_split(X, y)

        pg = {"lmd": [1e-4, 100, 10000]}

        clf = GenSVMGridSearchCV(pg)
        clf.fit(X_train, y_train)

        self.assertTrue(
            hasattr(clf, "best_params_"),
            "fitted grid search has no attribute 'best_params_'",
        )
        # assertEqual reports both dictionaries when they differ, unlike
        # assertTrue(a == b), which only says "False is not true".
        self.assertEqual(clf.best_params_, {"lmd": 1e-4})
Exemplo n.º 6
0
    def test_multimetric(self):
        """ GENSVM_GRID: Test multimetric

        Runs the grid search with two scoring metrics and ``refit=False``,
        then checks that the search is flagged as multimetric and that
        ``cv_results_`` holds a mean test score entry for each metric.
        """
        X, y = load_iris(return_X_y=True)
        X = maxabs_scale(X)
        # Only the training half of the split is used by this test.
        X_train, _, y_train, _ = train_test_split(X, y)

        pg = {"p": [1., 1.5, 2.]}

        clf = GenSVMGridSearchCV(pg,
                                 scoring=["accuracy", "adjusted_rand_score"],
                                 refit=False)
        clf.fit(X_train, y_train)

        self.assertTrue(clf.multimetric_)
        # assertIn names the missing key and shows the container on failure,
        # which assertTrue(key in container) does not.
        self.assertIn("mean_test_accuracy", clf.cv_results_)
        self.assertIn("mean_test_adjusted_rand_score", clf.cv_results_)
Exemplo n.º 7
0
    def test_gridsearch_full(self):
        """ GENSVM_GRID: Test with full grid

        Loads the full parameter grid, overrides its ``epsilon`` entry with
        a single value, fits on a fixed split of the scaled iris data and
        requires a held-out score of at least 0.70.
        """
        features, labels = load_iris(return_X_y=True)
        scaled = maxabs_scale(features)
        split = train_test_split(scaled, labels, random_state=123)
        X_train, X_test, y_train, y_test = split

        grid = load_grid_full()
        grid["epsilon"] = [1e-5]
        search = GenSVMGridSearchCV(param_grid=grid)
        search.fit(X_train, y_train)

        # The bar is deliberately low so the test is safe on Travis; real
        # performance should be higher.
        self.assertGreaterEqual(search.score(X_test, y_test), 0.70)
Exemplo n.º 8
0
    def test_fit_score(self):
        """ GENSVM_GRID: Test fit and score

        Fits a grid over ``p``, ``kappa`` and a range of powers of two for
        ``lmd`` on a random split of the scaled iris data, and requires a
        held-out score of at least 0.80.
        """
        features, labels = load_iris(return_X_y=True)
        features = maxabs_scale(features)
        X_train, X_test, y_train, y_test = train_test_split(features, labels)

        # Exponents -12, -10, ..., 8.
        lmd_values = [2 ** exponent for exponent in range(-12, 9, 2)]
        param_grid = {
            "p": [1, 1.5, 2.0],
            "kappa": [-0.9, 1.0, 5.0],
            "lmd": lmd_values,
        }

        search = GenSVMGridSearchCV(param_grid)
        search.fit(X_train, y_train)

        # The threshold is kept low for safety.
        self.assertGreaterEqual(search.score(X_test, y_test), 0.80)
Exemplo n.º 9
0
    def slowtest_gridsearch_warnings(self):
        """ GENSVM_GRID: Check grid search with warnings

        Builds a 60-sample subset of the four-class digits data (20 rows
        drawn with replacement for each of the classes 1, 2 and 3) and runs
        a verbose grid search on it. There is no explicit assertion: the
        check is that ``fit`` completes without raising.

        The method name does not start with ``test``, so default unittest
        discovery does not collect it.
        """
        np.random.seed(123)
        X, y = load_digits(n_class=4, return_X_y=True)

        # Draw the per-class samples in class order 1, 2, 3 so the sequence
        # of random draws is fixed by the seed above.
        per_class = []
        for digit in (1, 2, 3):
            rows = X[y == digit, :]
            chosen = np.random.choice(rows.shape[0], 20)
            per_class.append(rows[chosen, :])

        Xs = np.vstack(per_class)
        ys = np.hstack([np.ones(20), 2 * np.ones(20), 3 * np.ones(20)])

        param_grid = {
            "p": [1.0, 2.0],
            "lmd": [10 ** exponent for exponent in range(-4, 1, 2)],
            "epsilon": [1e-6],
        }
        search = GenSVMGridSearchCV(param_grid, verbose=True)
        search.fit(Xs, ys)
Exemplo n.º 10
0
    def test_refit_multimetric(self):
        """ GENSVM_GRID: Test refit with multimetric

        Runs the grid search with two scoring metrics and ``refit`` set to
        "accuracy", then checks that the ``best_*`` attributes exist and
        that the smallest regularization value is reported as best.
        """
        X, y = load_iris(return_X_y=True)
        X = maxabs_scale(X)
        # Only the training half of the split is used by this test.
        X_train, _, y_train, _ = train_test_split(X, y)

        pg = {"lmd": [1e-4, 100, 10000]}

        clf = GenSVMGridSearchCV(pg,
                                 scoring=["accuracy", "adjusted_rand_score"],
                                 refit="accuracy")
        clf.fit(X_train, y_train)

        # Same attributes, same order as before; the message names the
        # attribute that is missing when a check fails.
        for attr in ("best_params_", "best_estimator_", "best_index_",
                     "best_score_"):
            self.assertTrue(
                hasattr(clf, attr),
                "fitted grid search has no attribute %r" % attr,
            )
        # assertEqual reports both dictionaries when they differ, unlike
        # assertTrue(a == b), which only says "False is not true".
        self.assertEqual(clf.best_params_, {"lmd": 1e-4})