예제 #1
0
    def test_pandas_input(self):
        """Fit, score and predict a grid search on DataFrame-like inputs.

        The estimator is a ``CheckingClassifier`` configured with callbacks
        that test whether the data it is handed is still an instance of the
        container type passed to the search. The mock containers are always
        exercised; the pandas ones are added only when pandas is importable.
        """
        container_pairs = [(MockDataFrame, MockDataFrame)]
        try:
            from pandas import Series, DataFrame
        except ImportError:
            # pandas is optional; fall back to the mock containers only
            pass
        else:
            container_pairs.append((DataFrame, Series))

        features = np.arange(100).reshape(10, 10)
        labels = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        for InputFeatureType, TargetType in container_pairs:
            # wrap the raw arrays: X as a frame, y as a series
            X_df = InputFeatureType(features)
            y_ser = TargetType(labels)
            checker = CheckingClassifier(
                check_X=lambda data: isinstance(data, InputFeatureType),
                check_y=lambda target: isinstance(target, TargetType),
            )

            search = TuneGridSearchCV(checker, {"foo_param": [1, 2, 3]})
            fitted = search.fit(X_df, y_ser)
            fitted.score(X_df, y_ser)
            search.predict(X_df)
            self.assertTrue(hasattr(search, "cv_results_"))
예제 #2
0
    def test_grid_search_sparse_scoring(self):
        """Compare dense and sparse grid searches under F1-based scoring.

        Three searches over ``C`` are run on the first 180 samples and used
        to predict the remaining 20: dense input with ``scoring="f1"``,
        CSR input with ``scoring="f1"``, and CSR input with a custom loss
        scorer built from the negated F1 score. All three must select the
        same ``C`` and produce the same predictions.
        """
        X_, y_ = make_classification(
            n_samples=200, n_features=100, random_state=0)

        def run_search(estimator, features, scoring):
            # Fit on the first 180 rows, predict the rest, report chosen C.
            search = TuneGridSearchCV(
                estimator, {"C": [0.1, 1.0]}, scoring=scoring)
            search.fit(features[:180], y_[:180])
            predictions = search.predict(features[180:])
            return predictions, search.best_estimator_.C

        # dense input
        y_pred, C = run_search(LinearSVC(), X_, "f1")

        # sparse (CSR) input
        X_ = sp.csr_matrix(X_)
        clf = LinearSVC()
        y_pred2, C2 = run_search(clf, X_, "f1")

        assert_array_equal(y_pred, y_pred2)
        self.assertEqual(C, C2)

        # test loss where greater is worse
        def f1_loss(y_true_, y_pred_):
            return -f1_score(y_true_, y_pred_)

        F1Loss = make_scorer(f1_loss, greater_is_better=False)
        # the same estimator instance as the previous search is reused here
        y_pred3, C3 = run_search(clf, X_, F1Loss)

        self.assertEqual(C, C3)
        assert_array_equal(y_pred, y_pred3)
예제 #3
0
    def test_diabetes(self):
        """Grid-search Ridge ``alpha`` on the diabetes dataset.

        Half of the data is held out; the fitted search predicts on it and
        the predictions plus the mean signed residual are printed. Nothing
        is asserted beyond the calls completing without raising.
        """
        dataset = datasets.load_diabetes()
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.5, random_state=0)

        # candidate regularisation strengths, including no regularisation
        param_grid = {"alpha": np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])}

        # fit one ridge regression model per alpha
        tune_search = TuneGridSearchCV(linear_model.Ridge(), param_grid)
        tune_search.fit(X_train, y_train)

        pred = tune_search.predict(X_test)
        print(pred)

        # mean of (prediction - target); signed, so errors may cancel out
        residuals = np.array(pred) - np.array(y_test)
        error = sum(residuals) / len(pred)
        print(error)
예제 #4
0
    def test_digits(self):
        """Grid-search an RBF ``SVC`` on the digits dataset.

        Half of the data is held out; the fitted search predicts on it and
        the predictions plus the fraction of correct labels are printed.
        Nothing is asserted beyond the calls completing without raising.
        """
        digits = datasets.load_digits()

        # To apply a classifier on this data the images are flattened into
        # a (samples, features) matrix.
        images = digits.images
        X = images.reshape((len(images), -1))

        # Split the dataset in two equal parts
        X_train, X_test, y_train, y_test = train_test_split(
            X, digits.target, test_size=0.5, random_state=0)

        # Parameter grid explored by cross-validation
        tuned_parameters = dict(
            kernel=["rbf"],
            gamma=[1e-3, 1e-4],
            C=[1, 10, 100, 1000],
        )

        tune_search = TuneGridSearchCV(SVC(), tuned_parameters, max_iters=20)
        tune_search.fit(X_train, y_train)

        pred = tune_search.predict(X_test)
        print(pred)

        # fraction of held-out samples whose label was predicted correctly
        matches = np.array(pred) == np.array(y_test)
        accuracy = np.count_nonzero(matches) / len(pred)
        print(accuracy)
예제 #5
0
    def test_grid_search_precomputed_kernel(self):
        """Grid-search an ``SVC`` fed with a precomputed linear kernel.

        The search is fitted on the train-vs-train Gram matrix and predicts
        from the test-vs-train Gram matrix. Finally, fitting with the kernel
        given as a nested Python list is expected to raise ``TuneError``.
        """
        X_, y_ = make_classification(
            n_samples=200, n_features=100, random_state=0)
        train_rows, test_rows = X_[:180], X_[180:]

        # training kernel matrix of the linear kernel
        K_train = np.dot(train_rows, train_rows.T)
        y_train = y_[:180]

        cv = TuneGridSearchCV(SVC(kernel="precomputed"), {"C": [0.1, 1.0]})
        cv.fit(K_train, y_train)

        self.assertTrue(cv.best_score_ >= 0)

        # test kernel matrix: test samples against training samples
        K_test = np.dot(test_rows, train_rows.T)
        y_test = y_[180:]

        y_pred = cv.predict(K_test)

        self.assertTrue(np.mean(y_pred == y_test) >= 0)

        # an error is expected when the precomputed kernel is neither
        # array-like nor sparse
        with self.assertRaises(TuneError):
            cv.fit(K_train.tolist(), y_train)
예제 #6
0
    def test_grid_search_sparse(self):
        """Check that dense and sparse inputs give matching grid searches.

        A ``LinearSVC`` search over ``C`` is fitted once on a dense array and
        once on the COO form of the same training rows. At least 90% of the
        held-out predictions must agree and the selected ``C`` must match.
        """
        X_, y_ = make_classification(n_samples=200,
                                     n_features=100,
                                     random_state=0)
        y_train = y_[:180]

        # dense run
        dense_search = TuneGridSearchCV(LinearSVC(), {"C": [0.1, 1.0]})
        dense_search.fit(X_[:180], y_train)
        y_pred = dense_search.predict(X_[180:])
        C = dense_search.best_estimator_.C

        # sparse run: fit on COO, predict on the CSR slice
        X_ = sp.csr_matrix(X_)
        sparse_search = TuneGridSearchCV(LinearSVC(), {"C": [0.1, 1.0]})
        sparse_search.fit(X_[:180].tocoo(), y_train)
        y_pred2 = sparse_search.predict(X_[180:])
        C2 = sparse_search.best_estimator_.C

        agreement = np.mean(y_pred == y_pred2)
        self.assertTrue(agreement >= 0.9)
        self.assertEqual(C, C2)
this will require the estimator to have `partial_fit`, but
we use sklearn's `warm_start` parameter to do this here.
We fit the estimator for one epoch, then `warm_start`
to pick up from where we left off, continuing until the
trial is early stopped or `max_iters` is reached.
"""

from tune_sklearn import TuneGridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Load iris and hold out 20% of the samples for the accuracy check below.
x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Estimator to tune and the grid of values to try.
clf = RandomForestClassifier()
parameter_grid = dict(min_samples_split=[2, 3, 4])

# Grid search with early stopping enabled and at most 20 iterations.
tune_search = TuneGridSearchCV(
    clf, parameter_grid, early_stopping=True, max_iters=20)
tune_search.fit(x_train, y_train)

# Fraction of held-out samples whose label was predicted correctly.
pred = tune_search.predict(x_test)
correct = np.count_nonzero(np.array(pred) == np.array(y_test))
accuracy = correct / len(pred)
print(accuracy)