def setup(self):
        (X_train, y_train), _ = get_test_data()
        est = rgf.RegularizedGreedyForestClassifier(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train)

        self.ensemble = est.ensemble
    def test_not_fitted_classifier(self):
        (X_train, y_train), (X_test, y_test) = get_test_data()
        est = rgf.RegularizedGreedyForestClassifier(
            l2=0.01, max_leaf_nodes=500)

        with pytest.raises(NotFittedError):
            est.predict_proba(X_test)

        with pytest.raises(NotFittedError):
            est.predict(X_test)
    def test_train_regression_regression(self):
        (X_train, y_train), (X_test, y_test) = get_test_data(
            classification=False)
        est = rgf.RegularizedGreedyForestRegressor(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train)

        y_pred = est.predict(X_train)

        fixture_name = get_fixture_path('rgf_regression_regression.npy')
        saved_preds = np.loadtxt(fixture_name)
        np.testing.assert_allclose(y_pred, saved_preds)
    def test_train_regression(self):
        (X_train, y_train), (X_test, y_test) = get_test_data(
            classification=False)
        est = rgf.RegularizedGreedyForestRegressor(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train)

        y_pred = est.predict(X_train)
        train_score = metrics.mean_squared_error(y_train, y_pred)
        assert train_score < 2

        y_pred = est.predict(X_test)
        test_score = metrics.mean_squared_error(y_test, y_pred)
        assert test_score < 2
    def test_classification_regression(self):
        (X_train, y_train), (X_test, y_test) = get_test_data()
        sample_weight = np.ones_like(y_train)
        sample_weight[y_train == 0] = 0.5

        est = rgf.RegularizedGreedyForestClassifier(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train, sample_weight)

        y_pred = est.predict(X_train)

        fixture_name = get_fixture_path('rgf_classification_regression.npy')
        saved_preds = np.loadtxt(fixture_name)
        np.testing.assert_allclose(y_pred, saved_preds)
    def test_train_regression_weighted(self):
        (X_train, y_train), (X_test, y_test) = get_test_data(
            classification=False)
        sample_weight = np.ones_like(y_train)
        sample_weight[y_train < 0] = 0.5

        est = rgf.RegularizedGreedyForestRegressor(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train, sample_weight)

        y_pred = est.predict(X_train)
        train_score = metrics.mean_squared_error(y_train, y_pred)
        assert train_score < 2

        y_pred = est.predict(X_test)
        test_score = metrics.mean_squared_error(y_test, y_pred)
        assert test_score < 2
    def test_train_classification(self):
        (X_train, y_train), (X_test, y_test) = get_test_data()
        est = rgf.RegularizedGreedyForestClassifier(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train)

        y_pred = est.predict(X_train)
        train_score = metrics.accuracy_score(y_train, y_pred)
        assert train_score > 0.75

        y_pred = est.predict(X_test)
        test_score = metrics.accuracy_score(y_test, y_pred)
        assert test_score > 0.75

        y_proba = est.predict_proba(X_test)
        test_score = metrics.roc_auc_score(y_test, y_proba)
        assert test_score > 0.75
    def test_pickle_classifier(self):
        (X_train, y_train), (X_test, y_test) = get_test_data()
        est = rgf.RegularizedGreedyForestClassifier(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train)

        y_pred = est.predict_proba(X_test)
        with tempfile.NamedTemporaryFile('wr') as tfile:
            file_name = tfile.name

        try:
            pickle.dump(est, open(file_name, 'wb'))
            unpickle_est = pickle.load(open(file_name, 'rb'))
            unpickle_y_pred = unpickle_est.predict_proba(X_test)
            np.testing.assert_allclose(y_pred, unpickle_y_pred)
        finally:
            os.remove(file_name)
    def test_train_classification_weighted(self):
        (X_train, y_train), (X_test, y_test) = get_test_data()
        sample_weight = np.ones_like(y_train)
        sample_weight[y_train == 0] = 0.5

        est = rgf.RegularizedGreedyForestClassifier(
            l2=0.01, max_leaf_nodes=500)
        est.fit(X_train, y_train, sample_weight)

        y_pred = est.predict(X_train)
        train_score = metrics.accuracy_score(y_train, y_pred)
        assert train_score > 0.75

        y_pred = est.predict(X_test)
        test_score = metrics.accuracy_score(y_test, y_pred)
        assert test_score > 0.75

        y_proba = est.predict_proba(X_test)
        test_score = metrics.roc_auc_score(y_test, y_proba)
        assert test_score > 0.75