def test_model_n_trees_invalid():
    """Check that a list-valued `n_trees` is rejected by `_set_n_trees`.

    The model is built from the toy configuration with `n_trees=[42]`; the
    call to `_set_n_trees(0)` is expected to raise a `ValueError` whose
    message mentions the invalid value.
    """
    bad_kwargs = copy.deepcopy(toy_kwargs)
    bad_kwargs["n_trees"] = [42]
    forest = CascadeForestRegressor(**bad_kwargs)

    with pytest.raises(ValueError) as err:
        forest._set_n_trees(0)

    message = str(err.value)
    assert "Invalid value for n_trees." in message
def test_model_n_trees_non_positive():
    """Check that `n_trees=0` is rejected by `_set_n_trees`.

    The expected failure is a `ValueError` whose message states that the
    value should be strictly positive.
    """
    bad_kwargs = copy.deepcopy(toy_kwargs)
    bad_kwargs["n_trees"] = 0
    forest = CascadeForestRegressor(**bad_kwargs)

    with pytest.raises(ValueError) as err:
        forest._set_n_trees(0)

    message = str(err.value)
    assert "should be strictly positive." in message
# Example #3
# 0
def test_custom_base_estimator_invalid_n_splits():
    """Check that `set_estimator` rejects `n_splits=1`.

    Four decision-tree estimators are supplied; the call is expected to raise
    a `ValueError` whose message says the value should be at least 2.
    """
    forest = CascadeForestRegressor()
    base_learners = [DecisionTreeClassifier() for _ in range(4)]

    with pytest.raises(ValueError) as err:
        forest.set_estimator(base_learners, n_splits=1)

    assert "should be at least 2" in str(err.value)
def test_model_n_trees_auto():
    """Check the values `_set_n_trees` returns when `n_trees="auto"`.

    The calls are made in the order 0, 2, 10 and must return 100, 300 and
    500 respectively.
    """
    auto_kwargs = copy.deepcopy(toy_kwargs)
    auto_kwargs["n_trees"] = "auto"
    forest = CascadeForestRegressor(**auto_kwargs)

    # (argument passed to `_set_n_trees`, expected return value)
    expectations = ((0, 100), (2, 300), (10, 500))
    for idx, expected in expectations:
        assert forest._set_n_trees(idx) == expected
def test_model_invalid_training_params(param):
    """Check that invalid training parameters make `fit` raise `ValueError`.

    `param` is indexed as a pair: `param[1]` holds keyword overrides applied
    on top of the toy configuration, and `param[0]` selects which parameter
    name is expected to appear in the error message.
    """
    overrides = param[1]
    bad_kwargs = copy.deepcopy(toy_kwargs)
    bad_kwargs.update(overrides)

    forest = CascadeForestRegressor(**bad_kwargs)

    with pytest.raises(ValueError) as err:
        forest.fit(X_train, y_train)

    # Case id -> text that must appear in the message. The trailing space in
    # "delta " is part of the expected text.
    expected_fragments = {
        0: "max_layers",
        1: "n_tolerant_rounds",
        2: "delta ",
    }
    fragment = expected_fragments.get(param[0])
    if fragment is not None:
        assert fragment in str(err.value)
# Example #6
# 0
def test_custom_base_estimator_missing_predict():
    """Check that objects lacking `predict` are rejected as custom components.

    Both `set_estimator` and `set_predictor` are expected to raise an
    `AttributeError` that mentions the missing `predict` method.
    """

    class tmp_estimator:
        # Minimal stand-in that defines `fit` but no `predict`.
        def __init__(self):
            pass

        def fit(self, X, y):
            pass

    forest = CascadeForestRegressor()

    # Used as a base estimator of the cascade layer.
    with pytest.raises(AttributeError) as err:
        forest.set_estimator([tmp_estimator()])
    estimator_message = str(err.value)
    assert "The `predict` method" in estimator_message

    # Used as the predictor.
    with pytest.raises(AttributeError) as err:
        forest.set_predictor(tmp_estimator())
    predictor_message = str(err.value)
    assert "The `predict` method of the predictor" in predictor_message
# Example #7
# 0
        records = []

        for idx, random_state in enumerate(random_states):

            msg = "Currently processing {} with trial {}..."
            print(msg.format(dataset, idx))

            model = CascadeForestRegressor(
                n_bins=n_bins,
                bin_subsample=bin_subsample,
                max_layers=max_layers,
                criterion=criterion,
                n_estimators=n_estimators,
                n_trees=n_trees,
                max_depth=max_depth,
                min_samples_leaf=min_samples_leaf,
                use_predictor=use_predictor,
                predictor=predictor,
                n_tolerant_rounds=n_tolerant_rounds,
                partial_mode=partial_mode,
                delta=delta,
                n_jobs=n_jobs,
                random_state=random_state,
                verbose=verbose,
            )

            tic = time.time()
            model.fit(X_train, y_train)
            toc = time.time()
            training_time = toc - tic

            tic = time.time()
def test_model_workflow_in_memory():
    """Exercise fit / predict / save / load with `partial_mode` disabled.

    Predictions made before saving must equal those made by a fresh model
    that loaded the saved state; the save directory is removed afterwards.
    """
    settings = copy.deepcopy(kwargs)
    settings["partial_mode"] = False

    forest = CascadeForestRegressor(**settings)
    forest.fit(X_train, y_train)

    # Reference predictions from the freshly fitted model
    expected = forest.predict(X_test).astype(np.float32)

    # Round-trip through the serialized form
    forest.save(save_dir)
    forest = CascadeForestRegressor(**settings)
    forest.load(save_dir)

    # Predictions from the reloaded model must match the reference
    actual = forest.predict(X_test).astype(np.float32)
    assert_array_equal(expected, actual)

    shutil.rmtree(save_dir)
def test_model_workflow_partial_mode():
    """Exercise fit / predict / save / load with `partial_mode` enabled.

    Predictions made before saving must equal those made by a fresh model
    that loaded the saved state; the model is cleaned and the save directory
    removed afterwards.
    """
    settings = copy.deepcopy(kwargs)
    settings["partial_mode"] = True

    forest = CascadeForestRegressor(**settings)
    forest.fit(X_train, y_train)

    # Reference predictions from the freshly fitted model
    expected = forest.predict(X_test).astype(np.float32)

    # Round-trip through the serialized form
    forest.save(save_dir)
    forest = CascadeForestRegressor(**settings)
    forest.load(save_dir)

    # Predictions from the reloaded model must match the reference
    actual = forest.predict(X_test).astype(np.float32)
    assert_array_equal(expected, actual)

    forest.clean()  # clear the buffer
    shutil.rmtree(save_dir)
def test_model_properties_after_fitting():
    """Verify container-style accessors of a fitted deep forest model.

    Covers `len(model)`, indexing, and the errors raised by the private
    layer / binner getters and setters for out-of-range or already-used
    indices.
    """
    forest = CascadeForestRegressor(**toy_kwargs)
    forest.fit(X_train, y_train)

    assert len(forest) == forest.n_layers_
    assert forest[0] is forest._get_layer(0)

    # (expected exception, call that must fail, text expected in the message)
    failing_calls = (
        (
            ValueError,
            lambda: forest._get_layer(forest.n_layers_),
            "The layer index should be in the range",
        ),
        (
            RuntimeError,
            lambda: forest._set_layer(0, None),
            "already exists in the internal container",
        ),
        (
            ValueError,
            lambda: forest._get_binner(forest.n_layers_ + 1),
            "The binner index should be in the range",
        ),
        (
            RuntimeError,
            lambda: forest._set_binner(0, None),
            "already exists in the internal container",
        ),
    )

    for exc_type, call, fragment in failing_calls:
        with pytest.raises(exc_type) as err:
            call()
        assert fragment in str(err.value)
# Example #11
# 0
def test_regressor_custom_cascade_layer_workflow_in_memory(y_train):
    """Exercise the save / load workflow with custom estimators and predictor.

    Four `DecisionTreeRegressor` instances are set as base estimators and one
    more as the predictor. Predictions before saving must equal predictions
    after loading into a fresh model, and `get_estimator(0, 0, "custom")`
    must return the very object stored under "0-0-custom" in the first layer.
    """
    forest = CascadeForestRegressor()

    # Set custom base estimators
    forest.set_estimator([DecisionTreeRegressor() for _ in range(4)])

    # Set custom predictor
    forest.set_predictor(DecisionTreeRegressor())

    forest.fit(X_train_reg, y_train)
    expected = forest.predict(X_test_reg)

    # Round-trip through the serialized form
    forest.save(save_dir)
    forest = CascadeForestRegressor()
    forest.load(save_dir)

    # Predictions from the reloaded model must match the reference
    actual = forest.predict(X_test_reg)
    assert_array_equal(expected, actual)

    # The public accessor must hand back the object held by the layer
    fetched = forest.get_estimator(0, 0, "custom")
    stored = forest._get_layer(0).estimators_["0-0-custom"].estimator_
    assert fetched is stored

    forest.clean()  # clear the buffer
    shutil.rmtree(save_dir)