def test_model_n_trees_invalid():
    """Check that an unsupported `n_trees` value is rejected.

    The model is built with ``n_trees=[42]`` and ``_set_n_trees(0)`` is
    expected to raise a ``ValueError`` mentioning the invalid value.
    """
    params = copy.deepcopy(toy_kwargs)
    params["n_trees"] = [42]  # a list is passed in place of a valid setting
    regressor = CascadeForestRegressor(**params)

    with pytest.raises(ValueError) as raised:
        regressor._set_n_trees(0)

    message = str(raised.value)
    assert "Invalid value for n_trees." in message
def test_model_n_trees_non_positive():
    """Check that ``n_trees=0`` is rejected as non-positive.

    ``_set_n_trees(0)`` is expected to raise a ``ValueError`` whose message
    states that the value should be strictly positive.
    """
    params = copy.deepcopy(toy_kwargs)
    params["n_trees"] = 0
    regressor = CascadeForestRegressor(**params)

    with pytest.raises(ValueError) as raised:
        regressor._set_n_trees(0)

    message = str(raised.value)
    assert "should be strictly positive." in message
def test_custom_base_estimator_invalid_n_splits():
    """Check that ``set_estimator`` rejects ``n_splits=1``.

    Four custom estimators are supplied together with ``n_splits=1``; a
    ``ValueError`` mentioning "should be at least 2" is expected.
    """
    regressor = CascadeForestRegressor()

    # NOTE(review): classifiers are handed to a regressor here; presumably
    # irrelevant because the n_splits check is what raises -- confirm.
    custom_estimators = []
    for _ in range(4):
        custom_estimators.append(DecisionTreeClassifier())

    with pytest.raises(ValueError) as raised:
        regressor.set_estimator(custom_estimators, n_splits=1)

    message = str(raised.value)
    assert "should be at least 2" in message
def test_model_n_trees_auto():
    """Check the values returned by ``_set_n_trees`` when ``n_trees="auto"``.

    The expected results are 100 for layer index 0, 300 for index 2 and
    500 for index 10.
    """
    params = copy.deepcopy(toy_kwargs)
    params["n_trees"] = "auto"
    regressor = CascadeForestRegressor(**params)

    # (layer index, expected number of trees), checked in this order.
    expectations = ((0, 100), (2, 300), (10, 500))
    for layer_idx, expected_n_trees in expectations:
        n_trees = regressor._set_n_trees(layer_idx)
        assert n_trees == expected_n_trees
def test_model_invalid_training_params(param):
    """Check that invalid training parameters make ``fit`` raise.

    Parameters
    ----------
    param : indexable
        ``param[0]`` is a case id selecting which substring the error
        message must contain, and ``param[1]`` is a dict of keyword
        overrides applied on top of ``toy_kwargs``. Presumably supplied by a
        parametrization outside this block -- confirm.
    """
    params = copy.deepcopy(toy_kwargs)
    params.update(param[1])
    regressor = CascadeForestRegressor(**params)

    with pytest.raises(ValueError) as raised:
        regressor.fit(X_train, y_train)

    # Case id -> substring expected in the error message. The trailing space
    # in "delta " is part of the expected text. Unknown ids assert nothing.
    expected_fragments = (
        (0, "max_layers"),
        (1, "n_tolerant_rounds"),
        (2, "delta "),
    )
    for case_id, fragment in expected_fragments:
        if param[0] == case_id:
            assert fragment in str(raised.value)
            break
def test_custom_base_estimator_missing_predict():
    """Check that custom objects lacking ``predict`` are rejected.

    Both ``set_estimator`` and ``set_predictor`` are expected to raise an
    ``AttributeError`` that mentions the missing ``predict`` method.
    """

    class tmp_estimator:
        """Stub that offers ``fit`` but deliberately no ``predict``."""

        def __init__(self):
            pass

        def fit(self, X, y):
            pass

    regressor = CascadeForestRegressor()

    # Used as a base estimator.
    with pytest.raises(AttributeError) as raised:
        regressor.set_estimator([tmp_estimator()])
    estimator_message = str(raised.value)
    assert "The `predict` method" in estimator_message

    # Used as the predictor.
    with pytest.raises(AttributeError) as raised:
        regressor.set_predictor(tmp_estimator())
    predictor_message = str(raised.value)
    assert "The `predict` method of the predictor" in predictor_message
# Benchmark loop: one model is trained per random seed and its fit time is
# measured with wall-clock timestamps.
records = []
for idx, random_state in enumerate(random_states):
    msg = "Currently processing {} with trial {}..."
    print(msg.format(dataset, idx))

    # Only the seed changes between trials; every other setting comes from
    # the surrounding configuration.
    model_settings = {
        "n_bins": n_bins,
        "bin_subsample": bin_subsample,
        "max_layers": max_layers,
        "criterion": criterion,
        "n_estimators": n_estimators,
        "n_trees": n_trees,
        "max_depth": max_depth,
        "min_samples_leaf": min_samples_leaf,
        "use_predictor": use_predictor,
        "predictor": predictor,
        "n_tolerant_rounds": n_tolerant_rounds,
        "partial_mode": partial_mode,
        "delta": delta,
        "n_jobs": n_jobs,
        "random_state": random_state,
        "verbose": verbose,
    }
    model = CascadeForestRegressor(**model_settings)

    # Time the training phase.
    tic = time.time()
    model.fit(X_train, y_train)
    toc = time.time()
    training_time = toc - tic

    # Restart the timer; presumably for an evaluation phase that lies beyond
    # this fragment -- confirm against the rest of the loop.
    tic = time.time()
def test_model_workflow_in_memory():
    """Run the workflow of deep forest with in-memory mode.

    A model is fitted with ``partial_mode=False``, saved to ``save_dir`` and
    reloaded into a fresh instance. The float32 predictions on ``X_test``
    must be identical before and after serialization.
    """
    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": False})

    model = CascadeForestRegressor(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test).astype(np.float32)

    # Save and Reload
    model.save(save_dir)
    try:
        model = CascadeForestRegressor(**case_kwargs)
        model.load(save_dir)

        # Make sure the same predictions before and after model serialization
        y_pred_after = model.predict(X_test).astype(np.float32)
        assert_array_equal(y_pred_before, y_pred_after)
    finally:
        # Remove the saved model even if loading or the comparison fails, so
        # a stale directory is not left behind for later tests.
        shutil.rmtree(save_dir)
def test_model_workflow_partial_mode():
    """Run the workflow of deep forest with a local buffer.

    A model is fitted with ``partial_mode=True``, saved to ``save_dir`` and
    reloaded into a fresh instance. The float32 predictions on ``X_test``
    must be identical before and after serialization.
    """
    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": True})

    model = CascadeForestRegressor(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test).astype(np.float32)

    # Save and Reload
    model.save(save_dir)
    try:
        model = CascadeForestRegressor(**case_kwargs)
        model.load(save_dir)
        try:
            # Predictions after loading
            y_pred_after = model.predict(X_test).astype(np.float32)

            # Make sure the same predictions before and after model
            # serialization
            assert_array_equal(y_pred_before, y_pred_after)
        finally:
            # Clear the buffer even when the comparison fails.
            model.clean()
    finally:
        # Remove the saved model even if loading or the comparison fails, so
        # a stale directory is not left behind for later tests.
        shutil.rmtree(save_dir)
def test_model_properties_after_fitting():
    """Check the model properties after fitting a deep forest model.

    Covers ``len(model)``, indexing, and the errors raised by the private
    layer/binner getters and setters for out-of-range or occupied indices.
    """
    regressor = CascadeForestRegressor(**toy_kwargs)
    regressor.fit(X_train, y_train)

    # Container protocol: length and indexing mirror the fitted layers.
    assert len(regressor) == regressor.n_layers_
    assert regressor[0] is regressor._get_layer(0)

    # Layer index equal to the number of layers is rejected.
    with pytest.raises(ValueError) as raised:
        regressor._get_layer(regressor.n_layers_)
    layer_range_message = str(raised.value)
    assert "The layer index should be in the range" in layer_range_message

    # Layer 0 is already set, so writing it again is refused.
    with pytest.raises(RuntimeError) as raised:
        regressor._set_layer(0, None)
    layer_exists_message = str(raised.value)
    assert "already exists in the internal container" in layer_exists_message

    # Binner index n_layers_ + 1 is rejected.
    with pytest.raises(ValueError) as raised:
        regressor._get_binner(regressor.n_layers_ + 1)
    binner_range_message = str(raised.value)
    assert "The binner index should be in the range" in binner_range_message

    # Binner 0 is already set, so writing it again is refused.
    with pytest.raises(RuntimeError) as raised:
        regressor._set_binner(0, None)
    binner_exists_message = str(raised.value)
    assert "already exists in the internal container" in binner_exists_message
def test_regressor_custom_cascade_layer_workflow_in_memory(y_train):
    """Run the save/load workflow with custom estimators and predictor.

    Four ``DecisionTreeRegressor`` base estimators and a
    ``DecisionTreeRegressor`` predictor are installed before fitting. The
    predictions on ``X_test_reg`` must be identical before and after
    serialization, and ``get_estimator`` must return the same object that is
    stored in the first layer.

    Parameters
    ----------
    y_train :
        Training targets passed to ``fit``. Presumably supplied by a
        parametrization or fixture outside this block -- confirm.
    """
    model = CascadeForestRegressor()

    n_estimators = 4
    estimators = [DecisionTreeRegressor() for _ in range(n_estimators)]
    model.set_estimator(estimators)  # set custom base estimators

    predictor = DecisionTreeRegressor()
    model.set_predictor(predictor)

    model.fit(X_train_reg, y_train)
    y_pred_before = model.predict(X_test_reg)

    # Save and Reload
    model.save(save_dir)
    try:
        model = CascadeForestRegressor()
        model.load(save_dir)
        try:
            # Predictions after loading
            y_pred_after = model.predict(X_test_reg)

            # Make sure the same predictions before and after model
            # serialization
            assert_array_equal(y_pred_before, y_pred_after)

            assert (
                model.get_estimator(0, 0, "custom")
                is model._get_layer(0).estimators_["0-0-custom"].estimator_
            )
        finally:
            # Clear the buffer even when an assertion fails.
            model.clean()
    finally:
        # Remove the saved model even if loading or an assertion fails, so a
        # stale directory is not left behind for later tests.
        shutil.rmtree(save_dir)