def real_fitted_model(data, features):
    """Return a ``LightGBMWrapper`` fitted on ``data``.

    This fixture performs real model training, which takes time. Comment out
    this fixture and all dependent tests if you don't want to run real
    training.
    """
    train_frame = data[features]
    wrapper = LightGBMWrapper()
    target = data[TARGET_COLUMN]
    wrapper.fit(train_frame, target)
    return wrapper
def test_wrapper_fit(data, features):
    """Check that ``fit`` stores whatever ``lightgbm.train`` returns."""
    train_frame = data[features]
    wrapper = LightGBMWrapper()

    # Real LightGBM training is time consuming, so ``lightgbm.train`` is
    # replaced with a mock that returns a sentinel value.
    with patch("lightgbm.train", return_value="mocked_value"):
        wrapper.fit(train_frame, data[TARGET_COLUMN])

    assert wrapper.categorical_features_extended is not None
    assert wrapper.model == "mocked_value"
def test_custom_lgb_params_applied():
    """Check that custom constructor arguments are exposed on the wrapper."""
    wrapper = LightGBMWrapper(
        lgb_params={"objective": "binary", "boosting": "gbdt", "metric": "auc"},
        lgb_training_params={"num_boost_round": 50, "early_stopping_rounds": 10},
        metrics={"auc": {"function": roc_auc_score, "use_proba": True}},
    )

    # Expected values are built separately from the arguments above so that
    # the comparison does not rely on object identity.
    expected_lgb_params = {
        "objective": "binary",
        "boosting": "gbdt",
        "metric": "auc",
    }
    expected_training_params = {
        "num_boost_round": 50,
        "early_stopping_rounds": 10,
    }
    expected_metrics = {
        "auc": {
            "function": roc_auc_score,
            "use_proba": True,
        },
    }

    assert wrapper.lgb_params == expected_lgb_params
    assert wrapper.lgb_training_params == expected_training_params
    assert wrapper.metrics == expected_metrics
def test_evaluate():
    """Check that ``evaluate`` yields ``f1_score == 1.0`` for perfect predictions."""
    wrapper = LightGBMWrapper()
    labels = np.array([1, 0, 0, 1])
    probabilities = np.array([0.8, 0.2, 0.1, 0.9])

    # Real predictions are patched out: ``predict`` echoes the true labels,
    # ``predict_proba`` returns fixed scores.
    with patch(
        "autoboosting.auto_estimator.LightGBMWrapper.predict",
        return_value=labels,
    ), patch(
        "autoboosting.auto_estimator.LightGBMWrapper.predict_proba",
        return_value=probabilities,
    ):
        scores = wrapper.evaluate(pd.DataFrame(), labels)

    assert scores["f1_score"] == 1.0
def train_model(
    path: pathlib.Path,
    filename: str,
    output_path: pathlib.Path,
) -> None:
    """Fit a ``LightGBMWrapper`` on a CSV file and print the classifier path.

    Args:
        path: Directory that contains the training CSV.
        filename: Name of the CSV file inside ``path``.
        output_path: Base directory; ``"classifier"`` is appended to it and
            the resulting path is printed.
    """
    frame = pd.read_csv(path / filename, index_col=None)

    # Every column except the target and the identifier is used as a feature.
    feature_columns = []
    for column in frame.columns:
        if column in (TARGET_COLUMN, ID_COLUMN):
            continue
        feature_columns.append(column)

    wrapper = LightGBMWrapper()
    wrapper.fit(frame[feature_columns], frame[TARGET_COLUMN])

    # NOTE(review): the fitted model is not written anywhere in the visible
    # code; only the destination path is printed - confirm this is intended.
    classifier_path = output_path / "classifier"
    print(classifier_path)
def lightgbm_wrapper_cross_val_score(
    lgb_wrapper_params,
    X: pd.DataFrame,
    y: np.ndarray,
    random_state: Optional[int] = None,
    cv: int = 2,
):
    """Cross-validate a ``LightGBMWrapper`` and average its metrics over folds.

    Args:
        lgb_wrapper_params: Keyword arguments unpacked into ``LightGBMWrapper``
            for every fold.
        X: Feature frame, indexed positionally with the fold indices.
        y: Target array, indexed with the same fold indices.
        random_state: Seed passed to the shuffled ``StratifiedKFold``.
        cv: Number of folds.

    Returns:
        A dict mapping each metric name reported by ``evaluate`` to the
        ``np.mean`` of its per-fold values.
    """
    splitter = StratifiedKFold(
        n_splits=cv,
        shuffle=True,
        random_state=random_state,
    )
    scores_by_metric: Dict[str, List[float]] = defaultdict(list)

    for train_idx, test_idx in splitter.split(X, y):
        # A fresh estimator per fold so no state leaks between folds.
        fold_model = LightGBMWrapper(**lgb_wrapper_params)
        fold_model.fit(X.iloc[train_idx], y[train_idx])
        fold_scores = fold_model.evaluate(X.iloc[test_idx], y[test_idx])
        for metric_name, value in fold_scores.items():
            scores_by_metric[metric_name].append(value)

    averaged = {}
    for metric_name, values in scores_by_metric.items():
        averaged[metric_name] = np.mean(values)
    return averaged
def test_non_fitted_model_exception():
    """Check that ``predict`` on an unfitted wrapper raises ``ModelNotFittedException``."""
    unfitted = LightGBMWrapper()
    with pytest.raises(ModelNotFittedException):
        unfitted.predict(pd.DataFrame())