def _fit(self, trava_model: TravaModel, X, y, fit_params: dict, predict_params: dict): """ If you want to control the fit process """ trava_model.fit(X=X, y=y, fit_params=fit_params, predict_params=predict_params)
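# Example (illustrative sketch, not part of the library source): a hypothetical
# subclass that overrides _fit to log how long fitting takes before delegating
# to the default behavior. The base class name FitPredictor is an assumption;
# the hook signature matches the _fit method above.
import logging
import time


class LoggingFitPredictor(FitPredictor):  # FitPredictor as the base class is assumed
    def _fit(self, trava_model: TravaModel, X, y, fit_params: dict, predict_params: dict):
        start = time.time()
        super()._fit(trava_model=trava_model, X=X, y=y, fit_params=fit_params, predict_params=predict_params)
        logging.getLogger(__name__).info(
            "Model %s fitted in %.2fs", trava_model.model_id, time.time() - start
        )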
def _fit(self, trava_model: TravaModel, X, y, fit_params: dict, predict_params: dict):
    # Fit the shared raw model only once, then copy the fitted state
    # into the other group models.
    if not self._is_raw_model_ready:
        trava_model.fit(X=X, y=y, fit_params=fit_params, predict_params=predict_params)
        for group_model in self._group_models:
            if group_model != trava_model:
                trava_model.copy(existing_model=group_model, only_fit=True)
        self._is_raw_model_ready = True
def test_predict(mocker, raw_model, model_id, X, y, fit_params, needs_proba):
    if needs_proba:
        predict_proba = mocker.Mock()
        raw_model.predict_proba.return_value = predict_proba
    y_pred = mocker.Mock()
    raw_model.predict.return_value = y_pred

    model = TravaModel(raw_model=raw_model, model_id=model_id)
    model.predict(X=X, y=y)

    raw_model.predict.assert_called_once_with(X)
    if needs_proba:
        raw_model.predict_proba.assert_called_with(X)

    assert model.predict_time
def _models_configs( self, raw_model, config: FitPredictConfig ) -> List[Tuple[TravaModel, FitPredictConfig]]: """ If you want to run multiple fits on the same raw_model, just configure TravaModels and configs for them in your subclass. """ return [(TravaModel(raw_model=raw_model, model_id=config.model_id), config)]
def __call__(self, trava_model: TravaModel, for_train: bool, X, X_raw, y, **kwargs):
    if self._requires_raw_model and not trava_model.raw_model:
        raise Exception(
            "Cannot perform eval on model {} because it was unloaded.".format(trava_model.model_id)
        )
    if self._requires_X_y and (X is None or X_raw is None or y is None):
        raise Exception(
            "Cannot perform eval on model {} "
            "because data is required and was unloaded.".format(trava_model.model_id)
        )

    return self._scorer(
        model=trava_model.get_model(for_train=for_train),
        model_info=trava_model,
        for_train=for_train,
        X=X,
        X_raw=X_raw,
        y=y,
        **kwargs
    )
def test_is_classification(mocker, raw_model, model_id, X, y, fit_params, is_classification):
    predict_proba = mocker.Mock()
    if is_classification:
        raw_model.predict_proba.return_value = predict_proba
    else:
        raw_model.predict_proba = None

    model = TravaModel(raw_model=raw_model, model_id=model_id)

    if is_classification:
        assert model.is_classification_model
    else:
        assert not model.is_classification_model
def test_get_model_unload(mocker, raw_model, model_id, for_train):
    trava_model = TravaModel(raw_model=raw_model, model_id=model_id)
    trava_model.unload_model()

    with pytest.raises(ValueError):
        trava_model.get_model(for_train=for_train)

    if for_train:
        y_pred_key = "_y_train_pred"
    else:
        y_pred_key = "_y_test_pred"

    y_pred_mock = mocker.Mock()
    mocker.patch.object(trava_model, y_pred_key, y_pred_mock)

    assert trava_model.get_model(for_train=for_train).predict(X=None) == y_pred_mock
def _models_configs( self, raw_model, config: FitPredictConfig ) -> List[Tuple[TravaModel, FitPredictConfig]]: split_result = config.raw_split_data assert split_result unique_groups = sorted( set(split_result.X_train[self._group_col_name].values)) result = [] for group in unique_groups: model_config = self._config_for_group(group=group, config=config) group_model_id = model_config.model_id + "_" + str(group) trava_model = TravaModel(raw_model=raw_model, model_id=group_model_id) result.append((trava_model, model_config)) return result
def _models_configs( self, raw_model, config: FitPredictConfig ) -> List[Tuple[TravaModel, FitPredictConfig]]: result = [] X = self._raw_dataset.X y = self._raw_dataset.y X_cleaned = X.drop(self._ignore_cols, axis=1) for fold_idx, (train_indices, test_indices) in enumerate( self._cv.split(X=X_cleaned, y=y, groups=self._groups)): X_train, y_train = X_cleaned.iloc[train_indices], y.iloc[ train_indices] X_test, y_test = X_cleaned.iloc[test_indices], y.iloc[test_indices] split_result = SplitResult(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test) fold_model_id = config.model_id + "_fold_{}".format(fold_idx + 1) model_config = FitPredictConfig( raw_split_data=split_result, raw_model=config.raw_model, model_init_params=config.model_init_params, model_id=fold_model_id, scorers_providers=config.scorers_providers, serializer=config.serializer, fit_params=config.fit_params, predict_params=config.predict_params, ) trava_model = TravaModel(raw_model=raw_model, model_id=fold_model_id) result.append((trava_model, model_config)) return result
def test_raw_model(mocker, raw_model, model_id):
    model = TravaModel(raw_model=raw_model, model_id=model_id)
    assert model.raw_model == raw_model
def test_copy(mocker, model_id, use_existing_model, only_fit):
    raw_model = mocker.Mock()
    model = TravaModel(raw_model=raw_model, model_id=model_id)

    y_train = np.array([0, 0, 1])
    y_train_pred = np.array([1, 2, 3])
    y_train_pred_proba = np.array([3, 4, 5])
    y_test = np.array([-2, 3, 5])
    y_test_pred = np.array([6, 7, 8])
    y_test_pred_proba = np.array([9, 10, 11])
    fit_params = {"1": 2}
    predict_params = {"2": 3}
    fit_time = 123
    predict_time = 434

    model._y_train = y_train
    model._y_train_pred = y_train_pred
    model._y_train_pred_proba = y_train_pred_proba
    model._y_test = y_test
    model._y_test_pred = y_test_pred
    model._y_test_pred_proba = y_test_pred_proba
    model._fit_params = fit_params
    model._predict_params = predict_params
    model._fit_time = fit_time
    model._predict_time = predict_time

    model_copy_id = model_id + "_copy"
    existing_model = None
    existing_model_id = "existing_model"

    # what a mess... but should work
    if use_existing_model:
        existing_model = TravaModel(raw_model=raw_model, model_id=existing_model_id)
        model_copy = model.copy(existing_model=existing_model, only_fit=only_fit)
    else:
        model_copy = model.copy(model_id=model_copy_id, only_fit=only_fit)

    if use_existing_model:
        assert model_copy.model_id == existing_model_id
    else:
        assert model_copy.model_id == model_copy_id

    y = model.y(for_train=True)
    copy_y = model_copy.y(for_train=True)
    assert np.array_equal(y, copy_y)
    assert model.fit_params == model_copy.fit_params
    assert model.fit_time == model_copy.fit_time

    if use_existing_model:
        assert existing_model == model_copy
    else:
        assert existing_model != model_copy

    if only_fit:
        assert model_copy.y(for_train=False) is None
        assert model_copy.y_pred(for_train=False) is None
        assert model_copy.y_pred_proba(for_train=False) is None
        assert model_copy.predict_time is None
        assert model_copy.predict_params == {}
    else:
        assert np.array_equal(model.y(for_train=False), model_copy.y(for_train=False))
        assert np.array_equal(model.y_pred(for_train=False), model_copy.y_pred(for_train=False))
        assert np.array_equal(model.y_pred_proba(for_train=False), model_copy.y_pred_proba(for_train=False))
        assert model.predict_time == model_copy.predict_time
        assert model.predict_params == model_copy.predict_params
def test_unload(mocker, model_id):
    raw_model = mocker.Mock()
    model = TravaModel(raw_model=raw_model, model_id=model_id)
    model.unload_model()
    assert not model.raw_model
def test_all_y(mocker, raw_model, model_id, X, y, fit_params, predict_params, needs_proba):
    model = TravaModel(raw_model=raw_model, model_id=model_id)

    predict_proba_train = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = predict_proba_train
    y_pred_train = mocker.Mock()
    raw_model.predict.return_value = y_pred_train

    model.fit(X=X, y=y, fit_params=fit_params, predict_params=predict_params)

    predict_proba_test = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = predict_proba_test
    y_pred_test = mocker.Mock()
    raw_model.predict.return_value = y_pred_test

    X_test = mocker.Mock()
    y_test = mocker.Mock()
    model.predict(X=X_test, y=y_test)

    assert model.y_pred(for_train=True) == y_pred_train
    assert model.y_pred(for_train=False) == y_pred_test
    assert model.y(for_train=True) == y
    assert model.y(for_train=False) == y_test

    if needs_proba:
        assert model.y_pred_proba(for_train=True) == predict_proba_train
        assert model.y_pred_proba(for_train=False) == predict_proba_test
def _predict(self, trava_model: TravaModel, X, y):
    """
    Override this method if you want to control the predict process.
    """
    trava_model.predict(X=X, y=y)
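# Example (illustrative sketch): a hypothetical override of _predict that adds
# the model id to any prediction error, which can make multi-model runs easier
# to debug. FitPredictor as the base class name is an assumption.
class SafePredictFitPredictor(FitPredictor):  # base class name is assumed
    def _predict(self, trava_model: TravaModel, X, y):
        try:
            super()._predict(trava_model=trava_model, X=X, y=y)
        except Exception as error:
            raise RuntimeError(
                "Prediction failed for model {}".format(trava_model.model_id)
            ) from error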
def test_predict_time(mocker, raw_model, model_id, X, y):
    model = TravaModel(raw_model=raw_model, model_id=model_id)
    assert not model.predict_time
    model.predict(X=X, y=y)
    assert model.predict_time
def test_predict_params(mocker, raw_model, model_id, X, y, fit_params, predict_params):
    model = TravaModel(raw_model=raw_model, model_id=model_id)
    model.fit(X=X, y=y, fit_params=fit_params, predict_params=predict_params)
    assert model.predict_params == predict_params
def test_get_model(mocker, raw_model, model_id, X, y, needs_proba):
    model = TravaModel(raw_model=raw_model, model_id=model_id)

    assert model.get_model(for_train=True) == raw_model
    assert model.get_model(for_train=False) == raw_model

    y_predict_proba = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = y_predict_proba
    y_pred = mocker.Mock()
    raw_model.predict.return_value = y_pred

    model.fit(X=X, y=y)
    model.predict(X=X, y=y)
    model.unload_model()

    train_cached_model = model.get_model(for_train=True)
    test_cached_model = model.get_model(for_train=False)

    assert train_cached_model != raw_model
    assert test_cached_model != raw_model
    assert train_cached_model.predict(X) == y_pred
    if needs_proba:
        assert train_cached_model.predict_proba(X) == y_predict_proba
def test_model_id(mocker, model_id):
    raw_model = mocker.Mock()
    model = TravaModel(raw_model=raw_model, model_id=model_id)
    assert model.model_id == model_id