示例#1
0
 def _fit(
         self, trava_model: TravaModel, X, y, fit_params: dict, predict_params: dict
 ):
     """
     Default fit hook: forwards every argument unchanged to ``trava_model.fit``.

     Override this method if you want to control the fit process.
     """
     trava_model.fit(
         X=X, y=y, fit_params=fit_params, predict_params=predict_params
     )
示例#2
0
    def _fit(
            self, trava_model: TravaModel, X, y, fit_params: dict, predict_params: dict
    ):
        """
        Fit ``trava_model`` unless the raw model was already marked as ready.

        On the first call the model is fitted, then
        ``trava_model.copy(existing_model=..., only_fit=True)`` is invoked for
        every other entry of ``self._group_models``, and finally
        ``self._is_raw_model_ready`` is set so later calls do nothing.
        """
        if self._is_raw_model_ready:
            # Already fitted through another model of the group.
            return

        trava_model.fit(
            X=X, y=y, fit_params=fit_params, predict_params=predict_params
        )

        for sibling in self._group_models:
            if sibling != trava_model:
                trava_model.copy(existing_model=sibling, only_fit=True)

        self._is_raw_model_ready = True
示例#3
0
def test_predict(mocker, raw_model, model_id, X, y, fit_params, needs_proba):
    """predict() must call the raw model with X and record a predict time."""
    raw_model.predict.return_value = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = mocker.Mock()

    trava_model = TravaModel(raw_model=raw_model, model_id=model_id)
    trava_model.predict(X=X, y=y)

    raw_model.predict.assert_called_once_with(X)
    if needs_proba:
        raw_model.predict_proba.assert_called_with(X)

    assert trava_model.predict_time
示例#4
0
 def _models_configs(
         self, raw_model, config: FitPredictConfig
 ) -> List[Tuple[TravaModel, FitPredictConfig]]:
     """
     Return the (TravaModel, config) pairs that should be fitted.

     The default is a single pair: a TravaModel wrapping ``raw_model`` under
     ``config.model_id``, together with ``config`` itself. If you want to run
     multiple fits on the same raw_model, return several configured pairs
     from your subclass instead.
     """
     single_model = TravaModel(raw_model=raw_model, model_id=config.model_id)
     return [(single_model, config)]
示例#5
0
    def __call__(self, trava_model: TravaModel, for_train: bool, X, X_raw, y, **kwargs):
        """
        Run the wrapped scorer for ``trava_model`` and return its result.

        Raises ``Exception`` when the scorer requires the raw model but
        ``trava_model.raw_model()`` is falsy, or when it requires data and any
        of ``X``, ``X_raw`` or ``y`` is ``None``. Extra keyword arguments are
        passed through to the scorer untouched.
        """
        raw_model_missing = self._requires_raw_model and not trava_model.raw_model()
        if raw_model_missing:
            message = "Cannot perform eval on model {} because it was unloaded."
            raise Exception(message.format(trava_model.model_id))

        if self._requires_X_y:
            data_missing = any(item is None for item in (X, X_raw, y))
            if data_missing:
                message = (
                    "Cannot perform eval on model {} "
                    "because data is required and was unloaded."
                )
                raise Exception(message.format(trava_model.model_id))

        scorer = self._scorer
        model_for_scoring = trava_model.get_model(for_train=for_train)
        return scorer(
            model=model_for_scoring,
            model_info=trava_model,
            for_train=for_train,
            X=X,
            X_raw=X_raw,
            y=y,
            **kwargs
        )
示例#6
0
def test_is_classification(mocker, raw_model, model_id, X, y, fit_params, is_classification):
    """is_classification_model must follow whether the raw model has predict_proba."""
    proba_result = mocker.Mock()

    if is_classification:
        raw_model.predict_proba.return_value = proba_result
    else:
        # Simulate a raw model without probability support.
        raw_model.predict_proba = None

    trava_model = TravaModel(raw_model=raw_model, model_id=model_id)

    assert bool(trava_model.is_classification_model) == bool(is_classification)
示例#7
0
def test_get_model_unload(mocker, raw_model, model_id, for_train):
    """Check get_model() on an unloaded model, before and after predictions exist.

    Right after ``unload_model()`` the call is expected to raise ``ValueError``.
    Once the matching stored-prediction attribute is patched in, the object
    returned by ``get_model()`` must give back that stored value from
    ``predict``.

    ``model_id`` is requested as a fixture: the body uses it to build the
    TravaModel, and it was previously missing from the argument list.
    """
    trava_model = TravaModel(raw_model=raw_model, model_id=model_id)
    trava_model.unload_model()

    with pytest.raises(ValueError):
        trava_model.get_model(for_train=for_train)

    # Pick the private attribute holding predictions for the requested split.
    y_pred_key = "_y_train_pred" if for_train else "_y_test_pred"

    y_pred_mock = mocker.Mock()
    mocker.patch.object(trava_model, y_pred_key, y_pred_mock)

    assert trava_model.get_model(for_train=for_train).predict(X=None) == y_pred_mock
示例#8
0
    def _models_configs(
            self, raw_model, config: FitPredictConfig
    ) -> List[Tuple[TravaModel, FitPredictConfig]]:
        """
        Build one (TravaModel, config) pair per distinct group value.

        Groups are the sorted unique values of column ``self._group_col_name``
        in ``config.raw_split_data.X_train``. Each group gets the config
        returned by ``self._config_for_group`` and a TravaModel around the same
        ``raw_model`` whose id is that config's ``model_id`` suffixed with
        ``"_<group>"``.
        """
        raw_split = config.raw_split_data
        assert raw_split

        group_values = raw_split.X_train[self._group_col_name].values

        models_and_configs = []
        for group in sorted(set(group_values)):
            group_config = self._config_for_group(group=group, config=config)
            group_model = TravaModel(
                raw_model=raw_model,
                model_id=group_config.model_id + "_" + str(group),
            )
            models_and_configs.append((group_model, group_config))

        return models_and_configs
示例#9
0
    def _models_configs(
            self, raw_model, config: FitPredictConfig
    ) -> List[Tuple[TravaModel, FitPredictConfig]]:
        """
        Build one (TravaModel, config) pair per split produced by ``self._cv``.

        The columns in ``self._ignore_cols`` are dropped from the raw dataset's
        X before splitting. Every fold gets its own ``SplitResult`` and a copy
        of ``config`` whose ``model_id`` carries a 1-based ``"_fold_<n>"``
        suffix; all folds wrap the same ``raw_model``.
        """
        features = self._raw_dataset.X
        target = self._raw_dataset.y
        usable_features = features.drop(self._ignore_cols, axis=1)

        folds = self._cv.split(X=usable_features, y=target, groups=self._groups)

        models_and_configs = []
        for fold_number, (train_idx, test_idx) in enumerate(folds, start=1):
            fold_split = SplitResult(
                X_train=usable_features.iloc[train_idx],
                y_train=target.iloc[train_idx],
                X_test=usable_features.iloc[test_idx],
                y_test=target.iloc[test_idx],
            )

            fold_id = config.model_id + "_fold_{}".format(fold_number)
            fold_config = FitPredictConfig(
                raw_split_data=fold_split,
                raw_model=config.raw_model,
                model_init_params=config.model_init_params,
                model_id=fold_id,
                scorers_providers=config.scorers_providers,
                serializer=config.serializer,
                fit_params=config.fit_params,
                predict_params=config.predict_params,
            )
            fold_model = TravaModel(raw_model=raw_model, model_id=fold_id)

            models_and_configs.append((fold_model, fold_config))

        return models_and_configs
示例#10
0
def test_raw_model(mocker, raw_model, model_id):
    """The wrapper must expose the raw model it was constructed with."""
    wrapped = TravaModel(raw_model=raw_model, model_id=model_id)

    assert wrapped.raw_model == raw_model
示例#11
0
def test_copy(mocker, model_id, use_existing_model, only_fit):
    """copy() must carry fit state always, and predict state unless only_fit.

    The source model's private fields are filled in directly, then it is
    copied either into a pre-built TravaModel or into a new one with a new id.
    """
    raw_model = mocker.Mock()
    model = TravaModel(raw_model=raw_model, model_id=model_id)

    # Train-side state.
    model._y_train = np.array([0, 0, 1])
    model._y_train_pred = np.array([1, 2, 3])
    model._y_train_pred_proba = np.array([3, 4, 5])

    # Test-side state.
    model._y_test = np.array([-2, 3, 5])
    model._y_test_pred = np.array([6, 7, 8])
    model._y_test_pred_proba = np.array([9, 10, 11])

    # Parameters and timings.
    model._fit_params = {"1": 2}
    model._predict_params = {"2": 3}
    model._fit_time = 123
    model._predict_time = 434

    new_copy_id = model_id + "_copy"
    existing_model = None
    if use_existing_model:
        expected_id = "existing_model"
        existing_model = TravaModel(raw_model=raw_model, model_id=expected_id)
        model_copy = model.copy(existing_model=existing_model, only_fit=only_fit)
    else:
        expected_id = new_copy_id
        model_copy = model.copy(model_id=new_copy_id, only_fit=only_fit)

    assert model_copy.model_id == expected_id

    # Fit-related state is copied regardless of only_fit.
    assert np.array_equal(model.y(for_train=True), model_copy.y(for_train=True))
    assert model.fit_params == model_copy.fit_params
    assert model.fit_time == model_copy.fit_time

    # Copying into an existing model must hand back that very model.
    if use_existing_model:
        assert existing_model == model_copy
    else:
        assert existing_model != model_copy

    if only_fit:
        for accessor in (model_copy.y, model_copy.y_pred, model_copy.y_pred_proba):
            assert accessor(for_train=False) is None
        assert model_copy.predict_time is None
        assert model_copy.predict_params == {}
    else:
        for accessor_name in ("y", "y_pred", "y_pred_proba"):
            source_value = getattr(model, accessor_name)(for_train=False)
            copied_value = getattr(model_copy, accessor_name)(for_train=False)
            assert np.array_equal(source_value, copied_value)
        assert model.predict_time == model_copy.predict_time
        assert model.predict_params == model_copy.predict_params
示例#12
0
def test_unload(mocker, model_id):
    """After unload_model() the wrapper must no longer hold a raw model."""
    trava_model = TravaModel(raw_model=mocker.Mock(), model_id=model_id)
    trava_model.unload_model()

    assert not trava_model.raw_model
示例#13
0
def test_all_y(mocker, raw_model, model_id, X, y, fit_params, predict_params, needs_proba):
    """Targets and predictions must be stored separately for train and test."""
    model = TravaModel(raw_model=raw_model, model_id=model_id)

    # Train phase: whatever the raw model returns during fit() is the train output.
    train_proba = mocker.Mock()
    train_pred = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = train_proba
    raw_model.predict.return_value = train_pred
    model.fit(X=X, y=y, fit_params=fit_params, predict_params=predict_params)

    # Test phase: swap the return values before calling predict() on new data.
    test_proba = mocker.Mock()
    test_pred = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = test_proba
    raw_model.predict.return_value = test_pred
    X_test, y_test = mocker.Mock(), mocker.Mock()
    model.predict(X=X_test, y=y_test)

    assert model.y_pred(for_train=True) == train_pred
    assert model.y_pred(for_train=False) == test_pred
    assert model.y(for_train=True) == y
    assert model.y(for_train=False) == y_test
    if needs_proba:
        assert model.y_pred_proba(for_train=True) == train_proba
        assert model.y_pred_proba(for_train=False) == test_proba
示例#14
0
 def _predict(self, trava_model: TravaModel, X, y):
     """
     Default predict hook: forwards ``X`` and ``y`` to ``trava_model.predict``.

     Override this method if you want to control the predict process.
     """
     trava_model.predict(
         X=X,
         y=y,
     )
示例#15
0
def test_predict_time(mocker, raw_model, model_id, X, y):
    """predict_time must be falsy before predict() and truthy afterwards."""
    trava_model = TravaModel(raw_model=raw_model, model_id=model_id)
    assert not trava_model.predict_time

    trava_model.predict(X=X, y=y)

    assert trava_model.predict_time
示例#16
0
def test_predict_params(mocker, raw_model, model_id, X, y, fit_params, predict_params):
    """The predict_params handed to fit() must be readable back from the model."""
    trava_model = TravaModel(raw_model=raw_model, model_id=model_id)
    trava_model.fit(
        X=X, y=y, fit_params=fit_params, predict_params=predict_params
    )

    assert trava_model.predict_params == predict_params
示例#17
0
def test_get_model(mocker, raw_model, model_id, X, y, needs_proba):
    """Check get_model() while the raw model is loaded and after it is unloaded.

    While loaded, both the train and the test variant must be the raw model
    itself. After fit, predict and ``unload_model()``, both variants must be
    different objects, and the train variant must still return the
    predictions made earlier.

    ``model_id`` is requested as a fixture: the body uses it to build the
    TravaModel, and it was previously missing from the argument list.
    """
    model = TravaModel(raw_model=raw_model, model_id=model_id)

    assert model.get_model(for_train=True) == raw_model
    assert model.get_model(for_train=False) == raw_model

    y_predict_proba = mocker.Mock()
    if needs_proba:
        raw_model.predict_proba.return_value = y_predict_proba

    y_pred = mocker.Mock()
    raw_model.predict.return_value = y_pred

    model.fit(X=X, y=y)
    model.predict(X=X, y=y)

    # From here on the raw model is gone; get_model() has to return a substitute.
    model.unload_model()

    train_cached_model = model.get_model(for_train=True)
    test_cached_model = model.get_model(for_train=False)

    assert train_cached_model != raw_model
    assert test_cached_model != raw_model

    assert train_cached_model.predict(X) == y_pred
    if needs_proba:
        assert train_cached_model.predict_proba(X) == y_predict_proba
示例#18
0
def test_model_id(mocker, model_id):
    """The wrapper must report the model_id it was constructed with."""
    trava_model = TravaModel(raw_model=mocker.Mock(), model_id=model_id)

    assert trava_model.model_id == model_id