示例#1
0
    def test_compute_variable_importance(self, mocker):
        """Check the importance table built with patched scoring functions.

        ``pearsonr`` is replaced by a stub returning the first unique value
        of its first argument, and the expected frame is built from the
        first unique value of each encoded column, sorted in descending
        order of importance.
        """

        def fake_pearsonr(ypred, ytrue):
            # One-element list holding the first unique value of ``ypred``.
            first_value = ypred.unique()[0]
            return [first_value]

        mocker.patch(
            "cobra.model_building.LogisticRegressionModel.score_model",
            mock_score_model,
        )
        mocker.patch(
            "cobra.model_building.models.stats.pearsonr",
            fake_pearsonr,
        )

        short_names = ["var1", "var2", "var3"]
        encoded_names = ["var1_enc", "var2_enc", "var3_enc"]

        model = LogisticRegressionModel()
        model.predictors = list(encoded_names)

        data = mock_data()

        actual = model.compute_variable_importance(data)

        rows = [
            {"predictor": short, "importance": data[encoded].unique()[0]}
            for short, encoded in zip(short_names, encoded_names)
        ]
        expected = pd.DataFrame(rows)
        expected = expected.sort_values(by="importance", ascending=False)
        expected = expected.reset_index(drop=True)

        pd.testing.assert_frame_equal(actual, expected)
示例#2
0
    def test_serialize(self, mocker):
        """A freshly constructed model should serialize to the dict below."""
        logit_params = {
            "C": 1000000000.0,
            "class_weight": None,
            "dual": False,
            "fit_intercept": True,
            "intercept_scaling": 1,
            "l1_ratio": None,
            "max_iter": 100,
            "multi_class": "auto",
            "n_jobs": None,
            "penalty": "l2",
            "random_state": 42,
            "solver": "liblinear",
            "tol": 0.0001,
            "verbose": 0,
            "warm_start": False,
        }
        expected = {
            "meta": "logistic-regression",
            "predictors": [],
            "_eval_metrics_by_split": {},
            "params": logit_params,
        }

        serialized = LogisticRegressionModel().serialize()

        assert serialized == expected
def mock_model(predictor_list, model_type="classification"):
    """Build a model of the requested type with its predictors preset.

    Parameters
    ----------
    predictor_list : list
        Value assigned to the ``predictors`` attribute of the new model.
    model_type : str, optional
        ``"classification"`` creates a ``LogisticRegressionModel`` and
        ``"regression"`` creates a ``LinearRegressionModel``.

    Returns
    -------
    LogisticRegressionModel or LinearRegressionModel
        The newly created model.

    Raises
    ------
    ValueError
        If ``model_type`` is neither ``"classification"`` nor
        ``"regression"``.
    """
    if model_type == "classification":
        model = LogisticRegressionModel()
    elif model_type == "regression":
        model = LinearRegressionModel()
    else:
        # Fail loudly here; otherwise ``model`` would be unbound below and
        # the caller would see a confusing UnboundLocalError.
        raise ValueError(
            "model_type must be 'classification' or 'regression', "
            "got {!r}".format(model_type)
        )

    model.predictors = predictor_list

    return model
示例#4
0
    def test_evaluate_cached(self):
        """evaluate() should return the metric already stored for a split."""
        split = "train"
        cached_metric = 0.79

        model = LogisticRegressionModel()
        # Pre-populate the per-split store with a known value.
        model._eval_metrics_by_split[split] = cached_metric

        empty_X = pd.DataFrame()
        empty_y = pd.Series(dtype="float64")
        result = model.evaluate(empty_X, empty_y, split)

        assert result == cached_metric
示例#5
0
    def test_evaluate(self, mocker):
        """evaluate() should return the value from the patched metric."""
        X = mock_data()
        positives, negatives = [1] * 5, [0] * 5
        y = pd.Series(positives + negatives)

        stubbed_auc = 0.79

        def mock_roc_auc_score(y_true, y_score):
            # Ignores its inputs and always reports the same score.
            return stubbed_auc

        score_model_target = (
            "cobra.model_building.LogisticRegressionModel.score_model"
        )
        roc_auc_target = "cobra.model_building.models.roc_auc_score"

        mocker.patch(score_model_target, mock_score_model_classification)
        mocker.patch(roc_auc_target, mock_roc_auc_score)

        result = LogisticRegressionModel().evaluate(X, y)

        assert result == 0.79
示例#6
0
    def test_deserialize(self):
        """deserialize() should restore the params and fitted attributes.

        The estimator parameters are compared as a dict, and the fitted
        attributes (``classes_``, ``n_iter_``, ``intercept_``, ``coef_``)
        are compared element-wise against the serialized values.
        """

        model = LogisticRegressionModel()

        model_dict = {
            "meta": "logistic-regression",
            "predictors": [],
            "_eval_metrics_by_split": {},
            "params": {
                "C": 1000000000.0,
                "class_weight": None,
                "dual": False,
                "fit_intercept": True,
                "intercept_scaling": 1,
                "l1_ratio": None,
                "max_iter": 100,
                "multi_class": "auto",
                "n_jobs": None,
                "penalty": "l2",
                "random_state": 42,
                "solver": "liblinear",
                "tol": 0.0001,
                "verbose": 0,
                "warm_start": False
            },
            "classes_": [0, 1],
            "coef_": [[0.5, 0.75]],
            "intercept_": [-3],
            "n_iter_": [10]
        }

        model.deserialize(model_dict)

        logit = model.logit
        assert logit.get_params() == model_dict["params"]

        # Compare element-wise. ``a.all() == b.all()`` reduces each array to
        # a single boolean first, so it passes for almost any pair of arrays
        # and would not catch a wrongly restored attribute.
        for attribute in ("classes_", "n_iter_", "intercept_", "coef_"):
            np.testing.assert_array_equal(
                getattr(logit, attribute),
                np.array(model_dict[attribute]),
            )
def mock_model(predictor_list):
    """Return a new LogisticRegressionModel with ``predictors`` preset.

    ``predictor_list`` is assigned to the model's ``predictors`` attribute
    as-is.
    """
    stub = LogisticRegressionModel()
    stub.predictors = predictor_list
    return stub