Example #1
    def __init__(self,
                 model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
                 model_regression=WeightedLassoCV(cv=3),
                 featurizer=None,
                 fit_cate_intercept=True,
                 n_splits=2, random_state=None):
        super().__init__(model_propensity=model_propensity,
                         model_regression=model_regression,
                         model_final=StatsModelsLinearRegression(fit_intercept=fit_cate_intercept),
                         featurizer=featurizer,
                         multitask_model_final=False,
                         n_splits=n_splits,
                         random_state=random_state)
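
A minimal usage sketch for the constructor above (the module path and fit signature are assumptions based on the older econml releases these snippets come from; newer releases expose the class under econml.dr):

import numpy as np
from econml.drlearner import LinearDRLearner

np.random.seed(0)
X = np.random.normal(size=(500, 2))
T = np.random.binomial(1, .5, size=500)            # discrete treatment, as DRLearner requires
y = (1 + X[:, 0]) * T + X[:, 0] + np.random.normal(size=500)

est = LinearDRLearner()                            # defaults from the __init__ above
est.fit(y, T, X, None, inference='statsmodels')    # X features, no extra controls W
print(est.effect(X[:5], T0=0, T1=1))               # CATE point estimates
print(est.effect_interval(X[:5], T0=0, T1=1))      # intervals via 'statsmodels' inference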
Example #2
    def test_cate_api(self):
        """Test that we correctly implement the CATE API."""
        n = 20

        def make_random(is_discrete, d):
            if d is None:
                return None
            sz = (n, d) if d > 0 else (n,)
            if is_discrete:
                while True:
                    arr = np.random.choice(['a', 'b', 'c'], size=sz)
                    # ensure that we've got at least two of every element
                    _, counts = np.unique(arr, return_counts=True)
                    if len(counts) == 3 and counts.min() > 1:
                        return arr
            else:
                return np.random.normal(size=sz)

        for d_y in [0, 1]:
            is_discrete = True
            for d_t in [0, 1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        W, X, Y, T = [make_random(is_discrete, d)
                                      for is_discrete, d in [(False, d_w),
                                                             (False, d_x),
                                                             (False, d_y),
                                                             (is_discrete, d_t)]]

                        if (X is None) and (W is None):
                            continue
                        d_t_final = 2 if is_discrete else d_t

                        effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                        effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1), 6)
                        marginal_effect_shape = ((n,) +
                                                 ((d_y,) if d_y > 0 else ()) +
                                                 ((d_t_final,) if d_t_final > 0 else ()))
                        marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1),
                                                              6 * (d_t_final if d_t_final > 0 else 1))

                        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                        const_marginal_effect_shape = ((n if d_x else 1,) +
                                                       ((d_y,) if d_y > 0 else ()) +
                                                       ((d_t_final,) if d_t_final > 0 else ()))
                        const_marginal_effect_summaryframe_shape = (
                            (n if d_x else 1) * (d_y if d_y > 0 else 1),
                            6 * (d_t_final if d_t_final > 0 else 1))

                        for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                                                        multi_class='auto')),
                                    DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
                                              model_regression=LinearRegression(),
                                              model_final=StatsModelsLinearRegression(),
                                              multitask_model_final=True)]:

                            # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                            infs = [None]
                            if isinstance(est, LinearDRLearner):
                                infs.append('statsmodels')

                            for inf in infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):
                                    est.fit(Y, T, X, W, inference=inf)
                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(X)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(
                                        shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(
                                        shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_array_equal(
                                        marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                    T0 = np.full_like(T, 'a')
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)
                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T, X)
                                        const_marg_effect_inf = est.const_marginal_effect_inference(X)
                                        T1 = np.full_like(T, 'b')
                                        effect_inf = est.effect_inference(X, T0=T0, T1=T1)
                                        marg_effect_inf = est.marginal_effect_inference(T, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2,) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2,) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                         (2,) + effect_shape)

                                        # test const marginal inference
                                        self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                         const_marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.var),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                         (2,) + const_marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(const_marg_effect_inf.conf_int()[0],
                                                                             const_marg_eff_int[0], decimal=5)
                                        const_marg_effect_inf.population_summary()._repr_html_()

                                        # test effect inference
                                        self.assertEqual(shape(effect_inf.summary_frame()),
                                                         effect_summaryframe_shape)
                                        self.assertEqual(shape(effect_inf.point_estimate),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.stderr),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.var),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.pvalue()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.zstat()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.conf_int()),
                                                         (2,) + effect_shape)
                                        np.testing.assert_array_almost_equal(effect_inf.conf_int()[0],
                                                                             est.effect_interval(X, T0=T0, T1=T1)[0],
                                                                             decimal=5)
                                        effect_inf.population_summary()._repr_html_()

                                        # test marginal effect inference
                                        self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                         marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.stderr),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.var),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.zstat()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                         (2,) + marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(marg_effect_inf.conf_int()[0],
                                                                             marg_eff_int[0], decimal=5)
                                        marg_effect_inf.population_summary()._repr_html_()

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar treatments, no matter the
                                    # dimensions of T, and also that we warn when there are multiple treatments
                                    if d_t > 1:
                                        cm = self.assertWarns(Warning)
                                    else:
                                        cm = ExitStack()  # ExitStack can be used as a "do nothing" ContextManager
                                    with cm:
                                        effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else ())
                                        eff = est.effect(X, T0='a', T1='b')
                                        self.assertEqual(shape(eff), effect_shape2)
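
A compact restatement of the shape bookkeeping this test exercises (the 6 is the number of statistic columns in an inference summary frame; all conventions here are the ones the assertions above check, not independent facts):

n, d_y, d_t_final = 20, 1, 2                       # 3 categories ('a','b','c') -> 2 non-baseline columns
effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
marginal_effect_shape = effect_shape + ((d_t_final,) if d_t_final > 0 else ())
effect_summaryframe_shape = (n * max(d_y, 1), 6)   # one row per (sample, outcome), 6 statistics
assert effect_shape == (20, 1)
assert marginal_effect_shape == (20, 1, 2)
assert effect_summaryframe_shape == (20, 6)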
Example #3
    def test_drlearner_all_attributes(self):
        from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
        from sklearn.linear_model import LinearRegression, LogisticRegression
        from econml.utilities import StatsModelsLinearRegression
        import scipy.special
        np.random.seed(123)
        controls = np.random.uniform(-1, 1, size=(5000, 3))
        T = np.random.binomial(2, scipy.special.expit(controls[:, 0]))
        sigma = 0.01
        y = ((1 + .5 * controls[:, 0]) * T + controls[:, 0] +
             np.random.normal(0, sigma, size=(5000,)))
        for X in [controls]:
            for W in [None, controls]:
                for sample_weight in [None, 1 + np.random.randint(10, size=X.shape[0])]:
                    for sample_var in [None, 1 + np.random.randint(10, size=X.shape[0])]:
                        for featurizer in [None, PolynomialFeatures(degree=2, include_bias=False)]:
                            for models in [(GradientBoostingClassifier(), GradientBoostingRegressor(),
                                            RandomForestRegressor(n_estimators=100,
                                                                  max_depth=5, min_samples_leaf=50)),
                                           (LogisticRegression(solver='lbfgs', multi_class='auto'),
                                            LinearRegression(), StatsModelsLinearRegression())]:
                                for multitask_model_final in [False, True]:
                                    if (not isinstance(models[2], StatsModelsLinearRegression))\
                                            and (sample_var is not None):
                                        continue
                                    with self.subTest(X=X, W=W, sample_weight=sample_weight, sample_var=sample_var,
                                                      featurizer=featurizer, models=models,
                                                      multitask_model_final=multitask_model_final):
                                        est = DRLearner(model_propensity=models[0],
                                                        model_regression=models[1],
                                                        model_final=models[2],
                                                        featurizer=featurizer,
                                                        multitask_model_final=multitask_model_final)
                                        if (X is None) and (W is None):
                                            with pytest.raises(AttributeError) as e_info:
                                                est.fit(y, T, X=X, W=W,
                                                        sample_weight=sample_weight, sample_var=sample_var)
                                            continue
                                        est.fit(y, T, X=X, W=W,
                                                sample_weight=sample_weight, sample_var=sample_var)
                                        np.testing.assert_allclose(est.effect(X[:3], T0=0, T1=1), 1 + .5 * X[:3, 0],
                                                                   rtol=0, atol=.15)
                                        np.testing.assert_allclose(est.const_marginal_effect(X[:3]),
                                                                   np.hstack(
                                                                       [1 + .5 * X[:3, [0]],
                                                                        2 * (1 + .5 * X[:3, [0]])]),
                                                                   rtol=0, atol=.15)
                                        for t in [1, 2]:
                                            np.testing.assert_allclose(est.marginal_effect(t, X[:3]),
                                                                       np.hstack([1 + .5 * X[:3, [0]],
                                                                                  2 * (1 + .5 * X[:3, [0]])]),
                                                                       rtol=0, atol=.15)
                                        assert isinstance(est.score_, float)
                                        assert isinstance(est.score(y, T, X=X, W=W), float)

                                        feat_names = ['A', 'B', 'C']
                                        out_feat_names = feat_names
                                        if featurizer is not None:
                                            out_feat_names = featurizer.fit(X).get_feature_names(feat_names)
                                            np.testing.assert_array_equal(est.featurizer.n_input_features_, 3)
                                        np.testing.assert_array_equal(est.cate_feature_names(feat_names),
                                                                      out_feat_names)

                                        if isinstance(models[0], GradientBoostingClassifier):
                                            np.testing.assert_array_equal(np.array([mdl.feature_importances_
                                                                                    for mdl
                                                                                    in est.models_regression]).shape,
                                                                          [2, 2 + X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                            np.testing.assert_array_equal(np.array([mdl.feature_importances_
                                                                                    for mdl
                                                                                    in est.models_propensity]).shape,
                                                                          [2, X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                        else:
                                            np.testing.assert_array_equal(np.array([mdl.coef_
                                                                                    for mdl
                                                                                    in est.models_regression]).shape,
                                                                          [2, 2 + X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                            np.testing.assert_array_equal(np.array([mdl.coef_
                                                                                    for mdl
                                                                                    in est.models_propensity]).shape,
                                                                          [2, 3, X.shape[1] +
                                                                           (W.shape[1] if W is not None else 0)])
                                        if multitask_model_final:
                                            if isinstance(models[2], RandomForestRegressor):
                                                np.testing.assert_equal(
                                                    np.argsort(est.multitask_model_cate.feature_importances_)[-1], 0)
                                            else:
                                                true_coef = np.zeros((2, len(out_feat_names)))
                                                true_coef[:, 0] = [.5, 1]
                                                np.testing.assert_allclose(est.multitask_model_cate.coef_,
                                                                           true_coef, rtol=0, atol=.15)
                                                np.testing.assert_allclose(est.multitask_model_cate.intercept_,
                                                                           [1, 2], rtol=0, atol=.15)
                                        else:
                                            for t in [1, 2]:
                                                if isinstance(models[2], RandomForestRegressor):
                                                    np.testing.assert_equal(
                                                        np.argsort(est.model_cate(T=t).feature_importances_)[-1], 0)
                                                else:
                                                    true_coef = np.zeros(len(out_feat_names))
                                                    true_coef[0] = .5 * t
                                                    np.testing.assert_allclose(est.model_cate(T=t).coef_,
                                                                               true_coef, rtol=0, atol=.15)
                                                    np.testing.assert_allclose(est.model_cate(T=t).intercept_,
                                                                               t, rtol=0, atol=.15)
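
For reference, the data generating process above pins down the asserted values: y = (1 + .5*x0)*T + x0 + eps, so the effect of moving T from 0 to t is t*(1 + .5*x0), i.e. intercept t and slope .5*t on the first feature, which is exactly what true_coef and the intercept checks encode. A two-line sanity check of that algebra:

import numpy as np
x0 = np.linspace(-1, 1, 5)
for t in (1, 2):
    np.testing.assert_allclose(t * (1 + .5 * x0), t + (.5 * t) * x0)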
Example #4
    def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
                 model_regression=WeightedLassoCV(cv=3),
                 model_final=StatsModelsLinearRegression(),
                 multitask_model_final=False,
                 featurizer=None,
                 n_splits=2,
                 random_state=None):
        class ModelNuisance:
            def __init__(self, model_propensity, model_regression):
                self._model_propensity = model_propensity
                self._model_regression = model_regression

            def _combine(self, X, W):
                return np.hstack([arr for arr in [X, W] if arr is not None])

            def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
                # TODO Allow for non-vector y, i.e. of shape (n, 1)
                assert np.ndim(Y) == 1, "Can only accept single dimensional outcomes Y! Use Y.ravel()."
                if (X is None) and (W is None):
                    raise AttributeError("At least one of X or W has to not be None!")
                if np.any(np.all(T == 0, axis=0)) or (not np.any(np.all(T == 0, axis=1))):
                    raise AttributeError("Provided crossfit folds contain training splits that " +
                                         "don't contain all treatments")
                XW = self._combine(X, W)
                filtered_kwargs = _filter_none_kwargs(sample_weight=sample_weight)
                self._model_propensity.fit(XW, inverse_onehot(T), **filtered_kwargs)
                self._model_regression.fit(np.hstack([XW, T]), Y, **filtered_kwargs)
                return self

            def predict(self, Y, T, X=None, W=None, *, sample_weight=None):
                XW = self._combine(X, W)
                propensities = self._model_propensity.predict_proba(XW)
                Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))
                T_counter = np.zeros(T.shape)
                Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter]))
                Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
                for t in np.arange(T.shape[1]):
                    T_counter = np.zeros(T.shape)
                    T_counter[:, t] = 1
                    Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter]))
                    Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
                return Y_pred

        class ModelFinal:
            # Coding remark: the multitask_model_final logic could have been simplified by
            # wrapping model_final in a MultiOutputRegressor. However, we also want to allow
            # model_final objects whose fit(X, y) accepts X=None (e.g. StatsModelsLinearRegression),
            # and MultiOutputRegressor checks that X is a 2D array, so we cannot take that route.
            def __init__(self, model_final, featurizer, multitask_model_final):
                self._model_final = clone(model_final, safe=False)
                self._featurizer = clone(featurizer, safe=False)
                self._multitask_model_final = multitask_model_final

            def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
                Y_pred, = nuisances
                if (X is not None) and (self._featurizer is not None):
                    X = self._featurizer.fit_transform(X)
                filtered_kwargs = _filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var)
                if self._multitask_model_final:
                    self.model_cate = clone(self._model_final, safe=False).fit(
                        X, Y_pred[:, 1:] - Y_pred[:, [0]], **filtered_kwargs)
                else:
                    self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[:, t] - Y_pred[:, 0],
                                                                                 **filtered_kwargs)
                                        for t in np.arange(1, Y_pred.shape[1])]
                return self

            def predict(self, X=None):
                if (X is not None) and (self._featurizer is not None):
                    X = self._featurizer.transform(X)
                if self._multitask_model_final:
                    return self.model_cate.predict(X)
                else:
                    return np.array([mdl.predict(X) for mdl in self.models_cate]).T

            def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
                if (X is not None) and (self._featurizer is not None):
                    X = self._featurizer.transform(X)
                Y_pred, = nuisances
                if self._multitask_model_final:
                    return np.mean(np.average((Y_pred[:, 1:] - Y_pred[:, [0]] - self.model_cate.predict(X))**2,
                                              weights=sample_weight, axis=0))
                else:
                    return np.mean([np.average((Y_pred[:, t] - Y_pred[:, 0] - self.models_cate[t - 1].predict(X))**2,
                                               weights=sample_weight, axis=0)
                                    for t in np.arange(1, Y_pred.shape[1])])

        self._multitask_model_final = multitask_model_final
        super().__init__(ModelNuisance(model_propensity, model_regression),
                         ModelFinal(model_final, featurizer, multitask_model_final),
                         n_splits=n_splits, discrete_treatment=True,
                         random_state=random_state)
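
The nuisance predictions above are the doubly robust pseudo-outcomes Y_pred[:, t] = g(X, W, t) + (Y - g(X, W, t)) * 1{T == t} / p_t(X, W), where g is model_regression and p_t is model_propensity. A standalone vectorized restatement (a hypothetical helper for illustration, not part of the econml API):

import numpy as np

def dr_pseudo_outcome(y, t_onehot, y_hat, propensities):
    """Vectorized form of ModelNuisance.predict above.

    y: (n,) outcomes; t_onehot: (n, k) one-hot treatments without the baseline
    column; y_hat: (n, k+1) regression predictions with every unit forced to
    each level; propensities: (n, k+1) estimates of P(T == t | X, W).
    """
    baseline = t_onehot.sum(axis=1, keepdims=True) == 0     # 1 where T is the baseline level
    indicator = np.hstack([baseline, t_onehot]).astype(float)
    return y_hat + (y[:, None] - y_hat) * indicator / propensities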
Example #5
    def test_comp_with_statsmodels(self):
        """ Comparing with confidence intervals and standard errors of statsmodels in the un-weighted case """
        np.random.seed(123)

        # Single dimensional output y
        n = 1000
        d = 3
        X = np.random.binomial(1, .8, size=(n, d))
        T = np.random.binomial(1, .5 * X[:, 0] + .25, size=(n,))

        def true_effect(x):
            return x[:, 0] + .5
        y = true_effect(X) * T + X[:, 0] + X[:, 2] + np.random.normal(0, 1, size=(n,))
        X_test = np.unique(np.random.binomial(1, .5, size=(n, d)), axis=0)
        for fit_intercept in [True, False]:
            for cov_type in ['nonrobust', 'HC0', 'HC1']:
                est = OLS(fit_intercept=fit_intercept, cov_type=cov_type).fit(X, y)
                lr = StatsModelsOLS(fit_intercept=fit_intercept,
                                    fit_args={'cov_type': cov_type, 'use_t': False}).fit(X, y)
                _compare_classes(est, lr, X_test)

        n = 1000
        d = 3
        X = np.random.normal(0, 1, size=(n, d))
        y = X[:, 0] + X[:, 2] + np.random.normal(0, 1, size=(n,))
        X_test = np.unique(np.random.binomial(1, .5, size=(n, d)), axis=0)
        for fit_intercept in [True, False]:
            for cov_type in ['nonrobust', 'HC0', 'HC1']:
                est = OLS(fit_intercept=fit_intercept, cov_type=cov_type).fit(X, y)
                lr = StatsModelsOLS(fit_intercept=fit_intercept,
                                    fit_args={'cov_type': cov_type, 'use_t': False}).fit(X, y)
                _compare_classes(est, lr, X_test)

        d = 3
        X = np.vstack([np.eye(d)])
        y = np.concatenate((X[:, 0] - 1, X[:, 0] + 1))
        X = np.vstack([X, X])
        X_test = np.unique(np.random.binomial(1, .5, size=(n, d)), axis=0)

        for cov_type in ['nonrobust', 'HC0', 'HC1']:
            for alpha in [.01, .05, .1]:
                _compare_classes(OLS(fit_intercept=False, cov_type=cov_type).fit(X, y),
                                 StatsModelsOLS(fit_intercept=False, fit_args={
                                                'cov_type': cov_type, 'use_t': False}).fit(X, y),
                                 X_test, alpha=alpha)

        d = 3
        X = np.vstack([np.eye(d), np.ones((1, d)), np.zeros((1, d))])
        y = np.concatenate((X[:, 0] - 1, X[:, 0] + 1))
        X = np.vstack([X, X])
        X_test = np.unique(np.random.binomial(1, .5, size=(n, d)), axis=0)
        for cov_type in ['nonrobust', 'HC0', 'HC1']:
            _compare_classes(OLS(fit_intercept=True, cov_type=cov_type).fit(X, y),
                             StatsModelsOLS(fit_intercept=True,
                                            fit_args={'cov_type': cov_type, 'use_t': False}).fit(X, y), X_test)

        # Multi-dimensional output y
        n = 1000
        d = 3
        for p in np.arange(1, 4):
            X = np.random.binomial(1, .8, size=(n, d))
            T = np.random.binomial(1, .5 * X[:, 0] + .25, size=(n,))

            def true_effect(x):
                return np.hstack([x[:, [0]] + .5 + t for t in range(p)])
            y = true_effect(X) * T.reshape(-1, 1) + X[:, [0] * p] + \
                (0 * X[:, [0] * p] + 1) * np.random.normal(0, 1, size=(n, p))

            for cov_type in ['nonrobust', 'HC0', 'HC1']:
                for fit_intercept in [True, False]:
                    for alpha in [.01, .05, .2]:
                        est = OLS(fit_intercept=fit_intercept, cov_type=cov_type).fit(X, y)
                        lr = [StatsModelsOLS(fit_intercept=fit_intercept,
                                             fit_args={'cov_type': cov_type, 'use_t': False}).fit(X, y[:, t])
                              for t in range(p)]
                        for t in range(p):
                            assert np.all(np.abs(est.coef_[t] - lr[t].coef_) < 1e-12),\
                                "{}, {}, {}: {}, {}".format(cov_type, fit_intercept, t, est.coef_[t], lr[t].coef_)
                            assert np.all(np.abs(np.array(est.coef__interval(alpha=alpha))[:, t] -
                                                 lr[t].coef__interval(alpha=alpha)) < 1e-12),\
                                "{}, {}, {}: {} vs {}".format(cov_type, fit_intercept, t,
                                                              np.array(est.coef__interval(alpha=alpha))[:, t],
                                                              lr[t].coef__interval(alpha=alpha))
                            assert np.all(np.abs(est.intercept_[t] - lr[t].intercept_) < 1e-12),\
                                "{}, {}, {}: {} vs {}".format(cov_type, fit_intercept, t,
                                                              est.intercept_[t], lr[t].intercept_)
                            assert np.all(np.abs(np.array(est.intercept__interval(alpha=alpha))[:, t] -
                                                 lr[t].intercept__interval(alpha=alpha)) < 1e-12),\
                                "{}, {}, {}: {} vs {}".format(cov_type, fit_intercept, t,
                                                              np.array(est.intercept__interval(alpha=alpha))[:, t],
                                                              lr[t].intercept__interval(alpha=alpha))
                            assert np.all(np.abs(est.predict(X_test)[:, t] - lr[t].predict(X_test)) < 1e-12),\
                                "{}, {}, {}: {} vs {}".format(cov_type, fit_intercept, t,
                                                              est.predict(X_test)[:, t], lr[t].predict(X_test))
                            assert np.all(np.abs(np.array(est.predict_interval(X_test, alpha=alpha))[:, :, t] -
                                                 lr[t].predict_interval(X_test, alpha=alpha)) < 1e-12),\
                                "{}, {}, {}: {} vs {}".format(cov_type, fit_intercept, t,
                                                              np.array(est.predict_interval(X_test,
                                                                                            alpha=alpha))[:, :, t],
                                                              lr[t].predict_interval(X_test, alpha=alpha))
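
For orientation, the statsmodels side of this comparison looks roughly as follows (a sketch using the standard statsmodels API directly; the StatsModelsOLS wrapper above adds the interval helpers around it):

import numpy as np
import statsmodels.api as sm

np.random.seed(123)
X = np.random.normal(size=(100, 3))
y = X[:, 0] + np.random.normal(size=100)
res = sm.OLS(y, sm.add_constant(X)).fit(cov_type='HC1', use_t=False)   # mirrors fit_args above
print(res.params)                 # coefficients compared against OLS.coef_
print(res.bse)                    # (robust) standard errors
print(res.conf_int(alpha=.05))    # the intervals _compare_classes checks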
Example #6
    def test_inference(self):
        """ Testing that we recover the expected standard errors and confidence intervals in a known example """

        # 1-d output
        d = 3
        X = np.vstack([np.eye(d)])
        y = X[:, 0]
        est = OLS(fit_intercept=False).fit(X, y)
        assert np.all(np.abs(est.coef_ - [1, 0, 0]) <= 1e-12), "{}, {}".format(est.coef_, [1, 0, 0])
        assert np.all(np.abs(est.coef__interval() - np.array([[1, 0, 0], [1, 0, 0]])) <= 1e-12),\
            "{}, {}".format(est.coef__interval(), np.array([[1, 0, 0], [1, 0, 0]]))
        assert np.all(est.coef_stderr_ <= 1e-12)
        assert np.all(est._param_var <= 1e-12)

        d = 3
        X = np.vstack([np.eye(d), np.ones((1, d)), np.zeros((1, d))])
        y = X[:, 0]
        est = OLS(fit_intercept=True).fit(X, y)
        assert np.all(np.abs(est.coef_ - np.array([1] + [0] * (d - 1))) <=
                      1e-12), "{}, {}".format(est.coef_, [1] + [0] * (d - 1))
        assert np.all(np.abs(est.coef__interval() - np.array([[1] + [0] * (d - 1), [1] + [0] * (d - 1)])) <= 1e-12),\
            "{}, {}".format(est.coef__interval(), np.array([[1] + [0] * (d - 1), [1] + [0] * (d - 1)]))
        assert np.all(est.coef_stderr_ <= 1e-12)
        assert np.all(est._param_var <= 1e-12)
        assert np.abs(est.intercept_) <= 1e-12
        assert np.all(np.abs(est.intercept__interval()) <= 1e-12)

        d = 3
        X = np.vstack([np.eye(d)])
        y = np.concatenate((X[:, 0] - 1, X[:, 0] + 1))
        X = np.vstack([X, X])
        est = OLS(fit_intercept=False).fit(X, y)
        assert np.all(np.abs(est.coef_ - ([1] + [0] * (d - 1))) <=
                      1e-12), "{}, {}".format(est.coef_, [1] + [0] * (d - 1))
        assert np.all(np.abs(est.coef_stderr_ - np.array([1] * d)) <= 1e-12)
        assert np.all(np.abs(est.coef__interval()[0] -
                             np.array([scipy.stats.norm.ppf(.025, loc=1, scale=1)] +
                                      [scipy.stats.norm.ppf(.025, loc=0, scale=1)] * (d - 1))) <= 1e-12),\
            "{}, {}".format(est.coef__interval()[0], np.array([scipy.stats.norm.ppf(.025, loc=1, scale=1)] +
                                                              [scipy.stats.norm.ppf(.025, loc=0, scale=1)] * (d - 1)))
        assert np.all(np.abs(est.coef__interval()[1] -
                             np.array([scipy.stats.norm.ppf(.975, loc=1, scale=1)] +
                                      [scipy.stats.norm.ppf(.975, loc=0, scale=1)] * (d - 1))) <= 1e-12),\
            "{}, {}".format(est.coef__interval()[1], np.array([scipy.stats.norm.ppf(.975, loc=1, scale=1)] +
                                                              [scipy.stats.norm.ppf(.975, loc=0, scale=1)] * (d - 1)))

        # 2-d output
        d = 3
        p = 4
        X = np.vstack([np.eye(d)])
        y = np.vstack((X[:, [0] * p] - 1, X[:, [0] * p] + 1))
        X = np.vstack([X, X])
        est = OLS(fit_intercept=False).fit(X, y)
        for t in range(p):
            assert np.all(np.abs(est.coef_[t] - ([1] + [0] * (d - 1))) <=
                          1e-12), "{}, {}".format(est.coef_[t], [1] + [0] * (d - 1))
            assert np.all(np.abs(est.coef_stderr_[t] - np.array([1] * d)) <= 1e-12), "{}".format(est.coef_stderr_[t])
            assert np.all(np.abs(est.coef__interval()[0][t] -
                                 np.array([scipy.stats.norm.ppf(.025, loc=1, scale=1)] +
                                          [scipy.stats.norm.ppf(.025, loc=0, scale=1)] * (d - 1))) <= 1e-12),\
                "{}, {}".format(est.coef__interval()[0][t],
                                np.array([scipy.stats.norm.ppf(.025, loc=1, scale=1)] +
                                         [scipy.stats.norm.ppf(.025, loc=0, scale=1)] * (d - 1)))
            assert np.all(np.abs(est.coef__interval()[1][t] -
                                 np.array([scipy.stats.norm.ppf(.975, loc=1, scale=1)] +
                                          [scipy.stats.norm.ppf(.975, loc=0, scale=1)] * (d - 1))) <= 1e-12),\
                "{}, {}".format(est.coef__interval()[1][t],
                                np.array([scipy.stats.norm.ppf(.975, loc=1, scale=1)] +
                                         [scipy.stats.norm.ppf(.975, loc=0, scale=1)] * (d - 1)))
            assert np.all(np.abs(est.intercept_[t]) <= 1e-12), "{}".format(est.intercept_[t])
            assert np.all(np.abs(est.intercept_stderr_[t]) <= 1e-12), "{}".format(est.intercept_stderr_[t])
            assert np.all(np.abs(est.intercept__interval()[0][t]) <=
                          1e-12), "{}".format(est.intercept__interval()[0][t])

        d = 3
        p = 4
        X = np.vstack([np.eye(d), np.zeros((1, d))])
        y = np.vstack((X[:, [0] * p] - 1, X[:, [0] * p] + 1))
        X = np.vstack([X, X])
        est = OLS(fit_intercept=True).fit(X, y)
        for t in range(p):
            assert np.all(np.abs(est.coef_[t] - ([1] + [0] * (d - 1))) <=
                          1e-12), "{}, {}".format(est.coef_[t], [1] + [0] * (d - 1))
            assert np.all(np.abs(est.coef_stderr_[t] - np.array([np.sqrt(2)] * d)) <=
                          1e-12), "{}".format(est.coef_stderr_[t])
            assert np.all(np.abs(est.coef__interval()[0][t] -
                                 np.array([scipy.stats.norm.ppf(.025, loc=1, scale=np.sqrt(2))] +
                                          [scipy.stats.norm.ppf(.025, loc=0, scale=np.sqrt(2))] * (d - 1))) <= 1e-12),\
                "{}, {}".format(est.coef__interval()[0][t],
                                np.array([scipy.stats.norm.ppf(.025, loc=1, scale=np.sqrt(2))] +
                                         [scipy.stats.norm.ppf(.025, loc=0, scale=np.sqrt(2))] * (d - 1)))
            assert np.all(np.abs(est.coef__interval()[1][t] -
                                 np.array([scipy.stats.norm.ppf(.975, loc=1, scale=np.sqrt(2))] +
                                          [scipy.stats.norm.ppf(.975, loc=0, scale=np.sqrt(2))] * (d - 1))) <= 1e-12),\
                "{}, {}".format(est.coef__interval()[1][t],
                                np.array([scipy.stats.norm.ppf(.975, loc=1, scale=np.sqrt(2))] +
                                         [scipy.stats.norm.ppf(.975, loc=0, scale=np.sqrt(2))] * (d - 1)))
            assert np.all(np.abs(est.intercept_[t]) <= 1e-12), "{}".format(est.intercept_[t])
            assert np.all(np.abs(est.intercept_stderr_[t] - 1) <= 1e-12), "{}".format(est.intercept_stderr_[t])
            assert np.all(np.abs(est.intercept__interval()[0][t] -
                                 scipy.stats.norm.ppf(.025, loc=0, scale=1)) <= 1e-12),\
                "{}, {}".format(est.intercept__interval()[0][t], scipy.stats.norm.ppf(.025, loc=0, scale=1))