Пример #1
0
    def _initialize(cls):
        """Fit the reference GLM and the L2-constraint penalized GLM.

        ``cls.res2`` is an unpenalized Gaussian GLM on the first
        ``cls.k_nonzero`` columns of ``cls.x``.  ``cls.res1`` is a
        ``GLMPenalized`` fit on all columns, without trimming, using an
        ``L2ContraintsPenalty`` whose weights are 1 for column index >= 4
        and 0 otherwise.  Comparison tolerances are stored on the class.
        """
        # shift endog by 10 to avoid strict rtol at predicted values
        # close to zero
        endog = cls.y + 10
        exog = cls.x
        n_cols = exog.shape[1]

        # optimizer and covariance options shared by both fits
        fit_kwds = dict(cov_type='HC0', method='bfgs', maxiter=100, disp=0)

        reference = GLM(endog, exog[:, :cls.k_nonzero],
                        family=family.Gaussian())
        cls.res2 = reference.fit(**fit_kwds)

        penalty = smpen.L2ContraintsPenalty(
            weights=(np.arange(n_cols) >= 4).astype(float))
        penalized = GLMPenalized(endog, exog, family=family.Gaussian(),
                                 penal=penalty)
        # a large pen_weight forces the redundant params close to zero
        penalized.pen_weight *= 500
        cls.res1 = penalized.fit(**dict(fit_kwds, trim=False))

        cls.exog_index = slice(cls.k_nonzero)
        cls.k_params = n_cols
        cls.atol = 1e-5
        cls.rtol = 1e-5
Пример #2
0
    def _initialize(cls):
        """Fit TheilGLS and GLMPenalized under the same linear restriction.

        The restriction matrix consists of all rows of the identity except
        the first two.  ``cls.res2`` holds the ``TheilGLS`` fit and
        ``cls.res1`` the ``GLMPenalized`` fit with an
        ``L2ContraintsPenalty`` built from that restriction.  All columns
        are treated as nonzero for the comparison.
        """
        # shift endog by 10 to avoid strict rtol at predicted values
        # close to zero
        endog = cls.y + 10
        exog = cls.x
        n_cols = exog.shape[1]
        r_matrix = np.eye(n_cols)[2:]

        theil = TheilGLS(endog, exog, r_matrix=r_matrix)
        # the corresponding Theil penweight seems to be 2 * nobs / sigma2_e
        cls.res2 = theil.fit(pen_weight=120.74564413221599 * 1000,
                             use_t=False)

        penalty = smpen.L2ContraintsPenalty(restriction=r_matrix)
        penalized = GLMPenalized(endog, exog, family=family.Gaussian(),
                                 penal=penalty)
        # keep the default weight of GLMPenalized
        penalized.pen_weight *= 1
        cls.res1 = penalized.fit(cov_type='HC0', method='bfgs', maxiter=100,
                                 disp=0, trim=False)

        cls.k_nonzero = n_cols
        cls.exog_index = slice(n_cols)
        cls.k_params = n_cols
        cls.atol = 1e-5
        cls.rtol = 1e-5
    def setup_class(cls):
        """Configure a Gaussian family with log link and run ``cls.init``."""
        # the parent setup initializes the DGP
        super(_estGAMGaussianLogLink, cls).setup_class()

        # random number generator and scale used together with the family
        cls.scale = 5
        cls.rvs = stats.norm.rvs
        cls.family = family.Gaussian(links.log)

        cls.init()
Пример #4
0
    def __init__(self):
        """Set up a Gaussian family with log link and run ``self.init``.

        The parent initializer is called first (it initializes the DGP),
        then the family, the random number generator and the scale are
        stored on the instance before ``self.init()`` is invoked.
        """
        # Zero-argument super() resolves the parent from the defining class.
        # super(self.__class__, self) would recurse forever as soon as this
        # class is subclassed, because self.__class__ is then the subclass.
        super().__init__()  # initialize DGP

        self.family = family.Gaussian(links.log)
        self.rvs = stats.norm.rvs
        self.scale = 5

        self.init()
Пример #5
0
def test_glmgaussian_screening():
    """Check that variable screening recovers the true nonzero columns.

    An oracle ``GLMPenalized`` fit on the truly nonzero columns provides
    reference parameters.  For ``k_keep`` in 1 and 2 the screening starts
    from the first ``k_keep`` columns, and the test asserts that the
    selected indices equal the true ones, that the final fit converged and
    that its parameters match the oracle parameters with ``atol=1e-5``.
    """

    def _labels(indices):
        # one label per index; the first label is replaced by 'const'
        labels = ['var%4d' % ii for ii in indices]
        labels[0] = 'const'
        return labels

    endog, exog, idx_true, beta = _get_gaussian_data()
    n_obs = len(endog)
    # demeaning makes constant zero, checks that exog_keep are not trimmed
    endog = endog - endog.mean(0)

    # options passed to every VariableScreening instance
    screener_kwds = {
        'pen_weight': n_obs * 0.75,
        'threshold_trim': 1e-3,
        'ranking_attr': 'model.score_factor',
    }

    names_true = _labels(idx_true)
    parameters = pd.DataFrame(beta[idx_true], index=names_true,
                              columns=['true'])

    oracle_exog = pd.DataFrame(exog[:, idx_true], columns=names_true)
    oracle_fit = GLMPenalized(endog, oracle_exog,
                              family=family.Gaussian()).fit()
    parameters['oracle'] = oracle_fit.params

    for k_keep in (1, 2):
        start_model = GLMPenalized(endog, exog[:, :k_keep],
                                   family=family.Gaussian())
        screener = VariableScreening(start_model, **screener_kwds)
        screened = screener.screen_exog(exog[:, k_keep:], maxiter=30)

        assert_equal(np.sort(screened.idx_nonzero), idx_true)

        names = _labels(screened.idx_nonzero)

        # smoke test
        screened.results_final.summary(xname=names)
        screened.results_pen.summary()
        assert_equal(screened.results_final.mle_retvals['converged'], True)

        final_params = pd.Series(screened.results_final.params,
                                 index=names, name='final')
        parameters = parameters.join(final_params, how='outer')

        assert_allclose(parameters['oracle'], parameters['final'],
                        atol=1e-5)
        # 'final' has to be dropped again before the next iteration joins
        del parameters['final']
Пример #6
0
    def _initialize(cls):
        """Fit the reference GLM and the trimmed penalized GLM.

        ``cls.res2`` is an unpenalized Gaussian GLM on the first
        ``cls.k_nonzero`` columns of ``cls.x``.  ``cls.res1`` is a
        ``GLMPenalized`` fit on all columns with ``cls.penalty`` and
        ``trim=True``, so only ``cls.k_nonzero`` parameters are expected.
        """
        # shift endog by 10 to avoid strict rtol at predicted values
        # close to zero
        endog = cls.y + 10
        exog = cls.x
        k_nonzero = cls.k_nonzero

        # optimizer and covariance options shared by both fits
        fit_kwds = dict(cov_type='HC0', method='bfgs', maxiter=100, disp=0)

        reference = GLM(endog, exog[:, :k_nonzero], family=family.Gaussian())
        cls.res2 = reference.fit(**fit_kwds)

        penalized = GLMPenalized(endog, exog, family=family.Gaussian(),
                                 penal=cls.penalty)
        # same scaling as in the discrete Poisson case
        penalized.pen_weight *= 1.5
        penalized.penal.tau = 0.05
        cls.res1 = penalized.fit(**dict(fit_kwds, trim=True))

        cls.exog_index = slice(k_nonzero)
        cls.k_params = k_nonzero
        cls.atol = 1e-5
        cls.rtol = 1e-5
Пример #7
0
    def setup_class(cls):
        """Fit the penalized Gaussian GLM compared against R mgcv results.

        The design matrix and restriction come from ``cls._init()``, the
        response is the ``'accel'`` column of ``data_mcycle``.  The fit is
        stored in ``cls.res1`` and the reference results ``pls5`` in
        ``cls.res2``.
        """
        # the second element returned by _init is not needed here
        exog, _, restriction = cls._init()
        endog = data_mcycle['accel']

        penalized = GLMPenalized(
            endog, exog, family=family.Gaussian(),
            penal=smpen.L2ContraintsPenalty(restriction=restriction))

        # scaling of penweight in R mgcv
        s_scale_r = 0.02630734
        # pen_weight chosen to correspond to the R mgcv example
        pen_weight = 1 / s_scale_r / 2
        cls.pw = pen_weight
        penalized.pen_weight = pen_weight

        cls.res1 = penalized.fit(cov_type=cls.cov_type, method='bfgs',
                                 maxiter=100, disp=0, trim=False,
                                 scale='x2')
        cls.res2 = results_pls.pls5

        cls.rtol_fitted = 1e-5
        # edf is currently not available with PenalizedMixin
        # need correction for difference in scale denominator
        cls.covp_corrfact = 1.0025464444310588
Пример #8
0
    def __init__(self,
                 family_name='normal',
                 link_name='identity',
                 fam_params=None):
        """Select the GLM family, its link and a matching random sampler.

        Parameters
        ----------
        family_name : str
            One of 'normal', 'binomial' or 'poisson' (case-insensitive).
        link_name : str or None
            One of 'logit', 'log', 'identity', 'sqrt' or 'probit'
            (case-insensitive).  An empty string or None leaves the link
            unspecified: the family is then built without a ``link``
            argument and ``self.link`` is None.
        fam_params : optional
            Second argument passed to ``np.random.normal`` by the sampler
            of the 'normal' family; not used by the other families.

        Raises
        ------
        ValueError
            If ``family_name`` or ``link_name`` is not one of the
            supported names.
        """
        supported_links = ('logit', 'log', 'identity', 'sqrt', 'probit')
        # family name -> attribute name on the families module ``F``
        family_attrs = {
            'normal': 'Gaussian',
            'binomial': 'Binomial',
            'poisson': 'Poisson',
        }

        # Store link
        self.link_name = link_name
        family_kwargs = {}
        if link_name:
            link_key = link_name.lower()
            if link_key not in supported_links:
                raise ValueError(
                    'Unsupported link_name %r; expected one of %s'
                    % (link_name, ', '.join(supported_links)))
            # only the requested link attribute is looked up on ``L``
            self.link = getattr(L, link_key)
            family_kwargs['link'] = self.link
        else:
            # no link requested: fall back to the default of the family
            self.link = None

        # Store family
        self.family_name = family_name
        if isinstance(family_name, str):
            family_key = family_name.lower()
        else:
            family_key = None
        if family_key not in family_attrs:
            raise ValueError(
                'Unsupported family_name %r; expected one of %s'
                % (family_name, ', '.join(sorted(family_attrs))))
        self.family = getattr(F, family_attrs[family_key])(**family_kwargs)

        # Sampler drawing observations for the given mean argument
        if family_key == 'normal':
            def rand(x):
                return np.random.normal(x, fam_params)
        elif family_key == 'binomial':
            def rand(x):
                return np.random.binomial(1, x)
        else:
            def rand(x):
                return np.random.poisson(x)

        self.rand = rand
        self.in_columns = None
        self.out_columns = None