Example No. 1
import numpy as np
from numpy.testing import assert_allclose
from scipy.linalg import block_diag

from statsmodels.gam.smooth_basis import UnivariatePolynomialSmoother
from statsmodels.gam.gam_penalties import (UnivariateGamPenalty,
                                           MultivariateGamPenalty)


# multivariate_sample_data() is a helper defined in the statsmodels GAM
# test module alongside this test.
def test_multivariate_penalty():
    alphas = [1, 2]
    weights = [1, 1]
    np.random.seed(1)
    x, y, pol = multivariate_sample_data()

    univ_pol1 = UnivariatePolynomialSmoother(x[:, 0], degree=pol.degrees[0])
    univ_pol2 = UnivariatePolynomialSmoother(x[:, 1], degree=pol.degrees[1])

    gp1 = UnivariateGamPenalty(alpha=alphas[0], univariate_smoother=univ_pol1)
    gp2 = UnivariateGamPenalty(alpha=alphas[1], univariate_smoother=univ_pol2)
    mgp = MultivariateGamPenalty(multivariate_smoother=pol, alpha=alphas,
                                 weights=weights)

    for i in range(10):
        params1 = np.random.randint(-3, 3, pol.smoothers[0].dim_basis)
        params2 = np.random.randint(-3, 3, pol.smoothers[1].dim_basis)
        params = np.concatenate([params1, params2])
        c1 = gp1.func(params1)
        c2 = gp2.func(params2)
        c = mgp.func(params)
        assert_allclose(c, c1 + c2, atol=1.e-10, rtol=1.e-10)

        d1 = gp1.deriv(params1)
        d2 = gp2.deriv(params2)
        d12 = np.concatenate([d1, d2])
        d = mgp.deriv(params)
        assert_allclose(d, d12)

        h1 = gp1.deriv2(params1)
        h2 = gp2.deriv2(params2)
        h12 = block_diag(h1, h2)
        h = mgp.deriv2(params)
        assert_allclose(h, h12)
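This test exercises the separability of MultivariateGamPenalty: its value is the sum of the univariate penalties, its gradient is their concatenation, and its Hessian is block diagonal. Below is a minimal numpy-only sketch of that property; the matrices S1, S2 and the weights alpha1, alpha2 are illustrative stand-ins, not statsmodels objects.

# Separable quadratic penalty: value adds, gradient concatenates,
# Hessian is block diagonal. Matrices are purely illustrative.
import numpy as np
from scipy.linalg import block_diag

rng = np.random.default_rng(0)
S1 = 2.0 * np.eye(3)              # stand-in penalty matrix, smoother 1
S2 = 0.5 * np.eye(4)              # stand-in penalty matrix, smoother 2
alpha1, alpha2 = 1.0, 2.0

b1, b2 = rng.normal(size=3), rng.normal(size=4)
b = np.concatenate([b1, b2])

value = alpha1 * b1 @ S1 @ b1 + alpha2 * b2 @ S2 @ b2
grad = np.concatenate([2 * alpha1 * S1 @ b1, 2 * alpha2 * S2 @ b2])
hess = block_diag(2 * alpha1 * S1, 2 * alpha2 * S2)

# the same penalty evaluated on the stacked parameter vector agrees
S = block_diag(alpha1 * S1, alpha2 * S2)
assert np.isclose(b @ S @ b, value)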
Example No. 2
import os

import numpy as np
import pandas as pd

from statsmodels.gam.smooth_basis import BSplines
from statsmodels.gam.gam_penalties import MultivariateGamPenalty
from statsmodels.gam.generalized_additive_model import GLMGam
from statsmodels.gam.gam_cross_validation.gam_cross_validation import (
    MultivariateGAMCV)
from statsmodels.gam.gam_cross_validation.cross_validators import KFold


def test_multivariate_gam_cv():
    # smoke test: nothing is asserted; it only checks that the CV run
    # raises no runtime error

    def cost(x1, x2):
        return np.linalg.norm(x1 - x2) / len(x1)

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(cur_dir, "results", "prediction_from_mgcv.csv")
    data_from_r = pd.read_csv(file_path)
    # dataset used to train the R model
    x = data_from_r.x.values
    y = data_from_r.y.values

    df = [10]
    degree = [5]
    bsplines = BSplines(x, degree=degree, df=df)
    # the reference predictions in the csv were generated in R/mgcv with:
    # g = gam(y~s(x, k = 10, bs = "cr"), data = data, scale = 80)

    # alphas = [0.0251]  # superseded by the value below
    alphas = [2]
    cv = KFold(3)

    gp = MultivariateGamPenalty(bsplines, alpha=alphas)  # noqa: F841
    gam_cv = MultivariateGAMCV(smoother=bsplines,
                               alphas=alphas,
                               gam=GLMGam,
                               cost=cost,
                               endog=y,
                               exog=None,
                               cv_iterator=cv)
    gam_cv_res = gam_cv.fit()  # noqa: F841
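MultivariateGAMCV automates the k-fold loop for a given alpha. Conceptually it does something like the numpy-only sketch below; fit_predict is a hypothetical placeholder for "fit a GAM on the training fold with this alpha and predict the validation fold".

import numpy as np

def cost(x1, x2):
    return np.linalg.norm(x1 - x2) / len(x1)

def kfold_cv_score(y, x, alpha, fit_predict, k=3, seed=0):
    # average validation cost over k folds; `fit_predict` is hypothetical
    rng = np.random.default_rng(seed)
    folds = np.array_split(rng.permutation(len(y)), k)
    scores = []
    for val_idx in folds:
        train_idx = np.setdiff1d(np.arange(len(y)), val_idx)
        y_pred = fit_predict(x[train_idx], y[train_idx], x[val_idx], alpha)
        scores.append(cost(y[val_idx], y_pred))
    return np.mean(scores)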
Example No. 3
import os

import numpy as np
import pandas as pd
from numpy.testing import assert_allclose

from statsmodels.gam.smooth_basis import BSplines
from statsmodels.gam.gam_penalties import MultivariateGamPenalty
from statsmodels.gam.generalized_additive_model import GLMGam


def test_multivariate_gam_1d_data():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(cur_dir, "results", "prediction_from_mgcv.csv")
    data_from_r = pd.read_csv(file_path)
    # dataset used to train the R model
    x = data_from_r.x.values
    y = data_from_r.y

    df = [10]
    degree = [3]
    bsplines = BSplines(x, degree=degree, df=df)
    # y_mgcv is obtained from R with the following code
    # g = gam(y~s(x, k = 10, bs = "cr"), data = data, scale = 80)
    y_mgcv = data_from_r.y_est

    # alpha was adjusted manually to reduce the discrepancy in fitted values
    alpha = [0.0168 * 0.0251 / 2 * 500]
    gp = MultivariateGamPenalty(bsplines, alpha=alpha)    # noqa: F841

    glm_gam = GLMGam(y, exog=np.ones((len(y), 1)), smoother=bsplines,
                     alpha=alpha)
    # "nm" converges to a different params, "bfgs" params are close to pirls
    # res_glm_gam = glm_gam.fit(method='nm', max_start_irls=0,
    #                           disp=1, maxiter=10000, maxfun=5000)
    res_glm_gam = glm_gam.fit(method='pirls', max_start_irls=0,
                              disp=1, maxiter=10000)
    y_gam = res_glm_gam.fittedvalues

    # plt.plot(x, y_gam, '.', label='gam')
    # plt.plot(x, y_mgcv, '.', label='mgcv')
    # plt.plot(x, y, '.', label='y')
    # plt.legend()
    # plt.show()

    assert_allclose(y_gam, y_mgcv, atol=0.01)
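For reference, a self-contained sketch of the same kind of fit on synthetic data; it assumes the statsmodels.gam API used above, and the data and the alpha value are made up.

import numpy as np
from statsmodels.gam.smooth_basis import BSplines
from statsmodels.gam.generalized_additive_model import GLMGam

rng = np.random.default_rng(0)
x = np.linspace(0, 10, 200)
y = np.sin(x) + rng.normal(scale=0.3, size=x.shape)

bsplines = BSplines(x, df=[10], degree=[3])
res = GLMGam(y, smoother=bsplines, alpha=[1.0]).fit()
print(res.fittedvalues[:5])   # penalized spline estimate of sin(x)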
Example No. 4
    # LogitGam.__init__ from statsmodels.gam.generalized_additive_model;
    # `Iterable` here is collections.abc.Iterable
    def __init__(self, endog, smoother, alpha, *args, **kwargs):
        if not isinstance(alpha, Iterable):
            alpha = np.array([alpha] * len(smoother.smoothers))

        self.smoother = smoother
        self.alpha = alpha
        self.pen_weight = 1  # TODO: pen weight should not be defined here!!
        penal = MultivariateGamPenalty(smoother, alpha=alpha)

        super(LogitGam, self).__init__(endog, smoother.basis, penal=penal,
                                       *args, **kwargs)
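A hedged usage sketch for LogitGam on a binary outcome, following the signature above; the data and the alpha value are illustrative, not from the source.

import numpy as np
from statsmodels.gam.smooth_basis import BSplines
from statsmodels.gam.generalized_additive_model import LogitGam

rng = np.random.default_rng(0)
x = np.linspace(-3, 3, 300)
prob = 1 / (1 + np.exp(-np.sin(2 * x)))   # smooth true success probability
y = rng.binomial(1, prob)

bsplines = BSplines(x, df=[8], degree=[3])
res = LogitGam(y, bsplines, alpha=[0.1]).fit()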
Example No. 5
    # GLMGam.__init__ from statsmodels.gam.generalized_additive_model
    def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None,
                 offset=None, exposure=None, missing='none', **kwargs):

        # TODO: check usage of hasconst
        hasconst = kwargs.get('hasconst', None)
        xnames_linear = None
        if hasattr(exog, 'design_info'):
            self.design_info_linear = exog.design_info
            xnames_linear = self.design_info_linear.column_names

        is_pandas = _is_using_pandas(exog, None)

        # TODO: data handling here is experimental, see #5469
        # This is a bit wasteful because we need to call `_handle_data` twice
        self.data_linear = self._handle_data(endog, exog, missing, hasconst)
        if xnames_linear is None:
            xnames_linear = self.data_linear.xnames
        if exog is not None:
            exog_linear = np.asarray(exog)
            k_exog_linear = exog_linear.shape[1]
        else:
            exog_linear = None
            k_exog_linear = 0
        self.k_exog_linear = k_exog_linear
        # We need exog_linear for k-fold cross validation
        # TODO: alternative is to take columns from combined exog
        self.exog_linear = exog_linear

        self.smoother = smoother
        self.k_smooths = smoother.k_variables
        self.alpha = self._check_alpha(alpha)
        penal = MultivariateGamPenalty(smoother, alpha=self.alpha,
                                       start_idx=k_exog_linear)
        kwargs.pop('penal', None)
        if exog_linear is not None:
            exog = np.column_stack((exog_linear, smoother.basis))
        else:
            exog = smoother.basis

        # TODO: check: xnames_linear will be None instead of empty list
        #       if no exog_linear
        # can smoother be empty? presumably that is not allowed
        if xnames_linear is None:
            xnames_linear = []
        xnames = xnames_linear + self.smoother.col_names

        if is_pandas and exog_linear is not None:
            # build a DataFrame so we get a PandasData instance for wrapping
            exog = pd.DataFrame(exog, index=self.data_linear.row_labels,
                                columns=xnames)

        super(GLMGam, self).__init__(endog, exog=exog, family=family,
                                     offset=offset, exposure=exposure,
                                     penal=penal, missing=missing, **kwargs)

        if not is_pandas:
            # set exog names if not given by a pandas DataFrame
            self.exog_names[:] = xnames

        # TODO: the generic data handling might attach the design_info from the
        #       linear part, but this is incorrect for the full model and
        #       causes problems in wald_test_terms

        if hasattr(self.data, 'design_info'):
            del self.data.design_info
        # formula also might be attached which causes problems in predict
        if hasattr(self, 'formula'):
            self.formula_linear = self.formula
            self.formula = None
            del self.formula
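Because this __init__ stacks exog_linear in front of the spline basis, a partially linear model can be specified by passing both a linear exog and a smoother. A sketch with synthetic data and an arbitrary alpha, following the call signature above:

import numpy as np
from statsmodels.gam.smooth_basis import BSplines
from statsmodels.gam.generalized_additive_model import GLMGam

rng = np.random.default_rng(0)
n = 200
x_smooth = np.linspace(0, 10, n)
x_linear = rng.normal(size=(n, 1))
y = 0.5 * x_linear[:, 0] + np.sin(x_smooth) + rng.normal(scale=0.3, size=n)

bsplines = BSplines(x_smooth, df=[10], degree=[3])
res = GLMGam(y, exog=x_linear, smoother=bsplines, alpha=[1.0]).fit()
# linear coefficients come first in res.params, then spline coefficients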