def __fit__(correctors, correctors_re, groups, predictors, observations,
            sample_weight=None, n_jobs=-1, *args, **kwargs):
    """Fit one linear mixed-effects model per observation column.

    The fixed-effects design matrix is built by placing the columns of
    ``correctors`` first and the columns of ``predictors`` after them.  A
    separate ``MixedLM`` is then fit for every column of ``observations``
    and its fixed-effect coefficients are collected.

    Parameters
    ----------
    correctors : 2-D array
        Fixed-effect corrector columns (rows are samples).
    correctors_re : 2-D array
        Random-effects design matrix, passed to ``MixedLM`` as ``exog_re``.
    groups : array_like
        Group label of every sample, passed to ``MixedLM`` as ``groups``.
    predictors : 2-D array
        Fixed-effect predictor columns; must have as many rows as
        ``correctors``.
    observations : 2-D array
        Response values; each column is fit independently.
    sample_weight, n_jobs, *args, **kwargs
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple of (ndarray, ndarray)
        ``(corrector_params, predictor_params)``: the fitted fixed-effect
        coefficients, split at the number of corrector columns.  Each array
        has one column per column of ``observations``.
    """
    ncols = correctors.shape[1]
    num_samples = correctors.shape[0]

    # Fixed-effects design matrix: correctors first, predictors afterwards.
    xdata = np.zeros((num_samples, ncols + predictors.shape[1]))
    xdata[:, :ncols] = correctors
    xdata[:, ncols:] = predictors

    num_targets = observations.shape[1]
    # One row per fixed effect (correctors AND predictors), so that the split
    # at ``ncols`` in the return statement yields a non-empty predictor part.
    num_fixed = xdata.shape[1]
    params = np.empty((num_fixed, num_targets))

    for it_m in range(num_targets):
        # Pattern handed to ``fit(free=...)``: ones for every fixed effect and
        # an identity matrix for the random-effects covariance.
        # NOTE(review): per the statsmodels convention a 1 marks an estimated
        # parameter and a 0 a fixed one, so off-diagonal covariances are
        # presumably held fixed -- confirm this is intended.
        free = MixedLMParams.from_components(
            fe_params=np.ones(num_fixed),
            cov_re=np.eye(correctors_re.shape[1]))

        # Fit against the current observation column only.
        model = MixedLM(endog=observations[:, it_m], exog=xdata,
                        groups=groups, exog_re=correctors_re)
        results = model.fit(free=free)

        # Keep the estimated fixed-effect coefficients of this fit.
        params[:, it_m] = results.fe_params

    return (params[:ncols], params[ncols:])
def test_compare_numdiff(self):
    """Check analytic MixedLM derivatives against numerical ones.

    Every combination of ``use_sqrt``, ``reml`` and ``profile_fe`` is run on
    the same simulated data (the seed is reset for each combination).  The
    model has random effects plus two variance components, "a" and "b".
    The negated analytic score is compared with ``nd.approx_fprime`` at five
    random parameter points; the negated analytic Hessian is compared with
    ``nd.approx_hess`` at the fitted parameters, but only when both
    ``profile_fe`` and ``use_sqrt`` are False.
    """
    num_groups = 200
    group_size = 5
    num_fe = 3
    num_re = 2
    num_obs = num_groups * group_size

    # Column of ones used to repeat each group-level row once per member.
    repeat_rows = np.ones((group_size, 1))

    switch = (False, True)
    configs = [(sq, rm, pf) for sq in switch for rm in switch for pf in switch]

    for use_sqrt, reml, profile_fe in configs:
        np.random.seed(3558)

        # Design matrices; the first random-effects column is an intercept.
        exog_fe = np.random.normal(size=(num_obs, num_fe))
        exog_re = np.random.normal(size=(num_obs, num_re))
        exog_re[:, 0] = 1
        exog_vc = np.random.normal(size=(num_obs, 3))

        # Group-level coefficients, expanded to one row per observation.
        slopes = np.random.normal(size=(num_groups, num_re))
        slopes[:, -1] *= 2
        slopes = np.kron(slopes, repeat_rows)
        slopes_vc = np.kron(np.random.normal(size=(num_groups, 3)),
                            repeat_rows)
        slopes_vc[:, -1] *= 2

        re_values = (slopes * exog_re).sum(axis=1)
        vc_values = (slopes_vc * exog_vc).sum(axis=1)
        noise = np.random.normal(size=num_obs)
        endog = exog_fe.sum(axis=1) + re_values + vc_values + noise
        groups = np.kron(range(num_groups), np.ones(group_size))

        # Per-group design blocks for the two variance components.
        members = [np.flatnonzero(groups == g) for g in range(num_groups)]
        vc = {
            "a": {g: exog_vc[rows, 0:2] for g, rows in enumerate(members)},
            "b": {g: exog_vc[rows, 2:3] for g, rows in enumerate(members)},
        }

        model = MixedLM(endog, exog_fe, groups, exog_re, exog_vc=vc,
                        use_sqrt=use_sqrt)
        rslt = model.fit(reml=reml)
        loglike = loglike_function(model, profile_fe=profile_fe,
                                   has_fe=not profile_fe)

        # Test the score at several random points.
        for _ in range(5):
            fe_params = np.random.normal(size=num_fe)
            root = np.random.normal(size=(num_re, num_re))
            cov_re = np.dot(root.T, root)
            vcomp = np.random.normal(size=2) ** 2
            point = MixedLMParams.from_components(fe_params, cov_re=cov_re,
                                                  vcomp=vcomp)
            packed = point.get_packed(has_fe=not profile_fe,
                                      use_sqrt=use_sqrt)

            analytic = -model.score(point, profile_fe=profile_fe)
            numeric = nd.approx_fprime(packed, loglike)
            assert_allclose(analytic, numeric, rtol=1e-3)

        # The Hessian is only checked at the MLE: there is no profile
        # Hessian, and the Hessian of the square-root transformed
        # parameters is not of interest.
        if profile_fe is not False or use_sqrt is not False:
            continue

        hess = -model.hessian(rslt.params_object)
        packed = rslt.params_object.get_packed(use_sqrt=False, has_fe=True)
        loglike_h = loglike_function(model, profile_fe=False, has_fe=True)
        nhess = nd.approx_hess(packed, loglike_h)
        assert_allclose(hess, nhess, rtol=1e-3)
def test_compare_numdiff(self):
    """Compare the MixedLM score and Hessian with numerical derivatives.

    The test loops over ``use_sqrt``, ``reml`` and ``profile_fe``.  For each
    setting it reseeds the generator, simulates data with random effects
    and two variance components ("a" and "b"), fits the model, and then:

    * compares the negated analytic score with ``nd.approx_fprime`` at five
      randomly drawn parameter points;
    * compares the negated analytic Hessian with ``nd.approx_hess`` at the
      fitted parameters, only when ``profile_fe`` and ``use_sqrt`` are both
      False.
    """
    n_grp, grpsize = 200, 5
    k_fe, k_re = 3, 2
    nobs = n_grp * grpsize

    def expand(per_group):
        # Repeat every group-level row once for each observation in a group.
        return np.kron(per_group, np.ones((grpsize, 1)))

    for use_sqrt in (False, True):
        for reml in (False, True):
            for profile_fe in (False, True):
                has_fe = not profile_fe

                np.random.seed(3558)

                exog_fe = np.random.normal(size=(nobs, k_fe))
                exog_re = np.random.normal(size=(nobs, k_re))
                exog_re[:, 0] = 1  # random intercept
                exog_vc = np.random.normal(size=(nobs, 3))

                slopes = np.random.normal(size=(n_grp, k_re))
                slopes[:, -1] *= 2
                slopes = expand(slopes)

                slopes_vc = expand(np.random.normal(size=(n_grp, 3)))
                slopes_vc[:, -1] *= 2

                re_values = (slopes * exog_re).sum(1)
                vc_values = (slopes_vc * exog_vc).sum(1)
                err = np.random.normal(size=nobs)
                endog = exog_fe.sum(1) + re_values + vc_values + err

                groups = np.kron(range(n_grp), np.ones(grpsize))

                # Variance component "a" uses the first two columns of
                # exog_vc, component "b" the third.
                vc = {"a": {}, "b": {}}
                for label in range(n_grp):
                    rows = np.flatnonzero(groups == label)
                    vc["a"][label] = exog_vc[rows, 0:2]
                    vc["b"][label] = exog_vc[rows, 2:3]

                model = MixedLM(
                    endog, exog_fe, groups, exog_re,
                    exog_vc=vc, use_sqrt=use_sqrt,
                )
                rslt = model.fit(reml=reml)
                loglike = loglike_function(
                    model, profile_fe=profile_fe, has_fe=has_fe)

                # Score check at several points.
                for _ in range(5):
                    fe_params = np.random.normal(size=k_fe)
                    factor = np.random.normal(size=(k_re, k_re))
                    cov_re = np.dot(factor.T, factor)
                    vcomp = np.random.normal(size=2)**2
                    params = MixedLMParams.from_components(
                        fe_params, cov_re=cov_re, vcomp=vcomp)
                    params_vec = params.get_packed(
                        has_fe=has_fe, use_sqrt=use_sqrt)

                    gr = -model.score(params, profile_fe=profile_fe)
                    ngr = nd.approx_fprime(params_vec, loglike)
                    assert_allclose(gr, ngr, rtol=1e-3)

                # Hessian check at the MLE.  Skipped for the profile
                # likelihood (no profile Hessian exists) and for the
                # square-root parameterization (not of interest).
                check_hessian = (profile_fe is False) and (use_sqrt is False)
                if check_hessian:
                    hess = -model.hessian(rslt.params_object)
                    mle_vec = rslt.params_object.get_packed(
                        use_sqrt=False, has_fe=True)
                    loglike_h = loglike_function(
                        model, profile_fe=False, has_fe=True)
                    nhess = nd.approx_hess(mle_vec, loglike_h)
                    assert_allclose(hess, nhess, rtol=1e-3)
def test_compare_numdiff(self):
    """Check the analytic score and Hessian against numerical derivatives.

    Three settings are crossed: the parameterization (``jl == 0`` uses the
    default model with the ``*_sqrt`` methods, ``jl == 1`` uses
    ``use_sqrt=False`` with the ``*_full`` methods), REML off/on, and a
    ``penalties.PSD`` covariance penalty weight of 0 or 10.  For each
    setting the negated score is compared with ``nd.approx_fprime`` at five
    random parameter points.  When the penalty weight is 0 the negated
    Hessian is also compared with ``nd.approx_hess`` at the fitted
    parameters.
    """
    import statsmodels.tools.numdiff as nd

    num_groups = 200
    group_size = 5
    num_fe = 3
    num_re = 2
    num_obs = num_groups * group_size

    cases = [(jl, reml, wt)
             for jl in (0, 1)
             for reml in (False, True)
             for wt in (0, 10)]

    for jl, reml, cov_pen_wt in cases:
        sqrt_scale = jl == 0
        cov_pen = penalties.PSD(cov_pen_wt)

        np.random.seed(3558)

        exog_fe = np.random.normal(size=(num_obs, num_fe))
        exog_re = np.random.normal(size=(num_obs, num_re))
        exog_re[:, 0] = 1  # random intercept
        slopes = np.random.normal(size=(num_groups, num_re))
        slopes = np.kron(slopes, np.ones((group_size, 1)))
        re_values = (slopes * exog_re).sum(axis=1)
        noise = np.random.normal(size=num_obs)
        endog = exog_fe.sum(axis=1) + re_values + noise
        groups = np.kron(range(num_groups), np.ones(group_size))

        # Build the model and the negated analytic derivatives that match
        # its parameterization.
        if sqrt_scale:
            md = MixedLM(endog, exog_fe, groups, exog_re)

            def score(x):
                return -md.score_sqrt(x)

            def hessian(x):
                return -md.hessian_sqrt(x)
        else:
            md = MixedLM(endog, exog_fe, groups, exog_re, use_sqrt=False)

            def score(x):
                return -md.score_full(x)

            def hessian(x):
                return -md.hessian_full(x)

        md.reml = reml
        md.cov_pen = cov_pen

        def loglike(x):
            return -md.loglike(x)

        rslt = md.fit()

        # Test the score at several points.
        for _ in range(5):
            fe_params = np.random.normal(size=num_fe)
            root = np.random.normal(size=(num_re, num_re))
            cov_re = np.dot(root.T, root)
            params = MixedLMParams.from_components(fe_params, cov_re)
            if sqrt_scale:
                params_vec = params.get_packed()
            else:
                params_vec = params.get_packed(use_sqrt=False)

            gr = score(params)
            ngr = nd.approx_fprime(params_vec, loglike)
            assert_allclose(gr, ngr, rtol=1e-2)

            # The Hessian is deliberately not checked at these random
            # points: Hessian matrices don't agree well away from the MLE.

        # Check Hessian matrices at the MLE (unpenalized fits only).
        if cov_pen_wt != 0:
            continue

        hess = hessian(rslt.params_object)
        mle_vec = rslt.params_object.get_packed()
        nhess = nd.approx_hess(mle_vec, loglike)
        assert_allclose(hess, nhess, rtol=1e-2)
def test_compare_numdiff(self, use_sqrt, reml, profile_fe):
    """Check analytic MixedLM derivatives against numerical ones.

    ``use_sqrt``, ``reml`` and ``profile_fe`` are supplied by the caller
    (presumably through pytest parametrization; the decorator is not part
    of this block).  Data with random effects and two variance components
    ("a" and "b") are simulated from a fixed seed and the model is fit.
    The negated analytic score is compared with ``nd.approx_fprime`` at
    five random parameter points; the negated analytic Hessian is compared
    with ``nd.approx_hess`` at the fitted parameters when both
    ``profile_fe`` and ``use_sqrt`` are False.

    An ``AssertionError`` from these comparisons is turned into an xfail
    when ``PLATFORM_OSX`` is set and re-raised otherwise.
    """
    num_groups = 200
    group_size = 5
    num_fe = 3
    num_re = 2
    num_obs = num_groups * group_size
    has_fe = not profile_fe

    # Column of ones used to repeat each group-level row once per member.
    repeat_rows = np.ones((group_size, 1))

    np.random.seed(3558)

    # Design matrices; the first random-effects column is an intercept.
    exog_fe = np.random.normal(size=(num_obs, num_fe))
    exog_re = np.random.normal(size=(num_obs, num_re))
    exog_re[:, 0] = 1
    exog_vc = np.random.normal(size=(num_obs, 3))

    # Group-level coefficients, expanded to one row per observation.
    slopes = np.random.normal(size=(num_groups, num_re))
    slopes[:, -1] *= 2
    slopes = np.kron(slopes, repeat_rows)
    slopes_vc = np.kron(np.random.normal(size=(num_groups, 3)), repeat_rows)
    slopes_vc[:, -1] *= 2

    re_values = (slopes * exog_re).sum(axis=1)
    vc_values = (slopes_vc * exog_vc).sum(axis=1)
    noise = np.random.normal(size=num_obs)
    endog = exog_fe.sum(axis=1) + re_values + vc_values + noise
    groups = np.kron(range(num_groups), np.ones(group_size))

    # Per-group design blocks for the two variance components.
    members = [np.flatnonzero(groups == g) for g in range(num_groups)]
    vc = {
        "a": {g: exog_vc[rows, 0:2] for g, rows in enumerate(members)},
        "b": {g: exog_vc[rows, 2:3] for g, rows in enumerate(members)},
    }

    model = MixedLM(endog, exog_fe, groups, exog_re, exog_vc=vc,
                    use_sqrt=use_sqrt)
    rslt = model.fit(reml=reml)
    loglike = loglike_function(model, profile_fe=profile_fe, has_fe=has_fe)

    try:
        # Test the score at several points.
        for _ in range(5):
            fe_params = np.random.normal(size=num_fe)
            root = np.random.normal(size=(num_re, num_re))
            cov_re = np.dot(root.T, root)
            vcomp = np.random.normal(size=2) ** 2
            point = MixedLMParams.from_components(fe_params, cov_re=cov_re,
                                                  vcomp=vcomp)
            packed = point.get_packed(has_fe=has_fe, use_sqrt=use_sqrt)

            analytic = -model.score(point, profile_fe=profile_fe)
            numeric = nd.approx_fprime(packed, loglike)
            assert_allclose(analytic, numeric, rtol=1e-3)

        # The Hessian is only checked at the MLE: there is no profile
        # Hessian, and the Hessian of the square-root transformed
        # parameters is not of interest.
        if profile_fe is False and use_sqrt is False:
            hess = -model.hessian(rslt.params_object)
            packed = rslt.params_object.get_packed(use_sqrt=False,
                                                   has_fe=True)
            loglike_h = loglike_function(model, profile_fe=False,
                                         has_fe=True)
            nhess = nd.approx_hess(packed, loglike_h)
            assert_allclose(hess, nhess, rtol=1e-3)
    except AssertionError:
        # See GH#5628; because this test fails unpredictably but only on
        # OSX, it is only xfailed there.
        if not PLATFORM_OSX:
            raise
        pytest.xfail("fails on OSX due to unresolved "
                     "numerical differences")
def test_compare_numdiff(self, use_sqrt, reml, profile_fe):
    """Compare the MixedLM score and Hessian with numerical derivatives.

    The three arguments select the model parameterization (``use_sqrt``),
    the fitting criterion (``reml``) and whether the fixed effects are
    profiled out (``profile_fe``).  After simulating seeded data with
    random effects and two variance components ("a" and "b") and fitting
    the model, the test:

    * compares the negated analytic score with ``nd.approx_fprime`` at five
      randomly drawn parameter points;
    * compares the negated analytic Hessian with ``nd.approx_hess`` at the
      fitted parameters, only when ``profile_fe`` and ``use_sqrt`` are both
      False.

    Assertion failures become an xfail when ``PLATFORM_OSX`` is set and are
    re-raised otherwise.
    """
    n_grp, grpsize = 200, 5
    k_fe, k_re = 3, 2
    nobs = n_grp * grpsize

    def expand(per_group):
        # Repeat every group-level row once for each observation in a group.
        return np.kron(per_group, np.ones((grpsize, 1)))

    np.random.seed(3558)

    exog_fe = np.random.normal(size=(nobs, k_fe))
    exog_re = np.random.normal(size=(nobs, k_re))
    exog_re[:, 0] = 1  # random intercept
    exog_vc = np.random.normal(size=(nobs, 3))

    slopes = np.random.normal(size=(n_grp, k_re))
    slopes[:, -1] *= 2
    slopes = expand(slopes)

    slopes_vc = expand(np.random.normal(size=(n_grp, 3)))
    slopes_vc[:, -1] *= 2

    re_values = (slopes * exog_re).sum(1)
    vc_values = (slopes_vc * exog_vc).sum(1)
    err = np.random.normal(size=nobs)
    endog = exog_fe.sum(1) + re_values + vc_values + err

    groups = np.kron(range(n_grp), np.ones(grpsize))

    # Variance component "a" uses the first two columns of exog_vc,
    # component "b" the third.
    vc = {"a": {}, "b": {}}
    for label in range(n_grp):
        rows = np.flatnonzero(groups == label)
        vc["a"][label] = exog_vc[rows, 0:2]
        vc["b"][label] = exog_vc[rows, 2:3]

    model = MixedLM(
        endog, exog_fe, groups, exog_re,
        exog_vc=vc, use_sqrt=use_sqrt,
    )
    rslt = model.fit(reml=reml)
    loglike = loglike_function(
        model, profile_fe=profile_fe, has_fe=not profile_fe)

    check_hessian = (profile_fe is False) and (use_sqrt is False)

    try:
        # Score check at several points.
        for _ in range(5):
            fe_params = np.random.normal(size=k_fe)
            factor = np.random.normal(size=(k_re, k_re))
            cov_re = np.dot(factor.T, factor)
            vcomp = np.random.normal(size=2)**2
            params = MixedLMParams.from_components(
                fe_params, cov_re=cov_re, vcomp=vcomp)
            params_vec = params.get_packed(
                has_fe=not profile_fe, use_sqrt=use_sqrt)

            gr = -model.score(params, profile_fe=profile_fe)
            ngr = nd.approx_fprime(params_vec, loglike)
            assert_allclose(gr, ngr, rtol=1e-3)

        # Hessian check at the MLE.  Skipped for the profile likelihood
        # (no profile Hessian exists) and for the square-root
        # parameterization (not of interest).
        if check_hessian:
            hess = -model.hessian(rslt.params_object)
            mle_vec = rslt.params_object.get_packed(
                use_sqrt=False, has_fe=True)
            loglike_h = loglike_function(
                model, profile_fe=False, has_fe=True)
            nhess = nd.approx_hess(mle_vec, loglike_h)
            assert_allclose(hess, nhess, rtol=1e-3)
    except AssertionError:
        # See GH#5628; the test fails unpredictably, but only on OSX, so
        # that is the only platform where it is xfailed.
        if PLATFORM_OSX:
            pytest.xfail("fails on OSX due to unresolved "
                         "numerical differences")
        raise
def test_compare_numdiff(self):
    """Compare the analytic score and Hessian with numerical derivatives.

    The test crosses three settings: ``jl`` (0: default model checked via
    ``score_sqrt`` / ``hessian_sqrt``; 1: ``use_sqrt=False`` checked via
    ``score_full`` / ``hessian_full``), ``reml`` (False/True) and the
    weight of a ``penalties.PSD`` covariance penalty (0 or 10).  The
    negated score is compared with ``nd.approx_fprime`` at five random
    parameter points.  The negated Hessian is compared with
    ``nd.approx_hess`` at the fitted parameters when the penalty weight
    is 0.
    """
    import statsmodels.tools.numdiff as nd

    n_grp, grpsize = 200, 5
    k_fe, k_re = 3, 2
    nobs = n_grp * grpsize

    for jl in (0, 1):
        for reml in (False, True):
            for cov_pen_wt in (0, 10):
                cov_pen = penalties.PSD(cov_pen_wt)

                np.random.seed(3558)

                exog_fe = np.random.normal(size=(nobs, k_fe))
                exog_re = np.random.normal(size=(nobs, k_re))
                exog_re[:, 0] = 1  # random intercept
                slopes = np.kron(np.random.normal(size=(n_grp, k_re)),
                                 np.ones((grpsize, 1)))
                re_values = (slopes * exog_re).sum(1)
                err = np.random.normal(size=nobs)
                endog = exog_fe.sum(1) + re_values + err
                groups = np.kron(range(n_grp), np.ones(grpsize))

                # Model plus the negated analytic derivatives that match
                # its parameterization.
                if jl == 0:
                    md = MixedLM(endog, exog_fe, groups, exog_re)

                    def score(x):
                        return -md.score_sqrt(x)

                    def hessian(x):
                        return -md.hessian_sqrt(x)
                else:
                    md = MixedLM(endog, exog_fe, groups, exog_re,
                                 use_sqrt=False)

                    def score(x):
                        return -md.score_full(x)

                    def hessian(x):
                        return -md.hessian_full(x)

                md.reml = reml
                md.cov_pen = cov_pen

                def loglike(x):
                    return -md.loglike(x)

                rslt = md.fit()

                # Score check at several points.
                for _ in range(5):
                    fe_params = np.random.normal(size=k_fe)
                    factor = np.random.normal(size=(k_re, k_re))
                    cov_re = np.dot(factor.T, factor)
                    params = MixedLMParams.from_components(fe_params,
                                                           cov_re)
                    if jl == 0:
                        params_vec = params.get_packed()
                    else:
                        params_vec = params.get_packed(use_sqrt=False)

                    gr = score(params)
                    ngr = nd.approx_fprime(params_vec, loglike)
                    assert_allclose(gr, ngr, rtol=1e-2)

                    # No Hessian check here: Hessian matrices don't agree
                    # well away from the MLE.

                # Hessian check at the MLE, for unpenalized fits only.
                if cov_pen_wt == 0:
                    hess = hessian(rslt.params_object)
                    mle_vec = rslt.params_object.get_packed()
                    nhess = nd.approx_hess(mle_vec, loglike)
                    assert_allclose(hess, nhess, rtol=1e-2)