def finalize(params, stats, cov, weight_mat):
    """
    Collect estimation output in an AttrDict with normalized statistic names

    Parameters
    ----------
    params : object
        Container exposing ``params`` and ``tstats`` attributes
    stats : object
        Labelled statistics providing ``index`` and label-based lookup
        (presumably a pandas Series -- confirm against callers)
    cov : object
        Stored unchanged under ``cov``
    weight_mat : object
        Stored unchanged under ``weight_mat``

    Returns
    -------
    out : AttrDict
        The inputs, every entry of ``stats`` under its own label, the
        aliases ``model_ss``, ``resid_ss``, ``rsquared`` and
        ``rsquared_adj`` (None when the source label is missing), and
        ``f_statistic`` taken from ``chi2`` if present, else ``F``, else None
    """
    tstat_values = params.tstats
    estimates = params.params
    out = AttrDict(params=estimates, tstats=tstat_values, stats=stats,
                   cov=cov, weight_mat=weight_mat)
    for label in stats.index:
        out[label] = stats[label]

    # Long name -> short label it is copied from
    aliases = {
        'model_ss': 'mss',
        'resid_ss': 'rss',
        'rsquared': 'r2',
        'rsquared_adj': 'r2_a'
    }
    for long_name, short_name in aliases.items():
        out[long_name] = out[short_name] if short_name in out else None

    # First available label wins; fall back to None when neither exists
    for candidate in ('chi2', 'F'):
        if candidate in out:
            out['f_statistic'] = out[candidate]
            break
    else:
        out['f_statistic'] = None

    return out
def data():
    """
    Simulate a seeded regression data set with regressors and instruments

    Returns
    -------
    AttrDict
        Holds ``nobs``, ``e``, ``x``, ``y``, ``z``, ``params``, ``clusters``,
        ``nvar``, ``i`` (identity with ``k + p - 2`` rows), ``xzizx`` and
        ``xzizx_inv``
    """
    n, k, p = 1000, 5, 3
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)

    # Correlation structure of the k + p + 1 simulated columns
    rho = 0.5
    dim = k + p + 1
    r = np.full((dim, dim), rho)
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += 0.5 * np.eye(dim)

    draws = np.random.multivariate_normal(np.zeros(dim), r, n)
    x = draws[:, :k]
    z = draws[:, 2:k + p]
    e = draws[:, [-1]]

    params = (np.arange(1, k + 1) / k)[:, None]
    y = x @ params + e

    nobs, nvar = x.shape
    xzizx = x.T @ z @ z.T @ x / nobs
    xzizx_inv = inv(xzizx)

    return AttrDict(nobs=nobs, e=e, x=x, y=y, z=z, params=params,
                    clusters=clusters, nvar=nvar, i=np.eye(k + p - 2),
                    xzizx=xzizx, xzizx_inv=xzizx_inv)
def finalize(params, stats, cov, weight_mat):
    """
    Package estimation results and expose statistics under common names

    Parameters
    ----------
    params : object
        Object with ``params`` and ``tstats`` attributes
    stats : object
        Statistics with an ``index`` of labels and label-based lookup
        (presumably a pandas Series -- confirm against callers)
    cov : object
        Stored as-is under ``cov``
    weight_mat : object
        Stored as-is under ``weight_mat``

    Returns
    -------
    out : AttrDict
        Inputs plus each statistic under its label, the long-name copies
        ``model_ss``, ``resid_ss``, ``rsquared``, ``rsquared_adj`` (None if
        the short label is absent) and ``f_statistic`` (``chi2``, then ``F``,
        then None)
    """
    t_values = params.tstats
    point_estimates = params.params
    out = AttrDict(
        params=point_estimates,
        tstats=t_values,
        stats=stats,
        cov=cov,
        weight_mat=weight_mat,
    )
    for name in stats.index:
        out[name] = stats[name]

    # (long name, short label) pairs; order matches the original mapping
    renames = (
        ("model_ss", "mss"),
        ("resid_ss", "rss"),
        ("rsquared", "r2"),
        ("rsquared_adj", "r2_a"),
    )
    for target, source in renames:
        if source not in out:
            out[target] = None
            continue
        out[target] = out[source]

    out["f_statistic"] = (
        out["chi2"] if "chi2" in out else (out["F"] if "F" in out else None)
    )
    return out
def data():
    """
    Build two small DataFrames and a Series over a shared 100-period index

    Returns
    -------
    AttrDict
        ``df1`` (column A), ``df2`` (columns B and C, index reversed) and
        ``s`` (Series named D)
    """
    periods = 100
    idx = date_range('2000-01-01', periods=periods)

    col_a = np.arange(periods).reshape(-1, 1)
    df1 = DataFrame(col_a, columns=['A'], index=idx)

    # df2 deliberately uses the index in reverse order
    two_cols = np.arange(2 * periods).reshape(periods, 2)
    df2 = DataFrame(two_cols, columns=['B', 'C'], index=idx[::-1])

    s = Series(np.arange(periods) + 300, index=idx, name='D')
    return AttrDict(df1=df1, df2=df2, s=s)
def _multivariate_ls_finalize(self, beta, eps, sigma, cov_type, **cov_config):
    """
    Assemble the results object after least squares estimation

    Parameters
    ----------
    beta : object
        Estimated parameters, forwarded to the result builders
    eps : object
        Residuals; ``eps.size`` is reported as the number of observations
    sigma : object
        Residual covariance, used for both covariance-estimator arguments
    cov_type : str
        'unadjusted' selects HomoskedasticCovariance; anything else selects
        HeteroskedasticCovariance
    **cov_config
        Forwarded to the covariance estimator; ``debiased`` is also read here

    Returns
    -------
    SURResults
        Results built from the common result dictionary, with ``wresid``
        set equal to ``resid``
    """
    num_eq = len(self._wx)

    # Covariance estimation
    if cov_type == 'unadjusted':
        cov_est = HomoskedasticCovariance
    else:
        cov_est = HeteroskedasticCovariance
    cov = cov_est(self._wx, eps, sigma, sigma, gls=False,
                  constraints=self._constraints, **cov_config).cov

    individual = AttrDict()
    debiased = cov_config.get('debiased', False)
    for i in range(num_eq):
        wy = self._wy[i]
        w = self._w[i]
        cons = int(self.has_constant.iloc[i])
        if cons:
            # Remove the projection on the weighted constant before
            # computing the total sum of squares
            wc = np.ones_like(wy) * np.sqrt(w)
            wye = wy - wc @ np.linalg.lstsq(wc, wy)[0]
        else:
            wye = wy
        total_ss = float(wye.T @ wye)
        individual[self._eq_labels[i]] = self._common_indiv_results(
            i, beta, cov, eps, eps, 'OLS', cov_type, 0, debiased, cons, total_ss)

    results = self._common_results(beta, cov, 'OLS', 0, eps.size, cov_type,
                                   sigma, individual, debiased)
    results['wresid'] = results.resid
    return SURResults(results)
def generate_data(nfactor=3, nportfolio=25, nobs=1000, premia=None, output="pandas", alpha=False):
    """
    Simulate factor and portfolio returns from a seeded linear factor model

    Parameters
    ----------
    nfactor : int
        Number of factors
    nportfolio : int
        Number of portfolios
    nobs : int
        Number of observations
    premia : ndarray, optional
        Annual premia, one per factor. Defaults to
        ``arange(1, nfactor + 1) / (10 * nfactor)``
    output : str
        "pandas" wraps the arrays in DataFrames; any other value returns
        NumPy arrays
    alpha : bool
        When True, adds a portfolio-specific constant to the portfolios

    Returns
    -------
    AttrDict
        ``factors`` and ``portfolios``
    """
    np.random.seed(12345)
    if premia is None:
        premia = np.arange(1, nfactor + 1) / (10 * nfactor)

    # Draw order matters for reproducibility: idiosyncratic factor shocks,
    # then the common shock, then portfolio noise, then betas
    rho = 0.2
    e = np.random.randn(nobs, nfactor)
    common = np.random.randn(nobs, 1)
    factors = rho * common + np.sqrt(1 - rho**2) * e
    factors = np.sqrt(0.20**2 / 12) * factors
    factors += premia[None, :] / 12

    idio = np.sqrt(0.10**2 / 12) * np.random.randn(nobs, nportfolio)
    betas = np.random.chisquare(2, (nfactor, nportfolio)) / 2.0
    portfolios = factors @ betas + idio
    if alpha:
        portfolios += np.arange(nportfolio)[None, :] / nportfolio / 100

    index = pd.date_range("1930-1-1", periods=nobs, freq="D")
    if output == "pandas":
        factor_names = ["factor_{0}".format(i) for i in range(1, nfactor + 1)]
        port_names = ["port_{0}".format(i) for i in range(1, nportfolio + 1)]
        factors = pd.DataFrame(factors, columns=factor_names, index=index)
        portfolios = pd.DataFrame(portfolios, columns=port_names, index=index)

    return AttrDict(factors=factors, portfolios=portfolios)
def _common_results(self, beta, cov, method, iter_count, nobs, cov_type, sigma, individual, debiased):
    """
    Build the result dictionary shared by the system estimators

    Parameters
    ----------
    beta : object
        Estimated parameters; ``beta.shape[0]`` is stored as ``df_model``
    cov : object
        Parameter covariance
    method : str
        Estimation method label
    iter_count : int
        Stored under ``iter``
    nobs : int
        Number of observations
    cov_type : str
        Covariance estimator label
    sigma : object
        Residual covariance
    individual : AttrDict
        Per-equation results exposing ``total_ss``, ``resid_ss`` and ``resid``
    debiased : bool
        Debiasing flag

    Returns
    -------
    results : AttrDict
        Combined results, including the summed sums of squares, the system
        ``r2`` and the horizontally stacked residuals
    """
    results = AttrDict()
    leading_entries = [
        ('method', method),
        ('iter', iter_count),
        ('nobs', nobs),
        ('cov_type', cov_type),
        ('index', self._dependent[0].rows),
        ('sigma', sigma),
        ('individual', individual),
        ('params', beta),
        ('df_model', beta.shape[0]),
        ('param_names', self._param_names),
        ('cov', cov),
        ('debiased', debiased),
    ]
    for name, value in leading_entries:
        results[name] = value

    # Aggregate the per-equation pieces in equation order
    total_ss = sum((individual[key].total_ss for key in individual), 0.0)
    resid_ss = sum((individual[key].resid_ss for key in individual), 0.0)
    resid = hstack([individual[key].resid for key in individual])

    results['resid_ss'] = resid_ss
    results['total_ss'] = total_ss
    results['r2'] = 1.0 - resid_ss / total_ss
    results['resid'] = resid
    results['constraints'] = self._constraints
    results['model'] = self

    return results
def data(request):
    """
    Random moments and Jacobian arrays for tests

    ``request`` is accepted but not used. ``inv_jacobian`` is a 10 x 10
    identity matrix, not an inverse computed from ``jacobian``.

    Returns
    -------
    AttrDict
        ``moments`` (500 x 10 normal draws), ``jacobian`` (10 x 8 uniform
        draws) and ``inv_jacobian``
    """
    # Keyword arguments are evaluated left to right, so the draw order
    # (normal moments first, uniform Jacobian second) is kept
    return AttrDict(
        moments=np.random.randn(500, 10),
        jacobian=np.random.rand(10, 8),
        inv_jacobian=np.eye(10),
    )
def data():
    """
    Select the model inputs from ``SIMULATED_DATA``

    Returns
    -------
    AttrDict
        ``dep`` (``y_robust``), ``exog`` (x3-x5 passed through
        ``add_constant``), ``endog`` (x1, x2) and ``instr`` (z1, z2)
    """
    dep = SIMULATED_DATA.y_robust
    exog = add_constant(SIMULATED_DATA[["x3", "x4", "x5"]])
    endog = SIMULATED_DATA[["x1", "x2"]]
    instr = SIMULATED_DATA[["z1", "z2"]]
    return AttrDict(dep=dep, exog=exog, endog=endog, instr=instr)
def data():
    """
    Simulate seeded IV data together with reference quantities

    Returns
    -------
    AttrDict
        Simulated arrays (``x``, ``y``, ``z``, ``e``, ``xhat``), the true
        ``params``, variance quantities (``s2``, ``s2_debiased``, ``v``,
        ``vinv``, ``vk``, ``kappa``), ``clusters``, dimensions (``nobs``,
        ``nvar``) and the model inputs ``dep``, ``exog``, ``endog``, ``instr``
    """
    n, q, k, p = 1000, 2, 5, 3
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)

    # Correlation structure of the k + p + 1 simulated columns
    rho = 0.5
    dim = k + p + 1
    r = np.full((dim, dim), rho)
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    r += 0.5 * np.eye(dim)

    draws = np.random.multivariate_normal(np.zeros(dim), r, n)
    x = draws[:, :k]
    z = draws[:, k:k + p]
    e = draws[:, [-1]]

    params = (np.arange(1, k + 1) / k)[:, None]
    y = x @ params + e
    # Projection of x on the instruments
    xhat = z @ np.linalg.pinv(z) @ x

    nobs, nvar = x.shape
    ete = e.T @ e
    s2 = ete / nobs
    s2_debiased = ete / (nobs - nvar)
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs

    return AttrDict(nobs=nobs, e=e, x=x, y=y, z=z, xhat=xhat, params=params,
                    s2=s2, s2_debiased=s2_debiased, clusters=clusters,
                    nvar=nvar, v=v, vinv=vinv, vk=vk, kappa=kappa, dep=y,
                    exog=x[:, q:], endog=x[:, :q], instr=z)
def data():
    """
    Create labelled pandas inputs sharing one daily index

    Returns
    -------
    AttrDict
        ``df1`` with column A, ``df2`` with columns B and C on the reversed
        index, and ``s``, a Series named D
    """
    nobs = 100
    idx = date_range("2000-01-01", periods=nobs)
    counter = np.arange(nobs)

    df1 = DataFrame(counter[:, None], columns=["A"], index=idx)
    # Same dates as df1, but in reverse order
    reversed_idx = idx[::-1]
    df2 = DataFrame(
        np.arange(2 * nobs).reshape((nobs, 2)),
        columns=["B", "C"],
        index=reversed_idx,
    )
    s = Series(300 + counter, index=idx, name="D")

    return AttrDict(df1=df1, df2=df2, s=s)
def generate_3sls_data_v2(n=500, k=3, nexog=3, nendog=2, ninstr=3, const=True, rho=0.5, output_dict=True, seed=1234, omitted='none'):
    """
    Simulate a system of equations with endogenous regressors

    Parameters
    ----------
    n : int
        Observations per equation
    k : int
        Number of equations
    nexog : int
        Exogenous regressors per equation (excluding the constant)
    nendog : int
        Endogenous regressors per equation
    ninstr : int
        Instruments per equation
    const : bool
        Prepend a column of ones to the regressors and to ``exog``
    rho : float
        Weight given to the common component shared by exog and instruments
    output_dict : bool
        True keeps each equation as an AttrDict; False converts each one to
        a ``(dependent, exog, endog, instruments)`` tuple
    seed : int
        Seed passed to ``np.random.seed``
    omitted : str
        'none' or 'drop' replace empty arrays with None; 'drop' also
        deletes those entries when ``output_dict`` is True

    Returns
    -------
    eqns : AttrDict
        Equations keyed ``eqn.0``, ``eqn.1``, ...
    """
    np.random.seed(seed)
    nvar = ninstr + nexog
    # Loading scale for the endogenous variables; identical in every pass
    scale = np.arange(1, nvar + 1) / nvar
    scale = scale / scale.sum()
    none_if_empty = omitted in ('none', 'drop')

    eqns = AttrDict()
    for i in range(k):
        # Draw order below must not change: it fixes the simulated values
        exog_instr = np.random.standard_normal((n, nvar))
        f = np.random.standard_normal((n, 1))
        exog_instr = np.sqrt(rho) * f + np.sqrt(1 - rho) * exog_instr
        exog = exog_instr[:, :nexog]
        instr = exog_instr[:, nexog:]
        eps = np.random.standard_normal((n, 1))

        endog = np.empty((n, nendog))
        for j in range(nendog):
            c = np.random.chisquare(2, (nvar, 1)) / 2
            c = c * scale[:, None]
            noise = np.random.standard_normal((n, 1))
            endog[:, [j]] = exog_instr @ c + eps + noise

        params = np.arange(1, nendog + nexog + const + 1)[:, None]
        x = np.hstack([exog, endog])
        if const:
            ones = np.ones((n, 1))
            x = np.hstack([ones, x])
            exog = np.hstack([ones, exog])
        dep = x @ params + eps + nendog * np.random.standard_normal((n, 1))

        if none_if_empty:
            exog = None if exog.shape[1] == 0 else exog
            endog = None if endog.shape[1] == 0 else endog
            instr = None if instr.shape[1] == 0 else instr

        eqns['eqn.{0}'.format(i)] = AttrDict(dependent=dep, exog=exog,
                                             endog=endog, instruments=instr,
                                             params=params)

    if not output_dict:
        for key in eqns:
            eq = eqns[key]
            eqns[key] = (eq.dependent, eq.exog, eq.endog, eq.instruments)
    elif omitted == 'drop':
        for key in eqns:
            eq = eqns[key]
            for field in ('exog', 'endog', 'instruments'):
                if eq[field] is None:
                    del eq[field]

    return eqns
def lsdv_config(request):
    """
    Unpack the four-element fixture parameter into a named configuration

    Returns
    -------
    AttrDict
        ``weights``, ``entity_effects``, ``time_effects`` and
        ``other_effects``, in the order they appear in ``request.param``
    """
    use_weights, entity, time, other = request.param
    config = AttrDict(
        weights=use_weights,
        entity_effects=entity,
        time_effects=time,
        other_effects=other,
    )
    return config
def const_data(request):
    """
    Panel data whose single regressor is a constant

    ``request.param`` supplies ``(missing, datatype)``, forwarded to
    ``generate_data`` with ``ntk=(91, 7, 1)``.

    Returns
    -------
    AttrDict
        ``y`` (dependent), ``x`` (copy of ``y`` with every value set to 1
        and the column renamed 'Const') and ``w`` (weights)
    """
    missing, datatype = request.param
    simulated = generate_data(missing, datatype, ntk=(91, 7, 1))

    y = PanelData(simulated.y).dataframe
    # Reuse the shape and index of y for the constant regressor
    const = y.copy()
    const.iloc[:, :] = 1
    const.columns = ['Const']

    weights = PanelData(simulated.w).dataframe
    return AttrDict(y=y, x=const, w=weights)
def generate_data(nkp=(1000, 5, 3)):
    """
    Simulate seeded IV data together with reference quantities

    Parameters
    ----------
    nkp : tuple
        ``(n, k, p)``: number of observations, number of regressors and the
        value used to size the instrument block (``z`` has ``k + p - 2``
        columns)

    Returns
    -------
    AttrDict
        Simulated arrays (``x``, ``y``, ``z``, ``e``, ``xhat``), the true
        ``params``, variance quantities (``s2``, ``s2_debiased``, ``v``,
        ``vinv``, ``vk``, ``kappa``), ``xzizx`` and its inverse, ``clusters``,
        dimensions (``nobs``, ``nvar``), ``i`` (identity with ``k + p - 2``
        rows) and the model inputs ``dep``, ``exog``, ``endog``, ``instr``
    """
    n, k, p = nkp
    np.random.seed(12345)
    clusters = np.random.randint(0, 10, n)
    rho = 0.5
    dim = k + p + 1
    r = np.zeros((dim, dim))
    r.fill(rho)
    r[-1, 2:] = 0
    r[2:, -1] = 0
    r[-1, -1] = 0.5
    # The identity must match the size of r; a hard-coded 9 only worked for
    # the default nkp and raised a shape error for any other k + p + 1
    r += np.eye(dim) * 0.5
    v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)

    x = v[:, :k]
    z = v[:, 2:k + p]
    e = v[:, [-1]]
    # The first two regressors are treated as endogenous
    endog = x[:, :2]
    exog = x[:, 2:]
    instr = z[:, k - 2:]
    params = np.arange(1, k + 1) / k
    params = params[:, None]
    y = x @ params + e
    dep = y
    # Projection of x on the instruments
    xhat = z @ np.linalg.pinv(z) @ x
    nobs, nvar = x.shape
    s2 = e.T @ e / nobs
    s2_debiased = e.T @ e / (nobs - nvar)
    v = xhat.T @ xhat / nobs
    vinv = np.linalg.inv(v)
    kappa = 0.99
    vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs
    xzizx = x.T @ z @ z.T @ x / nobs
    xzizx_inv = np.linalg.inv(xzizx)

    return AttrDict(
        nobs=nobs,
        e=e,
        x=x,
        y=y,
        z=z,
        xhat=xhat,
        params=params,
        s2=s2,
        s2_debiased=s2_debiased,
        clusters=clusters,
        nvar=nvar,
        v=v,
        vinv=vinv,
        vk=vk,
        i=np.eye(k + p - 2),
        kappa=kappa,
        xzizx=xzizx,
        xzizx_inv=xzizx_inv,
        dep=dep,
        exog=exog,
        endog=endog,
        instr=instr,
    )
def __init__(self, results: AttrDict) -> None:
    """
    Store the equation-level entries of ``results`` on the instance

    Parameters
    ----------
    results : AttrDict
        Must provide ``eq_label``, ``dependent``, ``f_stat``, ``r2a``,
        ``instruments`` and ``endog``; ``weight_estimator`` is optional and
        defaults to None. Also passed to the parent initializer.
    """
    super(SystemEquationResult, self).__init__(results)

    # Equation identity
    self._eq_label, self._dependent = results.eq_label, results.dependent
    # Fit statistics
    self._f_statistic, self._r2a = results.f_stat, results.r2a
    # Instrument and endogenous variable information
    self._instruments, self._endog = results.instruments, results.endog
    # Not every result dictionary carries a weight estimator
    self._weight_estimator = results.get("weight_estimator", None)
def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects=0, rng=None):
    """
    Simulate panel data using NumPy's global random state

    Parameters
    ----------
    missing : float
        When positive, ``int(size * missing)`` flat positions are drawn for
        ``y`` and for ``x`` and set to NaN
    datatype : str
        'pandas' or 'xarray' wrap the arrays in labelled containers (the
        'xarray' case additionally wraps them in ``xr.DataArray``); any
        other value leaves NumPy arrays
    const : bool
        When True, one extra variable is added and ``x[0]`` is set to 1.0
    ntk : tuple
        ``(n, t, k)``; ``x`` has shape ``(k, t, n)`` and ``y`` has shape
        ``(t, n)``
    other_effects : int
        Number of categorical effect variables generated in ``c``; ``c`` is
        None when this is 0
    rng : object, optional
        Object with ``get_state``/``set_state``. When given, the global
        state is set from it before drawing and written back to it at the
        end; otherwise the global generator is seeded with 12345

    Returns
    -------
    AttrDict
        ``y``, ``x``, ``w``, ``c``, ``vc1`` and ``vc2``
    """
    if rng is None:
        np.random.seed(12345)
    else:
        # Continue from the caller's generator state
        np.random.set_state(rng.get_state())

    n, t, k = ntk
    k += const
    x = standard_normal((k, t, n))
    beta = np.arange(1, k + 1)[:, None, None] / k
    # Second noise term has shape (1, n), so it is shared across the t axis
    y = (x * beta).sum(0) + standard_normal((t, n)) + 2 * standard_normal((1, n))
    w = np.random.chisquare(5, (t, n)) / 5
    c = None
    if other_effects == 1:
        cats = ['Industries']
    else:
        cats = ['cat.' + str(i) for i in range(other_effects)]
    if other_effects:
        c = np.random.randint(0, 4, (other_effects, t, n))

    vcats = ['varcat.' + str(i) for i in range(2)]
    # One integer per column of the n axis, repeated along the t axis
    vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
    vc1 = vc2[[0]]

    if const:
        x[0] = 1.0

    if missing > 0:
        # NOTE(review): np.random.choice presumably samples with replacement
        # here, so fewer distinct positions than requested may be set to NaN
        locs = np.random.choice(n * t, int(n * t * missing))
        y.flat[locs] = np.nan
        locs = np.random.choice(n * t * k, int(n * t * k * missing))
        x.flat[locs] = np.nan

    if datatype in ('pandas', 'xarray'):
        entities = ['firm' + str(i) for i in range(n)]
        time = pd.date_range('1-1-1900', periods=t, freq='A-DEC')
        vars = ['x' + str(i) for i in range(k)]
        y = pd.DataFrame(y, index=time, columns=entities)
        w = pd.DataFrame(w, index=time, columns=entities)
        # NOTE(review): pd.Panel is not available in recent pandas releases;
        # confirm the pandas version this helper is expected to run against
        x = pd.Panel(x, items=vars, major_axis=time, minor_axis=entities)
        c = pd.Panel(c, items=cats, major_axis=time, minor_axis=entities)
        vc1 = pd.Panel(vc1, items=vcats[:1], major_axis=time, minor_axis=entities)
        vc2 = pd.Panel(vc2, items=vcats, major_axis=time, minor_axis=entities)

    if datatype == 'xarray':
        x = xr.DataArray(x)
        y = xr.DataArray(y)
        w = xr.DataArray(w)
        c = xr.DataArray(c)
        vc1 = xr.DataArray(vc1)
        vc2 = xr.DataArray(vc2)

    if rng is not None:
        # Hand the advanced state back so the caller's generator moves on
        rng.set_state(np.random.get_state())

    return AttrDict(y=y, x=x, w=w, c=c, vc1=vc1, vc2=vc2)
def data(request):
    """
    Three-equation simulated system arranged for GMM tests

    ``request.param`` supplies ``(steps, robust)``.

    Returns
    -------
    AttrDict
        ``eqns`` (the simulated equations), per-equation lists ``y``
        (dependent), ``x`` (exog and endog side by side) and ``z`` (exog and
        instruments side by side), plus ``steps``, ``robust`` and
        ``weight_type`` ('robust' or 'unadjusted')
    """
    steps, robust = request.param
    if robust:
        weight_type = 'robust'
    else:
        weight_type = 'unadjusted'

    eqns = generate_3sls_data_v2(k=3)
    y, x, z = [], [], []
    for key in eqns:
        eq = eqns[key]
        y.append(eq.dependent)
        x.append(np.concatenate([eq.exog, eq.endog], 1))
        z.append(np.concatenate([eq.exog, eq.instruments], 1))

    return AttrDict(eqns=eqns, x=x, y=y, z=z, steps=steps, robust=robust,
                    weight_type=weight_type)
def test_formula_equivalence_weights(data):
    """
    Check that array and formula interfaces agree when weights are used

    Fits IVSystemGMM once from per-equation dictionaries that include
    random weights and once from formulas over a combined DataFrame with
    the same weights, then compares the estimated parameters.
    """
    weights = AttrDict()
    eqn_copy = AttrDict()
    for key in data.eqns:
        # Copy so adding weights does not modify the shared fixture
        eqn = dict(data.eqns[key].items())
        nobs = eqn['dependent'].shape[0]
        w = np.random.chisquare(2, (nobs, 1)) / 2
        weights[key] = w
        eqn['weights'] = w
        eqn_copy[key] = eqn

    mod = IVSystemGMM(eqn_copy, weight_type='unadjusted')

    df = []
    formulas = OrderedDict()
    for i, key in enumerate(data.eqns):
        eqn = data.eqns[key]
        dep = eqn.dependent
        ex = eqn.exog
        en = eqn.endog
        instr = eqn.instruments
        dep = pd.DataFrame(dep, columns=['dep_{0}'.format(i)])
        has_const = False
        if np.any(np.all(ex == 1, 0)):
            # Assumes the constant is the first column; the formula's
            # explicit "1" puts it back
            ex = ex[:, 1:]
            has_const = True
        ex = pd.DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])])
        en = pd.DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])])
        # Name the instruments from their own column count; using the
        # exog count fails whenever the two counts differ
        instr = pd.DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j) for j in range(instr.shape[1])])
        fmla = ''.join(dep.columns) + ' ~ '
        if has_const:
            fmla += ' 1 + '
        fmla += ' + '.join(ex.columns) + ' + ['
        fmla += ' + '.join(en.columns) + ' ~ '
        fmla += ' + '.join(instr.columns) + ' ] '
        formulas[key] = fmla
        df.extend([dep, ex, en, instr])

    # axis must be passed by keyword in current pandas releases
    df = pd.concat(df, axis=1)
    formula_mod = IVSystemGMM.from_formula(formulas, df, weights=weights, weight_type='unadjusted')
    res = mod.fit(cov_type='unadjusted')
    formula_res = formula_mod.fit(cov_type='unadjusted')
    assert_allclose(res.params, formula_res.params)
def _gls_finalize(self, beta, sigma, full_sigma, gls_eps, eps, cov_type, iter_count, **cov_config):
    """
    Assemble the results object after GLS estimation

    Parameters
    ----------
    beta : object
        Estimated parameters
    sigma : object
        Residual covariance passed to the covariance estimator
    full_sigma : object
        Second covariance argument passed to the covariance estimator
    gls_eps : ndarray
        Stacked GLS residuals; reshaped to one column per equation
    eps : ndarray
        Stacked residuals; reshaped to one column per equation
    cov_type : str
        'unadjusted' selects HomoskedasticCovariance; anything else selects
        HeteroskedasticCovariance
    iter_count : int
        Number of iterations; more than one labels the method
        'Iterative GLS'
    **cov_config
        Forwarded to the covariance estimator; ``debiased`` is also read here

    Returns
    -------
    SURResults
        Results with ``wresid`` stacked from the per-equation results
    """
    wx = self._wx
    num_eq = len(self._wy)

    # Covariance estimation
    cov_est = HomoskedasticCovariance if cov_type == 'unadjusted' else HeteroskedasticCovariance
    gls_eps = reshape(gls_eps, (num_eq, gls_eps.shape[0] // num_eq)).T
    eps = reshape(eps, (num_eq, eps.shape[0] // num_eq)).T
    cov = cov_est(wx, gls_eps, sigma, full_sigma, gls=True,
                  constraints=self._constraints, **cov_config).cov

    # Repackage results for individual equations
    individual = AttrDict()
    debiased = cov_config.get('debiased', False)
    if iter_count > 1:
        method = 'Iterative GLS'
    else:
        method = 'GLS'

    for i in range(num_eq):
        cons = int(self.has_constant.iloc[i])
        ye = self._wy[i]
        if cons:
            # Remove the projection on the weighted constant
            c = np.sqrt(self._w[i])
            ye = self._wy[i] - c @ np.linalg.lstsq(c, self._wy[i])[0]
        total_ss = float(ye.T @ ye)
        individual[self._eq_labels[i]] = self._common_indiv_results(
            i, beta, cov, gls_eps, eps, method, cov_type, iter_count,
            debiased, cons, total_ss)

    # Populate results dictionary
    results = self._common_results(beta, cov, method, iter_count, eps.size,
                                   cov_type, sigma, individual, debiased)

    # wresid is different between GLS and OLS
    results['wresid'] = hstack([individual[key].wresid for key in individual])

    return SURResults(results)
def kernel(request):
    """
    Map a kernel name to its weight function and alternative names

    Returns
    -------
    AttrDict
        ``kernel`` (the requested name), ``alt_names`` and ``weight``. Any
        name other than "bartlett" or "parzen" gets the quadratic spectral
        kernel.
    """
    kernel_name = request.param

    if kernel_name == "bartlett":
        return AttrDict(
            kernel=kernel_name,
            alt_names=["newey-west"],
            weight=kernel_weight_bartlett,
        )
    if kernel_name == "parzen":
        return AttrDict(
            kernel=kernel_name,
            alt_names=["gallant"],
            weight=kernel_weight_parzen,
        )
    return AttrDict(
        kernel=kernel_name,
        alt_names=["quadratic-spectral", "andrews"],
        weight=kernel_weight_quadratic_spectral,
    )
def model_data(request):
    """
    Fit a SUR model for one "<dgp>-<model_type>" scenario

    ``request.param`` selects the data set ("basic", "common", anything else
    means the missing-data set) and the model variant ("ss" enables
    debiasing, "constrained" adds two linear constraints).

    Returns
    -------
    AttrDict
        ``data`` (equations re-keyed by dependent column name), ``cov_kwds``,
        ``model_type``, ``stata_result``, ``key``, ``constraint``, ``mod``
        and ``res``
    """
    key = request.param
    dgp, model_type = key.split("-")

    if dgp == "basic":
        data = basic_data
    elif dgp == "common":
        data = common_data
        # Every equation shares the first equation's regressors; this
        # modifies the module-level data in place
        for position, data_key in enumerate(data):
            if position:
                data[data_key]["exog"] = exog
            else:
                exog = data[data_key]["exog"]
    else:  # dgp == 'missing'
        data = missing_data

    cov_kwds = {"cov_type": "unadjusted"}
    if model_type == "ss":
        cov_kwds["debiased"] = True
    stata_result = stata_results[key]

    # Key each equation by the name of its dependent variable
    rekeyed_data = {
        data[data_key]["dependent"].columns[0]: data[data_key]
        for data_key in data
    }

    constraint = None
    if model_type == "constrained":
        cols, widths = [], []
        for new_key, eqn in rekeyed_data.items():
            exog = eqn["exog"]
            cols.extend([new_key + "_" + col for col in exog.columns])
            widths.append(exog.shape[1])
        # Each row sets -1 on the first parameter and +1 on the first
        # parameter of the second (row r0) or third (row r1) equation
        r = pd.DataFrame(columns=cols, index=["r0", "r1"], dtype=np.float64)
        r.iloc[:, :] = 0.0
        r.iloc[:, 0] = -1.0
        r.iloc[0, widths[0]] = 1.0
        r.iloc[1, widths[0] + widths[1]] = 1.0
        constraint = r

    mod = SUR(rekeyed_data)
    if constraint is not None:
        mod.add_constraints(constraint)
    res = mod.fit(**cov_kwds)

    return AttrDict(
        data=rekeyed_data,
        cov_kwds=cov_kwds,
        model_type=model_type,
        stata_result=stata_result,
        key=key,
        constraint=constraint,
        mod=mod,
        res=res,
    )
def config(self):
    """
    Weight estimator configuration

    Returns
    -------
    config : AttrDict
        Copy of the stored configuration with ``bandwidth`` set to the
        estimator's current bandwidth
    """
    # Build from a fresh list of pairs so the stored configuration is not
    # modified when bandwidth is added
    pairs = list(self._config.items())
    out = AttrDict(pairs)
    out['bandwidth'] = self.bandwidth
    return out
def __init__(self, x, eps, sigma, full_sigma, *, gls=False, debiased=False, constraints=None):
    """
    Store the inputs of the homoskedastic covariance estimator

    Parameters
    ----------
    x : sequence
        Regressor arrays; its length is stored as the number of equations
    eps : ndarray
        Residuals; ``eps.shape[0]`` is stored as the number of observations
    sigma : object
        Residual covariance
    full_sigma : object
        Second covariance input, stored unchanged
    gls : bool
        Stored and reported under ``GLS``
    debiased : bool
        Stored and reported under ``Debiased``
    constraints : object, optional
        Parameter constraints, stored unchanged
    """
    # Data
    self._eps = eps
    self._x = x
    self._nobs, self._k = eps.shape[0], len(x)
    self._sigma, self._full_sigma = sigma, full_sigma

    # Options
    self._gls = gls
    self._debiased = debiased
    self._constraints = constraints

    # Descriptive information
    self._name = 'Homoskedastic (Unadjusted) Covariance'
    self._str_extra = AttrDict(Debiased=debiased, GLS=gls)
    self._cov_config = AttrDict(debiased=debiased)
def simple_3sls(y, x, z):
    """
    Reference 3SLS implementation built from dense linear algebra

    Parameters
    ----------
    y : list of ndarray
        Dependent variable of each equation
    x : list of ndarray
        Regressors of each equation
    z : list of ndarray
        Instruments of each equation

    Returns
    -------
    out : AttrDict
        ``beta0`` and ``eps0`` (equation-by-equation first step), ``sigma``
        (first-step residual covariance), ``beta1`` (system estimates),
        ``xpx``, ``xpy``, ``eps`` (system residuals) and ``cov``
        (inverse of ``xpx``)
    """
    out = AttrDict()
    neq = len(y)

    # Step 1: equation-by-equation two-stage least squares
    b = []
    eps = []
    xhat = []
    for i in range(neq):
        xhat.append(z[i] @ lstsq(z[i], x[i])[0])
        b.append(lstsq(xhat[i], y[i])[0])
        eps.append(y[i] - x[i] @ b[-1])
    b = np.vstack(b)
    out['beta0'] = b
    out['eps0'] = eps

    eps = np.hstack(eps)
    nobs = eps.shape[0]
    sigma = eps.T @ eps / nobs
    out['sigma'] = sigma

    # inv(kron(sigma, I)) equals kron(inv(sigma), I), so only the small
    # neq x neq matrix needs inverting
    omegainv = np.kron(np.linalg.inv(sigma), np.eye(nobs))

    # Step 2: stack the system with block-diagonal fitted regressors
    by = np.vstack([y[i] for i in range(neq)])
    bx = []
    for i in range(neq):
        row = []
        for j in range(neq):
            if i == j:
                row.append(xhat[i])
            else:
                row.append(np.zeros((nobs, xhat[j].shape[1])))
        bx.append(np.hstack(row))
    bx = np.vstack(bx)

    xpx = (bx.T @ omegainv @ bx)
    xpy = (bx.T @ omegainv @ by)
    beta1 = np.linalg.solve(xpx, xpy)
    out['beta1'] = beta1
    out['xpx'] = xpx
    out['xpy'] = xpy

    # Residuals from the system estimates, using the original regressors
    idx = 0
    eps = []
    for i in range(neq):
        ncol = x[i].shape[1]
        eps.append(y[i] - x[i] @ beta1[idx:idx + ncol])
        idx += ncol
    out['eps'] = np.hstack(eps)
    out['cov'] = np.linalg.inv(xpx)

    return out
def kernel(request):
    """
    Look up the weight function and alternative names for a kernel

    Returns
    -------
    AttrDict
        ``kernel`` (the requested name), ``alt_names`` and ``weight``.
        Names other than 'bartlett' and 'parzen' fall back to the quadratic
        spectral kernel.
    """
    kernel_name = request.param

    # name -> (weight function, alternative names)
    known = {
        'bartlett': (kernel_weight_bartlett, ['newey-west']),
        'parzen': (kernel_weight_parzen, ['gallant']),
    }
    fallback = (kernel_weight_quadratic_spectral, ['quadratic-spectral', 'andrews'])
    weight_func, alt_names = known.get(kernel_name, fallback)

    return AttrDict(kernel=kernel_name, alt_names=alt_names, weight=weight_func)
def model_data(request):
    """
    Fit a SUR model for one '<dgp>-<model_type>' scenario

    ``request.param`` selects the data set ('basic', 'common', anything else
    means the missing-data set) and the model variant ('ss' enables
    debiasing, 'constrained' adds two linear constraints).

    Returns
    -------
    AttrDict
        ``data`` (equations re-keyed by dependent column name), ``cov_kwds``,
        ``model_type``, ``stata_result``, ``key``, ``constraint``, ``mod``
        and ``res``
    """
    key = request.param
    dgp, model_type = key.split('-')

    if dgp == 'basic':
        data = basic_data
    elif dgp == 'common':
        data = common_data
        # Every equation shares the first equation's regressors; this
        # modifies the module-level data in place
        for position, data_key in enumerate(data):
            if position == 0:
                exog = data[data_key]['exog']
                continue
            data[data_key]['exog'] = exog
    else:  # dgp == 'missing'
        data = missing_data

    cov_kwds = {'cov_type': 'unadjusted'}
    if model_type == 'ss':
        cov_kwds['debiased'] = True
    stata_result = stata_results[key]

    # Key each equation by the name of its dependent variable
    rekeyed_data = OrderedDict(
        (data[data_key]['dependent'].columns[0], data[data_key])
        for data_key in data
    )

    constraint = None
    if model_type == 'constrained':
        cols = []
        widths = []
        for new_key, eqn in rekeyed_data.items():
            exog = eqn['exog']
            cols += [new_key + '_' + col for col in exog.columns]
            widths.append(exog.shape[1])
        # Each row sets -1 on the first parameter and +1 on the first
        # parameter of the second (row r0) or third (row r1) equation
        r = pd.DataFrame(columns=cols, index=['r0', 'r1'], dtype=np.float64)
        r.iloc[:, :] = 0.0
        r.iloc[:, 0] = -1.0
        r.iloc[0, widths[0]] = 1.0
        r.iloc[1, widths[0] + widths[1]] = 1.0
        constraint = r

    mod = SUR(rekeyed_data)
    if constraint is not None:
        mod.add_constraints(constraint)
    res = mod.fit(**cov_kwds)

    return AttrDict(data=rekeyed_data, cov_kwds=cov_kwds,
                    model_type=model_type, stata_result=stata_result,
                    key=key, constraint=constraint, mod=mod, res=res)
def __init__(
    self,
    x: List[ndarray],
    eps: NDArray,
    sigma: NDArray,
    full_sigma: NDArray,
    *,
    gls: bool = False,
    debiased: bool = False,
    constraints: Optional[LinearConstraint] = None,
) -> None:
    """
    Store the inputs of the homoskedastic covariance estimator

    Parameters
    ----------
    x : list of ndarray
        Regressor arrays; the list length is stored as the number of
        equations
    eps : NDArray
        Residuals; ``eps.shape[0]`` is stored as the number of observations
    sigma : NDArray
        Residual covariance
    full_sigma : NDArray
        Second covariance input, stored unchanged
    gls : bool
        Stored and reported under ``GLS``
    debiased : bool
        Stored and reported under ``Debiased``
    constraints : LinearConstraint, optional
        Parameter constraints, stored unchanged
    """
    # Data
    self._eps = eps
    self._x = x
    self._nobs, self._k = eps.shape[0], len(x)
    self._sigma, self._full_sigma = sigma, full_sigma

    # Options
    self._gls = gls
    self._debiased = debiased
    self._constraints = constraints

    # Descriptive information
    self._name = "Homoskedastic (Unadjusted) Covariance"
    self._str_extra = AttrDict(Debiased=debiased, GLS=gls)
    self._cov_config = AttrDict(debiased=debiased)
def data(request):
    """
    Fit the panel model described by "<model>-<vcv>-<weights>-<missing>"

    The four dash-separated parts of ``request.param`` select the model
    class, the covariance estimator, the weighting scheme and a suffix
    appended to the variable names (empty for the complete data).

    Returns
    -------
    AttrDict
        ``fit``, ``model``, ``model_options``, ``y``, ``x``, ``stata`` (the
        matching reference results), ``fit_options``, ``model_name``,
        ``vcv``, ``weights`` and ``missing``
    """
    model, vcv, weights, missing = request.param.split("-")

    # An empty suffix leaves the names unchanged
    y_vars = ["y" + missing]
    x_vars = [name + missing for name in ("x1", "x2", "x3", "x4", "x5")]
    y = sim_data[y_vars]
    x = sim_data[["intercept"] + x_vars]

    mod = MODELS[model]
    mod_options = {}
    if model == "fixed_effect":
        mod_options["entity_effects"] = True
    if weights == "weighted":
        mod_options["weights"] = sim_data["w"]

    fit_options = {"debiased": True}
    if weights == "wls":
        fit_options["reweight"] = True

    has_effects = model in ("fixed_effect", "random_effect")
    if vcv == "robust" and not has_effects:
        fit_options["cov_type"] = "robust"
    elif vcv in ("cluster", "robust"):
        # Reached for vcv == "cluster", or "robust" with an effects model,
        # which is exactly when group debiasing applies
        y_data = PanelData(y)
        eid = y_data.entity_ids
        entities = pd.DataFrame(eid, index=y_data.index, columns=["firm_ids"])
        fit_options["cov_type"] = "clustered"
        fit_options["clusters"] = entities
        fit_options["group_debias"] = True
    else:
        fit_options["cov_type"] = "unadjusted"

    spec_mod = mod(y, x, **mod_options)
    fit = spec_mod.fit(**fit_options)

    return AttrDict(
        fit=fit,
        model=spec_mod,
        model_options=mod_options,
        y=y,
        x=x,
        stata=STATA_RESULTS[request.param],
        fit_options=fit_options,
        model_name=model,
        vcv=vcv,
        weights=weights,
        missing=missing,
    )
def data(request):
    """
    Fit the panel model described by '<model>-<vcv>-<weights>-<missing>'

    The four dash-separated parts of ``request.param`` select the model
    class, the covariance estimator, the weighting scheme and a suffix
    appended to the variable names (empty for the complete data).

    Returns
    -------
    AttrDict
        ``fit``, ``model``, ``model_options``, ``y``, ``x``, ``stata`` (the
        matching reference results), ``fit_options``, ``model_name``,
        ``vcv``, ``weights`` and ``missing``
    """
    model, vcv, weights, missing = request.param.split('-')
    effects_models = ('fixed_effect', 'random_effect')

    # Variable names, with the missing-data suffix applied when present
    names = ['y', 'x1', 'x2', 'x3', 'x4', 'x5']
    if missing:
        names = [name + missing for name in names]
    y_vars, x_vars = names[:1], names[1:]

    y = sim_data[y_vars]
    x = sim_data[['intercept'] + x_vars]
    mod = MODELS[model]

    mod_options = {}
    if model == 'fixed_effect':
        mod_options['entity_effects'] = True
    if weights == 'weighted':
        mod_options['weights'] = sim_data['w']

    fit_options = {'debiased': True}
    if weights == 'wls':
        fit_options['reweight'] = True

    if vcv == 'robust' and model not in effects_models:
        fit_options['cov_type'] = 'robust'
    elif vcv in ('cluster', 'robust'):
        # Cluster by entity
        y_data = PanelData(y)
        eid = y_data.entity_ids
        entities = pd.DataFrame(eid, index=y_data.index, columns=['firm_ids'])
        fit_options['cov_type'] = 'clustered'
        fit_options['clusters'] = entities
    else:
        fit_options['cov_type'] = 'unadjusted'

    if vcv == 'cluster' or (vcv == 'robust' and model in effects_models):
        fit_options['group_debias'] = True

    spec_mod = mod(y, x, **mod_options)
    fit = spec_mod.fit(**fit_options)

    return AttrDict(fit=fit, model=spec_mod, model_options=mod_options, y=y,
                    x=x, stata=STATA_RESULTS[request.param],
                    fit_options=fit_options, model_name=model, vcv=vcv,
                    weights=weights, missing=missing)