def create_table(models, out):
    """Render pickled regression results as a LaTeX table and save it.

    Every entry of ``models`` is opened in binary mode and unpickled. Each
    loaded object supplies a ``model_name`` (used as its column header) and
    a ``var_names`` mapping (merged into one dictionary of display labels).

    Args:
        models: paths of the pickled regression results
        out: path the generated LaTeX table is written to

    Returns:
        None
    """
    fitted = []
    column_titles = []
    labels = {}
    for model_path in models:
        with open(model_path, 'rb') as handle:
            fitted_model = pickle.load(handle)
        fitted.append(fitted_model)
        column_titles.append(fitted_model.model_name)
        labels.update(fitted_model.var_names)

    stargazer = Stargazer(fitted)
    # The dependent variable label is looked up via the first model.
    dependent = fitted[0].model.endog_names
    stargazer.dependent_variable_name(labels[dependent])
    stargazer.custom_columns(column_titles, [1 for _ in column_titles])
    stargazer.rename_covariates(labels)

    # Workaround kept from the original author, who reports that stargazer
    # emits an invalid LaTeX table: add one more "c" to the column spec.
    rendered = re.sub(r"l(c+)\}", r"lc\1}", stargazer.render_latex())
    with open(out, 'w') as handle:
        handle.write(rendered)
def create_table(models, path):
    """Render regression results with stargazer and write them to ``path``.

    The output format is chosen from the file suffix: ``.tex`` produces a
    LaTeX table and ``.html`` an HTML table.

    Args:
        models: fitted models accepted by ``Stargazer``.
        path: destination file (string or path-like).

    Raises:
        NotImplementedError: if the suffix is neither ``.tex`` nor ``.html``.
    """
    path = Path(path)
    renderers = {".tex": "render_latex", ".html": "render_html"}

    # Reject unsupported formats before doing any work on the models, and
    # say which suffix was seen so the caller can fix the path.
    try:
        render_method = renderers[path.suffix]
    except KeyError:
        supported = ", ".join(sorted(renderers))
        raise NotImplementedError(
            f"Unsupported table format {path.suffix!r}; "
            f"expected one of: {supported}") from None

    table = getattr(Stargazer(models), render_method)()
    with open(path, "w") as file:
        file.write(table)
class StargazerTestCase(unittest.TestCase):
    """Checks for ``Stargazer.add_line`` and LaTeX rendering."""

    def setUp(self):
        # Two OLS fits of 'a' on 'b' (b == 2 * a): without and with intercept.
        self.df = pd.DataFrame({
            'a': list(range(9)),
            'b': list(range(0, 18, 2)),
        })
        self.est1 = smf.ols('a ~ 0 + b', self.df).fit()
        self.est2 = smf.ols('a ~ 1 + b', self.df).fit()
        self.stargazer = Stargazer([self.est1, self.est2])

    def test_add_line(self):
        # One value for two models: too few arguments.
        with self.assertRaises(AssertionError):
            self.stargazer.add_line('', [0])

        # A location that is not a valid LineLocation.
        with self.assertRaises(ValueError):
            self.stargazer.add_line('', [0, 0], 'wrong')

        # Correct usage: one custom line at every supported location.
        locations = list(LineLocation)
        for location in locations:
            self.stargazer.add_line(f'test {location.value}',
                                    ['N/A', 'N/A'], location)

        rendered = self.stargazer.render_latex()
        for location in locations:
            expected_row = f' test {location.value} & N/A & N/A \\'
            self.assertIn(expected_row, rendered)

    def test_render_latex(self):
        # Underscores are only escaped when escaping is requested.
        self.stargazer.rename_covariates({'b': 'b_'})
        plain = self.stargazer.render_latex()
        escaped = self.stargazer.render_latex(escape=True)
        self.assertIn(' b_ ', plain)
        self.assertIn(r' b\_ ', escaped)
def Appendix_Table_3(df):
    """Build the stargazer table of belief-adjustment regressions for treatgroup 4.

    Three HC1-robust OLS models are fitted on rows with ``treatgroup == 4``:
    one for ``dummynews_goodbad == 0`` (column 'Positive Information'), one
    for ``dummynews_goodbad == 1`` (column 'Negative Information') and a
    pooled model with an interaction term (column 'Difference-in-difference').

    Side effect: adds the column ``interact_negative_bayes`` to ``df``.

    Returns:
        The configured ``Stargazer`` object.
    """
    columns = ["beliefadjustment_normalized", "beliefadjustment_bayes_norm"]
    bivariate = "beliefadjustment_normalized ~ beliefadjustment_bayes_norm"
    in_group_4 = df['treatgroup'] == 4

    fits = []
    for news_flag in (0, 1):
        rows = (df["dummynews_goodbad"] == news_flag) & in_group_4
        subsample = df.loc[rows, columns]
        fits.append(
            smf.ols(formula=bivariate, data=subsample).fit(cov_type='HC1'))

    # Interaction of the Bayesian benchmark with the news dummy; written
    # onto the caller's frame before the pooled sample is selected.
    df["interact_negative_bayes"] = (
        df["beliefadjustment_bayes_norm"] * df["dummynews_goodbad"])
    pooled_sample = df[df['treatgroup'] == 4]
    pooled = smf.ols(
        formula=
        "beliefadjustment_normalized ~ beliefadjustment_bayes_norm + dummynews_goodbad + interact_negative_bayes",
        data=pooled_sample)
    fits.append(pooled.fit(cov_type='HC1'))

    table = Stargazer(fits)
    table.title('Table 8: Belief Adjustment in the Short-Run')
    table.dependent_variable_name('Belief Adjustment')
    column_titles = [
        'Positive Information', 'Negative Information',
        'Difference-in-difference'
    ]
    table.custom_columns(column_titles, [1, 1, 1])
    return table
def table3_7(df, regression_type):
    """Render the rule-of-law ('RulLaw') regressions as an HTML stargazer table.

    For each of ethnicity, language and religion two models are fitted on
    that dimension's complete-case sample: a short one (segregation and
    fractionalization only) and one with the control variables. The table
    columns are ordered short/long for ethnicity, language, religion.

    Args:
        df: DataFrame holding the ``*_C2``, ``*_I``,
            ``*_instrument_C2_thresh`` columns, the controls and 'RulLaw'.
        regression_type: 'IV2SLS' (segregation instrumented by
            ``*_instrument_C2_thresh``) or 'OLS' (HC1 covariance).

    Returns:
        ``HTML`` object wrapping the rendered table.

    Raises:
        ValueError: if ``regression_type`` is neither 'IV2SLS' nor 'OLS'.
    """
    # Fail early with a clear message; previously an unknown value only
    # surfaced later as an unbound-local error.
    if regression_type not in ('IV2SLS', 'OLS'):
        raise ValueError(
            "regression_type must be 'IV2SLS' or 'OLS', "
            f"got {regression_type!r}")

    # Columns (besides the three dimension-specific ones) that define each
    # complete-case sample.
    sample_columns = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'lnArea',
        'LOScandin', 'democ', 'mtnall', 'RulLaw'
    ]
    # Control columns of the long specification, in the original order.
    # NOTE(review): the religion lists omit 'LOScandin' while ethnicity and
    # language include it - confirm this is intended.
    exog_controls = {
        'ethnicity': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'LOScandin', 'lnArea', 'democ', 'mtnall'
        ],
        'language': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'lnArea', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'LOScandin', 'democ', 'mtnall'
        ],
        'religion': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'lnArea', 'democ', 'mtnall'
        ],
    }
    instrument_controls = {
        'ethnicity': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'LOScandin', 'democ', 'mtnall', 'lnArea'
        ],
        'language': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'LOScandin', 'democ', 'mtnall', 'lnArea'
        ],
        'religion': [
            'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims',
            'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
            'democ', 'mtnall', 'lnArea'
        ],
    }

    fitted = []
    for dim in ('ethnicity', 'language', 'religion'):
        segregation = f'{dim}_C2'
        instrument = f'{dim}_instrument_C2_thresh'
        fractionalization = f'{dim}_I'
        sample = df[[segregation, instrument, fractionalization] +
                    sample_columns].dropna(axis=0)
        outcome = sample['RulLaw']

        # Short specification first, then the one with controls.
        specifications = [
            ([segregation, fractionalization],
             [instrument, fractionalization]),
            ([segregation, fractionalization] + exog_controls[dim],
             [instrument, fractionalization] + instrument_controls[dim]),
        ]
        for exog_columns, instrument_columns in specifications:
            exog = sm.add_constant(sample[exog_columns])
            if regression_type == 'IV2SLS':
                instruments = sm.add_constant(sample[instrument_columns])
                fit = IV2SLS(outcome, exog, instruments).fit()
            else:
                fit = sm.OLS(outcome, exog).fit(cov_type='HC1')
            fitted.append(fit)

    stargazer = Stargazer(fitted)
    stargazer.covariate_order([
        'ethnicity_C2', 'ethnicity_I', 'language_C2', 'language_I',
        'religion_C2', 'religion_I', 'lnpopulation', 'lnGDP_pc', 'lnArea',
        'protestants', 'muslims', 'catholics', 'latitude', 'LOEnglish',
        'LOGerman', 'LOSocialist', 'LOScandin', 'democ', 'mtnall', 'const'
    ])
    # Raw strings: '\h' is not a valid escape sequence in a normal literal.
    stargazer.rename_covariates({
        'ethnicity_C2': r'Segregation $\hat{S}$ (ethnicity)',
        'ethnicity_I': 'Fractionalization $F$ (ethnicity)',
        'language_C2': r'Segregation $\hat{S}$ (language)',
        'language_I': 'Fractionalization $F$ (language)',
        'religion_C2': r'Segregation $\hat{S}$ (religion)',
        'religion_I': 'Fractionalization $F$ (religion)',
        'lnpopulation': 'ln (population)',
        'lnGDP_pc': 'ln (GDP per capita)',
        'lnArea': 'ln (average size of region)',
        'protestants': 'Protestants share',
        'muslims': 'Muslims Share',
        'catholics': 'Catholics share',
        'latitude': 'Latitude',
        'LOEnglish': 'English legal origin',
        'LOGerman': 'German legal origin',
        'LOSocialist': 'Socialist legal origin',
        'LOScandin': 'Scandinavian legal origin',
        'democ': 'Democratic tradition',
        'mtnall': 'Mountains',
        'const': 'Constant'
    })
    return HTML(stargazer.render_html())
interpolation='nearest', data=True)  # tail of a call that opens above this excerpt
plt.colorbar()
# One tick per column of DataReg1, labelled with the column names on both axes.
tick_marks = [i for i in range(len(DataReg1.columns))]
plt.xticks(tick_marks, DataReg1.columns, rotation=45)
plt.yticks(tick_marks, DataReg1.columns, rotation=45)
plt.title('NASDAQ Variable Correlations')
# Write every correlation coefficient (two decimals) onto the plot,
# slightly offset from the (i, j) position.
# NOTE(review): DataReg1.corr() is re-evaluated for every (i, j) pair.
for i in range(len(DataReg1.columns)):
    for j in range(len(DataReg1.columns)):
        text = '%.2f' % (DataReg1.corr().iloc[i, j])
        plt.text(i - 0.2, j - 0.1, text)

# In[54]:

print(model2.summary())
# Author's reading of the summary: the F-statistic is significantly large
# and the R-squared value is good.

# In[57]:

# Side-by-side table of model2 and model3; the bare name is a notebook-style
# display expression.
stargazer = Stargazer([model2, model3])
stargazer

# In[61]:

sns.pairplot(DataReg)
plt.suptitle('Pair Plot of Economic Indicators and Price', size=15)

# In[ ]:
def _stat_with_pvalue(test_result):
    """Format the first two entries of a test result as 'stat (p-value)'."""
    return f"{test_result[0]:.3f} ({test_result[1]:.3f})"


# Extra rows for summary_col: each value maps a fitted model to the text
# (or number) shown in that row. Each diagnostic test is evaluated once per
# model and formatted by the helper above.
reg_info = {
    "Observações": lambda x: x.nobs,
    "R^2": lambda x: x.rsquared,
    "R^2 Ajustado": lambda x: x.rsquared_adj,
    "Estatística F": lambda x: f"{x.fvalue:.3f} ({x.f_pvalue:.3f})",
    "Jarque-Bera": lambda x: _stat_with_pvalue(jarque_bera(x.resid)),
    "Dickey-Fuller": lambda x: _stat_with_pvalue(
        adfuller(x.resid, maxlag=1, autolag=None)),
    "Durbin-Watson": lambda x: f"{durbin_watson(x.resid):.3f}"
}

print(summary_col([reg], stars=True, info_dict=reg_info).as_latex())
print(Stargazer([reg]).render_latex())

# Lagged residual of the first regression, used as the "equilibrio"
# regressor in the second model.
reg_resid = reg.resid.shift(1).dropna()
reg_resid.name = "equilibrio"

# The original had a stray trailing comma here (making y a 1-tuple) and then
# rebuilt the same endog/exog expressions inline; build them once and reuse.
y = d_series.spread
X = pd.concat([reg_resid, d_series.selic, d_series.inad, d_series.ibc],
              axis="columns")

ecm = sm.OLS(endog=y, exog=X).fit()
print(summary_col([ecm], stars=True, info_dict=reg_info).as_latex())
# Attach the country columns, then discard what is not needed and keep only
# rows that report testing data.
df = pd.concat([country, df], axis=1)
df.drop(columns=["Thailand", "location"], inplace=True)
df.dropna(subset=["new_tests_per_thousand"], inplace=True)
df["date"] = pd.to_datetime(df["date"])

# Merge each country's rows with its own mobility data, then stack them.
is_malaysia = df["Malaysia"] == 1
df_my = df[is_malaysia].merge(mob_my, on="date")
df_th = df[df["Malaysia"] == 0].merge(mob_th, on="date")
df = pd.concat([df_my, df_th])

# 0 up to and including 2020-09-26, 1 afterwards.
election_cutoff = pd.to_datetime("2020-09-26")
df["post_election"] = df["date"].apply(
    lambda day: 0 if day <= election_cutoff else 1)

mdl_joined_el = ols(
    formula=
    "new_cases_per_million ~ Malaysia * post_election + new_tests_per_thousand + retail + grocery + parks + transit",
    data=df)
results_joined_el = mdl_joined_el.fit()
results_joined_el.summary()

stargazer = Stargazer([results_joined_el])
stargazer.render_latex()
def main():
    """Load the Oromia spreadsheets and fit NDVI-change regressions for Arsi.

    Reads five sheets from 'oromia.xlsx', builds predictor terms from the
    Arsi sheet's NDVI, P, LST and ET columns, fits simple and multiple
    regressions of the NDVI change, plots the results and renders a
    stargazer table for the last fitted ``est``.

    NOTE(review): the function changes the working directory to a hard-coded
    absolute path and has no return statement.
    """
    #%%
    # Load data
    os.chdir('/Users/rgreen/Documents/Github/NDVI_Projection/')
    oromia = pd.ExcelFile('oromia.xlsx')
    arsi = pd.read_excel(oromia, 'arsi')
    bale = pd.read_excel(oromia, 'bale')
    borena = pd.read_excel(oromia, 'borena')
    guji = pd.read_excel(oromia, 'guji')
    westarsi = pd.read_excel(oromia, 'westarsi')
    # Prepend a 'Time' column counting 1..585 to every sheet.
    # NOTE(review): presumably each sheet has exactly 585 rows - confirm.
    arsi.insert(0, 'Time', np.linspace(1,585,585))
    bale.insert(0, 'Time', np.linspace(1,585,585))
    borena.insert(0, 'Time', np.linspace(1,585,585))
    guji.insert(0, 'Time', np.linspace(1,585,585))
    westarsi.insert(0, 'Time', np.linspace(1,585,585))
    #%%
    # Dekadal data (P, LST, ET)
    # First construction of ddf; it is replaced by a new, empty frame a few
    # lines below, so these columns are discarded.
    ddf = pd.DataFrame()
    ddf['D_NDVI'] = arsi.NDVI.diff()[1:]
    ddf = ddf.reset_index(drop=True)
    ddf['N_P'] = ((max(arsi.NDVI) - (arsi.NDVI[:-1]))* (arsi.P[:-1]))
    ddf['N_LST'] = (((arsi.NDVI[:-1]) - min(arsi.NDVI))* (arsi.LST[:-1])).shift(4) # need to shift back lags, use shift not index
    ddf['N_ET'] = (((arsi.NDVI[:-1]) - min(arsi.NDVI))* (arsi.ET[:-1])).shift(4)
    # Second construction, the one used from here on: D_NDVI is the NDVI
    # difference moved up one row, so its last row is NaN.
    ddf = pd.DataFrame()
    ddf['D_NDVI'] = arsi.NDVI.diff().shift(-1)
    ddf = ddf.reset_index(drop=True)
    #L_NDVI = arsi.NDVI.shift(-1) #lag
    ddf['N_P'] = ((max(arsi.NDVI) - (arsi.NDVI))* (arsi.P))
    ddf['N_LST'] = (((arsi.NDVI) - min(arsi.NDVI))* (arsi.LST)).shift(4) # need to shift back lags, use shift not index
    ddf['N_ET'] = (((arsi.NDVI) - min(arsi.NDVI))* (arsi.ET)).shift(4) # author's note: shift is (n-1) dekads, e.g. shift(4) is 5 dekads lag
    # Masks selecting rows where both the predictor and D_NDVI are not NaN.
    mask_lst = ~np.isnan(ddf.N_LST) & ~np.isnan(ddf.D_NDVI)
    mask_et = ~np.isnan(ddf.N_ET) & ~np.isnan(ddf.D_NDVI)
    # stats.linregress(x, y)
    # NOTE(review): no NaN mask is applied to this first fit although
    # D_NDVI ends in NaN - confirm the result is not NaN.
    slope1, intercept1, r1, p1, std1 = stats.linregress(ddf.N_P, ddf.D_NDVI)
    line1 = slope1*ddf.N_P+intercept1
    print("r-squared: %f" % r1**2)
    slope2, intercept2, r2, p2, std2 = stats.linregress(ddf.N_LST[mask_lst], ddf.D_NDVI[mask_lst])
    line2 = slope2*ddf.N_LST+intercept2
    print("r-squared: %f" % r2**2)
    slope3, intercept3, r3, p3, std3 = stats.linregress(ddf.N_ET[mask_et], ddf.D_NDVI[mask_et])
    line3 = slope3*ddf.N_ET+intercept3
    print("r-squared: %f" % r3**2)
    # Three stacked scatter plots with the fitted lines. The equation and
    # r^2 annotations are hard-coded text, not taken from the fits above.
    fig, (ax1, ax2, ax3) = plt.subplots(3, sharey =True)
    ax1.scatter(ddf.N_P, ddf.D_NDVI, color = 'darkcyan')
    ax1.plot(ddf.N_P, line1, color = 'k')
    ax1.set(xlabel = r'$(NDVI_{max} - NDVI_{t-1})*P_{t-1}$', ylabel = '')
    ax1.text(16, -0.04, r'y = 0.005x + 0.02', fontsize=8)
    ax1.text(16, -0.05, r'$r^2$ = 0.528', fontsize=8)
    ax2.scatter(ddf.N_LST, ddf.D_NDVI, color = 'forestgreen')
    ax2.set(xlabel = r'$(NDVI_{t-5} - NDVI_{min})*LST_{t-5}$', ylabel = '')
    ax2.plot(ddf.N_LST, line2, color = 'k')
    ax2.text(9, 0.07, r'y = -0.005x + 0.038', fontsize=8)
    ax2.text(9, 0.06, r'$r^2$ = 0.386', fontsize=8)
    ax3.scatter(ddf.N_ET, ddf.D_NDVI, color = 'cornflowerblue')
    # NOTE(review): this label says NDVI_max - NDVI_t-5, but N_ET is built
    # from NDVI - min(NDVI) - confirm which is intended.
    ax3.set(xlabel = r'$(NDVI_{max} - NDVI_{t-5})*ET_{t-5}$', ylabel = '')
    ax3.plot(ddf.N_ET, line3, color = 'k')
    ax3.text(15, 0.07, r'y = -0.003x + 0.031', fontsize=8)
    ax3.text(15, 0.06, r'$r^2$ = 0.414', fontsize=8)
    fig.text(0.06, 0.5, r'$\Delta NDVI$', ha='center', va='center', rotation='vertical') # common ylabel
    # =============================================================================
    # Disabled seaborn alternative, kept by the author:
    #
    # sns.set(style="ticks", color_codes=True)
    #
    # fig = plt.figure()
    # sns.regplot(x=ddf.N_P, y=ddf.D_NDVI)
    #
    # g = sns.PairGrid(ddf, y_vars=["D_NDVI"], x_vars=["N_P", "N_LST", "N_ET"], height=4)
    # g.map(sns.regplot, color=".3")
    #
    # replacements = {'D_NDVI': r'$\Delta NDVI$', 'N_P': '(maxNDVI - NDVIt-1)*Pt-1',
    #                 'N_LST': '(NDVIt-5 - minNDVI)*LSTt-5', 'N_ET': '(NDVIt-5 - minNDVI)*N_ETt-5'}
    #
    # for i in range(4):
    #     for j in range(4):
    #         xlabel = g.axes[i][j].get_xlabel()
    #         ylabel = g.axes[i][j].get_ylabel()
    #         if xlabel in replacements.keys():
    #             g.axes[i][j].set_xlabel(replacements[xlabel])
    #         if ylabel in replacements.keys():
    #             g.axes[i][j].set_ylabel(replacements[ylabel])
    #
    # =============================================================================
    # =============================================================================
    # Disabled Pearson correlation checks, kept by the author:
    #
    # r, p = stats.pearsonr(ddf.D_NDVI, ddf.N_P)
    # r, p = stats.pearsonr(ddf.D_NDVI, ddf.N_LST)
    # r, p = stats.pearsonr(ddf.D_NDVI, ddf.N_LST)
    #
    # =============================================================================
    #%%
    # Simple OLS: D_NDVI on N_P plus a constant; rows with missing values
    # are dropped by statsmodels (missing='drop').
    X = ddf[['N_P']]
    Y = ddf['D_NDVI']
    X = sm.add_constant(X)
    est = sm.OLS(Y,X, missing = 'drop').fit()
    est.summary()
    Y_pred = est.predict()
    # Add the NDVI level back onto the observed and predicted changes.
    # NOTE(review): Y_pred is added to a label-indexed slice of arsi.NDVI;
    # confirm lengths and alignment match after rows were dropped.
    original = Y + arsi.NDVI
    predicted = Y_pred + arsi.NDVI[:-2]
    result = pd.concat([original, predicted], axis=1)
    result.columns= ['Original','Predicted']
    rms1 = sqrt(mean_squared_error(result.Original[:-2], result.Predicted[:-2])) # author's recorded value: 0.0186
    # Multiple OLS: D_NDVI on N_P, N_LST and N_ET plus a constant.
    X = ddf[['N_P', 'N_LST', 'N_ET']]
    Y = ddf['D_NDVI']
    X = sm.add_constant(X)
    est = sm.OLS(Y,X, missing = 'drop').fit()
    est.summary()
    Y_pred = est.predict()
    original = Y + arsi.NDVI
    predicted = Y_pred + arsi.NDVI[:-6]
    result = pd.concat([original, predicted], axis=1)
    result.columns= ['Original','Predicted']
    # rms1 is overwritten here; the trailing value is the author's note.
    rms1 = sqrt(mean_squared_error(result.Original[:-6], result.Predicted[:-6])) # author's recorded value: 0.0186
    fig = plt.figure()
    plt.plot(arsi.DT, result.Original, 'darkblue', label = 'Original')
    plt.plot(arsi.DT, result.Predicted, 'dodgerblue', label = 'Projected')
    plt.legend(loc='upper right')
    plt.title('Arsi NDVI')
    # Refit the precipitation-only model directly from the Arsi columns.
    X = ((max(arsi.NDVI) - arsi.NDVI)* arsi.P) # for multivariate regression use ddf[['N_P', 'N_LST']]
    Y = arsi.NDVI.diff().shift(-1)
    X = sm.add_constant(X) # author's note: only use when doing first run OLS then remove when fitting prediction
    est = sm.OLS(Y,X, missing = 'drop').fit()
    est.summary()
    Y_pred = est.predict()
    # Hard-coded coefficients rather than est.params.
    # NOTE(review): X still contains the constant column added above, so
    # this expression is applied to both columns - confirm.
    Y_pred2 = 0.0047*X - 0.0201
    # Take delta predictions and convert to forecast
    original = Y + arsi.NDVI
    # NOTE(review): `result.original` is lower-case while the columns set
    # above are 'Original'/'Predicted'; np.append also returns a NumPy
    # array that is then passed to pd.concat - confirm this runs.
    original = np.append(result.original, np.nan)
    predicted = Y_pred2 + arsi.NDVI
    result = pd.concat([original, predicted], axis=1)
    result.columns= ['Original','Predicted']
    fig = plt.figure()
    plt.plot(arsi.DT, result.Original, 'darkblue', label = 'Original')
    plt.plot(arsi.DT, result.Predicted, 'dodgerblue', label = 'Projected')
    plt.legend(loc='upper right')
    plt.title('Arsi NDVI')
    rms1 = sqrt(mean_squared_error(result.Original[:-2], result.Predicted[:-2])) # author's recorded value: 0.0186
    # Create stargazer model table (the HTML object is built but not
    # assigned or returned).
    stargazer = Stargazer([est])
    HTML(stargazer.render_html())
    # NOTE(review): no missing='drop' here although D_NDVI ends in NaN.
    model = sm.OLS(ddf.D_NDVI, sm.add_constant(ddf.N_P)).fit()
    # Predict values of Y
    Y_pred = model.predict()
    # Summary table
    model.summary()
def table8_9_ext5(df, name, GDP):
    """Build IV2SLS governance-indicator tables for one fragmentation dimension.

    Six outcomes (voice, PolStab, GovEffec, RegQual, RulLaw, ConCorr) are
    regressed on ``{name}_C2`` and ``{name}_I``, with
    ``{name}_instrument_C2_thresh`` replacing ``{name}_C2`` in the
    instrument set. Three panels are estimated on the complete-case sample:

    * A: all controls, full sample
    * B: no controls, full sample
    * C: all controls, restricted sample (``democ >= 1`` when
      ``GDP == 'democ'``, ``lnGDP_pc >= 7`` when ``GDP == 'GDP'``)

    Args:
        df: DataFrame with the dimension columns, controls and outcomes.
        name: column prefix, e.g. 'ethnicity' or 'language'.
        GDP: 'democ' or 'GDP'; selects the panel C restriction and the
            return shape.

    Returns:
        For ``GDP == 'democ'`` a list of the three ``Stargazer`` panels;
        for ``GDP == 'GDP'`` only panel C (titled when ``name`` is
        'ethnicity' or 'language').

    Raises:
        ValueError: if ``GDP`` is neither 'democ' nor 'GDP'.
    """
    # Fail early with a clear message; previously an unknown value only
    # surfaced later as an unbound-local error.
    if GDP not in ('democ', 'GDP'):
        raise ValueError(f"GDP must be 'democ' or 'GDP', got {GDP!r}")

    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    # Column order of each table.
    outcomes = ['voice', 'PolStab', 'GovEffec', 'RegQual', 'RulLaw',
                'ConCorr']

    full_sample = df[[segregation, fractionalization, instrument] +
                     controls + [
                         'voice', 'PolStab', 'GovEffec', 'RegQual',
                         'ConCorr', 'RulLaw'
                     ]].dropna(axis=0)
    if GDP == 'democ':
        # NOTE(review): threshold is ">= 1" - confirm this is the intended
        # cut-off for excluding dictatorships.
        restricted = full_sample[full_sample.democ >= 1]
    else:
        restricted = full_sample[full_sample.lnGDP_pc >= 7]

    # (sample, extra regressors) for panels A, B and C.
    panels = [(full_sample, controls), (full_sample, []),
              (restricted, controls)]

    star = []
    for sample, extra in panels:
        exog = sm.add_constant(
            sample[[segregation, fractionalization] + extra])
        instruments = sm.add_constant(
            sample[[instrument, fractionalization] + extra])
        fits = [
            IV2SLS(sample[outcome], exog, instruments).fit()
            for outcome in outcomes
        ]
        star.append(Stargazer(fits))

    for table in star:
        table.covariate_order([segregation, fractionalization])
        # Raw string: '\h' is not a valid escape in a normal literal.
        table.rename_covariates({
            segregation: r'Segregation $\hat{S}$ (' + f'{name})',
            fractionalization: f'Fractionalization $F$ ({name})'
        })
        table.show_model_numbers(False)
        table.custom_columns([
            'Voice', 'Political stability', 'Govern-t effectiv.',
            'Regul. quality', 'Rule of law', 'Control of corr'
        ], [1, 1, 1, 1, 1, 1])

    if GDP == 'democ':
        star[0].add_line('Controls', ['Yes'] * 6)
        star[0].add_line('Sample', ['Full'] * 6)
        star[1].add_line('Controls', ['No'] * 6)
        star[1].add_line('Sample', ['Full'] * 6)
        star[2].add_line('Controls', ['Yes'] * 6)
        star[2].add_line('Sample', ['Democ'] * 6)
        star[0].title('Panel A. Baseline : All controls and full sample')
        star[1].title('Panel B. No controls and full sample')
        star[2].title('Panel C. All controls; sample excludes dictatorship')
        return [star[0], star[1], star[2]]

    # GDP == 'GDP': only the restricted-sample panel is returned.
    if name == 'ethnicity':
        star[2].title(
            'Panel A. Ethnicity: All controls; sample excludes poorest countries'
        )
    elif name == 'language':
        star[2].title(
            'Panel B. Language: All controls; sample excludes poorest countries'
        )
    return star[2]
def df_table12(df, name):
    """Build the stargazer table of trust regressions for one dimension.

    Six models of 'trust' are fitted on the complete-case sample: OLS (HC1
    covariance) and IV2SLS, each in three versions - segregation only,
    with all controls, and with all controls on rows where ``democ > 1``.
    In the IV2SLS models ``{name}_instrument_C2_thresh`` replaces
    ``{name}_C2`` in the instrument set.

    Args:
        df: DataFrame with the dimension columns, 'trust' and the controls.
        name: column prefix; 'ethnicity' titles the table 'Panel A.
            Ethnicity', any other value 'Panel B. Language'.

    Returns:
        The configured ``Stargazer`` object.
    """
    segregation = f'{name}_C2'
    instrument = f'{name}_instrument_C2_thresh'
    fractionalization = f'{name}_I'
    controls = [
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'democ', 'mtnall'
    ]

    sample = df[[
        segregation, instrument, fractionalization, 'trust', 'democ',
        'lnpopulation', 'lnArea', 'lnGDP_pc', 'protestants', 'muslims',
        'catholics', 'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist',
        'LOScandin', 'mtnall'
    ]].dropna(axis=0)
    democracies = sample[sample.democ > 1]

    dep_full = sample['trust']
    dep_democ = democracies['trust']

    # Regressor sets: segregation only / with controls / democracies only.
    exo1 = sm.add_constant(sample[segregation])
    exo2 = sm.add_constant(
        sample[[segregation, fractionalization] + controls])
    exo3 = sm.add_constant(
        democracies[[segregation, fractionalization] + controls])

    # Matching instrument sets for the 2SLS models.
    ins1 = sm.add_constant(sample[instrument])
    ins2 = sm.add_constant(
        sample[[instrument, fractionalization] + controls])
    ins3 = sm.add_constant(
        democracies[[instrument, fractionalization] + controls])

    models = [
        sm.OLS(dep_full, exo1).fit(cov_type='HC1'),
        sm.OLS(dep_full, exo2).fit(cov_type='HC1'),
        sm.OLS(dep_democ, exo3).fit(cov_type='HC1'),
        IV2SLS(dep_full, exo1, ins1).fit(),
        IV2SLS(dep_full, exo2, ins2).fit(),
        IV2SLS(dep_democ, exo3, ins3).fit(),
    ]

    stargazer = Stargazer(models)
    stargazer.covariate_order([segregation, fractionalization])
    # Raw string: '\h' is not a valid escape in a normal literal.
    stargazer.rename_covariates({
        segregation: r'Segregation $\hat{S}$ (' + f'{name})',
        fractionalization: f'Fractionalization $F$ ({name})'
    })
    stargazer.custom_columns(['OLS', 'OLS', 'OLS', '2SLS', '2SLS', '2SLS'],
                             [1, 1, 1, 1, 1, 1])
    stargazer.add_line('Controls', ['No', 'Yes', 'Yes', 'No', 'Yes', 'Yes'])
    stargazer.add_line('Sample',
                       ['Full', 'Full', 'Democ', 'Full', 'Full', 'Democ'])

    if name == 'ethnicity':
        stargazer.title('Panel A. Ethnicity')
    else:
        stargazer.title('Panel B. Language')
    return stargazer
# Model 3: temperature, wind speed and humidity
model3 = ols(formula='cnt ~ temp_celsius + windspeed_kh + hum',
             data=wbr).fit()
print(model3.summary2())

# Model 4: model 3 plus the working-day dummy
model4 = ols(formula='cnt ~ temp_celsius + windspeed_kh + hum + workingday',
             data=wbr).fit()
print(model4.summary2())

# Author's notes on reading the coefficients:
# - intercept: number of bikes rented when all other variables are 0
# - temp_celsius: each 1-degree increase raises rentals by 161
# - workingday: 4009 on non-working days; net of the other variables,
#   125 more
#!pip install stargazer
from stargazer.stargazer import Stargazer

# Generates HTML code
Stargazer([model1, model2, model3, model4]).render_html()

# When a relationship rises and then falls, compute the squared term and
# subtract it.
# When a nominal variable has more than 2 categories, build one 0/1 dummy
# per category:
for season_code in (1, 2, 3, 4):
    dummy_name = f"S{season_code}"
    wbr[dummy_name] = 0
    wbr.loc[(wbr['season'] == season_code), dummy_name] = 1
# When creating dummies, always leave one of them out of the model so it
# acts as the reference for the others (the most frequent category).
os.getcwd()
os.chdir(
    "/Users/manunavjeevan/Desktop/UCLA/Second Year/Winter 2020/IO/Problem Set 1"
)

data = pd.read_csv('dataCleaned.csv')
data.head()
data

# Part 1: Logit
# Regress the logged share differences on price and promotion.
base_regressors = ['price', 'prom']
y = data['shareDiff']
x = data[base_regressors]
#x = sm.add_constant(x)
model1 = sm.OLS(y, x).fit()
print(model1.summary())
print(Stargazer([model1]).render_latex())
summary_col([model1]).as_latex()

# Same regression with one dummy column per brand added.
brandDummies = pd.get_dummies(data['brand'], prefix='brand')
x = data[base_regressors].join(brandDummies)
#x = sm.add_constant(x)
model2 = sm.OLS(y, x).fit()
print(model2.summary())
print(Stargazer([model2]).render_latex())
print(summary_col([model2]).as_latex())

# Price, promotion and store*brand: encode each store/brand pair as
# store + brand / 100, then expand that code into dummy columns.
data['storeBrand'] = data['store'] + data['brand'] / 100
storeBrandDummies = pd.get_dummies(data['storeBrand'])
storeBrandDummies
# Fit the base model (x1) and the extended model (x2), each with a constant.
x = sm.add_constant(x1)
x_withfemale = sm.add_constant(x2)

model1 = sm.OLS(y, x).fit()
model2 = sm.OLS(y, x_withfemale).fit()

model1.summary()
model2.summary()

# =============================================================================
# STARGAZER MODEL OUTPUTS
# =============================================================================
from stargazer.stargazer import Stargazer

stargazer = Stargazer([model1, model2])
# Column header typo fixed: 'Spesified' -> 'Specified'.
stargazer.custom_columns(['Base Model', 'Specified Model'], [1, 1])
stargazer.significant_digits(2)
stargazer.covariate_order([
    'const', 'propwomen', 'oppospower', 'gdpcap', 'sepowerdist',
    'youthunemp'
])
# Display labels for the covariates.
stargazer.rename_covariates({
    'const': 'Constant',
    'oppospower': 'Opposition Power',
    'gdpcap': 'GDP($)',
    'sepowerdist': 'Class Political Power',
    'youthunemp': 'Unemployed Youth %',
    'propwomen': 'Female Property Rights'
})
# The next three statements are the tail of a function whose header lies
# above this excerpt.
tbl.show_f_statistic = False
tbl.show_notes = True
return tbl
###
# Load the table specification from JSON.
fname = "src/tablespecs/table_01.json"
with open(fname) as json_file:
    json_dict = json.load(json_file)
# Full path of each model file: MODEL_PATH prefix + entry of MODELS.
models = json_dict['MODELS']
models = [json_dict["MODEL_PATH"] + i for i in models]
# Load every saved model with sm.load.
reg = []
for iModel in models:
    reg.append(sm.load(iModel))
stargazer = Stargazer(reg)
stargazer2 = publish_table(stargazer, json_dict)
# Print the RESTRICTIONS entry between separators, then the LaTeX table
# rendered with only_tabular=True.
print('---')
print(json_dict["RESTRICTIONS"])
print('---')
print(stargazer2.render_latex(only_tabular=True))
"""
# Model of cnt on wind speed only. (The quotes above close a string that
# opens before this excerpt.)
model2 = ols('cnt ~ windspeed_kh', data=wbr).fit()
model2.summary2()
# English: next model, based on temperature and windspeed_kh. As the model
# shows, including new variables changes the influence of the variables.
"""
Siguiente modelo basado en la temperatura y el windspeed_kh
Como podemos ver en el modelo, al incluir nuevas variables cambia la
influencia de las variables.
"""
model3 = ols('cnt ~ temp_celsius + windspeed_kh', data=wbr).fit()
model3.summary2()
# Histogram of the humidity column.
wbr.hum.hist()
# English: next model, based on the humidity variable.
"""
Siguiende modelo basado con la variable humedad
"""
model4 = ols('cnt ~ hum', data=wbr).fit()
model4.summary2()
# English: next model, based on temperature, windspeed_kh and hum.
"""
Siguiente modelo basado en la temperatura, el windspeed_kh y hum
"""
model5 = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr).fit()
model5.summary2()
# English: stargazer helps to present all the models together.
"""
stargazer ayuda a representar todos los modelos.
"""
#!pip install stargazer
from stargazer.stargazer import Stargazer
# model1 is defined above this excerpt.
stargazer = Stargazer([model1, model2, model3, model4, model5])
stargazer.render_html()
# Two predictors: temp_celsius and windspeed_kh
model2 = ols(formula="cnt ~ temp_celsius + windspeed_kh", data=wbr).fit()
model2.summary2()

# Single predictor: hum (histogram first, then the fit)
wbr.hum.hist()
model1_h = ols(formula="cnt ~ hum", data=wbr).fit()
model1_h.summary2()

# Three predictors: temp_celsius, windspeed_kh and hum
model3 = ols(formula="cnt ~ temp_celsius + windspeed_kh + hum",
             data=wbr).fit()
model3.summary2()

# Side-by-side comparison of the models; the bare names are notebook-style
# display expressions, shown before and after the title is set.
stargazer = Stargazer([model1_t, model2, model3])
stargazer
stargazer.title("Table 1. A model of bicycle demand in Washington D.C.")
stargazer

#####################
# REGRESSION WITH DUMMIES

# Single predictor: the workingday dummy
model1_wd = ols(formula="cnt ~ workingday", data=wbr).fit()
model1_wd.summary2()

# Four predictors: temp_celsius, windspeed_kh, hum and workingday
model4 = ols(formula="cnt ~ temp_celsius + windspeed_kh + hum + workingday",
             data=wbr).fit()
model4.summary2()
def get_table_4and7(dependent_var, data):
    '''
    Fit five OLS models of ``dependent_var`` and collect them in one table.

    Models 1-2 use the "state" dummy, models 3-5 use "GAP"; models 2, 4
    and 5 add the chain/ownership columns and model 5 also adds the region
    columns. The mean and standard deviation of the dependent variable are
    printed as a side effect.

    argument: dependent variable (column name), dataset
    return: Stargazer table (table 4 or table 7, depending on the dataset)
    '''
    chain_ownership = ["bk", "kfc", "roys", "co_owned"]
    regions = ["southj", "centralj", "pa1", "pa2"]

    def fit_on(regressors):
        # OLS of the dependent variable on the given column(s) plus constant.
        design = sm_api.add_constant(data[regressors])
        return sm_api.OLS(data[dependent_var], design).fit()

    model_1 = fit_on("state")
    model_2 = fit_on(["state"] + chain_ownership)
    model_3 = fit_on("GAP")
    model_4 = fit_on(["GAP"] + chain_ownership)
    model_5 = fit_on(["GAP"] + chain_ownership + regions)

    # p-values of the joint F-tests, rounded to three decimals.
    F2 = model_2.f_test(
        '(state = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'
    ).pvalue.round(3)
    F4 = model_4.f_test(
        '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0)'
    ).pvalue.round(3)
    F5 = model_5.f_test(
        '(GAP = 0), (bk = 0), (kfc = 0), (roys =0),(co_owned= 0),(const=0), (southj=0),(centralj=0),(pa1=0),(pa2=0)'
    ).pvalue.round(3)

    Table = Stargazer([model_1, model_2, model_3, model_4, model_5])
    Table.rename_covariates({
        'state': 'New Jersey dummy',
        'GAP': 'Initial wage GAP'
    })
    Table.add_line('Controls for chain and ownership',
                   ['No', 'Yes', 'No', 'Yes', 'Yes'])
    Table.add_line('Controls for region', ['No', 'No', 'No', 'No', 'Yes'])
    # The p-value row is only shown for the employment-change outcome.
    if dependent_var == "change_in_FTE":
        Table.add_line('Probability value for controls',
                       ['-', F2, '-', F4, F5])
    Table.title("Models for " + dependent_var)
    Table.covariate_order(['state', 'GAP'])

    outcome_mean = data[dependent_var].mean()
    outcome_std = data[dependent_var].std()
    print("The mean and standard deviation of the dependent variable are",
          outcome_mean, "and", outcome_std, ",respectively.")
    return Table
def table4_5(df, name):
    """Build OLS governance-indicator tables for one fragmentation dimension.

    Six outcomes (voice, PolStab, GovEffec, RegQual, RulLaw, ConCorr) are
    regressed with HC1 covariance on ``{name}_C2`` and ``{name}_I``. Three
    panels are estimated on the complete-case sample:

    * A: all controls, full sample
    * B: no controls, full sample
    * C: all controls, rows with ``democ > 1``

    Args:
        df: DataFrame with the dimension columns, controls and outcomes.
        name: column prefix, e.g. 'ethnicity' or 'language'.

    Returns:
        List of the three configured ``Stargazer`` objects (panels A, B, C).
    """
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    # Column order of each table.
    outcomes = ['voice', 'PolStab', 'GovEffec', 'RegQual', 'RulLaw',
                'ConCorr']

    full_sample = df[[segregation, fractionalization] + controls + [
        'voice', 'PolStab', 'GovEffec', 'RegQual', 'ConCorr', 'RulLaw'
    ]].dropna(axis=0)
    democracies = full_sample[full_sample.democ > 1]

    # (sample, extra regressors) for panels A, B and C.
    panels = [(full_sample, controls), (full_sample, []),
              (democracies, controls)]

    star = []
    for sample, extra in panels:
        exog = sm.add_constant(
            sample[[segregation, fractionalization] + extra])
        fits = [
            sm.OLS(sample[outcome], exog).fit(cov_type='HC1')
            for outcome in outcomes
        ]
        star.append(Stargazer(fits))

    for table in star:
        table.covariate_order([segregation, fractionalization])
        # Raw string: '\h' is not a valid escape in a normal literal.
        table.rename_covariates({
            segregation: r'Segregation $\hat{S}$ (' + f'{name})',
            fractionalization: f'Fractionalization $F$ ({name})'
        })
        table.show_model_numbers(False)
        table.custom_columns([
            'Voice', 'Political stability', 'Govern-t effectiv.',
            'Regul. quality', 'Rule of law', 'Control of corr'
        ], [1, 1, 1, 1, 1, 1])

    star[0].add_line('Controls', ['Yes'] * 6)
    star[0].add_line('Sample', ['Full'] * 6)
    star[1].add_line('Controls', ['No'] * 6)
    star[1].add_line('Sample', ['Full'] * 6)
    star[2].add_line('Controls', ['Yes'] * 6)
    star[2].add_line('Sample', ['Democ'] * 6)
    star[0].title('Panel A. Baseline : All controls and full sample')
    star[1].title('Panel B. No controls and full sample')
    star[2].title('Panel C. All controls; sample excludes dictatorship')
    return [star[0], star[1], star[2]]
def Appendix_Table_1(df):
    """Build Appendix Table 1 (belief adjustment, direct vs. one month later).

    Six OLS regressions with HC1 robust standard errors of
    ``beliefadjustment_normalized`` are estimated, using the
    ``dummynews_goodbad_h`` news indicator throughout:

    * (1)-(2): rows with ``dummynews_goodbad_h == 0``, without and with the
      ``rank`` / ``beliefadjustment_bayes_norm`` controls.
    * (3)-(4): the same two specifications for ``dummynews_goodbad_h == 1``.
    * (5)-(6): difference-in-difference on all rows, without and with the
      controls.

    Side effect: the column ``interact_direct1month`` is (over)written on
    the caller's ``df``.

    Args:
        df: DataFrame with the outcome, treatment, news and control columns.

    Returns:
        Stargazer: the configured six-column table.
    """
    news = "dummynews_goodbad_h"
    subsample_columns = [
        "beliefadjustment_normalized", "dummytreat_direct1month", "rank",
        "beliefadjustment_bayes_norm"
    ]
    formula_plain = "beliefadjustment_normalized ~ dummytreat_direct1month"
    formula_controls = (
        "beliefadjustment_normalized ~ dummytreat_direct1month + rank"
        " + beliefadjustment_bayes_norm")

    regressions = []
    for news_value in (0, 1):
        subsample = df.loc[df[news] == news_value, subsample_columns]
        for formula in (formula_plain, formula_controls):
            regressions.append(
                smf.ols(formula=formula, data=subsample).fit(cov_type='HC1'))

    # Generating interaction term.
    # The interaction must use the same news dummy that enters the
    # regressions as a main effect; the previous code multiplied by
    # ``dummynews_goodbad`` while the formulas use ``dummynews_goodbad_h``.
    df["interact_direct1month"] = df["dummytreat_direct1month"] * df[news]

    did_formulas = (
        "beliefadjustment_normalized ~ dummytreat_direct1month"
        " + dummynews_goodbad_h + interact_direct1month",
        "beliefadjustment_normalized ~ dummytreat_direct1month"
        " + dummynews_goodbad_h + rank + interact_direct1month"
        " + beliefadjustment_bayes_norm",
    )
    for formula in did_formulas:
        regressions.append(
            smf.ols(formula=formula, data=df).fit(cov_type='HC1'))

    table = Stargazer(regressions)
    table.title(
        'Appendix Table 1 - Belief Adjustment: Direct versus One Month Later')
    table.dependent_variable_name('Normalized Belief Adjustment - ')
    table.custom_columns([
        'Positive Information', 'Negative Information',
        'Difference-in-difference'
    ], [2, 2, 2])
    return table
def table6(df, alternative=True):
    """Build the three panels of regressions of segregation on its instrument.

    For each of ``ethnicity``, ``language`` and ``religion`` the segregation
    index is regressed by OLS (HC1 robust standard errors) on the matching
    ``{dim}_instrument_*_thresh`` column, ``{dim}_I`` and the controls, once
    on the complete-case sample and once on rows with ``democ >= 1``:

    * Panel A: index ``{dim}_C2`` with instrument ``{dim}_instrument_C2_thresh``.
    * Panel B: index ``{dim}_C`` with instrument ``{dim}_instrument_C_thresh``.
    * Panel C: Panel B's language regressions with the row whose
      ``country`` is ``'usa'`` removed.

    Args:
        df: DataFrame with the index, instrument, control, ``RulLaw`` and
            ``country`` columns.
        alternative: Unused; kept so existing callers keep working.

    Returns:
        list: ``[panel_a, panel_b, panel_c]`` as configured ``Stargazer``
        objects.

    Raises:
        KeyError: if ``'usa'`` is missing from either language sample
            (raised by ``DataFrame.drop``).
    """
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    dimensions = ['ethnicity', 'language', 'religion']

    def regressors(dim, index):
        """Return the right-hand-side columns for one regression."""
        # NOTE(review): the religion regressions leave out 'LOScandin';
        # reproduced as found - confirm this is intentional.
        used_controls = [
            c for c in controls
            if not (dim == 'religion' and c == 'LOScandin')
        ]
        return [f'{dim}_instrument_{index}_thresh', f'{dim}_I'] + used_controls

    def fit(sample, dim, index):
        """Regress ``{dim}_{index}`` on its instrument and the controls."""
        exog = sm.add_constant(sample[regressors(dim, index)])
        return sm.OLS(sample[f'{dim}_{index}'], exog).fit(cov_type='HC1')

    def configure(table, index, dims, column_labels, sample_row, title):
        """Apply covariate order, labels, column headers and title."""
        order = []
        labels = {}
        for dim in dims:
            instrument = f'{dim}_instrument_{index}_thresh'
            order += [instrument, f'{dim}_I']
            labels[instrument] = f'Instrument {dim[0].upper()}'
            labels[f'{dim}_I'] = f'$F$ ({dim})'
        table.covariate_order(order)
        table.rename_covariates(labels)
        table.custom_columns(column_labels, [1] * len(column_labels))
        table.show_model_numbers(False)
        table.add_line('Sample', sample_row)
        table.title(title)
        return table

    # Complete-case sample per dimension, and its democracies-only subset.
    full = {}
    for dim in dimensions:
        columns = [
            f'{dim}_C2', f'{dim}_I', f'{dim}_C',
            f'{dim}_instrument_C_thresh', f'{dim}_instrument_C2_thresh',
            *controls, 'RulLaw', 'country'
        ]
        full[dim] = df[columns].dropna(axis=0)
    democracies = {dim: full[dim][full[dim].democ >= 1] for dim in dimensions}

    # Column order: E, L, R on the full sample, then E, L, R on democracies.
    cells = ([(full[dim], dim) for dim in dimensions] +
             [(democracies[dim], dim) for dim in dimensions])
    panel_a = Stargazer([fit(sample, dim, 'C2') for sample, dim in cells])
    panel_b = Stargazer([fit(sample, dim, 'C') for sample, dim in cells])

    # Panel C: language only, excluding the US (full and democracies).
    without_us = [
        sample.set_index('country').drop(index='usa')
        for sample in (full['language'], democracies['language'])
    ]
    panel_c = Stargazer([fit(sample, 'language', 'C') for sample in without_us])

    six_samples = ['Full'] * 3 + ['Democracy'] * 3
    # Raw strings keep the LaTeX backslashes without invalid '\h' escapes.
    configure(panel_a, 'C2', dimensions,
              [initial + r'$\hat{S}$' for initial in 'ELR'] * 2,
              six_samples,
              r'Panel A. Segregation index $\hat{S}$')
    configure(panel_b, 'C', dimensions,
              [initial + r'$\tilde{S}$' for initial in 'ELR'] * 2,
              six_samples,
              r'Panel B. Segregation index $\tilde{S}$')
    configure(panel_c, 'C', ['language'],
              [r'L$\tilde{S}$', r'L$\tilde{S}$'],
              ['Full', 'Democracy'],
              r'Panel C. Segregation index $\tilde{S}$ for language with '
              r'sample excluding the US')

    return [panel_a, panel_b, panel_c]
def setUp(self):
    """Create the fixture: a tiny frame, two OLS fits and their Stargazer.

    ``est1`` is fitted without an intercept (``a ~ 0 + b``) and ``est2``
    with one (``a ~ 1 + b``); both are wrapped in ``self.stargazer``.
    """
    self.df = pd.DataFrame({
        'a': list(range(9)),
        'b': list(range(0, 18, 2)),
    })
    models = []
    for formula in ('a ~ 0 + b', 'a ~ 1 + b'):
        models.append(smf.ols(formula, self.df).fit())
    self.est1, self.est2 = models
    self.stargazer = Stargazer([self.est1, self.est2])
def table10_11(df, name, democ):
    """Build one panel of paired OLS / 2SLS regressions for five outcomes.

    Each outcome (``icrg_qog``, ``ef_corruption``, ``ef_property_rights``,
    ``ef_regul``, ``taxevas``) is estimated twice: by OLS with HC1 robust
    standard errors, and by ``IV2SLS`` where ``{name}_instrument_C2_thresh``
    takes the place of ``{name}_C2`` in the instrument matrix.

    Three complete-case samples are used, one per outcome group (ICRG, the
    three EF outcomes, tax evasion). Missing values are dropped over the
    measure, instrument, all control columns and the group's outcomes, even
    when the controls are not used as regressors.

    Args:
        df: DataFrame with the measure, instrument, control and outcome
            columns.
        name: Prefix of the measure columns (``{name}_C2``, ``{name}_I``,
            ``{name}_instrument_C2_thresh``).
        democ: ``'democracy'`` restricts every sample to ``democ >= 1`` and
            includes all controls (Panel B); any other value uses the full
            samples without additional controls (Panel A).

    Returns:
        Stargazer: the configured ten-column table.
    """
    controls = [
        'lnpopulation', 'lnGDP_pc', 'protestants', 'muslims', 'catholics',
        'latitude', 'LOEnglish', 'LOGerman', 'LOSocialist', 'LOScandin',
        'democ', 'mtnall'
    ]
    segregation = f'{name}_C2'
    fractionalization = f'{name}_I'
    instrument = f'{name}_instrument_C2_thresh'
    base_columns = [segregation, fractionalization, instrument] + controls

    # One complete-case sample per outcome group.
    outcome_groups = [
        ['icrg_qog'],
        ['ef_regul', 'ef_corruption', 'ef_property_rights'],
        ['taxevas'],
    ]
    samples = [
        df[base_columns + group].dropna(axis=0) for group in outcome_groups
    ]

    democracies_only = democ == 'democracy'
    if democracies_only:
        samples = [sample[sample.democ >= 1] for sample in samples]
        extra = controls
    else:
        extra = []
    exog_columns = [fractionalization, segregation] + extra
    instrument_columns = [fractionalization, instrument] + extra

    # (sample position, outcome) in the order of the table's column groups.
    specifications = [
        (0, 'icrg_qog'),
        (1, 'ef_corruption'),
        (1, 'ef_property_rights'),
        (1, 'ef_regul'),
        (2, 'taxevas'),
    ]
    estimates = []
    for position, outcome in specifications:
        sample = samples[position]
        endog = sample[outcome]
        exog = sm.add_constant(sample[exog_columns])
        instruments = sm.add_constant(sample[instrument_columns])
        estimates.append(sm.OLS(endog, exog).fit(cov_type='HC1'))
        estimates.append(IV2SLS(endog, exog, instruments).fit())

    stargazer = Stargazer(estimates)
    stargazer.custom_columns([
        'ICRG quality of gov', 'EF Corruption', 'EF Property rights',
        'EF Regulation', 'Tax eva'
    ], [2, 2, 2, 2, 2])
    stargazer.show_model_numbers(False)
    stargazer.covariate_order([segregation, fractionalization])
    # Raw string: '\h' is not a valid escape in a normal literal.
    stargazer.rename_covariates({
        segregation: r'Segregation $\hat{S}$ (' f'{name})',
        fractionalization: f'Fractionalization $F$ ({name})'
    })
    stargazer.add_line('Method', ['OLS', '2SLS'] * len(specifications))

    if democracies_only:
        stargazer.title('Panel B. Democracies sample, all controls')
    else:
        stargazer.title('Panel A. Full sample, no additional controls')
    return stargazer
def Main_Table_1(df):
    """Build Table 1 (belief adjustment, direct vs. one month later).

    Eight OLS regressions of ``beliefadjustment_normalized`` with HC1 robust
    standard errors:

    * (1)-(2): rows with ``dummynews_goodbad == 0``, without and with the
      ``rank`` / ``beliefadjustment_bayes_norm`` controls.
    * (3)-(4): the same two specifications for ``dummynews_goodbad == 1``.
    * (5)-(6): difference-in-difference on all rows.
    * (7)-(8): difference-in-difference with the ``rankdummy1..9`` and
      ``rankdummy1..9_interact`` columns added.

    Side effect: the column ``interact_direct1month`` is (over)written on
    the caller's ``df``.

    Args:
        df: DataFrame with the outcome, treatment, news, control and rank
            dummy columns.

    Returns:
        Stargazer: the configured eight-column table.
    """
    outcome = "beliefadjustment_normalized"
    treatment = "dummytreat_direct1month"
    news = "dummynews_goodbad"
    bayes = "beliefadjustment_bayes_norm"
    interaction = "interact_direct1month"

    def hc1_fit(rhs_terms, data):
        # Assemble "y ~ a + b + ..." and fit with HC1 standard errors.
        formula = outcome + " ~ " + " + ".join(rhs_terms)
        return smf.ols(formula=formula, data=data).fit(cov_type='HC1')

    fits = []

    # Columns (1)-(4): one pair of regressions per news subsample.
    for news_value in (0, 1):
        rows = df[news] == news_value
        subsample = df.loc[rows, [outcome, treatment, "rank", bayes]]
        fits.append(hc1_fit([treatment], subsample))
        fits.append(hc1_fit([treatment, "rank", bayes], subsample))

    # Generating interaction term (stored on the caller's frame).
    df[interaction] = df[treatment] * df[news]

    # Columns (5)-(6): difference-in-difference on the pooled data.
    fits.append(hc1_fit([treatment, news, interaction], df))
    fits.append(hc1_fit([treatment, news, "rank", interaction, bayes], df))

    # Columns (7)-(8): rank dummies and their interactions replace `rank`.
    rank_terms = [f"rankdummy{k}" for k in range(1, 10)]
    rank_terms += [f"rankdummy{k}_interact" for k in range(1, 10)]
    fits.append(hc1_fit([treatment, news, interaction] + rank_terms, df))
    fits.append(
        hc1_fit([treatment, news, interaction, bayes] + rank_terms, df))

    table = Stargazer(fits)
    table.title('Table 1 - Belief Adjustment: Direct versus One Month Later')
    table.dependent_variable_name('Normalized Belief Adjustment - ')
    column_groups = [
        'Positive Information', 'Negative Information',
        'Difference-in-difference',
        'Difference-in-difference with rank fixed effects'
    ]
    table.custom_columns(column_groups, [2, 2, 2, 2])
    return table
def get_table3(df):
    """Build Table 3: effects on standardized grades.

    Two regressions of ``stdgrade`` are estimated with observation weights
    ``kwgt`` and standard errors clustered on ``studentid``: a pooled
    specification, and one with the treatment and polynomial terms
    interacted with the ``vol`` / ``for`` course indicators.

    The models are fitted with ``smf.wls``. The previous ``smf.ols`` call
    was given ``weights=...``, but statsmodels OLS does not apply weights,
    so the ``kwgt`` weighting was silently dropped.

    Args:
        df: DataFrame with ``stdgrade``, the regressors named in the
            formulas, ``kwgt`` and ``studentid``.

    Returns:
        Stargazer: the configured two-column table.
    """
    def weighted_fit(formula):
        # Weighted least squares with student-clustered standard errors.
        model = smf.wls(formula=formula, data=df, weights=df["kwgt"])
        return model.fit(cov_type='cluster',
                         cov_kwds={'groups': df["studentid"]})

    ### regressions:
    rslt1 = weighted_fit("stdgrade ~ treat + pol1+ pol1t")
    formula2 = ("stdgrade ~ treat + treatmentvol + treatmentfor + volcourse"
                " + forcourse + pol1 + pol1t + pol1vol + pol1tvol + pol1for"
                " + pol1tfor")
    rslt2 = weighted_fit(formula2)

    ### Table stargazer:
    stargazer = Stargazer([rslt1, rslt2])
    stargazer.custom_columns(["column 1", "column 4"], [1, 1])
    stargazer.title("Table 3 - Effects on standardized grades")
    stargazer.show_model_numbers(False)
    stargazer.significant_digits(2)
    # Only the treatment effect and its two interactions are displayed.
    stargazer.covariate_order(["treat", "treatmentvol", "treatmentfor"])
    stargazer.rename_covariates({
        "treat": "1st-year GPA is below 7",
        "treatmentvol": "Attendance is voluntary x treatment",
        "treatmentfor": "Absence is penalized x treatment"
    })
    stargazer.show_degrees_of_freedom(False)
    stargazer.add_line('Fixed Effects', ['No', 'No'])
    return stargazer
model1a = ols('cnt ~ temp_celsius', data=wbr).fit() #Primero y, luego x aquí model1a.summary2() model1b = ols('cnt ~ windspeed_kh', data=wbr).fit() model1b.summary2( ) #Es significativa también y negativa, pero R2 es mucho menor: solo el 6% depende de la variabilidad en el viento ### model2 = ols('cnt ~ temp_celsius + windspeed_kh', data=wbr).fit() #Dos predictores ahora model2.summary2() ### wbr.hum.hist() #Describir primero SIEMPRE model1c = ols('cnt ~ hum', data=wbr).fit() model1c.summary2() model3 = ols('cnt ~ temp_celsius + windspeed_kh + hum', data=wbr).fit() model3.summary2() #Aumenta R2 respecto a model2 y cambian los coeficientes ### Para reportar #!pip install stargazer from stargazer.stargazer import Stargazer stargazer = Stargazer([model1a, model2, model3]) stargazer.render_html()
# 5. Estimating regression of the return on each strategy on FF 5 factors:
reg_df = strategy_ret_df.copy()
reg_df = pd.merge(reg_df, ff_df, left_index=True, right_index=True)
strategy_name_list = list(strategy_ret_df.columns)

results_list = []
for name in strategy_name_list:
    # to have the same name for all variables
    reg_df_tmp = reg_df.rename({name: "ret"}, axis=1)
    # Every column is scaled by 12 before fitting
    # (presumably annualizing monthly data - confirm).
    results_list.append(
        smf.ols(formula="ret ~ MKT + SMB + HML + CMA + RMW",
                data=reg_df_tmp * 12).fit())

# Outputting short regression results:
stargazer = Stargazer([results_list[0], results_list[3], results_list[6]])
stargazer.custom_columns(['D 30', 'prob 20', 'prob 40'], [1, 1, 1])
stargazer.covariate_order(['Intercept', 'MKT', 'SMB', 'HML', 'RMW', 'CMA'])
stargazer.show_degrees_of_freedom(False)
# Context manager so the file is closed even if rendering or writing fails.
with open(
        "/Users/rsigalov/Dropbox/2019_Revision/Writing/Predictive Regressions/tables/disaster_sort_reg_on_ff.tex",
        "w") as f:
    f.write(stargazer.render_latex())

# Doing extended regression table where I do regressions of strategy return on
# (1) just the market, (2) FF 3 factors and (3) FF 5 factors.
results_list = []
# NOTE(review): as visible here the loop only rebinds reg_df_tmp and never
# appends to results_list; the rest of the loop body is presumably outside
# this excerpt.
for name in ["D_30", "p_20_30"]:
    # to have the same name for all variables
    reg_df_tmp = reg_df.rename({name: "ret"}, axis=1)
def ols_regression_formatted(data,
                             specifications,
                             as_latex=False,
                             covariates_names=None,
                             covariates_order=None):
    """Create one formatted regression table per dependent variable.

    For every dependent variable, each specification is estimated by OLS
    (``smf.ols``) and the fits are collected in one ``Stargazer`` table with
    degrees of freedom hidden and a blank dependent-variable caption.

    Args:
        data (DataFrame): Data containing all variables used in the
            formulas.
        specifications (dict): Dependent variable name -> list of
            right-hand-side formula strings.
        as_latex (bool): If truthy, store LaTeX code with the surrounding
            ``table`` float environment stripped instead of the
            ``Stargazer`` object.
        covariates_names (dict): Optional mapping from covariate names as
            they appear in the fitted models to display names.
        covariates_order (list): Optional preferred covariate order. Names
            that do not occur for a dependent variable are ignored, the
            remaining covariates follow in the order they first appear
            across the specifications, and ``Intercept`` (when present) is
            always placed last.

    Returns:
        dict: Dependent variable name -> ``Stargazer`` table, or LaTeX
        string when ``as_latex`` is set.
    """
    # Create dictionary which connects dependent variables with formatted tables
    dict_regression_tables = {}

    # Generate regressions
    for depvar, specification_list in specifications.items():
        regression_list = []
        # A dict serves as an insertion-ordered set, so the covariate order
        # is reproducible across runs (a plain set() is not, because string
        # hashing is randomized).
        seen_covariates = {}
        for specification in specification_list:
            estimation_equation = depvar + " ~ " + specification
            regression = smf.ols(data=data, formula=estimation_equation).fit()
            regression_list.append(regression)
            seen_covariates.update(dict.fromkeys(regression.params.index))
        list_all_covariates = list(seen_covariates)

        # Format table with stargazer
        formatted_table = Stargazer(regression_list)

        # No degrees of freedom and blank dependent variable
        formatted_table.show_degrees_of_freedom(False)
        formatted_table.dependent_variable_name("")

        # Optional: Change order of covariates
        if covariates_order is not None:
            present = set(list_all_covariates)
            requested = [
                name for name in dict.fromkeys(covariates_order)
                if name in present
            ]
            placed = set(requested)
            remaining = [
                name for name in list_all_covariates if name not in placed
            ]
            covariates_sorted = [
                name for name in requested + remaining if name != "Intercept"
            ]
            # Move the intercept to the end, but only if some model has one:
            # intercept-free specifications used to raise ValueError here.
            if "Intercept" in present:
                covariates_sorted.append("Intercept")
            formatted_table.covariate_order(covariates_sorted)

        # Optional: Change name of covariates
        if covariates_names is not None:
            formatted_table.rename_covariates(covariates_names)

        # Add table or latex code to dictionary
        if as_latex:
            latex_code = formatted_table.render_latex()
            # Strip the surrounding table float environment
            latex_code = latex_code.replace(
                "\\begin{table}[!htbp] \\centering\n", "")
            latex_code = latex_code.replace("\\end{table}", "")
            dict_regression_tables[depvar] = latex_code
        else:
            dict_regression_tables[depvar] = formatted_table

    return dict_regression_tables