def random_effects(panel_data, formula, weights=None, cov="unadjusted"):
    """
    Fit a Random Effects model from a formula, print its summary and return the fit.

    The model can be estimated with or without a constant; to include one, put
    an intercept in the formula ('y ~ 1 + x1 + ...'). Random effects is the
    usual choice when the unobserved entity effects are assumed to be
    uncorrelated with the regressors. Remember to assign the return value to
    an object.

    :param panel_data : dataframe (which must be in a panel structure)
    :param formula    : patsy formula
    :param weights    : N x 1 Series or vector containing weights to be used in
                        estimation; defaults to None. Recommended when analyzing
                        survey data, passing on the weight available in the survey.
                        When None, no `weights` argument is forwarded to the model.
    :param cov        : str
                        unadjusted: common standard errors
                        robust: robust standard errors
                        kernel: robust to heteroskedasticity AND serial autocorrelation
                        clustered: clustered standard errors by the entity column
    :return           : the object returned by ``RandomEffects.from_formula(...).fit(...)``
                        (the fitted results, whose ``summary`` is printed)
    """
    # Build the model; `weights` is only passed on when the caller supplied it.
    model_kwargs = {"formula": formula, "data": panel_data}
    if weights is not None:
        model_kwargs["weights"] = weights
    model = RandomEffects.from_formula(**model_kwargs)

    # 'clustered' additionally asks for clustering on the entity dimension;
    # any other value is handed to `fit` unchanged as the covariance type.
    if cov == 'clustered':
        fit_kwargs = {"cov_type": 'clustered', "cluster_entity": True}
    else:
        fit_kwargs = {"cov_type": cov}
    results = model.fit(**fit_kwargs)

    print(results.summary)
    return results
def hausman_fe_re(panel_data, inef_formula, weights=None, cov="unadjusted", level=0.05):
    """
    Execute a Hausman test and print the verdict.

    H0: there is no correlation between the unobserved effects and the
    independent variables. The function only prints its conclusion and returns
    None, so it does not need to be assigned to an object. Remember to include
    an intercept in the formula.

    The same formula is fitted twice: once with ``RandomEffects`` and once with
    ``PanelOLS`` after appending ``' + EntityEffects'`` (fixed effects).

    :param panel_data   : dataframe (which must be in a panel structure)
    :param inef_formula : patsy formula for the inefficient model under H0 (fixed effects),
                          written WITHOUT the ``EntityEffects`` term (it is appended here)
    :param weights      : N x 1 Series or vector containing weights to be used in
                          estimation; defaults to None. Recommended when analyzing
                          survey data, passing on the weight available in the survey.
    :param cov          : str
                          unadjusted: common standard errors
                          robust: robust standard errors
                          kernel: robust to heteroskedasticity AND serial autocorrelation
    :param level        : significance level for the test. Defaults to 5%.
    :return             : None (results are printed)
    """
    # Only forward `weights` when the caller supplied them.
    extra = {} if weights is None else {"weights": weights}

    ## Random Effects
    random = RandomEffects.from_formula(
        formula=inef_formula, data=panel_data, **extra
    ).fit(cov_type=cov)

    ## Fixed Effects
    formula_fe = inef_formula + ' + EntityEffects'
    fixed = PanelOLS.from_formula(
        formula=formula_fe, data=panel_data, drop_absorbed=True, **extra
    ).fit(cov_type=cov)

    ## Computing the Hausman statistic
    # `drop_absorbed=True` lets the fixed-effects fit discard regressors that the
    # entity effects absorb, so the two fits may not share every coefficient.
    # Subtracting label-aligned pandas objects with different labels would fill
    # the gaps with NaN and silently turn the statistic into NaN, so the
    # comparison is restricted to the coefficients estimated by BOTH models.
    common = fixed.params.index.intersection(random.params.index)

    # Difference between asymptotic variances
    var_assin = fixed.cov.loc[common, common] - random.cov.loc[common, common]
    # Difference between parameters
    d = fixed.params.loc[common] - random.params.loc[common]
    # Calculating H (statistic)
    H = d.dot(np.linalg.inv(var_assin)).dot(d)
    # Degrees of freedom: number of compared coefficients minus one.
    # NOTE(review): the "- 1" presumably discounts the intercept - confirm.
    freedom = len(common) - 1
    # Calculating p-value using chi2 survival function (sf, 1 - cumulative distribution function)
    p = stats.chi2(freedom).sf(H)

    print(f"The value of H is {round(H, 6)} with {freedom} degrees of freedom in the chi-squared distribution.")
    if p < level:
        print(f"The p-value of the test is {round(p, 6)} and, therefore, H0 is REJECTED and fixed effects is preferred")
    else:
        print(f"The p-value of the test is {round(p, 6)} and H0 is NOT REJECTED and random effects is preferred.")
def Reg_Painel_Efeitos_Aleatórios(x, y, constante="S", cov="normal"):
    '''
    Fit a random-effects panel regression, store the fit in the global
    ``Resultado`` and print it. Nothing is returned.

    **IMPORTANT: for the panel to be well formed, the data must be
    multi-indexed by individual and by time, in that order. Otherwise,
    transform the dataframe first with the 'Arrumar Painel' function.

    x: list or array with the values of the independent variables;
    y: list or array with the values of the dependent variable;
    constante: "S" fits the regression with an intercept (added through
        ``sm.add_constant``); any other value fits it without one.
        Defaults to "S";
    cov: covariance estimator used for the standard errors:
        "robust"                 -> robust standard errors;
        "kernel"                 -> robust to heteroskedasticity and serial
                                    autocorrelation;
        "clustered" or "cluster" -> standard errors clustered by entity;
        any other value (including the default "normal") -> ``fit()`` is
        called with no arguments.
    '''
    global Resultado

    # Design matrix: prepend a constant column only when an intercept is requested.
    exog = sm.add_constant(x) if constante == "S" else x

    # Build the model
    model = RandomEffects(y, exog)

    # Translate the `cov` option into keyword arguments for `fit`.
    if cov in ('clustered', 'cluster'):
        fit_options = {'cov_type': 'clustered', 'cluster_entity': True}
    elif cov in ('robust', 'kernel'):
        fit_options = {'cov_type': cov}
    else:
        fit_options = {}

    Resultado = model.fit(**fit_options)
    print(Resultado)
import os
import sys

import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels import RandomEffects
from linearmodels.panel import PanelOLS
from linearmodels.panel import PooledOLS

# Script: fit a random-effects regression of `growth_rate` for a single income
# group and write the text summary to a file.
# Usage: <script> DATA_FILE OUTPUT_FILE
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

change_df = pd.read_csv(DATA_FILE)

# The income group is taken from the output file name: the text after the last
# underscore of the base name, before the first dot.
base = os.path.basename(OUTPUT_FILE)
incomegroup = base.split(".")[0].split("_")[-1]
# .copy(): columns are added below, so work on an independent frame rather
# than on a slice of `change_df`.
select_df = change_df[change_df.IncomeGroup == incomegroup].copy()

# Filter out unbalanced data points: keep only the codes that have as many
# rows as there are distinct periods.
num_period = len(select_df.period.unique())
select_df['size'] = select_df.groupby('Code')['Code'].transform('size')
select_df = select_df[select_df['size'] == num_period].copy()

select_df['Income_t0_log'] = np.log10(select_df['Income_t0'])
select_df = select_df.set_index(['Code', 'date'])

exog_vars = ['Income_t0_log', 'nm_change', 'shm_change', 'ne_change', 'sum_adv_t0']
exog = sm.add_constant(select_df[exog_vars])
mod = RandomEffects(select_df.growth_rate, exog)
# NOTE: despite its name, `fe_res` holds the random-effects fit.
fe_res = mod.fit()

with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
import sys

import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels import RandomEffects
from linearmodels.panel import PanelOLS
from linearmodels.panel import PooledOLS

# Script: fit a random-effects regression of `growth_rate` on the whole data
# set and write the text summary to a file.
# Command-line arguments: input CSV path, then output text-file path.
DATA_FILE, OUTPUT_FILE = sys.argv[1], sys.argv[2]

change_df = pd.read_csv(DATA_FILE)

# Filter out unbalanced data points: a code is kept only when its number of
# rows equals the number of distinct periods in the data.
num_period = len(change_df['period'].unique())
change_df['size'] = change_df.groupby('Code')['Code'].transform('size')
is_balanced = change_df['size'] == num_period
change_df = change_df[is_balanced]

# Base-10 log of the initial income, then index by (Code, date).
change_df['Income_t0_log'] = np.log10(change_df['Income_t0'])
change_df = change_df.set_index(['Code', 'date'])

exog_vars = ['Income_t0_log', 'nm_change', 'shm_change', 'ne_change', 'sum_adv_t0']
exog = sm.add_constant(change_df[exog_vars])
mod = RandomEffects(change_df['growth_rate'], exog)
# NOTE: despite its name, `fe_res` holds the random-effects fit.
fe_res = mod.fit()

with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
#
# PANEL
#
from linearmodels import BetweenOLS, RandomEffects, PanelOLS

# Every model below is fitted on `data`, weighted by its `n` attribute.
w = data.n
shared_kwargs = {'data': data, 'weights': w}

# BETWEEN (the estimator used here is BetweenOLS)
between_formula = 'fcs ~ rev_percap + month_Decembre'
BetweenModel = BetweenOLS.from_formula(between_formula, **shared_kwargs)
BetweenModel.fit(cov_type='robust', reweight=True)

# RANDOM EFFECTS
random_effects_formula = 'fcs ~ rev_percap + year + month_Decembre'
RandomEffectsModel = RandomEffects.from_formula(random_effects_formula, **shared_kwargs)
REModFit = RandomEffectsModel.fit(cov_type='robust')
# Bare expressions kept as in the original (they only display a value when
# run interactively).
REModFit
REModFit.variance_decomposition
REModFit.theta

# BASIC PANEL (PanelOLS with an intercept and entity effects)
panel_formula = 'fcs ~ 1 + rev_percap + month_Decembre + EntityEffects'
PanelModel = PanelOLS.from_formula(panel_formula, **shared_kwargs)
PanelModel.fit(cov_type='robust')

# INTERPRETATION : TO BE FULLY CHECKED
# an increase of 1000 in income relative to its mean over the period
# raises the fcs score by X relative to its mean over the period
# Keep only the observations dated before 2014-02-01.
df = df[df['DATE'] < '2014-02-01']
# NOTE(review): linearmodels panel estimators expect a MultiIndex ordered
# (entity, time). Here DATE is the outer level and MARKET the inner one, which
# looks reversed - confirm before relying on the random-effects estimates.
df = df.set_index(['DATE', 'MARKET'])
print(df.columns)

# Hausman Test to decide FE or RE
# Fixed Effects

# Random Effects
exog_vars = [
    'PPORTUGAL', 'Q', 'QPOR', 'PRICE_OIL', 'PRICE_GAS', 'RISK_PREMIUM',
    'TME', 'TMAX', 'TMIN', 'PP', 'WORKDAY', 'SUMMER', 'WINTER',
    'NULL_PRICE', 'LITINIT',
]
exog = sm.add_constant(df[exog_vars])
mod = RandomEffects(df['P'], exog)
re_res = mod.fit()
print(re_res)

# Fixed Effect: same regressors plus the 2013 time dummy and the Spain dummy,
# estimated by plain OLS.
exog_vars = exog_vars + ['TIMEEF_2013', 'FE_SPAIN']
exog = sm.add_constant(df[exog_vars])
# statsmodels' OLS does not add an intercept on its own, so the
# constant-augmented matrix built above must be the one passed in.
mod = sm.OLS(endog=df['P'], exog=exog)
# NOTE: `re_res` is reused and now holds the OLS fit, not the random-effects one.
re_res = mod.fit()
print(re_res.summary())

# DIF IN DIF
industryFE = PanelOLS(Y, df[dd + ['industrycode']])
print(industryFE.fit(cov_type='clustered', cluster_entity=True))

For the sake of the exercise, suppose that the unobserved factor $\alpha_i$ is ignored. This assumption is called Random Effects (RE). In this case, $\alpha_i$ will be inside the error term $v_{it}$ and can potentially bias the results.

$$Y_{it}=\beta X_{it}+v_{it}$$

$$v_{it}= \alpha_i+\epsilon_{it}$$

In an experiment, the treatment variable is uncorrelated with the unobserved factor $\alpha_i$. In this case, the Random Effects (RE) model has the advantage of producing lower standard errors than the Fixed Effects (FE) models.

Note that if we run a simple Random Effects (RE) regression, we might wrongly conclude that the St. Louis Fed policy increased firm revenue by 7%.

from linearmodels import RandomEffects
re = RandomEffects(Y, df[['const', 'st_louis_fed']])
print(re.fit(cov_type='clustered', cluster_entity=True))

## Exercises

1| Suppose a non-experimental setting, where the control group differs from the treatment group. Explain whether or not it is reasonable to use Difference-in-Differences (DID) to estimate a causal effect. Should you modify or add something to the DID framework?

2| Suppose a study claims, based on the Difference-in-Differences (DID) method, that the Fed avoided massive business failures via the bank bailout of 2008. Suppose another study, based on Regression Discontinuity (RD), claims the opposite or denies the impact of the Fed on business failures. Which do you think is the more credible empirical strategy, DID or RD, to estimate the causal impact of the Fed policy? Justify your answer.

3| In panel data, where the unit of analysis can be the firm or the county, which is more credible: the result at the firm level or at the county level? Justify.

4| Use the data from Ziebarth (2013) to estimate the impact of the St. Louis Fed policy on firms' revenue. Specifically, run Difference-in-Differences (DID) with Random Effects (RE). Interpret the result.
What can be inferred about the unobserved factor $\alpha_i$?

5| Use the data from Ziebarth (2013) to estimate the impact of the St. Louis Fed policy on firms' revenue. Specifically, run Difference-in-Differences (DID) with Firm Fixed Effects (FE) without using the option "entity_effects=True". Hint: You must use a dummy variable for each firm.
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from linearmodels import PanelOLS
from linearmodels import RandomEffects
from statsmodels.iolib.summary2 import summary_col

# Script: fit a random-effects regression of `growth` on ECI, log initial
# income and diversity for a single income group, and write the text summary
# to a file.
# Usage: <script> REG_DATA RES3_PATH
if __name__ == "__main__":
    REG_DATA = sys.argv[1]
    RES3_PATH = sys.argv[2]

    metadata = pd.read_csv(REG_DATA)
    metadata = metadata.sort_values(by=['Code', 'Year'])
    metadata = metadata.set_index(['Code', 'Year'])
    metadata['Income_t0_log'] = np.log10(metadata['Income_t0'])

    # The income group is taken from the output file name: the text after the
    # last underscore of the base name, before the first dot.
    base = os.path.basename(RES3_PATH)
    incomegroup = base.split(".")[0].split("_")[-1]
    metadata = metadata[metadata.IncomeGroup == incomegroup]
    metadata = metadata.dropna()

    # Keep only the rows whose `size` equals the number of distinct periods.
    # NOTE(review): `size` is read from the CSV and is not recomputed after
    # the income-group filter and dropna() above - presumably it holds the
    # number of observations per Code; confirm it is still accurate here.
    num_period = len(metadata['period'].unique())
    metadata = metadata[metadata['size'] == num_period]

    exog_vars = ['ECI', 'Income_t0_log', 'diversity']
    exog = sm.add_constant(metadata[exog_vars])
    mod = RandomEffects(metadata.growth, exog)

    with open(RES3_PATH, 'w') as f:
        f.write(mod.fit().summary.as_text())