示例#1
0
def run_regressions(dataa, datab, endog1, endog2, exog1, exog2, options=0):
    """Fit one clustered panel regression per dependent variable.

    Each column named in ``endog1`` is regressed on ``dataa[exog1]`` and each
    column named in ``endog2`` on ``datab[exog2]``. Time effects are always
    included; entity effects are included only when ``options == 0``.
    Standard errors are clustered on the ``gvkey`` attribute of the frame
    the model was built from.

    Parameters
    ----------
    dataa, datab
        Data sets indexable by column name and exposing ``gvkey``.
    endog1, endog2
        Iterables of dependent-variable column names for ``dataa`` and
        ``datab`` respectively.
    exog1, exog2
        Regressor selections passed to ``dataa[...]`` / ``datab[...]``.
    options : int
        ``0`` for entity and time effects, ``1`` for time effects only.

    Returns
    -------
    list
        Fitted results, first for ``endog1`` (in order), then for ``endog2``.

    Raises
    ------
    ValueError
        If a model has to be built and ``options`` is neither 0 nor 1.
    """
    print(endog1)

    def fit_all(frame, endog, exog):
        fitted = []
        for column in endog:
            # Checked per model so that empty inputs keep returning [] as
            # before, while an unsupported value fails with a clear message
            # instead of an unbound ``mod``.
            if options not in (0, 1):
                raise ValueError(
                    "options must be 0 (entity + time effects) or 1 "
                    "(time effects only), got {!r}".format(options))
            mod = PanelOLS(frame[column],
                           frame[exog],
                           entity_effects=bool(options == 0),
                           time_effects=True)
            fitted.append(
                mod.fit(cov_type='clustered', clusters=frame.gvkey))
        return fitted

    return fit_all(dataa, endog1, exog1) + fit_all(datab, endog2, exog2)
示例#2
0
    def __fitreg(self, dt, start_datetime, end_datetime, y, var_pit, var_norm,
                 fix, cluster, c):
        """Fit a PanelOLS formula model on a date-filtered slice of ``dt``.

        Rows with ``date`` inside ``[start_datetime, end_datetime]`` are kept,
        the frame is reduced to ``y``, ``year``, ``ticker``, every column from
        position ``c`` onwards and the ``fix`` column, rows with missing
        values are dropped, and the result is indexed by
        ``('ticker', 'year')`` before fitting.

        Parameters
        ----------
        dt : DataFrame with at least ``date``, ``year`` and ``ticker`` columns.
        start_datetime, end_datetime : inclusive bounds compared to ``date``.
        y : list of column names; ``y[0]`` is the dependent variable.
        var_pit, var_norm : not used by this method.
        fix : list with zero or one column name added to the formula.
        cluster : list drawn from ``'year'`` and ``'ticker'`` (no repeats)
            selecting time and/or entity clustering; empty for neither.
        c : positional index of the first regressor column in ``dt``.

        Returns
        -------
        The fitted PanelOLS result.

        Raises
        ------
        KeyError
            If more than one fixed effect is given, or ``cluster`` contains
            anything other than ``'year'`` / ``'ticker'``.
        """
        if len(fix) > 1:
            raise KeyError(
                "You have {} fixed effects! Please pick one.".format(len(fix)))

        requested = set(cluster)
        if len(requested) != len(cluster) or not requested <= {'year',
                                                                'ticker'}:
            raise KeyError("Please choose either year or ticker, or both.")

        # filter dates
        in_window = (dt['date'] >= start_datetime) & (dt['date']
                                                      <= end_datetime)
        dt = dt.loc[in_window]

        # filter columns
        dt = dt[y + ['year', 'ticker'] + list(dt.columns[c:]) + fix]

        # choose x: everything after the first three columns.
        # NOTE(review): this assumes ``y`` holds exactly one name so that the
        # first three columns are y, year and ticker -- confirm with callers.
        x = '+'.join(dt.columns[3:])

        dt = dt.dropna()
        dt = dt.set_index(['ticker', 'year'])

        formula = y[0] + '~1+' + x
        if fix:
            formula += '+' + fix[0]

        # A single fit path serves every fix/cluster combination. Previously
        # a cluster request without a fixed effect fell through and returned
        # None.
        mod = PanelOLS.from_formula(formula, data=dt)
        return mod.fit(cov_type='clustered',
                       cluster_time='year' in requested,
                       cluster_entity='ticker' in requested)
示例#3
0
def run_regressions_3(data=None, endog=None, exog=None, options=0,
                      clusterfirm=0):
    """Fit the same regression on every data set in ``data``.

    Parameters
    ----------
    data : iterable of data sets, each indexable by column selection.
        Defaults to an empty list.
    endog, exog : column selections for the dependent variable and the
        regressors. Default to empty lists.
    options : int
        ``0`` PanelOLS with entity and time effects, ``1`` PanelOLS with time
        effects only, ``2`` PooledOLS.
    clusterfirm : int
        ``0`` cluster on each data set's ``gvkey``, ``1`` cluster by entity,
        ``2`` use the estimator's default covariance.

    Returns
    -------
    list
        One fitted result per element of ``data``.

    Raises
    ------
    ValueError
        If a model has to be fitted and ``options`` or ``clusterfirm`` is
        not one of 0, 1, 2.
    """
    # None replaces the former mutable ``[]`` defaults.
    data = [] if data is None else data
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    results = []
    print(endog)
    for elem in data:
        # Fail loudly instead of hitting an unbound ``mod`` (bad options) or
        # silently skipping the fit (bad clusterfirm).
        if options not in (0, 1, 2):
            raise ValueError(
                "options must be 0, 1 or 2, got {!r}".format(options))
        if clusterfirm not in (0, 1, 2):
            raise ValueError(
                "clusterfirm must be 0, 1 or 2, got {!r}".format(clusterfirm))

        if options == 2:
            print(type(elem))
            mod = PooledOLS(elem[endog], elem[exog])
        else:
            mod = PanelOLS(elem[endog],
                           elem[exog],
                           entity_effects=bool(options == 0),
                           time_effects=True)

        if clusterfirm == 0:
            fitted = mod.fit(cov_type='clustered', clusters=elem.gvkey)
        elif clusterfirm == 1:
            fitted = mod.fit(cov_type='clustered', cluster_entity=True)
        else:
            fitted = mod.fit()
        results.append(fitted)
    return results
示例#4
0
def regressions(data, endog, exog, options, clusterfirm, constant):
    """Fit a single panel regression selected by three integer switches.

    Parameters
    ----------
    data : data set indexable by column selection and exposing ``gvkey``.
    endog : column selection for the dependent variable.
    exog : column selection for the regressors.
    options : int
        ``0`` PanelOLS with entity and time effects, ``1`` PanelOLS with time
        effects only, ``2`` PooledOLS.
    clusterfirm : int
        ``0`` cluster on ``data.gvkey``, ``1`` cluster by entity, ``2`` use
        the estimator's default covariance.
    constant : int
        ``1`` to add a constant column to the regressors, ``0`` to use them
        as given.

    Returns
    -------
    The fitted result.

    Raises
    ------
    ValueError
        If any switch has an unsupported value.
    """
    # Validate up front: previously a bad switch surfaced later as an
    # unbound ``mod`` / ``results`` or as a raw name list passed as exog.
    if constant not in (0, 1):
        raise ValueError("constant must be 0 or 1, got {!r}".format(constant))
    if options not in (0, 1, 2):
        raise ValueError("options must be 0, 1 or 2, got {!r}".format(options))
    if clusterfirm not in (0, 1, 2):
        raise ValueError(
            "clusterfirm must be 0, 1 or 2, got {!r}".format(clusterfirm))

    regressors = data[exog]
    if constant == 1:
        regressors = sm.add_constant(regressors)

    if options == 2:
        mod = PooledOLS(data[endog], regressors)
    else:
        mod = PanelOLS(data[endog],
                       regressors,
                       entity_effects=bool(options == 0),
                       time_effects=True)

    if clusterfirm == 0:
        return mod.fit(cov_type='clustered', clusters=data.gvkey)
    if clusterfirm == 1:
        return mod.fit(cov_type='clustered', cluster_entity=True)
    return mod.fit()
示例#5
0
	def preprocessing_regression(self):
		"""Impute, log-transform and fit a time-effects PanelOLS on ``self.df``.

		The first nine columns of ``self.df`` are mean-imputed in place, three
		columns are replaced by their natural log, the data is indexed by
		``('iller', 'yil')`` and ``mezun`` is regressed on a positional slice
		of the remaining columns with time effects and entity-clustered
		standard errors.

		Returns the fitted PanelOLS result.
		"""
		# Fill missing values in the first nine columns with the column mean;
		# the imputed values are written back into self.df.
		imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
		self.df.iloc[:, :9] = imputer.fit_transform(self.df.iloc[:, :9])
		data = self.df.iloc[:, :10]
		# Take the natural log of the columns at positions 2, 3 and 4
		# (original author's note: these variables have outliers).
		# NOTE(review): presumably those positions are mezun, yogunluk and
		# dogum -- confirm against the column order of self.df.
		# NOTE(review): ``data`` is a slice of self.df, so these assignments
		# may trigger pandas' SettingWithCopyWarning -- confirm whether
		# self.df is meant to change here.
		data.mezun = np.log(self.df.iloc[:, 2])
		data.yogunluk = np.log(self.df.iloc[:, 3])
		data.dogum = np.log(self.df.iloc[:, 4])
		# Index by (iller, yil) to put the data into panel form.
		data = data.set_index(['iller', 'yil'])
		# Regress mezun on columns 1..8 of the re-indexed frame with time effects.
		# NOTE(review): after set_index the positions shift; verify that the
		# 1:9 slice does not include the dependent variable itself.
		mod = PanelOLS(data.mezun, data.iloc[:, 1:9], time_effects=True)
		res = mod.fit(cov_type='clustered', cluster_entity=True)

		return res
示例#6
0
def run_regressions_2(data, endog=None, exog=None, options=0):
    """Fit one clustered PanelOLS per dependent variable in ``endog``.

    Each column named in ``endog`` is regressed on ``data[exog]`` with time
    effects; entity effects are added when ``options == 0``. Standard errors
    are clustered on ``data.gvkey``.

    Parameters
    ----------
    data : data set indexable by column selection and exposing ``gvkey``.
    endog : iterable of dependent-variable column names (default empty).
    exog : regressor column selection (default empty).
    options : int
        ``0`` for entity and time effects, ``1`` for time effects only.

    Returns
    -------
    list
        Fitted results in the order of ``endog``.

    Raises
    ------
    ValueError
        If a model has to be built and ``options`` is neither 0 nor 1.
    """
    # None replaces the former mutable ``[]`` defaults.
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    results = []
    print(endog)
    for elem in endog:
        if options not in (0, 1):
            raise ValueError(
                "options must be 0 (entity + time effects) or 1 "
                "(time effects only), got {!r}".format(options))
        # The regressors come from ``exog``. The previous code ignored that
        # parameter: it regressed on the last ``endog`` column for
        # options == 0 and read an undefined name for options == 1.
        mod = PanelOLS(data[elem],
                       data[exog],
                       entity_effects=bool(options == 0),
                       time_effects=True)
        results.append(mod.fit(cov_type='clustered', clusters=data.gvkey))
    return results
示例#7
0
def cond_corr_e2_e1timesprize(df):
    """Correlation of e2 and the interaction of e1 and prize after partialing out other effects.

    ``e2`` and ``e1timesprize`` are each regressed on ``e1``, ``prize``, the
    ``tt2``..``tt10`` columns and entity effects; the correlation of the two
    residual series is returned.
    """
    residuals = {}
    for label in ("e2", "e1timesprize"):
        formula = (
            f"{label}~e1+prize+tt2+tt3+tt4+tt5+tt6+tt7+tt8+tt9+tt10+EntityEffects"
        )
        residuals[f"{label}_resid"] = (
            PanelOLS.from_formula(formula, data=df).fit().resids
        )
    # Build the frame from the residual series themselves so it takes their
    # index. Writing them with .loc into a frame created without an index
    # aligns against an empty index and discards the values.
    df_resid = pd.DataFrame(residuals)
    return df_resid["e2_resid"].corr(df_resid["e1timesprize_resid"])
def get_fe(
    regression_variables: List[Tuple],
    data: Dict[str, pd.DataFrame],
    datasets: Dict[pd.DataFrame, Any],
    entity_effects: bool = False,
    time_effects: bool = False,
) -> Tuple[DataFrame, Any, List[Any], Any]:
    """Fit a fixed-effects panel regression (PanelOLS) and print its summary.

    Fixed effects are the usual remedy when effects are correlated with the
    regressors, in which case the RE and BE estimators are not consistent.
    They are called entity_effects when applied to entities and time_effects
    when applied to the time dimension. [Source: LinearModels]

    Parameters
    ----------
    regression_variables : list
        The regression variables, the first one being the dependent variable.
    data : dict
        A dictionary containing the datasets.
    datasets : dict
        A dictionary containing the column and dataset names of
        each column/dataset combination.
    entity_effects : bool
        Whether to include entity effects
    time_effects : bool
        Whether to include time effects

    Returns
    -------
    The dataset used, the dependent variable, the independent variables and
    the fitted model (``None`` when the regression data is empty).
    """
    regression_df, dependent_variable, independent_variables = get_regression_data(
        regression_variables, data, datasets, "FE")

    model = None
    if not regression_df.empty:
        # Record warnings raised while building/fitting so they can be shown
        # after the model summary.
        with warnings.catch_warnings(record=True) as caught:
            model = PanelOLS(
                regression_df[dependent_variable],
                add_constant(regression_df[independent_variables]),
                entity_effects=entity_effects,
                time_effects=time_effects,
            ).fit()
            console.print(model)

            if caught:
                console.print("Warnings:")
                for item in caught:
                    text = f"[red]{item.message}[/red]"
                    console.print(text.replace("\n", ""))

    return regression_df, dependent_variable, independent_variables, model
示例#9
0
def old_percentile_correlation(df):
    """Return the 66th percentile of per-subject e2/e1 residual correlations.

    ``e2`` and ``e1`` are each regressed on ``prize``, ``e1timesprize`` and
    time effects; the residuals are then correlated separately for every
    value of the ``subject`` index level.
    """
    df_resid = pd.DataFrame(columns=["e2_resid", "e1_resid"], index=df.index)
    for label in ("e2", "e1"):
        fitted = PanelOLS.from_formula(
            f"{label}~prize+e1timesprize+TimeEffects", data=df).fit()
        df_resid.loc[:, f"{label}_resid"] = fitted.resids

    subjects = df_resid.index.get_level_values('subject').unique()
    per_subject = {
        f"{sub}": df_resid.query(f"subject == {sub}") for sub in subjects
    }
    cond_corr = [
        frame["e2_resid"].corr(frame["e1_resid"])
        for frame in per_subject.values()
    ]
    return np.percentile(cond_corr, 66)
示例#10
0
def panel_regression(y,
                     xs,
                     years,
                     country,
                     list_x,
                     prev=0,
                     show=False,
                     save=True,
                     path="",
                     diff=False,
                     constant=False,
                     entity_effects=False):
    """Fit a panel (or first-difference) regression of ``y`` on ``list_x``.

    The estimation data comes from ``bdf.filter_origin_country_dataset``.
    ``constant`` adds a constant column to the regressors, ``diff`` switches
    from PanelOLS to FirstDifferenceOLS, and ``show`` plots real versus
    fitted values via ``pmf.plot_real_VS_prediction``. ``evaluation`` is
    always called on the fitted values.

    Returns a tuple ``(params, fitted_values)`` from the fitted result.
    """
    origins = xs.index.levels[0].tolist()
    data = bdf.filter_origin_country_dataset(y, country, years, origins, xs,
                                             prev)

    # The explicit ``== False`` / ``== True`` comparisons are kept on purpose:
    # they are not the same as truthiness for values such as None.
    exog = data[list_x] if constant == False else sm.add_constant(data[list_x])

    if diff == False:
        res = PanelOLS(data.y, exog, entity_effects=entity_effects).fit()
    else:
        res = FirstDifferenceOLS(data.y, exog).fit()

    n_regressors = len(xs.columns.tolist())
    evaluation(data, res.fitted_values, constant, n_regressors)

    if show == True:
        pmf.plot_real_VS_prediction(y, res.fitted_values, xs, years, country,
                                    45, "Regression model", save, path)

    return (res.params, res.fitted_values)
def balancing_tests_cantonal_results(df, exog):
    """Run the three conditional balancing regressions and compare them.

    Each outcome column of ``df`` is multiplied by 100 and regressed on
    ``exog`` with entity and time effects, clustering on ``df.id_e``.
    The fitted results are passed to ``compare`` with ``stars=True``.
    """
    # (label shown in the comparison table, outcome column on df)
    outcomes = (
        ('Between countries', 'share_AS_between'),
        ('Within countries', 'share_AS_within'),
        ('Sex ratio', 'sex_ratio_AS_ntc'),
    )

    fitted = {}
    for title, column in outcomes:
        model = PanelOLS(getattr(df, column) * 100,
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        fitted[title] = model.fit(cov_type='clustered',
                                  clusters=df.id_e,
                                  singletons=False)

    return compare(fitted, stars=True)
示例#12
0
def process_data(tag, area_tag):
    """Fit and print an entity-effects regression on one averaged raster.

    Reads ``<root>/<tag>/result/avg_data/avg_<area_tag>.tif``, loads its
    bands into a frame with columns sday, eday, gsl, gdd, edd and pre, and
    regresses gsl on gdd, edd and pre with entity effects. The frame is
    indexed by ``(eday, sday)``.

    :param tag: directory name under the root path
    :param area_tag: area identifier used in the raster file name
    :return: None; the fitted result is printed
    """
    root_path = getRootPath()
    tif_file = os.path.join(
        root_path, "{0}/result/avg_data/avg_{1}.tif".format(tag, area_tag))
    bandArray = get_raster_band_array(tif_file)
    df = pd.DataFrame(bandArray,
                      columns=["sday", "eday", "gsl", "gdd", "edd", "pre"])
    # dropna() returns a new frame, so the result must be kept; the previous
    # bare call was a no-op. Rows are dropped before the integer cast
    # because NaN cannot be represented as int64.
    df = df.dropna()
    df = df.astype({"sday": np.int64, "eday": np.int64})
    df = df.set_index(["eday", "sday"])
    print("-------- use EntityEffects ---------")
    mod = PanelOLS.from_formula('gsl ~ 1 + gdd + edd + pre + EntityEffects',
                                df)
    res = mod.fit(cov_type='unadjusted')
    print(res)
示例#13
0
def process_data(tag, area_tag):
    """Fit and print an entity-effects regression on the merged rasters.

    Every ``.tif`` under ``<root>/<tag>/process/merge/<area_tag>`` is loaded
    and stacked row-wise into one frame with columns sday, eday, gsl, year,
    gdd, edd and pre; gsl is then regressed on gdd, edd and pre with entity
    effects. The frame is indexed by ``(year, eday)``.

    :param tag: directory name under the root path
    :param area_tag: area identifier naming the merge sub-directory
    :return: None; the fitted result is printed. Returns early when no
        raster file is found.
    """
    print("process data area_tag: {}".format(area_tag))
    root_path = getRootPath()
    src_path = os.path.join(root_path,
                            "{0}/process/merge/{1}".format(tag, area_tag))
    tif_files = walkDirFile(src_path, ext=".tif")

    band_arrays = [get_raster_band_array(tif_file) for tif_file in tif_files]
    if not band_arrays:
        return
    # Stack once instead of re-copying the accumulated array per file; a
    # single raster is used as-is, exactly as before.
    if len(band_arrays) == 1:
        bandArray = band_arrays[0]
    else:
        bandArray = np.vstack(band_arrays)

    df = pd.DataFrame(
        bandArray,
        columns=["sday", "eday", "gsl", "year", "gdd", "edd", "pre"])
    # dropna() returns a new frame, so the result must be kept; the previous
    # bare call was a no-op. Rows are dropped before the integer cast
    # because NaN cannot be represented as int64.
    df = df.dropna()
    df = df.astype({"sday": np.int64, "eday": np.int64, "year": np.int64})
    df = df.set_index(["year", "eday"])
    print("-------- use EntityEffects ---------")
    mod = PanelOLS.from_formula('gsl ~ 1 + gdd + edd + pre + EntityEffects',
                                df)
    res = mod.fit(cov_type='unadjusted')
    print(res)
jtrain2 = jtrain
## Define the ID and Time column for Panel Regression
jtrain2 = jtrain2.set_index(['fcode', 'year'])
print(jtrain2.head(5))
exog_vars = ['d88', 'd89', 'grant', 'grant_1']
grant_vars = ['grant']
exog = sm.add_constant(jtrain2[exog_vars])
grant0 = sm.add_constant(jtrain2[grant_vars])

## Model Pooled OLS
model_pool = PooledOLS(jtrain2.lscrap, exog)
pooled_res = model_pool.fit()
print(pooled_res)
## Model Fixed Effects -- Entity Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Model Fixed Effects -- Entity and Time Effects - True
model_fe = PanelOLS(jtrain2.lscrap,
                    exog,
                    entity_effects=True,
                    time_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Random Effects Model
model_re = RandomEffects(jtrain2.lscrap, exog)
re_res = model_re.fit()
# Print the random-effects fit; this section previously re-printed fe_res.
print(re_res)
#################################################
## Regress scrap~grant
#mod = PanelOLS(temp.UE12M, temp[['activeWeight12M']], entity_effects = True)


#mod = PanelOLS(temp.UE3M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight3MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight6MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight12MSquared']], entity_effects = True)


#mod = PanelOLS(temp.UE6M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight3MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight6MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight12MSquared']], entity_effects = True)


# Active specification: UE12M regressed on the three squared active-weight
# columns with entity effects. This is the only uncommented model here.
mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight6MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight12MSquared']], entity_effects = True)


#Both entity and time effect
#mod = PanelOLS(temp.UE3M, temp[['activeWeight3M', 'activeWeight6M', 'activeWeight12M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight3M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight6M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight12M']], entity_effects = True, time_effects = True)


#mod = PanelOLS(temp.UE6M, temp[['activeWeight3M', 'activeWeight6M', 'activeWeight12M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight3M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight6M']], entity_effects = True, time_effects = True)
示例#16
0
    x = np.stack([calc_mat[:, 1], calc_mat[:, 2], calc_mat[:, 3], calc_mat[:, 4]])
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for elem in x[1:]:
        X = sm.add_constant(np.column_stack((elem, X)))
    res = sm.OLS(y,X).fit()
    print(res.summary())

    # FE (fixed-effects) model regression
    company_codes = []
    for each_file in file_list:
        company_code = each_file.split('.')[0]
        company_code = int(company_code)
        company_codes.append(company_code)
    time = [2019] * 50
    df = pd.DataFrame({
        'TDA': x[0],
        'CR5': x[1],
        'SIZE': x[2],
        'ROE': x[3],
        'REWARD': y,
        'YEAR': time,
        'CODE': company_codes
    })
    df.to_stata('Stock/res.dta')
    df = df.set_index(['CODE', 'YEAR'])
    exog_vars = ['TDA', 'LDA', 'SIZE', 'ROE']
    exog = sm.add_constant(df[exog_vars])
    model = PanelOLS(df['REWARD'], exog, entity_effects=True)
    fe = model.fit()
    print(fe)
示例#17
0
# Rescale volume by 1e6, keep the 2018-2020 rows and the columns used below,
# and drop rows with missing values.
test['volume'] = test['volume'] / 1000000
test = test.loc[test['year'].isin(['2020', '2018', '2019'])][[
    'year', 'ticker', 'assetclasslevel1', 'assetclasslevel2',
    'assetclasslevel3', 'cd', 'cdlag1', 'pd', 'volume', 'age'
]].dropna()

# In[16]:

test0 = test.set_index(['ticker', 'year'])

# One model per asset-class level column, each with standard errors
# clustered by both time and entity (ticker).
fit01, fit02, fit03 = (
    PanelOLS.from_formula(
        'cd ~ 1 + cdlag1 + volume + pd + age + ' + level,
        data=test0).fit(cov_type='clustered',
                        cluster_time=True,
                        cluster_entity=True)
    for level in ('assetclasslevel1', 'assetclasslevel2', 'assetclasslevel3')
)

# Time-effects specification (model is built here but not fitted).
mod = PanelOLS.from_formula('cd ~ 1 + cdlag1 + volume + pd + TimeEffects',
                            data=test0)
示例#18
0
# CLOs have a much more positive holding-period return than corporate bonds.

# In[41]:

# Part B
# 1. OLS without fixed effects
hpr_OLS = smf.ols(formula='lnhpr ~ clo+tmkt_rf+tsmb+thml+tterm+tdef+hp',
                  data=ps5)
# Panel data is used to regress the holding-period return on the common risk
# factors (tmkt_rf, tsmb, thml, tterm and tdef) and the holding period.
# clo is an indicator equal to 1 if the bond is a CLO. A significant, positive
# clo coefficient means CLOs earn a higher return than corporate bonds.
res = hpr_OLS.fit()
print(res.summary())
# The significant positive coefficient on clo shows that CLOs have a higher
# excess return than corporate bonds.

# In[59]:

# 2. OLS with firm fixed effects
# Index the panel by (entity_name, startyear); startyear is first captured
# as a categorical.
startyear = pd.Categorical(ps5.startyear)
ps5 = ps5.set_index(['entity_name', 'startyear'])

# In[67]:

exog_vars = ['clo', 'tmkt_rf', 'tsmb', 'thml', 'tterm', 'tdef', 'hp']
exog = sm.add_constant(ps5[exog_vars])
mod = PanelOLS(ps5.lnhpr, exog, entity_effects=True)
res = mod.fit()
print(res)
# After adding firm fixed effects, the clo coefficient is still significantly
# positive and of similar magnitude, so the argument that CLOs have a higher
# excess return than corporate bonds holds.
def _numbered_columns(first, prefix, start, stop):
    """Return ``[first, f"{prefix}{start}", ..., f"{prefix}{stop}"]`` (stop inclusive)."""
    return [first] + [f"{prefix}{i}" for i in range(start, stop + 1)]


def _fit_baseline(panel, exog_vars):
    """Regress crime_rate_all_violent_p30 on ``exog_vars`` plus a constant.

    Uses entity and time effects and clusters on ``panel.id_e``.
    """
    exog = sm.add_constant(panel[exog_vars])
    mod = PanelOLS(panel.crime_rate_all_violent_p30,
                   exog,
                   entity_effects=True,
                   time_effects=True,
                   singletons=False)
    return mod.fit(cov_type='clustered',
                   clusters=panel.id_e,
                   singletons=False)


def baseline_results(df):
    """Fit the four baseline specifications and return their comparison.

    Every specification regresses ``crime_rate_all_violent_p30`` on one
    "kid012"-type column plus a run of numbered exposure columns, with
    entity and time effects and clustering on ``id_e``:

    * ``'Full'``       all rows; ``kid012_all`` + ``all_exp_13..86``
    * ``'CC and MK'``  rows with ``allmk_periode == 1``; same regressors
    * ``'CC'``         rows with ``all_periode == 1``; ``kid012`` renamed
      to ``kid012_all`` + ``exp_all_13..86``
    * ``'MK'``         rows with ``mk_periode == 1``; ``MK_kid012`` +
      ``exp_mk_13..83``

    Returns the object produced by ``compare(..., stars=True)``.
    """
    all_exp_vars = _numbered_columns("kid012_all", "all_exp_", 13, 86)

    ##first column of baseline
    # NOTE(review): this specification indexes by (id_e_t, id_a) while the
    # other three use (id_a, id_e_t) -- kept as in the original; confirm
    # that the swapped entity/time roles are intended.
    res = _fit_baseline(df.set_index(["id_e_t", "id_a"]), all_exp_vars)

    ##second column of baseline results
    sample2 = df[(df['allmk_periode'] == 1)]
    res2 = _fit_baseline(sample2.set_index(["id_a", "id_e_t"]), all_exp_vars)

    ##third column of baseline results
    # The existing kid012_all is dropped and kid012 takes over its name.
    sample3 = df[(df['all_periode'] == 1)]
    sample3 = sample3.drop(['kid012_all'], axis=1)
    sample3 = sample3.rename(columns={"kid012": "kid012_all"})
    res3 = _fit_baseline(
        sample3.set_index(["id_a", "id_e_t"]),
        _numbered_columns("kid012_all", "exp_all_", 13, 86))

    ##4th column
    sample4 = df[(df['mk_periode'] == 1)]
    res4 = _fit_baseline(
        sample4.set_index(["id_a", "id_e_t"]),
        _numbered_columns("MK_kid012", "exp_mk_", 13, 83))

    ##presentation
    return compare({
        'Full': res,
        'CC and MK': res2,
        'CC': res3,
        'MK': res4
    },
                   stars=True)
def baseline_results_women(df):
    """Fit the four baseline two-way fixed-effects models on the women sample.

    The sample is the ``sex == "F"`` group of ``df`` without the rows whose
    ``age`` is 18 or below.  Each model regresses
    ``crime_rate_all_violent_p30`` on a constant, one control column and a
    run of numbered exposure columns, with entity effects, time effects,
    absorbed regressors dropped, singletons dropped, and standard errors
    clustered on ``id_e``:

    ============  ====================  ============  ========================
    label         row filter            control       exposure columns
    ============  ====================  ============  ========================
    'Full'        (none)                kid012_all    all_exp_13 .. all_exp_70
    'CC and MK'   allmk_periode == 1    kid012_all    all_exp_13 .. all_exp_71
    'CC'          all_periode == 1      kid012        exp_all_13 .. exp_all_71
    'MK'          mk_periode == 1       MK_kid012     exp_mk_13 .. exp_mk_70
    ============  ====================  ============  ========================

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``sex``, ``age``, ``id_a``, ``id_e_t``, ``id_e``,
        ``crime_rate_all_violent_p30``, the three ``*_periode`` flags and
        every regressor column listed above.

    Returns
    -------
    The value of ``compare({...}, stars=True)`` for the four fitted models,
    keyed by the labels in the table.

    Raises
    ------
    KeyError
        If ``df`` has no ``"F"`` group in ``sex``.
    """
    # Grouping by the scalar key (not a one-element list) keeps
    # get_group("F") valid on pandas versions that expect a tuple name for
    # list-based groupings.
    women = df.groupby("sex").get_group("F")
    # Written as a negated "<= 18" so rows with a missing age stay in the
    # sample, exactly as before.
    adult_women = women[~(women["age"] <= 18)]

    def fit_panel(sample, index_cols, control, prefix, last_suffix):
        """Index ``sample``, build the regressors and return the fit."""
        panel = sample.set_index(index_cols)
        regressors = [control] + [
            prefix + str(k) for k in range(13, last_suffix + 1)
        ]
        model = PanelOLS(panel["crime_rate_all_violent_p30"],
                         sm.add_constant(panel[regressors]),
                         entity_effects=True,
                         time_effects=True,
                         drop_absorbed=True,
                         singletons=False)
        # `clusters` is the keyword the clustered estimator reads (the
        # previous `cluster=` spelling is not one of its options), and the
        # labels are taken from the very panel the model was built on so
        # they share its index.  `singletons` is a model option and is
        # therefore passed to PanelOLS only.
        return model.fit(cov_type='clustered', clusters=panel["id_e"])

    # NOTE(review): the full-sample model is indexed by (id_e_t, id_a) while
    # the sub-sample models use (id_a, id_e_t); kept as in the original --
    # confirm which level is meant to be the entity.
    res_women = fit_panel(adult_women, ["id_e_t", "id_a"],
                          "kid012_all", "all_exp_", 70)

    res2_women = fit_panel(adult_women[adult_women['allmk_periode'] == 1],
                           ["id_a", "id_e_t"],
                           "kid012_all", "all_exp_", 71)

    res3_women = fit_panel(adult_women[adult_women['all_periode'] == 1],
                           ["id_a", "id_e_t"],
                           "kid012", "exp_all_", 71)

    ##Table 5 column 4 women
    ##(exposure columns stop at 70: nr. 71-86 had to be deleted)
    res4_women = fit_panel(adult_women[adult_women['mk_periode'] == 1],
                           ["id_a", "id_e_t"],
                           "MK_kid012", "exp_mk_", 70)

    return compare(
        {
            'Full': res_women,
            'CC and MK': res2_women,
            'CC': res3_women,
            'MK': res4_women
        },
        stars=True)
示例#21
0
# Derived columns: each one is the element-wise sum of the two columns on the
# right-hand side.
autor["other"] = autor["rs_om"] + autor["rs_of"]
autor["married"] = autor["marfem"] + autor["marmale"]

# Create categorical for state
autor["state_c"] = pd.Categorical(autor["state"])

# Set index for use with linearmodels
# (drop=False keeps "state" and "year" as ordinary columns as well)
autor = autor.set_index(["state", "year"], drop=False)

# Diff-in-diff regression
# The adjacent string literals below are concatenated into one formula string.
# Besides the listed regressors it contains the state_c:t interaction and the
# EntityEffects / TimeEffects terms; the fit clusters standard errors by
# entity (cluster_entity=True).
did = PanelOLS.from_formula(
    ("lnths ~"
     "1 +"
     "lnemp +"
     "admico_2 + admico_1 + admico0 + admico1 + admico2 + admico3 + mico4 +"
     "admppa_2 + admppa_1 + admppa0 + admppa1 + admppa2 + admppa3 + mppa4 +"
     "admgfa_2 + admgfa_1 + admgfa0 + admgfa1 + admgfa2 + admgfa3 + mgfa4 +"
     "state_c:t +"
     "EntityEffects + TimeEffects"),
    data=autor,
    drop_absorbed=True).fit(cov_type='clustered', cluster_entity=True)

# Store results in a DataFrame for a plot
# Both columns are scaled by 100; "ci" is 1.96 standard errors, i.e. the
# half-width of the usual normal-approximation 95% interval.
results_did = pd.DataFrame({
    "coef": did.params * 100,
    "ci": 1.96 * did.std_errors * 100
})

# Keep only the relevant coefficients
# (rows whose label matches the regex, then move the labels into a column)
results_did = results_did.filter(regex="admico|mico", axis=0).reset_index()
    VIF[y] = 1 / (1 - res.rsquared)
# Write the VIF values collected above to a text file.
with open('../result/VIF.txt', 'w') as f:
    print(VIF, file=f)

# Pooled regression
# NOTE(review): x is passed to sm.OLS as-is, so this model has no intercept
# (the panel models below do call sm.add_constant) -- confirm this is intended.
x = data[["MV", "RM", "BM", "ROE", "Inv"]]
y = data["Ret"]
results = sm.OLS(y, x).fit()
with open('../result/pooled_reg.txt', 'w') as f:
    print(results.summary(), file=f)

# Fixed-effects regression
# The panel is indexed by (Stkcd, Time), so entity_effects=True adds one
# effect per Stkcd value.
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Stkcd', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
# NOTE(review): cov_type='clustered' is requested without clusters,
# cluster_entity or cluster_time -- confirm which clustering is wanted.
res = mod.fit(cov_type='clustered')
with open('../result/fixed_effects.txt', 'w') as f:
    print(res, file=f)

# Regression controlling for industry
# The data is re-read from disk because `data` was re-indexed above; this
# time the first index level is Industry, so the entity effects are per
# Industry value.
data = pd.read_csv("../data/data_all.csv")
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Industry', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res = mod.fit(cov_type='clustered')
with open('../result/industry_control.txt', 'w') as f:
    print(res, file=f)
示例#23
0
                       index='treat',
                       columns='year',
                       values='defor',
                       aggfunc=np.sum)
# Number of non-missing 'defor' observations per treat group (rows) and
# year (columns).
count = pd.pivot_table(data=defor_df,
                       index='treat',
                       columns='year',
                       values='defor',
                       aggfunc="count")
# linearmodels reads the panel structure from a two-level index.
defor_df = defor_df.set_index(['idx', 'year'])

# =============================================================================
# Run regression to estimate treatment effect
# =============================================================================
# Imported here, ahead of the first model, because the simple diff-in-diff
# below already needs PanelOLS (the import used to come after that first use).
from linearmodels.panel import PanelOLS

## Simple diff in diff
mod = PanelOLS.from_formula('defor ~ treat * post', defor_df)
res = mod.fit(cov_type='clustered', cluster_entity=True)
print(res)

## Generalized did using two-way fixed effects
# Outer is entity, inner is time
# 't' is the treat x post interaction, entered as the single regressor next
# to the entity and time effects.
defor_df['t'] = defor_df['treat'] * defor_df['post']
mod = PanelOLS.from_formula('defor ~ t + EntityEffects + TimeEffects',
                            defor_df)
res = mod.fit(cov_type='clustered', cluster_entity=True)
print(res)

### KEY OBSERVATION: FE estimator yields ~ (estimate of att) = diff + att while
### simple diff in diff yields ~ (estimate of att) = att
示例#24
0
# Print elements 2, 3 and 4 of orePriceRes_BDI_ADF under the 1% / 5% / 10%
# labels.
print("1%            :", orePriceRes_BDI_ADF[2])
print("5%            :", orePriceRes_BDI_ADF[3])
print("10%           :", orePriceRes_BDI_ADF[4])

# Setting up the DataFrame for PanelOLS and cluster effect by port
# ("port" is the first index level, i.e. the entity; "date" is the second)
freightCost_panel = freightCost_df.set_index(["port", "date"])

# Defining the Explanatory Variables
freightCost_vars = [
    "growth", "logd", "logf", "ore_price", "port_dummy1", "port_dummy2"
]
freightCost_reg = sm.add_constant(freightCost_panel[freightCost_vars])

# Running a panel regression
# No entity effects are requested (entity_effects=False); standard errors are
# clustered by entity, which is the port given the index above.
freightCost_results = PanelOLS(freightCost_panel["avefreight"],
                               freightCost_reg,
                               entity_effects=False).fit(cov_type="clustered",
                                                         cluster_entity=True)

# Setting up the DataFrame for PanelOLS and cluster effect by port
# (same indexing of the same source frame as freightCost_panel above)
freightCost_BDI_panel = freightCost_df.set_index(["port", "date"])

# Defining the Explanatory Variables
# Same list as freightCost_vars with "BDI" added.
freightCost_BDI_vars = [
    "growth", "logd", "logf", "ore_price", "BDI", "port_dummy1", "port_dummy2"
]
freightCost_BDI_reg = sm.add_constant(
    freightCost_BDI_panel[freightCost_BDI_vars])

# Running a panel regression
freightCost_BDI_results = PanelOLS(freightCost_BDI_panel["avefreight"],
                                   freightCost_BDI_reg,
from linearmodels.panel import PanelOLS
import statsmodels.api as sm
from linearmodels.panel import PooledOLS
import sys
import os

# Command line: <input csv> <output text file>.
# NOTE(review): pd and np are used below but their imports are not visible in
# this part of the file -- confirm they are imported above.
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

change_df = pd.read_csv(DATA_FILE)
# The income group is the last "_"-separated token of the output file's base
# name, taken before the first "." (e.g. "xxx_High.txt" -> "High").
base = os.path.basename(OUTPUT_FILE)
incomegroup = base.split(".")[0].split("_")[-1]
# Explicit copy: columns are added to this frame below, and assigning onto a
# filtered slice of change_df would raise pandas' SettingWithCopyWarning.
select_df = change_df[change_df.IncomeGroup == incomegroup].copy()

# Filter out unbalanced data points: keep only the codes that have as many
# rows as there are distinct periods.
num_period = len(select_df.period.unique())
select_df['size'] = select_df.groupby('Code')['Code'].transform('size')
# Copied again because another column is assigned right after this filter.
select_df = select_df[select_df['size'] == num_period].copy()

select_df['Income_t0_log'] = np.log10(select_df['Income_t0'])
select_df = select_df.set_index(['Code', 'date'])

exog_vars = [
    'Income_t0_log', 'nm_change', 'shm_change', 'ne_change', 'sum_adv_t0'
]
exog = sm.add_constant(select_df[exog_vars])
# Entity (Code) fixed effects; fit() is called with its default covariance.
mod = PanelOLS(select_df.growth_rate, exog, entity_effects=True)
fe_res = mod.fit()
with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
# print(data1)

# 'Date' is captured as a categorical before set_index moves the column into
# the index, then written back as a regular column.  pd.Categorical carries no
# index, so the assignment is positional.
d = pd.Categorical(data1['Date'])
data1 = data1.set_index(['ID', 'Date'])
data1['Date'] = d
# print(data1)

# exog_vars is the list `a` plus the categorical 'Date' column.
exog_vars = [
    'Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate', 'Date'
]
a = ['Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate']
print(data1[a])
exog = sm.add_constant(data1[exog_vars])
exog1 = sm.add_constant(data1[a])
# Entity-effects model on the regressors that include 'Date'; the three
# models below use the regressors without it.
mod = PanelOLS(data1['Accelerator pedal position'],
               exog,
               entity_effects=True,
               time_effects=False)
mod1 = PooledOLS(data1['Accelerator pedal position'], exog1)
mod2 = RandomEffects(data1['Accelerator pedal position'], exog1)
mod3 = BetweenOLS(data1['Accelerator pedal position'], exog1)
res = mod.fit()
pooled_res = mod1.fit()
re_res = mod2.fit()
be_res = mod3.fit()
print(res)

# Only the pooled, random-effects and between results are compared; the
# entity-effects result `res` is printed on its own above.
print(compare({'Pooled': pooled_res, 'RE': re_res, 'BE': be_res}))

# No-op entry-point guard: everything above runs at import time.
if __name__ == '__main__':
    pass
def balancing_tests_cohort_results(df, exog):
    """Regress four cohort characteristics on ``exog`` and compare the fits.

    Each outcome column of ``df`` is the dependent variable of its own
    ``PanelOLS`` model with entity effects, time effects and singletons
    dropped; standard errors are clustered on ``df["id_e"]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel-indexed frame providing ``adult``, ``below_median_age_restr``,
        ``sex_ratio``, ``have_adults_patch`` and ``id_e``.
    exog
        Regressors shared by all four models, passed to ``PanelOLS``
        unchanged.

    Returns
    -------
    The value of ``compare({...}, stars=True)``, keyed by
    ``'Size of cohort'``, ``'Below median age'``, ``'Sex ratio'`` and
    ``'Have families'`` in that order.
    """
    # (label in the comparison table, dependent-variable column)
    outcomes = (
        ('Size of cohort', 'adult'),
        ('Below median age', 'below_median_age_restr'),
        ('Sex ratio', 'sex_ratio'),
        ('Have families', 'have_adults_patch'),
    )

    fitted = {}
    for label, column in outcomes:
        model = PanelOLS(df[column],
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # `singletons` is a model option and is set on PanelOLS above; it is
        # not an option of the clustered covariance estimator, so it is no
        # longer forwarded to fit().
        fitted[label] = model.fit(cov_type='clustered', clusters=df["id_e"])

    return compare(fitted, stars=True)
def crime_by_type(df):
    """Fit the baseline specification separately for four crime-rate outcomes.

    ``df`` is indexed by ``(id_e_t, id_a)`` and each of
    ``crime_rate_violent_p30``, ``crime_rate_freedom_p30``,
    ``crime_rate_sexual_p30`` and ``crime_rate_property_p30`` is regressed
    on a constant, ``kid012_all`` and ``all_exp_13`` .. ``all_exp_86`` with
    entity effects, time effects, absorbed regressors dropped and
    ``singletons=True``.  Standard errors are clustered on ``id_e``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``id_e_t``, ``id_a``, ``id_e``, the four outcome
        columns and every regressor column listed above.

    Returns
    -------
    The value of ``compare({...}, stars=True)``, keyed by ``'violent'``,
    ``'freedom'``, ``'sexual'`` and ``'property'`` in that order.
    """
    mi_data = df.set_index(["id_e_t", "id_a"])

    # Same 75 names as the former hand-written list.
    exog_vars = ["kid012_all"] + [
        "all_exp_" + str(k) for k in range(13, 87)
    ]
    exog_baseline_type = sm.add_constant(mi_data[exog_vars])

    # (label in the comparison table, dependent-variable column)
    outcomes = (
        ('violent', 'crime_rate_violent_p30'),
        ('freedom', 'crime_rate_freedom_p30'),
        ('sexual', 'crime_rate_sexual_p30'),
        ('property', 'crime_rate_property_p30'),
    )

    fitted = {}
    for label, column in outcomes:
        model = PanelOLS(mi_data[column],
                         exog_baseline_type,
                         entity_effects=True,
                         time_effects=True,
                         drop_absorbed=True,
                         singletons=True)
        # `clusters` is the keyword the clustered estimator reads; the
        # previous `cluster=` spelling is not one of its options, so the
        # id_e labels were not reaching it under that name.
        fitted[label] = model.fit(cov_type='clustered',
                                  clusters=mi_data["id_e"])

    return compare(fitted, stars=True)
示例#29
0
import sys

import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

# Command line: <input csv> <output text file>.
DATA_FILE, OUTPUT_FILE = sys.argv[1], sys.argv[2]

# Load the data and give it the (entity, time) index linearmodels expects.
change_df = pd.read_csv(DATA_FILE).set_index(["Code", "date"])

exog_vars = [
    "Income_t0_log",
    "nm_change",
    "shm_change",
    "ne_change",
    "sum_adv_t0",
]
exog = sm.add_constant(change_df.loc[:, exog_vars])

# NOTE(review): no entity or time effects are requested here even though the
# result is called fe_res -- confirm that a model without effects is intended.
mod = PanelOLS(change_df["growth_rate"], exog)
fe_res = mod.fit()

# Persist the plain-text summary table.
with open(OUTPUT_FILE, "w") as f:
    f.write(fe_res.summary.as_text())
示例#30
0
import numpy as np
import linearmodels as lm
# Switch off linearmodels' missing-value warning, then call the warning helper
# directly with a sample mask.
# NOTE(review): presumably this call is only a check that nothing is emitted
# once the flag is False -- confirm.
lm.WARN_ON_MISSING = False
from linearmodels import utility
utility.missing_warning(np.array([True, True, False]))

from linearmodels.panel import PanelOLS, RandomEffects, PooledOLS
from linearmodels.datasets import wage_panel
import statsmodels.api as sm
# Load the bundled wage panel and index it by (nr, year).
data = wage_panel.load()
data = data.set_index(['nr','year'])
dependent = data.lwage
exog = sm.add_constant(data[['expersq','married','union']])
# res / res2: the same two-way effects model fitted with two covariance types.
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')
# res3: entity effects only, 'exper' added, errors clustered by entity.
exog = sm.add_constant(data[['exper', 'expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered',cluster_entity=True)
# res4: random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')
from linearmodels.panel.results import compare

# res5: pooled OLS with 'year' added as a categorical regressor.  The
# categorical is built from the reset index and carries no index of its own,
# so it is assigned positionally; .copy() keeps the new column off `data`.
exog = sm.add_constant(data[['exper', 'expersq','married','union']].copy())
import pandas as pd
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')
# Side-by-side table of all five fits, passed as a list.
print(compare([res,res2, res3, res4, res5]))

print(data.columns)