Пример #1
0
def run_regressions_3(data=None, endog=None, exog=None, options=0, clusterfirm=0):
    """Fit the same regression on every dataset in ``data``.

    Parameters
    ----------
    data : iterable, optional
        Datasets to estimate on. Each element is indexed with ``endog`` and
        ``exog`` and, when ``clusterfirm == 0``, must expose a ``gvkey``
        attribute. Defaults to an empty list.
    endog, exog : optional
        Selectors passed to ``elem[...]`` for the dependent variable and the
        regressors. Default to empty lists.
    options : int
        0 -> ``PanelOLS`` with entity and time effects,
        1 -> ``PanelOLS`` with time effects only,
        2 -> ``PooledOLS``.
    clusterfirm : int
        0 -> clustered covariance using ``elem.gvkey`` as clusters,
        1 -> clustered covariance with ``cluster_entity=True``,
        2 -> the estimator's default ``fit()``.

    Returns
    -------
    list
        One fitted result per element of ``data``, in order.

    Raises
    ------
    ValueError
        If ``options`` or ``clusterfirm`` is not one of 0, 1, 2. Previously an
        unknown ``options`` crashed with an unbound ``mod`` and an unknown
        ``clusterfirm`` silently produced an empty result list.
    """
    if options not in (0, 1, 2):
        raise ValueError(f"options must be 0, 1 or 2, got {options!r}")
    if clusterfirm not in (0, 1, 2):
        raise ValueError(f"clusterfirm must be 0, 1 or 2, got {clusterfirm!r}")

    # None sentinels replace the former mutable ``[]`` defaults.
    data = [] if data is None else data
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    results = []
    print(endog)
    for elem in data:
        if options == 2:
            print(type(elem))
            mod = PooledOLS(elem[endog], elem[exog])
        else:
            # options 0 and 1 differ only in whether entity effects are on.
            mod = PanelOLS(elem[endog],
                           elem[exog],
                           entity_effects=(options == 0),
                           time_effects=True)

        if clusterfirm == 0:
            fitted = mod.fit(cov_type='clustered', clusters=elem.gvkey)
        elif clusterfirm == 1:
            fitted = mod.fit(cov_type='clustered', cluster_entity=True)
        else:
            fitted = mod.fit()
        results.append(fitted)
    return results
Пример #2
0
def _fit_gvkey_clustered(data, dependents, exog, entity_effects):
    """Fit one time-effects PanelOLS per dependent column, clustered on ``data.gvkey``."""
    return [
        PanelOLS(data[dependent],
                 data[exog],
                 entity_effects=entity_effects,
                 time_effects=True).fit(cov_type='clustered',
                                        clusters=data.gvkey)
        for dependent in dependents
    ]


def run_regressions(dataa, datab, endog1, endog2, exog1, exog2, options=0):
    """Run one panel regression per dependent variable on two datasets.

    Parameters
    ----------
    dataa, datab :
        Datasets; each is indexed by column name and must expose ``gvkey``,
        which is used as the cluster variable.
    endog1, endog2 : iterable
        Dependent-variable selectors for ``dataa`` and ``datab`` respectively.
    exog1, exog2 :
        Regressor selectors for ``dataa`` and ``datab`` respectively.
    options : int
        0 -> entity and time effects, 1 -> time effects only.

    Returns
    -------
    list
        Fitted results for every entry of ``endog1`` (on ``dataa``) followed
        by every entry of ``endog2`` (on ``datab``).

    Raises
    ------
    ValueError
        If ``options`` is neither 0 nor 1 (previously this surfaced as an
        unbound-variable error inside the loop).
    """
    if options not in (0, 1):
        raise ValueError(f"options must be 0 or 1, got {options!r}")

    print(endog1)
    entity_effects = options == 0
    results = _fit_gvkey_clustered(dataa, endog1, exog1, entity_effects)
    results.extend(_fit_gvkey_clustered(datab, endog2, exog2, entity_effects))
    return results
Пример #3
0
def regressions(data, endog, exog, options, clusterfirm, constant):
    """Fit a single panel regression and return its result.

    Parameters
    ----------
    data :
        Dataset indexed by column name; must expose ``gvkey`` when
        ``clusterfirm == 0``.
    endog :
        Selector for the dependent variable (``data[endog]``).
    exog :
        Selector for the regressors (``data[exog]``).
    options : int
        0 -> ``PanelOLS`` with entity and time effects,
        1 -> ``PanelOLS`` with time effects only,
        2 -> ``PooledOLS``.
    clusterfirm : int
        0 -> clustered covariance using ``data.gvkey`` as clusters,
        1 -> clustered covariance with ``cluster_entity=True``,
        2 -> the estimator's default ``fit()``.
    constant : int
        1 -> add a constant column via ``sm.add_constant``; 0 -> use the
        regressors as they are.

    Returns
    -------
    The fitted result object.

    Raises
    ------
    ValueError
        If any selector is outside its allowed set. Previously such values
        fell through every branch and failed later on an unbound variable, or
        passed bare column names to the estimator.
    """
    if constant not in (0, 1):
        raise ValueError(f"constant must be 0 or 1, got {constant!r}")
    if options not in (0, 1, 2):
        raise ValueError(f"options must be 0, 1 or 2, got {options!r}")
    if clusterfirm not in (0, 1, 2):
        raise ValueError(f"clusterfirm must be 0, 1 or 2, got {clusterfirm!r}")

    regressors = sm.add_constant(data[exog]) if constant == 1 else data[exog]
    dependent = data[endog]

    if options == 2:
        mod = PooledOLS(dependent, regressors)
    else:
        # options 0 and 1 differ only in whether entity effects are on.
        mod = PanelOLS(dependent,
                       regressors,
                       entity_effects=(options == 0),
                       time_effects=True)

    if clusterfirm == 0:
        return mod.fit(cov_type='clustered', clusters=data.gvkey)
    if clusterfirm == 1:
        return mod.fit(cov_type='clustered', cluster_entity=True)
    return mod.fit()
Пример #4
0
def run_regressions_2(data, endog=None, exog=None, options=0):
    """Regress every dependent variable in ``endog`` on the ``exog`` columns.

    NOTE(review): the previous implementation never used ``exog``. With
    ``options == 0`` it regressed each dependent variable on the *last*
    ``endog`` column (through an inner loop that overwrote ``mod``), and with
    ``options == 1`` it read an undefined name and raised ``NameError``. This
    version assumes the intent was to regress on ``data[exog]``; confirm
    against the caller.

    Parameters
    ----------
    data :
        Dataset indexed by column name; must expose ``gvkey``, which is used
        as the cluster variable.
    endog : iterable, optional
        Dependent-variable selectors, one regression per entry. Defaults to
        an empty list.
    exog : optional
        Regressor selector passed to ``data[...]``. Defaults to an empty list.
    options : int
        0 -> entity and time effects, 1 -> time effects only.

    Returns
    -------
    list
        One fitted result per entry of ``endog``.

    Raises
    ------
    ValueError
        If ``options`` is neither 0 nor 1.
    """
    if options not in (0, 1):
        raise ValueError(f"options must be 0 or 1, got {options!r}")

    # None sentinels replace the former mutable ``[]`` defaults.
    endog = [] if endog is None else endog
    exog = [] if exog is None else exog

    print(endog)
    entity_effects = options == 0
    results = []
    for dependent in endog:
        mod = PanelOLS(data[dependent],
                       data[exog],
                       entity_effects=entity_effects,
                       time_effects=True)
        results.append(mod.fit(cov_type='clustered', clusters=data.gvkey))
    return results
def get_fe(
    regression_variables: List[Tuple],
    data: Dict[str, pd.DataFrame],
    datasets: Dict[pd.DataFrame, Any],
    entity_effects: bool = False,
    time_effects: bool = False,
) -> Tuple[DataFrame, Any, List[Any], Any]:
    """Fit a fixed-effects (PanelOLS) regression and print its summary.

    The regression data is assembled by ``get_regression_data`` (called with
    the "FE" tag). A constant is added to the regressors before fitting. Any
    warnings raised while building or fitting the model are captured and
    printed in red after the model output.

    Parameters
    ----------
    regression_variables : list
        The regression variables, the first one being the dependent variable.
    data : dict
        A dictionary containing the datasets.
    datasets : dict
        A dictionary containing the column and dataset names of each
        column/dataset combination.
    entity_effects : bool
        Whether to include entity effects.
    time_effects : bool
        Whether to include time effects.

    Returns
    -------
    The dataset used, the dependent variable, the independent variables and
    the fitted model (``None`` when the dataset is empty).
    """
    prepared = get_regression_data(regression_variables, data, datasets, "FE")
    regression_df, dependent_variable, independent_variables = prepared

    model = None
    if not regression_df.empty:
        # record=True collects warnings in a list instead of emitting them.
        with warnings.catch_warnings(record=True) as caught:
            regressors = add_constant(regression_df[independent_variables])
            estimator = PanelOLS(
                regression_df[dependent_variable],
                regressors,
                entity_effects=entity_effects,
                time_effects=time_effects,
            )
            model = estimator.fit()
            console.print(model)

            if caught:
                console.print("Warnings:")
                for entry in caught:
                    text = f"[red]{entry.message}[/red]"
                    console.print(text.replace("\n", ""))

    return regression_df, dependent_variable, independent_variables, model
def balancing_tests_cohort_results(df, exog):
    """Run the four cohort balancing regressions and compare them.

    Each outcome column of ``df`` is regressed on ``exog`` with entity and
    time effects, with the covariance clustered on ``df.id_e``. The fitted
    results are handed to ``compare`` with significance stars enabled.

    Parameters
    ----------
    df :
        Panel dataset exposing the outcome columns listed below and ``id_e``.
    exog :
        Regressors passed unchanged to every model.

    Returns
    -------
    The object returned by ``compare`` for the four labelled results.
    """
    # (label shown in the comparison, outcome column on df), in table order.
    outcomes = (
        ('Size of cohort', 'adult'),
        ('Below median age', 'below_median_age_restr'),
        ('Sex ratio', 'sex_ratio'),
        ('Have families', 'have_adults_patch'),
    )

    fitted = {}
    for label, column in outcomes:
        model = PanelOLS(getattr(df, column),
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # NOTE(review): singletons is also forwarded to fit() as in the
        # original; confirm that fit() accepts it as a covariance option.
        fitted[label] = model.fit(cov_type='clustered',
                                  clusters=df.id_e,
                                  singletons=False)

    return compare(fitted, stars=True)
def balancing_tests_cantonal_results(df, exog):
    """Run the three cantonal balancing regressions and compare them.

    These are the conditional results (AS = asylum seekers). Each outcome
    column of ``df`` is multiplied by 100, regressed on ``exog`` with entity
    and time effects, and fitted with the covariance clustered on ``df.id_e``.

    Parameters
    ----------
    df :
        Panel dataset exposing the outcome columns listed below and ``id_e``.
    exog :
        Regressors passed unchanged to every model.

    Returns
    -------
    The object returned by ``compare`` for the three labelled results, with
    significance stars enabled.
    """
    # (label shown in the comparison, outcome column on df), in table order.
    outcomes = (
        ('Between countries', 'share_AS_between'),
        ('Within countries', 'share_AS_within'),
        ('Sex ratio', 'sex_ratio_AS_ntc'),
    )

    fitted = {}
    for label, column in outcomes:
        model = PanelOLS(getattr(df, column) * 100,
                         exog,
                         entity_effects=True,
                         time_effects=True,
                         singletons=False)
        # NOTE(review): singletons is also forwarded to fit() as in the
        # original; confirm that fit() accepts it as a covariance option.
        fitted[label] = model.fit(cov_type='clustered',
                                  clusters=df.id_e,
                                  singletons=False)

    return compare(fitted, stars=True)
Пример #8
0
	def preprocessing_regression(self):
		"""Impute, log-transform and fit a time-effects panel regression on ``self.df``.

		Returns the fitted PanelOLS result, with the covariance clustered by entity.

		NOTE(review): columns are addressed by position, so this assumes a
		fixed column layout in ``self.df`` -- confirm against the loader.
		"""
		# Fill missing values in the first nine columns with the column mean;
		# the imputed values are written back into self.df.
		imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
		self.df.iloc[:, :9] = imputer.fit_transform(self.df.iloc[:, :9])
		data = self.df.iloc[:, :10]
		# Take the natural log of the variables that have outliers.
		# NOTE(review): presumably positions 2, 3 and 4 are the mezun, yogunluk
		# and dogum columns -- TODO confirm. Assigning on a slice of self.df may
		# also trigger pandas' SettingWithCopyWarning.
		data.mezun = np.log(self.df.iloc[:, 2])
		data.yogunluk = np.log(self.df.iloc[:, 3])
		data.dogum = np.log(self.df.iloc[:, 4])
		# Index by (iller, yil) to put the data into panel form.
		data = data.set_index(['iller', 'yil'])
		# Regress mezun on the columns at positions 1..8 (after re-indexing)
		# with time effects.
		# NOTE(review): verify that this positional slice does not include the
		# dependent variable itself.
		mod = PanelOLS(data.mezun, data.iloc[:, 1:9], time_effects=True)
		res = mod.fit(cov_type='clustered', cluster_entity=True)

		return res
Пример #9
0
def panel_regression(y,
                     xs,
                     years,
                     country,
                     list_x,
                     prev=0,
                     show=False,
                     save=True,
                     path="",
                     diff=False,
                     constant=False,
                     entity_effects=False):
    """Fit a panel regression on the filtered dataset and evaluate the fit.

    Parameters
    ----------
    y, xs, years, country, prev :
        Forwarded to ``bdf.filter_origin_country_dataset`` (together with the
        first index level of ``xs``) to build the estimation dataset; also
        forwarded to the plotting helper when ``show`` is true.
    list_x :
        Columns of the filtered dataset used as regressors.
    show : bool
        When true, plot real versus fitted values.
    save, path :
        Forwarded to the plotting helper.
    diff : bool
        When true use ``FirstDifferenceOLS``; otherwise ``PanelOLS``.
    constant : bool
        When true, add a constant column to the regressors.
    entity_effects : bool
        Passed to ``PanelOLS`` (ignored when ``diff`` is true).

    Returns
    -------
    tuple
        ``(res.params, res.fitted_values)`` of the fitted model.
    """
    data = bdf.filter_origin_country_dataset(y, country, years,
                                             xs.index.levels[0].tolist(), xs,
                                             prev)
    # Flags are tested by truthiness rather than compared to True/False.
    exog = sm.add_constant(data[list_x]) if constant else data[list_x]

    if diff:
        mod = FirstDifferenceOLS(data.y, exog)
    else:
        mod = PanelOLS(data.y, exog, entity_effects=entity_effects)
    res = mod.fit()

    evaluation(data, res.fitted_values, constant, len(xs.columns.tolist()))

    if show:
        pmf.plot_real_VS_prediction(y, res.fitted_values, xs, years, country,
                                    45, "Regression model", save, path)

    return (res.params, res.fitted_values)
Пример #10
0
# CLOs have a much more positive holding-period return than corporate bonds.

# In[41]:

# Part B
# 1. OLS without fixed effects
hpr_OLS = smf.ols(formula='lnhpr ~ clo+tmkt_rf+tsmb+thml+tterm+tdef+hp',
                  data=ps5)
# Panel data is used to regress the holding-period return on the common risk
# factors (tmkt_rf, tsmb, thml, tterm and tdef) and the holding period (hp).
# clo is an indicator equal to 1 if the bond is a CLO. A significant, positive
# clo coefficient means CLOs earn a higher return than corporate bonds.
res = hpr_OLS.fit()
print(res.summary())
# The significant positive coefficient on clo shows that CLOs have a higher
# excess return than corporate bonds.

# In[59]:

# 2. OLS with firm fixed effects
# NOTE(review): startyear is not used again in this fragment -- confirm
# whether it is needed further down.
startyear = pd.Categorical(ps5.startyear)
# Index by (entity, time) so the frame can be passed to PanelOLS.
ps5 = ps5.set_index(['entity_name', 'startyear'])

# In[67]:

exog_vars = ['clo', 'tmkt_rf', 'tsmb', 'thml', 'tterm', 'tdef', 'hp']
exog = sm.add_constant(ps5[exog_vars])
mod = PanelOLS(ps5.lnhpr, exog, entity_effects=True)
res = mod.fit()
print(res)
# After adding firm fixed effects, the clo coefficient is still significantly
# positive and of similar magnitude, so the argument that CLOs earn a higher
# excess return than corporate bonds holds.
    need_sde,
    tolerance,
    r,
    interactive_start_value_effect,
    within_effect,
):
    # gerate simulation data
    X, Y, panel_df = dgp_func(T_N_sim.loc[case, "T"], T_N_sim.loc[case, "N"])
    p = X.shape[0]
    # within model require no collinear variable combinations
    no_collinear_x_var = ["x" + str(i + 1) for i in range(min(p, 3))]
    # run estimator for starting value for interactive estimator
    if interactive_start_value_effect == "twoways":
        start_value_estimator = PanelOLS(
            panel_df.y,
            panel_df[no_collinear_x_var],
            entity_effects=True,
            time_effects=True,
        )
    else:
        start_value_estimator = PooledOLS(
            panel_df.y, panel_df[["x" + str(i) for i in range(1, p + 1)]]
        )
    start_value_result = start_value_estimator.fit()
    interactive_start_value = [
        *start_value_result.params.tolist(),
        *np.zeros(p - len(start_value_result.params)),
    ]
    # run interactive fixed effect estimator
    interactive_estimator = InteractiveFixedEffect(Y, X)
    (
        beta_hat_interactive,
Пример #12
0
# Print the 1% / 5% / 10% entries of the test output.
# NOTE(review): presumably these are ADF critical values -- confirm against
# where orePriceRes_BDI_ADF is built.
print("1%            :", orePriceRes_BDI_ADF[2])
print("5%            :", orePriceRes_BDI_ADF[3])
print("10%           :", orePriceRes_BDI_ADF[4])

# Set up the DataFrame for PanelOLS: index by (port, date) so that port is the
# entity used for clustering.
freightCost_panel = freightCost_df.set_index(["port", "date"])

# Define the explanatory variables.
freightCost_vars = [
    "growth", "logd", "logf", "ore_price", "port_dummy1", "port_dummy2"
]
freightCost_reg = sm.add_constant(freightCost_panel[freightCost_vars])

# Run a panel regression without entity effects, with the covariance
# clustered by entity (port).
freightCost_results = PanelOLS(freightCost_panel["avefreight"],
                               freightCost_reg,
                               entity_effects=False).fit(cov_type="clustered",
                                                         cluster_entity=True)

# Same setup for the specification that adds BDI as a regressor.
freightCost_BDI_panel = freightCost_df.set_index(["port", "date"])

# Define the explanatory variables (as above, plus "BDI").
freightCost_BDI_vars = [
    "growth", "logd", "logf", "ore_price", "BDI", "port_dummy1", "port_dummy2"
]
freightCost_BDI_reg = sm.add_constant(
    freightCost_BDI_panel[freightCost_BDI_vars])

# Running a panel regression
freightCost_BDI_results = PanelOLS(freightCost_BDI_panel["avefreight"],
                                   freightCost_BDI_reg,
    VIF[y] = 1 / (1 - res.rsquared)
# Write the collected VIF values to a text file.
with open('../result/VIF.txt', 'w') as f:
    print(VIF, file=f)

# Pooled regression
# NOTE(review): no constant is added to x here, unlike the panel models
# below -- confirm this is intended.
x = data[["MV", "RM", "BM", "ROE", "Inv"]]
y = data["Ret"]
results = sm.OLS(y, x).fit()
with open('../result/pooled_reg.txt', 'w') as f:
    print(results.summary(), file=f)

# Fixed-effects regression (entity = Stkcd)
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Stkcd', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
# NOTE(review): cov_type='clustered' is requested without clusters,
# cluster_entity or cluster_time -- confirm which clustering is intended.
res = mod.fit(cov_type='clustered')
with open('../result/fixed_effects.txt', 'w') as f:
    print(res, file=f)

# Regression controlling for industry (entity = Industry); the data is
# reloaded from disk because the frame above has already been re-indexed.
data = pd.read_csv("../data/data_all.csv")
data['Time'] = pd.to_datetime(data['Time'])
data = data.set_index(['Industry', 'Time'])
dependent = data.Ret
exog = sm.add_constant(data[['MV', 'BM', 'RM', 'ROE', 'Inv']])
mod = PanelOLS(dependent, exog, entity_effects=True)
# NOTE(review): same unspecified clustering as above.
res = mod.fit(cov_type='clustered')
with open('../result/industry_control.txt', 'w') as f:
    print(res, file=f)
def crime_by_type(df):
    """Estimate the baseline specification separately for each crime type.

    The data is indexed by ``(id_e_t, id_a)``. Each crime-rate outcome is
    regressed on a constant, ``kid012_all`` and the exposure dummies
    ``all_exp_13`` .. ``all_exp_86`` with entity and time effects, and the
    covariance is clustered on the ``id_e`` column.

    The cluster variable is passed as ``clusters=``, the keyword documented
    for linearmodels' clustered covariance and used by the other regressions
    in this file; it was previously passed as ``cluster=``.

    Parameters
    ----------
    df :
        DataFrame holding the index columns, the regressors, ``id_e`` and the
        four ``crime_rate_*_p30`` outcome columns.

    Returns
    -------
    The object returned by ``compare`` for the four labelled results, with
    significance stars enabled.
    """
    mi_data = df.set_index(["id_e_t", "id_a"])
    # Same 75 names as the former literal list: kid012_all, all_exp_13..86.
    exog_vars = ["kid012_all"] + [f"all_exp_{k}" for k in range(13, 87)]
    exog_baseline_type = sm.add_constant(mi_data[exog_vars])

    # (label shown in the comparison, outcome column), in table order.
    outcomes = (
        ('violent', 'crime_rate_violent_p30'),
        ('freedom', 'crime_rate_freedom_p30'),
        ('sexual', 'crime_rate_sexual_p30'),
        ('property', 'crime_rate_property_p30'),
    )

    fitted = {}
    for label, column in outcomes:
        model = PanelOLS(mi_data[column],
                         exog_baseline_type,
                         entity_effects=True,
                         time_effects=True,
                         drop_absorbed=True,
                         singletons=True)
        fitted[label] = model.fit(cov_type='clustered',
                                  clusters=mi_data["id_e"])

    return compare(fitted, stars=True)
from linearmodels.panel import PanelOLS
import statsmodels.api as sm
from linearmodels.panel import PooledOLS
import sys
import os

# Command-line arguments: input CSV path and output text-file path.
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

change_df = pd.read_csv(DATA_FILE)
# The income group is the last "_"-separated token of the output file's base
# name, taken before the first ".".
base = os.path.basename(OUTPUT_FILE)
incomegroup = base.split(".")[0].split("_")[-1]
# .copy() makes the subset an independent frame, so the column assignments
# below do not act on a slice of change_df (avoids SettingWithCopyWarning).
select_df = change_df[change_df.IncomeGroup == incomegroup].copy()

# Filter out unbalanced data points: keep only the codes that have as many
# rows as there are distinct periods.
num_period = len(select_df.period.unique())
select_df['size'] = select_df.groupby('Code')['Code'].transform('size')
select_df = select_df[select_df['size'] == num_period].copy()

select_df['Income_t0_log'] = np.log10(select_df['Income_t0'])
# Index by (entity, time) so the frame can be passed to PanelOLS.
select_df = select_df.set_index(['Code', 'date'])

exog_vars = [
    'Income_t0_log', 'nm_change', 'shm_change', 'ne_change', 'sum_adv_t0'
]
exog = sm.add_constant(select_df[exog_vars])
mod = PanelOLS(select_df.growth_rate, exog, entity_effects=True)
fe_res = mod.fit()
# Write the fixed-effects summary as plain text.
with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
Пример #16
0
# In[118]:

from linearmodels.panel import PanelOLS
# Fixed effects
# Documentation: https://bashtage.github.io/linearmodels/panel/models.html#linearmodels.panel.model.PanelOLS

# Regressors for the ln_gdp_pc model.
independent_vars = [
    'Gross fixed capital formation (% of GDP)',
    'Gross domestic savings (% of GDP)', 'Population growth (annual %)',
    'FDI, net inflows (% of GDP)', 'Aid/Gdp_sqr', 'Aid/Gdp', 'ln_ODA', 'wopen',
    'Trade'
]

mod = PanelOLS(df['ln_gdp_pc'],
               df[independent_vars],
               entity_effects=True,
               time_effects=True
               )  # entity_effects and time_effects can each be turned on or off

res = mod.fit(
    cov_type='clustered',
    cluster_entity=True)  # cov_type selects the covariance estimator
# cov_type can be 'unadjusted' / 'homoskedastic', 'robust' / 'heteroskedastic',
# or 'clustered' (one- or two-way clustering).

print(res)

# <h4> ODA - Official development assistance
#  Three significant independent variables: Aid/GDP, Aid/GDP^2, ln_ODA </h4>

# <h2><center>DIAGNOSTIC ANALYSIS</center></h2>
#mod = PanelOLS(temp.UE12M, temp[['activeWeight12M']], entity_effects = True)


#mod = PanelOLS(temp.UE3M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight3MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight6MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight12MSquared']], entity_effects = True)


#mod = PanelOLS(temp.UE6M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight3MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight6MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight12MSquared']], entity_effects = True)


# Active specification: UE12M regressed on the three squared active-weight
# columns with entity effects. The commented-out lines around this one are
# alternative specifications to switch to by hand.
mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared', 'activeWeight6MSquared', 'activeWeight12MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight3MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight6MSquared']], entity_effects = True)
#mod = PanelOLS(temp.UE12M, temp[['activeWeight12MSquared']], entity_effects = True)


#Both entity and time effect
#mod = PanelOLS(temp.UE3M, temp[['activeWeight3M', 'activeWeight6M', 'activeWeight12M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight3M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight6M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE3M, temp[['activeWeight12M']], entity_effects = True, time_effects = True)


#mod = PanelOLS(temp.UE6M, temp[['activeWeight3M', 'activeWeight6M', 'activeWeight12M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight3M']], entity_effects = True, time_effects = True)
#mod = PanelOLS(temp.UE6M, temp[['activeWeight6M']], entity_effects = True, time_effects = True)
Пример #18
0
def panel_regression_training_test(y,
                                   xs,
                                   years_training,
                                   years_test,
                                   country,
                                   list_x,
                                   prev=0,
                                   show=False,
                                   save=True,
                                   path="",
                                   diff=False,
                                   constant=False,
                                   entity_effects=False):
    """Fit a panel regression on the training years and evaluate it on the test years.

    Parameters
    ----------
    y, xs, country, prev :
        Forwarded to ``bdf.filter_origin_country_dataset`` (together with the
        first index level of ``xs`` and the combined years) to build the
        dataset; also forwarded to the plotting helper when ``show`` is true.
    years_training, years_test : list
        Values of the second index level used for fitting and for
        evaluation respectively.
    list_x :
        Columns of the filtered dataset used as regressors.
    show : bool
        When true, plot real versus fitted values.
    save, path :
        Forwarded to the plotting helper. ``path`` was previously ignored in
        favour of a hard-coded empty string.
    diff : bool
        When true use ``FirstDifferenceOLS``; otherwise ``PanelOLS``.
    constant : bool
        When true, add a constant column to both regressor sets.
    entity_effects : bool
        Passed to ``PanelOLS`` (ignored when ``diff`` is true).

    Returns
    -------
    tuple
        ``(res_tr.params, fitted_values_)`` where ``fitted_values_`` stacks
        the test-period frame and the training fitted values, sorted by index.
    """
    # Local import keeps the block self-contained; pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0.
    import pandas as pd

    years = years_training + years_test
    data = bdf.filter_origin_country_dataset(y, country, years,
                                             xs.index.levels[0].tolist(), xs,
                                             prev)

    data_tr = data.loc[(slice(None), years_training), :]
    data_te = data.loc[(slice(None), years_test), :]

    # Flags are tested by truthiness rather than compared to True/False.
    if constant:
        exog_tr = sm.add_constant(data_tr[list_x])
        exog_te = sm.add_constant(data_te[list_x])
    else:
        exog_tr = data_tr[list_x]
        exog_te = data_te[list_x]

    if diff:
        mod = FirstDifferenceOLS(data_tr.y, exog_tr)
    else:
        mod = PanelOLS(data_tr.y, exog_tr, entity_effects=entity_effects)
    res_tr = mod.fit()

    # Test-period predictions: scale every regressor column by its estimated
    # coefficient, then sum across columns.
    fitted_values_te = res_tr.params.values * exog_te
    fitted_values_te["fitted_values"] = fitted_values_te.sum(axis=1)
    fitted_values_ = pd.concat([fitted_values_te, res_tr.fitted_values])
    fitted_values_ = fitted_values_.sort_index()

    if show:
        pmf.plot_real_VS_prediction(y,
                                    fitted_values_,
                                    xs,
                                    years,
                                    country,
                                    45,
                                    "Regression model",
                                    save=save,
                                    path=path)

    print("-------------- Trainin-Test  Results --------------")
    evaluation(data.loc[(slice(None), years_test), ], fitted_values_te,
               constant, len(xs.columns.tolist()))

    return (res_tr.params, fitted_values_)
Пример #19
0
# Variable construction
df['lognprints'] = np.log(df['NPRINTs'])
w = (df['YENVOL'] / 100) * (df['volatility'])
df['logwdbw'] = np.log(w / (601000 * 0.016))
# Drop rows containing NaN or +/-inf (the log is -inf when volatility is 0).
# axis is passed by keyword: pandas 2.0 no longer accepts it positionally.
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
# Pooled OLS
exog = sm.add_constant(df['logwdbw'])
mod = PooledOLS(df.lognprints, exog)
pooled_res = mod.fit()
print(pooled_res)
"""
# fixed effects with time dummy
exog = sm.add_constant(df[['logwdbw','Date1']])
mod = PanelOLS(df.lognprints, exog, entity_effects=True)
fe_res1 = mod.fit()
print(fe_res1)
"""
# Time fixed effects with the ticker dummy column as an extra regressor
exog = sm.add_constant(df[['logwdbw', "Ticker1"]])
mod = PanelOLS(df.lognprints, exog, time_effects=True)
fe_res3 = mod.fit()
print(fe_res3)

# Two-way fixed effects (disabled: kept inside a string literal)
"""
exog = sm.add_constant(df['logwdbw'])
mod = PanelOLS(df.lognprints, exog, entity_effects=True, time_effects=True)
fe_res2 = mod.fit()
print(fe_res2)
"""
Пример #20
0
#temp = PanelDataTrade3M.copy(deep = True)
# (3) 6-month trade-based AFP
#temp = PanelDataTrade6M.copy(deep = True)
# (4) 12-month trade-based AFP
#temp = PanelDataTrade12M.copy(deep = True)

# Prepare the variables: index by (fund, date) for PanelOLS and drop rows
# whose AFP is -inf or +inf.
temp = temp.set_index(['crsp_portno', 'slided_caldt'])
temp = temp[temp.AFP != -np.inf]
temp = temp[temp.AFP != np.inf]

# Comment / uncomment one of the following to choose the dependent variable
# (1) Carhart Alpha
#mod = PanelOLS(temp.CarhartAlpha, temp.AFP, time_effects = True)
# (2) Fama-French Alpha
mod = PanelOLS(temp.FFAlpha, temp.AFP, time_effects=True)

# Comment / uncomment one of the following to choose the standard errors
# (1) clustered by fund
#res = mod.fit(cov_type = 'clustered', cluster_entity = True)
# (2) clustered by time
res = mod.fit(cov_type='clustered', cluster_time=True)

# Print the regression result
print(res)

# Drop the working frame.
del temp
#%%
# =============================================================================
# Part 2: Panel Data Regression with multiple independent variables:
# - AFP
# print(data1)

# Keep a categorical copy of Date before it moves into the index, then put it
# back as a column so it can be used as a regressor.
d = pd.Categorical(data1['Date'])
data1 = data1.set_index(['ID', 'Date'])
data1['Date'] = d
# print(data1)

# exog_vars includes the categorical Date column; a holds the same regressors
# without it.
exog_vars = [
    'Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate', 'Date'
]
a = ['Kilo', 'Brakes', 'Range', 'Speed', 'RPM', 'Engine fuel rate']
print(data1[a])
exog = sm.add_constant(data1[exog_vars])
exog1 = sm.add_constant(data1[a])
# Entity fixed effects with Date as a regressor, then pooled, random-effects
# and between estimators on the regressors without Date.
mod = PanelOLS(data1['Accelerator pedal position'],
               exog,
               entity_effects=True,
               time_effects=False)
mod1 = PooledOLS(data1['Accelerator pedal position'], exog1)
mod2 = RandomEffects(data1['Accelerator pedal position'], exog1)
mod3 = BetweenOLS(data1['Accelerator pedal position'], exog1)
res = mod.fit()
pooled_res = mod1.fit()
re_res = mod2.fit()
be_res = mod3.fit()
# The fixed-effects result is printed on its own; it is not part of the
# comparison table below.
print(res)

print(compare({'Pooled': pooled_res, 'RE': re_res, 'BE': be_res}))

# No entry-point logic: everything above runs at import time.
if __name__ == '__main__':
    pass
def _fit_women_column(frame, index_cols, regressors):
    """Fit one column of the women's baseline table.

    Indexes ``frame`` by ``index_cols``, regresses
    ``crime_rate_all_violent_p30`` on a constant plus ``regressors`` with
    entity and time effects, and clusters the covariance on the ``id_e``
    column of that same indexed sample.

    Parameters
    ----------
    frame : pandas.DataFrame
        Sample to estimate on; must contain ``index_cols``, ``regressors``,
        ``crime_rate_all_violent_p30`` and ``id_e``.
    index_cols : list of str
        The two columns used as the panel index, in (entity, time) order.
    regressors : list of str
        Names of the explanatory columns.

    Returns
    -------
    The fitted result object returned by ``PanelOLS.fit``.
    """
    panel = frame.set_index(index_cols)
    exog = sm.add_constant(panel[regressors])
    model = PanelOLS(panel.crime_rate_all_violent_p30,
                     exog,
                     entity_effects=True,
                     time_effects=True,
                     drop_absorbed=True,
                     singletons=False)
    # The cluster labels are taken from the indexed estimation sample itself.
    # `singletons` is only given to the constructor above: fit() hands its
    # extra keywords to the covariance estimator.
    return model.fit(cov_type='clustered', clusters=panel["id_e"])


def baseline_results_women(df):
    """Estimate the four baseline specifications for adult women.

    Keeps the rows of ``df`` whose ``sex`` is ``"F"`` and whose ``age`` is
    not ``<= 18``, then fits four two-way fixed-effects models of
    ``crime_rate_all_violent_p30``:

    * ``'Full'``      - all remaining rows, ``kid012_all`` and
      ``all_exp_13`` .. ``all_exp_70``;
    * ``'CC and MK'`` - rows with ``allmk_periode == 1``, ``kid012_all`` and
      ``all_exp_13`` .. ``all_exp_71``;
    * ``'CC'``        - rows with ``all_periode == 1``, ``kid012`` and
      ``exp_all_13`` .. ``exp_all_71``;
    * ``'MK'``        - rows with ``mk_periode == 1``, ``MK_kid012`` and
      ``exp_mk_13`` .. ``exp_mk_70``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``sex``, ``age``, ``id_a``, ``id_e_t``, ``id_e``, the
        three ``*_periode`` flags, the outcome and all regressors above.

    Returns
    -------
    The object returned by ``compare(..., stars=True)`` for the four fits.

    Raises
    ------
    KeyError
        If ``df`` has no group with ``sex == "F"`` (from ``get_group``).
    """
    women = df.groupby(by=['sex']).get_group("F")
    # Written as a negated `<= 18` so rows with a missing age are retained,
    # exactly as a plain `> 18` comparison would not do.
    women = women[~(women['age'] <= 18)]

    # NOTE(review): the first column indexes by (id_e_t, id_a) while the other
    # three use (id_a, id_e_t); kept as found -- confirm this is intended.
    res_women = _fit_women_column(
        women,
        ["id_e_t", "id_a"],
        ["kid012_all"] + ["all_exp_%d" % k for k in range(13, 71)])

    res2_women = _fit_women_column(
        women[women['allmk_periode'] == 1],
        ["id_a", "id_e_t"],
        ["kid012_all"] + ["all_exp_%d" % k for k in range(13, 72)])

    res3_women = _fit_women_column(
        women[women['all_periode'] == 1],
        ["id_a", "id_e_t"],
        ["kid012"] + ["exp_all_%d" % k for k in range(13, 72)])

    ## Table 5 column 4 women (exp_mk_71 and above are left out)
    res4_women = _fit_women_column(
        women[women['mk_periode'] == 1],
        ["id_a", "id_e_t"],
        ["MK_kid012"] + ["exp_mk_%d" % k for k in range(13, 71)])

    return (compare(
        {
            'Full': res_women,
            'CC and MK': res2_women,
            'CC': res3_women,
            'MK': res4_women
        },
        stars=True))
Пример #23
0
    # Rows of x are columns 1-4 of calc_mat, i.e. one row per regressor.
    x = np.stack([calc_mat[:, 1], calc_mat[:, 2], calc_mat[:, 3], calc_mat[:, 4]])
    ones = np.ones(len(x[0]))
    # Build the design matrix by stacking each further regressor in front of
    # the columns collected so far.
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for elem in x[1:]:
        X = sm.add_constant(np.column_stack((elem, X)))
    res = sm.OLS(y,X).fit()
    print(res.summary())

    # FE model regression
    # Each file name is expected to start with an integer company code
    # followed by a dot.
    company_codes = [int(each_file.split('.')[0]) for each_file in file_list]
    # Every observation belongs to 2019; one entry per company instead of a
    # hard-coded count of 50.
    time = [2019] * len(company_codes)
    df = pd.DataFrame({
        'TDA': x[0],
        'CR5': x[1],
        'SIZE': x[2],
        'ROE': x[3],
        'REWARD': y,
        'YEAR': time,
        'CODE': company_codes
    })
    df.to_stata('Stock/res.dta')
    df = df.set_index(['CODE', 'YEAR'])
    # Must name columns that exist in df: the second regressor is stored as
    # 'CR5' above, so selecting 'LDA' raised KeyError.
    exog_vars = ['TDA', 'CR5', 'SIZE', 'ROE']
    exog = sm.add_constant(df[exog_vars])
    model = PanelOLS(df['REWARD'], exog, entity_effects=True)
    fe = model.fit()
    print(fe)
Пример #24
0
import numpy as np
import pandas as pd
import statsmodels.api as sm

import linearmodels as lm
from linearmodels import utility
from linearmodels.datasets import wage_panel
from linearmodels.panel import PanelOLS, PooledOLS, RandomEffects
from linearmodels.panel.results import compare

# Switch the package-level missing-data warning flag off, then call the
# warning helper once with a mask that contains a False entry.
lm.WARN_ON_MISSING = False
utility.missing_warning(np.array([True, True, False]))

# Wage panel indexed by (nr, year); log wage is the dependent variable.
data = wage_panel.load().set_index(['nr', 'year'])
dependent = data.lwage

# Entity and time effects, fitted with two different covariance estimators.
exog = sm.add_constant(data[['expersq', 'married', 'union']])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')

# Entity effects only, with 'exper' added and entity-clustered covariance.
exog = sm.add_constant(data[['exper', 'expersq', 'married', 'union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered', cluster_entity=True)

# Random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')

# Pooled OLS with the year attached as a categorical regressor.
exog = sm.add_constant(data[['exper', 'expersq', 'married', 'union']].copy())
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')

print(compare([res, res2, res3, res4, res5]))

print(data.columns)
Пример #25
0
jtrain2 = jtrain
## Define the ID and Time column for Panel Regression
jtrain2 = jtrain2.set_index(['fcode', 'year'])
print(jtrain2.head(5))
exog_vars = ['d88', 'd89', 'grant', 'grant_1']
grant_vars = ['grant']
exog = sm.add_constant(jtrain2[exog_vars])
grant0 = sm.add_constant(jtrain2[grant_vars])

## Model Pooled OLS
model_pool = PooledOLS(jtrain2.lscrap, exog)
pooled_res = model_pool.fit()
print(pooled_res)
## Model Fixed Effects -- Entity Effects - True
model_fe = PanelOLS(jtrain2.lscrap, exog, entity_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Model Fixed Effects -- Entity and Time Effects - True
model_fe = PanelOLS(jtrain2.lscrap,
                    exog,
                    entity_effects=True,
                    time_effects=True)
fe_res = model_fe.fit()
print(fe_res)
## Random Effects Model
model_re = RandomEffects(jtrain2.lscrap, exog)
re_res = model_re.fit()
# Print the random-effects fit itself; the two-way fixed-effects result was
# already printed above.
print(re_res)
#################################################
## Regress scrap~grant
Пример #26
0
import numpy as np
import pandas as pd
from linearmodels.panel import PanelOLS
import statsmodels.api as sm
from linearmodels.panel import PooledOLS
import sys

# Command line: <input csv> <output text file>
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

change_df = pd.read_csv(DATA_FILE)

# Keep only the codes that have as many rows as there are distinct periods.
num_period = len(change_df.period.unique())
change_df['size'] = change_df.groupby('Code')['Code'].transform('size')
change_df = change_df[change_df['size'] == num_period]

# Base-10 log of the initial income, then index the panel by (Code, date).
change_df['Income_t0_log'] = np.log10(change_df['Income_t0'])
change_df = change_df.set_index(['Code', 'date'])

exog_vars = [
    'Income_t0_log',
    'nm_change',
    'shm_change',
    'ne_change',
    'sum_adv_t0',
]
exog = sm.add_constant(change_df[exog_vars])

# Entity fixed-effects regression of the growth rate.
mod = PanelOLS(change_df.growth_rate, exog, entity_effects=True)
fe_res = mod.fit()

# Write the plain-text summary to the requested file.
with open(OUTPUT_FILE, 'w') as f:
    f.write(fe_res.summary.as_text())
Пример #27
0
import sys

import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

# Command line: <input csv> <output text file>
DATA_FILE = sys.argv[1]
OUTPUT_FILE = sys.argv[2]

# Load the data and index the panel by (Code, date).
change_df = pd.read_csv(DATA_FILE)
change_df = change_df.set_index(["Code", "date"])

exog_vars = [
    "Income_t0_log",
    "nm_change",
    "shm_change",
    "ne_change",
    "sum_adv_t0",
]
exog = sm.add_constant(change_df[exog_vars])

# NOTE(review): no entity or time effects are requested here although the
# result is named fe_res -- confirm that is intended.
mod = PanelOLS(change_df.growth_rate, exog)
fe_res = mod.fit()

# Write the plain-text summary to the requested file.
with open(OUTPUT_FILE, "w") as f:
    f.write(fe_res.summary.as_text())
def _fit_baseline_column(frame, index_cols, regressors):
    """Fit one column of the baseline table.

    Indexes ``frame`` by ``index_cols``, regresses
    ``crime_rate_all_violent_p30`` on a constant plus ``regressors`` with
    entity and time effects, and clusters the covariance on the ``id_e``
    column of that same indexed sample.

    Parameters
    ----------
    frame : pandas.DataFrame
        Sample to estimate on; must contain ``index_cols``, ``regressors``,
        ``crime_rate_all_violent_p30`` and ``id_e``.
    index_cols : list of str
        The two columns used as the panel index, in (entity, time) order.
    regressors : list of str
        Names of the explanatory columns.

    Returns
    -------
    The fitted result object returned by ``PanelOLS.fit``.
    """
    panel = frame.set_index(index_cols)
    exog = sm.add_constant(panel[regressors])
    model = PanelOLS(panel.crime_rate_all_violent_p30,
                     exog,
                     entity_effects=True,
                     time_effects=True,
                     singletons=False)
    # `singletons` is only given to the constructor above: fit() hands its
    # extra keywords to the covariance estimator.
    return model.fit(cov_type='clustered', clusters=panel.id_e)


def baseline_results(df):
    """Estimate the four baseline specifications on ``df``.

    Fits four two-way fixed-effects models of
    ``crime_rate_all_violent_p30``:

    * ``'Full'``      - all rows, ``kid012_all`` and
      ``all_exp_13`` .. ``all_exp_86``;
    * ``'CC and MK'`` - rows with ``allmk_periode == 1``, same regressors;
    * ``'CC'``        - rows with ``all_periode == 1``, with ``kid012``
      standing in for ``kid012_all`` and ``exp_all_13`` .. ``exp_all_86``;
    * ``'MK'``        - rows with ``mk_periode == 1``, ``MK_kid012`` and
      ``exp_mk_13`` .. ``exp_mk_83``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id_a``, ``id_e_t``, ``id_e``, the three ``*_periode``
        flags, the outcome and all regressors listed above.

    Returns
    -------
    The object returned by ``compare(..., stars=True)`` for the four fits.
    """
    ##first column of baseline
    # NOTE(review): this column indexes by (id_e_t, id_a) while the other
    # three use (id_a, id_e_t); kept as found -- confirm this is intended.
    all_exp_cols = ["kid012_all"] + ["all_exp_%d" % k for k in range(13, 87)]
    res = _fit_baseline_column(df, ["id_e_t", "id_a"], all_exp_cols)

    ##second column of baseline results
    res2 = _fit_baseline_column(df[df['allmk_periode'] == 1],
                                ["id_a", "id_e_t"],
                                all_exp_cols)

    ##third column of baseline results
    # Replace kid012_all by kid012 under the same column name, so this
    # column's coefficient carries the same label as in the first two.
    cc_sample = df[df['all_periode'] == 1]
    cc_sample = cc_sample.drop(['kid012_all'], axis=1)
    cc_sample = cc_sample.rename(columns={"kid012": "kid012_all"})
    res3 = _fit_baseline_column(
        cc_sample,
        ["id_a", "id_e_t"],
        ["kid012_all"] + ["exp_all_%d" % k for k in range(13, 87)])

    ##4th column
    res4 = _fit_baseline_column(
        df[df['mk_periode'] == 1],
        ["id_a", "id_e_t"],
        ["MK_kid012"] + ["exp_mk_%d" % k for k in range(13, 84)])

    ##presentation
    return (compare({
        'Full': res,
        'CC and MK': res2,
        'CC': res3,
        'MK': res4
    },
                    stars=True))