Example #1
##############################################################################
##########################  SECTION 2: FIRST STAGE and SECOND STAGE
##########################  SECTION: 3.1.3 to 3.3.6 IN THE THESIS
##############################################################################

#################################  with the linearmodels package (for corrected standard errors on coefficients)
# Assumed imports for this script
import numpy as np
from linearmodels.iv import IV2SLS

# Define variables for 2SLS
dep = ['success']
endog = ['duration']
instr = ['choc']
exog = IV_numeric_val + main_cat_vars + time_vars + location_vars
data = df[dep + endog + instr + exog]

# First stage: regress the endogenous variable on the instrument and exogenous
# controls (IV2SLS with no endog/instruments reduces to OLS)
res = IV2SLS(data[endog], data[instr + exog], None, None).fit()
print(res)

# Second stage: full 2SLS estimate of the effect of duration on success
res_2sls = IV2SLS(data[dep], data[exog], data[endog], data[instr]).fit()

fitted_values_ivreg = res_2sls.fitted_values.values.ravel()

# Threshold the fitted values to obtain a binary prediction
ivreg_y_hat = np.where(fitted_values_ivreg >= 0.5, 1, 0)

RMSE_ivreg = compute_RMSE(df['success'].values, ivreg_y_hat)

print(res_2sls.summary.as_latex())
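
# `compute_RMSE` is a project-specific helper not shown in this excerpt;
# a minimal sketch of what it presumably computes (plain root mean squared error):
def compute_RMSE(y_true, y_pred):
    # hypothetical reconstruction; assumes numeric arrays of equal length
    return np.sqrt(np.mean((y_true - y_pred) ** 2))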


#################################  with simple statsmodels
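
# The statsmodels variant is cut off in this excerpt; below is a minimal sketch of
# the manual two-stage procedure it presumably contained (variable names reuse the
# block above; the added constant is an assumption):
import statsmodels.api as sm

# Stage 1: regress the endogenous regressor on the instrument + exogenous controls
stage1 = sm.OLS(data[endog], sm.add_constant(data[instr + exog])).fit()
data['duration_hat'] = stage1.predict()

# Stage 2: OLS of the outcome on the fitted endogenous values + controls
# (point estimates match 2SLS, but these OLS standard errors are not valid)
stage2 = sm.OLS(data[dep], sm.add_constant(data[['duration_hat'] + exog])).fit()
print(stage2.summary())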
Example #2
def test_wu_hausman_smoke(data):
    mod = IV2SLS(data.dep, data.exog, data.endog, data.instr)
    res = mod.fit()
    res.wu_hausman()
    res.wu_hausman([mod.endog.cols[1]])
Example #3
def test_compare_single_single_parameter(data):
    res1 = IV2SLS(data.dep, data.exog[:, :1], None, None).fit()
    c = compare([res1])
    assert len(c.rsquared) == 1
    c.summary
Example #4
def test_no_regressors(self, data):
    with pytest.raises(ValueError):
        IV2SLS(data.dep, None, None, None)
Example #5
def test_2sls_ols_equiv(data):
    mod = IV2SLS(data.dep, data.exog, None, None)
    res = mod.fit()
    params = pinv(data.exog) @ data.dep
    assert_allclose(res.params, params.ravel())
Example #6
def test_rank_deficient_exog(self, data):
    exog = data.exog.copy()
    exog[:, :2] = 1
    with pytest.raises(ValueError):
        IV2SLS(data.dep, exog, data.endog, data.instr)
Example #7
def monte_carlo(file, grid_points):
    """Estimate the ATE on samples with varying degrees of correlation
    between U_1 and V. Several estimation strategies (grmpy, OLS, IV,
    naive comparison of means) are compared against the true effect.
    """

    ATE = 0.5  # placeholder; the true ATE is recomputed for each sample below

    # Define a dictionary with a key for each estimation strategy
    effects = {}
    for key_ in ["grmpy", "ols", "true", "random", "rho", "iv", "means"]:
        effects[key_] = []

    # Loop over different correlations between V and U_1
    for rho in np.linspace(0.00, 0.99, grid_points):
        effects["rho"] += [rho]
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = create_data(file)
        endog, exog, exog_ols = df_mc["wage"], df_mc[X], df_mc[["state"] + X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]
        # Calculate true average treatment effect
        ATE = np.mean(df_mc["wage1"] - df_mc["wage0"])
        effects["true"] += [ATE]

        # Estimate via grmpy
        rslt = fit(file)
        beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
        stat = np.dot(np.mean(exog), beta_diff)

        effects["grmpy"] += [stat]

        # Estimate via OLS
        ols = sm.OLS(endog, exog_ols).fit()
        stat = ols.params[0]
        effects["ols"] += [stat]

        # Estimate via 2SLS
        iv = IV2SLS(endog, exog, df_mc["state"], df_mc[instr]).fit()
        stat = iv.params["state"]
        effects["iv"] += [stat]

        # Estimate via random
        random = np.mean(df_mc[df_mc.state == 1]["wage"]) - np.mean(
            df_mc[df_mc.state == 0]["wage"]
        )
        stat = random
        effects["random"] += [stat]

        # Record group means and sample sizes by treatment status
        stat = [
            [
                np.mean(df_mc[df_mc.state == 1]["wage"]),
                df_mc[df_mc.state == 1].shape[0],
            ],
            [
                np.mean(df_mc[df_mc.state == 0]["wage"]),
                df_mc[df_mc.state == 0].shape[0],
            ],
        ]
        effects["means"] += stat

    create_plots(effects, effects["true"])
Example #8
def test_invalid_weights(self, data):
    weights = np.zeros_like(data.dep)
    with pytest.raises(ValueError):
        IV2SLS(data.dep, data.exog, data.endog, data.instr, weights=weights)
Example #9
def monte_carlo(file, which, grid_points=10):
    """
    This function conducts a Monte Carlo simulation to compare
    the true and estimated treatment parameters for increasing
    (absolute) correlation between U_1 and V (i.e. essential
    heterogeneity).

    In the example here, the correlation between U_1 and V becomes
    increasingly more negative. As we consider the absolute value
    of the correlation coefficient, values closer to -1
    (or in the analogous case closer to +1)
    denote a higher degree of essential heterogeneity.

    The results of the Monte Carlo simulation can be used
    to evaluate the performance of different estimation strategies
    in the presence of essential heterogeneity.

    Depending on the specification of *which*, either the true ATE
    and TT, or an estimate of the ATE are returned.

    Options for *which*:

        Comparison of ATE and TT
        - "conventional_average_effects"

        Different estimation strategies for ATE
        - "randomization" ("random")
        - "ordinary_least_squares" ("ols")
        - "instrumental_variables" ("iv")
        - "grmpy_par" ("grmpy")
        - "grmpy_semipar"("grmpy-liv")

    Post-estimation: To plot the comparison between the true ATE
    and the respective parameter, use the function
    - plot_effects() for *which* = "conventional_average_effects", and
    - plot_estimates() else.

    Parameters
    ----------
    file: str
        grmpy initialization file (yaml), provides information for the simulation process.
    which: string
        String denoting whether conventional average effects shall be computed
        or, alternatively, which estimation approach shall be implemented for the ATE.
    grid_points: int, default 10
        Number of different values for rho, the correlation coefficient
        between U_1 and V, on the interval [0, -1), along which the parameters
        shall be evaluated.

    Returns
    -------
    effects: list
        If *which* = "conventional_average_effects",
            list of length *grid_points* x 2 containing the true ATE and TT.
        Else, list of length *grid_points* x 1 containing an estimate
            of the ATE.
    """
    # simulate a new data set with essential heterogeneity present
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between U_1 and V
    for rho in np.linspace(0.00, -0.99, grid_points):
        # effects["rho"] += [rho]
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        _update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = _create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["randomization", "random"]:
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )
            stat = random

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            us = np.linspace(0.005, 0.995, len(rslt["quantiles"]))
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))

            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()

        else:
            raise NotImplementedError

        effects += [stat]

    # Restore original init file
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
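
# A hypothetical invocation (the initialization file name is an assumption),
# following the post-estimation advice in the docstring:
estimates = monte_carlo("model.grmpy.yml", which="instrumental_variables", grid_points=10)
plot_estimates(estimates)  # plotting helper named in the docstring; exact signature not shown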
Example #10
def test_wooldridge_smoke(data):
    mod = IV2SLS(data.dep, data.exog, data.endog, data.instr)
    res = mod.fit()
    assert isinstance(res.wooldridge_regression, WaldTestStatistic)
    assert isinstance(res.wooldridge_score, WaldTestStatistic)
Example #11
def test_first_stage_summary(data):
    res1 = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit()
    assert isinstance(res1.first_stage.summary, Summary)
Example #12
def test_no_regressors_exception(data):
    with pytest.raises(ValueError):
        IV2SLS(data.dep, None, None, None)
Example #13

# Assumed context: seaborn as sns, matplotlib.pyplot as plt; `col` comes from an
# enclosing loop over the columns of df_param (not shown)
sns.boxplot(df_param[col])
plt.show()

plt.figure(figsize=[11, 11])

sns.heatmap(df_param.corr(), annot=True, cmap="Oranges")

# Correlations of the variables with margin 1
df_param.corr()['Marge_1']

# ## Estimation IV2SLS

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from linearmodels.iv import IV2SLS

reg = IV2SLS.from_formula(
    'l1 ~ 1 + sucres1 + sucres1_carre + [prix_1 ~ sucres2 + sucres2_carre]',
    base_reg)
reg.fit()

# ## 300 Cities
# (presumably base_reg is rebuilt from the 300-city sample before each refit)
reg.fit()

# ## 600 Cities
reg.fit()

# ## 1000 Cities
reg.fit()
Example #14
## First Stage
# Import and select the data
df4 = pd.read_stata(
    'https://github.com/QuantEcon/QuantEcon.lectures.code/raw/master/ols/maketable4.dta'
)
df4 = df4[df4['baseco'] == 1]
df4.head()

# add a constant variable
df4['const'] = 1

results_fs = sm.OLS(df4['avexpr'], df4[['const', 'logem4']],
                    missing='drop').fit()
print(results_fs.summary())

## Second stage --> gives consistent estimates of the causal effect
# retrieve the predicted values of avexpr using .predict()
df4['predicted_avexpr'] = results_fs.predict()

results_ss = sm.OLS(df4['logpgp95'], df4[['const', 'predicted_avexpr']]).fit()
print(results_ss.summary())

### 2SLS regression via IV2SLS (also corrects the second-stage standard errors)
from linearmodels.iv import IV2SLS

iv = IV2SLS(dependent=df4['logpgp95'],
            exog=df4['const'],
            endog=df4['avexpr'],
            instruments=df4['logem4']).fit(cov_type='unadjusted')
print(iv.summary)
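
# Quick sanity check (a sketch; labels follow the code above): the IV2SLS point
# estimate on avexpr should match the manual second-stage coefficient on
# predicted_avexpr, while the reported standard errors differ (only the IV2SLS
# ones are valid):
print(results_ss.params['predicted_avexpr'])
print(iv.params['avexpr'])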
Example #15
def test_anderson_rubin(data):
    res = IV2SLS(data.dep, data.exog, data.endog[['x1']],
                 data.instr).fit(cov_type='unadjusted')
    assert_allclose(res.nobs * (res._liml_kappa - 1), .176587, rtol=1e-4)
Example #16
def monte_carlo(file, which, grid_points=10):
    """This function estimates various effect parameters for
    increasing presence of essential heterogeneity, which is reflected
    by increasing correlation between U_1 and V.
    """
    # simulate a new data set with essential heterogeneity present
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]

    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between V and U_1
    for rho in np.linspace(0.00, -0.99, grid_points):
        # effects["rho"] += [rho]
        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)
        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)

        elif label in ["random", "randomization"]:
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"])
            stat = random

        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]

        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]

        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)

        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            us = np.linspace(0.005, 0.995, len(rslt["quantiles"]))
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))

            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()

        else:
            raise NotImplementedError

        effects += [stat]

    # Restore original init file
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
Example #17
def test_basmann_f(data):
    res = IV2SLS(data.dep, data.exog, data.endog[['x1']],
                 data.instr).fit(cov_type='unadjusted')
    assert_allclose(res.basmann_f.stat, .174821, rtol=1e-4)
    assert_allclose(res.basmann_f.pval, 0.6760, rtol=1e-3)
Example #18

def test_firstdifference_ols(data):
    mod = FirstDifferenceOLS(data.y, data.x)
    res = mod.fit(debiased=False)

    y = mod.dependent.values3d
    x = mod.exog.values3d
    dy = np.array(y[0, 1:] - y[0, :-1])
    dy = pd.DataFrame(
        dy,
        index=mod.dependent.panel.major_axis[1:],
        columns=mod.dependent.panel.minor_axis,
    )
    dy = dy.T.stack()
    dy = dy.reindex(mod.dependent.index)

    dx = x[:, 1:] - x[:, :-1]
    _dx = {}
    for i, dxi in enumerate(dx):
        temp = pd.DataFrame(
            dxi,
            index=mod.dependent.panel.major_axis[1:],
            columns=mod.dependent.panel.minor_axis,
        )
        temp = temp.T.stack()
        temp = temp.reindex(mod.dependent.index)
        _dx[mod.exog.vars[i]] = temp
    dx = pd.DataFrame(index=_dx[mod.exog.vars[i]].index)
    for key in _dx:
        dx[key] = _dx[key]
    dx = dx[mod.exog.vars]
    drop = dy.isnull() | np.any(dx.isnull(), 1)
    dy = dy.loc[~drop]
    dx = dx.loc[~drop]

    ols_mod = IV2SLS(dy, dx, None, None)
    ols_res = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(data.vc1)
    fd = mod.dependent.first_difference()
    ols_clusters = ols_clusters.dataframe.loc[fd.index]
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
    entity_clusters = mod.dependent.first_difference().entity_ids
    ols_res = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
    assert_results_equal(res, ols_res)

    ols_clusters["entity.clusters"] = entity_clusters
    ols_clusters = ols_clusters.astype(np.int32)
    res = mod.fit(cov_type="clustered",
                  cluster_entity=True,
                  clusters=data.vc1,
                  debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Example #19
    def diagnostics(self):
        """
        Post estimation diagnostics of first-stage fit

        Returns
        -------
        res : DataFrame
            DataFrame where each endogenous variable appears as a row and
            the columns contain alternative measures.  The columns are:

            * rsquared - R-squared from regression of endogenous on exogenous
              and instruments
            * partial.rsquared - R-squared from regression of the exogenous
              variable on instruments where both the exogenous variable and
              the instrument have been orthogonalized to the exogenous
              regressors in the model.
            * f.stat - Test that all coefficients are zero in the model
              used to estimate the partial R-squared. Uses a standard F-test
              when the covariance estimator is unadjusted - otherwise uses a
              Wald test statistic with a chi2 distribution.
            * f.pval - P-value of the test that all coefficients are zero
              in the model used to estimate the partial R-squared
            * shea.rsquared - Shea's r-squared which measures the correlation
              between the projected and orthogonalized instrument on the
              orthogonalized endogenous regressor where the orthogonalization
              is with respect to the other included variables in the model.
        """
        from linearmodels.iv.model import _OLS, IV2SLS
        endog, exog, instr, weights = self.endog, self.exog, self.instr, self.weights
        w = sqrt(weights.ndarray)
        z = w * instr.ndarray
        x = w * exog.ndarray
        px = x @ pinv(x)
        ez = z - px @ z
        out = {}
        individual_results = self.individual
        for col in endog.pandas:
            inner = {}
            inner['rsquared'] = individual_results[col].rsquared
            y = w * endog.pandas[[col]].values
            ey = y - px @ y
            mod = _OLS(ey, ez)
            res = mod.fit(self._cov_type, **self._cov_config)
            inner['partial.rsquared'] = res.rsquared
            params = res.params.values
            params = params[:, None]
            stat = params.T @ inv(res.cov) @ params
            stat = float(stat.squeeze())
            w_test = WaldTestStatistic(stat, null='', df=params.shape[0])
            inner['f.stat'] = w_test.stat
            inner['f.pval'] = w_test.pval
            out[col] = Series(inner)
        out = DataFrame(out).T

        dep = self.dep
        r2sls = IV2SLS(dep, exog, endog, instr,
                       weights=weights).fit('unadjusted')
        rols = _OLS(dep, self._reg, weights=weights).fit('unadjusted')
        shea = (rols.std_errors / r2sls.std_errors)**2
        shea *= (1 - r2sls.rsquared) / (1 - rols.rsquared)
        out['shea.rsquared'] = shea[out.index]
        cols = [
            'rsquared', 'partial.rsquared', 'shea.rsquared', 'f.stat', 'f.pval'
        ]
        out = out[cols]
        for c in out:
            out[c] = to_numeric(out[c])

        return out
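
# Minimal usage sketch for the diagnostics above, assuming a fitted IV2SLS result
# with the same `data` fixture as in the test snippets earlier in this page:
res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit()
print(res.first_stage.diagnostics)  # DataFrame with the columns documented above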
Example #20

def test_firstdifference_ols_weighted(data):
    mod = FirstDifferenceOLS(data.y, data.x, weights=data.w)
    res = mod.fit(debiased=False)

    y = mod.dependent.values3d
    x = mod.exog.values3d
    dy = np.array(y[0, 1:] - y[0, :-1])
    dy = pd.DataFrame(
        dy,
        index=mod.dependent.panel.major_axis[1:],
        columns=mod.dependent.panel.minor_axis,
    )
    dy = dy.T.stack()
    dy = dy.reindex(mod.dependent.index)

    dx = x[:, 1:] - x[:, :-1]
    _dx = {}
    for i, dxi in enumerate(dx):
        temp = pd.DataFrame(
            dxi,
            index=mod.dependent.panel.major_axis[1:],
            columns=mod.dependent.panel.minor_axis,
        )
        temp = temp.T.stack()
        temp = temp.reindex(mod.dependent.index)
        _dx[mod.exog.vars[i]] = temp
    dx = pd.DataFrame(index=_dx[mod.exog.vars[i]].index)
    for key in _dx:
        dx[key] = _dx[key]
    dx = dx[mod.exog.vars]

    w = mod.weights.values3d
    w = 1.0 / w
    sw = w[0, 1:] + w[0, :-1]
    sw = pd.DataFrame(
        sw,
        index=mod.dependent.panel.major_axis[1:],
        columns=mod.dependent.panel.minor_axis,
    )
    sw = sw.T.stack()
    sw = sw.reindex(mod.dependent.index)
    sw = 1.0 / sw
    sw = sw / sw.mean()

    drop = dy.isnull() | np.any(dx.isnull(), 1) | sw.isnull()
    dy = dy.loc[~drop]
    dx = dx.loc[~drop]
    sw = sw.loc[~drop]

    ols_mod = IV2SLS(dy, dx, None, None, weights=sw)
    ols_res = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(data.vc1)
    fd = mod.dependent.first_difference()
    ols_clusters = ols_clusters.dataframe.loc[fd.index]

    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Example #21

sessions_AcceptedInvo['Total_Service_Duration'].mean()

# Ratio of completed outcomes (coded 1 or 2) to total queueing seconds
((sessions_AcceptedInvo[' outcome'] == 1).sum()
 + (sessions_AcceptedInvo[' outcome'] == 2).sum()) / sessions_AcceptedInvo[' queue_sec'].sum()

# Check with a linear regression what influences outcome Y
# (this data also covers the other treatments)
dataForRegression = pd.read_csv('DataForRegression.csv', index_col=0)

regression_check = IV2SLS(
    dataForRegression.Y,
    dataForRegression[['queue_sec', 'invite_type', 'engagement_skill', 'target_skill',
                       'region', 'city', 'country', 'continent', 'user_os', 'browser',
                       'score', 'other_time', 'other_lines', 'other_number_words',
                       'inner_wait', 'visitor_duration', 'agent_duration',
                       'visitor_number_words', 'agent_number_words', 'visitor_lines',
                       'agent_lines', 'total_canned_lines', 'average_sent', 'min_sent',
                       'max_sent', 'n_sent_pos', 'n_sent_neg', 'first_sent', 'last_sent',
                       'id_rep_code', 'Invitation_Acep_Day_of_week',
                       'Invitation_Acep_Hour', 'NumberofAssigned',
                       'NumberofAssignedwhenAssigned', 'Rho_atarrival']],
    None, None).fit(cov_type='unadjusted')

print(regression_check)

regression_check2 = IV2SLS(
    dataForRegression.Y,
    dataForRegression[['queue_sec', 'invite_type', 'engagement_skill', 'target_skill',
                       'score', 'other_time', 'agent_number_words', 'visitor_lines',
                       'agent_lines']],
    None, None).fit(cov_type='unadjusted')
print(regression_check2)
Example #22
var_dependent = reputation_model[0][0]
all_exogenous = reputation_model[1]
all_endogeneous = reputation_model[2]
all_instrumental = reputation_model[3]

all_results = np.zeros((len(all_instrumental), len(all_endogeneous), 2))

# Open the results file once instead of reopening it for every print call
results_file = open('Results/' + which_site + '_Results.txt', 'a')

for i, var_endogeneous in enumerate(all_endogeneous):

    print('*******' + var_endogeneous.upper() + '*******', file=results_file)

    print('OLS model with no control\n', file=results_file)
    res_ols = IV2SLS(df_covariates[var_dependent],
                     df_covariates[[var_endogeneous, 'const']], None,
                     None).fit(cov_type='unadjusted')
    print(res_ols, file=results_file)
    print(
        '*******************************************************************************\n',
        file=results_file)

    for j, var_instrumental in enumerate(all_instrumental):
        print('***' + var_endogeneous.upper() + ': ' + var_instrumental + '***',
              file=results_file)

        print('2SLS model with no control\n', file=results_file)
        # 2SLS call signature: IV2SLS(dependent, exog, endog, instruments)
        res_2sls = IV2SLS(
Example #23
def test_too_few_instruments(self, data):
    with pytest.raises(ValueError):
        IV2SLS(data.dep, data.exog, data.endog, None)
Example #24
def test_wooldridge_score(data):
    res = IV2SLS(data.dep, data.exog, data.endog[['x1', 'x2']],
                 data.instr).fit(cov_type='robust')
    assert_allclose(res.wooldridge_score.stat, 22.684, rtol=1e-4)
    assert_allclose(res.wooldridge_score.pval, 0.0000, atol=1e-4)
Example #25
def test_durbin_smoke(data):
    mod = IV2SLS(data.dep, data.exog, data.endog, data.instr)
    res = mod.fit()
    res.durbin()
    res.durbin([mod.endog.cols[1]])
Example #26
def test_wooldridge_regression(data):
    mod = IV2SLS(data.dep, data.exog, data.endog[['x1', 'x2']], data.instr)
    res = mod.fit(cov_type='robust', debiased=True)
    # Scale to correct for F vs Wald treatment
    assert_allclose(res.wooldridge_regression.stat, 2 * 13.3461, rtol=1e-4)
    assert_allclose(res.wooldridge_regression.pval, 0.0000, atol=1e-4)
Example #27
def test_wooldridge_smoke(data):
    mod = IV2SLS(data.dep, data.exog, data.endog, data.instr)
    res = mod.fit()
    res.wooldridge_regression
    res.wooldridge_score
Example #28
def test_wooldridge_overid(data):
    res = IV2SLS(data.dep, data.exog, data.endog[['x1']],
                 data.instr).fit(cov_type='robust')
    assert_allclose(res.wooldridge_overid.stat, 0.221648, rtol=1e-4)
    assert_allclose(res.wooldridge_overid.pval, 0.6378, rtol=1e-3)
Example #29
def test_first_stage_summary(data):
    res1 = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit()
    res1.first_stage.summary
Example #30
# Fragment of a nearest-neighbour matching loop; the loop header is not shown,
# presumably `for row in ...:` over the treated observations
    # L1 distance between this treated unit's covariates and every control's
    dif = abs(row[:-1] - dummT0V[:, :-1]).sum(axis=1)
    # individual effect: outcome difference vs the nearest control
    # (dif.argmin() gives its index)
    distance = row[-1] - dummT0V[dif.argmin(), -1]
    ITE.append(distance)
# Sum over the treated / number of treated
ATE_Matchingbii = float(sum(ITE)) / float(len(dataFirstTB))

# ---------matching---------


#-----------IV's-------------
correlation = dataFirstTB.corr()

res_second = IV2SLS(dataFirstTB.Y,
                    dataFirstTB[['invite_type', 'engagement_skill', 'Rho_atarrival',
                                 'region', 'city', 'country', 'continent', 'user_os',
                                 'browser', 'score', 'Invitation_Acep_Hour']],
                    dataFirstTB.WaitTreatment,
                    dataFirstTB.Invitation_Acep_Day_of_week).fit(cov_type='unadjusted')

print(res_second)

# covariance() is presumably a project-specific helper (e.g. wrapping np.cov)
covariance(dataFirstT.queue_sec, dataFirstT.Rho_atarrival)
# -0.12

correlation = dataFirstT[['queue_sec', 'Rho_atarrival', 'invite_type',
                          'engagement_skill', 'target_skill']].corr()

#-----------IV's-------------


# FIRST TREATMENT ii: -30 sec

dataFirstT = pd.read_csv('DataForFirstTreatmentii.csv', index_col=0)