Example #1
def test_propensity_score(simulate_test_data):
    """
    Check whether the propensity score has the same number of observations
    as the input data frame (for both the logit and the probit model).
    """
    dict_, data = simulate_test_data

    ps_logit = estimate_treatment_propensity(dict_, data, logit=True)
    ps_probit = estimate_treatment_propensity(dict_, data, logit=False)

    np.testing.assert_equal(len(ps_logit), data.shape[0])
    np.testing.assert_equal(len(ps_logit), len(ps_probit))
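The logit flag above only switches the link function that maps the linear index of the decision equation into a probability. A minimal standalone sketch of that distinction, using scipy directly; the data and coefficients are illustrative, not the package's:

import numpy as np
from scipy.special import expit
from scipy.stats import norm

rng = np.random.default_rng(0)
Z = rng.normal(size=(100, 2))    # instruments in the decision equation
gamma = np.array([0.5, -0.25])   # illustrative coefficients
index = Z @ gamma                # linear index of the decision equation

ps_logit = expit(index)          # logistic link: exp(v) / (1 + exp(v))
ps_probit = norm.cdf(index)      # probit link: standard normal CDF

# Both links yield exactly one propensity score per observation.
assert len(ps_logit) == len(ps_probit) == Z.shape[0]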
Example #2
def test_trim2(simulate_test_data):
    """
    Test whether the trim function returns the original data when the
    common support is set to the entire unit interval.
    """
    dict_, data = simulate_test_data

    data = estimate_treatment_propensity(dict_, data, logit=True)

    logit, trim, reestimate_p = True, True, False
    prop_score = data["prop_score"]
    common_support = [0, 1]

    # Trim the data. Recommended.
    if trim is True:
        # data, prop_score = trim_data(prop_score, common_support, data)
        data_trim = data[
            (data.prop_score >= common_support[0])
            & (data.prop_score <= common_support[1])
        ]
        prop_score_trim = prop_score[
            (prop_score >= common_support[0]) & (prop_score <= common_support[1])
        ]

        # Optional (not recommended): re-estimate the baseline
        # propensity score on the trimmed sample.
        if reestimate_p is True:
            # Re-estimate the parameters of the decision equation
            # on the newly trimmed data set.
            data_trim = estimate_treatment_propensity(dict_, data_trim, logit)
    else:
        data_trim = data
        prop_score_trim = prop_score

    data_trim = data_trim.sort_values(by="prop_score", ascending=True)
    X_trim = data_trim[dict_["TREATED"]["order"]]
    Y_trim = data_trim[[dict_["ESTIMATION"]["dependent"]]]
    prop_score_trim = np.sort(prop_score_trim)

    X_expected, Y_expected, prop_score_expected = expected_data_no_trim(dict_, data)

    np.testing.assert_array_equal(X_trim, X_expected)
    np.testing.assert_array_equal(Y_trim, Y_expected)
    np.testing.assert_array_equal(prop_score_trim, prop_score_expected)
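Valid propensity scores lie in [0, 1] by construction, so trimming on the entire unit interval keeps every observation. A self-contained pandas sketch of that invariant on toy data:

import pandas as pd

df = pd.DataFrame({"prop_score": [0.1, 0.5, 0.9]})
lo, hi = 0, 1
trimmed = df[(df.prop_score >= lo) & (df.prop_score <= hi)]

# Nothing is dropped: the mask is True for every valid score.
pd.testing.assert_frame_equal(trimmed, df)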
Example #3
def test_trim(simulate_test_data):
    """
    Test whether the original data is returned if *trim* is set to False
    but *reestimate_p* is set to True.
    """
    dict_, data = simulate_test_data

    data = estimate_treatment_propensity(dict_, data, logit=True)
    X_expected, Y_expected, prop_score_expected = expected_data_no_trim(dict_, data)

    logit, trim, reestimate_p = False, False, True
    X, Y, prop_score = trim_support(dict_, data, logit, 25, trim, reestimate_p)

    # Stash the results on the pytest module so a follow-up test can reuse them.
    pytest.X_testing = X
    pytest.Y_testing = Y
    pytest.prop_score_testing = prop_score

    np.testing.assert_array_equal(X, X_expected)
    np.testing.assert_array_equal(Y, Y_expected)
    np.testing.assert_array_equal(prop_score, prop_score_expected)
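Assigning attributes on the pytest module is a quick way to stash results for later tests in the same session. A hypothetical follow-up test consuming the stash above (it assumes test_trim has already run; the test name is illustrative):

import pytest

def test_trim_reuses_stash():
    # Hypothetical consumer: the arrays stashed by test_trim
    # must be mutually consistent.
    assert len(pytest.X_testing) == len(pytest.Y_testing)
    assert len(pytest.X_testing) == len(pytest.prop_score_testing)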
Example #4
def bootstrap(init_file, nboot):
    """
    This function generates bootstrapped standard errors
    given an init_file and the number of bootstrap samples to be drawn.

    Parameters
    ----------
    init_file: yaml
        Initialization file containing parameters for the estimation
        process.
    nboot: int
        Number of bootstrap iterations, i.e. number of times
        the MTE is computed via bootstrap.

    Returns
    -------
    mte_boot: np.ndarray
        Array of shape (gridsize, nboot) containing the *nboot*
        bootstrap estimates of the MTE.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Process the information specified in the initialization file
    bins, logit, bandwidth, gridsize, startgrid, endgrid = process_primary_inputs(
        dict_)
    trim, rbandwidth, reestimate_p, show_output = process_secondary_inputs(
        dict_)

    # Suppress output
    show_output = False

    # Prepare empty array to store output values
    mte_boot = np.zeros([gridsize, nboot])

    # Load the baseline data
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nboot:
        boot_data = resample(data,
                             replace=True,
                             n_samples=len(data),
                             random_state=None)

        # Estimate propensity score P(z)
        boot_data = estimate_treatment_propensity(dict_, boot_data, logit,
                                                  show_output)
        prop_score = boot_data["prop_score"]
        if isinstance(prop_score, pd.Series):
            # Define common support and trim the data (if trim=True)
            X, Y, prop_score = trim_support(dict_,
                                            boot_data,
                                            logit,
                                            bins,
                                            trim,
                                            reestimate_p,
                                            show_output=False)

            b0, b1_b0 = double_residual_reg(X, Y, prop_score)

            # Construct the MTE
            mte_x = mte_observed(X, b1_b0)
            mte_u = mte_unobserved_semipar(X, Y, b0, b1_b0, prop_score,
                                           bandwidth, gridsize, startgrid,
                                           endgrid)

            # Put the MTE together
            mte = mte_x.mean(axis=0) + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            # Propensity score estimation failed on this draw;
            # resample and try again.
            continue

    return mte_boot
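Since mte_boot has shape (gridsize, nboot), pointwise confidence bands follow directly from percentiles across the bootstrap axis. A minimal usage sketch on a stand-in array of the same shape:

import numpy as np

# Stand-in for the real output: bootstrap() returns an array of
# shape (gridsize, nboot).
gridsize, nboot = 500, 250
mte_boot = np.random.default_rng(1).normal(size=(gridsize, nboot))

# Pointwise 90% confidence band across the bootstrap replications.
lower, upper = np.percentile(mte_boot, [5, 95], axis=1)
assert lower.shape == upper.shape == (gridsize,)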
Example #5
def bootstrap(init_file, nbootstraps):
    """
    This function generates bootstrapped standard errors
    given an init_file and the number of bootstrap samples to be drawn.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Process the information specified in the initialization file
    nbins, logit, bandwidth, gridsize, a, b = process_user_input(dict_)
    trim, rbandwidth, reestimate_p = process_default_input(dict_)

    # Suppress output
    show_output = False

    # Prepare empty array to store output values
    mte_boot = np.zeros([gridsize, nbootstraps])

    # Load the baseline data
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nbootstraps:
        boot_data = resample(data,
                             replace=True,
                             n_samples=len(data),
                             random_state=None)

        # Process the inputs for the decision equation
        indicator, D, Z = process_choice_data(dict_, boot_data)

        # Estimate propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

        if isinstance(ps, np.ndarray):
            # Define common support and trim the data, if trim=True
            boot_data, ps = trim_support(
                dict_,
                boot_data,
                logit,
                ps,
                indicator,
                nbins,
                trim,
                reestimate_p,
                show_output,
            )

            # Estimate the observed and unobserved component of the MTE
            X, b1_b0, b0, mte_u = mte_components(dict_, boot_data, ps,
                                                 rbandwidth, bandwidth,
                                                 gridsize, a, b, show_output)

            # Calculate the MTE component that depends on X
            mte_x = np.dot(X, b1_b0).mean(axis=0)

            # Put the MTE together
            mte = mte_x + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            # Propensity score estimation failed on this draw;
            # resample and try again.
            continue

    return mte_boot
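Either version returns the same (gridsize, n) array of bootstrap replications, so the bootstrap standard error of the MTE at each grid point is just the row-wise standard deviation. A minimal sketch, again on a stand-in array:

import numpy as np

# Stand-in for the (gridsize, nbootstraps) output of bootstrap().
mte_boot = np.random.default_rng(2).normal(size=(500, 250))

# Bootstrap standard error of the MTE at every grid point.
mte_se = mte_boot.std(axis=1, ddof=1)
assert mte_se.shape == (mte_boot.shape[0],)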