Example #1
import shlex


def read(file_):
    """Read the initialization file and return a dictionary with the
    parameters for the simulation.
    """
    check_presence_init(file_)

    dict_ = {'varnames': []}

    with open(file_) as init_file:
        for line in init_file:
            list_ = shlex.split(line)

            # Skip empty lines.
            if not list_:
                continue

            # An uppercase first token opens a new section of the init file.
            if list_[0].isupper():
                keyword = list_[0]
                dict_[keyword] = {}
                continue

            # All other lines are parameter entries of the current section.
            process(list_, dict_, keyword)

    dict_ = auxiliary(dict_)

    return dict_
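A minimal usage sketch; the file name and section contents below are hypothetical, and the only behavior taken from the code is that an uppercase first token opens a new section while all other non-empty rows are handed to process():

# Hypothetical init file "sim.grmpy.ini":
#
#   SIMULATION
#   seed 4567
#   agents 1000
#
params = read("sim.grmpy.ini")
assert "SIMULATION" in params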
Example #2
import numpy as np
import yaml


def read(file, semipar=False, include_constant=False):
    """Process the initialization file for the estimation routine."""
    # Check if there is an init file with the specified filename
    check_presence_init(file)

    # Load the initialization file
    with open(file) as y:
        init_dict = yaml.load(y, Loader=yaml.FullLoader)

    # If missing, add a generic covariance matrix of the unobservables
    if semipar is False and "DIST" not in init_dict:
        init_dict["DIST"] = {
            "params": np.array([0.1, 0.0, 0.0, 0.1, 0.0, 1.0])
        }

    # Process the initialization file
    attr_dict = create_attr_dict_est(init_dict, semipar, include_constant)

    return attr_dict
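A usage sketch, assuming a YAML init file; the file name and the ESTIMATION entry are illustrative (ESTIMATION/file is what the estimation routines below read), while the DIST entry mirrors the default inserted above:

# Hypothetical "init.yml":
#
#   ESTIMATION:
#       file: data.pkl
#   DIST:
#       params: [0.1, 0.0, 0.0, 0.1, 0.0, 1.0]
#
attr_dict = read("init.yml", semipar=False)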
Example #3
def par_fit(init_file):
    """The function estimates the coefficients of the simulated data set."""
    check_presence_init(init_file)

    dict_ = read(init_file)
    np.random.seed(dict_["SIMULATION"]["seed"])

    # We perform some basic consistency checks regarding the user's request.
    check_presence_estimation_dataset(dict_)
    # check_initialization_dict2(dict_)
    # check_init_file(dict_)

    # Distribute initialization information.
    data = read_data(dict_["ESTIMATION"]["file"])
    num_treated = dict_["AUX"]["num_covars_treated"]
    num_untreated = num_treated + dict_["AUX"]["num_covars_untreated"]

    _, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, dict_)

    if dict_["ESTIMATION"]["maxiter"] == 0:
        option = "init"
    else:
        option = dict_["ESTIMATION"]["start"]

    # Define starting values.
    x0 = start_values(dict_, data, option)
    opts, method = optimizer_options(dict_)
    dict_["AUX"]["criteria"] = calculate_criteria(dict_, X1, X0, Z1, Z0, Y1,
                                                  Y0, x0)
    dict_["AUX"]["starting_values"] = backward_transformation(x0)
    rslt_dict = bfgs_dict()
    if opts["maxiter"] == 0:
        rslt = adjust_output(None, dict_, x0, X1, X0, Z1, Z0, Y1, Y0,
                             rslt_dict)
    else:
        opt_rslt = minimize(
            minimizing_interface,
            x0,
            args=(dict_, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated,
                  rslt_dict),
            method=method,
            options=opts,
        )
        rslt = adjust_output(opt_rslt, dict_, opt_rslt["x"], X1, X0, Z1, Z0,
                             Y1, Y0, rslt_dict)
    # Print output files.
    print_logfile(dict_, rslt)

    # Skip the comparison file only if the user explicitly set comparison to 0.
    if dict_["ESTIMATION"].get("comparison", 1) != 0:
        write_comparison(data, rslt)

    return rslt
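A usage sketch for the parametric routine (the file name is hypothetical). Note that setting ESTIMATION/maxiter to 0 short-circuits the optimizer: the criterion is evaluated at the start values only and adjust_output is called with None in place of an optimizer result.

rslt = par_fit("init.yml")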
Example #4
def read_simulation(file):
    """Process initialization file for the simulation."""
    # Check if there is an init file with the specified filename
    check_presence_init(file)

    # Load the initialization file
    with open(file) as y:
        init_dict = yaml.load(y, Loader=yaml.FullLoader)

    # Process the initialization file
    attr_dict = create_attr_dict_sim(init_dict)

    return attr_dict
Example #5
def estimate(init_file):
    """The function estimates the coefficients of the simulated data set."""
    check_presence_init(init_file)

    dict_ = read(init_file)
    np.random.seed(dict_['SIMULATION']['seed'])

    # We perform some basic consistency checks regarding the user's request.
    check_presence_estimation_dataset(dict_)
    check_initialization_dict(dict_)
    check_init_file(dict_)

    # Distribute initialization information.
    data_file = dict_['ESTIMATION']['file']

    if dict_['ESTIMATION']['maxiter'] == 0:
        option = 'init'
    else:
        option = dict_['ESTIMATION']['start']

    # Read data frame
    data = read_data(data_file)

    # Define starting values.
    x0 = start_values(dict_, data, option)
    opts, method = optimizer_options(dict_)
    dict_['AUX']['criteria'] = calculate_criteria(dict_, data, x0)
    dict_['AUX']['starting_values'] = backward_transformation(x0)
    rslt_dict = bfgs_dict()
    if opts['maxiter'] == 0:
        rslt = adjust_output(None, dict_, x0, data, rslt_dict)
    else:
        opt_rslt = minimize(minimizing_interface,
                            x0,
                            args=(dict_, data, rslt_dict),
                            method=method,
                            options=opts)
        rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'], data, rslt_dict)
    # Print output files.
    print_logfile(dict_, rslt)

    # Skip the comparison file only if the user explicitly set comparison to 0.
    if dict_['ESTIMATION'].get('comparison', 1) != 0:
        write_comparison(dict_, data, rslt)

    return rslt
Example #6
def read(file, semipar=False, include_constant=False):
    """Process the initialization file for the estimation routine."""
    # Check if there is an init file with the specified filename
    check_presence_init(file)

    # Load the initialization file
    with open(file) as y:
        init_dict = yaml.load(y, Loader=yaml.FullLoader)

    # Process the initialization file
    attr_dict = create_attr_dict_est(init_dict, semipar, include_constant)

    return attr_dict
Example #7
def fit(init_file, semipar=False):
    """ """
    check_presence_init(init_file)

    dict_ = read(init_file)

    # Perform some consistency checks given the user's request
    check_presence_estimation_dataset(dict_)
    check_initialization_dict(dict_)

    # Semiparametric Model
    if semipar is True:
        quantiles, mte_u, X, b1_b0 = semipar_fit(init_file)  # change to dict_

        # Construct MTE
        # Calculate the MTE component that depends on X
        mte_x = np.dot(X, b1_b0)

        # Put the MTE together
        mte = mte_x.mean(axis=0) + mte_u

        # Accounting for variation in X
        mte_min = np.min(mte_x) + mte_u
        mte_max = np.max(mte_x) + mte_u

        rslt = {
            "quantiles": quantiles,
            "mte": mte,
            "mte_x": mte_x,
            "mte_u": mte_u,
            "mte_min": mte_min,
            "mte_max": mte_max,
            "X": X,
            "b1-b0": b1_b0,
        }

    # Parametric Normal Model
    else:
        check_par(dict_)
        rslt = par_fit(dict_)

    return rslt
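A small shape sketch of the MTE assembly above, using made-up arrays (n observations, k covariates, and a quantile grid); all names here are illustrative:

import numpy as np

n, k, gridsize = 100, 3, 50
X = np.random.rand(n, k)            # covariates
b1_b0 = np.random.rand(k)           # difference of treated/untreated coefficients
mte_u = np.random.rand(gridsize)    # unobserved MTE component on the grid

mte_x = np.dot(X, b1_b0)            # shape (n,): observed component per individual
mte = mte_x.mean(axis=0) + mte_u    # scalar + (gridsize,) -> (gridsize,)
mte_min = np.min(mte_x) + mte_u     # band accounting for variation in X
mte_max = np.max(mte_x) + mte_u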
Example #8
def fit(init_file, semipar=False):
    """This function estimates the MTE based on a parametric normal model
    or, alternatively, via the semiparametric method of
    local instrumental variables (LIV).
    """

    # Load the estimation file
    check_presence_init(init_file)
    dict_ = read(init_file, semipar)

    # Perform some consistency checks given the user's request
    check_presence_estimation_dataset(dict_)
    check_est_init_dict(dict_)

    # Semiparametric LIV Model
    if semipar is True:
        # Distribute initialization information.
        data = read_data(dict_["ESTIMATION"]["file"])
        dict_, data = check_append_constant(init_file,
                                            dict_,
                                            data,
                                            semipar=True)

        rslt = semipar_fit(dict_, data)

    # Parametric Normal Model
    else:
        # Perform some extra checks
        check_par_init_file(dict_)

        # Distribute initialization information.
        data = read_data(dict_["ESTIMATION"]["file"])
        dict_, data = check_append_constant(init_file,
                                            dict_,
                                            data,
                                            semipar=False)

        rslt = par_fit(dict_, data)

    return rslt
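A usage sketch (the file name is hypothetical); the semipar flag selects between the two branches above:

rslt_par = fit("init.yml")                # parametric normal model
rslt_liv = fit("init.yml", semipar=True)  # semiparametric LIV model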
Example #9
def bootstrap(init_file, nboot):
    """
    This function generates bootstrapped standard errors
    given an init_file and the number of bootstrap samples to be drawn.

    Parameters
    ----------
    init_file: yaml
        Initialization file containing parameters for the estimation
        process.
    nboot: int
        Number of bootstrap iterations, i.e. number of times
        the MTE is computed via bootstrap.

    Returns
    -------
    mte_boot: np.ndarray
        Array containing *nboot* estimates of the MTE.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Process the information specified in the initialization file
    bins, logit, bandwidth, gridsize, startgrid, endgrid = process_primary_inputs(
        dict_)
    trim, rbandwidth, reestimate_p, show_output = process_secondary_inputs(
        dict_)

    # Suppress output
    show_output = False

    # Prepare empty array to store output values
    mte_boot = np.zeros([gridsize, nboot])

    # Load the baseline data
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nboot:
        boot_data = resample(data,
                             replace=True,
                             n_samples=len(data),
                             random_state=None)

        # Estimate propensity score P(z)
        boot_data = estimate_treatment_propensity(dict_, boot_data, logit,
                                                  show_output)
        prop_score = boot_data["prop_score"]
        if isinstance(prop_score, pd.Series):
            # Define common support and trim the data (if trim=True)
            X, Y, prop_score = trim_support(dict_,
                                            boot_data,
                                            logit,
                                            bins,
                                            trim,
                                            reestimate_p,
                                            show_output=False)

            b0, b1_b0 = double_residual_reg(X, Y, prop_score)

            # Construct the MTE
            mte_x = mte_observed(X, b1_b0)
            mte_u = mte_unobserved_semipar(X, Y, b0, b1_b0, prop_score,
                                           bandwidth, gridsize, startgrid,
                                           endgrid)

            # Put the MTE together
            mte = mte_x.mean(axis=0) + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            continue

    return mte_boot
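Each column of mte_boot holds one bootstrap replicate of the MTE on the quantile grid, so pointwise standard errors and percentile bands follow directly; a sketch with a hypothetical file name:

import numpy as np

mte_boot = bootstrap("init.yml", nboot=250)
mte_se = mte_boot.std(axis=1)                                # pointwise standard errors
lower, upper = np.percentile(mte_boot, [2.5, 97.5], axis=1)  # 95% percentile band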
Example #10
def semipar_fit(init_file):
    """This functions estimates the MTE via Local Instrumental Variables"""
    check_presence_init(init_file)

    dict_ = read(init_file)
    # np.random.seed(dict_["SIMULATION"]["seed"]) # needed?

    check_presence_estimation_dataset(dict_)
    check_initialization_dict(dict_)

    # Distribute initialization information.
    data = read_data(dict_["ESTIMATION"]["file"])

    # Process data for the semiparametric estimation.
    indicator = dict_["ESTIMATION"]["indicator"]
    D = data[indicator].values
    Z = data[dict_["CHOICE"]["order"]]

    nbins = dict_["ESTIMATION"]["nbins"]
    trim = dict_["ESTIMATION"]["trim_support"]
    reestimate = dict_["ESTIMATION"]["reestimate_p"]
    rbandwidth = dict_["ESTIMATION"]["rbandwidth"]
    bandwidth = dict_["ESTIMATION"]["bandwidth"]
    gridsize = dict_["ESTIMATION"]["gridsize"]
    a = dict_["ESTIMATION"]["ps_range"][0]
    b = dict_["ESTIMATION"]["ps_range"][1]

    logit = dict_["ESTIMATION"]["logit"]
    show_output = dict_["ESTIMATION"]["show_output"]

    # The Local Instrumental Variables (LIV) approach

    # 1. Estimate propensity score P(z)
    ps = estimate_treatment_propensity(D, Z, logit, show_output)

    # 2a. Find common support
    treated, untreated, common_support = define_common_support(
        ps, indicator, data, nbins, show_output
    )

    # 2b. Trim the data
    if trim is True:
        data, ps = trim_data(ps, common_support, data)

    # 2c. Re-estimate baseline propensity score on the trimmed sample
    if reestimate is True:
        D = data[indicator].values
        Z = data[dict_["CHOICE"]["order"]]

        # Re-estimate propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

    # 3. Double Residual Regression
    # Sort data by ps
    data = data.sort_values(by="ps", ascending=True)
    ps = np.sort(ps)

    X = data[dict_["TREATED"]["order"]]
    Xp = construct_Xp(X, ps)
    Y = data[[dict_["ESTIMATION"]["dependent"]]]

    b0, b1_b0 = double_residual_reg(ps, X, Xp, Y, rbandwidth, show_output)

    # Turn the X, Xp, and Y DataFrames into np.ndarrays
    X_arr = np.array(X)
    Xp_arr = np.array(Xp)
    Y_arr = np.array(Y).ravel()

    # 4. Compute the unobserved part of Y
    Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)

    # 5. Estimate mte_u, the unobserved component of the MTE,
    # through a locally quadratic regression
    quantiles, mte_u = locpoly(ps, Y_tilde, 1, 2, bandwidth, gridsize, a, b)

    # 6. Construct the MTE
    # Calculate the MTE component that depends on X
    # mte_x = np.dot(X, b1_b0).mean(axis=0)

    # Put the MTE together
    # mte = mte_x + mte_u

    return quantiles, mte_u, X, b1_b0
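The commented-out step 6 shows what a caller is expected to do with the returned pieces; a sketch (the file name is hypothetical):

import numpy as np

quantiles, mte_u, X, b1_b0 = semipar_fit("init.yml")

mte_x = np.dot(X, b1_b0)          # observed component, one value per individual
mte = mte_x.mean(axis=0) + mte_u  # MTE evaluated on the quantile grid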
Example #11
def bootstrap(init_file, nbootstraps):
    """
    This function generates bootsrapped standard errors
    given an init_file and the number of bootsraps to be drawn.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Process the information specified in the initialization file
    nbins, logit, bandwidth, gridsize, a, b = process_user_input(dict_)
    trim, rbandwidth, reestimate_p = process_default_input(dict_)

    # Suppress output
    show_output = False

    # Prepare empty array to store output values
    mte_boot = np.zeros([gridsize, nbootstraps])

    # Load the baseline data
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nbootstraps:
        boot_data = resample(data,
                             replace=True,
                             n_samples=len(data),
                             random_state=None)

        # Process the inputs for the decision equation
        indicator, D, Z = process_choice_data(dict_, boot_data)

        # Estimate propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

        if isinstance(ps, np.ndarray):
            # Define common support and trim the data, if trim=True
            boot_data, ps = trim_support(
                dict_,
                boot_data,
                logit,
                ps,
                indicator,
                nbins,
                trim,
                reestimate_p,
                show_output,
            )

            # Estimate the observed and unobserved component of the MTE
            X, b1_b0, b0, mte_u = mte_components(dict_, boot_data, ps,
                                                 rbandwidth, bandwidth,
                                                 gridsize, a, b, show_output)

            # Calculate the MTE component that depends on X
            mte_x = np.dot(X, b1_b0).mean(axis=0)

            # Put the MTE together
            mte = mte_x + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            continue

    return mte_boot
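Because resample is called with random_state=None, every call to bootstrap draws different samples. For reproducible standard errors one could seed each iteration; the per-iteration seed below is an assumption, not part of the original interface:

import numpy as np
import pandas as pd
from sklearn.utils import resample

data = pd.DataFrame({"y": np.arange(5.0)})  # stand-in for the estimation data
seed = 1234                                 # hypothetical user-supplied seed

for counter in range(3):
    boot_data = resample(data, replace=True, n_samples=len(data),
                         random_state=seed + counter)  # reproducible draws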
Example #12
def bootstrap(init_file, nbootstraps, show_output=False):
    """
    This function generates bootsrapped standard errors
    given an init_file and the number of bootsraps to be drawn.
    """
    check_presence_init(init_file)
    dict_ = read(init_file)

    nbins = dict_["ESTIMATION"]["nbins"]
    trim = dict_["ESTIMATION"]["trim_support"]
    rbandwidth = dict_["ESTIMATION"]["rbandwidth"]
    bandwidth = dict_["ESTIMATION"]["bandwidth"]
    gridsize = dict_["ESTIMATION"]["gridsize"]
    a = dict_["ESTIMATION"]["ps_range"][0]
    b = dict_["ESTIMATION"]["ps_range"][1]

    logit = dict_["ESTIMATION"]["logit"]

    # Distribute initialization information.
    data = read_data(dict_["ESTIMATION"]["file"])

    # Prepare empty arrays to store output values
    mte_boot = np.zeros([gridsize, nbootstraps])

    counter = 0
    while counter < nbootstraps:
        boot = resample(data, replace=True, n_samples=len(data), random_state=None)

        # Process data for the semiparametric estimation.
        indicator = dict_["ESTIMATION"]["indicator"]
        D = boot[indicator].values
        Z = boot[dict_["CHOICE"]["order"]]

        # The Local Instrumental Variables (LIV) approach

        # 1. Estimate propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

        if isinstance(ps, np.ndarray):  # & (np.min(ps) <= 0.3) & (np.max(ps) >= 0.7):

            # 2a. Find common support
            treated, untreated, common_support = define_common_support(
                ps, indicator, boot, nbins, show_output
            )

            # 2b. Trim the data
            if trim is True:
                boot, ps = trim_data(ps, common_support, boot)

            # 3. Double Residual Regression
            # Sort data by ps
            boot = boot.sort_values(by="ps", ascending=True)
            ps = np.sort(ps)

            X = boot[dict_["TREATED"]["order"]]
            Xp = construct_Xp(X, ps)
            Y = boot[[dict_["ESTIMATION"]["dependent"]]]

            b0, b1_b0 = double_residual_reg(ps, X, Xp, Y, rbandwidth, show_output)

            # Turn the X, Xp, and Y DataFrames into np.ndarrays
            X_arr = np.array(X)
            Xp_arr = np.array(Xp)
            Y_arr = np.array(Y).ravel()

            # 4. Compute the unobserved part of Y
            Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)

            # 5. Estimate mte_u, the unobserved component of the MTE,
            # through a locally quadratic regression
            quantiles, mte_u = locpoly(ps, Y_tilde, 1, 2, bandwidth, gridsize, a, b)

            # 6. Construct the MTE
            # Calculate the MTE component that depends on X
            mte_x = np.dot(X, b1_b0).mean(axis=0)

            # Put the MTE together
            mte = mte_x + mte_u

            mte_boot[:, counter] = mte

            counter += 1

        else:
            continue

    return mte_boot