def read(file_):
    """The function reads the initialization file and returns a dictionary
    with parameters for the simulation.
    """
    check_presence_init(file_)

    dict_ = {'varnames': []}
    for line in open(file_).readlines():
        list_ = shlex.split(line)

        is_empty = (list_ == [])
        if not is_empty:
            is_keyword = list_[0].isupper()
        else:
            continue

        if is_keyword:
            keyword = list_[0]
            dict_[keyword] = {}
            continue

        process(list_, dict_, keyword)

    dict_ = auxiliary(dict_)

    return dict_
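# A hypothetical fragment of the plain-text format this legacy reader parses:
# an uppercase token opens a keyword block, and subsequent lines are handed to
# process(). The keys shown (seed, file) match those accessed by the
# estimation routines below; the exact layout is an assumption.
#
#   SIMULATION
#       seed    123
#   ESTIMATION
#       file    data.grmpy.txt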
def read(file, semipar=False, include_constant=False):
    """This function processes the initialization file for the
    estimation process.
    """
    # Check if there is an init file with the specified filename
    check_presence_init(file)

    # Load the initialization file
    with open(file) as y:
        init_dict = yaml.load(y, Loader=yaml.FullLoader)

    # If missing, add a generic covariance matrix of the unobservables
    if semipar is False and "DIST" not in init_dict:
        init_dict["DIST"] = {
            "params": np.array([0.1, 0.0, 0.0, 0.1, 0.0, 1.0])
        }

    # Process the initialization file
    attr_dict = create_attr_dict_est(init_dict, semipar, include_constant)

    return attr_dict
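# Usage sketch for the reader above; the file name "tutorial.grmpy.yml" is a
# placeholder. With semipar=False, a generic "DIST" block is filled in
# whenever the YAML file does not specify one.
attr_dict = read("tutorial.grmpy.yml", semipar=False)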
def par_fit(init_file):
    """The function estimates the coefficients of the simulated data set."""
    check_presence_init(init_file)

    dict_ = read(init_file)
    np.random.seed(dict_["SIMULATION"]["seed"])

    # We perform some basic consistency checks regarding the user's request.
    check_presence_estimation_dataset(dict_)
    # check_initialization_dict2(dict_)
    # check_init_file(dict_)

    # Distribute initialization information.
    data = read_data(dict_["ESTIMATION"]["file"])
    num_treated = dict_["AUX"]["num_covars_treated"]
    num_untreated = num_treated + dict_["AUX"]["num_covars_untreated"]

    _, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, dict_)

    if dict_["ESTIMATION"]["maxiter"] == 0:
        option = "init"
    else:
        option = dict_["ESTIMATION"]["start"]

    # Define the starting values
    x0 = start_values(dict_, data, option)
    opts, method = optimizer_options(dict_)
    dict_["AUX"]["criteria"] = calculate_criteria(dict_, X1, X0, Z1, Z0, Y1, Y0, x0)
    dict_["AUX"]["starting_values"] = backward_transformation(x0)
    rslt_dict = bfgs_dict()

    if opts["maxiter"] == 0:
        rslt = adjust_output(None, dict_, x0, X1, X0, Z1, Z0, Y1, Y0, rslt_dict)
    else:
        opt_rslt = minimize(
            minimizing_interface,
            x0,
            args=(dict_, X1, X0, Z1, Z0, Y1, Y0, num_treated, num_untreated, rslt_dict),
            method=method,
            options=opts,
        )
        rslt = adjust_output(
            opt_rslt, dict_, opt_rslt["x"], X1, X0, Z1, Z0, Y1, Y0, rslt_dict
        )

    # Print the output files
    print_logfile(dict_, rslt)

    if "comparison" in dict_["ESTIMATION"].keys():
        if dict_["ESTIMATION"]["comparison"] == 0:
            pass
        else:
            write_comparison(data, rslt)
    else:
        write_comparison(data, rslt)

    return rslt
def read_simulation(file):
    """Process the initialization file for the simulation."""
    # Check if there is an init file with the specified filename
    check_presence_init(file)

    # Load the initialization file
    with open(file) as y:
        init_dict = yaml.load(y, Loader=yaml.FullLoader)

    # Process the initialization file
    attr_dict = create_attr_dict_sim(init_dict)

    return attr_dict
def estimate(init_file):
    """The function estimates the coefficients of the simulated data set."""
    check_presence_init(init_file)

    dict_ = read(init_file)
    np.random.seed(dict_['SIMULATION']['seed'])

    # We perform some basic consistency checks regarding the user's request.
    check_presence_estimation_dataset(dict_)
    check_initialization_dict(dict_)
    check_init_file(dict_)

    # Distribute initialization information.
    data_file = dict_['ESTIMATION']['file']

    if dict_['ESTIMATION']['maxiter'] == 0:
        option = 'init'
    else:
        option = dict_['ESTIMATION']['start']

    # Read the data frame
    data = read_data(data_file)

    # Define the starting values
    x0 = start_values(dict_, data, option)
    opts, method = optimizer_options(dict_)
    dict_['AUX']['criteria'] = calculate_criteria(dict_, data, x0)
    dict_['AUX']['starting_values'] = backward_transformation(x0)
    rslt_dict = bfgs_dict()

    if opts['maxiter'] == 0:
        rslt = adjust_output(None, dict_, x0, data, rslt_dict)
    else:
        opt_rslt = minimize(
            minimizing_interface,
            x0,
            args=(dict_, data, rslt_dict),
            method=method,
            options=opts,
        )
        rslt = adjust_output(opt_rslt, dict_, opt_rslt['x'], data, rslt_dict)

    # Print the output files
    print_logfile(dict_, rslt)

    if 'comparison' in dict_['ESTIMATION'].keys():
        if dict_['ESTIMATION']['comparison'] == 0:
            pass
        else:
            write_comparison(dict_, data, rslt)
    else:
        write_comparison(dict_, data, rslt)

    return rslt
def read(file, semipar=False, include_constant=False):
    """This function processes the initialization file for the
    estimation process.
    """
    # Check if there is an init file with the specified filename
    check_presence_init(file)

    # Load the initialization file
    with open(file) as y:
        init_dict = yaml.load(y, Loader=yaml.FullLoader)

    # Process the initialization file
    attr_dict = create_attr_dict_est(init_dict, semipar, include_constant)

    return attr_dict
def fit(init_file, semipar=False):
    """This function estimates the MTE based on a parametric normal model
    or, if semipar is True, via the semiparametric method of
    local instrumental variables (LIV).
    """
    check_presence_init(init_file)
    dict_ = read(init_file)

    # Perform some consistency checks given the user's request
    check_presence_estimation_dataset(dict_)
    check_initialization_dict(dict_)

    # Semiparametric model
    if semipar is True:
        quantiles, mte_u, X, b1_b0 = semipar_fit(init_file)  # change to dict_

        # Construct the MTE
        # Calculate the MTE component that depends on X
        mte_x = np.dot(X, b1_b0)

        # Put the MTE together
        mte = mte_x.mean(axis=0) + mte_u

        # Account for variation in X
        mte_min = np.min(mte_x) + mte_u
        mte_max = np.max(mte_x) + mte_u

        rslt = {
            "quantiles": quantiles,
            "mte": mte,
            "mte_x": mte_x,
            "mte_u": mte_u,
            "mte_min": mte_min,
            "mte_max": mte_max,
            "X": X,
            "b1-b0": b1_b0,
        }

    # Parametric normal model
    else:
        check_par(dict_)
        rslt = par_fit(dict_)

    return rslt
def fit(init_file, semipar=False):
    """This function estimates the MTE based on a parametric normal model
    or, alternatively, via the semiparametric method of
    local instrumental variables (LIV).
    """
    # Load the estimation file
    check_presence_init(init_file)
    dict_ = read(init_file, semipar)

    # Perform some consistency checks given the user's request
    check_presence_estimation_dataset(dict_)
    check_est_init_dict(dict_)

    # Semiparametric LIV model
    if semipar is True:
        # Distribute initialization information.
        data = read_data(dict_["ESTIMATION"]["file"])
        dict_, data = check_append_constant(init_file, dict_, data, semipar=True)

        rslt = semipar_fit(dict_, data)

    # Parametric normal model
    else:
        # Perform some extra checks
        check_par_init_file(dict_)

        # Distribute initialization information.
        data = read_data(dict_["ESTIMATION"]["file"])
        dict_, data = check_append_constant(init_file, dict_, data, semipar=False)

        rslt = par_fit(dict_, data)

    return rslt
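# A minimal usage sketch for fit(); the file name "tutorial.grmpy.yml" is a
# placeholder. The same initialization file drives both estimation strategies.
rslt_par = fit("tutorial.grmpy.yml")                # parametric normal model
rslt_liv = fit("tutorial.grmpy.yml", semipar=True)  # semiparametric LIV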
def bootstrap(init_file, nboot):
    """This function generates bootstrapped standard errors given an
    init_file and the number of bootstraps to be drawn.

    Parameters
    ----------
    init_file: yaml
        Initialization file containing parameters
        for the estimation process.
    nboot: int
        Number of bootstrap iterations, i.e. number of times
        the MTE is computed via bootstrap.

    Returns
    -------
    mte_boot: np.ndarray
        Array containing *nboot* estimates of the MTE.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Process the information specified in the initialization file
    bins, logit, bandwidth, gridsize, startgrid, endgrid = process_primary_inputs(dict_)
    trim, rbandwidth, reestimate_p, show_output = process_secondary_inputs(dict_)

    # Suppress output
    show_output = False

    # Prepare an empty array to store the output values
    mte_boot = np.zeros([gridsize, nboot])

    # Load the baseline data
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nboot:
        boot_data = resample(data, replace=True, n_samples=len(data), random_state=None)

        # Estimate the propensity score P(z)
        boot_data = estimate_treatment_propensity(dict_, boot_data, logit, show_output)
        prop_score = boot_data["prop_score"]

        if isinstance(prop_score, pd.Series):
            # Define the common support and trim the data (if trim is True)
            X, Y, prop_score = trim_support(
                dict_, boot_data, logit, bins, trim, reestimate_p, show_output=False
            )

            b0, b1_b0 = double_residual_reg(X, Y, prop_score)

            # Construct the MTE
            mte_x = mte_observed(X, b1_b0)
            mte_u = mte_unobserved_semipar(
                X, Y, b0, b1_b0, prop_score, bandwidth, gridsize, startgrid, endgrid
            )

            # Put the MTE together
            mte = mte_x.mean(axis=0) + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            continue

    return mte_boot
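# A sketch of turning the bootstrap draws into pointwise 90% confidence bands
# for the MTE. mte_boot has shape (gridsize, nboot), so the percentiles are
# taken over axis 1; the file name "tutorial.grmpy.yml" is a placeholder.
mte_boot = bootstrap("tutorial.grmpy.yml", nboot=250)
con_b_lower = np.percentile(mte_boot, 5, axis=1)
con_b_upper = np.percentile(mte_boot, 95, axis=1)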
def semipar_fit(init_file):
    """This function estimates the MTE via local instrumental variables (LIV)."""
    check_presence_init(init_file)

    dict_ = read(init_file)
    # np.random.seed(dict_["SIMULATION"]["seed"])  # needed?

    check_presence_estimation_dataset(dict_)
    check_initialization_dict(dict_)

    # Distribute initialization information.
    data = read_data(dict_["ESTIMATION"]["file"])

    # Process the data for the semiparametric estimation.
    indicator = dict_["ESTIMATION"]["indicator"]
    D = data[indicator].values
    Z = data[dict_["CHOICE"]["order"]]

    nbins = dict_["ESTIMATION"]["nbins"]
    trim = dict_["ESTIMATION"]["trim_support"]
    reestimate = dict_["ESTIMATION"]["reestimate_p"]
    rbandwidth = dict_["ESTIMATION"]["rbandwidth"]
    bandwidth = dict_["ESTIMATION"]["bandwidth"]
    gridsize = dict_["ESTIMATION"]["gridsize"]
    a = dict_["ESTIMATION"]["ps_range"][0]
    b = dict_["ESTIMATION"]["ps_range"][1]

    logit = dict_["ESTIMATION"]["logit"]
    show_output = dict_["ESTIMATION"]["show_output"]

    # The local instrumental variables (LIV) approach

    # 1. Estimate the propensity score P(z)
    ps = estimate_treatment_propensity(D, Z, logit, show_output)

    # 2a. Find the common support
    treated, untreated, common_support = define_common_support(
        ps, indicator, data, nbins, show_output
    )

    # 2b. Trim the data
    if trim is True:
        data, ps = trim_data(ps, common_support, data)

    # 2c. Re-estimate the baseline propensity score on the trimmed sample
    if reestimate is True:
        D = data[indicator].values
        Z = data[dict_["CHOICE"]["order"]]

        # Re-estimate the propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

    # 3. Double residual regression
    # Sort the data by ps
    data = data.sort_values(by="ps", ascending=True)
    ps = np.sort(ps)

    X = data[dict_["TREATED"]["order"]]
    Xp = construct_Xp(X, ps)
    Y = data[[dict_["ESTIMATION"]["dependent"]]]

    b0, b1_b0 = double_residual_reg(ps, X, Xp, Y, rbandwidth, show_output)

    # Turn the X, Xp, and Y DataFrames into np.ndarrays
    X_arr = np.array(X)
    Xp_arr = np.array(Xp)
    Y_arr = np.array(Y).ravel()

    # 4. Compute the unobserved part of Y
    Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)

    # 5. Estimate mte_u, the unobserved component of the MTE,
    # through a locally quadratic regression
    quantiles, mte_u = locpoly(ps, Y_tilde, 1, 2, bandwidth, gridsize, a, b)

    # 6. Construct the MTE
    # The MTE component that depends on X is assembled by the caller:
    # mte_x = np.dot(X, b1_b0).mean(axis=0)
    # mte = mte_x + mte_u

    return quantiles, mte_u, X, b1_b0
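# As in fit() above, the pieces returned by semipar_fit() can be assembled
# into the MTE evaluated at the mean of X (a sketch; the file name is a
# placeholder):
quantiles, mte_u, X, b1_b0 = semipar_fit("tutorial.grmpy.yml")
mte = np.dot(X, b1_b0).mean(axis=0) + mte_u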
def bootstrap(init_file, nbootstraps):
    """This function generates bootstrapped standard errors given an
    init_file and the number of bootstraps to be drawn.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Process the information specified in the initialization file
    nbins, logit, bandwidth, gridsize, a, b = process_user_input(dict_)
    trim, rbandwidth, reestimate_p = process_default_input(dict_)

    # Suppress output
    show_output = False

    # Prepare an empty array to store the output values
    mte_boot = np.zeros([gridsize, nbootstraps])

    # Load the baseline data
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nbootstraps:
        boot_data = resample(data, replace=True, n_samples=len(data), random_state=None)

        # Process the inputs for the decision equation
        indicator, D, Z = process_choice_data(dict_, boot_data)

        # Estimate the propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

        if isinstance(ps, np.ndarray):
            # Define the common support and trim the data, if trim is True
            boot_data, ps = trim_support(
                dict_,
                boot_data,
                logit,
                ps,
                indicator,
                nbins,
                trim,
                reestimate_p,
                show_output,
            )

            # Estimate the observed and unobserved components of the MTE
            X, b1_b0, b0, mte_u = mte_components(
                dict_, boot_data, ps, rbandwidth, bandwidth, gridsize, a, b, show_output
            )

            # Calculate the MTE component that depends on X
            mte_x = np.dot(X, b1_b0).mean(axis=0)

            # Put the MTE together
            mte = mte_x + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            continue

    return mte_boot
def bootstrap(init_file, nbootstraps, show_output=False):
    """This function generates bootstrapped standard errors given an
    init_file and the number of bootstraps to be drawn.
    """
    check_presence_init(init_file)
    dict_ = read(init_file)

    nbins = dict_["ESTIMATION"]["nbins"]
    trim = dict_["ESTIMATION"]["trim_support"]
    rbandwidth = dict_["ESTIMATION"]["rbandwidth"]
    bandwidth = dict_["ESTIMATION"]["bandwidth"]
    gridsize = dict_["ESTIMATION"]["gridsize"]
    a = dict_["ESTIMATION"]["ps_range"][0]
    b = dict_["ESTIMATION"]["ps_range"][1]
    logit = dict_["ESTIMATION"]["logit"]

    # Distribute initialization information.
    data = read_data(dict_["ESTIMATION"]["file"])

    # Prepare an empty array to store the output values
    mte_boot = np.zeros([gridsize, nbootstraps])

    counter = 0
    while counter < nbootstraps:
        boot = resample(data, replace=True, n_samples=len(data), random_state=None)

        # Process the data for the semiparametric estimation.
        indicator = dict_["ESTIMATION"]["indicator"]
        D = boot[indicator].values
        Z = boot[dict_["CHOICE"]["order"]]

        # The local instrumental variables (LIV) approach

        # 1. Estimate the propensity score P(z)
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

        if isinstance(ps, np.ndarray):  # & (np.min(ps) <= 0.3) & (np.max(ps) >= 0.7):
            # 2a. Find the common support
            treated, untreated, common_support = define_common_support(
                ps, indicator, boot, nbins, show_output
            )

            # 2b. Trim the data
            if trim is True:
                boot, ps = trim_data(ps, common_support, boot)

            # 3. Double residual regression
            # Sort the data by ps
            boot = boot.sort_values(by="ps", ascending=True)
            ps = np.sort(ps)

            X = boot[dict_["TREATED"]["order"]]
            Xp = construct_Xp(X, ps)
            Y = boot[[dict_["ESTIMATION"]["dependent"]]]

            b0, b1_b0 = double_residual_reg(ps, X, Xp, Y, rbandwidth, show_output)

            # Turn the X, Xp, and Y DataFrames into np.ndarrays
            X_arr = np.array(X)
            Xp_arr = np.array(Xp)
            Y_arr = np.array(Y).ravel()

            # 4. Compute the unobserved part of Y
            Y_tilde = Y_arr - np.dot(X_arr, b0) - np.dot(Xp_arr, b1_b0)

            # 5. Estimate mte_u, the unobserved component of the MTE,
            # through a locally quadratic regression
            quantiles, mte_u = locpoly(ps, Y_tilde, 1, 2, bandwidth, gridsize, a, b)

            # 6. Construct the MTE
            # Calculate the MTE component that depends on X
            mte_x = np.dot(X, b1_b0).mean(axis=0)

            # Put the MTE together
            mte = mte_x + mte_u
            mte_boot[:, counter] = mte

            counter += 1

        else:
            continue

    return mte_boot