def test_type_proportions(model, type_proportions):
    """Compare simulated type shares with expected shares by years of schooling.

    Parameters
    ----------
    model : str
        Name of the example model passed to ``rp.get_example_model``.
    type_proportions : sequence
        ``type_proportions[0]`` holds the expected shares for agents with at most
        nine years of schooling, ``type_proportions[1]`` the expected shares for
        agents with ten or more years.
    """
    nine_years_or_less = type_proportions[0]
    ten_years_or_more = type_proportions[1]

    params, options = rp.get_example_model(model, with_data=False)

    options["n_periods"] = 1
    # ``simulation_agents`` is the option key for the number of simulated agents;
    # the former ``simulated_agents`` spelling did not match it.
    options["simulation_agents"] = 10_000

    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    cases = (
        (df.Experience_School.le(9), nine_years_or_less),
        (df.Experience_School.ge(10), ten_years_or_more),
    )
    for mask, expected in cases:
        shares = (
            df.loc[mask, "Type"]
            .value_counts(normalize=True, sort=False)
            .sort_index()
        )
        np.testing.assert_allclose(shares, expected, atol=0.05)
def test_data_variables(model):
    """Value function components in df add up to internally computed values."""
    _, _, df = rp.get_example_model(model)

    for raw_choice in df.Choice.unique():
        label = raw_choice.capitalize()
        wage = df[f"Wage_{label}"]

        # Shocks in working choices are already included in the wage, so the
        # separate shock only enters where no wage is recorded.
        df["Shock_Nonpec"] = np.where(wage.isna(), df[f"Shock_Reward_{label}"], 0)

        df[f"Flow_Utility_{label}_"] = (
            wage.fillna(0)
            + df[f"Nonpecuniary_Reward_{label}"]
            + df["Shock_Nonpec"]
        )
        df[f"Value_Function_{label}_"] = (
            df[f"Flow_Utility_{label}_"]
            + df["Discount_Rate"] * df[f"Continuation_Value_{label}"]
        )

        # The recomputed columns carry a trailing underscore; compare each with
        # the column shipped in the data.
        for quantity in ("Flow_Utility", "Value_Function"):
            pd.testing.assert_series_equal(
                df[f"{quantity}_{label}_"],
                df[f"{quantity}_{label}"],
                check_names=False,
            )
def main():
    """Evaluate the criterion function multiple times for a scalability report.

    The criterion function is evaluated ``maxfun``-times. The number of threads used
    is limited by environment variables. **respy** has to be imported after the
    environment variables are set as Numpy, Numba and others load them at import
    time.

    Command-line arguments (read from ``sys.argv``):

    1. ``model`` -- name of the example model.
    2. ``maxfun`` -- number of criterion evaluations, must be non-negative.
    3. ``n_threads`` -- maximum number of threads, or ``-1`` for no restriction.

    One JSON line with the timing information is appended to
    ``scalability_results.txt``.
    """
    model = sys.argv[1]
    maxfun = int(sys.argv[2])
    n_threads = int(sys.argv[3])

    # Validate input.
    assert maxfun >= 0, "Maximum number of function evaluations cannot be negative."
    assert n_threads >= 1 or n_threads == -1, (
        "Use -1 to impose no restrictions on maximum number of threads or choose a "
        "number higher than zero."
    )

    # Set number of threads. ``-1`` means "no restriction", so the variables are
    # left untouched in that case instead of being set to the literal "-1".
    if n_threads != -1:
        for variable in (
            "NUMBA_NUM_THREADS",
            "MKL_NUM_THREADS",
            "OMP_NUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ):
            os.environ[variable] = f"{n_threads}"

    # Late import of respy to ensure that environment variables are read by Numpy,
    # etc..
    import respy as rp

    # Get model
    params, options = rp.get_example_model(model, with_data=False)

    # Simulate the data
    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    # Get the criterion function and the parameter vector.
    crit_func = rp.get_log_like_func(params, options, df)

    # Run the estimation
    start = dt.datetime.now()

    for _ in range(maxfun):
        crit_func(params)

    end = dt.datetime.now()

    # Aggregate information
    output = {
        "model": model,
        "maxfun": maxfun,
        "n_threads": n_threads,
        "start": str(start),
        "end": str(end),
        "duration": str(end - start),
    }

    # Save time to file
    with open("scalability_results.txt", "a+") as file:
        file.write(json.dumps(output))
        file.write("\n")
def test_table_6_exact_solution_row_mean_and_sd(model, subsidy):
    """Replicate the first two rows of Table 6 in Keane and Wolpin (1994).

    In more detail, the mean effects and the standard deviations of a 500, 1000, and
    2000 dollar tuition subsidy on years of schooling and of experience in occupation
    a and occupation b based on 40 samples of 100 individuals using true parameters
    are tested.

    """
    params, options = rp.get_example_model(model, with_data=False)
    options["simulation_agents"] = 4000
    simulate = rp.get_simulate_func(params, options)

    # Simulate once without and once with the tuition subsidy.
    baseline = simulate(params)
    params.loc[("nonpec_edu", "at_least_twelve_exp_edu"), "value"] += subsidy
    subsidized = simulate(params)

    columns = ["Bootstrap_Sample", "Experience_Edu", "Experience_A", "Experience_B"]

    def _mean_final_experience(frame):
        # Split the agents into 40 bootstrap samples via the first index level
        # and average the experience columns in period 39.
        frame["Bootstrap_Sample"] = pd.cut(
            frame.index.get_level_values(0), bins=40, labels=np.arange(1, 41)
        )
        final_period = frame.query("Period == 39")[columns]
        return final_period.groupby("Bootstrap_Sample").mean()

    mean_exp_wo_ts = _mean_final_experience(baseline)
    mean_exp_w_ts = _mean_final_experience(subsidized)

    # Calculate bootstrap statistics.
    effect = mean_exp_w_ts.subtract(mean_exp_wo_ts).assign(Data=model)
    effect = effect.reset_index().set_index(["Data", "Bootstrap_Sample"])
    effect = effect.stack().unstack([0, 2])
    rp_replication = effect.agg(["mean", "std"])

    # Expected values are taken from Table 6 in the paper.
    kw_94_table_6 = pd.read_csv(
        TEST_RESOURCES_DIR / "kw_94_table_6.csv",
        index_col=0,
        header=[0, 1],
        nrows=2,
    )
    expected = kw_94_table_6[model]
    replicated = rp_replication[model]

    # Test that standard deviations are very close.
    np.testing.assert_allclose(replicated.iloc[1], expected.iloc[1], atol=0.05)

    # Test that difference lies within one standard deviation.
    deviation = replicated.iloc[0].to_numpy() - expected.iloc[0].to_numpy()
    assert (np.abs(deviation) < expected.iloc[1]).all()
def process_model_or_seed(model_or_seed, **kwargs):
    """Return ``(params, options)`` for an example model name or a random seed.

    A string is treated as the name of an example model. Anything else is used to
    seed NumPy's global random state before a random model is generated with
    ``**kwargs``.
    """
    if not isinstance(model_or_seed, str):
        np.random.seed(model_or_seed)
        params, options = generate_random_model(**kwargs)
    else:
        params, options = rp.get_example_model(model_or_seed, with_data=False)

    # Shorten the horizon of the KW97 models.
    if "kw_97" in str(model_or_seed):
        options["n_periods"] = 10

    return params, options
def scaling_model_specification(
    base_model, num_periods=None, add_occ=None, add_types=None
):
    """Build ``(params, options)`` for ``base_model`` with optional modifications.

    Each of ``num_periods``, ``add_occ`` and ``add_types`` is applied, in that order,
    only when it is not ``None``.
    """
    params, options = rp.get_example_model(base_model, with_data=False)

    if num_periods is not None:
        options = _modify_periods(options, num_periods)

    if add_occ is not None:
        params, options = _add_occupations(params, options, add_occ)

    if add_types is not None:
        params = _add_types(params, add_types)

    return params, options
def test_dense_period_choice():
    """Check that the restricted choice is switched off in keys with ``key[0] < 5``.

    The option ``negative_choice_set`` restricts choice ``"b"`` with the condition
    ``"period < 5"``. ``key[0]`` is presumably the period and ``key[1]`` the tuple of
    choice availabilities with ``"b"`` in second position -- TODO confirm against
    ``_create_dense_period_choice``.
    """
    params, options = rp.get_example_model("kw_94_one", with_data=False)
    options["negative_choice_set"] = {"b": ["period < 5"]}

    optim_paras, options = process_params_and_options(params, options)
    state_space = create_state_space_class(optim_paras, options)

    check = _create_dense_period_choice(
        state_space.core,
        state_space.dense,
        state_space.core_key_to_core_indices,
        state_space.core_key_to_complex,
        optim_paras,
        options,
    )

    for key in check:
        if key[0] < 5:
            # Use ``not`` rather than ``~``: on a plain Python bool ``~True`` is -2
            # and ``~False`` is -1, both truthy, so the old assertion could never
            # fail. ``not`` is correct for Python and NumPy booleans alike.
            assert not key[1][1]
def get_quantity_of_interest(sample):
    """Return the difference between the policy and the baseline model outcome.

    ``sample`` is labelled with the ``parameter`` column of ``table41_kw_94.csv``,
    transformed with ``transform_params_kw94_respy`` and evaluated twice with
    ``model_wrapper_kw_94``: once with ``500.0`` and once with ``0.0`` as last
    argument. The difference of the first return values is returned.
    """
    # We need the baseline options and a grid for the indices. It does not matter
    # which of the three KW94 specifications we use here.
    _, base_options = rp.get_example_model("kw_94_one", with_data=False)
    index = pd.read_csv(f"{INPUT_DIR}/table41_kw_94.csv", sep=",")["parameter"].values

    labelled = pd.Series(data=sample, index=index)
    param_sample = pd.DataFrame(
        transform_params_kw94_respy(labelled), columns=["value"]
    )

    policy_edu, _ = model_wrapper_kw_94(param_sample, base_options, 500.0)
    base_edu, _ = model_wrapper_kw_94(param_sample, base_options, 0.0)

    return policy_edu - base_edu
def process_model_or_seed(model_or_seed=None, **kwargs):
    """Return ``(params, options)`` for an example model name or a random seed.

    Parameters
    ----------
    model_or_seed : str or int
        A string is passed to ``rp.get_example_model``. An integer seeds NumPy's
        global random state before ``generate_random_model(**kwargs)`` is called.
    **kwargs
        Forwarded to ``generate_random_model`` when a seed is given.

    Returns
    -------
    tuple
        ``(params, options)``. For the known model families the number of periods
        in ``options`` is reduced.

    Raises
    ------
    ValueError
        If ``model_or_seed`` is neither a string nor an integer.
    """
    if isinstance(model_or_seed, str):
        params, options = rp.get_example_model(model_or_seed, with_data=False)
    elif isinstance(model_or_seed, int):
        np.random.seed(model_or_seed)
        params, options = generate_random_model(**kwargs)
    else:
        raise ValueError(
            "'model_or_seed' must be a model name (str) or a seed (int), got "
            f"{type(model_or_seed).__name__}."
        )

    # Listed in order of precedence so that the first match wins: a name matching
    # several families resolves as before (kw_97, then kw_2000, then kw_94).
    name = str(model_or_seed)
    for family, n_periods in (("kw_97", 5), ("kw_2000", 3), ("kw_94", 10)):
        if family in name:
            options["n_periods"] = n_periods
            break

    return params, options
def test_transform_datasets():
    """Test whether the transformations work for the baseline parameterization."""
    # TODO: For some reason this test fails for the third dataset ("three"), which is
    # why only the first two are checked. This needs to be further investigated
    # later.
    for table_number, dataset in enumerate(["one", "two"], start=1):
        par_respy, _ = rp.get_example_model(f"kw_94_{dataset}", with_data=False)

        df = pd.read_csv(f"{INPUT_DIR}/table4{table_number}_kw_94.csv", sep=",")
        par_uq = pd.Series(data=df["true"].values, index=df["parameter"].values)

        np.testing.assert_almost_equal(
            par_respy["value"].to_numpy(),
            transform_params_kw94_respy(par_uq).to_numpy(),
        )
def run(args):
    """Evaluate the quantity of interest at the baseline and at random draws.

    Parameters
    ----------
    args : object
        Needs the attributes ``seed`` (seed for NumPy's global random state),
        ``num_draws`` (number of parameter draws) and ``num_procs`` (number of
        worker processes).

    The baseline results are pickled into ``RSLT_DIR``. The Monte Carlo parameters
    and quantities are pickled as ``mc_params.uq.pkl`` and ``mc_quantity.uq.pkl``
    relative to the current working directory.
    """
    # We need to take stock for baseline parameters and store them for future
    # processing.
    base_params, base_options = rp.get_example_model("kw_94_one", with_data=False)

    policy_edu, _ = model_wrapper_kw_94(base_params, base_options, 500)
    base_edu, _ = model_wrapper_kw_94(base_params, base_options, 0)
    base_quantity = pd.DataFrame(
        policy_edu - base_edu, columns=["avg_schooling"], index=[0]
    )

    base_quantity.to_pickle(RSLT_DIR / "base_quantity.uq.pkl")
    base_params.to_pickle(RSLT_DIR / "base_params.uq.pkl")

    # We need to set up the covariance matrix and the estimated parameters from the
    # paper. The file is read once and also provides the parameter labels.
    df = pd.read_csv(f"{INPUT_DIR}/table41_kw_94.csv", sep=",")
    mean, cov = df["true"].values, np.diag((df["sd"] ** 2).values)
    index = df["parameter"].values

    # We are ready to draw the random points of evaluation.
    np.random.seed(args.seed)
    distribution = cp.MvNormal(loc=mean, scale=cov)
    samples = [distribution.sample() for _ in range(args.num_draws)]

    # The context manager shuts the worker processes down once the blocking ``map``
    # has returned; previously the pool was never closed.
    with mp.Pool(args.num_procs) as pool:
        quantities = pool.map(get_quantity_of_interest, samples)

    # We now store the random parameters and the quantity of interest for further
    # processing.
    params = [
        pd.DataFrame(
            transform_params_kw94_respy(pd.Series(data=sample, index=index)),
            columns=["value"],
        )
        for sample in samples
    ]

    mc_params = pd.concat(params, keys=range(args.num_draws), names=["iteration"])
    mc_quantities = pd.DataFrame(
        quantities, columns=["avg_schooling"], index=range(args.num_draws)
    )
    mc_quantities.index.name = "iteration"

    # NOTE(review): unlike the baseline files these two are not written to
    # ``RSLT_DIR`` -- confirm whether that is intended.
    mc_quantities.to_pickle("mc_quantity.uq.pkl")
    mc_params.to_pickle("mc_params.uq.pkl")
def process_model_or_seed(model_or_seed=None, **kwargs):
    """Return ``(params, options)`` for an example model name or a random seed.

    Parameters
    ----------
    model_or_seed : str or int
        A string is passed to ``rp.get_example_model``. An integer seeds NumPy's
        global random state before ``generate_random_model(**kwargs)`` is called.
    **kwargs
        Forwarded to ``generate_random_model`` when a seed is given.

    Returns
    -------
    tuple
        ``(params, options)``. For the known model families the number of periods
        in ``options`` is reduced.

    Raises
    ------
    ValueError
        If ``model_or_seed`` is neither a string nor an integer.
    """
    if isinstance(model_or_seed, str):
        params, options = rp.get_example_model(model_or_seed, with_data=False)
    elif isinstance(model_or_seed, int):
        np.random.seed(model_or_seed)
        params, options = generate_random_model(**kwargs)
    else:
        raise ValueError(
            "'model_or_seed' must be a model name (str) or a seed (int), got "
            f"{type(model_or_seed).__name__}."
        )

    # Reduced horizons per model family; the first matching entry wins.
    reduced_periods = (
        ("kw_94", 10),
        ("kw_97", 5),
        ("kw_2000", 3),
        ("robinson_crusoe_extended", 5),
        ("robinson_crusoe_with_observed_characteristics", 5),
    )
    name = str(model_or_seed)
    for family, n_periods in reduced_periods:
        if family in name:
            options["n_periods"] = n_periods
            break

    return params, options
def test_distribution_of_lagged_choices():
    """Compare lagged choices by schooling between example data and a simulation.

    For period 0, the column-normalized cross table of ``Lagged_Choice_1`` by
    ``Experience_School`` in the data shipped with ``kw_97_extended`` is compared
    with the same table computed from simulated data.
    """
    params, options, actual_df = rp.get_example_model("kw_97_extended")

    options["n_periods"] = 1
    # ``simulation_agents`` is the option key for the number of simulated agents;
    # the former ``simulated_agents`` spelling did not match it.
    options["simulation_agents"] = 10_000

    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    actual_df = actual_df.query("Period == 0")
    expected = pd.crosstab(
        actual_df.Lagged_Choice_1, actual_df.Experience_School, normalize="columns"
    )

    df = df.query("Period == 0")
    calculated = pd.crosstab(
        df.Lagged_Choice_1, df.Experience_School, normalize="columns"
    )

    # Allow for 4% differences, which are likely for small subsets.
    np.testing.assert_allclose(expected, calculated, atol=0.04)
def generate_data(model, present_bias=1):
    """Simulate and pickle 100 datasets for a model and a present-bias parameter.

    Parameters
    ----------
    model : str
        "kw_94_one", "kw_94_two", "kw_94_three" according to the desired Keane and
        Wolpin (1994) specification.
    present_bias : float
        1 for exponential discounting, < 1 for hyperbolic discounting.

    """
    params, options = rp.get_example_model(model, with_data=False)
    params.loc[("beta", "beta"), ["value", "comment"]] = [
        present_bias,
        "present-bias parameter",
    ]

    # Datasets for exponential and hyperbolic discounting go to separate folders
    # (the paths are required to exist).
    subfolder = "exp_datasets" if present_bias == 1 else "hyp_datasets"

    # Simulation seeds run from 0 to 99, solution seeds from 1000 to 1099.
    for simulation_seed, solution_seed in zip(range(100), range(1000, 1100)):
        options["simulation_seed"] = simulation_seed
        options["solution_seed"] = solution_seed

        simulate = rp.get_simulate_func(params, options)
        df = simulate(params)

        df.to_pickle(
            f"respy_datasets/{subfolder}/{model}/"
            f"seed_sim_{simulation_seed}_sol_seed_{solution_seed}.pickle"
        )
def test_replication_of_choice_probabilities(model, table):
    """Replicate choice probabilities in Tables 2.1-2.3. in Keane and Wolpin (1994b).

    For each of the three parameterizations a data set is simulated and the choice
    probabilities for each period are compared to the numbers in the paper.

    Parameters
    ----------
    model : str
        Name of the example model.
    table : str
        File name of the csv with the expected probabilities inside
        ``TEST_RESOURCES_DIR``.
    """
    # Get choice probabilities from paper.
    expected = pd.read_csv(TEST_RESOURCES_DIR / table, index_col="period")

    # Simulate data for choice probabilities with more individuals to stabilize
    # choice probabilities. ``simulation_agents`` is the option key for the sample
    # size; the former ``simulated_agents`` spelling did not match it.
    params, options = rp.get_example_model(model, with_data=False)
    options["simulation_agents"] = 10_000

    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    result = (
        df.groupby("Period").Choice.value_counts(normalize=True).unstack().fillna(0)
    )

    np.testing.assert_allclose(expected, result, atol=0.1)
def task_get_history_delta_wage_moments(produces):
    """Run the ``respyabc`` inference for delta on wage moments and pickle it.

    Pseudo-observed data are computed with ``compute_model`` at
    ``delta_delta = 0.95``. ``respyabc`` is then called with a uniform prior
    specification for ``delta_delta`` and the resulting history is pickled.

    Parameters
    ----------
    produces : path-like
        File the history is written to.
    """
    np.random.seed(123)

    # The data shipped with the example model is not needed here.
    params, options, _ = rp.get_example_model("kw_94_one")
    model_to_simulate = rp.get_simulate_func(params, options)

    parameter_true = {"delta_delta": 0.95}
    pseudo_observed_data = compute_model(
        parameter_true,
        model_to_simulate=model_to_simulate,
        parameter_for_simulation=params,
        options_for_simulation=options,
        descriptives="wage_moments",
    )

    population_size = 500
    max_nr_populations = 10
    minimum_epsilon = 0.05

    delta_prior_low = 0.9
    delta_prior_length = 0.09
    parameters_prior = {
        "delta_delta": [[delta_prior_low, delta_prior_length], "uniform"]
    }

    history = respyabc(
        model=compute_model,
        parameters_prior=parameters_prior,
        data=pseudo_observed_data,
        distance_abc=compute_mean_squared_distance,
        descriptives="wage_moments",
        population_size_abc=population_size,
        max_nr_populations_abc=max_nr_populations,
        minimum_epsilon_abc=minimum_epsilon,
    )

    with open(produces, "wb") as out_file:
        pickle.dump(history, out_file)
df_occ = df_occ[~cond] df_occ = df_occ.unstack() return df_occ def calc_wage_distribution_overall(df): """Compute choice frequencies.""" df_ove = df.groupby(["Period"])["Wage"].describe()[["mean", "std"]] df_ove["Choice"] = "all" df_ove.set_index(["Choice"], append=True, inplace=True) df_ove = df_ove.reorder_levels(["Period", "Choice"]) df_ove = df_ove.unstack() return df_ove params, options, df_emp = rp.get_example_model("kw_97_extended_respy") # We want to reduce the computational burden for debugging purposes and our continuous # integration pipeline. if IS_DEBUG: options["n_periods"] = 12 simulate_func = rp.get_simulate_func(params, options) df_sim = simulate_func(params) df_descriptives = None for label, df in [("empirical", df_emp), ("simulated", df_sim)]: df_occ = calc_wage_distribution_occupation(df) df_ove = calc_wage_distribution_overall(df)
def test_table_6_exact_solution_row_mean_and_sd():
    """Replicate the first two rows of Table 6 in Keane and Wolpin (1994).

    In more detail, the mean effects and the standard deviations of a 500, 1000, and
    2000 dollar tuition subsidy on years of schooling and of experience in occupation
    a and occupation b based on 40 samples of 100 individuals using true parameters
    are tested.

    """
    titles = ["kw_94_one", "kw_94_two", "kw_94_three"]
    subsidies = [500, 1000, 2000]
    columns = ["Bootstrap_Sample", "Experience_Edu", "Experience_A", "Experience_B"]

    def _simulate_with_subsidy(title, subsidy):
        # Every data set gets freshly loaded parameters and its own simulate
        # function; the subsidy is added after the function has been created.
        params, options = rp.get_example_model(title, with_data=False)
        options["simulation_agents"] = 4000
        simulate = rp.get_simulate_func(params, options)
        params.loc[("nonpec_edu", "at_least_twelve_exp_edu"), "value"] += subsidy
        return simulate(params)

    def _mean_final_experience(frame):
        # Assign one of 40 bootstrap sample numbers by identifier and average the
        # experience columns in period 39.
        frame["Bootstrap_Sample"] = pd.cut(
            frame.Identifier, bins=40, labels=np.arange(1, 41)
        )
        final_period = frame.loc[frame.Period.eq(39), columns]
        return final_period.groupby("Bootstrap_Sample").mean()

    # Generate the 3 * 2 data sets, each model without and with its tuition subsidy.
    data_frames = []
    for title, subsidy in zip(titles, subsidies):
        data_frames.append(_simulate_with_subsidy(title, 0))
        data_frames.append(_simulate_with_subsidy(title, subsidy))

    # Calculate the statistics based on 40 bootstrap samples of 100 individuals each.
    bootstrapped_statistics = []
    for position, title in enumerate(titles):
        mean_exp_wo_ts = _mean_final_experience(data_frames[2 * position])
        mean_exp_w_ts = _mean_final_experience(data_frames[2 * position + 1])

        effect = mean_exp_w_ts.subtract(mean_exp_wo_ts).assign(Data=title)
        effect = effect.reset_index().set_index(["Data", "Bootstrap_Sample"])
        bootstrapped_statistics.append(effect.stack().unstack([0, 2]))

    rp_replication = pd.concat(
        [stats.agg(["mean", "std"]) for stats in bootstrapped_statistics], axis=1
    )

    # Expected values are taken from csv of table 6.
    kw_94_table_6 = pd.read_csv(
        TEST_RESOURCES_DIR / "kw_94_table_6.csv",
        index_col=0,
        header=[0, 1],
        nrows=2,
    )

    # Test that standard deviations are very close.
    np.testing.assert_allclose(
        rp_replication.iloc[1], kw_94_table_6.iloc[1], atol=0.05
    )

    # Test that difference lies within one standard deviation.
    deviation = rp_replication.iloc[0].to_numpy() - kw_94_table_6.iloc[0].to_numpy()
    assert (np.abs(deviation) < kw_94_table_6.iloc[1]).all()
import respy as rp

# We create a grid for the KW94 additions: the rows of ``wage_a`` are copied and
# relabelled with the category ``wage_aa``.
params, options, data = rp.get_example_model("kw_94_one")

occ_grid_kw94 = (
    params.loc[("wage_a", slice(None)), :]
    .copy()
    .reset_index()
    .assign(category="wage_aa")
    .set_index(["category", "name"])
)
occ_grid_kw94.to_pickle("occ_grid_kw_94.pkl")

# We create a grid for the KW97 additions from selected rows of
# ``wage_white_collar``.
params, options, data = rp.get_example_model("kw_97_extended")

# NOTE(review): "exp_white_collar_square" and "exp_blue_collar_squared" use
# different suffixes -- confirm both names against the parameter index.
labels = [
    "constant",
    "exp_school",
    "exp_white_collar",
    "exp_white_collar_square",
    "exp_blue_collar",
    "exp_blue_collar_squared",
    "exp_military",
    "type_1",
    "type_2",
    "type_3",
]

occ_grid_kw97 = (
    params.loc[("wage_white_collar", labels), :]
    .copy()
    .reset_index()
    .assign(category="wage_aa")
    .set_index(["category", "name"])
)
occ_grid_kw97.to_pickle("occ_grid_kw_97.pkl")
# extract choices choices = df.groupby("Period").Choice.value_counts( normalize=True).unstack() # extract wages (mean and std) wages = df[col_to_keep].groupby("Period").describe().loc[:, ( slice(None), ["mean", "std"])] res = pd.concat([wages, choices], axis=1) return res if __name__ == "__main__": # load params params, options = rp.get_example_model("kw_94_three", with_data=False) options["simulation_agents"] = 10_000 params_dict = { "true": { "delta": 0.95, "beta": 0.8 }, "miss_exp": { "delta": 0.938, "beta": 1 }, "miss_1": { "delta": 0.948, "beta": 0.83 },
import os

import pandas as pd
import respy as rp

# We need to save on memory when running the script under CI. The model is
# therefore chosen first, so that the larger model and the revised parameters are
# only loaded when they are actually used.
if "CI" in os.environ:
    params, options, df_obs = rp.get_example_model("kw_94_one")
    label = ("nonpec_edu", "at_least_twelve_exp_edu")
else:
    _, options, df_obs = rp.get_example_model("kw_97_basic")
    params = pd.read_pickle("params_revised.pkl")
    label = ("nonpec_school", "hs_graduate")

simulate_func = rp.get_simulate_func(params, options)

# Baseline simulation.
df_sim = simulate_func(params)
df_sim.to_pickle("df_sim.pkl")

# Policy simulation: the parameter at ``label`` is raised by 2000.
params_pol = params.copy()
params_pol.loc[label, "value"] += 2000

df_pol = simulate_func(params_pol)
df_pol.to_pickle("df_pol.pkl")
"""Auxiliary code for bootsrap.""" import respy as rp from calibration_maximum_likelihood import run_bootstrap NUM_BOOTS = 1000 # Get the basic model setup params_base, options_base, df = rp.get_example_model("robinson", with_data=True) params_base["lower"] = [0.9, 0.00, -0.20, 1.00, 0.0050, 0.001, -0.2] params_base["upper"] = [1.0, 0.10, 0.00, 1.10, 0.0150, 0.030, +0.2] # We will use estimagic and fix all parameters at their true values. constr_base = [ { "loc": ("shocks_sdcorr", "sd_fishing"), "type": "fixed" }, { "loc": ("shocks_sdcorr", "sd_hammock"), "type": "fixed" }, { "loc": ("shocks_sdcorr", "corr_hammock_fishing"), "type": "fixed" }, { "loc": "wage_fishing", "type": "fixed" },
def test_n_step_ahead_simulation_with_data(model):
    """Smoke test: the ``n_step_ahead_with_data`` simulation runs for 11 periods."""
    params, options, observed = rp.get_example_model(model)
    options["n_periods"] = 11

    # Only completion without an error is checked; the result is discarded.
    rp.get_simulate_func(params, options, "n_step_ahead_with_data", observed)(params)
def test_one_step_ahead_simulation(model):
    """Smoke test: the ``one_step_ahead`` simulation runs for 11 periods."""
    params, options, observed = rp.get_example_model(model)
    options["n_periods"] = 11

    # Only completion without an error is checked; the result is discarded.
    rp.get_simulate_func(params, options, "one_step_ahead", observed)(params)
def transform_params_kw94_respy(kw94_params):
    """Map a KW94 parameter vector onto the index of the respy parameters.

    Parameters
    ----------
    kw94_params : pandas.Series
        The 26 KW94 parameters, addressed by labels such as ``"alpha10"``,
        ``"beta1"`` or ``"a21"``.

    Returns
    -------
    pandas.Series
        Series on the index of the ``kw_94_one`` parameters. Entries that are not
        assigned below remain NaN.
    """
    assert len(kw94_params) == 26, "Length of KW94 vector must be 26."

    params, _ = rp.get_example_model("kw_94_one", with_data=False)
    rp_params = pd.Series(
        data=np.full(len(params["value"].values), np.nan), index=params.index
    )

    # Copy values that are not in KW94 from respy parameters.
    copied_from_respy = (
        ("delta", "delta"),
        ("meas_error", "sd_a"),
        ("meas_error", "sd_b"),
        ("lagged_choice_1_edu", "edu_ten"),
        ("initial_exp_edu", "10"),
        ("maximum_exp", "edu"),
    )
    for key in copied_from_respy:
        rp_params[key] = params.loc[key, "value"]

    # Set values that are transformed with *(-1) by respy: the squared experience
    # alphas and two betas.
    sign_flipped = {
        ("wage_a", "exp_a_square"): "alpha13",
        ("wage_a", "exp_b_square"): "alpha15",
        ("wage_b", "exp_b_square"): "alpha23",
        ("wage_b", "exp_a_square"): "alpha25",
        ("nonpec_edu", "at_least_twelve_exp_edu"): "beta1",
        ("nonpec_edu", "not_edu_last_period"): "beta2",
    }
    for key, kw94_name in sign_flipped.items():
        rp_params[key] = -kw94_params[kw94_name]

    # Set SDs and Corrs that are Cholesky elements in KW94: build the lower
    # triangular factor, then the covariance matrix it implies.
    chol = np.zeros((4, 4))
    np.fill_diagonal(chol, kw94_params[["a11", "a22", "a33", "a44"]])
    chol[1, 0] = kw94_params["a21"]
    chol[2, :2] = [kw94_params["a31"], kw94_params["a32"]]
    chol[3, :3] = [kw94_params["a41"], kw94_params["a42"], kw94_params["a43"]]
    cov = np.matmul(chol, chol.T)
    sd = np.sqrt(np.diag(cov))

    shocks = ("a", "b", "edu", "home")
    for position, shock in enumerate(shocks):
        rp_params[("shocks_sdcorr", f"sd_{shock}")] = sd[position]

    # Correlations below the diagonal, row by row: b_a, edu_a, edu_b, home_a,
    # home_b, home_edu.
    for row in range(1, len(shocks)):
        for col in range(row):
            name = f"corr_{shocks[row]}_{shocks[col]}"
            rp_params[("shocks_sdcorr", name)] = cov[row, col] / (sd[row] * sd[col])

    # Fill in KW94 parameters that are not transformed. For ``wage_b`` the own
    # experience is alpha22 and the cross experience alpha24, i.e. the second digit
    # is switched compared to ``wage_a``.
    unchanged = {
        ("wage_a", "constant"): "alpha10",
        ("wage_a", "exp_edu"): "alpha11",
        ("wage_a", "exp_a"): "alpha12",
        ("wage_a", "exp_b"): "alpha14",
        ("wage_b", "constant"): "alpha20",
        ("wage_b", "exp_edu"): "alpha21",
        ("wage_b", "exp_a"): "alpha24",
        ("wage_b", "exp_b"): "alpha22",
        ("nonpec_edu", "constant"): "beta0",
        ("nonpec_home", "constant"): "gamma0",
    }
    for key, kw94_name in unchanged.items():
        rp_params[key] = kw94_params[kw94_name]

    return rp_params
def test_one_step_ahead_simulation():
    """Smoke test: ``one_step_ahead`` simulation of ``kw_97_basic`` for 11 periods."""
    params, options, observed = rp.get_example_model("kw_97_basic")
    options["n_periods"] = 11

    # Only completion without an error is checked; the result is discarded.
    rp.get_simulate_func(params, options, "one_step_ahead", observed)(params)
from auxiliary import TAGS if __name__ == "__main__": comm = MPI.Comm.Get_parent() num_slaves, rank = comm.Get_size(), comm.Get_rank() status = MPI.Status() # We need some additional task-specific information. prob_info = comm.bcast(None) subdir = f"subdir_child_{rank}" os.mkdir(subdir) os.chdir(subdir) # We now set up the simulation function of `respy` and receive some task-specific information. params, options, df = rp.get_example_model("kw_94_one") simulate = rp.get_simulate_func(params, options) rslt = list() while True: # Signal readiness comm.send(None, dest=0) # Receive instructions and act accordingly. comm.recv(status=status) tag = status.Get_tag() if tag == TAGS.EXIT: # We set up a container to store the results. df = pd.DataFrame(rslt, columns=["qoi", "delta", "exp_edu"])
import os
import sys

import pandas as pd
import respy as rp

if __name__ == "__main__":
    # ``exist_ok=True`` replaces the separate existence check, which could fail
    # when another invocation creates the folder between check and ``mkdir``.
    os.makedirs("simulated_data", exist_ok=True)

    # The model name is the first command-line argument.
    model_name = sys.argv[1]
    params, options, _ = rp.get_example_model(model_name)
    options["solution_draws"] = 250
    options["simulation_agents"] = 750

    # Only the simulated data is stored; the first return value is not needed.
    _, data = rp.simulate(params, options)

    pd.to_pickle(data, os.path.join("simulated_data", f"{model_name}.pickle"))
import pandas as pd import respy as rp GRID_TAU = [0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001] GRID_AGENTS = [100, 1000, 10000] GRID_DRAWS = [100, 1000, 10000] index = [] for num_agents in GRID_AGENTS: for num_draws in GRID_DRAWS: for tau in GRID_TAU: index.append((num_agents, num_draws, tau)) index = pd.MultiIndex.from_tuples(index, names=("agents", "draws", "tau")) rslts = pd.DataFrame(index=index, columns=["delta"]) params_base, options_base = rp.get_example_model("robinson", False) delta_true = params_base.loc[("delta", "delta"), "value"] for num_agents in GRID_AGENTS: options = options_base.copy() options["estimation_draws"] = num_draws options["solution_draws"] = num_draws for num_draws in GRID_DRAWS: simulate = rp.get_simulate_func(params_base, options) df = simulate(params_base) for tau in GRID_TAU:
def main():
    """Run the estimation of a model using a number of threads and a maximum of
    function evaluations.

    Currently, we circumvent the optimization by setting maxfun to 0 and just looping
    over the estimation.

    Command-line arguments (read from ``sys.argv``):

    1. ``version`` -- ``"python"`` or ``"fortran"``.
    2. ``model`` -- name of the example model.
    3. ``maxfun`` -- number of estimation calls, must be non-negative.
    4. ``num_procs`` -- number of processes (used for the fortran version).
    5. ``num_threads`` -- maximum number of threads, or ``-1`` for no restriction.

    The work happens inside a temporary folder which is removed again, also when an
    error occurs. One JSON line with the timing information is appended to
    ``scalability_results.txt``.
    """
    version = sys.argv[1]
    model = sys.argv[2]
    maxfun = int(sys.argv[3])
    num_procs = int(sys.argv[4])
    num_threads = int(sys.argv[5])

    # Test commandline input
    assert maxfun >= 0, "Maximum number of function evaluations cannot be negative."
    assert num_threads >= 1 or num_threads == -1, (
        "Use -1 to impose no restrictions on maximum number of threads or choose a "
        "number higher than zero."
    )

    # Set number of threads
    if not num_threads == -1 and version == "python":
        os.environ["NUMBA_NUM_THREADS"] = f"{num_threads}"
        os.environ["MKL_NUM_THREADS"] = f"{num_threads}"
        os.environ["OMP_NUM_THREADS"] = f"{num_threads}"
        os.environ["NUMEXPR_NUM_THREADS"] = f"{num_threads}"

    # Late import of respy to ensure that environment variables are read.
    from respy import RespyCls, get_example_model
    from respy.python.interface import respy_interface
    from respy.fortran.interface import resfort_interface

    # Get model
    options_spec, params_spec = get_example_model(model)

    # Adjust options
    options_spec["program"]["version"] = version
    options_spec["estimation"]["maxfun"] = 0
    if version == "fortran":
        options_spec["program"]["procs"] = num_procs
        options_spec["program"]["threads"] = num_threads

    # Go into temporary folder
    folder = f"__{num_threads}"
    if Path(folder).exists():
        shutil.rmtree(folder)

    Path(folder).mkdir()
    original_dir = Path.cwd()
    os.chdir(folder)

    try:
        # Initialize the class
        respy_obj = RespyCls(params_spec, options_spec)

        # Simulate the data
        state_space, simulated_data = respy_interface(respy_obj, "simulate")

        # Run the estimation
        print(
            f"Start. Program: {version}, Model: {model}, Maxfun: {maxfun}, Procs: "
            f"{num_procs}, Threads: {num_threads}."
        )
        start = dt.datetime.now()

        for _ in range(maxfun):
            if version == "python":
                respy_interface(respy_obj, "estimate", simulated_data)
            else:
                resfort_interface(respy_obj, "estimate", simulated_data)

        end = dt.datetime.now()

        print(f"End. Duration: {end - start} seconds.")
    finally:
        # Step out of temp folder and delete it, also when an error occurred, so
        # that the process does not stay inside a stale folder.
        os.chdir(original_dir)
        shutil.rmtree(folder)

    # Aggregate information
    output = {
        "version": version,
        "model": model,
        "maxfun": maxfun,
        "num_procs": num_procs,
        "num_threads": num_threads,
        "start": str(start),
        "end": str(end),
        "duration": str(end - start),
    }

    # Save time to file
    with open("scalability_results.txt", "a+") as file:
        file.write(json.dumps(output))
        file.write("\n")