def test_6(self):
    """ Test short estimation tasks. """
    num_agents = np.random.randint(5, 100)

    constr = {
        "simulation": {"agents": num_agents},
        "num_periods": np.random.randint(1, 4),
        "estimation": {"maxfun": np.random.randint(0, 5), "agents": num_agents},
    }

    # Simulate a dataset
    params_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(params_spec, options_spec)

    write_interpolation_grid(respy_obj)

    # Run estimation task.
    simulate_observed(respy_obj)
    base_x, base_val = respy_obj.fit()

    # We also check whether updating the class instance and a single evaluation of
    # the criterion function give the same result.
    respy_obj.update_optim_paras(base_x)
    respy_obj.attr["maxfun"] = 0

    alt_x, alt_val = respy_obj.fit()

    for arg in [(alt_val, base_val), (alt_x, base_x)]:
        np.testing.assert_almost_equal(arg[0], arg[1])
def test_4(self):
    """ Test the evaluation of the criterion function for random requests, not
    just at the true values.
    """
    # Constraints that ensure that two alternative initialization files can be used
    # for the same simulated data.
    num_agents = np.random.randint(5, 100)

    constr = {
        "simulation": {"agents": num_agents},
        "num_periods": np.random.randint(1, 4),
        "edu_spec": {"start": [7], "max": 15, "share": [1.0]},
        "estimation": {"maxfun": 0, "agents": num_agents},
    }

    # Simulate a dataset
    params_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(params_spec, options_spec)
    simulate_observed(respy_obj)

    # Evaluate at different points, ensuring that the simulated dataset still fits.
    params_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(params_spec, options_spec)
    respy_obj.fit()
def test_2(self):
    """ This test ensures that the record files are identical. """
    # Generate random initialization file. The number of periods is higher than
    # usual as only FORTRAN implementations are used to solve the random request.
    # This ensures that some cases of interpolation are also explored.
    constr = {
        "program": {"version": "fortran"},
        "num_periods": np.random.randint(3, 10),
        "estimation": {"maxfun": 0},
    }

    params_spec, options_spec = generate_random_model(point_constr=constr)

    base_sol_log, base_est_info_log = None, None
    base_est_log = None

    for is_parallel in [False, True]:
        options_spec["program"]["threads"] = 1
        options_spec["program"]["procs"] = 1

        if is_parallel:
            if IS_PARALLELISM_OMP:
                options_spec["program"]["threads"] = np.random.randint(2, 5)
            if IS_PARALLELISM_MPI:
                options_spec["program"]["procs"] = np.random.randint(2, 5)

        respy_obj = RespyCls(params_spec, options_spec)

        file_sim = respy_obj.get_attr("file_sim")

        simulate_observed(respy_obj)

        respy_obj.fit()

        # Check for identical records
        fname = file_sim + ".respy.sol"
        if base_sol_log is None:
            base_sol_log = open(fname, "r").read()
        assert open(fname, "r").read() == base_sol_log

        if base_est_info_log is None:
            base_est_info_log = open("est.respy.info", "r").read()
        assert open("est.respy.info", "r").read() == base_est_info_log

        if base_est_log is None:
            base_est_log = open("est.respy.log", "r").readlines()
        compare_est_log(base_est_log)
def test_9(self):
    """ This test just locks in the evaluation of the criterion function for the
    original Keane & Wolpin data. We create additional initialization files that
    include numerous types and initial conditions.
    """
    # This ensures that the experience effect is taken care of properly.
    open(".restud.respy.scratch", "w").close()

    kw_spec, result = random.choice(
        [
            ("kw_data_one", 10.45950941513551),
            ("kw_data_two", 45.04552402391903),
            ("kw_data_three", 74.28253652773714),
            ("kw_data_one_types", 9.098738585839529),
            ("kw_data_one_initial", 7.965979149372883),
        ]
    )

    base_path = TEST_RESOURCES_DIR / kw_spec

    # Evaluate criterion function at true values.
    respy_obj = RespyCls(base_path.with_suffix(".csv"), base_path.with_suffix(".json"))

    respy_obj.unlock()
    respy_obj.set_attr("maxfun", 0)
    respy_obj.lock()

    simulate_observed(respy_obj, is_missings=False)

    _, val = respy_obj.fit()
    np.testing.assert_allclose(val, result)
def test_5(self):
    """ Test the scripts. """
    # Constraints that ensure that two alternative initialization files can be used
    # for the same simulated data.
    for _ in range(10):
        num_agents = np.random.randint(5, 100)

        constr = {
            "simulation": {"agents": num_agents},
            "num_periods": np.random.randint(1, 4),
            "edu_spec": {"start": [7], "max": 15, "share": [1.0]},
            "estimation": {"maxfun": 0, "agents": num_agents},
        }

        # Simulate a dataset
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)

        # Create output to process a baseline.
        respy_obj.unlock()
        respy_obj.set_attr("maxfun", 0)
        respy_obj.lock()

        respy_obj.fit()

        # Potentially evaluate at different points.
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)

        single = np.random.choice([True, False])

        scripts_check("estimate", respy_obj)
        scripts_estimate(single, respy_obj)
def test_2(self):
    """Ensure that the evaluation of the criterion is equal across versions."""
    max_draws = np.random.randint(10, 100)

    # It seems to be important that max_draws and max_agents are the same
    # number because otherwise some functions that read draws from a file
    # to ensure compatibility of fortran and python versions won't work.
    bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

    point_constr = {
        "interpolation": {"flag": False},
        "program": {"procs": 1, "threads": 1, "version": "python"},
        "estimation": {"maxfun": 0},
    }

    params_spec, options_spec = generate_random_model(
        point_constr=point_constr, bound_constr=bound_constr
    )

    respy_obj = RespyCls(params_spec, options_spec)

    num_agents_sim, optim_paras = dist_class_attributes(
        respy_obj, "num_agents_sim", "optim_paras"
    )

    type_shares = optim_paras["type_shares"]

    # Simulate a dataset
    simulate_observed(respy_obj)

    # Iterate over alternative implementations
    base_x, base_val = None, None

    num_periods = options_spec["num_periods"]

    write_draws(num_periods, max_draws)
    write_types(type_shares, num_agents_sim)

    for version in ["python", "fortran"]:
        respy_obj.unlock()
        respy_obj.set_attr("version", version)
        respy_obj.lock()

        x, val = respy_obj.fit()

        # Check for the returned parameters.
        if base_x is None:
            base_x = x
        np.testing.assert_allclose(base_x, x)

        # Check for the value of the criterion function.
        if base_val is None:
            base_val = val
        np.testing.assert_allclose(base_val, val)
def create_single(idx):
    """ This function creates a single test. """
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    # We impose a couple of constraints that make the requests manageable.
    np.random.seed(idx)

    version = np.random.choice(["python", "fortran"])

    # Only choose from constrained optimizers because we always have some bounds.
    if version == "python":
        optimizer = "SCIPY-LBFGSB"
    else:
        optimizer = "FORT-BOBYQA"

    constr = {
        "program": {"version": version},
        "preconditioning": {"type": np.random.choice(["identity", "magnitudes"])},
        "estimation": {
            "maxfun": int(np.random.choice(range(6), p=[0.5, 0.1, 0.1, 0.1, 0.1, 0.1])),
            "optimizer": optimizer,
        },
    }
    constr["flag_estimation"] = True

    param_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(param_spec, options_spec)
    simulate_observed(respy_obj)
    crit_val = respy_obj.fit()[1]

    # In rare instances, the value of the criterion function might be too large and
    # thus printed as a string. This occurred in the past, when the gradient
    # preconditioning had zero probability observations. We now generate random
    # initialization files with smaller gradient step sizes.
    if not isinstance(crit_val, float):
        raise AssertionError(" ... value of criterion function too large.")

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return respy_obj.attr, crit_val
def check_single(tests, idx):
    """ This function checks a single test from the dictionary. """
    # Distribute test information.
    attr, crit_val = tests[idx]

    if not IS_PARALLELISM_OMP or not IS_FORTRAN:
        attr["num_threads"] = 1

    if not IS_PARALLELISM_MPI or not IS_FORTRAN:
        attr["num_procs"] = 1

    if not IS_FORTRAN:
        attr["version"] = "python"

    # In the past we also had the problem that some of the testing machines report
    # selective failures when the regression vault was created on another machine.
    msg = " ... test is known to fail on this machine"
    if "zeus" in socket.gethostname() and idx in []:
        print(msg)
        return None
    if "acropolis" in socket.gethostname() and idx in []:
        print(msg)
        return None
    if "pontos" in socket.gethostname() and idx in []:
        print(msg)
        return None

    # We need to create a temporary directory, so the multiprocessing does not
    # interfere with any of the files that are printed and used during the small
    # estimation request.
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    params_spec = _params_spec_from_attributes(attr)
    options_spec = _options_spec_from_attributes(attr)

    respy_obj = RespyCls(params_spec, options_spec)

    simulate_observed(respy_obj)

    est_val = respy_obj.fit()[1]

    is_success = np.isclose(est_val, crit_val, rtol=TOL, atol=TOL)

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return is_success
def test_3(self):
    """ Testing whether a simulated dataset and the evaluation of the criterion
    function are the same for a tiny delta and a myopic agent.
    """
    constr = {"estimation": {"maxfun": 0}}

    params_spec, options_spec = generate_random_model(point_constr=constr, myopic=True)
    respy_obj = RespyCls(params_spec, options_spec)

    optim_paras, num_agents_sim, edu_spec = dist_class_attributes(
        respy_obj, "optim_paras", "num_agents_sim", "edu_spec"
    )

    write_types(optim_paras["type_shares"], num_agents_sim)
    write_edu_start(edu_spec, num_agents_sim)
    write_lagged_start(num_agents_sim)

    # Iterate over alternative discount rates.
    base_data, base_val = None, None

    for delta in [0.00, 0.000001]:
        respy_obj = RespyCls(params_spec, options_spec)

        respy_obj.unlock()
        respy_obj.attr["optim_paras"]["delta"] = np.array([delta])
        respy_obj.lock()

        simulate_observed(respy_obj)

        # This part checks the equality of the simulated datasets for the
        # alternative discount rates.
        data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

        if base_data is None:
            base_data = data_frame.copy()

        assert_frame_equal(base_data, data_frame)

        # This part checks the equality of an evaluation of the criterion function.
        _, crit_val = respy_obj.fit()

        if base_val is None:
            base_val = crit_val

        np.testing.assert_allclose(base_val, crit_val, rtol=1e-03, atol=1e-03)
def run(hours):

    start, timeout = datetime.now(), timedelta(hours=hours)

    count = 0
    while True:
        print("COUNT", count)
        count += 1

        # Generate random initialization file
        constr = {
            "program": {"version": "fortran"},
            "estimation": {
                "maxfun": np.random.randint(0, 50),
                "optimizer": "FORT-BOBYQA",
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        base = None
        for is_parallel in [True, False]:

            if is_parallel is False:
                options_spec["program"]["threads"] = 1
                options_spec["program"]["procs"] = 1
            else:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)

        # Timeout.
        if timeout < datetime.now() - start:
            break
def test_1(self):
    """Ensure that it makes no difference whether the criterion function is
    evaluated in parallel or not.
    """
    # Generate random initialization file
    constr = {
        "program": {"version": "fortran"},
        "estimation": {"maxfun": np.random.randint(0, 50)},
    }

    params_spec, options_spec = generate_random_model(point_constr=constr)

    # If delta is not fixed, we need to ensure a bound-constrained optimizer.
    # However, this is not the standard flag_estimation setup as the number of
    # function evaluations is possibly much larger, so as to detect any differences
    # in the updates of the optimizer steps depending on the implementation.
    if params_spec.loc[("delta", "delta"), "fixed"] is False:
        options_spec["estimation"]["optimizer"] = "FORT-BOBYQA"

    base = None
    for is_parallel in [True, False]:
        options_spec["program"]["threads"] = 1
        options_spec["program"]["procs"] = 1

        if is_parallel:
            if IS_PARALLELISM_OMP:
                options_spec["program"]["threads"] = np.random.randint(2, 5)
            if IS_PARALLELISM_MPI:
                options_spec["program"]["procs"] = np.random.randint(2, 5)

        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj = simulate_observed(respy_obj)
        _, crit_val = respy_obj.fit()

        if base is None:
            base = crit_val
        np.testing.assert_equal(base, crit_val)
def test_single_regression(regression_vault, index):
    """Run a single regression test."""
    attr, crit_val = regression_vault[index]

    if not IS_PARALLELISM_OMP or not IS_FORTRAN:
        attr["num_threads"] = 1

    if not IS_PARALLELISM_MPI or not IS_FORTRAN:
        attr["num_procs"] = 1

    if not IS_FORTRAN:
        attr["version"] = "python"
        if attr["optimizer_used"] not in OPT_EST_PYTH:
            attr["optimizer_used"] = OPT_EST_PYTH[2]

    params_spec = _params_spec_from_attributes(attr)
    options_spec = _options_spec_from_attributes(attr)

    respy_obj = RespyCls(params_spec, options_spec)
    simulate_observed(respy_obj)
    est_val = respy_obj.fit()[1]

    assert np.isclose(est_val, crit_val, rtol=TOL, atol=TOL)
def run(request, is_compile, is_background, is_strict, num_procs):
    """ Run the regression tests. """
    if is_compile:
        compile_package(True)

    # We can set up a multiprocessing pool right away.
    mp_pool = mp.Pool(num_procs)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    # Process command line arguments
    is_creation = False
    is_investigation, is_check = False, False
    num_tests, idx = None, None

    if request[0] == "create":
        is_creation, num_tests = True, int(request[1])
    elif request[0] == "check":
        is_check, num_tests = True, int(request[1])
    elif request[0] == "investigate":
        is_investigation, idx = True, int(request[1])
    else:
        raise AssertionError("request in [create, check, investigate]")

    if num_tests is not None:
        assert num_tests > 0
    if idx is not None:
        assert idx >= 0

    if is_investigation:
        fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
        with open(fname, "rb") as p:
            tests = pickle.load(p)

        attr, crit_val = tests[idx]
        params_spec = _params_spec_from_attributes(attr)
        options_spec = _options_spec_from_attributes(attr)

        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)

        result = respy_obj.fit()[1]
        np.testing.assert_almost_equal(result, crit_val, decimal=DECIMALS)

    if is_creation:
        # We maintain the separate execution in the case of a single processor for
        # debugging purposes. The error messages are generally much more informative.
        if num_procs == 1:
            tests = []
            for idx in range(num_tests):
                tests += [create_single(idx)]
        else:
            tests = mp_pool.map(create_single, range(num_tests))

        with open(TEST_RESOURCES_DIR / "regression_vault.pickle", "wb") as p:
            pickle.dump(tests, p)
        return

    if is_check:
        fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
        with open(fname, "rb") as p:
            tests = pickle.load(p)

        run_single = partial(check_single, tests)
        indices = list(range(num_tests))

        # We maintain the separate execution in the case of a single processor for
        # debugging purposes. The error messages are generally much more informative.
        if num_procs == 1:
            ret = []
            for index in indices:
                ret += [run_single(index)]
                # We need an early termination if a strict test run is requested.
                if is_strict and (False in ret):
                    break
        else:
            ret = []
            for chunk in get_chunks(indices, num_procs):
                ret += mp_pool.map(run_single, chunk)
                # We need an early termination if a strict test run is requested. So
                # we check whether there are any failures in the last batch.
                if is_strict and (False in ret):
                    break

        # This allows this function to be called from another script that runs other
        # tests as well.
        idx_failures = [i for i, x in enumerate(ret) if x not in [True, None]]
        is_failure = False in ret

        if len(idx_failures) > 0:
            is_failure = True

        if not is_background:
            send_notification(
                "regression", is_failed=is_failure, idx_failures=idx_failures
            )

        return not is_failure
def run_robustness_test(seed, is_investigation):
    """Run a single robustness test."""
    passed = True
    error_message = None
    np.random.seed(seed)

    old_dir = os.getcwd()
    t = str(time())[-6:]
    if is_investigation is True:
        new_dir = join(old_dir, str(seed))
        if exists(new_dir):
            rmtree(new_dir)
        os.mkdir(new_dir)
    else:
        new_dir = join(old_dir, str(seed) + "_" + t)
        os.mkdir(new_dir)

    for file in ["career_data.respy.dat", "career_data.respy.pkl"]:
        copy(join(old_dir, file), join(new_dir, file))
    os.chdir(new_dir)

    # We need to impose some constraints so that the random initialization file meets
    # the structure of the empirical dataset. We need to be particularly careful with
    # the construction of the maximum level of schooling as we need to rule out that
    # anyone in the estimation sample has a value larger than the specified maximum
    # value.
    version = np.random.choice(["python", "fortran"])
    if version == "python":
        max_periods = 3
    else:
        max_periods = 10

    num_periods = np.random.randint(1, max_periods)
    agents = np.random.randint(500, 1372 + 1)
    edu_start = np.random.choice(range(7, 12))

    constr = {
        "num_periods": num_periods,
        "edu_spec": {
            "start": [int(edu_start)],
            "max": np.random.randint(edu_start + num_periods, 30),
        },
        "estimation": {
            "file": "career_data.respy.dat",
            "agents": agents,
            "maxfun": np.random.randint(1, 5),
        },
        "program": {"version": version},
    }

    if version == "fortran":
        constr["estimation"]["optimizer"] = "FORT-BOBYQA"
    if version == "python":
        constr["estimation"]["optimizer"] = "SCIPY-LBFGSB"

    params_spec, options_spec = generate_random_model(point_constr=constr)

    try:
        respy_obj = RespyCls(params_spec, options_spec)
        if is_investigation:
            write_out_model_spec(respy_obj.attr, str(seed))
        respy_obj.fit()
    except Exception:
        tb = traceback.format_exc()
        passed = False
        error_message = str(tb)

    os.chdir(old_dir)

    if is_investigation is False:
        rmtree(new_dir)

    return passed, error_message
def test_2(self):
    """Compare results from an evaluation of the criterion function at the initial
    values.
    """
    args = generate_constraints_dict()
    params_spec, options_spec = generate_random_model(**args)
    params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

    max_draws = args["bound_constr"]["max_draws"]

    # At this point, the random initialization file only provides diagonal
    # covariances.
    cov_sampled = np.random.uniform(0, 0.01, size=(4, 4)) + np.diag(
        np.random.uniform(1.0, 1.5, size=4)
    )
    chol = np.linalg.cholesky(cov_sampled)
    coeffs = chol[np.tril_indices(4)]
    params_spec.loc["shocks", "para"] = coeffs
    params_spec.loc["shocks", "upper"] = np.nan
    params_spec.loc["shocks", "lower"] = np.nan

    respy_obj = RespyCls(params_spec, options_spec)

    # This flag aligns the random components between the RESTUD program and the RESPY
    # package. The existence of the file leads the RESTUD program to write out the
    # random components.
    (
        optim_paras,
        edu_spec,
        num_agents_est,
        num_periods,
        num_draws_emax,
        num_draws_prob,
        tau,
        num_agents_sim,
    ) = dist_class_attributes(
        respy_obj,
        "optim_paras",
        "edu_spec",
        "num_agents_est",
        "num_periods",
        "num_draws_emax",
        "num_draws_prob",
        "tau",
        "num_agents_sim",
    )

    shocks_cholesky = optim_paras["shocks_cholesky"]
    cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

    # Simulate sample model using RESTUD code.
    transform_respy_to_restud_sim(
        optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
    )

    open(".restud.testing.scratch", "a").close()
    cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
    subprocess.check_call(cmd, shell=True)

    transform_respy_to_restud_est(
        optim_paras,
        edu_spec,
        num_agents_est,
        num_draws_prob,
        tau,
        num_periods,
        num_draws_emax,
        cov,
    )

    filenames = ["in.txt", TEST_RESOURCES_DIR / "in_bottom.txt"]
    with open("in1.txt", "w") as outfile:
        for fname in filenames:
            with open(fname) as infile:
                outfile.write(infile.read())

    draws_standard = np.random.multivariate_normal(
        np.zeros(4), np.identity(4), (num_periods, max_draws)
    )

    with open(".draws.respy.test", "w") as file_:
        for period in range(num_periods):
            for i in range(max_draws):
                fmt = " {0:15.10f} {1:15.10f} {2:15.10f} {3:15.10f}\n"
                line = fmt.format(*draws_standard[period, i, :])
                file_.write(line)

    # We always need the seed.txt
    shutil.copy(str(TEST_RESOURCES_DIR / "seed.txt"), "seed.txt")
    cmd = str(TEST_RESOURCES_BUILD / "kw_dpml4a")
    subprocess.check_call(cmd, shell=True)
    Path("seed.txt").unlink()

    with open("output1.txt", "r") as searchfile:
        # Search file for strings, trim lines and save as variables
        for line in searchfile:
            if "OLD LOGLF=" in line:
                stat = float(shlex.split(line)[2])
                break

    # Now we also evaluate the criterion function with the RESPY package.
    restud_sample_to_respy()
    respy_obj = respy.RespyCls(params_spec, options_spec)
    respy_obj.attr["file_est"] = "ftest.respy.dat"

    open(".restud.respy.scratch", "a").close()
    _, val = respy_obj.fit()
    Path(".restud.respy.scratch").unlink()

    # This ensures that the two values are within 1% of the RESPY value.
    np.testing.assert_allclose(
        abs(stat), abs(val * num_agents_est), rtol=0.01, atol=0.00
    )
def test_10(self):
    """ This test ensures that the order of the initial schooling levels specified in
    the initialization files does not matter for the simulation of a dataset and the
    subsequent evaluation of the criterion function.

    Warning
    -------
    This test fails if types have identical intercepts, as no unique ordering can be
    determined in that case.
    """
    point_constr = {
        "estimation": {"maxfun": 0},
        # We cannot allow for interpolation as the order of states within each
        # period changes and thus the prediction model is altered even if the same
        # state identifier is used.
        "interpolation": {"flag": False},
    }

    params_spec, options_spec = generate_random_model(point_constr=point_constr)

    respy_obj = RespyCls(params_spec, options_spec)

    edu_baseline_spec, num_types, num_paras, optim_paras = dist_class_attributes(
        respy_obj, "edu_spec", "num_types", "num_paras", "optim_paras"
    )

    # We want to randomly shuffle the list of initial schooling but need to maintain
    # the order of the shares.
    edu_shuffled_start = np.random.permutation(edu_baseline_spec["start"]).tolist()

    edu_shuffled_share, edu_shuffled_lagged = [], []
    for start in edu_shuffled_start:
        idx = edu_baseline_spec["start"].index(start)
        edu_shuffled_lagged += [edu_baseline_spec["lagged"][idx]]
        edu_shuffled_share += [edu_baseline_spec["share"][idx]]

    edu_shuffled_spec = copy.deepcopy(edu_baseline_spec)
    edu_shuffled_spec["lagged"] = edu_shuffled_lagged
    edu_shuffled_spec["start"] = edu_shuffled_start
    edu_shuffled_spec["share"] = edu_shuffled_share

    # We are only looking at a single evaluation as otherwise the reordering affects
    # the optimizer that is trying better parameter values one-by-one. The reordering
    # might also violate the bounds.
    for i in range(53, num_paras):
        optim_paras["paras_bounds"][i] = [None, None]
        optim_paras["paras_fixed"][i] = False

    # We need to ensure that the baseline type is still in the first position.
    types_order = [0] + np.random.permutation(range(1, num_types)).tolist()

    type_shares = []
    for i in range(num_types):
        lower, upper = i * 2, (i + 1) * 2
        type_shares += [optim_paras["type_shares"][lower:upper].tolist()]

    optim_paras_baseline = copy.deepcopy(optim_paras)
    optim_paras_shuffled = copy.deepcopy(optim_paras)

    list_ = [optim_paras["type_shifts"][i, :].tolist() for i in types_order]
    optim_paras_shuffled["type_shifts"] = np.array(list_)

    list_ = [type_shares[i] for i in types_order]
    optim_paras_shuffled["type_shares"] = np.array(list_).flatten()

    base_data, base_val = None, None

    k = 0

    for optim_paras in [optim_paras_baseline, optim_paras_shuffled]:
        for edu_spec in [edu_baseline_spec, edu_shuffled_spec]:

            respy_obj.unlock()
            respy_obj.set_attr("edu_spec", edu_spec)
            respy_obj.lock()

            # There is some more work to do to update the coefficients as we
            # distinguish between the economic and optimization version of the
            # parameters.
            x = get_optim_paras(optim_paras, num_paras, "all", True)
            shocks_cholesky, _ = extract_cholesky(x)
            shocks_coeffs = cholesky_to_coeffs(shocks_cholesky)
            x[43:53] = shocks_coeffs
            respy_obj.update_optim_paras(x)

            respy_obj.reset()

            simulate_observed(respy_obj)

            # This part checks the equality of the simulated datasets.
            data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of a single function evaluation.
            _, val = respy_obj.fit()

            if base_val is None:
                base_val = val

            np.testing.assert_almost_equal(base_val, val)

            respy_obj.reset()

            k += 1
def test_3(self):
    """Ensure that the log looks exactly the same for different versions."""
    max_draws = np.random.randint(10, 100)

    bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

    point_constr = {
        "interpolation": {"flag": False},
        "program": {"procs": 1, "threads": 1, "version": "python"},
        "estimation": {"maxfun": 0},
    }

    params_spec, options_spec = generate_random_model(
        point_constr=point_constr, bound_constr=bound_constr
    )

    respy_obj = RespyCls(params_spec, options_spec)

    num_agents_sim, optim_paras, file_sim = dist_class_attributes(
        respy_obj, "num_agents_sim", "optim_paras", "file_sim"
    )

    # Iterate over alternative implementations
    base_sol_log, base_est_info, base_est_log = None, None, None
    base_sim_log = None

    type_shares = respy_obj.attr["optim_paras"]["type_shares"]

    num_periods = options_spec["num_periods"]
    edu_spec = options_spec["edu_spec"]

    write_draws(num_periods, max_draws)
    write_types(type_shares, num_agents_sim)
    write_edu_start(edu_spec, num_agents_sim)
    write_lagged_start(num_agents_sim)

    for version in ["fortran", "python"]:

        respy_obj.unlock()
        respy_obj.set_attr("version", version)
        respy_obj.lock()

        simulate_observed(respy_obj)

        # Check for identical logging
        fname = file_sim + ".respy.sol"
        if base_sol_log is None:
            base_sol_log = open(fname, "r").read()
        assert open(fname, "r").read() == base_sol_log

        # Check for identical logging
        fname = file_sim + ".respy.sim"
        if base_sim_log is None:
            base_sim_log = open(fname, "r").read()
        assert open(fname, "r").read() == base_sim_log

        respy_obj.fit()

        if base_est_info is None:
            base_est_info = open("est.respy.info", "r").read()
        assert open("est.respy.info", "r").read() == base_est_info

        if base_est_log is None:
            base_est_log = open("est.respy.log", "r").readlines()
        compare_est_log(base_est_log)
def test_8(self):
    """ We ensure that the number of initial conditions does not matter for the
    evaluation of the criterion function if a weight of one is put on the first
    group.
    """
    num_agents = np.random.randint(5, 100)

    constr = {
        "simulation": {"agents": num_agents},
        "num_periods": np.random.randint(1, 4),
        "edu_spec": {"max": np.random.randint(15, 25, size=1).tolist()[0]},
        "estimation": {"maxfun": 0, "agents": num_agents},
        "interpolation": {"flag": False},
    }

    params_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(params_spec, options_spec)
    simulate_observed(respy_obj)

    base_val, edu_start_base = None, np.random.randint(1, 5, size=1).tolist()[0]

    # We need to ensure that the initial lagged activity always has the same
    # distribution.
    edu_lagged_base = np.random.uniform(size=5).tolist()

    for num_edu_start in [1, np.random.choice([2, 3, 4]).tolist()]:

        # We always need to ensure that a weight of one is on the first level of
        # initial schooling.
        options_spec["edu_spec"]["share"] = [1.0] + [0.0] * (num_edu_start - 1)
        options_spec["edu_spec"]["lagged"] = edu_lagged_base[:num_edu_start]

        # We need to make sure that the baseline level of initial schooling is
        # always included. At the same time we cannot have any duplicates.
        edu_start = np.random.choice(
            range(1, 10), size=num_edu_start, replace=False
        ).tolist()
        if edu_start_base in edu_start:
            edu_start.remove(edu_start_base)
            edu_start.insert(0, edu_start_base)
        else:
            edu_start[0] = edu_start_base

        options_spec["edu_spec"]["start"] = edu_start

        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)
        _, val = respy_obj.fit()

        if base_val is None:
            base_val = val

        np.testing.assert_almost_equal(base_val, val)
def test_1(self):
    """ Testing the equality of an evaluation of the criterion function for a random
    request.
    """
    # Run evaluation for multiple random requests.
    is_deterministic = np.random.choice([True, False], p=[0.10, 0.9])
    is_interpolated = bool(np.random.choice([True, False], p=[0.10, 0.9]))
    is_myopic = np.random.choice([True, False], p=[0.10, 0.9])
    max_draws = np.random.randint(11, 100)
    num_agents = np.random.randint(10, max_draws)

    bound_constr = {"max_draws": max_draws}
    point_constr = {
        "interpolation": {"flag": is_interpolated},
        "program": {"procs": 1, "threads": 1, "version": "python"},
        "estimation": {"maxfun": 0, "agents": num_agents},
        "simulation": {"agents": num_agents},
        "num_periods": np.random.randint(1, 5),
    }

    num_types = np.random.randint(2, 5)

    if is_interpolated:
        point_constr["num_periods"] = np.random.randint(3, 5)

    params_spec, options_spec = generate_random_model(
        bound_constr=bound_constr,
        point_constr=point_constr,
        deterministic=is_deterministic,
        myopic=is_myopic,
        num_types=num_types,
    )

    edu_spec = options_spec["edu_spec"]
    num_periods = point_constr["num_periods"]

    # The use of the interpolation routines is another special case. Constructing a
    # request that actually involves the use of the interpolation routine is a little
    # involved as the number of interpolation points needs to be lower than the
    # actual number of states. And to know the number of states in each period, we
    # need to construct the whole state space.
    if is_interpolated:
        state_space = StateSpace(
            num_periods, num_types, edu_spec["start"], edu_spec["max"]
        )

        max_states_period = state_space.states_per_period.max()

        options_spec["interpolation"]["points"] = np.random.randint(
            10, max_states_period
        )

    # Write out random components and interpolation grid to align the three
    # implementations.
    write_draws(num_periods, max_draws)
    respy_obj = RespyCls(params_spec, options_spec)
    write_interpolation_grid(respy_obj)

    type_shares = respy_obj.attr["optim_paras"]["type_shares"]

    write_types(type_shares, num_agents)
    write_edu_start(edu_spec, num_agents)
    write_lagged_start(num_agents)

    # Clean evaluations based on the interpolation grid.
    base_val, base_data = None, None

    for version in ["python", "fortran"]:
        respy_obj = RespyCls(params_spec, options_spec)

        # Modify the version of the program for the different requests.
        respy_obj.unlock()
        respy_obj.set_attr("version", version)
        respy_obj.lock()

        # Solve the model
        respy_obj = simulate_observed(respy_obj)

        # This part checks the equality of the simulated datasets for the different
        # versions of the code.
        data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

        if base_data is None:
            base_data = data_frame.copy()

        assert_frame_equal(base_data, data_frame)

        # This part checks the equality of an evaluation of the criterion function.
        _, crit_val = respy_obj.fit()

        if base_val is None:
            base_val = crit_val

        np.testing.assert_allclose(base_val, crit_val, rtol=1e-05, atol=1e-06)

        # We know even more for the deterministic case.
        if is_deterministic:
            assert crit_val in [-1.0, 0.0]
def run_estimation(which):
    """ Run an estimation with the respective release. """
    os.chdir(which)

    import numpy as np

    from respy import RespyCls
    from respy.pre_processing.model_processing import write_init_file

    # We need to make sure that the function simulate_observed() is imported from the
    # original package. Otherwise dependencies might not work properly.
    import respy

    sys.path.insert(0, os.path.dirname(respy.__file__) + "/tests")
    from respy.tests.codes.auxiliary import simulate_observed

    init_dict = json.load(open("init_dict.respy.json", "r"))

    # There was a change in the setup for releases after 1.00. This is only required
    # when comparing to v1.0.0.
    if "1.0.0" in sys.executable:
        init_dict["SHOCKS"]["fixed"] = np.array(init_dict["SHOCKS"]["fixed"])

    write_init_file(init_dict)

    respy_obj = RespyCls("test.respy.ini")

    # This flag ensures a clean switch to the synthetic simulation for cases where
    # simulate_observed() was changed in between releases.
    if os.path.exists("../.simulate_observed.cfg"):
        respy_obj.simulate()
    else:
        simulate_observed(respy_obj)

    # This flag ensures that the change in the truncation of the wage variable has no
    # effect. We simply copy the dataset from the old release over to the new.
    if ("2.0.0.dev20" in sys.executable) and ("/new" in os.getcwd()):
        fnames = glob.glob("data.respy.*")
        for fname in fnames:
            shutil.copy("../old/" + fname, ".")

    # Moving from 2.0.0.dev17 to 2.0.0.dev18 breaks the equality because the simulated
    # datasets differ. So, we just copy the one from old. However, this is only
    # relevant if 2.0.0.dev18 is the candidate.
    if ("2.0.0.dev18" in sys.executable) and ("/new" in os.getcwd()):
        os.chdir("../old")
        files = glob.glob("data.respy.*")
        for file in files:
            shutil.copy(file, "../new")
        os.chdir("../new")

    _, crit_val = respy_obj.fit()

    # There was a bug in version 1.0 which might lead to crit_val not actually taking
    # the lowest value that was visited by the optimizer. So, we reprocess the log
    # file again to be sure.
    if "1.0.0" in sys.executable:
        crit_val = 1e10
        with open("est.respy.log") as infile:
            for line in infile.readlines():
                list_ = shlex.split(line)

                # Skip empty lines
                if not list_:
                    continue

                # Process candidate value
                if list_[0] == "Criterion":
                    try:
                        value = float(list_[1])
                        if value < crit_val:
                            crit_val = value
                    except ValueError:
                        pass

    pkl.dump(crit_val, open("crit_val.respy.pkl", "wb"))

    os.chdir("../")