Пример #1
0
    def test_9(self):
        """Lock in the criterion value for the original Keane & Wolpin data.

        One stored specification is drawn at random, including variants with
        numerous types and initial conditions, and the criterion function is
        evaluated once at the true values.

        """
        # The scratch file ensures that the experience effect is taken care of
        # properly.
        with open(".restud.respy.scratch", "w"):
            pass

        # Specification name paired with the expected criterion value.
        candidates = [
            ("kw_data_one", 10.45950941513551),
            ("kw_data_two", 45.04552402391903),
            ("kw_data_three", 74.28253652773714),
            ("kw_data_one_types", 9.098738585839529),
            ("kw_data_one_initial", 7.965979149372883),
        ]
        kw_spec, result = random.choice(candidates)

        base_path = TEST_RESOURCES_DIR / kw_spec
        params_file = base_path.with_suffix(".csv")
        options_file = base_path.with_suffix(".json")

        # A single evaluation only, so no optimizer steps are allowed.
        respy_obj = RespyCls(params_file, options_file)
        respy_obj.unlock()
        respy_obj.set_attr("maxfun", 0)
        respy_obj.lock()

        simulate_observed(respy_obj, is_missings=False)

        crit_val = respy_obj.fit()[1]
        np.testing.assert_allclose(crit_val, result)
Пример #2
0
    def test_6(self):
        """Run a short estimation and re-evaluate the criterion at its result."""
        # The draws keep the order: agents, periods, function evaluations.
        num_agents = np.random.randint(5, 100)
        num_periods = np.random.randint(1, 4)
        maxfun = np.random.randint(0, 5)

        constr = {
            "simulation": {"agents": num_agents},
            "num_periods": num_periods,
            "estimation": {"maxfun": maxfun, "agents": num_agents},
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        write_interpolation_grid(respy_obj)

        # Simulate a dataset and run the estimation task on it.
        simulate_observed(respy_obj)
        base_x, base_val = respy_obj.fit()

        # Updating the class instance with the estimates and evaluating the criterion
        # function a single time has to reproduce the estimation result.
        respy_obj.update_optim_paras(base_x)
        respy_obj.attr["maxfun"] = 0
        alt_x, alt_val = respy_obj.fit()

        np.testing.assert_almost_equal(alt_val, base_val)
        np.testing.assert_almost_equal(alt_x, base_x)
Пример #3
0
    def test_4(self):
        """Evaluate the criterion function at random points, not only the true ones.

        A dataset is simulated from one random model; a second random model drawn
        under the same constraints is then evaluated on that dataset.
        """
        num_agents = np.random.randint(5, 100)
        num_periods = np.random.randint(1, 4)

        # These constraints make two alternative initialization files usable with
        # the same simulated data.
        constr = {
            "simulation": {"agents": num_agents},
            "num_periods": num_periods,
            "edu_spec": {"start": [7], "max": 15, "share": [1.0]},
            "estimation": {"maxfun": 0, "agents": num_agents},
        }

        def draw_model():
            """Return a fresh class instance for a random model under ``constr``."""
            params_spec, options_spec = generate_random_model(point_constr=constr)
            return RespyCls(params_spec, options_spec)

        # Simulate a dataset.
        simulate_observed(draw_model())

        # Evaluate at a different point; the simulated dataset still has to fit.
        draw_model().fit()
Пример #4
0
    def test_2(self):
        """Ensure that the evaluation of the criterion is equal across versions."""
        max_draws = np.random.randint(10, 100)

        # max_draws and max_agents apparently need to be the same number, as otherwise
        # some functions that read draws from a file to ensure compatibility of the
        # fortran and python versions won't work.
        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}
        point_constr = {
            "interpolation": {"flag": False},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_obj = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras = dist_class_attributes(
            respy_obj, "num_agents_sim", "optim_paras"
        )
        type_shares = optim_paras["type_shares"]

        # Simulate a dataset.
        simulate_observed(respy_obj)

        write_draws(options_spec["num_periods"], max_draws)
        write_types(type_shares, num_agents_sim)

        # The first implementation provides the reference for parameters and value.
        reference = None
        for version in ("python", "fortran"):
            respy_obj.unlock()
            respy_obj.set_attr("version", version)
            respy_obj.lock()

            outcome = respy_obj.fit()
            if reference is None:
                reference = outcome

            # Compare the returned parameters first, then the criterion value.
            for expected, actual in zip(reference, outcome):
                np.testing.assert_allclose(expected, actual)
def create_single(idx):
    """Create a single regression test.

    The test runs inside a freshly created temporary directory, which is removed
    again once the criterion value is available.

    Parameters
    ----------
    idx : int
        Seed passed to ``np.random.seed`` so that the test is reproducible.

    Returns
    -------
    tuple
        The attribute dictionary ``respy_obj.attr`` and the criterion value.

    Raises
    ------
    AssertionError
        If the criterion value is not a float.

    """
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    # We impose a couple of constraints that make the requests manageable.
    np.random.seed(idx)

    version = np.random.choice(["python", "fortran"])

    # Only choose from constrained optimizers because we always have some bounds.
    # The FORTRAN optimizer is spelled "FORT-BOBYQA", as in the other test requests.
    if version == "python":
        optimizer = "SCIPY-LBFGSB"
    else:
        optimizer = "FORT-BOBYQA"

    constr = {
        "program": {"version": version},
        "preconditioning": {"type": np.random.choice(["identity", "magnitudes"])},
        "estimation": {
            # Half of the requests are a single evaluation of the criterion function.
            "maxfun": int(
                np.random.choice(range(6), p=[0.5, 0.1, 0.1, 0.1, 0.1, 0.1])
            ),
            "optimizer": optimizer,
        },
    }
    constr["flag_estimation"] = True

    param_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(param_spec, options_spec)
    simulate_observed(respy_obj)
    crit_val = respy_obj.fit()[1]

    # In rare instances, the value of the criterion function might be too large and thus
    # printed as a string. This occurred in the past, when the gradient preconditioning
    # had zero probability observations. We now generate random initialization files
    # with smaller gradient step sizes.
    if not isinstance(crit_val, float):
        raise AssertionError(" ... value of criterion function too large.")

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return respy_obj.attr, crit_val
Пример #6
0
    def test_2(self):
        """Ensure that the record files are identical with and without parallelism.

        The same random FORTRAN request is run once with a single thread and process
        and once with whatever parallelism is available. The solution record and the
        estimation info file must match exactly; the estimation log is checked with
        ``compare_est_log``.
        """
        # Generate random initialization file. The number of periods is higher than
        # usual as only FORTRAN implementations are used to solve the random request.
        # This ensures that also some cases of interpolation are explored.
        constr = {
            "program": {"version": "fortran"},
            "num_periods": np.random.randint(3, 10),
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        base_sol_log, base_est_info_log = None, None
        base_est_log = None

        for is_parallel in [False, True]:

            options_spec["program"]["threads"] = 1
            options_spec["program"]["procs"] = 1

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)

            file_sim = respy_obj.get_attr("file_sim")

            simulate_observed(respy_obj)

            respy_obj.fit()

            # Check for identical records. Files are read once per run inside a context
            # manager so that no file handle is left open.
            with open(file_sim + ".respy.sol", "r") as file_:
                sol_log = file_.read()
            if base_sol_log is None:
                base_sol_log = sol_log
            assert sol_log == base_sol_log

            with open("est.respy.info", "r") as file_:
                est_info_log = file_.read()
            if base_est_info_log is None:
                base_est_info_log = est_info_log
            assert est_info_log == base_est_info_log

            if base_est_log is None:
                with open("est.respy.log", "r") as file_:
                    base_est_log = file_.readlines()
            compare_est_log(base_est_log)
def check_single(tests, idx):
    """Re-run a single stored test and compare against its recorded value.

    The stored attributes are adjusted in place to what the local installation
    supports before the request is rebuilt and evaluated in a temporary directory.
    Returns the outcome of the closeness check, or ``None`` if the test is listed
    as a known failure for this machine.
    """
    # Distribute test information.
    attr, crit_val = tests[idx]

    # Parallelism and the FORTRAN version are only kept if available locally.
    if not (IS_PARALLELISM_OMP and IS_FORTRAN):
        attr["num_threads"] = 1
    if not (IS_PARALLELISM_MPI and IS_FORTRAN):
        attr["num_procs"] = 1
    if not IS_FORTRAN:
        attr["version"] = "python"

    # In the past we also had the problem that some of the testing machines report
    # selective failures when the regression vault was created on another machine.
    # The lists of failing test indices are currently empty for all machines.
    known_failures = {"zeus": [], "acropolis": [], "pontos": []}
    hostname = socket.gethostname()
    for machine, failing in known_failures.items():
        if machine in hostname and idx in failing:
            print(" ... test is known to fail on this machine")
            return None

    # A temporary directory keeps the multiprocessing from interfering with any of
    # the files that are printed and used during the small estimation request.
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    respy_obj = RespyCls(
        _params_spec_from_attributes(attr), _options_spec_from_attributes(attr)
    )
    simulate_observed(respy_obj)
    _, est_val = respy_obj.fit()

    is_success = np.isclose(est_val, crit_val, rtol=TOL, atol=TOL)

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return is_success
Пример #8
0
    def test_3(self):
        """Check that a tiny delta and a myopic agent give the same results.

        Both the simulated dataset and the evaluation of the criterion function are
        compared between a discount factor of zero and a very small positive one.
        """
        constr = {"estimation": {"maxfun": 0}}
        params_spec, options_spec = generate_random_model(
            point_constr=constr, myopic=True
        )
        respy_obj = RespyCls(params_spec, options_spec)

        optim_paras, num_agents_sim, edu_spec = dist_class_attributes(
            respy_obj, "optim_paras", "num_agents_sim", "edu_spec"
        )

        write_types(optim_paras["type_shares"], num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_lagged_start(num_agents_sim)

        # Iterate over alternative discount rates.
        base_data, base_val = None, None

        for delta in [0.00, 0.000001]:

            respy_obj = RespyCls(params_spec, options_spec)

            respy_obj.unlock()

            respy_obj.attr["optim_paras"]["delta"] = np.array([delta])

            respy_obj.lock()

            simulate_observed(respy_obj)

            # This part checks the equality of the simulated dataset for the different
            # discount rates. ``sep=r"\s+"`` is the documented equivalent of the
            # deprecated ``delim_whitespace=True``.
            data_frame = pd.read_csv("data.respy.dat", sep=r"\s+")

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val, crit_val, rtol=1e-03, atol=1e-03)
Пример #9
0
    def test_2(self):
        """Check that the number of EMAX draws is irrelevant without random rewards.

        If there is no random variation in rewards, the number of draws used to
        simulate the expected future value should have no effect.
        """
        params_spec, options_spec = generate_random_model(deterministic=True)

        # Result of the first run, used as the reference for the second one.
        reference = None

        for _ in range(2):
            # The draw happens before the class instance is created.
            num_draws_emax = np.random.randint(1, 100)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj.unlock()
            respy_obj.set_attr("num_draws_emax", num_draws_emax)
            respy_obj.lock()

            periods_emax = simulate_observed(respy_obj).get_attr("periods_emax")

            if reference is None:
                reference = periods_emax.copy()

            # Invalid entries are masked out before taking the largest deviation.
            gap = np.ma.masked_invalid(reference) - np.ma.masked_invalid(periods_emax)
            np.testing.assert_almost_equal(np.max(abs(gap)), 0.0)
Пример #10
0
    def test_2(self):
        """Compare a plain solution with one that runs the interpolation code.

        This is the special case where the number of interpolation points is exactly
        the number of states in the final period. In this case the interpolation code
        is run and then all predicted values are replaced with their actual values.
        """
        # The first pass runs without interpolation.
        constr = {"interpolation": {"flag": False}}
        params_spec, options_spec = generate_random_model(
            point_constr=constr, deterministic=True
        )

        baseline = None

        # Solve with and without interpolation code.
        for _ in range(2):
            respy_obj = simulate_observed(RespyCls(params_spec, options_spec))

            states_number_period, periods_emax = dist_class_attributes(
                respy_obj, "states_number_period", "periods_emax"
            )

            # The first pass stores the result, the second one is checked against it.
            if baseline is not None:
                np.testing.assert_array_almost_equal(baseline, periods_emax)
            else:
                baseline = periods_emax

            # Switch on interpolation for the second iteration.
            interpolation = options_spec["interpolation"]
            interpolation["points"] = max(states_number_period)
            interpolation["flag"] = True
Пример #11
0
    def test_5(self):
        """Test the scripts on ten random requests."""
        for _ in range(10):
            num_agents = np.random.randint(5, 100)
            num_periods = np.random.randint(1, 4)

            # Constraints that ensure that two alternative initialization files can be
            # used for the same simulated data.
            constr = {
                "simulation": {"agents": num_agents},
                "num_periods": num_periods,
                "edu_spec": {"start": [7], "max": 15, "share": [1.0]},
                "estimation": {"maxfun": 0, "agents": num_agents},
            }

            # Simulate a dataset.
            params_spec, options_spec = generate_random_model(point_constr=constr)
            baseline_obj = RespyCls(params_spec, options_spec)
            simulate_observed(baseline_obj)

            # Create output to process a baseline.
            baseline_obj.unlock()
            baseline_obj.set_attr("maxfun", 0)
            baseline_obj.lock()
            baseline_obj.fit()

            # Potentially evaluate at different points.
            params_spec, options_spec = generate_random_model(point_constr=constr)
            respy_obj = RespyCls(params_spec, options_spec)

            single = np.random.choice([True, False])

            scripts_check("estimate", respy_obj)
            scripts_estimate(single, respy_obj)
Пример #12
0
def test_single_regression(regression_vault, index):
    """Run a single regression test.

    The stored attributes are adjusted in place to what the local installation
    supports, the request is rebuilt from them, and the resulting criterion value
    has to be close to the stored one.
    """
    attr, crit_val = regression_vault[index]

    # Parallelism is only kept if it is available together with FORTRAN.
    if not (IS_PARALLELISM_OMP and IS_FORTRAN):
        attr["num_threads"] = 1
    if not (IS_PARALLELISM_MPI and IS_FORTRAN):
        attr["num_procs"] = 1

    # Without FORTRAN, fall back to the Python version and one of its optimizers.
    if not IS_FORTRAN:
        attr["version"] = "python"
        if attr["optimizer_used"] not in OPT_EST_PYTH:
            attr["optimizer_used"] = OPT_EST_PYTH[2]

    respy_obj = RespyCls(
        _params_spec_from_attributes(attr), _options_spec_from_attributes(attr)
    )
    simulate_observed(respy_obj)

    _, est_val = respy_obj.fit()

    assert np.isclose(est_val, crit_val, rtol=TOL, atol=TOL)
Пример #13
0
    def test_10(self):
        """Check the function that calculates the number of observations by individual.

        The count obtained with ``np.bincount`` on the first column of the processed
        dataset is compared to the result of the FORTRAN debug wrapper.
        """
        for _ in range(2):
            params_spec, options_spec = generate_random_model()
            respy_obj = simulate_observed(RespyCls(params_spec, options_spec))

            num_agents_est = respy_obj.get_attr("num_agents_est")
            data_array = process_dataset(respy_obj).to_numpy()

            # The first column is cast to integers and counted per distinct value.
            first_column = data_array[:, 0].astype(int)
            py = np.bincount(first_column)

            f90 = fort_debug.wrapper_get_num_obs_agent(data_array, num_agents_est)

            assert_almost_equal(py, f90)
Пример #14
0
def run(hours):
    """Repeat the parallel-versus-serial criterion check until time runs out.

    Each round draws a random FORTRAN request and evaluates it once with the
    available parallelism and once with a single thread and process. Both
    criterion values have to be exactly equal. The loop stops after the first
    round that ends more than ``hours`` hours after the start.
    """
    start, timeout = datetime.now(), timedelta(hours=hours)

    count = 0
    while True:
        print("COUNT", count)
        count += 1

        # Generate random initialization file.
        constr = {
            "program": {"version": "fortran"},
            "estimation": {
                "maxfun": np.random.randint(0, 50),
                "optimizer": "FORT-BOBYQA",
            },
        }
        params_spec, options_spec = generate_random_model(point_constr=constr)
        program = options_spec["program"]

        base = None
        for is_parallel in [True, False]:

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    program["threads"] = np.random.randint(2, 5)
                if IS_PARALLELISM_MPI:
                    program["procs"] = np.random.randint(2, 5)
            else:
                program["threads"] = 1
                program["procs"] = 1

            respy_obj = simulate_observed(RespyCls(params_spec, options_spec))
            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)

        # Timeout.
        if datetime.now() - start > timeout:
            break
Пример #15
0
    def test_1(self):
        """Ensure that it makes no difference whether the criterion function is
        evaluated in parallel or not.

        A random FORTRAN request is evaluated once with the available parallelism and
        once with a single thread and process; both criterion values have to be
        exactly equal.
        """
        # Generate random initialization file
        constr = {
            "program": {"version": "fortran"},
            "estimation": {"maxfun": np.random.randint(0, 50)},
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        # If delta is not fixed, we need to ensure a bound-constraint optimizer.
        # However, this is not the standard flag_estimation as the number of function
        # evaluations is possibly much larger to detect any differences in the updates
        # of the optimizer steps depending on the implementation.
        #
        # The flag is tested by truthiness and not by identity: a value taken from a
        # boolean column is a ``numpy.bool_``, for which ``is False`` is never true.
        if not params_spec.loc[("delta", "delta"), "fixed"]:
            options_spec["estimation"]["optimizer"] = "FORT-BOBYQA"

        base = None
        for is_parallel in [True, False]:
            options_spec["program"]["threads"] = 1
            options_spec["program"]["procs"] = 1

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)
            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)
Пример #16
0
    def test_1(self):
        """Check the special case where the EMAX better be equal to the MAXE.

        A deterministic random model is solved without interpolation first and then
        again with interpolation switched on; ``periods_emax`` has to agree.
        """
        # Set initial constraints
        constr = {
            "interpolation": {"flag": False},
            "num_periods": np.random.randint(3, 6),
        }

        params_spec, options_spec = generate_random_model(
            point_constr=constr, deterministic=True
        )

        baseline = None

        # Solve with and without interpolation code
        for _ in range(2):
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax = dist_class_attributes(
                respy_obj, "states_number_period", "periods_emax"
            )

            # Store and check results
            if baseline is None:
                baseline = periods_emax.copy()
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration. This ensures that there is at least one
            # interpolation taking place.
            options_spec["interpolation"]["points"] = max(states_number_period) - 1
            options_spec["interpolation"]["flag"] = True
Пример #17
0
    def test_1(self):
        """Compare simulation results from the RESTUD program and the RESPY package.

        The same random model is simulated by the external ``kw_dp3asim`` executable
        and by RESPY. The experience, schooling and lagged-choice columns of both
        datasets have to be equal.
        """
        args = generate_constraints_dict()
        params_spec, options_spec = generate_random_model(**args)
        params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

        # Indicate RESTUD code the special case of zero disturbance.
        open(".restud.testing.scratch", "a").close()

        # We need to indicate to the RESFORT code to rescale the experience covariates.
        open(".restud.respy.scratch", "a").close()

        # Perform toolbox actions
        respy_obj = RespyCls(params_spec, options_spec)

        # Collect the attributes that are handed over to the RESTUD program.
        (
            optim_paras,
            edu_spec,
            num_agents_sim,
            num_periods,
            num_draws_emax,
        ) = dist_class_attributes(
            respy_obj,
            "optim_paras",
            "edu_spec",
            "num_agents_sim",
            "num_periods",
            "num_draws_emax",
        )

        # Recover the covariance matrix of the shocks from its Cholesky factor.
        shocks_cholesky = optim_paras["shocks_cholesky"]
        cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

        # Simulate sample model using RESTUD code.
        transform_respy_to_restud_sim(
            optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
        )

        # Solve model using RESTUD code.
        cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
        subprocess.check_call(cmd, shell=True)

        # We need to ensure for RESPY that the lagged activity variable indicates that
        # the individuals were in school the period before entering the model.
        types = np.random.choice([3], size=num_agents_sim)
        np.savetxt(".initial_lagged.respy.test", types, fmt="%i")

        # Solve model using RESPY package.
        simulate_observed(respy_obj, is_missings=False)

        # Compare the simulated dataset generated by the programs.
        column_labels = []
        column_labels += ["Experience_A", "Experience_B"]
        column_labels += ["Years_Schooling", "Lagged_Choice"]

        # The builtin ``float`` replaces the ``np.float`` alias, which was removed from
        # NumPy. ``sep=r"\s+"`` is the documented equivalent of the deprecated
        # ``delim_whitespace=True``.
        py = pd.read_csv(
            "data.respy.dat",
            sep=r"\s+",
            header=0,
            na_values=".",
            usecols=column_labels,
        ).astype(float)

        # Only the last four columns of the RESTUD output are compared.
        fort = pd.DataFrame(
            np.array(np.genfromtxt("ftest.txt", missing_values="."), ndmin=2)[:, -4:],
            columns=column_labels,
        ).astype(float)

        # The simulated dataset from FORTRAN includes an indicator for the lagged
        # activities.
        py["Lagged_Choice"] = py["Lagged_Choice"].map({1: 0.0, 2: 0.0, 3: 1.0, 4: 0.0})

        assert_frame_equal(py, fort)
Пример #18
0
    def test_10(self):
        """ This test ensures that the order of the initial schooling level specified in
        the initialization files does not matter for the simulation of a dataset and
        subsequent evaluation of the criterion function.

        Warning
        -------
        This test fails if types have the identical intercept as no unique ordering is
        determined then.

        """
        point_constr = {
            "estimation": {"maxfun": 0},
            # We cannot allow for interpolation as the order of states within each
            # period changes and thus the prediction model is altered even if the same
            # state identifier is used.
            "interpolation": {"flag": False},
        }

        params_spec, options_spec = generate_random_model(point_constr=point_constr)

        respy_obj = RespyCls(params_spec, options_spec)

        edu_baseline_spec, num_types, num_paras, optim_paras = dist_class_attributes(
            respy_obj, "edu_spec", "num_types", "num_paras", "optim_paras"
        )

        # We want to randomly shuffle the list of initial schooling but need to maintain
        # the order of the shares.
        edu_shuffled_start = np.random.permutation(edu_baseline_spec["start"]).tolist()

        edu_shuffled_share, edu_shuffled_lagged = [], []
        for start in edu_shuffled_start:
            idx = edu_baseline_spec["start"].index(start)
            edu_shuffled_lagged += [edu_baseline_spec["lagged"][idx]]
            edu_shuffled_share += [edu_baseline_spec["share"][idx]]

        edu_shuffled_spec = copy.deepcopy(edu_baseline_spec)
        edu_shuffled_spec["lagged"] = edu_shuffled_lagged
        edu_shuffled_spec["start"] = edu_shuffled_start
        edu_shuffled_spec["share"] = edu_shuffled_share

        # We are only looking at a single evaluation as otherwise the reordering affects
        # the optimizer that is trying better parameter values one-by-one. The
        # reordering might also violate the bounds.
        for i in range(53, num_paras):
            optim_paras["paras_bounds"][i] = [None, None]
            optim_paras["paras_fixed"][i] = False

        # We need to ensure that the baseline type is still in the first position.
        types_order = [0] + np.random.permutation(range(1, num_types)).tolist()

        # The type shares are stored as a flat array with two entries per type.
        type_shares = []
        for i in range(num_types):
            lower, upper = i * 2, (i + 1) * 2
            type_shares += [optim_paras["type_shares"][lower:upper].tolist()]

        optim_paras_baseline = copy.deepcopy(optim_paras)
        optim_paras_shuffled = copy.deepcopy(optim_paras)

        list_ = [optim_paras["type_shifts"][i, :].tolist() for i in types_order]
        optim_paras_shuffled["type_shifts"] = np.array(list_)

        list_ = [type_shares[i] for i in types_order]
        optim_paras_shuffled["type_shares"] = np.array(list_).flatten()

        base_data, base_val = None, None

        # All four combinations of type ordering and schooling ordering are compared
        # against the first one.
        for paras in [optim_paras_baseline, optim_paras_shuffled]:
            for edu_spec in [edu_baseline_spec, edu_shuffled_spec]:

                respy_obj.unlock()
                respy_obj.set_attr("edu_spec", edu_spec)
                respy_obj.lock()

                # There is some more work to do to update the coefficients as we
                # distinguish between the economic and optimization version of the
                # parameters.
                x = get_optim_paras(paras, num_paras, "all", True)
                shocks_cholesky, _ = extract_cholesky(x)
                shocks_coeffs = cholesky_to_coeffs(shocks_cholesky)
                x[43:53] = shocks_coeffs
                respy_obj.update_optim_paras(x)

                respy_obj.reset()

                simulate_observed(respy_obj)

                # This part checks the equality of simulated dataset. ``sep=r"\s+"`` is
                # the documented equivalent of the deprecated ``delim_whitespace=True``.
                data_frame = pd.read_csv("data.respy.dat", sep=r"\s+")

                if base_data is None:
                    base_data = data_frame.copy()

                assert_frame_equal(base_data, data_frame)

                # This part checks the equality of a single function evaluation.
                _, val = respy_obj.fit()
                if base_val is None:
                    base_val = val
                np.testing.assert_almost_equal(base_val, val)

                respy_obj.reset()
Пример #19
0
    def test_3(self):
        """Ensure that the log looks exactly the same for different versions.

        The solution record, the simulation record and the estimation info file have
        to match exactly between the FORTRAN and the Python version; the estimation
        log is checked with ``compare_est_log``.
        """
        max_draws = np.random.randint(10, 100)

        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        point_constr = {
            "interpolation": {"flag": False},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_obj = RespyCls(params_spec, options_spec)

        num_agents_sim, file_sim = dist_class_attributes(
            respy_obj, "num_agents_sim", "file_sim"
        )

        # Iterate over alternative implementations
        base_sol_log, base_est_info, base_est_log = None, None, None
        base_sim_log = None

        type_shares = respy_obj.attr["optim_paras"]["type_shares"]
        num_periods = options_spec["num_periods"]

        edu_spec = options_spec["edu_spec"]

        write_draws(num_periods, max_draws)
        write_types(type_shares, num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_lagged_start(num_agents_sim)

        for version in ["fortran", "python"]:

            respy_obj.unlock()

            respy_obj.set_attr("version", version)

            respy_obj.lock()

            simulate_observed(respy_obj)

            # Check for identical logging of the solution. Files are read once per run
            # inside a context manager so that no file handle is left open.
            with open(file_sim + ".respy.sol", "r") as file_:
                sol_log = file_.read()
            if base_sol_log is None:
                base_sol_log = sol_log
            assert sol_log == base_sol_log

            # Check for identical logging of the simulation.
            with open(file_sim + ".respy.sim", "r") as file_:
                sim_log = file_.read()
            if base_sim_log is None:
                base_sim_log = sim_log
            assert sim_log == base_sim_log

            respy_obj.fit()

            # The comparison has to run for every version. Nested under the ``None``
            # check it would only ever compare the first file with itself.
            with open("est.respy.info", "r") as file_:
                est_info = file_.read()
            if base_est_info is None:
                base_est_info = est_info
            assert est_info == base_est_info

            if base_est_log is None:
                with open("est.respy.log", "r") as file_:
                    base_est_log = file_.readlines()
            compare_est_log(base_est_log)
Пример #20
0
    def test_1(self):
        """Test the equality of a criterion function evaluation for a random request.

        A random model is simulated and evaluated with the "python" and the "fortran"
        version. The simulated dataset and the criterion value of each run have to
        match those of the first run.
        """
        # Run evaluation for multiple random requests.
        is_deterministic = np.random.choice([True, False], p=[0.10, 0.9])
        is_interpolated = bool(np.random.choice([True, False], p=[0.10, 0.9]))
        is_myopic = np.random.choice([True, False], p=[0.10, 0.9])
        max_draws = np.random.randint(11, 100)
        num_agents = np.random.randint(10, max_draws)

        bound_constr = {"max_draws": max_draws}
        point_constr = {
            "interpolation": {"flag": is_interpolated},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0, "agents": num_agents},
            "simulation": {"agents": num_agents},
            "num_periods": np.random.randint(1, 5),
        }

        num_types = np.random.randint(2, 5)

        # Interpolated requests are drawn with at least three periods.
        if is_interpolated:
            point_constr["num_periods"] = np.random.randint(3, 5)

        params_spec, options_spec = generate_random_model(
            bound_constr=bound_constr,
            point_constr=point_constr,
            deterministic=is_deterministic,
            myopic=is_myopic,
            num_types=num_types,
        )

        edu_spec = options_spec["edu_spec"]
        num_periods = point_constr["num_periods"]

        # The use of the interpolation routines is another special case. Constructing
        # a request that actually involves the use of the interpolation routine is a
        # little involved as the number of interpolation points needs to be lower than
        # the actual number of states. And to know the number of states each period, I
        # need to construct the whole state space.
        if is_interpolated:
            state_space = StateSpace(
                num_periods, num_types, edu_spec["start"], edu_spec["max"]
            )

            max_states_period = state_space.states_per_period.max()

            options_spec["interpolation"]["points"] = np.random.randint(
                10, max_states_period
            )

        # Write out random components and interpolation grid to align the three
        # implementations.
        write_draws(num_periods, max_draws)
        respy_obj = RespyCls(params_spec, options_spec)
        write_interpolation_grid(respy_obj)

        type_shares = respy_obj.attr["optim_paras"]["type_shares"]

        write_types(type_shares, num_agents)
        write_edu_start(edu_spec, num_agents)
        write_lagged_start(num_agents)

        # Clean evaluations based on interpolation grid. The baselines are filled by
        # the first version and compared against afterwards.
        base_val, base_data = None, None

        for version in ["python", "fortran"]:
            respy_obj = RespyCls(params_spec, options_spec)

            # Modify the version of the program for the different requests.
            respy_obj.unlock()
            respy_obj.set_attr("version", version)
            respy_obj.lock()

            # Solve the model
            respy_obj = simulate_observed(respy_obj)

            # This part checks the equality of the simulated dataset for the different
            # versions of the code. ``sep=r"\s+"`` is the documented replacement for
            # the deprecated ``delim_whitespace=True``.
            data_frame = pd.read_csv("data.respy.dat", sep=r"\s+")

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val, crit_val, rtol=1e-05, atol=1e-06)

            # We know even more for the deterministic case.
            if is_deterministic:
                assert crit_val in [-1.0, 0.0]
Пример #21
0
    def test_4(self):
        """Check that all versions write out the same scaling matrix.

        A random model is simulated once. Afterwards a copy of it is fitted with the
        "fortran" and the "python" version and the content of ``scaling.respy.out``
        of each run is compared against the content produced by the first run.
        """
        max_draws = np.random.randint(11, 300)
        num_agents = np.random.randint(10, max_draws)

        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}
        point_constr = {
            "program": {"version": "python"},
            "estimation": {"maxfun": np.random.randint(1, 6), "agents": num_agents},
            "simulation": {"agents": num_agents},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_base = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras = dist_class_attributes(
            respy_base, "num_agents_sim", "optim_paras"
        )

        # Put the random components on disk before the model is simulated.
        write_draws(options_spec["num_periods"], max_draws)
        write_interpolation_grid(respy_base)
        write_types(optim_paras["type_shares"], num_agents_sim)

        simulate_observed(respy_base)

        reference_matrix = None
        for version in ("fortran", "python"):
            candidate = copy.deepcopy(respy_base)

            # Which optimizer is used is irrelevant for the scaling matrix. The PYTHON
            # version, however, must only ever be run on a single processor.
            is_python = version == "python"
            optimizer_used = "SCIPY-LBFGSB" if is_python else "FORT-BOBYQA"
            num_procs = 1 if is_python else candidate.get_attr("num_procs")

            # Apply the version-specific settings to the copy.
            settings = (
                ("optimizer_used", optimizer_used),
                ("num_procs", num_procs),
                ("version", version),
                ("maxfun", 1),
            )
            candidate.unlock()
            for label, value in settings:
                candidate.set_attr(label, value)
            candidate.lock()

            candidate.fit()

            # The first version that is run provides the baseline.
            if reference_matrix is None:
                reference_matrix = np.genfromtxt("scaling.respy.out")

            assert_almost_equal(reference_matrix, np.genfromtxt("scaling.respy.out"))
Пример #22
0
    def test_8(self):
        """Check that extra initial conditions with zero weight are irrelevant.

        The criterion function is evaluated once with a single level of initial
        schooling and once with several levels where all the weight is put on the
        first one. Both evaluations have to yield the same value.
        """
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {"agents": num_agents},
            "num_periods": np.random.randint(1, 4),
            "edu_spec": {"max": np.random.randint(15, 25, size=1).tolist()[0]},
            "estimation": {"maxfun": 0, "agents": num_agents},
            "interpolation": {"flag": False},
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)
        simulate_observed(RespyCls(params_spec, options_spec))

        reference_val = None
        edu_start_base = np.random.randint(1, 5, size=1).tolist()[0]

        # The distribution of the initial lagged activity has to be the same in all
        # evaluations.
        edu_lagged_base = np.random.uniform(size=5).tolist()

        group_counts = [1, np.random.choice([2, 3, 4]).tolist()]
        for num_edu_start in group_counts:
            edu_spec = options_spec["edu_spec"]

            # All the weight is always on the first level of initial schooling.
            edu_spec["share"] = [1.0] + [0.0] * (num_edu_start - 1)
            edu_spec["lagged"] = edu_lagged_base[:num_edu_start]

            # The baseline level of initial schooling has to come first and must not
            # show up a second time.
            edu_start = np.random.choice(
                range(1, 10), size=num_edu_start, replace=False
            ).tolist()
            if edu_start_base not in edu_start:
                edu_start[0] = edu_start_base
            else:
                edu_start.remove(edu_start_base)
                edu_start.insert(0, edu_start_base)

            edu_spec["start"] = edu_start

            respy_obj = RespyCls(params_spec, options_spec)
            simulate_observed(respy_obj)
            _, current_val = respy_obj.fit()
            if reference_val is None:
                reference_val = current_val

            np.testing.assert_almost_equal(reference_val, current_val)
Пример #23
0
 def test_1(self):
     """Simulate a randomly generated model and process the resulting dataset."""
     params, options = generate_random_model()
     model = RespyCls(params, options)
     # Both calls only have to run through, no results are inspected.
     simulate_observed(model)
     process_dataset(model)
Пример #24
0
def run(request, is_compile, is_background, is_strict, num_procs):
    """ Run the regression tests.

    Arguments:
        request: Sequence whose first element is "create", "check", or "investigate"
            and whose second element is converted with ``int``. It is the number of
            tests for "create" and "check" and the index of the test for
            "investigate".
        is_compile: If true, ``compile_package(True)`` is called first.
        is_background: If true, no notification is sent after a "check" request.
        is_strict: If true, a "check" request stops after the first failure (single
            process) or after the first batch that contains a failure.
        num_procs: Size of the multiprocessing pool. With a value of one the tests
            are run in the current process.

    Returns:
        ``True`` or ``False`` for a "check" request, depending on whether all tests
        that were run passed, and ``None`` otherwise.

    Raises:
        AssertionError: If the request is unknown, the number of tests is not
            positive, the index is negative, or an investigated test does not
            reproduce its stored criterion value.
    """
    if is_compile:
        compile_package(True)

    # We can set up a multiprocessing pool right away.
    mp_pool = mp.Pool(num_procs)

    try:
        # The late import is required so a potentially just compiled FORTRAN
        # implementation is recognized. This is important for the creation of the
        # regression vault as we want to include FORTRAN use cases.
        from respy import RespyCls

        # Process command line arguments
        is_creation = False
        is_investigation, is_check = False, False
        num_tests, idx = None, None

        if request[0] == "create":
            is_creation, num_tests = True, int(request[1])
        elif request[0] == "check":
            is_check, num_tests = True, int(request[1])
        elif request[0] == "investigate":
            is_investigation, idx = True, int(request[1])
        else:
            raise AssertionError("request in [create, check. investigate]")
        if num_tests is not None:
            assert num_tests > 0
        if idx is not None:
            assert idx >= 0

        if is_investigation:
            fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
            with open(fname, "rb") as p:
                tests = pickle.load(p)

            attr, crit_val = tests[idx]
            params_spec = _params_spec_from_attributes(attr)
            options_spec = _options_spec_from_attributes(attr)

            respy_obj = RespyCls(params_spec, options_spec)

            simulate_observed(respy_obj)

            result = respy_obj.fit()[1]
            np.testing.assert_almost_equal(result, crit_val, decimal=DECIMALS)

        if is_creation:
            # We maintain the separate execution in the case of a single processor for
            # debugging purposes. The error messages are generally much more
            # informative.
            if num_procs == 1:
                tests = [create_single(idx) for idx in range(num_tests)]
            else:
                tests = mp_pool.map(create_single, range(num_tests))

            with open(TEST_RESOURCES_DIR / "regression_vault.pickle", "wb") as p:
                pickle.dump(tests, p)
            return

        if is_check:
            fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
            with open(fname, "rb") as p:
                tests = pickle.load(p)

            run_single = partial(check_single, tests)
            indices = list(range(num_tests))

            # We maintain the separate execution in the case of a single processor for
            # debugging purposes. The error messages are generally much more
            # informative.
            if num_procs == 1:
                ret = []
                for index in indices:
                    ret += [run_single(index)]
                    # We need an early termination if a strict test run is requested.
                    if is_strict and (False in ret):
                        break
            else:
                ret = []
                for chunk in get_chunks(indices, num_procs):
                    ret += mp_pool.map(run_single, chunk)
                    # We need an early termination if a strict test run is requested.
                    # So we check whether there are any failures so far.
                    if is_strict and (False in ret):
                        break

            # This allows to call this test from another script, that runs other tests
            # as well. Every result other than True or None counts as a failure.
            idx_failures = [i for i, x in enumerate(ret) if x not in [True, None]]
            is_failure = False in ret

            if len(idx_failures) > 0:
                is_failure = True

            if not is_background:
                send_notification(
                    "regression", is_failed=is_failure, idx_failures=idx_failures
                )

            return not is_failure
    finally:
        # Release the worker processes on every path, including early returns and
        # failed assertions. The pool was never closed before.
        mp_pool.close()
        mp_pool.join()
Пример #25
0
    def test_5(self):
        """Ensure that the core functions yield the same results across versions.

        For a random model the results of ``pyth_solve``, ``pyth_simulate``,
        ``pyth_contributions`` and ``pyth_criterion`` are compared against the
        corresponding ``fort_debug`` wrappers. The solution is additionally compared
        against the output of ``resfort_interface``.
        """
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)

        # Ensure that backward induction routines use the same grid for the
        # interpolation.
        max_states_period = write_interpolation_grid(respy_obj)

        # Extract class attributes
        (
            num_periods,
            edu_spec,
            optim_paras,
            num_draws_emax,
            is_debug,
            is_interpolated,
            num_points_interp,
            is_myopic,
            num_agents_sim,
            num_draws_prob,
            tau,
            seed_sim,
            num_agents_est,
            optimizer_options,
            file_sim,
            num_types,
            num_paras,
        ) = dist_class_attributes(
            respy_obj,
            "num_periods",
            "edu_spec",
            "optim_paras",
            "num_draws_emax",
            "is_debug",
            "is_interpolated",
            "num_points_interp",
            "is_myopic",
            "num_agents_sim",
            "num_draws_prob",
            "tau",
            "seed_sim",
            "num_agents_est",
            "optimizer_options",
            "file_sim",
            "num_types",
            "num_paras",
        )

        # Unpack the parameters that are handed to the wrappers one by one.
        min_idx = edu_spec["max"] + 1
        shocks_cholesky = optim_paras["shocks_cholesky"]
        coeffs_common = optim_paras["coeffs_common"]
        coeffs_home = optim_paras["coeffs_home"]
        coeffs_edu = optim_paras["coeffs_edu"]
        coeffs_a = optim_paras["coeffs_a"]
        coeffs_b = optim_paras["coeffs_b"]
        delta = optim_paras["delta"]

        type_spec_shares = optim_paras["type_shares"]
        type_spec_shifts = optim_paras["type_shifts"]

        # Write out random components and interpolation grid to align the three
        # implementations.
        max_draws = max(num_agents_sim, num_draws_emax, num_draws_prob)
        write_types(type_spec_shares, num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_draws(num_periods, max_draws)
        write_lagged_start(num_agents_sim)

        # It is critical that the model is simulated after all files have been written
        # to the disk because they are picked up in the subroutines.
        respy_obj = simulate_observed(respy_obj)

        # One set of draws each for the solution, the likelihood and the simulation.
        periods_draws_emax = read_draws(num_periods, num_draws_emax)
        periods_draws_prob = read_draws(num_periods, num_draws_prob)
        periods_draws_sims = read_draws(num_periods, num_agents_sim)

        # Solution of the model with all three implementations.
        fort, _ = resfort_interface(respy_obj, "simulate")

        state_space = pyth_solve(
            is_interpolated,
            num_points_interp,
            num_periods,
            is_debug,
            periods_draws_emax,
            edu_spec,
            optim_paras,
            file_sim,
            num_types,
        )

        (
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
        ) = state_space._get_fortran_counterparts()

        # The elements are arranged so that they can be compared position by position
        # with ``fort`` and ``f2py`` below.
        py = (
            periods_rewards_systematic,
            state_space.states_per_period,
            mapping_state_idx,
            periods_emax,
            states_all,
        )

        f2py = fort_debug.wrapper_solve(
            is_interpolated,
            num_points_interp,
            num_draws_emax,
            num_periods,
            is_myopic,
            is_debug,
            periods_draws_emax,
            min_idx,
            edu_spec["start"],
            edu_spec["max"],
            coeffs_common,
            coeffs_a,
            coeffs_b,
            coeffs_edu,
            coeffs_home,
            shocks_cholesky,
            delta,
            file_sim,
            max_states_period,
            num_types,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(py[0], fort[0])
        assert_allclose(py[1], fort[1])
        assert_allclose(py[2], fort[2])
        assert_allclose(py[3], fort[3])
        assert_allclose(py[4], fort[4])

        assert_allclose(py[0], f2py[0])
        assert_allclose(py[1], f2py[1])
        assert_allclose(py[2], f2py[2])
        assert_allclose(py[3], f2py[3])
        assert_allclose(py[4], f2py[4])

        # NOTE(review): this repeats the extraction from above with the same
        # ``state_space`` object; it looks redundant unless one of the calls in
        # between modifies the arrays.
        (
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
        ) = state_space._get_fortran_counterparts()

        # Simulation of a dataset. From here on ``py`` holds the simulated data as an
        # array in which missing values are replaced by MISSING_FLOAT.
        simulated_data = pyth_simulate(
            state_space,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec,
            optim_paras,
            is_debug,
        )
        py = simulated_data.copy().fillna(MISSING_FLOAT).values

        # NOTE(review): the result of this call is overwritten two statements further
        # down without being used. Confirm whether the call is only kept for its side
        # effects.
        data_array = process_dataset(respy_obj).to_numpy()

        # It is very important to cut the data array down to the size of the estimation
        # sample for the calculation of contributions.
        data_array = py[:num_agents_est * num_periods, :]

        f2py = fort_debug.wrapper_simulate(
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            num_periods,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec["start"],
            edu_spec["max"],
            edu_spec["share"],
            edu_spec["lagged"],
            optim_paras["coeffs_common"],
            optim_paras["coeffs_a"],
            optim_paras["coeffs_b"],
            shocks_cholesky,
            delta,
            num_types,
            type_spec_shares,
            type_spec_shifts,
            is_debug,
        )
        assert_allclose(py, f2py)

        # We have to cut the simulated data to `num_agents_est` as the Python
        # implementation calculates the likelihood contributions for all agents in the
        # data.
        simulated_data = simulated_data.loc[simulated_data.Identifier.lt(
            num_agents_est)]

        # Likelihood contributions
        py = pyth_contributions(state_space, simulated_data,
                                periods_draws_prob, tau, optim_paras)

        # Number of rows per agent identifier in the estimation sample.
        num_obs_agent = np.bincount(simulated_data.Identifier.to_numpy())

        f2py = fort_debug.wrapper_contributions(
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            data_array,
            periods_draws_prob,
            tau,
            num_periods,
            num_draws_prob,
            num_agents_est,
            num_obs_agent,
            num_types,
            edu_spec["start"],
            edu_spec["max"],
            shocks_cholesky,
            delta,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(py, f2py)

        # Evaluation of criterion function
        x0 = get_optim_paras(optim_paras, num_paras, "all", is_debug)

        py = pyth_criterion(
            x0,
            is_interpolated,
            num_points_interp,
            is_debug,
            simulated_data,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            state_space,
        )

        f2py = fort_debug.wrapper_criterion(
            x0,
            is_interpolated,
            num_draws_emax,
            num_periods,
            num_points_interp,
            is_myopic,
            is_debug,
            data_array,
            num_draws_prob,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            states_all,
            state_space.states_per_period,
            mapping_state_idx,
            max_states_period,
            num_agents_est,
            num_obs_agent,
            num_types,
            edu_spec["start"],
            edu_spec["max"],
            edu_spec["share"],
            type_spec_shares,
            type_spec_shifts,
            num_paras,
        )

        assert_allclose(py, f2py)
Пример #26
0
    def test_1(self):
        """Compare the simulated expected future value between PYTHON and FORTRAN.

        For a random period and a random state of a simulated model the result of
        ``construct_emax_risk`` is checked against its ``fort_debug`` counterpart.
        This is a separate test case because setting up the ingredients for the
        interface is expensive.
        """
        # Set up a random model that is run with the PYTHON version.
        point_constr = {"program": {"version": "python"}}
        params_spec, options_spec = generate_random_model(point_constr=point_constr)
        respy_obj = simulate_observed(RespyCls(params_spec, options_spec))

        # Collect everything that is needed from the class instance.
        attribute_names = (
            "state_space",
            "states_all",
            "mapping_state_idx",
            "periods_rewards_systematic",
            "periods_emax",
            "num_periods",
            "num_draws_emax",
            "edu_spec",
            "optim_paras",
            "num_types",
        )
        (
            state_space,
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
            num_periods,
            num_draws_emax,
            edu_spec,
            optim_paras,
            num_types,
        ) = dist_class_attributes(respy_obj, *attribute_names)

        delta = optim_paras["delta"]

        # Draw standard normal disturbances and transform them.
        draws_emax_standard = np.random.multivariate_normal(
            np.zeros(4), np.identity(4), num_draws_emax
        )
        draws_emax_risk = transform_disturbances(
            draws_emax_standard, np.zeros(4), optim_paras["shocks_cholesky"]
        )

        # Pick a random period and an admissible state index within that period.
        period = np.random.choice(range(num_periods))
        k = np.random.choice(range(state_space.states_per_period[period]))

        # Systematic rewards of the selected state for the Fortran interface.
        rewards_systematic = periods_rewards_systematic[period, k, :]

        # The Python function is evaluated for a single state only, just like the
        # Fortran version.
        rewards = state_space.get_attribute_from_period("rewards", period)[k]
        emaxs = state_space.get_attribute_from_period("emaxs", period)[k, :4]
        education = state_space.get_attribute_from_period("states", period)[k, 3]
        is_max_education = education >= edu_spec["max"]

        result_python = construct_emax_risk(
            rewards[-2:],
            rewards[:4],
            emaxs,
            draws_emax_risk,
            delta,
            is_max_education,
        )

        result_fortran = fort_debug.wrapper_construct_emax_risk(
            num_periods,
            num_draws_emax,
            period,
            k,
            draws_emax_risk,
            rewards_systematic,
            periods_emax,
            states_all,
            mapping_state_idx,
            edu_spec["start"],
            edu_spec["max"],
            delta,
            optim_paras["coeffs_common"],
            optim_paras["coeffs_a"],
            optim_paras["coeffs_b"],
            num_types,
        )

        assert_allclose(result_python, result_fortran)
Пример #27
0
def run_estimation(which):
    """ Run an estimation with the respective release.

    The working directory is changed to ``which`` for the duration of the run and
    moved one level up at the end. The model is initialized from
    ``init_dict.respy.json``, a dataset is simulated, the model is fitted, and the
    resulting criterion value is pickled to ``crit_val.respy.pkl``.
    """
    os.chdir(which)

    import numpy as np

    from respy import RespyCls

    from respy.pre_processing.model_processing import write_init_file

    # We need to make sure that the function simulate_observed() is imported from the
    # original package. Otherwise dependencies might not work properly.
    import respy

    sys.path.insert(0, os.path.dirname(respy.__file__) + "/tests")
    from respy.tests.codes.auxiliary import simulate_observed

    # The context manager makes sure that the file handle is closed again.
    with open("init_dict.respy.json", "r") as infile:
        init_dict = json.load(infile)

    # There was a change in the setup for releases after 1.00. This is only required
    # when comparing to v1.0.0.
    if "1.0.0" in sys.executable:
        init_dict["SHOCKS"]["fixed"] = np.array(init_dict["SHOCKS"]["fixed"])

    write_init_file(init_dict)

    respy_obj = RespyCls("test.respy.ini")

    # This flag ensures a clean switch to the synthetic simulation for cases where the
    # simulate_observed() was changed in between releases.
    if os.path.exists("../.simulate_observed.cfg"):
        respy_obj.simulate()
    else:
        simulate_observed(respy_obj)

    # This flag ensures that the change in the truncation of the wage variable has no
    # effect. We simply copy the dataset from the old release to the new.
    if ("2.0.0.dev20" in sys.executable) and ("/new" in os.getcwd()):
        for fname in glob.glob("data.respy.*"):
            shutil.copy("../old/" + fname, ".")

    # Moving from 2.0.0.dev17 to 2.0.0.dev18 breaks the equality because the simulated
    # datasets differ. So, we just copy the one from old. However, this is only
    # relevant if 2.0.0.dev18 is the candidate.
    if ("2.0.0.dev18" in sys.executable) and ("/new" in os.getcwd()):
        os.chdir("../old")
        for fname in glob.glob("data.respy.*"):
            shutil.copy(fname, "../new")
        os.chdir("../new")

    _, crit_val = respy_obj.fit()

    # There was a bug in version 1.0 which might lead to crit_val not to actually take
    # the lowest value that was visited by the optimizer. So, we reprocess the log file
    # again to be sure.
    if "1.0.0" in sys.executable:
        crit_val = 1e10
        with open("est.respy.log") as infile:
            for line in infile:
                list_ = shlex.split(line)
                # Skip empty lines
                if not list_:
                    continue
                # Process candidate value
                if list_[0] == "Criterion":
                    try:
                        value = float(list_[1])
                    except ValueError:
                        # The second field is not a number, so there is no candidate.
                        continue
                    if value < crit_val:
                        crit_val = value

    # Closing the file explicitly guarantees that the pickle is fully written before
    # the function returns.
    with open("crit_val.respy.pkl", "wb") as outfile:
        pkl.dump(crit_val, outfile)

    os.chdir("../")
Пример #28
0
    def test_6(self):
        """ Further tests for the interpolation routines.

        For a random period of a simulated model the results of
        ``get_exogenous_variables``, ``get_endogenous_variable`` and
        ``get_predictions`` are compared against their ``fort_debug`` counterparts.
        """
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj = simulate_observed(respy_obj)

        # Extract class attributes
        (
            periods_rewards_systematic,
            mapping_state_idx,
            seed_prob,
            periods_emax,
            num_periods,
            states_all,
            num_points_interp,
            edu_spec,
            num_draws_emax,
            is_myopic,
            is_debug,
            is_interpolated,
            optim_paras,
            optimizer_options,
            file_sim,
            num_types,
        ) = dist_class_attributes(
            respy_obj,
            "periods_rewards_systematic",
            "mapping_state_idx",
            "seed_prob",
            "periods_emax",
            "num_periods",
            "states_all",
            "num_points_interp",
            "edu_spec",
            "num_draws_emax",
            "is_myopic",
            "is_debug",
            "is_interpolated",
            "optim_paras",
            "optimizer_options",
            "file_sim",
            "num_types",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        shocks_cov = shocks_cholesky.dot(shocks_cholesky.T)
        coeffs_common = optim_paras["coeffs_common"]
        coeffs_a = optim_paras["coeffs_a"]
        coeffs_b = optim_paras["coeffs_b"]
        delta = optim_paras["delta"]

        # Add some additional objects required for the interfaces to the functions.
        period = np.random.choice(num_periods)

        periods_draws_emax = create_draws(num_periods, num_draws_emax,
                                          seed_prob, is_debug)

        draws_emax_standard = periods_draws_emax[period, :, :]

        draws_emax_risk = transform_disturbances(draws_emax_standard,
                                                 np.zeros(4), shocks_cholesky)

        # Initialize Python version and solve.
        state_space = StateSpace(num_periods, num_types, edu_spec["start"],
                                 edu_spec["max"], optim_paras)

        # Integrate periods_emax in state_space. The first four columns are set to
        # zero and the last one holds all entries of periods_emax that are neither NaN
        # nor MISSING_FLOAT.
        state_space.emaxs = np.column_stack((
            np.zeros((state_space.num_states, 4)),
            periods_emax[~np.isnan(periods_emax)
                         & (periods_emax != MISSING_FLOAT)],
        ))

        # Fill emaxs_a - emaxs_home in the requested period
        states_period = state_space.get_attribute_from_period("states", period)

        # Do not get the emaxs from the previous period if we are in the last one.
        if period != state_space.num_periods - 1:
            state_space.emaxs = get_emaxs_of_subsequent_period(
                states_period, state_space.indexer, state_space.emaxs,
                edu_spec["max"])

        num_states = state_space.states_per_period[period]

        shifts = np.random.randn(4)

        # Slight modification of request which assures that the interpolation code is
        # working.
        num_points_interp = min(num_points_interp, num_states)

        # Get the IS_SIMULATED indicator for the subset of points which are used for the
        # prediction model.
        is_simulated = get_simulated_indicator(num_points_interp, num_states,
                                               period, is_debug)

        # Unpack necessary attributes
        rewards_period = state_space.get_attribute_from_period(
            "rewards", period)
        emaxs_period = state_space.get_attribute_from_period("emaxs",
                                                             period)[:, :4]
        max_education = (state_space.get_attribute_from_period(
            "states", period)[:, 3] >= edu_spec["max"])

        # Construct the exogenous variables for all points of the state space.
        exogenous, max_emax = get_exogenous_variables(rewards_period,
                                                      emaxs_period, shifts,
                                                      optim_paras["delta"],
                                                      max_education)

        # Align output between Python and Fortran version.
        py = (exogenous, max_emax)

        f90 = fort_debug.wrapper_get_exogenous_variables(
            period,
            num_periods,
            num_states,
            periods_rewards_systematic,
            shifts,
            mapping_state_idx,
            periods_emax,
            states_all,
            edu_spec["start"],
            edu_spec["max"],
            delta,
            coeffs_common,
            coeffs_a,
            coeffs_b,
            num_types,
        )

        assert_almost_equal(py[0], f90[0])
        assert_almost_equal(py[1], f90[1])

        # Construct endogenous variable so that the prediction model can be fitted.
        endogenous = get_endogenous_variable(
            rewards_period,
            emaxs_period,
            max_emax,
            is_simulated,
            draws_emax_risk,
            optim_paras["delta"],
            max_education,
        )

        f90 = fort_debug.wrapper_get_endogenous_variable(
            period,
            num_periods,
            num_states,
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            is_simulated,
            num_draws_emax,
            max_emax,
            draws_emax_risk,
            edu_spec["start"],
            edu_spec["max"],
            shocks_cov,
            delta,
            coeffs_common,
            coeffs_a,
            coeffs_b,
        )
        assert_almost_equal(endogenous, replace_missing_values(f90))

        py = get_predictions(endogenous, exogenous, max_emax, is_simulated)

        f90 = fort_debug.wrapper_get_predictions(
            endogenous,
            exogenous,
            max_emax,
            is_simulated,
            num_points_interp,
            num_states,
            file_sim,
            False,
        )

        # This assertion fails if a column is all zeros, so the predictions are only
        # compared if every column of the exogenous variables has at least one nonzero
        # entry. The previous guard, ``not exogenous.any(axis=0).any()``, was only true
        # for an entirely zero matrix and thus skipped the comparison in practically
        # all cases.
        if exogenous.any(axis=0).all():
            assert_array_almost_equal(py, f90)