示例#1
0
    def check_estimation(self):
        """Validate the attributes that only matter when an estimation is requested.

        The instance has to be locked, no scratch file of another estimation may be
        present in the working directory, at least one parameter has to be free, and
        the estimation dataset has to exist. If ``maxfun`` is positive, the selected
        optimizer additionally needs an entry in the optimizer options and has to be
        among the optimizers listed for the requested version.

        Returns:
            The instance itself.

        Raises:
            AssertionError: If the instance is not locked, a scratch file exists, or
                the optimizer / version combination is not valid.
            UserError: If all parameters are fixed or the dataset is missing.
        """
        # The class instance has to be locked.
        assert self.get_attr("is_locked")

        # The scratch file indicates an estimation already running in this directory.
        assert not os.path.exists(".estimation.respy.scratch")

        # Pull all required attributes from the class in one go.
        names = (
            "optimizer_options",
            "optimizer_used",
            "optim_paras",
            "version",
            "maxfun",
            "num_paras",
            "file_est",
        )
        (
            optimizer_options,
            optimizer_used,
            optim_paras,
            version,
            maxfun,
            num_paras,
            file_est,
        ) = dist_class_attributes(self, *names)

        # An estimation with every parameter fixed is pointless.
        if sum(optim_paras["paras_fixed"]) == num_paras:
            raise UserError("Estimation requires at least one free parameter")

        # The dataset used for the estimation has to be on disk.
        if not os.path.exists(file_est):
            raise UserError("Estimation dataset does not exist")

        # The optimizer only matters if function evaluations are actually allowed.
        if maxfun > 0:
            assert optimizer_used in optimizer_options

            # Only the two known implementations ship a list of valid optimizers.
            if version not in ("python", "fortran"):
                raise AssertionError
            admissible = OPT_EST_PYTH if version == "python" else OPT_EST_FORT
            assert optimizer_used in admissible

        return self
示例#2
0
def stop():
    """Signal a running estimation to terminate gently.

    If the scratch file ``.estimation.respy.scratch`` exists in the current working
    directory, an empty file ``.stop.respy.scratch`` is created (or truncated) there.

    Raises:
        UserError: If the estimation scratch file does not exist.
    """
    if not os.path.exists(".estimation.respy.scratch"):
        raise UserError("... no estimation running at this time")

    # Only the existence of the file matters, so it is left empty.
    with open(".stop.respy.scratch", "w"):
        pass
示例#3
0
def dist_input_arguments(parser):
    """Parse the command line and validate the input for the script.

    Args:
        parser: Object whose ``parse_args()`` result carries an ``init_file``
            attribute.

    Returns:
        The path of the initialization file.

    Raises:
        UserError: If the initialization file or ``est.respy.info`` (looked up in the
            current working directory) does not exist.
    """
    init_file = parser.parse_args().init_file

    # Each required file is paired with the message reported if it is missing. The
    # order matters as the first missing file determines the error.
    required = (
        (init_file, "Initialization file does not exist"),
        (
            "est.respy.info",
            "Information on parameter values from last step unavailable",
        ),
    )
    for path, message in required:
        if not os.path.exists(path):
            raise UserError(message)

    return init_file
示例#4
0
def scripts_update(init_file):
    """Update the model parametrization in an initialization file.

    The parameter values of the last step, as returned by ``get_est_info()``, are
    written into the coefficient sections of the initialization file.

    Args:
        init_file: Path of the initialization file that is read and then replaced.

    Raises:
        UserError: If the step parameters contain missing values or if their number
            does not fit the number of types implied by the initialization file.
    """
    init_dict = read_init_file(init_file)
    paras_steps = get_est_info()["paras_step"]

    # Values that could not be parsed are reported as "---". An update is impossible
    # in that case.
    if "---" in paras_steps.tolist():
        raise UserError("Missing values in est.respy.info")

    # The length of the parameter vector has to fit the initialization file. This
    # might fail if the number of types was changed in the initialization file and
    # the update is requested from an earlier logfile.
    num_types = len(init_dict["TYPE SHARES"]["coeffs"]) / 2 + 1
    num_paras_expected = 53 + (num_types - 1) * 6
    if len(paras_steps) != num_paras_expected:
        raise UserError("Info does not fit the current model specification")

    optim_paras = distribute_parameters(paras_steps, True)

    # Map each section of the initialization file to its updated coefficients. The
    # shock coefficients are taken directly from the parameter vector.
    updates = {
        "COMMON": optim_paras["coeffs_common"],
        "OCCUPATION A": optim_paras["coeffs_a"],
        "OCCUPATION B": optim_paras["coeffs_b"],
        "EDUCATION": optim_paras["coeffs_edu"],
        "HOME": optim_paras["coeffs_home"],
        "BASICS": optim_paras["delta"],
        "SHOCKS": paras_steps[43:53],
        "TYPE SHARES": optim_paras["type_shares"][2:],
        "TYPE SHIFTS": optim_paras["type_shifts"].flatten()[4:],
    }
    for section, coeffs in updates.items():
        init_dict[section]["coeffs"] = coeffs

    # Write to an intermediate file first so that the original file is not lost if
    # something goes wrong during printing.
    scratch = ".model.respy.ini"
    write_init_file(init_dict, scratch)
    shutil.move(scratch, init_file)
示例#5
0
def scripts_check(request, respy_obj):
    """Run consistency checks before a request is executed.

    Additional checks are only performed if ``request`` equals ``"estimate"``: every
    row of the processed dataset has to map into a valid entry of the state space
    indexer, and the estimation-specific attributes of ``respy_obj`` are checked.

    Args:
        request: Name of the request, only ``"estimate"`` triggers the checks.
        respy_obj: Model object providing the attributes and ``check_estimation()``.

    Raises:
        UserError: If an observation in the dataset is not admissible.
    """
    # Distribute model parameters
    num_periods, edu_spec, num_types, optim_paras = dist_class_attributes(
        respy_obj, "num_periods", "edu_spec", "num_types", "optim_paras"
    )

    # Nothing else to do unless an estimation is requested.
    if request != "estimate":
        return

    # Grid of the admissible states.
    state_space = StateSpace(
        num_periods, num_types, edu_spec["start"], edu_spec["max"], optim_paras
    )

    # The structure of the dataset is checked row by row.
    observations = process_dataset(respy_obj).to_numpy()

    for row in observations:
        period = int(row[1])
        # Observable components of the state space, read from column four onwards.
        exp_a, exp_b, edu, choice_lagged = row[4:].astype(int)

        try:
            # The schooling entry has to be non-negative.
            np.testing.assert_equal(edu >= 0, True)

            # The lookup might fail either because the state is simply infeasible at
            # any period or just not defined for the particular period requested.
            idx = state_space.indexer[period, exp_a, exp_b, edu, choice_lagged - 1]
            np.testing.assert_equal(idx >= 0, True)
        except (IndexError, AssertionError):
            raise UserError(ERR_MSG)

    # Finally, a special look at the estimation attributes such as the optimizer.
    respy_obj.check_estimation()
def dist_input_arguments(parser):
    """Parse the command line and validate the input for the estimation script.

    Args:
        parser: Object whose ``parse_args()`` result carries the attributes
            ``init_file`` and ``file_sim``.

    Returns:
        Tuple of the initialization file and ``file_sim``.

    Raises:
        UserError: If the initialization file does not exist.
    """
    namespace = parser.parse_args()
    init_file, file_sim = namespace.init_file, namespace.file_sim

    # Only the initialization file is required to be present.
    if os.path.exists(init_file):
        return init_file, file_sim

    raise UserError("Initialization file does not exist")
def get_est_info():
    """Read the information from the last step of a previous estimation run.

    The file ``est.respy.info`` in the current working directory is parsed at fixed
    line numbers. Tokens that cannot be converted to a number are reported as the
    string ``"---"``.

    Returns:
        Dictionary with the criterion values (``value_start``, ``value_step``,
        ``value_current``), the counters ``num_step`` and ``num_eval``, and the
        parameter arrays ``paras_start``, ``paras_step``, and ``paras_current``.

    Raises:
        UserError: If ``est.respy.info`` does not exist.
    """
    info_file = "est.respy.info"
    columns = ("start", "step", "current")
    casters = {"float": float, "int": int}

    def _convert(token, kind):
        # Unparsable tokens are flagged instead of raising.
        try:
            return casters[kind](token)
        except ValueError:
            return "---"

    def _tokens(lineno):
        # Split the requested line of the info file into its tokens.
        return shlex.split(linecache.getline(info_file, lineno))

    # We need to make sure that the updating file actually exists.
    if not os.path.exists(info_file):
        raise UserError(
            "Parameter update impossible as " "file est.respy.info does not exist"
        )

    # Drop any cached lines so that the current content of the file is read.
    linecache.clearcache()
    rslt = {}

    # Value of the criterion function, three entries on line six.
    criterion = _tokens(6)
    for label in columns:
        rslt["value_" + label] = _convert(criterion.pop(0), "float")

    # Total number of steps and evaluations, each the fourth token on its line.
    rslt["num_step"] = _convert(_tokens(49)[3], "int")
    rslt["num_eval"] = _convert(_tokens(51)[3], "int")

    # The parameter table starts on line 13 and ends at the first empty line, or at
    # line 98 at the latest.
    table = []
    for lineno in range(13, 99):
        row = _tokens(lineno)
        if not row:
            break
        table.append(row)

    # The first token of each row is skipped, the next three are the values.
    for position, label in enumerate(columns, start=1):
        rslt["paras_" + label] = np.array(
            [_convert(row[position], "float") for row in table]
        )

    return rslt
示例#8
0
def dist_input_arguments(parser):
    """Parse the command line and validate the input for the estimation script.

    Args:
        parser: Object whose ``parse_args()`` result carries the attributes
            ``init_file`` and ``single``.

    Returns:
        Tuple of the ``single`` flag and the initialization file.

    Raises:
        AssertionError: If ``single`` is not a boolean value.
        UserError: If the initialization file does not exist.
    """
    namespace = parser.parse_args()
    init_file = namespace.init_file
    single = namespace.single

    # The flag is validated before the file system is touched.
    assert single in (True, False)

    if os.path.exists(init_file):
        return single, init_file

    raise UserError("Initialization file does not exist")
def scripts_compare(base_init, is_update):
    """Write model fit statistics comparing the observed and a simulated dataset.

    The report is written to ``compare.respy.info`` and the model specification that
    was used is written to ``compare.respy.ini``, both in the current directory.

    Args:
        base_init: Path of the baseline initialization file.
        is_update: If true, a copy of the baseline file is updated by
            ``scripts_update`` and used instead of the baseline file itself.

    Raises:
        UserError: If the estimation and simulation file share the same name up to
            the first dot.
    """
    # In case of updating, work on a copy of the initialization file that receives
    # the updated parameter values.
    init_file = base_init
    if is_update:
        init_file = "compare.respy.ini"
        shutil.copy(base_init, init_file)
        scripts_update(init_file)

    # Read in relevant model specification.
    respy_obj = RespyCls(init_file)
    respy_obj.write_out("compare.respy.ini")

    num_periods, num_agents_est, num_agents_sim = dist_class_attributes(
        respy_obj, "num_periods", "num_agents_est", "num_agents_sim"
    )

    # The comparison makes no sense if the simulated and the estimation dataset share
    # the same file, as the estimation dataset would then be overwritten by the
    # simulated one.
    stem_est = respy_obj.attr["file_est"].split(".")[0]
    stem_sim = respy_obj.attr["file_sim"].split(".")[0]
    if stem_est == stem_sim:
        raise UserError(" Simulation would overwrite estimation dataset")

    data_obs = process_dataset(respy_obj)
    data_sim = respy_obj.simulate()[1]

    # Transition matrices are only constructed for more than a single period.
    has_transitions = num_periods > 1
    if has_transitions:
        transitions = [
            construct_transition_matrix(data) for data in (data_obs, data_sim)
        ]

    # The report covers the periods present in the observed data.
    max_periods = len(data_obs["Period"].unique())

    # Prepare results
    rslt_initial = _prepare_initial(
        data_obs, data_sim, num_agents_est, num_agents_sim
    )
    rslt_choice, rmse_choice = _prepare_choices(data_obs, data_sim)
    rslt_wages = [
        ("Occupation A", _prepare_wages(data_obs, data_sim, "Occupation A")),
        ("Occupation B", _prepare_wages(data_obs, data_sim, "Occupation B")),
    ]

    sources = ("Observed", "Simulated")
    column = "{:>15}"

    with open("compare.respy.info", "w") as report:

        report.write("\n Comparing the Observed and Simulated Economy\n\n")
        report.write("   Number of Periods:      " + str(max_periods) + "\n\n")

        # Initial schooling shares
        report.write("\n   Initial Schooling Shares \n\n")
        row_fmt = column * 3 + "\n"
        report.write(row_fmt.format("Level", "Observed", "Simulated") + "\n")
        for info in rslt_initial:
            # All but the first entry are turned into formatted strings in place.
            info[1:] = [format_float(value) for value in info[1:]]
            report.write(row_fmt.format(*info))

        # Choice distributions
        report.write("\n\n   Choices \n\n")
        header = ["Data", "Period", "Count", "White", "Blue", "School", "Home"]
        report.write((column * 7 + "\n").format(*header) + "\n")
        row_fmt = column * 3 + "{:15.2f}" * 4 + "\n"
        for period in range(max_periods):
            for source in sources:
                entries = [source, period + 1] + rslt_choice[source][period]
                report.write(row_fmt.format(*entries))
            report.write("\n")
        report.write("   Overall RMSE {:14.5f}\n".format(rmse_choice))

        # Transition matrices
        if has_transitions:
            report.write("\n\n   Transition Matrix \n\n")
            states = ["Work A", "Work B", "School", "Home"]
            report.write((column * 6 + "\n\n").format("", "", *states))
            row_fmt = "{:>15}{:>15}" + "{:15.4f}" * 4 + "\n"
            for i, state in enumerate(states):
                for source, matrix in zip(sources, transitions):
                    entries = [source, state] + matrix[i, :].tolist()
                    report.write(row_fmt.format(*entries))
                report.write("\n")

        # Wage distributions
        report.write("\n   Outcomes \n\n")
        row_fmt = column * 8 + "\n"
        header = ["Data", "Period", "Count", "Mean", "Std.", "25%", "50%", "75%"]
        report.write(row_fmt.format(*header) + "\n")
        for occupation, rslt in rslt_wages:
            report.write("\n    " + occupation + " \n\n")
            for period in range(max_periods):
                for source in sources:
                    record = rslt[source][period]
                    counts = int(record[0])
                    # The remaining statistics are turned into strings by
                    # format_float, which the original notes is needed for NAN.
                    stats = [format_float(value) for value in record[1:]]
                    report.write(row_fmt.format(source, period + 1, counts, *stats))
                report.write("\n")
示例#10
0
def check_model_attributes(attr_dict):
    """Validate the attributes of a model specification.

    Types and admissible ranges of the individual attributes are checked, as well as
    the model parameters, the constraints on the shock coefficients, the parameter
    bounds, and the optimizer options.

    Args:
        attr_dict: Dictionary with the model attributes. It is only read here.

    Raises:
        AssertionError: If an attribute has the wrong type or an inadmissible value.
        UserError: If the constraints on the shock coefficients are neither "all
            free", "all fixed", nor "all off-diagonal elements fixed at zero".
    """
    a = attr_dict

    # Number of parameters
    assert isinstance(a["num_paras"], int)
    assert a["num_paras"] >= 53

    # Parallelism across processes requires the fortran version and MPI.
    assert isinstance(a["num_procs"], int)
    assert a["num_procs"] > 0
    if a["num_procs"] > 1:
        assert a["version"] == "fortran"
        assert IS_PARALLELISM_MPI

    # Version of package
    assert a["version"] in ["fortran", "python"]
    if a["version"] == "fortran":
        assert IS_FORTRAN

    # Parallelism across threads requires the fortran version and OpenMP.
    assert isinstance(a["num_threads"], int)
    assert a["num_threads"] >= 1
    if a["num_threads"] >= 2:
        assert a["version"] == "fortran"
        assert IS_PARALLELISM_OMP

    # Debug status
    assert a["is_debug"] in [True, False]

    # Forward-looking agents
    assert a["is_myopic"] in [True, False]

    # Seeds
    for seed in [a["seed_emax"], a["seed_sim"], a["seed_prob"]]:
        assert np.isfinite(seed)
        assert isinstance(seed, int)
        assert seed > 0

    # Number of agents
    for num_agents in [a["num_agents_sim"], a["num_agents_est"]]:
        assert np.isfinite(num_agents)
        assert isinstance(num_agents, int)
        assert num_agents > 0

    # Number of periods
    assert np.isfinite(a["num_periods"])
    assert isinstance(a["num_periods"], int)
    assert a["num_periods"] > 0

    # Number of draws for Monte Carlo integration
    assert np.isfinite(a["num_draws_emax"])
    assert isinstance(a["num_draws_emax"], int)
    assert a["num_draws_emax"] >= 0

    # Window for smoothing parameter
    assert isinstance(a["tau"], float)
    assert a["tau"] > 0

    # Interpolation
    assert a["is_interpolated"] in [True, False]
    assert isinstance(a["num_points_interp"], int)
    assert a["num_points_interp"] > 0

    # Simulation of S-ML
    assert isinstance(a["num_draws_prob"], int)
    assert a["num_draws_prob"] > 0

    # Maximum number of iterations
    assert isinstance(a["maxfun"], int)
    assert a["maxfun"] >= 0

    # Optimizers
    assert a["optimizer_used"] in OPT_EST_FORT + OPT_EST_PYTH

    # Scaling
    assert a["precond_spec"]["type"] in ["identity", "gradient", "magnitudes"]
    for key_ in ["minimum", "eps"]:
        assert isinstance(a["precond_spec"][key_], float)
        assert a["precond_spec"][key_] > 0.0

    # Education
    edu_spec = a["edu_spec"]
    assert isinstance(edu_spec["max"], int)
    assert edu_spec["max"] > 0
    assert isinstance(edu_spec["start"], list)
    # The initial levels of schooling need to be unique.
    assert len(edu_spec["start"]) == len(set(edu_spec["start"]))
    assert all(isinstance(item, int) for item in edu_spec["start"])
    assert all(item > 0 for item in edu_spec["start"])
    assert all(item <= edu_spec["max"] for item in edu_spec["start"])
    assert all(isinstance(item, float) for item in edu_spec["share"])
    assert all(0 <= item <= 1 for item in edu_spec["lagged"])
    assert all(0 <= item <= 1 for item in edu_spec["share"])
    np.testing.assert_almost_equal(np.sum(edu_spec["share"]), 1.0, decimal=4)

    # Derivatives
    assert a["derivatives"] in ["forward-differences"]

    # Check model parameters
    optim_paras = a["optim_paras"]
    check_model_parameters(optim_paras)

    # Parameter values that are later compared against the bounds.
    x = get_optim_paras(optim_paras, a["num_paras"], "all", True)

    # It is not clear at this point how to impose parameter constraints on
    # the covariance matrix in a flexible manner. So, either all fixed or
    # none. As a special case, we also allow for all off-diagonal elements
    # to be fixed to zero.
    # NOTE(review): the hard-coded 4 and the slice 43:53 only fit a 4x4 Cholesky
    # factor, while ``dim`` below is read from the data - confirm they always agree.
    shocks_coeffs = optim_paras["shocks_cholesky"][np.tril_indices(4)]
    shocks_fixed = np.array(optim_paras["paras_fixed"][43:53])

    all_free = not shocks_fixed.any()

    # Rebuild the lower triangle to test whether all mass is on the diagonal.
    dim = len(optim_paras["shocks_cholesky"])
    helper = np.zeros((dim, dim))
    helper[np.tril_indices(dim)] = shocks_coeffs
    off_diagonals_zero = np.diag(helper).sum() == helper.sum()

    # Same layout for the indicators of fixed parameters.
    helper = np.zeros((dim, dim), dtype=bool)
    helper[np.tril_indices(dim)] = shocks_fixed
    off_diagonals_fixed = (helper[np.tril_indices(dim, k=-1)]).all()

    diagonal_matrix = off_diagonals_zero & off_diagonals_fixed

    if not (all_free or shocks_fixed.all() or diagonal_matrix):
        raise UserError(" Misspecified constraints for covariance matrix")

    # The containers for the constraints need one entry per parameter.
    for label in ["paras_fixed", "paras_bounds"]:
        assert isinstance(optim_paras[label], list)
        assert len(optim_paras[label]) == a["num_paras"]

    # The lower bound of the first parameter (presumably the discount rate, verify
    # against the parameter layout) has to be non-negative.
    assert optim_paras["paras_bounds"][0][0] >= 0.00

    # Check that all parameter values are within the bounds.
    for i in range(a["num_paras"]):
        lower, upper = optim_paras["paras_bounds"][i]
        if lower is not None:
            assert isinstance(lower, float)
            assert lower <= x[i]
            assert abs(lower) < PRINT_FLOAT
        if upper is not None:
            assert isinstance(upper, float)
            assert upper >= x[i]
            assert abs(upper) < PRINT_FLOAT
        if (upper is not None) and (lower is not None):
            assert upper >= lower

    _check_optimizer_options(a["optimizer_options"])