Example #1
    def cov(self, return_type="pytree"):
        """Calculate the variance-covariance matrix of the estimated parameters.

        Args:
            return_type (str): One of "pytree", "array" or "dataframe". If "pytree",
                a block-pytree with the same structure as the parameters is returned.
                If "array", a 2d numpy array with the covariance is returned. If
                "dataframe", a pandas DataFrame with parameter names in the index
                and columns is returned. The default is "pytree".

        Returns:
            Any: The covariance matrix of the estimated parameters as a block-pytree,
                numpy.ndarray, or pandas.DataFrame.
        """
        cov = self._internal_cov

        if return_type == "dataframe":
            registry = get_registry(extended=True)
            names = np.array(leaf_names(self._base_outcome, registry=registry))
            cov = pd.DataFrame(cov, columns=names, index=names)
        elif return_type == "pytree":
            cov = matrix_to_block_tree(cov, self._base_outcome,
                                       self._base_outcome)
        elif return_type != "array":
            raise ValueError(
                "return_type must be one of pytree, array, or dataframe, "
                f"not {return_type}.")
        return cov
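
# A hypothetical usage sketch (not part of the source): ``res`` stands for an
# already fitted LikelihoodResult, so the calls are shown as comments only.
#
# cov_tree = res.cov()                       # block-pytree mirroring the params
# cov_arr = res.cov(return_type="array")     # 2d numpy array
# cov_df = res.cov(return_type="dataframe")  # DataFrame labeled with parameter names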
Example #2
def get_weighting_matrix(moments_cov,
                         method,
                         empirical_moments,
                         clip_value=1e-6,
                         return_type="pytree"):
    """Calculate a weighting matrix from moments_cov.

    Args:
        moments_cov (pandas.DataFrame or numpy.ndarray): Square DataFrame or Array
            with the covariance matrix of the moment conditions for msm estimation.
        method (str): One of "optimal", "diagonal".
        empirical_moments (pytree): Pytree containing empirical moments. Only used to
            get the tree structure.
        clip_value (float): Bound at which diagonal elements of the moments_cov are
            clipped to avoid dividing by zero.
        return_type (str): One of "pytree", "array" or "pytree_and_array".

    Returns:
        pytree, numpy.ndarray or tuple: The weighting matrix as a block-pytree, as a
            2d numpy array with the same shape as moments_cov, or as a tuple of both,
            depending on return_type.

    """
    fast_path = isinstance(moments_cov, np.ndarray) and moments_cov.ndim == 2

    if fast_path:
        _internal_cov = moments_cov
    else:
        _internal_cov = block_tree_to_matrix(
            moments_cov,
            outer_tree=empirical_moments,
            inner_tree=empirical_moments,
        )

    if method == "optimal":
        array_weights = robust_inverse(_internal_cov)
    elif method == "diagonal":
        diagonal_values = 1 / np.clip(np.diagonal(_internal_cov), clip_value,
                                      np.inf)
        array_weights = np.diag(diagonal_values)
    else:
        raise ValueError(f"Invalid method: {method}")

    if return_type == "array" or (fast_path and "_and_" not in return_type):
        out = array_weights
    elif fast_path:
        out = (array_weights, array_weights)
    else:
        tree_weights = matrix_to_block_tree(
            array_weights,
            outer_tree=empirical_moments,
            inner_tree=empirical_moments,
        )
        if return_type == "pytree":
            out = tree_weights
        else:
            out = (tree_weights, array_weights)

    return out
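
# A minimal sketch of the "diagonal" branch on the ndarray fast path (assumes
# get_weighting_matrix is importable; empirical_moments is not needed here).
import numpy as np

moments_cov = np.diag([4.0, 0.0, 1.0])
weights = get_weighting_matrix(
    moments_cov,
    method="diagonal",
    empirical_moments=None,  # unused on the fast path with return_type="array"
    return_type="array",
)
# The zero variance is clipped to clip_value=1e-6 before inversion, so weights
# is diag(0.25, 1e6, 1.0).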
Example #3
def get_moments_cov(data,
                    calculate_moments,
                    *,
                    moment_kwargs=None,
                    bootstrap_kwargs=None):
    """Bootstrap the covariance matrix of the moment conditions.

    Args:
        data (pandas.DataFrame): DataFrame with empirical data.
        calculate_moments (callable): Function that takes data and moment_kwargs as
            arguments and returns a 1d numpy array or pandas Series with moment
            conditions.
        moment_kwargs (dict): Additional keyword arguments for calculate_moments.
        bootstrap_kwargs (dict): Additional keyword arguments that govern the
            bootstrapping. Allowed arguments are "n_draws", "seed", "n_cores",
            "batch_evaluator", "cluster" and "error_handling". For details see the
            bootstrap function.

    Returns:
        pytree: The covariance matrix of the moment conditions for msm estimation,
            as a block-pytree with the same block structure as the moments.

    """
    moment_kwargs = {} if moment_kwargs is None else moment_kwargs
    bootstrap_kwargs = {} if bootstrap_kwargs is None else bootstrap_kwargs
    valid_bs_kwargs = {
        "n_cores",
        "n_draws",
        "seed",
        "batch_evaluator",
        "cluster",
        "error_handling",
    }
    problematic = set(bootstrap_kwargs).difference(valid_bs_kwargs)
    if problematic:
        raise ValueError(f"Invalid bootstrap_kwargs: {problematic}")

    first_eval = calculate_moments(data, **moment_kwargs)

    registry = get_registry(extended=True)

    @functools.wraps(calculate_moments)
    def func(data, **kwargs):
        raw = calculate_moments(data, **kwargs)
        # xxxx won't be necessary soon!
        out = pd.Series(tree_just_flatten(raw, registry=registry))
        return out

    cov_arr = bootstrap(
        data=data, outcome=func, outcome_kwargs=moment_kwargs, **bootstrap_kwargs
    ).cov()

    if isinstance(cov_arr, pd.DataFrame):
        cov_arr = cov_arr.to_numpy()  # xxxx won't be necessary soon

    cov = matrix_to_block_tree(cov_arr, first_eval, first_eval)

    return cov
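
# A minimal usage sketch (assumes get_moments_cov is importable): bootstrap the
# covariance of two simple moments of a column.
import numpy as np
import pandas as pd

def calculate_moments(data):
    return {"mean": data["y"].mean(), "var": data["y"].var()}

rng = np.random.default_rng(0)
data = pd.DataFrame({"y": rng.normal(size=200)})

moments_cov = get_moments_cov(
    data, calculate_moments, bootstrap_kwargs={"n_draws": 100, "seed": 1}
)
# moments_cov is a block-pytree with one block per pair of moments, e.g.
# moments_cov["mean"]["var"] holds the bootstrap covariance of mean and variance.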
Example #4
def test_matrix_to_block_tree_single_element():
    tree1 = {"a": 0}
    tree2 = {"b": 1, "c": 2}

    block_tree = {"a": {"b": 0, "c": 1}}
    matrix = np.array([[0, 1]])

    calculated = matrix_to_block_tree(matrix, tree1, tree2)
    assert tree_equal(block_tree, calculated)
Example #5
def transform_free_cov_to_cov(free_cov, free_params, params, return_type):
    """Fill non-free values and project to params block-tree."""
    mask = free_params.free_mask
    cov = np.full((len(mask), len(mask)), np.nan)
    cov[np.ix_(mask, mask)] = free_cov
    if return_type == "dataframe":
        names = free_params.all_names
        cov = pd.DataFrame(cov, columns=names, index=names)
    elif return_type == "pytree":
        cov = matrix_to_block_tree(cov, params, params)
    elif return_type != "array":
        raise ValueError(
            "return_type must be one of pytree, array, or dataframe, "
            f"not {return_type}.")
    return cov
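
# The fill step from transform_free_cov_to_cov in isolation, as a self-contained
# numpy illustration: rows and columns of non-free parameters are set to NaN
# before the matrix is projected back onto the params structure.
import numpy as np

mask = np.array([True, False, True])  # the second parameter is not free
free_cov = np.array([[1.0, 0.2], [0.2, 2.0]])

cov = np.full((3, 3), np.nan)
cov[np.ix_(mask, mask)] = free_cov  # scatter the free block into the full matrix
# row and column 1 stay NaN; all other entries come from free_cov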
Example #6
def test_matrix_to_block_tree_array_and_scalar():
    t = {"a": 1.0, "b": np.arange(2)}
    calculated = matrix_to_block_tree(np.arange(9).reshape(3, 3), t, t)

    expected = {
        "a": {
            "a": np.array(0),
            "b": np.array([1, 2])
        },
        "b": {
            "a": np.array([3, 6]),
            "b": np.array([[4, 5], [7, 8]])
        },
    }

    assert _tree_equal_up_to_dtype(calculated, expected)
Example #7
def test_matrix_to_block_tree_only_params_dfs():
    tree = {
        "a": pd.DataFrame(index=["a", "b"]).assign(value=[1, 2]),
        "b": pd.DataFrame(index=["j", "k", "l"]).assign(value=[3, 4, 5]),
    }

    calculated = matrix_to_block_tree(np.arange(25).reshape(5, 5), tree, tree)

    expected = {
        "a": {
            "a": pd.DataFrame(
                [[0, 1], [5, 6]], columns=["a", "b"], index=["a", "b"]
            ),
            "b": pd.DataFrame(
                [[2, 3, 4], [7, 8, 9]], columns=["j", "k", "l"], index=["a", "b"]
            ),
        },
        "b": {
            "a": pd.DataFrame(
                [[10, 11], [15, 16], [20, 21]],
                index=["j", "k", "l"],
                columns=["a", "b"],
            ),
            "b": pd.DataFrame(
                [[12, 13, 14], [17, 18, 19], [22, 23, 24]],
                index=["j", "k", "l"],
                columns=["j", "k", "l"],
            ),
        },
    }

    assert _tree_equal_up_to_dtype(calculated, expected)
Example #8
def estimate_ml(
    loglike,
    params,
    optimize_options,
    *,
    lower_bounds=None,
    upper_bounds=None,
    constraints=None,
    logging=False,
    log_options=None,
    loglike_kwargs=None,
    numdiff_options=None,
    jacobian=None,
    jacobian_kwargs=None,
    hessian=None,
    hessian_kwargs=None,
    design_info=None,
):
    """Do a maximum likelihood (ml) estimation.

    This is a high level interface of our lower level functions for maximization,
    numerical differentiation and inference. It does the full workflow for maximum
    likelihood estimation with just one function call.

    While we have good defaults, you can still configure each aspect of each step
    via the optional arguments of this function. If you find it easier to do the
    maximization separately, you can do so and just provide the optimal parameters as
    ``params`` and set ``optimize_options=False``.

    Args:
        loglike (callable): Likelihood function that takes a params (and potentially
            other keyword arguments) and returns a dictionary that has at least the
            entries "value" (a scalar float) and "contributions" (a 1d numpy array or
            pytree) with the log likelihood contribution per individual.
        params (pytree): A pytree containing the estimated or start parameters of the
            likelihood model. If the supplied parameters are estimated parameters, set
            optimize_options to False. Pytrees can be a numpy array, a pandas Series, a
            DataFrame with "value" column, a float and any kind of (nested) dictionary
            or list containing these elements. See :ref:`params` for examples.
        optimize_options (dict, str or False): Keyword arguments that govern the
            numerical optimization. Valid entries are all arguments of
            :func:`~estimagic.optimization.optimize.minimize` except for those that are
            passed explicitly to ``estimate_ml``. If you pass False as optimize_options
            you signal that ``params`` are already the optimal parameters and no
            numerical optimization is needed. If you pass a str as optimize_options it
            is used as the ``algorithm`` option.
        lower_bounds (pytree): A pytree with the same structure as params with lower
            bounds for the parameters. Can be ``-np.inf`` for parameters with no lower
            bound.
        upper_bounds (pytree): As lower_bounds. Can be ``np.inf`` for parameters with
            no upper bound.
        constraints (list, dict): List with constraint dictionaries or single dict.
            See :ref:`constraints`.
        logging (pathlib.Path, str or False): Path to sqlite3 file (which typically has
            the file extension ``.db``). If the file does not exist, it will be created.
            The dashboard can only be used when logging is used.
        log_options (dict): Additional keyword arguments to configure the logging.

            - "fast_logging" (bool):
                A boolean that determines if "unsafe" settings are used to speed up
                write processes to the database. This should only be used for very
                short running criterion functions where the main purpose of the log is
                a real-time dashboard and it would not be catastrophic to get a
                corrupted database in case of a sudden system shutdown. If one
                evaluation of the criterion function (and gradient if applicable)
                takes more than 100 ms, the logging overhead is negligible.
            - "if_table_exists" (str):
                One of "extend", "replace", "raise". What to do if the tables we want
                to write to already exist. Default "extend".
            - "if_database_exists" (str):
                One of "extend", "replace", "raise". What to do if the database we
                want to write to already exists. Default "extend".
        loglike_kwargs (dict): Additional keyword arguments for loglike.
        numdiff_options (dict): Keyword arguments for the calculation of numerical
            derivatives for the calculation of standard errors. See
            :ref:`first_derivative` for details.
        jacobian (callable or None): A function that takes ``params`` and potentially
            other keyword arguments and returns the jacobian of loglike["contributions"]
            with respect to the params. Note that you only need to pass a Jacobian
            function if you have a closed form Jacobian. If you pass None, a numerical
            Jacobian will be calculated.
        jacobian_kwargs (dict): Additional keyword arguments for the Jacobian function.
        hessian (callable or None or False): A function that takes ``params`` and
            potentially other keyword arguments and returns the Hessian of
            loglike["value"] with respect to the params.  If you pass None, a numerical
            Hessian will be calculated. If you pass ``False``, you signal that no
            Hessian should be calculated. Thus, no result that requires the Hessian will
            be calculated.
        hessian_kwargs (dict): Additional keyword arguments for the Hessian function.
        design_info (pandas.DataFrame): DataFrame with one row per observation that
            contains some or all of the variables "psu" (primary sampling unit),
            "strata" and "fpc" (finite population corrector). See
            :ref:`robust_likelihood_inference` for details.

    Returns:
        LikelihoodResult: A LikelihoodResult object.

    """
    # ==================================================================================
    # Check and process inputs
    # ==================================================================================
    is_optimized = optimize_options is False

    if not is_optimized:
        if isinstance(optimize_options, str):
            optimize_options = {"algorithm": optimize_options}

        check_optimization_options(
            optimize_options,
            usage="estimate_ml",
            algorithm_mandatory=True,
        )

    jac_case = get_derivative_case(jacobian)
    hess_case = get_derivative_case(hessian)

    check_numdiff_options(numdiff_options, "estimate_ml")
    numdiff_options = {} if numdiff_options in (None, False) else numdiff_options
    loglike_kwargs = {} if loglike_kwargs is None else loglike_kwargs
    constraints = [] if constraints is None else constraints
    jacobian_kwargs = {} if jacobian_kwargs is None else jacobian_kwargs
    hessian_kwargs = {} if hessian_kwargs is None else hessian_kwargs

    # ==================================================================================
    # Calculate estimates via maximization (if necessary)
    # ==================================================================================

    if is_optimized:
        estimates = params
        opt_res = None
    else:
        opt_res = maximize(
            criterion=loglike,
            criterion_kwargs=loglike_kwargs,
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            constraints=constraints,
            logging=logging,
            log_options=log_options,
            **optimize_options,
        )
        estimates = opt_res.params

    # ==================================================================================
    # Do first function evaluations at estimated parameters
    # ==================================================================================

    try:
        loglike_eval = loglike(estimates, **loglike_kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as e:
        msg = "Error while evaluating loglike at estimated params."
        raise InvalidFunctionError(msg) from e

    if callable(jacobian):
        try:
            jacobian_eval = jacobian(estimates, **jacobian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating closed form jacobian at estimated params."
            raise InvalidFunctionError(msg) from e
    else:
        jacobian_eval = None

    if callable(hessian):
        try:
            hessian_eval = hessian(estimates, **hessian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating closed form hessian at estimated params."
            raise InvalidFunctionError(msg) from e
    else:
        hessian_eval = None

    # ==================================================================================
    # Get the converter for params and function outputs
    # ==================================================================================

    converter, internal_estimates = get_converter(
        params=estimates,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=loglike_eval,
        primary_key="contributions",
        scaling=False,
        scaling_options=None,
        derivative_eval=jacobian_eval,
    )

    # ==================================================================================
    # Calculate internal jacobian
    # ==================================================================================

    if jac_case == "closed-form":
        int_jac = converter.derivative_to_internal(jacobian_eval,
                                                   internal_estimates.values)
    elif jac_case == "numerical":

        def func(x):
            p = converter.params_from_internal(x)
            loglike_eval = loglike(p, **loglike_kwargs)["contributions"]
            out = converter.func_to_internal(loglike_eval)
            return out

        jac_res = first_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )

        int_jac = jac_res["derivative"]
    else:
        int_jac = None

    if constraints in [None, []] and jacobian_eval is None and int_jac is not None:
        loglike_contribs = loglike_eval
        if isinstance(loglike_contribs, dict) and "contributions" in loglike_contribs:
            loglike_contribs = loglike_contribs["contributions"]

        jacobian_eval = matrix_to_block_tree(
            int_jac,
            outer_tree=loglike_contribs,
            inner_tree=estimates,
        )

    if jacobian_eval is None:
        _no_jac_reason = (
            "no closed form jacobian was provided and there are constraints")
    else:
        _no_jac_reason = None
    # ==================================================================================
    # Calculate internal Hessian
    # ==================================================================================

    if hess_case == "skip":
        int_hess = None
    elif hess_case == "numerical":

        def func(x):
            p = converter.params_from_internal(x)
            loglike_eval = loglike(p, **loglike_kwargs)["value"]
            out = converter.func_to_internal(loglike_eval)
            return out

        hess_res = second_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )
        int_hess = hess_res["derivative"]
    elif hess_case == "closed-form" and constraints:
        raise NotImplementedError(
            "Closed-form Hessians are not yet compatible with constraints.")
    elif hess_case == "closed-form":
        int_hess = block_tree_to_matrix(
            hessian_eval,
            outer_tree=params,
            inner_tree=params,
        )
    else:
        raise ValueError(f"Invalid hess_case: {hess_case}")

    if constraints in [None, []] and hessian_eval is None and int_hess is not None:
        hessian_eval = matrix_to_block_tree(
            int_hess,
            outer_tree=params,
            inner_tree=params,
        )

    if hessian_eval is None:
        if hess_case == "skip":
            _no_hess_reason = "the hessian calculation was explicitly skipped."
        else:
            _no_hess_reason = (
                "no closed form hessian was provided and there are constraints"
            )
    else:
        _no_hess_reason = None

    # ==================================================================================
    # create a LikelihoodResult object
    # ==================================================================================

    free_estimates = calculate_free_estimates(estimates, internal_estimates)

    res = LikelihoodResult(
        _params=estimates,
        _converter=converter,
        _optimize_result=opt_res,
        _jacobian=jacobian_eval,
        _no_jacobian_reason=_no_jac_reason,
        _hessian=hessian_eval,
        _no_hessian_reason=_no_hess_reason,
        _internal_jacobian=int_jac,
        _internal_hessian=int_hess,
        _design_info=design_info,
        _internal_estimates=internal_estimates,
        _free_estimates=free_estimates,
        _has_constraints=constraints not in [None, []],
    )

    return res
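
# A minimal usage sketch (assumes estimagic is installed; "scipy_lbfgsb" is one of
# its built-in algorithm names): estimate the mean of normal data by maximum
# likelihood.
import numpy as np

rng = np.random.default_rng(0)
y = rng.normal(loc=1.0, size=100)

def loglike(params):
    contributions = -0.5 * (y - params["mean"]) ** 2
    return {"contributions": contributions, "value": contributions.sum()}

res = estimate_ml(
    loglike=loglike,
    params={"mean": 0.0},
    optimize_options="scipy_lbfgsb",
)
res.summary()  # parameter estimates, standard errors and confidence intervals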
Example #9
def estimate_msm(
    simulate_moments,
    empirical_moments,
    moments_cov,
    params,
    optimize_options,
    *,
    lower_bounds=None,
    upper_bounds=None,
    constraints=None,
    logging=False,
    log_options=None,
    simulate_moments_kwargs=None,
    weights="diagonal",
    numdiff_options=None,
    jacobian=None,
    jacobian_kwargs=None,
):
    """Do a method of simulated moments or indirect inference estimation.

    This is a high level interface for our lower level functions for minimization,
    numerical differentiation, inference and sensitivity analysis. It does the full
    workflow for MSM or indirect inference estimation with just one function call.

    While we have good defaults, you can still configure each aspect of each step
    via the optional arguments of this function. If you find it easier to do the
    minimization separately, you can do so and just provide the optimal parameters as
    ``params`` and set ``optimize_options=False``.

    Args:
        simulate_moments (callable): Function that takes params and potentially other
            keyword arguments and returns a pytree with simulated moments. If the
            function returns a dict containing the key ``"simulated_moments"`` we only
            use the value corresponding to that key. Other entries are stored in the
            log database if you use logging.

        empirical_moments (pytree): A pytree with the same structure as the
            result of ``simulate_moments``.
        moments_cov (pytree): A block-pytree containing the covariance
            matrix of the empirical moments. This is typically calculated with
            our ``get_moments_cov`` function.
        params (pytree): A pytree containing the estimated or start parameters of the
            model. If the supplied parameters are estimated parameters, set
            optimize_options to False. Pytrees can be a numpy array, a pandas Series, a
            DataFrame with "value" column, a float and any kind of (nested) dictionary
            or list containing these elements. See :ref:`params` for examples.
        optimize_options (dict, str or False): Keyword arguments that govern the
            numerical optimization. Valid entries are all arguments of
            :func:`~estimagic.optimization.optimize.minimize` except for those that can
            be passed explicitly to ``estimate_msm``.  If you pass False as
            ``optimize_options`` you signal that ``params`` are already
            the optimal parameters and no numerical optimization is needed. If you pass
            a str as optimize_options it is used as the ``algorithm`` option.
        lower_bounds (pytree): A pytree with the same structure as params with lower
            bounds for the parameters. Can be ``-np.inf`` for parameters with no lower
            bound.
        upper_bounds (pytree): As lower_bounds. Can be ``np.inf`` for parameters with
            no upper bound.
        simulate_moments_kwargs (dict): Additional keyword arguments for
            ``simulate_moments``.
        weights (str): One of "diagonal" (default) or "optimal". Custom weighting
            matrices are not yet supported. Note that "optimal" refers to the
            asymptotically optimal weighting matrix and is often not a good choice
            due to large finite sample bias.
        constraints (list, dict): List with constraint dictionaries or single dict.
            See :ref:`constraints`.
        logging (pathlib.Path, str or False): Path to sqlite3 file (which typically has
            the file extension ``.db``). If the file does not exist, it will be created.
            The dashboard can only be used when logging is used.
        log_options (dict): Additional keyword arguments to configure the logging.

            - "fast_logging" (bool):
                A boolean that determines if "unsafe" settings are used to speed up
                write processes to the database. This should only be used for very short
                running criterion functions where the main purpose of the log is a
                real-time dashboard and it would not be catastrophic to get a corrupted
                database in case of a sudden system shutdown. If one evaluation of the
                criterion function (and gradient if applicable) takes more than 100 ms,
                the logging overhead is negligible.
            - "if_table_exists" (str):
                One of "extend", "replace", "raise". What to do if the tables we want to
                write to already exist. Default "extend".
            - "if_database_exists" (str):
                One of "extend", "replace", "raise". What to do if the database we want
                to write to already exists. Default "extend".
        numdiff_options (dict): Keyword arguments for the calculation of numerical
            derivatives for the calculation of standard errors. See
            :ref:`first_derivative` for details. Note that by default we increase the
            step_size by a factor of 2 compared to the rule of thumb for optimal
            step sizes. This is because many msm criterion functions are slightly noisy.
        jacobian (callable): A function that takes ``params`` and
            potentially other keyword arguments and returns the jacobian of
            simulate_moments with respect to the params.
        jacobian_kwargs (dict): Additional keyword arguments for the jacobian function.

    Returns:
        MomentsResult: A MomentsResult object with the estimated parameters, standard
            errors, sensitivity measures and covariance matrix of the parameters.

    """
    # ==================================================================================
    # Check and process inputs
    # ==================================================================================

    if weights not in ["diagonal", "optimal"]:
        raise NotImplementedError(
            "Custom weighting matrices are not yet implemented.")

    is_optimized = optimize_options is False

    if not is_optimized:
        if isinstance(optimize_options, str):
            optimize_options = {"algorithm": optimize_options}

        check_optimization_options(
            optimize_options,
            usage="estimate_msm",
            algorithm_mandatory=True,
        )

    jac_case = get_derivative_case(jacobian)

    check_numdiff_options(numdiff_options, "estimate_msm")

    numdiff_options = (
        {} if numdiff_options in (None, False) else numdiff_options.copy()
    )
    if "scaling_factor" not in numdiff_options:
        numdiff_options["scaling_factor"] = 2

    weights, internal_weights = get_weighting_matrix(
        moments_cov=moments_cov,
        method=weights,
        empirical_moments=empirical_moments,
        return_type="pytree_and_array",
    )

    internal_moments_cov = block_tree_to_matrix(
        moments_cov,
        outer_tree=empirical_moments,
        inner_tree=empirical_moments,
    )

    constraints = [] if constraints is None else constraints
    jacobian_kwargs = {} if jacobian_kwargs is None else jacobian_kwargs
    simulate_moments_kwargs = (
        {} if simulate_moments_kwargs is None else simulate_moments_kwargs
    )

    # ==================================================================================
    # Calculate estimates via minimization (if necessary)
    # ==================================================================================

    if is_optimized:
        estimates = params
        opt_res = None
    else:
        funcs = get_msm_optimization_functions(
            simulate_moments=simulate_moments,
            empirical_moments=empirical_moments,
            weights=weights,
            simulate_moments_kwargs=simulate_moments_kwargs,
            # Always pass None because we do not support closed form jacobians during
            # optimization yet. Otherwise we would get a NotImplementedError
            jacobian=None,
            jacobian_kwargs=jacobian_kwargs,
        )

        opt_res = minimize(
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            constraints=constraints,
            logging=logging,
            log_options=log_options,
            params=params,
            **funcs,  # contains the criterion func and possibly more
            **optimize_options,
        )

        estimates = opt_res.params

    # ==================================================================================
    # do first function evaluations
    # ==================================================================================

    try:
        sim_mom_eval = simulate_moments(estimates, **simulate_moments_kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as e:
        msg = "Error while evaluating simulate_moments at estimated params."
        raise InvalidFunctionError(msg) from e

    if callable(jacobian):
        try:
            jacobian_eval = jacobian(estimates, **jacobian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating derivative at estimated params."
            raise InvalidFunctionError(msg) from e

    else:
        jacobian_eval = None

    # ==================================================================================
    # get converter for params and function outputs
    # ==================================================================================

    def helper(params):
        raw = simulate_moments(params, **simulate_moments_kwargs)
        if isinstance(raw, dict) and "simulated_moments" in raw:
            out = {"contributions": raw["simulated_moments"]}
        else:
            out = {"contributions": raw}
        return out

    if isinstance(sim_mom_eval, dict) and "simulated_moments" in sim_mom_eval:
        func_eval = {"contributions": sim_mom_eval["simulated_moments"]}
    else:
        func_eval = {"contributions": sim_mom_eval}

    converter, internal_estimates = get_converter(
        params=estimates,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=func_eval,
        primary_key="contributions",
        scaling=False,
        scaling_options=None,
        derivative_eval=jacobian_eval,
    )

    # ==================================================================================
    # Calculate internal jacobian
    # ==================================================================================

    if jac_case == "closed-form":
        x = converter.params_to_internal(estimates)
        int_jac = converter.derivative_to_internal(jacobian_eval, x)
    else:

        def func(x):
            p = converter.params_from_internal(x)
            sim_mom_eval = helper(p)
            out = converter.func_to_internal(sim_mom_eval)
            return out

        int_jac = first_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )["derivative"]

    # ==================================================================================
    # Calculate external jac (if no constraints and not closed form )
    # ==================================================================================

    if constraints in [None, []] and jacobian_eval is None and int_jac is not None:
        jacobian_eval = matrix_to_block_tree(
            int_jac,
            outer_tree=empirical_moments,
            inner_tree=estimates,
        )

    if jacobian_eval is None:
        _no_jac_reason = (
            "no closed form jacobian was provided and there are constraints")
    else:
        _no_jac_reason = None

    # ==================================================================================
    # Create MomentsResult
    # ==================================================================================

    free_estimates = calculate_free_estimates(estimates, internal_estimates)

    res = MomentsResult(
        _params=estimates,
        _weights=weights,
        _converter=converter,
        _internal_weights=internal_weights,
        _internal_moments_cov=internal_moments_cov,
        _internal_jacobian=int_jac,
        _jacobian=jacobian_eval,
        _no_jacobian_reason=_no_jac_reason,
        _empirical_moments=empirical_moments,
        _internal_estimates=internal_estimates,
        _free_estimates=free_estimates,
        _has_constraints=constraints not in [None, []],
    )
    return res
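
# A minimal usage sketch (assumes estimagic is installed; "scipy_neldermead" is one
# of its built-in algorithm names): match the mean and standard deviation of a
# normal sample. The simulator redraws noise on every call, which is typical for
# MSM criteria.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
data = pd.DataFrame({"y": rng.normal(loc=1.0, scale=2.0, size=1_000)})

def calculate_moments(data):
    return {"mean": data["y"].mean(), "sd": data["y"].std()}

empirical_moments = calculate_moments(data)
moments_cov = get_moments_cov(data, calculate_moments)

def simulate_moments(params, n_sim=10_000):
    sim = params["mean"] + params["sd"] * rng.normal(size=n_sim)
    return {"mean": sim.mean(), "sd": sim.std()}

res = estimate_msm(
    simulate_moments=simulate_moments,
    empirical_moments=empirical_moments,
    moments_cov=moments_cov,
    params={"mean": 0.0, "sd": 1.0},
    optimize_options="scipy_neldermead",
)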
Example #10
            raw = calculate_sensitivity_to_weighting(
                jac=jac,
                weights=weights,
                moments_cov=moments_cov,
                params_cov=params_cov,
            )
        else:
            raise ValueError(f"Invalid kind: {kind}")

        if return_type == "array":
            out = raw
        elif return_type == "pytree":
            out = matrix_to_block_tree(
                raw,
                outer_tree=self._params,
                inner_tree=self._empirical_moments,
            )
        elif return_type == "dataframe":
            registry = get_registry(extended=True)
            row_names = self._internal_estimates.names
            col_names = leaf_names(self._empirical_moments, registry=registry)
            out = pd.DataFrame(
                data=raw,
                index=row_names,
                columns=col_names,
            )
        else:
            msg = (
                f"Invalid return type: {return_type}. Valid are 'pytree', 'array' "
                "and 'dataframe'."
            )
            raise ValueError(msg)

        return out
Example #11
def first_derivative(
    func,
    params,
    *,
    func_kwargs=None,
    method="central",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=DEFAULT_N_CORES,
    error_handling="continue",
    batch_evaluator="joblib",
    return_func_value=False,
    return_info=False,
    key=None,
):
    """Evaluate first derivative of func at params according to method and step options.

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Jacobian of that function is calculated.

    The parameters and the function output can be estimagic-pytrees; for more details
    on estimagic-pytrees see :ref:`eeppytrees`. By default the resulting Jacobian will
    be returned as a block-pytree.

    For a detailed description of all options that influence the step size as well as an
    explanation of how steps are adjusted to bounds in case of a conflict, see
    :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        params (pytree): A pytree. See :ref:`params`.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of ["central", "forward", "backward"], default "central".
        n_steps (int): Number of steps needed. For central methods, this is
            the number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (numpy.ndarray, optional): 1d array of the same length as params.
            base_steps * scaling_factor is the absolute value of the first (and possibly
            only) step used in the finite differences approximation of the derivative.
            If base_steps * scaling_factor conflicts with bounds, the actual steps will
            be adjusted. If base_steps is not provided, it will be determined according
            to a rule of thumb as long as this does not conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is a numpy.ndarray, it needs to be as long as params.
            scaling_factor is useful if you want to increase or decrease the base_step
            relative to the rule-of-thumb or user provided base_step, for example to
            benchmark the effect of the step size. Default 1.
        lower_bounds (pytree): A pytree with the same structure as params with lower
            bounds for the parameters. Steps are adjusted so that they never violate
            these bounds.
        upper_bounds (pytree): As lower_bounds, but with upper bounds.
        step_ratio (float, numpy.array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. default 2.0. Has to be larger
            than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen to
            accommodate bounds. Must have same length as params. By default min_steps
            is equal to base_steps, i.e. the step size is not decreased beyond what is
            optimal according to the rule of thumb.
        f0 (numpy.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        error_handling (str): One of "continue" (catch errors and continue to calculate
            derivative estimates. In this case, some derivative estimates can be
            missing but no errors are raised), "raise" (catch errors and continue
            to calculate derivative estimates at first but raise an error if all
            evaluations for one parameter failed) and "raise_strict" (raise an error
            as soon as a function evaluation fails).
        batch_evaluator (str or callable): Name of a pre-implemented batch evaluator
            (currently 'joblib' and 'pathos_mp') or Callable with the same interface
            as the estimagic batch_evaluators.
        return_func_value (bool): If True, return function value at params, stored in
            output dict under "func_value". Default False. This is useful when using
            first_derivative during optimization.
        return_info (bool): If True, return additional information on function
            evaluations and internal derivative candidates, stored in output dict under
            "func_evals" and "derivative_candidates". Derivative candidates are only
            returned if n_steps > 1. Default False.
        key (str): If func returns a dictionary, take the derivative of
            func(params)[key].

    Returns:
        result (dict): Result dictionary with keys:
            - "derivative" (numpy.ndarray, pandas.Series or pandas.DataFrame): The
                estimated first derivative of func at params. The shape of the output
                depends on the dimension of params and func(params):

                - f: R -> R leads to shape (1,), usually called derivative
                - f: R^m -> R leads to shape (m, ), usually called Gradient
                - f: R -> R^n leads to shape (n, 1), usually called Jacobian
                - f: R^m -> R^n leads to shape (n, m), usually called Jacobian

            - "func_value" (numpy.ndarray, pandas.Series or pandas.DataFrame): Function
                value at params, returned if return_func_value is True.

            - "func_evals" (pandas.DataFrame): Function evaluations produced by internal
                derivative method, returned if return_info is True.

            - "derivative_candidates" (pandas.DataFrame): Derivative candidates from
                Richardson extrapolation, returned if return_info is True and n_steps >
                1.

    """
    _is_fast_params = isinstance(params, np.ndarray) and params.ndim == 1
    registry = get_registry(extended=True)

    lower_bounds, upper_bounds = get_bounds(params, lower_bounds, upper_bounds)

    # handle keyword arguments
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)

    # convert params to numpy
    if not _is_fast_params:
        x, params_treedef = tree_flatten(params, registry=registry)
        x = np.array(x, dtype=np.float64)
    else:
        x = params.astype(float)

    if np.isnan(x).any():
        raise ValueError("The parameter vector must not contain NaNs.")

    # generate the step array
    steps = generate_steps(
        x=x,
        method=method,
        n_steps=n_steps,
        target="first_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    # generate parameter vectors at which func has to be evaluated as numpy arrays
    evaluation_points = []
    for step_arr in steps:
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points.append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points.append(point)

    # convert the numpy arrays to whatever is needed by func
    if not _is_fast_params:
        evaluation_points = [
            # entries are either a numpy.ndarray or np.nan
            _unflatten_if_not_nan(p, params_treedef, registry)
            for p in evaluation_points
        ]

    # we always evaluate f0, so we can fall back to one-sided derivatives if
    # two-sided derivatives fail. The extra cost is negligible in most cases.
    if f0 is None:
        evaluation_points.append(params)

    # do the function evaluations, including error handling
    batch_error_handling = "raise" if error_handling == "raise_strict" else "continue"
    raw_evals = _nan_skipping_batch_evaluator(
        func=partialed_func,
        arguments=evaluation_points,
        n_cores=n_cores,
        error_handling=batch_error_handling,
        batch_evaluator=batch_evaluator,
    )

    # extract information on exceptions that occurred during function evaluations
    exc_info = "\n\n".join([val for val in raw_evals if isinstance(val, str)])
    raw_evals = [val if not isinstance(val, str) else np.nan for val in raw_evals]

    # store full function value at params as func_value and a processed version of it
    # that we need to calculate derivatives as f0
    if f0 is None:
        f0 = raw_evals[-1]
        raw_evals = raw_evals[:-1]
    func_value = f0

    use_key = key is not None and isinstance(f0, dict)
    f0_tree = f0[key] if use_key else f0
    scalar_out = np.isscalar(f0_tree)
    vector_out = isinstance(f0_tree, np.ndarray) and f0_tree.ndim == 1

    if scalar_out:
        f0 = np.array([f0_tree], dtype=float)
    elif vector_out:
        f0 = f0_tree.astype(float)
    else:
        f0 = tree_leaves(f0_tree, registry=registry)
        f0 = np.array(f0, dtype=np.float64)

    # convert the raw evaluations to numpy arrays
    raw_evals = _convert_evals_to_numpy(
        raw_evals=raw_evals,
        key=key,
        registry=registry,
        is_scalar_out=scalar_out,
        is_vector_out=vector_out,
    )

    # apply finite difference formulae
    evals = np.array(raw_evals).reshape(2, n_steps, len(x), -1)
    evals = np.transpose(evals, axes=(0, 1, 3, 2))
    evals = Evals(pos=evals[0], neg=evals[1])

    jac_candidates = {}
    for m in ["forward", "backward", "central"]:
        jac_candidates[m] = finite_differences.jacobian(evals, steps, f0, m)

    # get the best derivative estimate out of all derivative estimates that could be
    # calculated, given the function evaluations.
    orders = {
        "central": ["central", "forward", "backward"],
        "forward": ["forward", "backward"],
        "backward": ["backward", "forward"],
    }

    if n_steps == 1:
        jac = _consolidate_one_step_derivatives(jac_candidates, orders[method])
        updated_candidates = None
    else:
        richardson_candidates = _compute_richardson_candidates(
            jac_candidates, steps, n_steps
        )
        jac, updated_candidates = _consolidate_extrapolated(richardson_candidates)

    # raise error if necessary
    if error_handling in ("raise", "raise_strict") and np.isnan(jac).any():
        raise Exception(exc_info)

    # results processing
    if _is_fast_params and vector_out:
        derivative = jac
    elif _is_fast_params and scalar_out:
        derivative = jac.flatten()
    else:
        derivative = matrix_to_block_tree(jac, f0_tree, params)

    result = {"derivative": derivative}
    if return_func_value:
        result["func_value"] = func_value
    if return_info:
        info = _collect_additional_info(
            steps, evals, updated_candidates, target="first_derivative"
        )
        result = {**result, **info}
    return result
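
# A minimal usage sketch (assumes first_derivative is importable): gradient of a
# scalar function on the fast numpy path.
import numpy as np

def sphere(params):
    return (params ** 2).sum()

res = first_derivative(sphere, params=np.arange(3.0))
print(res["derivative"])  # approximately [0. 2. 4.]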