def _get_derivative_flatten(registry, primary_key, params, func_eval, derivative_eval):
    # gradient case
    if primary_key == "value":

        def derivative_flatten(derivative_eval):
            flat = np.array(
                tree_just_flatten(derivative_eval, registry=registry)
            ).astype(float)
            return flat

    # jacobian case
    else:
        key, _ = _get_best_key_and_aggregator(primary_key, func_eval)

        def derivative_flatten(derivative_eval):
            flat = block_tree_to_matrix(
                derivative_eval,
                outer_tree=func_eval[key],
                inner_tree=params,
            )
            return flat

    if derivative_eval is not None:
        try:
            derivative_flatten(derivative_eval)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "The output of derivative and criterion cannot be aligned."
            raise InvalidFunctionError(msg) from e

    return derivative_flatten
def _get_best_key_and_aggregator(needed_key, available_keys):
    if needed_key in available_keys:
        key = needed_key
        if needed_key == "value":
            aggregate = lambda x: float(x[0])
        else:
            aggregate = lambda x: np.array(x).astype(float)
    elif needed_key == "contributions" and "root_contributions" in available_keys:
        key = "root_contributions"
        aggregate = lambda x: np.array(x).astype(float) ** 2
    elif needed_key == "value" and "contributions" in available_keys:
        key = "contributions"
        aggregate = lambda x: float(np.sum(x))
    elif needed_key == "value" and "root_contributions" in available_keys:
        key = "root_contributions"
        aggregate = lambda x: float((np.array(x) ** 2).sum())
    else:
        msg = (
            "The optimizer you requested requires a criterion function that returns "
            f"a dictionary with the entry '{needed_key}'. Your function returns a "
            f"dictionary that only contains the entries {available_keys}."
        )
        raise InvalidFunctionError(msg)

    return key, aggregate
def _get_func_flatten(registry, func_eval, primary_key):

    if isscalar(func_eval):
        if primary_key == "value":
            func_flatten = lambda func_eval: float(func_eval)
        else:
            msg = (
                "criterion returns a scalar value but the requested optimizer "
                "requires a vector or pytree output. criterion can either return this "
                f"output alone or inside a dictionary with the key {primary_key}."
            )
            raise InvalidFunctionError(msg)
    elif not isinstance(func_eval, dict):
        raise ValueError()  # xxxx
    else:
        key, aggregate = _get_best_key_and_aggregator(primary_key, func_eval)

        def func_flatten(func_eval):
            # the if condition is necessary, such that we can also accept func_evals
            # where the primary entry has already been extracted. This is for example
            # necessary if the criterion_and_derivative returns only the relevant
            # entry of criterion, whereas criterion returns a dict.
            if isinstance(func_eval, dict) and key in func_eval:
                func_eval = func_eval[key]
            return aggregate(tree_just_flatten(func_eval, registry=registry))

    return func_flatten
示例#4
0
def process_func_of_params(func,
                           kwargs,
                           name="your function",
                           skip_checks=False):
    """Partial user provided keyword arguments into a function of params.

    Args:
        func (callable): The function into which ``kwargs`` are partialled.
        kwargs (dict or None): Keyword arguments provided by the user.
        name (str): Name of the function that is used in error messages.
        skip_checks (bool): If True, the checks on the remaining free arguments
            are skipped. If in addition ``kwargs`` is None or empty, ``func``
            is returned unchanged.

    Returns:
        callable: ``func`` itself on the fast path, otherwise
            ``functools.partial(func, **kept)`` where ``kept`` are the keyword
            arguments that ``filter_kwargs`` reports as compatible.

    Raises:
        InvalidKwargsError: If ``filter_kwargs`` reports ignored keyword
            arguments, if no free argument is left after applying the kept
            keyword arguments, or if more than one required argument remains.
        InvalidFunctionError: If ``func`` has no free argument and no keyword
            arguments were kept.

    """
    # fast path
    if skip_checks and kwargs in (None, {}):
        return func

    kept, ignored = filter_kwargs(func, kwargs)

    if ignored:
        possibilities = [
            p for p in inspect.signature(func).parameters if p != "params"
        ]
        proposals = [
            propose_alternatives(arg, possibilities, 1)[0] for arg in ignored
        ]

        # One hint per line; without a separator several invalid kwargs would
        # run together into a single unreadable line.
        hints = "\n".join(f"{arg}: Did you mean {prop}?"
                          for arg, prop in zip(ignored, proposals))
        msg = (
            "The following user provided keyword arguments are not compatible with "
            f"{name}:\n\n") + hints

        raise InvalidKwargsError(msg)

    out = partial(func, **kept)

    if not skip_checks:

        unpartialled_args = get_unpartialled_arguments(out)
        no_default_args = get_arguments_without_default(out)

        no_free_argument_left = len(unpartialled_args) < 1

        if no_free_argument_left and kept:
            raise InvalidKwargsError(
                f"Too many keyword arguments for {name}. After applying all keyword "
                "arguments there must be at least one free argument (the params) left."
            )
        elif no_free_argument_left:
            raise InvalidFunctionError(
                f"{name} must have at least one free argument.")

        required_args = unpartialled_args.intersection(no_default_args)
        too_many_required_arguments = len(required_args) > 1

        # Try to discover if we have a jax calculated jacobian that has a weird
        # signature that would not pass this test:
        skip_because_of_jax = required_args == {"args", "kwargs"}

        if too_many_required_arguments and not skip_because_of_jax:
            raise InvalidKwargsError(
                f"Too few keyword arguments for {name}. After applying all keyword "
                "arguments at most one required argument (the params) should remain. "
                "in your case the following required arguments remain: "
                f"{required_args}.")

    return out
示例#5
0
def estimate_ml(
    loglike,
    params,
    optimize_options,
    *,
    lower_bounds=None,
    upper_bounds=None,
    constraints=None,
    logging=False,
    log_options=None,
    loglike_kwargs=None,
    numdiff_options=None,
    jacobian=None,
    jacobian_kwargs=None,
    hessian=None,
    hessian_kwargs=None,
    design_info=None,
):
    """Do a maximum likelihood (ml) estimation.

    This is a high level interface of our lower level functions for maximization,
    numerical differentiation and inference. It does the full workflow for maximum
    likelihood estimation with just one function call.

    While we have good defaults, you can still configure each aspect of each step
    via the optional arguments of this function. If you find it easier to do the
    maximization separately, you can do so and just provide the optimal parameters as
    ``params`` and set ``optimize_options=False``.

    Args:
        loglike (callable): Likelihood function that takes a params (and potentially
            other keyword arguments) and returns a dictionary that has at least the
            entries "value" (a scalar float) and "contributions" (a 1d numpy array or
            pytree) with the log likelihood contribution per individual.
        params (pytree): A pytree containing the estimated or start parameters of the
            likelihood model. If the supplied parameters are estimated parameters, set
            optimize_options to False. Pytrees can be a numpy array, a pandas Series, a
            DataFrame with "value" column, a float and any kind of (nested) dictionary
            or list containing these elements. See :ref:`params` for examples.
        optimize_options (dict, str or False): Keyword arguments that govern the
            numerical optimization. Valid entries are all arguments of
            :func:`~estimagic.optimization.optimize.minimize` except for those that are
            passed explicitly to ``estimate_ml``. If you pass False as optimize_options
            you signal that ``params`` are already the optimal parameters and no
            numerical optimization is needed. If you pass a str as optimize_options it
            is used as the ``algorithm`` option.
        lower_bounds (pytree): A pytree with the same structure as params with lower
            bounds for the parameters. Can be ``-np.inf`` for parameters with no lower
            bound.
        upper_bounds (pytree): As lower_bounds. Can be ``np.inf`` for parameters with
            no upper bound.
        constraints (list, dict): List with constraint dictionaries or single dict.
            See :ref:`constraints`.
        logging (pathlib.Path, str or False): Path to sqlite3 file (which typically has
            the file extension ``.db``). If the file does not exist, it will be created.
            The dashboard can only be used when logging is used.
        log_options (dict): Additional keyword arguments to configure the logging.
            - "fast_logging": A boolean that determines if "unsafe" settings are used
            to speed up write processes to the database. This should only be used for
            very short running criterion functions where the main purpose of the log
            is a real-time dashboard and it would not be catastrophic to get a
            corrupted database in case of a sudden system shutdown. If one evaluation
            of the criterion function (and gradient if applicable) takes more than
            100 ms, the logging overhead is negligible.
            - "if_table_exists": (str) One of "extend", "replace", "raise". What to
            do if the tables we want to write to already exist. Default "extend".
            - "if_database_exists": (str): One of "extend", "replace", "raise". What to
            do if the database we want to write to already exists. Default "extend".
        loglike_kwargs (dict): Additional keyword arguments for loglike.
        numdiff_options (dict): Keyword arguments for the calculation of numerical
            derivatives for the calculation of standard errors. See
            :ref:`first_derivative` for details.
        jacobian (callable or None): A function that takes ``params`` and potentially
            other keyword arguments and returns the jacobian of loglike["contributions"]
            with respect to the params. Note that you only need to pass a Jacobian
            function if you have a closed form Jacobian. If you pass None, a numerical
            Jacobian will be calculated.
        jacobian_kwargs (dict): Additional keyword arguments for the Jacobian function.
        hessian (callable or None or False): A function that takes ``params`` and
            potentially other keyword arguments and returns the Hessian of
            loglike["value"] with respect to the params. If you pass None, a numerical
            Hessian will be calculated. If you pass ``False``, you signal that no
            Hessian should be calculated. Thus, no result that requires the Hessian will
            be calculated.
        hessian_kwargs (dict): Additional keyword arguments for the Hessian function.
        design_info (pandas.DataFrame): DataFrame with one row per observation that
            contains some or all of the variables "psu" (primary sampling unit),
            "strata" and "fpc" (finite population corrector). See
            :ref:`robust_likelihood_inference` for details.

    Returns:
        LikelihoodResult: A LikelihoodResult object.

    Raises:
        InvalidFunctionError: If evaluating ``loglike``, a closed form ``jacobian`` or
            a closed form ``hessian`` at the estimated params raises an exception.
        NotImplementedError: If a closed form Hessian is combined with constraints.
        ValueError: If the hessian case is none of "skip", "numerical" and
            "closed-form".

    """
    # ==================================================================================
    # Check and process inputs
    # ==================================================================================
    is_optimized = optimize_options is False

    if not is_optimized:
        if isinstance(optimize_options, str):
            optimize_options = {"algorithm": optimize_options}

        check_optimization_options(
            optimize_options,
            usage="estimate_ml",
            algorithm_mandatory=True,
        )

    jac_case = get_derivative_case(jacobian)
    hess_case = get_derivative_case(hessian)

    check_numdiff_options(numdiff_options, "estimate_ml")
    numdiff_options = {} if numdiff_options in (None, False) else numdiff_options
    loglike_kwargs = {} if loglike_kwargs is None else loglike_kwargs
    constraints = [] if constraints is None else constraints
    jacobian_kwargs = {} if jacobian_kwargs is None else jacobian_kwargs
    hessian_kwargs = {} if hessian_kwargs is None else hessian_kwargs

    # ==================================================================================
    # Calculate estimates via maximization (if necessary)
    # ==================================================================================

    if is_optimized:
        estimates = params
        opt_res = None
    else:
        opt_res = maximize(
            criterion=loglike,
            criterion_kwargs=loglike_kwargs,
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            constraints=constraints,
            logging=logging,
            log_options=log_options,
            **optimize_options,
        )
        estimates = opt_res.params

    # ==================================================================================
    # Do first function evaluations at estimated parameters
    # ==================================================================================

    try:
        loglike_eval = loglike(estimates, **loglike_kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as e:
        msg = "Error while evaluating loglike at estimated params."
        raise InvalidFunctionError(msg) from e

    if callable(jacobian):
        try:
            jacobian_eval = jacobian(estimates, **jacobian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating closed form jacobian at estimated params."
            raise InvalidFunctionError(msg) from e
    else:
        jacobian_eval = None

    if callable(hessian):
        try:
            hessian_eval = hessian(estimates, **hessian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating closed form hessian at estimated params."
            raise InvalidFunctionError(msg) from e
    else:
        hessian_eval = None

    # ==================================================================================
    # Get the converter for params and function outputs
    # ==================================================================================

    converter, internal_estimates = get_converter(
        params=estimates,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=loglike_eval,
        primary_key="contributions",
        scaling=False,
        scaling_options=None,
        derivative_eval=jacobian_eval,
    )

    # ==================================================================================
    # Calculate internal jacobian
    # ==================================================================================

    if jac_case == "closed-form":
        int_jac = converter.derivative_to_internal(
            jacobian_eval, internal_estimates.values
        )
    elif jac_case == "numerical":

        def func(x):
            p = converter.params_from_internal(x)
            loglike_eval = loglike(p, **loglike_kwargs)["contributions"]
            out = converter.func_to_internal(loglike_eval)
            return out

        jac_res = first_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )

        int_jac = jac_res["derivative"]
    else:
        # any other case (e.g. "skip"): no jacobian is calculated
        int_jac = None

    # Without constraints, a numerically calculated internal jacobian is converted
    # to a block tree and used as the external jacobian.
    if constraints in [None, []] and jacobian_eval is None and int_jac is not None:
        loglike_contribs = loglike_eval
        if isinstance(loglike_contribs, dict) and "contributions" in loglike_contribs:
            loglike_contribs = loglike_contribs["contributions"]

        jacobian_eval = matrix_to_block_tree(
            int_jac,
            outer_tree=loglike_contribs,
            inner_tree=estimates,
        )

    if jacobian_eval is None:
        # Mirror the hessian handling below: a skipped jacobian must not be reported
        # as missing because of constraints.
        if jac_case == "skip":
            _no_jac_reason = "the jacobian calculation was explicitly skipped."
        else:
            _no_jac_reason = (
                "no closed form jacobian was provided and there are constraints"
            )
    else:
        _no_jac_reason = None

    # ==================================================================================
    # Calculate internal Hessian
    # ==================================================================================

    if hess_case == "skip":
        int_hess = None
    elif hess_case == "numerical":

        def func(x):
            p = converter.params_from_internal(x)
            loglike_eval = loglike(p, **loglike_kwargs)["value"]
            out = converter.func_to_internal(loglike_eval)
            return out

        hess_res = second_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )
        int_hess = hess_res["derivative"]
    elif hess_case == "closed-form" and constraints:
        raise NotImplementedError(
            "Closed-form Hessians are not yet compatible with constraints."
        )
    elif hess_case == "closed-form":
        int_hess = block_tree_to_matrix(
            hessian_eval,
            outer_tree=params,
            inner_tree=params,
        )
    else:
        raise ValueError()

    # Without constraints, a numerically calculated internal Hessian is converted
    # to a block tree and used as the external Hessian.
    if constraints in [None, []] and hessian_eval is None and int_hess is not None:
        hessian_eval = matrix_to_block_tree(
            int_hess,
            outer_tree=params,
            inner_tree=params,
        )

    if hessian_eval is None:
        if hess_case == "skip":
            _no_hess_reason = "the hessian calculation was explicitly skipped."
        else:
            _no_hess_reason = (
                "no closed form hessian was provided and there are constraints"
            )
    else:
        _no_hess_reason = None

    # ==================================================================================
    # create a LikelihoodResult object
    # ==================================================================================

    free_estimates = calculate_free_estimates(estimates, internal_estimates)

    res = LikelihoodResult(
        _params=estimates,
        _converter=converter,
        _optimize_result=opt_res,
        _jacobian=jacobian_eval,
        _no_jacobian_reason=_no_jac_reason,
        _hessian=hessian_eval,
        _no_hessian_reason=_no_hess_reason,
        _internal_jacobian=int_jac,
        _internal_hessian=int_hess,
        _design_info=design_info,
        _internal_estimates=internal_estimates,
        _free_estimates=free_estimates,
        _has_constraints=constraints not in [None, []],
    )

    return res
示例#6
0
def estimate_msm(
    simulate_moments,
    empirical_moments,
    moments_cov,
    params,
    optimize_options,
    *,
    lower_bounds=None,
    upper_bounds=None,
    constraints=None,
    logging=False,
    log_options=None,
    simulate_moments_kwargs=None,
    weights="diagonal",
    numdiff_options=None,
    jacobian=None,
    jacobian_kwargs=None,
):
    """Do a method of simulated moments or indirect inference estimation.

    This is a high level interface for our lower level functions for minimization,
    numerical differentiation, inference and sensitivity analysis. It does the full
    workflow for MSM or indirect inference estimation with just one function call.

    While we have good defaults, you can still configure each aspect of each step
    via the optional arguments of this function. If you find it easier to do the
    minimization separately, you can do so and just provide the optimal parameters as
    ``params`` and set ``optimize_options=False``.

    Args:
        simulate_moments (callable): Function that takes params and potentially other
            keyword arguments and returns a pytree with simulated moments. If the
            function returns a dict containing the key ``"simulated_moments"`` we only
            use the value corresponding to that key. Other entries are stored in the
            log database if you use logging.

        empirical_moments (pandas.Series): A pytree with the same structure as the
            result of ``simulate_moments``.
        moments_cov (pandas.DataFrame): A block-pytree containing the covariance
            matrix of the empirical moments. This is typically calculated with
            our ``get_moments_cov`` function.
        params (pytree): A pytree containing the estimated or start parameters of the
            model. If the supplied parameters are estimated parameters, set
            optimize_options to False. Pytrees can be a numpy array, a pandas Series, a
            DataFrame with "value" column, a float and any kind of (nested) dictionary
            or list containing these elements. See :ref:`params` for examples.
        optimize_options (dict, str or False): Keyword arguments that govern the
            numerical optimization. Valid entries are all arguments of
            :func:`~estimagic.optimization.optimize.minimize` except for those that can
            be passed explicitly to ``estimate_msm``. If you pass False as
            ``optimize_options`` you signal that ``params`` are already
            the optimal parameters and no numerical optimization is needed. If you pass
            a str as optimize_options it is used as the ``algorithm`` option.
        lower_bounds (pytree): A pytree with the same structure as params with lower
            bounds for the parameters. Can be ``-np.inf`` for parameters with no lower
            bound.
        upper_bounds (pytree): As lower_bounds. Can be ``np.inf`` for parameters with
            no upper bound.
        simulate_moments_kwargs (dict): Additional keyword arguments for
            ``simulate_moments``.
        weights (str): One of "diagonal" (default) or "optimal". Any other value
            (including "identity") currently raises a NotImplementedError.
            Note that "optimal" refers to the asymptotically optimal weighting matrix
            and is often not a good choice due to large finite sample bias.
        constraints (list, dict): List with constraint dictionaries or single dict.
            See :ref:`constraints`.
        logging (pathlib.Path, str or False): Path to sqlite3 file (which typically has
            the file extension ``.db``). If the file does not exist, it will be created.
            The dashboard can only be used when logging is used.
        log_options (dict): Additional keyword arguments to configure the logging.

            - "fast_logging" (bool):
                A boolean that determines if "unsafe" settings are used to speed up
                write processes to the database. This should only be used for very short
                running criterion functions where the main purpose of the log is a
                real-time dashboard and it would not be catastrophic to get a corrupted
                database in case of a sudden system shutdown. If one evaluation of the
                criterion function (and gradient if applicable) takes more than 100 ms,
                the logging overhead is negligible.
            - "if_table_exists" (str):
                One of "extend", "replace", "raise". What to do if the tables we want to
                write to already exist. Default "extend".
            - "if_database_exists" (str):
                One of "extend", "replace", "raise". What to do if the database we want
                to write to already exists. Default "extend".
        numdiff_options (dict): Keyword arguments for the calculation of numerical
            derivatives for the calculation of standard errors. See
            :ref:`first_derivative` for details. Note that by default we increase the
            step_size by a factor of 2 (``scaling_factor=2``) compared to the rule of
            thumb for optimal step sizes. This is because many msm criterion functions
            are slightly noisy.
        jacobian (callable): A function that takes ``params`` and
            potentially other keyword arguments and returns the jacobian of
            simulate_moments with respect to the params.
        jacobian_kwargs (dict): Additional keyword arguments for the jacobian function.

    Returns:
        MomentsResult: A MomentsResult object.

    Raises:
        NotImplementedError: If ``weights`` is neither "diagonal" nor "optimal".
        InvalidFunctionError: If evaluating ``simulate_moments`` or a closed form
            ``jacobian`` at the estimated params raises an exception.

    """
    # ==================================================================================
    # Check and process inputs
    # ==================================================================================

    if weights not in ["diagonal", "optimal"]:
        raise NotImplementedError(
            "Custom weighting matrices are not yet implemented."
        )

    is_optimized = optimize_options is False

    if not is_optimized:
        if isinstance(optimize_options, str):
            optimize_options = {"algorithm": optimize_options}

        check_optimization_options(
            optimize_options,
            usage="estimate_msm",
            algorithm_mandatory=True,
        )

    jac_case = get_derivative_case(jacobian)

    check_numdiff_options(numdiff_options, "estimate_msm")

    # copy the user's dict so that the default below does not modify it
    numdiff_options = (
        {} if numdiff_options in (None, False) else numdiff_options.copy()
    )
    if "scaling_factor" not in numdiff_options:
        numdiff_options["scaling_factor"] = 2

    weights, internal_weights = get_weighting_matrix(
        moments_cov=moments_cov,
        method=weights,
        empirical_moments=empirical_moments,
        return_type="pytree_and_array",
    )

    internal_moments_cov = block_tree_to_matrix(
        moments_cov,
        outer_tree=empirical_moments,
        inner_tree=empirical_moments,
    )

    constraints = [] if constraints is None else constraints
    jacobian_kwargs = {} if jacobian_kwargs is None else jacobian_kwargs
    simulate_moments_kwargs = (
        {} if simulate_moments_kwargs is None else simulate_moments_kwargs
    )

    # ==================================================================================
    # Calculate estimates via minimization (if necessary)
    # ==================================================================================

    if is_optimized:
        estimates = params
        opt_res = None
    else:
        funcs = get_msm_optimization_functions(
            simulate_moments=simulate_moments,
            empirical_moments=empirical_moments,
            weights=weights,
            simulate_moments_kwargs=simulate_moments_kwargs,
            # Always pass None because we do not support closed form jacobians during
            # optimization yet. Otherwise we would get a NotImplementedError
            jacobian=None,
            jacobian_kwargs=jacobian_kwargs,
        )

        opt_res = minimize(
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            constraints=constraints,
            logging=logging,
            log_options=log_options,
            params=params,
            **funcs,  # contains the criterion func and possibly more
            **optimize_options,
        )

        estimates = opt_res.params

    # ==================================================================================
    # do first function evaluations
    # ==================================================================================

    try:
        sim_mom_eval = simulate_moments(estimates, **simulate_moments_kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as e:
        msg = "Error while evaluating simulate_moments at estimated params."
        raise InvalidFunctionError(msg) from e

    if callable(jacobian):
        try:
            jacobian_eval = jacobian(estimates, **jacobian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating derivative at estimated params."
            raise InvalidFunctionError(msg) from e

    else:
        jacobian_eval = None

    # ==================================================================================
    # get converter for params and function outputs
    # ==================================================================================

    def _as_contributions(raw):
        # Single place that extracts the simulated moments from the raw output of
        # simulate_moments and stores them under the key "contributions".
        if isinstance(raw, dict) and "simulated_moments" in raw:
            raw = raw["simulated_moments"]
        return {"contributions": raw}

    def helper(params):
        return _as_contributions(simulate_moments(params, **simulate_moments_kwargs))

    func_eval = _as_contributions(sim_mom_eval)

    converter, internal_estimates = get_converter(
        params=estimates,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=func_eval,
        primary_key="contributions",
        scaling=False,
        scaling_options=None,
        derivative_eval=jacobian_eval,
    )

    # ==================================================================================
    # Calculate internal jacobian
    # ==================================================================================

    if jac_case == "closed-form":
        x = converter.params_to_internal(estimates)
        int_jac = converter.derivative_to_internal(jacobian_eval, x)
    else:

        def func(x):
            p = converter.params_from_internal(x)
            sim_mom_eval = helper(p)
            out = converter.func_to_internal(sim_mom_eval)
            return out

        int_jac = first_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )["derivative"]

    # ==================================================================================
    # Calculate external jac (if no constraints and not closed form )
    # ==================================================================================

    if constraints in [None, []] and jacobian_eval is None and int_jac is not None:
        jacobian_eval = matrix_to_block_tree(
            int_jac,
            outer_tree=empirical_moments,
            inner_tree=estimates,
        )

    if jacobian_eval is None:
        _no_jac_reason = (
            "no closed form jacobian was provided and there are constraints"
        )
    else:
        _no_jac_reason = None

    # ==================================================================================
    # Create MomentsResult
    # ==================================================================================

    free_estimates = calculate_free_estimates(estimates, internal_estimates)

    res = MomentsResult(
        _params=estimates,
        _weights=weights,
        _converter=converter,
        _internal_weights=internal_weights,
        _internal_moments_cov=internal_moments_cov,
        _internal_jacobian=int_jac,
        _jacobian=jacobian_eval,
        _no_jacobian_reason=_no_jac_reason,
        _empirical_moments=empirical_moments,
        _internal_estimates=internal_estimates,
        _free_estimates=free_estimates,
        _has_constraints=constraints not in [None, []],
    )
    return res
示例#7
0
def _optimize(
    direction,
    criterion,
    params,
    algorithm,
    *,
    lower_bounds=None,
    upper_bounds=None,
    soft_lower_bounds=None,
    soft_upper_bounds=None,
    criterion_kwargs,
    constraints,
    algo_options,
    derivative,
    derivative_kwargs,
    criterion_and_derivative,
    criterion_and_derivative_kwargs,
    numdiff_options,
    logging,
    log_options,
    error_handling,
    error_penalty,
    scaling,
    scaling_options,
    multistart,
    multistart_options,
    collect_history,
    skip_checks,
):
    """Run a minimization or maximization of criterion with the given algorithm.

    This is the shared implementation behind maximize and minimize. It accepts the
    same arguments as those two functions plus ``direction``, a string that is
    either "maximize" or "minimize".

    The function proceeds in stages: fill in defaults and (unless ``skip_checks``)
    validate the arguments, look up the algorithm, separate nonlinear constraints
    from the ones handled by reparametrization, evaluate the user functions once at
    the start params, build the converter to internal parameters, set up logging,
    run the (multistart) optimization and finally process the raw result.

    Returns are the same as in maximize and minimize.

    Raises:
        ValueError: If a least-squares optimizer is combined with
            ``direction="maximize"`` or if nonlinear constraints are passed to an
            algorithm that does not list "nonlinear_constraints" as argument.
        InvalidFunctionError: If criterion, derivative or criterion_and_derivative
            fail when evaluated at the start params.
        NotImplementedError: If multistart is combined with transforming
            constraints.

    """
    # ==================================================================================
    # Replace missing options by defaults and validate the arguments
    # ==================================================================================
    criterion_kwargs = _setdefault(criterion_kwargs, {})
    constraints = _setdefault(constraints, [])
    algo_options = _setdefault(algo_options, {})
    derivative_kwargs = _setdefault(derivative_kwargs, {})
    criterion_and_derivative_kwargs = _setdefault(criterion_and_derivative_kwargs, {})
    numdiff_options = _setdefault(numdiff_options, {})
    log_options = _setdefault(log_options, {})
    scaling_options = _setdefault(scaling_options, {})
    error_penalty = _setdefault(error_penalty, {})
    multistart_options = _setdefault(multistart_options, {})

    # a truthy logging argument is interpreted as path; falsy values stay untouched
    logging = Path(logging) if logging else logging

    if not skip_checks:
        check_optimize_kwargs(
            direction=direction,
            criterion=criterion,
            criterion_kwargs=criterion_kwargs,
            params=params,
            algorithm=algorithm,
            constraints=constraints,
            algo_options=algo_options,
            derivative=derivative,
            derivative_kwargs=derivative_kwargs,
            criterion_and_derivative=criterion_and_derivative,
            criterion_and_derivative_kwargs=criterion_and_derivative_kwargs,
            numdiff_options=numdiff_options,
            logging=logging,
            log_options=log_options,
            error_handling=error_handling,
            error_penalty=error_penalty,
            scaling=scaling,
            scaling_options=scaling_options,
            multistart=multistart,
            multistart_options=multistart_options,
        )

    # ==================================================================================
    # Look up the algorithm and the information attached to it
    # ==================================================================================
    algorithm_func, algo_info = process_user_algorithm(algorithm)
    supported_args = set(algo_info.arguments)
    primary_key = algo_info.primary_criterion_entry

    if primary_key == "root_contributions" and direction == "maximize":
        msg = (
            "Optimizers that exploit a least squares structure like {} can only be "
            "used for minimization."
        )
        raise ValueError(msg.format(algo_info.name))

    # ==================================================================================
    # Separate nonlinear constraints from those handled via reparametrization
    # ==================================================================================
    if isinstance(constraints, dict):
        constraints = [constraints]

    nonlinear_constraints = []
    reparametrized_constraints = []
    for constr in constraints:
        if constr["type"] == "nonlinear":
            nonlinear_constraints.append(constr)
        else:
            reparametrized_constraints.append(constr)

    if nonlinear_constraints and "nonlinear_constraints" not in supported_args:
        raise ValueError(
            f"Algorithm {algo_info.name} does not support nonlinear constraints."
        )

    # from here on, constraints only holds what is handled via reparametrization
    constraints = reparametrized_constraints

    # ==================================================================================
    # Collect the problem description that is stored in the log
    # ==================================================================================
    if logging:
        # the callables (criterion, derivative, criterion_and_derivative) are left out
        problem_data = {
            "direction": direction,
            "criterion_kwargs": criterion_kwargs,
            "algorithm": algorithm,
            "constraints": constraints,
            "algo_options": algo_options,
            "derivative_kwargs": derivative_kwargs,
            "criterion_and_derivative_kwargs": criterion_and_derivative_kwargs,
            "numdiff_options": numdiff_options,
            "log_options": log_options,
            "error_handling": error_handling,
            "error_penalty": error_penalty,
            "params": params,
        }

    # ==================================================================================
    # Partial the kwargs into the user provided functions
    # ==================================================================================
    def _partial_kwargs(func, kwargs, name):
        return process_func_of_params(
            func=func,
            kwargs=kwargs,
            name=name,
            skip_checks=skip_checks,
        )

    criterion = _partial_kwargs(criterion, criterion_kwargs, "criterion")

    # a dict of derivatives is reduced to the entry the algorithm works with
    if isinstance(derivative, dict):
        derivative = derivative.get(primary_key)
    if derivative is not None:
        derivative = _partial_kwargs(derivative, derivative_kwargs, "derivative")

    if isinstance(criterion_and_derivative, dict):
        criterion_and_derivative = criterion_and_derivative.get(primary_key)
    if criterion_and_derivative is not None:
        criterion_and_derivative = _partial_kwargs(
            criterion_and_derivative,
            criterion_and_derivative_kwargs,
            "criterion_and_derivative",
        )

    # ==================================================================================
    # Evaluate the user provided functions once at the start params
    # ==================================================================================
    def _evaluate_at_start(func, failure_msg):
        try:
            return func(params)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            raise InvalidFunctionError(failure_msg) from e

    first_crit_eval = _evaluate_at_start(
        criterion, "Error while evaluating criterion at start params."
    )

    # A separately provided derivative takes precedence over the derivative part of
    # criterion_and_derivative; without either, no derivative evaluation is used.
    used_deriv = None
    if derivative is not None:
        used_deriv = _evaluate_at_start(
            derivative, "Error while evaluating derivative at start params."
        )

    if criterion_and_derivative is not None:
        first_crit_and_deriv_eval = _evaluate_at_start(
            criterion_and_derivative,
            "Error while evaluating criterion_and_derivative at start params.",
        )
        if derivative is None:
            used_deriv = first_crit_and_deriv_eval[1]

    # ==================================================================================
    # Build the converter (tree flattening, constraints and scaling)
    # ==================================================================================
    converter, internal_params = get_converter(
        params=params,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=first_crit_eval,
        primary_key=primary_key,
        scaling=scaling,
        scaling_options=scaling_options,
        derivative_eval=used_deriv,
        soft_lower_bounds=soft_lower_bounds,
        soft_upper_bounds=soft_upper_bounds,
        add_soft_bounds=multistart,
    )

    # ==================================================================================
    # Set up the log database
    # ==================================================================================
    db_kwargs = {"database": None, "path": None, "fast_logging": False}
    if logging:
        problem_data["free_mask"] = internal_params.free_mask
        database = _create_and_initialize_database(logging, log_options, problem_data)
        db_kwargs = {
            "database": database,
            "path": logging,
            "fast_logging": log_options.get("fast_logging", False),
        }

    # ==================================================================================
    # Steps that depend on internal parameters or bounds
    # ==================================================================================
    if converter.has_transforming_constraints and multistart:
        raise NotImplementedError(
            "multistart optimizations are not yet compatible with transforming "
            "constraints."
        )

    numdiff_options = _fill_numdiff_options_with_defaults(
        numdiff_options=numdiff_options,
        lower_bounds=internal_params.lower_bounds,
        upper_bounds=internal_params.upper_bounds,
    )

    error_penalty_func = get_error_penalty_function(
        error_handling=error_handling,
        start_x=internal_params.values,
        start_criterion=converter.func_to_internal(first_crit_eval),
        error_penalty=error_penalty,
        primary_key=primary_key,
        direction=direction,
    )

    internal_constraints = process_nonlinear_constraints(
        nonlinear_constraints=nonlinear_constraints,
        params=params,
        converter=converter,
        numdiff_options=numdiff_options,
        skip_checks=skip_checks,
    )

    internal_start = internal_params.values

    # ==================================================================================
    # Assemble the internal algorithm
    # ==================================================================================
    internal_algorithm = get_final_algorithm(
        raw_algorithm=algorithm_func,
        algo_info=algo_info,
        valid_kwargs=supported_args,
        lower_bounds=internal_params.lower_bounds,
        upper_bounds=internal_params.upper_bounds,
        nonlinear_constraints=internal_constraints,
        algo_options=algo_options,
        logging=logging,
        db_kwargs=db_kwargs,
        collect_history=collect_history,
    )

    # ==================================================================================
    # Partial everything but x and task into the internal criterion template
    # ==================================================================================
    internal_criterion_and_derivative = functools.partial(
        internal_criterion_and_derivative_template,
        direction=direction,
        criterion=criterion,
        converter=converter,
        derivative=derivative,
        criterion_and_derivative=criterion_and_derivative,
        numdiff_options=numdiff_options,
        logging=logging,
        db_kwargs=db_kwargs,
        algo_info=algo_info,
        error_handling=error_handling,
        error_penalty_func=error_penalty_func,
    )

    # only hand over the tasks that the algorithm lists among its arguments
    problem_functions = {
        task: functools.partial(internal_criterion_and_derivative, task=task)
        for task in ("criterion", "derivative", "criterion_and_derivative")
        if task in supported_args
    }

    # ==================================================================================
    # Run the optimization
    # ==================================================================================
    if multistart:
        multistart_options = _fill_multistart_options_with_defaults(
            options=multistart_options,
            params=params,
            x=internal_start,
            params_to_internal=converter.params_to_internal,
        )

        raw_res = run_multistart_optimization(
            local_algorithm=internal_algorithm,
            primary_key=primary_key,
            problem_functions=problem_functions,
            x=internal_start,
            lower_sampling_bounds=internal_params.soft_lower_bounds,
            upper_sampling_bounds=internal_params.soft_upper_bounds,
            options=multistart_options,
            logging=logging,
            db_kwargs=db_kwargs,
            error_handling=error_handling,
        )
    else:
        scheduled_steps = [{"type": "optimization", "name": "optimization"}]

        step_ids = log_scheduled_steps_and_get_ids(
            steps=scheduled_steps,
            logging=logging,
            db_kwargs=db_kwargs,
        )

        raw_res = internal_algorithm(
            **problem_functions, x=internal_start, step_id=step_ids[0]
        )

    # ==================================================================================
    # Turn the raw optimizer output into the final result
    # ==================================================================================
    start_value = aggregate_func_output_to_value(
        converter.func_to_internal(first_crit_eval),
        primary_key,
    )

    result_info = {
        "start_criterion": start_value,
        "start_params": params,
        "algorithm": algo_info.name,
        "direction": direction,
        "n_free": internal_params.free_mask.sum(),
    }

    return process_internal_optimizer_result(
        raw_res,
        converter=converter,
        primary_key=primary_key,
        fixed_kwargs=result_info,
        skip_checks=skip_checks,
    )
# Example #8
# 0
def _check_validity_and_return_evaluation(c, params, skip_checks):
    """Check that a nonlinear constraint is valid and optionally evaluate it.

    The structural checks (function entries, bounds, selector) are always
    performed. Only the evaluation of the constraint function at params is
    skipped when ``skip_checks`` is True.

    Args:
        c (dict): The constraint. Entries read here are 'func' (required,
            callable), 'derivative' (optional, callable), either 'value' or at
            least one of 'lower_bounds' and 'upper_bounds', and optionally one of
            the selector entries 'selector', 'loc' or 'query'.
        params: The parameters at which selectors and the constraint function are
            tried out. Needs to be a pd.Series or pd.DataFrame for 'loc' and a
            pd.DataFrame for 'query'.
        skip_checks (bool): If True, the constraint function is not evaluated.

    Returns:
        constraint_eval: Evaluation of the constraint function at the selected
            params if skip_checks is False, else None.

    Raises:
        InvalidConstraintError: If entries of the constraint are missing,
            contradictory or of the wrong type, or if a 'loc' or 'query' selector
            cannot be applied to params.
        InvalidFunctionError: If calling the 'selector' function or evaluating
            the constraint function fails.

    """
    # ==================================================================================
    # check functions
    # ==================================================================================

    if "func" not in c:
        raise InvalidConstraintError(
            "Constraint needs to have entry 'func', representing the constraint "
            "function."
        )
    if not callable(c["func"]):
        raise InvalidConstraintError(
            "Entry 'func' in nonlinear constraints has to be callable."
        )

    if "derivative" in c and not callable(c["derivative"]):
        raise InvalidConstraintError(
            "Entry 'derivative' in nonlinear constraints has to be callable."
        )

    # ==================================================================================
    # check bounds
    # ==================================================================================

    has_lower_bounds = "lower_bounds" in c
    has_upper_bounds = "upper_bounds" in c

    # the presence of 'value' marks an equality constraint
    if "value" in c:
        if has_lower_bounds or has_upper_bounds:
            raise InvalidConstraintError(
                "Only one of 'value' or ('lower_bounds', 'upper_bounds') can be "
                "passed to a nonlinear constraint."
            )
    elif not (has_lower_bounds or has_upper_bounds):
        raise InvalidConstraintError(
            "For inequality constraint at least one of ('lower_bounds', "
            "'upper_bounds') has to be passed to the nonlinear constraint."
        )

    if has_lower_bounds and has_upper_bounds:
        bounds_are_ordered = np.all(
            np.array(c["lower_bounds"]) <= np.array(c["upper_bounds"])
        )
        if not bounds_are_ordered:
            raise InvalidConstraintError(
                "Lower bounds need to be less than or equal to upper bounds."
            )

    # ==================================================================================
    # check selector
    # ==================================================================================

    # Only the first selector entry found (in the order below) is checked.
    if "selector" in c:
        if not callable(c["selector"]):
            raise InvalidConstraintError(
                f"'selector' entry needs to be callable in constraint {c}."
            )
        try:
            c["selector"](params)
        except Exception as e:
            raise InvalidFunctionError(
                "Error when calling 'selector' function on params in constraint "
                f" {c}"
            ) from e

    elif "loc" in c:
        if not isinstance(params, (pd.Series, pd.DataFrame)):
            raise InvalidConstraintError(
                "params needs to be pd.Series or pd.DataFrame to use 'loc' selector in "
                f"constraint {c}."
            )
        try:
            params.loc[c["loc"]]
        except (KeyError, IndexError) as e:
            raise InvalidConstraintError("'loc' string is invalid.") from e

    elif "query" in c:
        if not isinstance(params, pd.DataFrame):
            raise InvalidConstraintError(
                "params needs to be pd.DataFrame to use 'query' selector in "
                f"constraints {c}."
            )
        try:
            params.query(c["query"])
        except Exception as e:
            raise InvalidConstraintError(
                f"'query' string is invalid in constraint {c}."
            ) from e

    # ==================================================================================
    # check that constraints can be evaluated
    # ==================================================================================

    if skip_checks:
        return None

    selector = _process_selector(c)

    try:
        constraint_eval = c["func"](selector(params))
    except Exception as e:
        raise InvalidFunctionError(
            f"Error when evaluating function of constraint {c}."
        ) from e

    return constraint_eval