Exemplo n.º 1
0
def calculate_scaling_factor_and_offset(
    params,
    constraints,
    criterion,
    method="start_values",
    clipping_value=0.1,
    magnitude=1,
    numdiff_options=None,
    processed_params=None,
    processed_constraints=None,
):
    """Calculate a scaling factor and offset for the internal parameter vector.

    Args:
        params (pandas.DataFrame): Params with a "value" column. See :ref:`params`.
        constraints (list): Constraints that define how to convert between internal
            and external parameters.
        criterion (callable): Criterion function that takes params. It may return a
            scalar or a dict with a "value" entry. Only evaluated if ``method`` is
            "gradient".
        method (str): One of "start_values", "bounds" and "gradient".
            - "start_values": The factor is the absolute value of the internal start
              parameters, clipped from below at ``clipping_value``. No offset.
            - "bounds": The factor is the distance between the internal upper and
              lower bounds; the offset is the internal lower bound.
            - "gradient": The factor is the absolute value of a numerical gradient
              of the criterion at the internal start parameters, clipped from below
              at ``clipping_value``. No offset.
        clipping_value (float): Lower clipping value for the raw factor. Not used
            by the "bounds" method.
        magnitude (float): The raw factor is divided by this number.
        numdiff_options (dict): Options for ``first_derivative``. They take
            precedence over the defaults set for the "gradient" method.
        processed_params: Passed through to ``get_reparametrize_functions`` and
            ``get_internal_bounds``.
        processed_constraints: Passed through to ``get_reparametrize_functions``.

    Returns:
        tuple: ``(scaling_factor, scaling_offset)``. ``scaling_offset`` is None
            unless ``method`` is "bounds".

    Raises:
        ValueError: If ``method`` is not one of the supported methods.

    """
    valid_methods = ("start_values", "bounds", "gradient")
    # Fail early with a clear message. Without this check an unknown method would
    # only surface later as an UnboundLocalError on ``raw_factor``.
    if method not in valid_methods:
        raise ValueError(
            f"Invalid scaling method: {method!r}. Must be one of {valid_methods}."
        )

    numdiff_options = {} if numdiff_options is None else numdiff_options
    to_internal, from_internal = get_reparametrize_functions(
        params=params,
        constraints=constraints,
        processed_params=processed_params,
        processed_constraints=processed_constraints,
    )

    x = to_internal(params["value"].to_numpy())

    # internal bounds are only needed by two of the three methods
    if method in ("bounds", "gradient"):
        lower_bounds, upper_bounds = get_internal_bounds(
            params, constraints, processed_params=processed_params)

    if method == "start_values":
        raw_factor = np.clip(np.abs(x), clipping_value, np.inf)
        scaling_offset = None
    elif method == "bounds":
        raw_factor = upper_bounds - lower_bounds
        scaling_offset = lower_bounds
    else:  # method == "gradient"
        default_numdiff_options = {
            "scaling_factor": 100,
            "lower_bounds": lower_bounds,
            "upper_bounds": upper_bounds,
            "error_handling": "raise",
        }

        # user supplied options override the defaults
        numdiff_options = {**default_numdiff_options, **numdiff_options}

        def func(x):
            # evaluate the criterion at the external params that belong to x
            p = params.copy(deep=True)
            p["value"] = from_internal(x)
            crit = criterion(p)
            if isinstance(crit, dict):
                crit = crit["value"]
            return crit

        gradient = first_derivative(func, x, **numdiff_options)["derivative"]

        raw_factor = np.clip(np.abs(gradient), clipping_value, np.inf)
        scaling_offset = None

    scaling_factor = raw_factor / magnitude

    return scaling_factor, scaling_offset
Exemplo n.º 2
0
def get_internal_first_derivative(
    func, params, constraints=None, func_kwargs=None, numdiff_options=None
):
    """Differentiate func numerically with respect to the internal parameters.

    Without constraints (``constraints is None``) this is a plain call to
    ``first_derivative`` on the external params.

    Args:
        func (callable): Function to take the derivative of.
        params (pandas.DataFrame): Data frame with external parameters. See
            :ref:`params`.
        constraints (list): Constraints that define how to convert between internal
            and external parameters.
        func_kwargs (dict): Additional keyword arguments for func.
        numdiff_options (dict): Additional options for first_derivative.

    Returns:
        dict: See ``first_derivative`` for details. If constraints are given, a
            DataFrame or Series in the "derivative" entry is converted to a numpy
            array. If constraints is None, the output of ``first_derivative`` is
            returned with an extra entry "has_transforming_constraints" set to
            False.

    """
    diff_kwargs = numdiff_options if numdiff_options is not None else {}
    extra_kwargs = func_kwargs if func_kwargs is not None else {}
    partialled_func = functools.partial(func, **extra_kwargs)

    # unconstrained case: differentiate directly with respect to params
    if constraints is None:
        result = first_derivative(func=partialled_func, params=params, **diff_kwargs)
        result["has_transforming_constraints"] = False
        return result

    # constrained case: differentiate a numpy function of the internal parameters
    internal_lower, internal_upper = get_internal_bounds(params, constraints)
    internal_func = numpy_interface(
        func=partialled_func, params=params, constraints=constraints
    )
    to_internal, _unused = get_reparametrize_functions(params, constraints)
    internal_x = to_internal(params)

    result = first_derivative(
        internal_func,
        internal_x,
        lower_bounds=internal_lower,
        upper_bounds=internal_upper,
        **diff_kwargs,
    )

    derivative = result["derivative"]
    if isinstance(derivative, (pd.DataFrame, pd.Series)):
        result["derivative"] = derivative.to_numpy()

    return result
Exemplo n.º 3
0
def test_scaling_cancels_itself():
    """Converting to internal and back with scaling recovers the start values."""
    n_params = 10
    params = pd.DataFrame(
        {
            "value": np.arange(n_params) + 10,
            "lower_bound": np.arange(n_params),
            "upper_bound": 25,
        }
    )

    converters = get_reparametrize_functions(
        params=params,
        constraints=[],
        scaling_factor=np.arange(n_params) + 1,
        scaling_offset=np.ones(n_params),
    )
    to_internal, from_internal = converters

    start_values = params["value"].to_numpy()
    roundtrip = from_internal(to_internal(start_values))

    aaae(roundtrip, params["value"].to_numpy())
Exemplo n.º 4
0
def test_get_parametrize_functions_with_back_and_forth_conversion():
    """Round trip external -> internal -> external with fixes and scaling."""
    params = pd.DataFrame({"value": np.arange(10)})

    # three of the ten parameters are fixed; scaling arrays have length seven
    n_scaled = 7
    fixed_constraint = {"loc": [2, 3, 4], "type": "fixed"}

    to_internal, from_internal = get_reparametrize_functions(
        params=params,
        constraints=[fixed_constraint],
        scaling_factor=np.full(n_scaled, 2),
        scaling_offset=np.full(n_scaled, -1),
    )

    start_values = params["value"].to_numpy()
    roundtrip = from_internal(to_internal(start_values))

    aaae(roundtrip, params["value"].to_numpy())
Exemplo n.º 5
0
def _optimize(
    direction,
    criterion,
    params,
    algorithm,
    *,
    criterion_kwargs,
    constraints,
    algo_options,
    derivative,
    derivative_kwargs,
    criterion_and_derivative,
    criterion_and_derivative_kwargs,
    numdiff_options,
    logging,
    log_options,
    error_handling,
    error_penalty,
    cache_size,
    scaling,
    scaling_options,
    multistart,
    multistart_options,
):
    """Minimize or maximize criterion using algorithm subject to constraints.

    Arguments are the same as in maximize and minimize, with an additional direction
    argument. Direction is a string that can take the values "maximize" and "minimize".

    All arguments after ``algorithm`` are keyword-only and have no defaults here.

    The function proceeds in these steps:

    1. Fill optional dict / list arguments via ``_setdefault`` and check all
       arguments with ``check_optimize_kwargs``.
    2. Partial the keyword arguments into criterion, derivative and
       criterion_and_derivative.
    3. Process params and constraints; if ``scaling`` is truthy, calculate the
       scaling factor and offset and process the constraints again with them.
    4. Build the conversion functions between internal and external parameters,
       the internal start vector and the internal bounds.
    5. Evaluate the criterion once at the start params. The result seeds the cache
       and, if logging is active, is handed to the database setup.
    6. Run one optimization or, if ``multistart`` is truthy, a multistart
       optimization, and process the raw result.

    Returns are the same as in maximize and minimize.

    Raises:
        NotImplementedError: If ``multistart`` is truthy and processing the
            constraints yields a non-empty list of processed constraints.

    """
    # replace unset optional arguments by empty containers
    # NOTE(review): presumably _setdefault only replaces None -- confirm in helper
    criterion_kwargs = _setdefault(criterion_kwargs, {})
    constraints = _setdefault(constraints, [])
    algo_options = _setdefault(algo_options, {})
    derivative_kwargs = _setdefault(derivative_kwargs, {})
    criterion_and_derivative_kwargs = _setdefault(
        criterion_and_derivative_kwargs, {})
    numdiff_options = _setdefault(numdiff_options, {})
    log_options = _setdefault(log_options, {})
    scaling_options = _setdefault(scaling_options, {})
    error_penalty = _setdefault(error_penalty, {})
    multistart_options = _setdefault(multistart_options, {})
    # a truthy logging argument is interpreted as the path of the log database
    if logging:
        logging = Path(logging)

    check_optimize_kwargs(
        direction=direction,
        criterion=criterion,
        criterion_kwargs=criterion_kwargs,
        params=params,
        algorithm=algorithm,
        constraints=constraints,
        algo_options=algo_options,
        derivative=derivative,
        derivative_kwargs=derivative_kwargs,
        criterion_and_derivative=criterion_and_derivative,
        criterion_and_derivative_kwargs=criterion_and_derivative_kwargs,
        numdiff_options=numdiff_options,
        logging=logging,
        log_options=log_options,
        error_handling=error_handling,
        error_penalty=error_penalty,
        cache_size=cache_size,
        scaling=scaling,
        scaling_options=scaling_options,
        multistart=multistart,
        multistart_options=multistart_options,
    )

    # store some arguments in a dictionary to save them in the database later
    # the callables (criterion, derivative, criterion_and_derivative) are left out
    if logging:
        problem_data = {
            "direction": direction,
            # "criterion"-criterion,
            "criterion_kwargs": criterion_kwargs,
            "algorithm": algorithm,
            "constraints": constraints,
            "algo_options": algo_options,
            # "derivative"-derivative,
            "derivative_kwargs": derivative_kwargs,
            # "criterion_and_derivative"-criterion_and_derivative,
            "criterion_and_derivative_kwargs": criterion_and_derivative_kwargs,
            "numdiff_options": numdiff_options,
            "log_options": log_options,
            "error_handling": error_handling,
            "error_penalty": error_penalty,
            "cache_size": int(cache_size),
        }

    # partial the kwargs into corresponding functions
    criterion = functools.partial(criterion, **criterion_kwargs)
    if derivative is not None:
        derivative = functools.partial(derivative, **derivative_kwargs)
    if criterion_and_derivative is not None:
        criterion_and_derivative = functools.partial(
            criterion_and_derivative, **criterion_and_derivative_kwargs)

    # process params and constraints
    # value and bound columns are cast to float before the validity check
    params = add_default_bounds_to_params(params)
    for col in ["value", "lower_bound", "upper_bound"]:
        params[col] = params[col].astype(float)
    check_params_are_valid(params)

    # get processed params and constraints
    # (pc = processed constraints, pp = processed params; both None without
    # constraints)
    if constraints:
        pc, pp = process_constraints(constraints, params)
    else:
        pc, pp = None, None

    # a non-empty list of processed constraints cannot be combined with multistart
    if pc and multistart:
        types = {constr["type"] for constr in pc}
        raise NotImplementedError(
            "multistart optimizations are not yet compatible with transforming "
            f"constraints. Your transforming constraints are of type {types}.")

    # calculate scaling factor and offset and redo params and constraint processing
    if scaling:
        scaling_factor, scaling_offset = calculate_scaling_factor_and_offset(
            params=params,
            constraints=constraints,
            criterion=criterion,
            **scaling_options,
            processed_params=pp,
            processed_constraints=pc,
        )
        pc, pp = process_constraints(
            constraints=constraints,
            params=params,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )
    else:
        scaling_factor, scaling_offset = None, None

    if logging:
        # name and group column are needed in the dashboard but could lead to problems
        # if present anywhere else
        params_with_name_and_group = _add_name_and_group_columns_to_params(
            params)
        problem_data["params"] = params_with_name_and_group

    # functions that convert between external and internal parameter vectors
    params_to_internal, params_from_internal = get_reparametrize_functions(
        params=params,
        constraints=constraints,
        scaling_factor=scaling_factor,
        scaling_offset=scaling_offset,
        processed_params=pp,
        processed_constraints=pc,
    )
    # get internal parameters and bounds
    x = params_to_internal(params["value"].to_numpy())

    # this if condition reduces overhead in the no-constraints case
    # NOTE: in that case the bounds are taken from params as they are, without
    # passing scaling_factor or scaling_offset to any helper
    if constraints in [None, []]:
        lower_bounds = params["lower_bound"].to_numpy()
        upper_bounds = params["upper_bound"].to_numpy()
    else:
        lower_bounds, upper_bounds = get_internal_bounds(
            params=params,
            constraints=constraints,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
            processed_params=pp,
        )

    # get the function that converts derivatives; it is handed to the internal
    # criterion template below
    convert_derivative = get_derivative_conversion_function(
        params=params,
        constraints=constraints,
        scaling_factor=scaling_factor,
        scaling_offset=scaling_offset,
        processed_params=pp,
        processed_constraints=pc,
    )

    # do first function evaluation (at the start params)
    first_eval = {
        "internal_params": x,
        "external_params": params,
        "output": criterion(params),
    }

    # fill numdiff_options with defaults
    numdiff_options = _fill_numdiff_options_with_defaults(
        numdiff_options, lower_bounds, upper_bounds)

    # create and initialize the database
    if logging:
        database = _create_and_initialize_database(logging, log_options,
                                                   first_eval, problem_data)
        db_kwargs = {
            "database": database,
            "path": logging,
            "fast_logging": log_options.get("fast_logging", False),
        }
    else:
        db_kwargs = {"database": None, "path": None, "fast_logging": False}

    # get the algorithm
    internal_algorithm = get_algorithm(
        algorithm=algorithm,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        algo_options=algo_options,
        logging=logging,
        db_kwargs=db_kwargs,
    )

    # set default error penalty
    error_penalty = _fill_error_penalty_with_defaults(error_penalty,
                                                      first_eval, direction)

    # create cache; it starts with the first evaluation, keyed by the hash of x
    x_hash = hash_array(x)
    cache = {x_hash: {"criterion": first_eval["output"]}}

    # partial the internal_criterion_and_derivative_template
    # error_handling and error_penalty are not partialled here; they are supplied
    # separately in each of the two branches below
    always_partialled = {
        "direction": direction,
        "criterion": criterion,
        "params": params,
        "reparametrize_from_internal": params_from_internal,
        "convert_derivative": convert_derivative,
        "derivative": derivative,
        "criterion_and_derivative": criterion_and_derivative,
        "numdiff_options": numdiff_options,
        "logging": logging,
        "db_kwargs": db_kwargs,
        "first_criterion_evaluation": first_eval,
        "cache": cache,
        "cache_size": cache_size,
    }

    internal_criterion_and_derivative = functools.partial(
        internal_criterion_and_derivative_template,
        **always_partialled,
    )

    # do actual optimizations
    if not multistart:

        # a single optimization consists of exactly one step
        steps = [{"type": "optimization", "name": "optimization"}]

        step_ids = log_scheduled_steps_and_get_ids(
            steps=steps,
            logging=logging,
            db_kwargs=db_kwargs,
        )
        internal_criterion_and_derivative = functools.partial(
            internal_criterion_and_derivative,
            error_handling=error_handling,
            error_penalty=error_penalty,
        )
        raw_res = internal_algorithm(internal_criterion_and_derivative, x,
                                     step_ids[0])
    else:

        # multistart uses separate sampling bounds; error_handling and
        # error_penalty are passed on to run_multistart_optimization
        lower, upper = get_internal_sampling_bounds(params, constraints)

        multistart_options = _fill_multistart_options_with_defaults(
            options=multistart_options,
            params=params,
            x=x,
            params_to_internal=params_to_internal,
        )

        raw_res = run_multistart_optimization(
            local_algorithm=internal_algorithm,
            criterion_and_derivative=internal_criterion_and_derivative,
            x=x,
            lower_bounds=lower,
            upper_bounds=upper,
            options=multistart_options,
            logging=logging,
            db_kwargs=db_kwargs,
            error_handling=error_handling,
            error_penalty=error_penalty,
        )

    # process the raw optimizer result, using the direction and the conversion
    # back to external parameters
    res = process_internal_optimizer_result(
        raw_res,
        direction=direction,
        params_from_internal=params_from_internal,
    )

    return res
Exemplo n.º 6
0

@pytest.mark.parametrize("case, number", to_test)
def test_reparametrize_to_internal(example_params, all_constraints, case, number):
    """Check internal values and bounds against the expected fixture columns."""
    constraints = all_constraints[case]
    params = reduce_params(example_params, constraints)

    value_col = f"value{number}"
    internal_col = f"internal_value{number}"
    params["value"] = params[value_col]

    # expected internal values are the non-missing entries of the fixture column
    is_present = params[internal_col].notnull()
    want_values = params[internal_col][is_present]
    want_lower = params["internal_lower"]
    want_upper = params["internal_upper"]

    to_internal, _ = get_reparametrize_functions(
        params=params,
        constraints=constraints,
        scaling_factor=None,
        scaling_offset=None,
    )

    _, processed = process_constraints(constraints, params)

    # the conversion is called with a numpy array and with the DataFrame itself
    got_from_array = to_internal(processed["value"].to_numpy())
    got_from_frame = to_internal(processed)

    got_lower = processed["_internal_lower"]
    got_upper = processed["_internal_upper"]

    aaae(got_from_array, got_from_frame)
    aaae(got_from_array, want_values)
    aaae(got_lower, want_lower)
    aaae(got_upper, want_upper)
Exemplo n.º 7
0
def transform_covariance(
    params,
    internal_cov,
    constraints,
    n_samples,
    bounds_handling,
):
    """Transform the internal covariance matrix to an external one, given constraints.

    If processing the constraints yields no processed constraints, the internal
    covariance matrix is returned unchanged (as a DataFrame). Otherwise, internal
    parameter vectors are drawn from a multivariate normal distribution around the
    internal parameters, converted to external parameters, and the covariance of
    the free external parameters is estimated from this sample.

    Args:
        params (pd.DataFrame): DataFrame where the "value" column contains estimated
            parameters of a likelihood model. See :ref:`params` for details.
        internal_cov (np.ndarray or pandas.DataFrame): Covariance matrix of the
            internal parameter vector. For background information about internal and
            external params see :ref:`implementation_of_constraints`.
        constraints (list): List with constraint dictionaries.
            See .. _link: ../../docs/source/how_to_guides/how_to_use_constraints.ipynb
        n_samples (int): Number of samples used to transform the covariance matrix of
            the internal parameter vector into the covariance matrix of the external
            parameters.
        bounds_handling (str): One of "clip", "raise", "ignore". Determines how bounds
            are handled in the sampling step. If "clip", sampled internal parameters
            are clipped at the internal bounds. If "raise" and any sampled internal
            parameter violates a lower or upper bound, we raise an error. Any other
            value leaves the sample untouched. Bounds are only handled if a sampling
            step is necessary due to additional constraints.

    Returns:
        pd.DataFrame: Quadratic DataFrame containing the covariance matrix of the free
            parameters. If parameters were fixed (explicitly or by other constraints),
            the index is a subset of params.index. The columns are the same as the
            index.

    Raises:
        ValueError: If ``bounds_handling`` is "raise" and a sampled internal
            parameter lies outside of the internal bounds.

    """
    processed_constraints, processed_params = process_constraints(constraints, params)
    free_index = processed_params.query("_internal_free").index

    if isinstance(internal_cov, pd.DataFrame):
        internal_cov = internal_cov.to_numpy()

    if processed_constraints:
        _to_internal, _from_internal = get_reparametrize_functions(
            params=params, constraints=constraints
        )

        free = processed_params.loc[free_index]
        is_free = processed_params["_internal_free"].to_numpy()
        # Use plain arrays so that clipped draws are handed on as ndarrays, exactly
        # like unclipped draws.
        lower_bounds = free["_internal_lower"].to_numpy()
        upper_bounds = free["_internal_upper"].to_numpy()

        internal_mean = _to_internal(params)

        sample = np.random.multivariate_normal(
            mean=internal_mean,
            cov=internal_cov,
            size=n_samples,
        )

        # Bounds are handled once for the whole sample; the bound vectors broadcast
        # over the rows (one row per draw).
        if bounds_handling == "clip":
            sample = np.clip(sample, a_min=lower_bounds, a_max=upper_bounds)
        elif bounds_handling == "raise":
            if (sample < lower_bounds).any() or (sample > upper_bounds).any():
                raise ValueError(
                    "Sampled internal parameters violate the internal bounds and "
                    "bounds_handling is 'raise'."
                )

        # convert each draw to external parameters and keep the free ones
        transformed_free = [
            _from_internal(internal=params_vec)[is_free] for params_vec in sample
        ]

        free_cov = np.cov(
            np.array(transformed_free),
            rowvar=False,
        )

    else:
        free_cov = internal_cov

    res = pd.DataFrame(data=free_cov, columns=free_index, index=free_index)
    return res