示例#1
0
def test_processing_single_optim_with_non_standard_inputs(
        single_non_standard_inputs):
    """Process a single optimization whose inputs come from the fixture.

    The fixture is unpacked as keyword arguments. The first processed
    argument set has to pass the type check and must carry exactly the
    constraints that were handed in.
    """
    processed = process_optimization_arguments(**single_non_standard_inputs)
    first_run = processed[0]

    check_single_argument_types(first_run)
    expected_constraints = single_non_standard_inputs["constraints"]
    assert first_run["constraints"] == expected_constraints
示例#2
0
def test_processing_multi_optim_with_non_standard_inputs_same_constraints_for_all(
        multiple_non_standard_inputs):
    """Process two optimizations that share one set of constraints.

    The fixture is unpacked as keyword arguments. The first processed
    argument set is type checked, and both processed argument sets must
    carry the constraints that were passed in.
    """
    processed = process_optimization_arguments(**multiple_non_standard_inputs)
    shared_constraints = multiple_non_standard_inputs["constraints"]

    check_single_argument_types(processed[0])
    # Index explicitly so that a missing second run still fails loudly.
    for run_index in (0, 1):
        assert processed[run_index]["constraints"] == shared_constraints
示例#3
0
def test_processing_single_optim_with_all_standard_inputs():
    """Process a single optimization given only the three positional inputs.

    Criterion, params and algorithm are passed positionally; the first
    processed argument set has to pass the type check.
    """
    start_params = pd.DataFrame(np.ones((4, 3)))

    processed = process_optimization_arguments(
        np.mean, start_params, "scipy_L-BFGS-B")

    check_single_argument_types(processed[0])
示例#4
0
def test_processing_multiple_optim_with_all_standard_inputs():
    """Process two optimizations given lists of params and algorithms.

    A single criterion is combined with two parameter frames and two
    algorithm names. Two argument sets are expected, both must pass the
    type check, and the first one must have an empty constraint list.
    """
    start_params = [
        pd.DataFrame(np.ones((4, 3))),
        pd.DataFrame(np.zeros((4, 4))),
    ]
    algorithm_names = ["scipy_L-BFGS-B", "pygmo_xnes"]

    processed = process_optimization_arguments(
        np.mean, start_params, algorithm_names)

    assert len(processed) == 2
    first_run, second_run = processed[0], processed[1]
    check_single_argument_types(first_run)
    check_single_argument_types(second_run)
    assert first_run["constraints"] == []
示例#5
0
def test_processing_multi_optim_with_non_standard_inputs_different_constraints(
        multiple_non_standard_inputs):
    """Process two optimizations where each run has its own constraints.

    The fixture's ``"constraints"`` entry is replaced by a list holding one
    constraint list per run. After processing, the first argument set is
    type checked and each run must carry its own constraint list.
    """
    fixed_constraint = {"loc": 1, "type": "fixed", "value": 4}
    increasing_constraint = {"loc": [2, 3], "type": "increasing"}
    differing_constraints = [[fixed_constraint], [increasing_constraint]]

    # The fixture dictionary itself is updated, as in the keyword call below.
    kwargs = multiple_non_standard_inputs
    kwargs["constraints"] = differing_constraints

    processed = process_optimization_arguments(**kwargs)

    check_single_argument_types(processed[0])
    for run_index, expected in enumerate(differing_constraints):
        assert processed[run_index]["constraints"] == expected
示例#6
0
def minimize(
    criterion,
    params,
    algorithm,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    gradient_options=None,
    logging=DEFAULT_DATABASE_NAME,
    log_options=None,
    dashboard=False,
    db_options=None,
):
    """Minimize *criterion* using *algorithm* subject to *constraints* and bounds.

    All inputs are first normalized by ``process_optimization_arguments``. If
    this yields exactly one set of arguments, a single optimization is run.
    Otherwise one optimization per argument set is dispatched through
    ``Parallel``.

    Every argument except ``general_options`` may be given as a list to
    request several optimizations. Either all such arguments are lists of the
    same length, or some are given as single values, which are then
    broadcasted automatically.

    Args:
        criterion (function or list of functions):
            Python function that takes a pandas DataFrame with parameters as the
            first argument and returns a scalar floating point value.

        params (pd.DataFrame or list of pd.DataFrames):
            See :ref:`params`.

        algorithm (str or list of strings):
            Name of the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_kwargs (dict or list of dicts):
            Additional keyword arguments for criterion.

        constraints (list or list of lists):
            List with constraint dictionaries.

        general_options (dict):
            Additional configurations for the optimization. Must contain the
            key ``"n_cores"`` if several optimizations are run.

        algo_options (dict or list of dicts):
            Algorithm specific configurations for the optimization.

        gradient_options (dict):
            Options for the gradient function.

        logging (str or pathlib.Path):
            Path to an sqlite3 file which typically has the file extension
            ``.db``. If the file does not exist, it will be created. See
            :ref:`logging` for details.

        log_options (dict):
            Keyword arguments to influence the logging. See :ref:`logging` for
            details.

        dashboard (bool):
            Whether to create and show a dashboard. See :ref:`dashboard` for
            details. Not available when several optimizations are run.

        db_options (dict):
            Dictionary with kwargs to be supplied to the run_server function.
            See :ref:`dashboard` for details.

    Returns:
        The return value of ``_single_minimize`` if there is only one
        optimization, otherwise whatever the ``Parallel`` call returns for
        the individual optimizations.

    Raises:
        NotImplementedError: If ``dashboard`` is truthy and several
            optimizations are requested.
        ValueError: If several optimizations are requested and ``"n_cores"``
            is missing from the general options of the first argument set.

    """
    raw_inputs = {
        "criterion": criterion,
        "params": params,
        "algorithm": algorithm,
        "criterion_kwargs": criterion_kwargs,
        "constraints": constraints,
        "general_options": general_options,
        "algo_options": algo_options,
        # No user supplied gradient is forwarded from this entry point.
        "gradient": None,
        "gradient_options": gradient_options,
        "logging": logging,
        "log_options": log_options,
        "dashboard": dashboard,
        "db_options": db_options,
    }
    arguments = process_optimization_arguments(**raw_inputs)

    # A single argument set needs no parallel machinery.
    if len(arguments) == 1:
        return _single_minimize(**arguments[0])

    if dashboard:
        raise NotImplementedError(
            "Dashboard cannot be used for multiple optimizations, yet.")

    # The number of workers is read from the first argument set only.
    shared_options = arguments[0]["general_options"]
    if "n_cores" not in shared_options:
        raise ValueError(
            "n_cores need to be specified in general_options"
            " if multiple optimizations should be run.")

    run_in_parallel = Parallel(n_jobs=shared_options["n_cores"])
    return run_in_parallel(
        delayed(_one_argument_single_minimize)(argument)
        for argument in arguments)
示例#7
0
def maximize_log_likelihood(
    log_like_obs,
    params,
    algorithm,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    gradient_options=None,
    logging=DEFAULT_DATABASE_NAME,
    log_options=None,
    dashboard=False,
    db_options=None,
):
    """Estimate parameters via maximum likelihood.

    ``log_like_obs`` is wrapped with ``expand_criterion_output`` and then
    aggregated with ``np.mean`` before it is handed to ``maximize``. Afterwards
    the ``"fitness"`` entry of each result is multiplied by the number of
    likelihood contributions, which converts the mean log likelihood back to
    the log likelihood.

    The criterion function ``log_like_obs`` has to return an array of log
    likelihoods at the first position, not the mean log likelihood. The second
    return can be a :class:`pandas.DataFrame` in the `tidy data format`_,
    intended for displaying the distribution of contributions for subgroups.

    Working with log likelihoods instead of likelihoods is preferred for two
    reasons.

    1. Optimization methods which rely on gradients generally work better
       optimizing the log transformation. See `1`_ for a simplified example.

    2. Converting products of probabilities to sums of log probabilities is
       numerically more stable as it prevents over- and underflows. See `2`_
       for an example.

    Args:
        log_like_obs (callable or list of callables):
            Python function that takes a pandas DataFrame with parameters as the
            first argument and returns an array of log likelihood contributions
            as the first return.

        params (pd.DataFrame or list of pd.DataFrames):
            See :ref:`params`.

        algorithm (str or list of strings):
            Name of the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_kwargs (dict or list of dicts):
            Additional keyword arguments for criterion.

        constraints (list or list of lists):
            List with constraint dictionaries.

        general_options (dict):
            Additional configurations for the optimization.

        algo_options (dict or list of dicts):
            Algorithm specific configurations for the optimization.

        gradient_options (dict):
            Options for the gradient function.

        logging (str or pathlib.Path):
            Path to an sqlite3 file which typically has the file extension
            ``.db``. If the file does not exist, it will be created. See
            :ref:`logging` for details.

        log_options (dict):
            Keyword arguments to influence the logging. See :ref:`logging` for
            details.

        dashboard (bool):
            Whether to create and show a dashboard. See :ref:`dashboard` for
            details.

        db_options (dict):
            Dictionary with kwargs to be supplied to the run_server function.
            See :ref:`dashboard` for details.

    Returns:
        results (tuple or list of tuples):
            The return of ``maximize`` with rescaled ``"fitness"``: either a
            tuple containing a dictionary of the results and the parameters or
            a list of such tuples.

    .. _tidy data format:
        http://dx.doi.org/10.18637/jss.v059.i10

    .. _1:
        https://stats.stackexchange.com/a/176563/218971

    .. _2:
        https://statmodeling.stat.columbia.edu/2016/06/11/log-sum-of-exponentials/

    """
    # Treat the single-callable case as a one-element list so that both cases
    # share the same two wrapping passes.
    several_criteria = isinstance(log_like_obs, list)
    candidates = log_like_obs if several_criteria else [log_like_obs]
    expanded = [expand_criterion_output(crit_func) for crit_func in candidates]
    averaged = [
        aggregate_criterion_output(np.mean)(crit_func) for crit_func in expanded
    ]
    wrapped_loglikeobs = averaged if several_criteria else averaged[0]

    results = maximize(
        wrapped_loglikeobs,
        params,
        algorithm,
        criterion_kwargs,
        constraints,
        general_options,
        algo_options,
        gradient_options,
        logging,
        log_options,
        dashboard,
        db_options,
    )

    # The arguments are processed once more, this time with the unwrapped
    # criterion, to obtain one complete argument set per optimization.
    raw_inputs = {
        "criterion": log_like_obs,
        "params": params,
        "algorithm": algorithm,
        "criterion_kwargs": criterion_kwargs,
        "constraints": constraints,
        "general_options": general_options,
        "algo_options": algo_options,
        "gradient": None,
        "gradient_options": gradient_options,
        "logging": logging,
        "log_options": log_options,
        "dashboard": dashboard,
        "db_options": db_options,
    }
    arguments = process_optimization_arguments(**raw_inputs)

    # Evaluate each unwrapped criterion once at its processed params and count
    # the returned items.
    # NOTE(review): if a criterion returns a tuple (contributions, DataFrame)
    # this counts the tuple's entries rather than the contributions - confirm.
    n_contributions = []
    for args_one_run in arguments:
        contributions = args_one_run["criterion"](
            args_one_run["params"], **args_one_run["criterion_kwargs"])
        n_contributions.append(len(list(contributions)))

    # Rescale the mean log likelihood to the log likelihood, per result.
    if isinstance(results, list):
        result_count_pairs = zip(results, n_contributions)
    else:
        result_count_pairs = [(results, n_contributions[0])]

    for single_result, n_contribs in result_count_pairs:
        result_info = single_result[0]
        result_info["fitness"] = result_info["fitness"] * n_contribs

    return results
示例#8
0
def minimize(
    criterion,
    params,
    algorithm,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    dashboard=False,
    db_options=None,
):
    """Minimize *criterion* using *algorithm* subject to *constraints* and bounds.

    Run several optimizations if called by lists of inputs. All inputs are
    first normalized by ``process_optimization_arguments``. If this yields
    exactly one set of arguments, a single optimization is run in the current
    process; otherwise the argument sets are mapped over a pool of worker
    processes.

    Args:
        criterion (function or list of functions):
            Python function that takes a pandas DataFrame with parameters as the
            first argument and returns a scalar floating point value.

        params (pd.DataFrame or list of pd.DataFrames):
            See :ref:`params`.

        algorithm (str or list of strings):
            Name of the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_kwargs (dict or list of dicts):
            Additional keyword arguments for criterion.

        constraints (list or list of lists):
            List with constraint dictionaries.

        general_options (dict):
            Additional configurations for the optimization. Must contain the
            key ``"n_cores"`` if several optimizations are run.

        algo_options (dict or list of dicts):
            Algorithm specific configurations for the optimization.

        dashboard (bool):
            Whether to create and show a dashboard. Not available when several
            optimizations are run.

        db_options (dict):
            Dictionary with kwargs to be supplied to the run_server function.

    Returns:
        The return value of ``_single_minimize`` if there is only one
        optimization, otherwise the results of ``pool.map`` over all argument
        sets, one entry per optimization.

    Raises:
        NotImplementedError: If ``dashboard`` is truthy and several
            optimizations are requested.
        ValueError: If several optimizations are requested and ``"n_cores"``
            is missing from the general options of the first argument set.

    """

    arguments = process_optimization_arguments(
        criterion=criterion,
        params=params,
        algorithm=algorithm,
        criterion_kwargs=criterion_kwargs,
        constraints=constraints,
        general_options=general_options,
        algo_options=algo_options,
        dashboard=dashboard,
        db_options=db_options,
    )

    if len(arguments) == 1:
        # Run only one optimization
        arguments = arguments[0]
        result = _single_minimize(**arguments)
    else:
        # Run multiple optimizations
        if dashboard:
            raise NotImplementedError(
                "Dashboard cannot be used for multiple optimizations, yet.")

        # set up multiprocessing
        if "n_cores" not in arguments[0]["general_options"]:
            raise ValueError(
                "n_cores need to be specified in general_options" +
                " if multiple optimizations should be run.")
        n_cores = arguments[0]["general_options"]["n_cores"]

        # Use the pool as a context manager so the worker processes are shut
        # down once the (blocking) map has returned, also when it raises.
        # Previously the pool was never closed and its workers leaked.
        with Pool(processes=n_cores) as pool:
            result = pool.map(_one_argument_single_minimize, arguments)

    return result