Exemplo n.º 1
0
def fit_nls(
    df_data,
    md=None,
    out=None,
    var_fix=None,
    df_init=None,
    verbose=True,
    uq_method=None,
    **kwargs,
):
    r"""Fit a model with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS), and
    return an executable model with those frozen best-fit levels. Optionally,
    fit a distribution on the parameters to quantify parametric uncertainty.

    Note: This is a *synonym* for eval_nls(); see the documentation for
    eval_nls() for keyword argument options available beyond those listed here.

    Args:
        df_data (DataFrame): Data for estimating best-fit variable levels.
            Variables not found in df_data optimized for fitting.
        md (gr.Model): Model to analyze. All model variables
            selected for fitting must be bounded or random. Deterministic
            variables may have semi-infinite bounds.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides n_restart
        n_restart (int): Number of restarts to try; the first try is at
            the nominal conditions of the model. Returned model will use
            the least-error parameter set among restarts tested.
        n_maxiter (int): Optimizer maximum iterations
        verbose (bool): Print best-fit parameters to console?
        uq_method (str OR None): If string, select method to quantify parameter
            uncertainties. If None, provide best-fit values only. Methods:
            uq_method = "linpool": assume normal errors; linearly approximate
                parameter effects; equally pool variance matrices for each output

    Returns:
        gr.Model: Model for evaluation with best-fit variables frozen to
            optimized levels.

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_windowed
        >>> from grama.models import make_trajectory_linear
        >>> X = gr.Intention()
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>> md_fitted = (
        >>>     df_trajectory_windowed
        >>>     >> gr.ft_nls(
        >>>         md=md_trajectory,
        >>>         uq_method="linpool",
        >>>     )
        >>> )
    """
    ## Check `out` invariants
    if out is None:
        out = md.out
        print("... fit_nls setting out = {}".format(out))

    ## Check invariants
    if md is None:
        raise ValueError("Must provide model md")

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in md.var_det:
        wid = md.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)

    ## Run eval_nls to fit model parameter values
    df_fit = eval_nls(
        md,
        df_data=df_data,
        var_fix=var_fix,
        df_init=df_init,
        append=True,
        verbose=verbose,
        **kwargs,
    )
    ## Select best-fit values
    df_best = df_fit.sort_values(by="mse",
                                 axis=0).iloc[[0]].reset_index(drop=True)
    if verbose:
        print(df_fit.sort_values(by="mse", axis=0))

    ## Determine variables that were fitted
    var_fitted = list(set(md.var).intersection(set(df_best.columns)))
    var_remain = list(set(md.var).difference(set(var_fitted)))

    if len(var_remain) == 0:
        raise ValueError("Resulting model is constant!")

    ## Assemble and return fitted model
    if md.name is None:
        name = "(Fitted Model)"
    else:
        name = md.name + " (Fitted)"

    ## Calibrate parametric uncertainty, if requested
    if uq_method == "linpool":
        ## Precompute data
        df_nom = eval_nominal(md, df_det="nom")
        df_base = tran_outer(
            df_data, concat((df_best[var_fitted], df_nom[var_fix]), axis=1))
        df_pred = eval_df(md, df=df_base)
        df_grad = eval_grad_fd(md, df_base=df_base, var=var_fitted)

        ## Pool variance matrices
        n_obs = df_data.shape[0]
        n_fitted = len(var_fitted)
        Sigma_pooled = zeros((n_fitted, n_fitted))

        for output in out:
            ## Approximate sigma_sq
            sigma_sq = npsum(
                nppow(df_data[output].values - df_pred[output].values,
                      2)) / (n_obs - n_fitted)
            ## Approximate (pseudo)-inverse hessian
            var_grad = list(map(lambda v: "D" + output + "_D" + v, var_fitted))
            Z = df_grad[var_grad].values
            Hinv = pinv(Z.T.dot(Z), hermitian=True)

            ## Add variance matrix to pooled Sigma
            Sigma_pooled = Sigma_pooled + sigma_sq * Hinv / n_fitted

        ## Check model for identifiability
        kappa_out = cond(Sigma_pooled)
        if kappa_out > 1e10:
            warn(
                "Model is locally unidentifiable as measured by the " +
                "condition number of the pooled covariance matrix; " +
                "kappa = {}".format(kappa_out),
                RuntimeWarning,
            )

        ## Convert to std deviations and correlation
        sigma_comp = npsqrt(diag(Sigma_pooled))
        corr_mat = Sigma_pooled / (atleast_2d(sigma_comp).T.dot(
            atleast_2d(sigma_comp)))
        corr_data = []
        I, J = triu_indices(n_fitted, k=1)
        for ind in range(len(I)):
            i = I[ind]
            j = J[ind]
            corr_data.append([var_fitted[i], var_fitted[j], corr_mat[i, j]])
        df_corr = DataFrame(data=corr_data, columns=["var1", "var2", "corr"])

        ## Assemble marginals
        marginals = {}
        for ind, var_ in enumerate(var_fitted):
            marginals[var_] = {
                "dist": "norm",
                "loc": df_best[var_].values[0],
                "scale": sigma_comp[ind],
            }

        ## Construct model with Gaussian copula
        if len(var_fix) > 0:
            md_res = (Model(name) >> cp_function(
                lambda x: df_nom[var_fix].values,
                var=set(var_remain).difference(var_fix),
                out=var_fix,
                name="Fix variable levels",
            ) >> cp_md_det(md=md) >> cp_marginals(**marginals) >>
                      cp_copula_gaussian(df_corr=df_corr))
        else:
            md_res = (Model(name) >> cp_md_det(md=md) >> cp_marginals(
                **marginals) >> cp_copula_gaussian(df_corr=df_corr))

    ## Return deterministic model
    elif uq_method is None:
        md_res = (Model(name) >> cp_function(
            lambda x: df_best[var_fitted].values,
            var=var_remain,
            out=var_fitted,
            name="Fix variable levels",
        ) >> cp_md_det(md=md))

    else:
        raise ValueError(
            "uq_method option {} not recognized".format(uq_method))

    return md_res
Exemplo n.º 2
0
def eval_nls(
    model,
    df_data=None,
    out=None,
    var_fix=None,
    df_init=None,
    append=False,
    tol=1e-6,
    ftol=1e-9,
    gtol=1e-5,
    n_maxiter=100,
    n_restart=1,
    n_process=1,
    method="L-BFGS-B",
    seed=None,
    verbose=True,
):
    r"""Estimate with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS).

    Args:
        model (gr.Model): Model to analyze. All model variables
            selected for fitting must be bounded or random. Deterministic
            variables may have semi-infinite bounds.
        df_data (DataFrame): Data for estimating parameters. Variables not
            found in df_data optimized in fitting.
        out (list or None): Output contributions to consider in computing MSE.
            Assumed to be model.out if left as None.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides n_restart
        append (bool): Append metadata? (Initial guess, MSE, optimizer status)
        tol (float): Optimizer convergence tolerance
        n_maxiter (int): Optimizer maximum iterations
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        seed (int OR None): Random seed for restarts
        verbose (bool): Print messages to console?

    Returns:
        DataFrame: Results of estimation

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_full
        >>> from grama.models import make_trajectory_linear
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>>
        >>> df_fit = (
        >>>     md_trajectory
        >>>     >> gr.ev_nls(df_data=df_trajectory_full)
        >>> )
        >>>
        >>> print(df_fit)

    """
    ## Check `out` invariants
    if out is None:
        out = model.out
        if verbose:
            print("... eval_nls setting out = {}".format(out))
    set_diff = set(out).difference(set(df_data.columns))
    if len(set_diff) > 0:
        raise ValueError("out must be subset of df_data.columns\n" +
                         "difference = {}".format(set_diff))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in model.var_det:
        wid = model.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    if verbose:
        print("... eval_nls setting var_fix = {}".format(list(var_fix)))
    var_fix = list(var_fix)

    ## Determine variables for evaluation
    var_feat = set(model.var).intersection(set(df_data.columns))
    if verbose:
        print("... eval_nls setting var_feat = {}".format(var_feat))
    var_feat = list(var_feat)

    ## Determine variables for fitting
    var_fit = set(model.var).difference(set(var_fix).union(set(var_feat)))
    if len(var_fit) == 0:
        raise ValueError("No var selected for fitting!\n" +
                         "Try checking model bounds and df_data.columns.")
    var_fit = list(var_fit)

    ## Separate var_fit into det and rand
    var_fit_det = list(set(model.var_det).intersection(var_fit))
    var_fit_rand = list(set(model.var_rand).intersection(var_fit))

    ## Construct bounds, fix var_fit order
    var_fit = var_fit_det + var_fit_rand
    bounds = []
    var_prob = []
    for var in var_fit_det:
        if not isfinite(model.domain.get_nominal(var)):
            var_prob.append(var)
        bounds.append(model.domain.get_bound(var))
    if len(var_prob) > 0:
        raise ValueError(
            "all variables to be fitted must finite nominal value\n" +
            "offending var = {}".format(var_prob))

    for var in var_fit_rand:
        bounds.append((
            model.density.marginals[var].q(0),
            model.density.marginals[var].q(1),
        ))

    ## Determine initial guess points
    df_nom = eval_nominal(model, df_det="nom", skip=True)

    ## Use specified initial guess(es)
    if not (df_init is None):
        # Check invariants
        set_diff = list(set(var_fit).difference(set(df_init.columns)))
        if len(set_diff) > 0:
            raise ValueError("var_fit must be subset of df_init.columns\n" +
                             "difference = {}".format(set_diff))
        # Pull n_restart
        n_restart = df_init.shape[0]

    ## Generate initial guess(es)
    else:

        df_init = df_nom[var_fit]
        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                ))
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_init = concat((df_init, df_rand[var_fit]),
                             axis=0).reset_index(drop=True)

    ## Iterate over initial guesses
    df_res = DataFrame()

    def fun_mp(i):
        x0 = df_init[var_fit].iloc[i].values

        ## Build evaluator
        def objective(x):
            """x = [var_fit]"""
            ## Evaluate model
            df_var = tran_outer(
                df_data[var_feat],
                concat(
                    (df_nom[var_fix].iloc[[0]],
                     df_make(**dict(zip(var_fit, x)))),
                    axis=1,
                ),
            )
            df_tmp = eval_df(model, df=df_var)

            ## Compute joint MSE
            return ((df_tmp[out].values - df_data[out].values)**2).mean()

        ## Run optimization
        res = minimize(
            objective,
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False,
                "ftol": ftol,
                "gtol": gtol,
            },
            bounds=bounds,
        )

        df_tmp = df_make(
            **dict(zip(var_fit, res.x)),
            **dict(zip(map(lambda s: s + "_0", var_fit), x0)),
        )
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]
        df_tmp["mse"] = [res.fun]
        return df_tmp

    df_res = DataFrame()
    for i in range(n_restart):
        df_tmp = fun_mp(i)
        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    ## Post-process
    if append:
        return df_res
    return df_res[var_fit]
Exemplo n.º 3
0
def eval_min(
    model,
    out_min=None,
    out_geq=None,
    out_leq=None,
    out_eq=None,
    method="SLSQP",
    tol=1e-6,
    n_restart=1,
    n_maxiter=50,
    seed=None,
    df_start=None,
):
    r"""Constrained minimization using functions from a model

    Perform constrained minimization using functions from a model. Model must
    have deterministic variables only.

    Wrapper for scipy.optimize.minimize

    Args:
        model (gr.Model): Model to analyze. All model variables must be
            deterministic.
        out_min (str): Output to use as minimization objective.
        out_geq (None OR list of str): Outputs to use as geq constraints; out >= 0
        out_leq (None OR list of str): Outputs to use as leq constraints; out <= 0
        out_eq (None OR list of str): Outputs to use as equality constraints; out == 0

        method (str): Optimization method; see the documentation for
            scipy.optimize.minimize for options.
        tol (float): Optimization objective convergence tolerance
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        df_start (None or DataFrame): Specific starting values to use; overrides
            n_restart if non None provided.

    Returns:
        DataFrame: Results of optimization

    Examples:
        >>> import grama as gr
        >>> md = (
        >>>     gr.Model("Constrained Rosenbrock")
        >>>     >> gr.cp_function(
        >>>         fun=lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2,
        >>>         var=["x", "y"],
        >>>         out=["c"],
        >>>     )
        >>>     >> gr.cp_function(
        >>>         fun=lambda x: (x[0] - 1)**3 - x[1] + 1,
        >>>         var=["x", "y"],
        >>>         out=["g1"],
        >>>     )
        >>>     >> gr.cp_function(
        >>>         fun=lambda x: x[0] + x[1] - 2,
        >>>         var=["x", "y"],
        >>>         out=["g2"],
        >>>     )
        >>>     >> gr.cp_bounds(
        >>>         x=(-1.5, +1.5),
        >>>         y=(-0.5, +2.5),
        >>>     )
        >>> )
        >>> md >> gr.ev_min(
        >>>     out_min="c",
        >>>     out_leq=["g1", "g2"]
        >>> )

    """
    ## Check that model has only deterministic variables
    if model.n_var_rand > 0:
        raise ValueError("model must have no random variables")
    ## Check that objective is in model
    if not (out_min in model.out):
        raise ValueError("model must contain out_min")
    ## Check that constraints are in model
    if not (out_geq is None):
        out_diff = set(out_geq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_geq; missing {}".format(out_diff))
    if not (out_leq is None):
        out_diff = set(out_leq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_leq; missing {}".format(out_diff))
    if not (out_eq is None):
        out_diff = set(out_eq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_eq; missing {}".format(out_diff))

    ## Formulate initial guess
    df_nom = eval_nominal(model, df_det="nom", skip=True)
    if df_start is None:
        df_start = df_nom[model.var]

        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                ))
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_start = concat((df_start, df_rand[model.var]),
                              axis=0).reset_index(drop=True)
    else:
        n_restart = df_start.shape[0]

    ## Factory for wrapping model's output
    def make_fun(out, sign=+1):
        def fun(x):
            df = DataFrame([x], columns=model.var)
            df_res = eval_df(model, df)
            return sign * df_res[out]

        return fun

    ## Create helper functions for constraints
    constraints = []

    if not (out_geq is None):
        for out in out_geq:
            constraints.append({
                "type": "ineq",
                "fun": make_fun(out),
            })

    if not (out_leq is None):
        for out in out_leq:
            constraints.append({
                "type": "ineq",
                "fun": make_fun(out, sign=-1),
            })

    if not (out_eq is None):
        for out in out_eq:
            constraints.append({
                "type": "eq",
                "fun": make_fun(out),
            })

    ## Parse the bounds for minimize
    bounds = list(map(lambda k: model.domain.bounds[k], model.var))

    ## Run optimization
    df_res = DataFrame()
    for i in range(n_restart):
        x0 = df_start[model.var].iloc[i].values
        res = minimize(
            make_fun(out_min),
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False
            },
            constraints=constraints,
            bounds=bounds,
        )

        df_opt = df_make(
            **dict(zip(model.var, res.x)),
            **dict(zip(map(lambda s: s + "_0", model.var), x0)),
        )
        df_tmp = eval_df(model, df=df_opt)
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]

        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    return df_res
Exemplo n.º 4
0
def eval_nls(
    model,
    df_data=None,
    out=None,
    var_fix=None,
    append=False,
    tol=1e-3,
    maxiter=25,
    nrestart=1,
):
    r"""Estimate with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS).

    Args:
        model (gr.Model): Model to analyze. All model variables
            selected for fitting must be bounded or random. Deterministic
            variables may have semi-infinite bounds.
        df_data (DataFrame): Data for estimating parameters. Variables not
            found in df_data optimized in fitting.
        out (list or None): Output contributions to consider in computing MSE.
            Assumed to be model.out if left as None.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        append (bool): Append metadata? (Initial guess, MSE, optimizer status)
        tol (float): Optimizer convergence tolerance
        maxiter (int): Optimizer maximum iterations
        nrestart (int): Number of restarts; beyond nrestart=1 random
            restarts are used.

    Returns:
        DataFrame: Results of estimation

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_full
        >>> from grama.models import make_trajectory_linear
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>>
        >>> df_fit = (
        >>>     md_trajectory
        >>>     >> gr.ev_nls(df_data=df_trajectory_full)
        >>> )
        >>>
        >>> print(df_fit)

    """
    ## Check `out` invariants
    if out is None:
        out = model.out
        print("... eval_nls setting out = {}".format(out))
    set_diff = set(out).difference(set(df_data.columns))
    if len(set_diff) > 0:
        raise ValueError("out must be subset of df_data.columns\n" +
                         "difference = {}".format(set_diff))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in model.var_det:
        wid = model.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    print("... eval_nls setting var_fix = {}".format(list(var_fix)))

    ## Determine variables for evaluation
    var_feat = set(model.var).intersection(set(df_data.columns))
    print("... eval_nls setting var_feat = {}".format(list(var_feat)))

    ## Determine variables for fitting
    var_fit = set(model.var).difference(var_fix.union(var_feat))
    if len(var_fit) == 0:
        raise ValueError("No var selected for fitting!\n" +
                         "Try checking model bounds and df_data.columns.")

    ## Separate var_fit into det and rand
    var_fit_det = list(set(model.var_det).intersection(var_fit))
    var_fit_rand = list(set(model.var_rand).intersection(var_fit))

    ## Construct bounds, fix var_fit order
    var_fit = var_fit_det + var_fit_rand
    bounds = []
    var_prob = []
    for var in var_fit_det:
        if not isfinite(model.domain.get_nominal(var)):
            var_prob.append(var)
        bounds.append(model.domain.get_bound(var))
    if len(var_prob) > 0:
        raise ValueError(
            "all variables to be fitted must finite nominal value\n" +
            "offending var = {}".format(var_prob))

    for var in var_fit_rand:
        bounds.append((
            model.density.marginals[var].q(0),
            model.density.marginals[var].q(1),
        ))

    ## Determine initial guess points
    df_nom = eval_nominal(model, df_det="nom", skip=True)

    df_init = df_nom[var_fit]
    if nrestart > 1:
        raise NotImplementedError()

    ## Iterate over initial guesses
    df_res = DataFrame()
    for i in range(df_init.shape[0]):
        x0 = df_init[var_fit].iloc[i].values

        ## Build evaluator
        def objective(x):
            """x = [var_fit]"""
            ## Evaluate model
            df_var = tran_outer(
                df_data[var_feat],
                concat(
                    (df_nom[var_fix].iloc[[0]],
                     df_make(**dict(zip(var_fit, x)))),
                    axis=1,
                ),
            )
            df_res = eval_df(model, df=df_var)

            ## Compute joint MSE
            return ((df_res[out].values - df_data[out].values)**2).mean()

        ## Run optimization
        res = minimize(
            objective,
            x0,
            args=(),
            method="SLSQP",
            jac=False,
            tol=tol,
            options={
                "maxiter": maxiter,
                "disp": False
            },
            bounds=bounds,
        )

        df_res = concat(
            (
                df_res,
                df_make(
                    **dict(zip(var_fit, res.x)),
                    **dict(zip(map(lambda s: s + "_0", var_fit), x0)),
                    status=res.status,
                    mse=res.fun,
                ),
            ),
            axis=0,
        )

    ## Post-process
    if append:
        return df_res
    else:
        return df_res[var_fit]