Example #1
        def objective(x):
            """Least-squares objective; x holds trial values for var_fit"""
            ## Evaluate the model: pair the data's feature columns with the
            ## fixed nominal levels and the trial values for var_fit
            df_var = tran_outer(
                df_data[var_feat],
                concat(
                    (df_nom[var_fix].iloc[[0]], df_make(**dict(zip(var_fit, x)))),
                    axis=1,
                ),
            )
            df_tmp = eval_df(model, df=df_var)

            ## Compute joint MSE, pooled across all fitted outputs
            return ((df_tmp[out].values - df_data[out].values) ** 2).mean()
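This closure is what the optimizer minimizes over the fitted variables. A minimal sketch of that step with SciPy, where x0 and bounds are hypothetical stand-ins for values grama derives from the model's domain and nominal levels:

from scipy.optimize import minimize

x0 = [0.5, 1.0]                      # hypothetical: one entry per name in var_fit
bounds = [(0.0, 1.0), (0.1, 10.0)]   # hypothetical box bounds, one per fitted variable

res = minimize(
    objective,          # the joint-MSE closure defined above
    x0,
    method="L-BFGS-B",  # quasi-Newton method that supports box bounds
    bounds=bounds,
)
x_best = res.x          # best-fit values, ordered as var_fit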
Example #2
    def var_outer(self, df_rand, df_det=None):
        """Outer product of random and deterministic samples

        Args:
            df_rand (DataFrame): Random variable samples
            df_det (DataFrame): Deterministic variable samples;
                set to "nom" for nominal evaluation

        Returns:
            DataFrame: Outer product of samples

        """
        ## Pass-through if no var_det
        if self.n_var_det == 0:
            return df_rand

        ## Error-throwing default value
        if df_det is None:
            raise ValueError("df_det must be DataFrame or 'nom'")
        ## String shortcut
        elif isinstance(df_det, str):
            if df_det == "nom":
                df_det = self.det_nom()
            else:
                raise ValueError("df_det shortcut string invalid")
        ## DataFrame
        else:
            ## Check invariant; model inputs must be subset of df columns
            if not set(self.var_det).issubset(set(df_det.columns)):
                raise ValueError("model.var_det not a subset of given columns")

        ## Pass-through if no var_rand
        if self.n_var_rand == 0:
            return df_det

        ## Outer product if both det and rand exist
        return gr.tran_outer(df_rand, df_det)
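The outer product here is a cross join: every random sample is paired with every deterministic sample. A minimal pandas-only sketch of the same semantics (how="cross" requires pandas >= 1.2), independent of gr.tran_outer:

from pandas import DataFrame

df_rand = DataFrame({"x": [0.1, 0.9]})      # 2 random samples
df_det = DataFrame({"t": [1.0, 2.0, 3.0]})  # 3 deterministic samples

## Cross join: 2 x 3 = 6 rows, one per pairing of the two sample sets
df_outer = df_rand.merge(df_det, how="cross")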
Example #3
## Imports used by this excerpt; grama-internal helpers (eval_nls, eval_nominal,
## eval_grad_fd, eval_df, tran_outer, Model, and the cp_* composers) are defined
## elsewhere in the grama package
from warnings import warn
from numpy import atleast_2d, diag, triu_indices, zeros
from numpy import power as nppow, sqrt as npsqrt, sum as npsum
from numpy.linalg import cond, pinv
from pandas import DataFrame, concat


def fit_nls(
    df_data,
    md=None,
    out=None,
    var_fix=None,
    df_init=None,
    verbose=True,
    uq_method=None,
    **kwargs,
):
    r"""Fit a model with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS), and
    return an executable model with those frozen best-fit levels. Optionally,
    fit a distribution on the parameters to quantify parametric uncertainty.

    Note: This is a *synonym* for eval_nls(); see the documentation for
    eval_nls() for keyword argument options available beyond those listed here.

    Args:
        df_data (DataFrame): Data for estimating best-fit variable levels.
            Variables not found in df_data are optimized for fitting.
        md (gr.Model): Model to analyze. All model variables
            selected for fitting must be bounded or random. Deterministic
            variables may have semi-infinite bounds.
        out (list or None): Outputs to fit against; defaults to all model
            outputs (md.out).
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides n_restart
        n_restart (int): Number of restarts to try; the first try is at
            the nominal conditions of the model. Returned model will use
            the least-error parameter set among restarts tested.
        n_maxiter (int): Optimizer maximum iterations
        verbose (bool): Print best-fit parameters to console?
        uq_method (str OR None): If string, select method to quantify parameter
            uncertainties. If None, provide best-fit values only. Methods:
            uq_method = "linpool": assume normal errors; linearly approximate
                parameter effects; equally pool variance matrices for each output

    Returns:
        gr.Model: Model for evaluation with best-fit variables frozen to
            optimized levels.

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_windowed
        >>> from grama.models import make_trajectory_linear
        >>> X = gr.Intention()
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>> md_fitted = (
        >>>     df_trajectory_windowed
        >>>     >> gr.ft_nls(
        >>>         md=md_trajectory,
        >>>         uq_method="linpool",
        >>>     )
        >>> )
    """
    ## Check invariants
    if md is None:
        raise ValueError("Must provide model md")

    ## Check `out` invariants; default to all model outputs
    if out is None:
        out = md.out
        print("... fit_nls setting out = {}".format(out))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in md.var_det:
        wid = md.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    ## Convert to list; sets are not valid DataFrame indexers below
    var_fix = list(var_fix)

    ## Run eval_nls to fit model parameter values
    df_fit = eval_nls(
        md,
        df_data=df_data,
        var_fix=var_fix,
        df_init=df_init,
        append=True,
        verbose=verbose,
        **kwargs,
    )
    ## Select best-fit values
    df_best = (
        df_fit.sort_values(by="mse", axis=0)
        .iloc[[0]]
        .reset_index(drop=True)
    )
    if verbose:
        print(df_fit.sort_values(by="mse", axis=0))

    ## Determine variables that were fitted
    var_fitted = list(set(md.var).intersection(set(df_best.columns)))
    var_remain = list(set(md.var).difference(set(var_fitted)))

    if len(var_remain) == 0:
        raise ValueError("Resulting model is constant!")

    ## Assemble and return fitted model
    if md.name is None:
        name = "(Fitted Model)"
    else:
        name = md.name + " (Fitted)"

    ## Calibrate parametric uncertainty, if requested
    if uq_method == "linpool":
        ## Precompute data
        df_nom = eval_nominal(md, df_det="nom")
        df_base = tran_outer(
            df_data,
            concat((df_best[var_fitted], df_nom[var_fix]), axis=1),
        )
        df_pred = eval_df(md, df=df_base)
        df_grad = eval_grad_fd(md, df_base=df_base, var=var_fitted)

        ## Pool variance matrices
        n_obs = df_data.shape[0]
        n_fitted = len(var_fitted)
        Sigma_pooled = zeros((n_fitted, n_fitted))
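        ## Linearized covariance: with normal errors and Jacobian Z of the
        ## predictions with respect to the fitted parameters, the parameter
        ## covariance for one output is approximately sigma^2 * (Z^T Z)^(-1);
        ## the loop below computes this per output and pools into Sigma_pooled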

        for output in out:
            ## Approximate sigma_sq
            sigma_sq = npsum(
                nppow(df_data[output].values - df_pred[output].values, 2)
            ) / (n_obs - n_fitted)
            ## Approximate (pseudo)-inverse hessian
            var_grad = list(map(lambda v: "D" + output + "_D" + v, var_fitted))
            Z = df_grad[var_grad].values
            Hinv = pinv(Z.T.dot(Z), hermitian=True)

            ## Add variance matrix to pooled Sigma
            Sigma_pooled = Sigma_pooled + sigma_sq * Hinv / n_fitted

        ## Check model for identifiability
        kappa_out = cond(Sigma_pooled)
        if kappa_out > 1e10:
            warn(
                "Model is locally unidentifiable as measured by the " +
                "condition number of the pooled covariance matrix; " +
                "kappa = {}".format(kappa_out),
                RuntimeWarning,
            )

        ## Convert to std deviations and correlation
        sigma_comp = npsqrt(diag(Sigma_pooled))
        corr_mat = Sigma_pooled / atleast_2d(sigma_comp).T.dot(
            atleast_2d(sigma_comp)
        )
        corr_data = []
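        ## Report off-diagonal entries only: triu_indices(k=1) walks the strict
        ## upper triangle, yielding one (var1, var2, corr) row per parameter pair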
        I, J = triu_indices(n_fitted, k=1)
        for i, j in zip(I, J):
            corr_data.append([var_fitted[i], var_fitted[j], corr_mat[i, j]])
        df_corr = DataFrame(data=corr_data, columns=["var1", "var2", "corr"])

        ## Assemble marginals
        marginals = {}
        for ind, var_ in enumerate(var_fitted):
            marginals[var_] = {
                "dist": "norm",
                "loc": df_best[var_].values[0],
                "scale": sigma_comp[ind],
            }

        ## Construct model with Gaussian copula
        if len(var_fix) > 0:
            md_res = (
                Model(name)
                >> cp_function(
                    lambda x: df_nom[var_fix].values,
                    var=set(var_remain).difference(var_fix),
                    out=var_fix,
                    name="Fix variable levels",
                )
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )
        else:
            md_res = (
                Model(name)
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )

    ## Return deterministic model
    elif uq_method is None:
        md_res = (
            Model(name)
            >> cp_function(
                lambda x: df_best[var_fitted].values,
                var=var_remain,
                out=var_fitted,
                name="Fix variable levels",
            )
            >> cp_md_det(md=md)
        )

    else:
        raise ValueError("uq_method option {} not recognized".format(uq_method))

    return md_res
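A short pipe-free usage sketch based on the docstring example above; evaluating the returned model with gr.eval_nominal (already used in this function) confirms it runs:

import grama as gr
from grama.data import df_trajectory_windowed
from grama.models import make_trajectory_linear

md_trajectory = make_trajectory_linear()

## Best-fit values only (uq_method=None)
md_fit = gr.fit_nls(df_trajectory_windowed, md=md_trajectory)

## With linearized parametric uncertainty pooled across outputs
md_fit_uq = gr.fit_nls(
    df_trajectory_windowed,
    md=md_trajectory,
    uq_method="linpool",
)

## Evaluate the fitted model at its nominal (best-fit) settings
df_check = gr.eval_nominal(md_fit, df_det="nom")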
Example #4
## Imports used by this excerpt; eval_df and tran_outer are grama-internal
## helpers defined elsewhere in the package
import itertools
from numpy import atleast_2d, eye, tile
from pandas import DataFrame, concat


def eval_grad_fd(model, h=1e-8, df_base=None, var=None, append=True, skip=False):
    r"""Finite-difference gradient approximation

    Evaluates a given model with a central-difference stencil to approximate the
    gradient.

    Args:
        model (gr.Model): Model to differentiate
        h (numeric): Finite difference stepsize;
            scalar for a single stepsize or an array for per-input stepsizes
        df_base (DataFrame): Base-points for gradient calculations
        var (list(str) or string): list of variables to differentiate,
            or flag; "rand" for var_rand, "det" for var_det
        append (bool): Append results to base point inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Gradient approximation or unevaluated design

    @pre (not isinstance(h, collections.Sequence)) |
         (h.shape[0] == df_base.shape[1])

    Examples:

        >>> import grama as gr
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> df_nom = md >> gr.ev_nominal(df_det="nom")
        >>> df_grad = md >> gr.ev_grad_fd(df_base=df_nom)
        >>> df_grad >> gr.tf_gather("var", "val", gr.everything())

    """
    ## Check invariants
    if df_base is None:
        raise ValueError("Must provide df_base")
    if not set(model.var).issubset(set(df_base.columns)):
        raise ValueError("model.var must be subset of df_base.columns")
    if var is None:
        var = model.var
    elif isinstance(var, str):
        if var == "rand":
            var = model.var_rand
        elif var == "det":
            var = model.var_det
        else:
            raise ValueError("var flag not recognized; use 'rand' or 'det'")
    else:
        if not set(var).issubset(set(model.var)):
            raise ValueError("var must be subset of model.var")
    var_fix = list(set(model.var).difference(set(var)))

    ## TODO: implement skip
    if skip:
        raise NotImplementedError("skip not implemented")

    ## Build stencil
    n_var = len(var)
    stencil = eye(n_var) * h
    stepscale = tile(atleast_2d(0.5 / h).T, (1, model.n_out))
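    ## Central difference: df/dx ~ (f(x + h) - f(x - h)) / (2 h); stepscale
    ## supplies the 1 / (2 h) factor, tiled to one row per input and one
    ## column per output so it broadcasts over the stencil evaluations below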

    outputs = model.out
    nested_labels = [
        list(map(lambda s_out: "D" + s_out + "_D" + s_var, outputs))
        for s_var in var
    ]
    grad_labels = list(itertools.chain.from_iterable(nested_labels))

    ## Loop over df_base
    results = []  # TODO: Preallocate?
    for row_i in range(df_base.shape[0]):
        ## Evaluate left and right stencil points around the base point
        df_left = eval_df(
            model,
            tran_outer(
                DataFrame(
                    columns=var,
                    data=-stencil + df_base[var].iloc[[row_i]].values,
                ),
                df_base[var_fix].iloc[[row_i]],
            ),
            append=False,
        )

        df_right = eval_df(
            model,
            tran_outer(
                DataFrame(
                    columns=var,
                    data=+stencil + df_base[var].iloc[[row_i]].values,
                ),
                df_base[var_fix].iloc[[row_i]],
            ),
            append=False,
        )

        ## Compute differences
        res = (stepscale *
               (df_right[outputs] - df_left[outputs]).values).flatten()
        df_grad = DataFrame(columns=grad_labels, data=[res])

        results.append(df_grad)

    return concat(results).reset_index(drop=True)
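As a sanity check, the finite-difference gradient can be compared against a known analytic derivative. A minimal sketch using a hypothetical one-variable toy model built with grama's Model and cp_function:

import grama as gr
from pandas import DataFrame

## Toy model: f(x) = x^2, so the analytic gradient is Df_Dx = 2 x
md_toy = (
    gr.Model("toy")
    >> gr.cp_function(fun=lambda x: x[0] ** 2, var=["x"], out=["f"])
)

df_base = DataFrame({"x": [1.0, 2.0]})
df_grad = gr.eval_grad_fd(md_toy, df_base=df_base)
## Expect the Df_Dx column to be close to [2.0, 4.0]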