Пример #1
0
def rprop(n, Sigma, X_means, seed=None):
    r"""Sample from a gaussian mixture with a shared covariance

    Every observation is produced in two steps: a row index of X_means is
    picked with choice(), then a single multivariate normal variate is drawn
    with that row as its mean and Sigma as its covariance.

    Args:
        n (int): Number of observations in sample
        Sigma (2d numpy array): Common covariance matrix for mixture distribution
        X_means (2d numpy array): Means for mixture distribution, one per row
        seed (int or None): If not None, passed to set_seed() before sampling

    Preconditions:
        Sigma.shape[0] == X_means.shape[1]
        Sigma.shape[1] == X_means.shape[1]

    Returns:
        2d numpy array: Sample of observations, one per row
    """
    if seed is not None:
        set_seed(seed)

    n_components, width = X_means.shape
    draws = zeros((n, width))

    for row in range(n):
        # Component first, variate second: this call order fixes the stream
        component = choice(n_components)
        center = X_means[component, :]
        draws[row, :] = multivariate_normal(mean=center, cov=Sigma, size=1)

    return draws
Пример #2
0
    def sample(self, n=1, seed=None):
        """Draw quantile samples with gaussian copula dependence

        Draws n zero-mean multivariate normal vectors with covariance
        self.Sigma, then passes them through the cdf of valid_dist["norm"].

        Args:
            self (gr.CopulaGaussian):
            n (int): Number of samples to draw
            seed (int or None): If not None, passed to set_seed() before drawing

        Returns:
            DataFrame: Copula samples, one column per entry of self.var_rand

        """
        ## Reseed only on request
        if seed is not None:
            set_seed(seed)

        ## Correlated zero-mean gaussian draws
        n_var = len(self.var_rand)
        z_corr = multivariate_normal(mean=[0] * n_var, cov=self.Sigma, size=n)

        ## Map through the normal cdf
        ## (marginals are uniform presumably because Sigma has unit diagonal;
        ## confirm against how Sigma is constructed)
        df_quant = DataFrame(
            data=valid_dist["norm"].cdf(z_corr),
            columns=self.var_rand,
        )
        return df_quant
Пример #3
0
def param_sample_random(sample_space, count=100, seed=None):
    """
    Draw `count` random parameter samples from `sample_space`.

    :param sample_space: A dictionary (or other mapping whose type can be
        constructed without arguments) with either
        - Scalar values (int, float, str), which will be used as-is.
        - List, tuple, set or array, from which one entry is picked with
          choice() for every sample.
        - Objects with an `rvs` method (e.g. scipy distributions), which are
          sampled by calling `rvs()` once per sample.
    :param count: How many samples
    :param seed: If not None, passed to set_seed() before sampling.
    :return: A list of `count` samples, each of the same type as `sample_space`.
    :raises ValueError: If a value is of none of the supported kinds.
    """
    # possible improvement: remove duplicates if any
    if seed is not None:
        set_seed(seed)

    # Sets are unordered and not indexable, so choice() cannot draw from them
    # directly. Convert each set once, up front; sort when the elements are
    # orderable so that a given seed picks the same elements on every run.
    set_candidates = {}
    for key, value in sample_space.items():
        if isinstance(value, set):
            try:
                set_candidates[key] = sorted(value)
            except TypeError:
                set_candidates[key] = list(value)

    sampled = []
    for _ in range(count):
        sample = type(sample_space)()
        for key, value in sample_space.items():
            if isinstance(value, (int, float, str)):
                sample[key] = value
            elif isinstance(value, set):
                sample[key] = choice(set_candidates[key])
            elif isinstance(value, (list, tuple, ndarray)):
                sample[key] = choice(value)
            elif hasattr(value, 'rvs'):
                sample[key] = value.rvs()
            else:
                raise ValueError(
                    'param_sample_random does not know what do with object of type {0:}'
                    .format(type(value)))
        sampled.append(sample)
    return sampled
Пример #4
0
def random_covariance(X, cov=0.1, K=2, seed=None, dt=None):
    """Add covariance between K randomly selected electrode pairs.

    Rows of X are treated as electrodes. Up to K disjoint pairs of rows are
    picked at random; for each pair (a, b) the returned copy has row a
    replaced by ``X[a] + cov * X[b]``. Row b and all unpicked rows are
    returned unchanged, and X itself is never modified.

    Args:
        X (2d numpy array): Data, one electrode per row
        cov (float): Weight of the partner row added to the first row of a pair
        K (int): Number of pairs; if fewer than 2*K rows exist, only the
            complete pairs that fit are used
        seed (int or None): If not None, passed to set_seed() before drawing
        dt: Unused; kept so the signature stays unchanged

    Returns:
        numpy array: Copy of X with covariance added (floating point; integer
        input is promoted so the added term is not truncated)
    """
    # Reseed only on request so an unseeded call does not disturb the
    # caller's global RNG state
    if seed is not None:
        set_seed(seed)

    # Note sizes (also rejects input that is not 2d)
    M, _ = X.shape

    # Init (will contain covar electrodes); a copy, so X stays untouched
    noi = np.copy(X)
    if not np.issubdtype(noi.dtype, np.inexact):
        noi = noi.astype(float)

    # Pick K random pairs of M rows (electrodes)
    index = np.random.permutation(M)[0:(K * 2)]

    # Add covar; zip drops a trailing unpaired index when 2*K > M
    for first, second in zip(index[0::2], index[1::2]):
        noi[first, :] = X[first, :] + (X[second, :] * cov)

    return noi
Пример #5
0
def random_Gabarit(form=None, seed=None):
	"""
	Generate a random Gabarit

	Only the 'lowpass' form is generated at the moment; any other value of
	`form` raises ValueError.

	Parameters:
	- form: (string) {None, 'lowpass', 'highpass', 'bandpass', 'bandstop'}. Gives the type of filter. If None, the type is randomized (currently the only candidate is 'lowpass')
	- seed: if not None, indicates the seed to use for the random part (in order to be reproducible, the seed is passed on to the Gabarit; the original note says it is stored in the name of the gabarit)

	Returns the Gabarit built from the drawn sampling frequency, bands and gains.
	"""
	# Draw a fresh seed when none is given. Compare against None explicitly so
	# that an explicit seed=0 is honoured instead of being replaced.
	if seed is None:
		set_seed(None)  # (from doc):  If seed is omitted or None, current system time is used
		seed = randint(0, 16777215)  # up to 2^24-1 (inclusive or not depends on which randint is imported)
	set_seed(seed)

	# choose a form if asked
	# (the draw is kept even with a single candidate, so the random stream
	# for a given seed stays the same; the other forms are not supported yet:
	# "highpass", "bandpass", "bandstop")
	if form is None:
		form = choice(("lowpass",))

	Fs = randint(500, 100000)

	# lowpass
	if form == 'lowpass':
		Fpass = uniform(0.01, 0.9)*Fs/2  # Wpass between 0.01 and 0.9
		Fstop = uniform(Fpass, Fs/2)     # Wstop between Wpass and 1
		gp = uniform(-5, 5)              # upperband for pass in [-5;5]
		gps = uniform(0.1, 5)            # pass width in [0.1;5]
		gs = uniform(-80, 2*(gp-gps))   # stop band in [-80 and 2*lowerband]
		bands = [(0, Fpass), (Fstop, None)]
		Gains = [(gp, gp-gps), gs]
	else:
		raise ValueError('The form is not valid')

	return Gabarit(Fs, bands, Gains, seed=seed)
Пример #6
0
def push_seed(seed=None): # pragma no cover
    """
    Set a temporary seed to the numpy random number generator, restoring it
    at the end of the context.  If seed is None, then get a seed from
    /dev/urandom (or the windows analogue).

    This is a generator with a single yield; it is presumably wrapped by
    contextlib.contextmanager where it is defined (the decorator is not
    visible here). The previous generator state is restored even when the
    managed body raises.
    """
    from numpy.random import get_state, set_state, seed as set_seed
    state = get_state()
    set_seed(seed)
    try:
        yield
    finally:
        # Without the finally, an exception in the managed body would leave
        # the temporary seed in place for the rest of the program
        set_state(state)
Пример #7
0
def push_seed(seed=None): # pragma no cover
    """
    Set a temporary seed to the numpy random number generator, restoring it
    at the end of the context.  If seed is None, then get a seed from
    /dev/urandom (or the windows analogue).

    This is a generator with a single yield; it is presumably wrapped by
    contextlib.contextmanager where it is defined (the decorator is not
    visible here). The previous generator state is restored even when the
    managed body raises.
    """
    from numpy.random import get_state, set_state, seed as set_seed
    state = get_state()
    set_seed(seed)
    try:
        yield
    finally:
        # Without the finally, an exception in the managed body would leave
        # the temporary seed in place for the rest of the program
        set_state(state)
Пример #8
0
    def sample(self, n=1, seed=None):
        """Draw quantile samples for independent variables

        Fills a table of n rows and len(self.var_rand) columns with values
        from random().

        Args:
            n (int): Number of samples
            seed (int or None): Random seed; set_seed() is called only when given

        Returns:
            DataFrame: Independent samples, columns named by self.var_rand
        """
        ## Reseed only on request
        if seed is not None:
            set_seed(seed)

        names = self.var_rand
        shape = (n, len(names))
        return DataFrame(data=random(shape), columns=names)
Пример #9
0
def eval_lhs(model,
             n=1,
             df_det=None,
             seed=None,
             append=True,
             skip=False,
             criterion=None):
    r"""Latin Hypercube evaluation
    Evaluates a given model on a latin hypercube sample (LHS) using the model's
    density.
    Args:
        model (gr.Model): Model to evaluate
        n (numeric): Number of LHS samples to draw; non-integers are
            converted with int()
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        seed (int): Random seed to use; the seed is set only when given
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?
        criterion (str): flag for LHS sample criterion, forwarded to lhs();
            allowable values per pyDOE.lhs: None, "center" ("c"),
            "maximin" ("m"), "centermaximin" ("cm"), "correlation" ("corr")
    Returns:
        DataFrame: Results of evaluation or unevaluated design
    Notes:
        - Wrapper on pyDOE.lhs
    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n, Integral):
        print("eval_lhs() is rounding n...")
        n = int(n)

    ## Draw samples; the criterion was previously accepted but never used
    df_quant = DataFrame(
        data=lhs(model.n_var_rand, samples=n, criterion=criterion),
        columns=model.var_rand,
    )

    ## Convert samples to desired marginals
    df_rand = model.density.pr2sample(df_quant)
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        return df_samp
    return gr.eval_df(model, df=df_samp, append=append)
Пример #10
0
def paired_covariance(X, cov=0.1, pairs=None, seed=None, dt=None):
    """Add covariance between the given electrode pairs.

    Rows of X are treated as electrodes. For every pair (a, b), processed in
    order, row a becomes ``a + cov * b`` and row b then becomes
    ``b + cov * (updated a)``. The work is done on a copy; X itself is never
    modified.

    Args:
        X (2d numpy array): Data, one electrode per row
        cov (float): Weight of the partner row added to each row of a pair
        pairs (iterable of 2-sequences or None): Row index pairs; None means
            no pairs, so an unchanged copy is returned
        seed (int or None): If not None, passed to set_seed(); nothing in this
            function draws random numbers
        dt: Unused; kept so the signature stays unchanged

    Returns:
        numpy array: Copy of X with covariance added (floating point; integer
        input is promoted so the in-place additions are valid)
    """
    # Reseed only on request so an unseeded call does not disturb the
    # caller's global RNG state
    if seed is not None:
        set_seed(seed)

    # Note sizes (also rejects input that is not 2d)
    M, N = X.shape

    # Init (will contain covar electrodes); a copy, so X stays untouched
    noi = np.copy(X)
    if not np.issubdtype(noi.dtype, np.inexact):
        noi = noi.astype(float)

    if pairs is None:
        return noi

    # Add covar, accumulating in noi so that overlapping pairs chain exactly
    # as they did when the rows of X were updated in place
    for pair in pairs:
        e1 = noi[pair[0], :]
        e2 = noi[pair[1], :]

        e1 += (e2 * cov)
        e2 += (e1 * cov)

        noi[pair[0], :] = e1
        noi[pair[1], :] = e2

    return noi
Пример #11
0
def brown(X, scale=0.5, seed=None, dt=None):
    """Add brown noise

    Each row of X receives its own random walk: the steps are drawn from
    np.random.normal(0, scale) and accumulated along the row, and the walk is
    added to the row.

    Args:
        X (array-like): Data; promoted to at least 2d, one series per row
        scale (float): Standard deviation of the individual steps
        seed (int or None): If not None, passed to set_seed() before drawing
        dt: Unused; kept so the signature stays unchanged

    Returns:
        numpy array: X plus noise, always floating point, so the noise is not
        truncated when X has an integer dtype
    """
    # Reseed only on request so an unseeded call does not disturb the
    # caller's global RNG state
    if seed is not None:
        set_seed(seed)

    X = np.atleast_2d(X)
    M, N = X.shape

    # Steps are drawn row by row (C order) and summed left to right, which is
    # the same draw order as one scalar draw per element
    steps = np.random.normal(0, scale, size=(M, N))
    noi = np.cumsum(steps, axis=1)

    return X + noi
Пример #12
0
def eval_sinews(
    model,
    n_density=10,
    n_sweeps=3,
    seed=None,
    df_det=None,
    varname="sweep_var",
    indname="sweep_ind",
    append=True,
    skip=False,
):
    r"""Sweep study

    Perform coordinate sweeps over each model random variable ("sinew" design). Use random starting points drawn from the joint density. Optionally sweep the deterministic variables.

    For more expensive models, it can be helpful to tune n_density and n_sweeps to achieve a reasonable runtime.

    Use gr.plot_auto() to construct a quick visualization of the output dataframe. Use `skip` version to visualize the design, and non-skipped version to visualize the results.

    Args:
        model (gr.Model): Model to evaluate
        n_density (numeric): Number of points along each sweep; non-integers
            are converted with int()
        n_sweeps (numeric): Number of sweeps per-random variable; non-integers
            are converted with int()
        seed (int): Random seed to use; the seed is set only when given
        df_det (DataFrame): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels,
            use "swp" to sweep deterministic variables
        varname (str): Column name to give for sweep variable; default="sweep_var"
        indname (str): Column name to give for sweep index; default="sweep_ind"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples:

        >>> import grama as gr
        >>> md = gr.make_cantilever_beam()
        >>> # Skip evaluation, vis. design
        >>> df_design = md >> gr.ev_sinews(df_det="nom", skip=True)
        >>> df_design >> gr.pt_auto()
        >>> # Vis results
        >>> df_sinew = md >> gr.ev_sinews(df_det="nom")
        >>> df_sinew >> gr.pt_auto()

    """
    ## Override model if deterministic sweeps desired
    ## (check the type first: comparing a DataFrame with "swp" yields a
    ## DataFrame, whose truth value is ambiguous and raises)
    if isinstance(df_det, str) and df_det == "swp":
        ## Collect sweep-able deterministic variables
        var_sweep = list(
            filter(
                lambda v: isfinite(model.domain.get_width(v))
                & (model.domain.get_width(v) > 0),
                model.var_det,
            ))
        ## Generate pseudo-marginals
        dicts_var = {}
        for v in var_sweep:
            dicts_var[v] = {
                "dist": "uniform",
                "loc": model.domain.get_bound(v)[0],
                "scale": model.domain.get_width(v),
            }
        ## Overwrite model
        model = comp_marginals(model, **dicts_var)
        ## Restore flag
        df_det = "nom"

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n_density, Integral):
        print("eval_sinews() is rounding n_density...")
        n_density = int(n_density)
    if not isinstance(n_sweeps, Integral):
        print("eval_sinews() is rounding n_sweeps...")
        n_sweeps = int(n_sweeps)

    ## Build quantile sweep data
    ## q_random: one random start per (variable, sweep), repeated n_density times
    q_random = tile(random((1, model.n_var_rand, n_sweeps)), (n_density, 1, 1))
    q_dense = linspace(0, 1, num=n_density)
    Q_all = zeros((n_density * n_sweeps * model.n_var_rand, model.n_var_rand))
    C_var = ["tmp"] * (n_density * n_sweeps * model.n_var_rand)
    C_ind = [0] * (n_density * n_sweeps * model.n_var_rand)

    ## Interlace
    for i_input in range(model.n_var_rand):
        ind_base = i_input * n_density * n_sweeps
        for i_sweep in range(n_sweeps):
            ind_start = ind_base + i_sweep * n_density
            ind_end = ind_base + (i_sweep + 1) * n_density

            ## Hold every variable at the sweep's random start, then
            ## overwrite the swept variable with the dense grid
            Q_all[ind_start:ind_end] = q_random[:, :, i_sweep]
            Q_all[ind_start:ind_end, i_input] = q_dense
            C_var[ind_start:ind_end] = [model.var_rand[i_input]] * n_density
            C_ind[ind_start:ind_end] = [i_sweep] * n_density

            ## Modify endpoints for infinite support
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(0)):
                Q_all[ind_start, i_input] = 1 / n_density / 10
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(1)):
                Q_all[ind_end - 1, i_input] = 1 - 1 / n_density / 10

    ## Assemble sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    df_rand[indname] = C_ind
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n  {1:4.3} sec"
                .format(model.name, runtime_est))
        else:
            print(
                "Design runtime estimates unavailable; model has no timing data."
            )

        ## For autoplot
        ## (warnings are silenced while the attribute is attached)
        with catch_warnings():
            simplefilter("ignore")
            df_samp._plot_info = {
                "type": "sinew_inputs",
                "var": model.var_rand
            }

        ## Pass-through
        return df_samp

    ## Apply
    df_res = eval_df(model, df=df_samp, append=append)
    ## For autoplot
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sinew_outputs",
            "var": model.var_rand,
            "out": model.out,
        }

    return df_res
Пример #13
0
def eval_sample(model,
                n=None,
                df_det=None,
                seed=None,
                append=True,
                skip=False):
    r"""Evaluate a model on a random sample of its random inputs

    Draws n observations from the model density, forms the outer product with the deterministic levels, and (unless skip is set) evaluates the model on the resulting design.

    Tuning n helps keep runtime reasonable for expensive models. Alternatively, generate a large design with skip=True and reduce it with tran_sp() before evaluating. (See examples below.)

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): number of observations to draw; required, and converted
            with int() when not already integral
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        seed (int): random seed to use; set only when given
        append (bool): Append results to input values?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Raises:
        ValueError: If n is None

    Examples:

        >>> import grama as gr
        >>> from grama.models import make_test
        >>> DF = gr.Intention()
        >>>
        >>> # Simple random sample evaluation
        >>> md = make_test()
        >>> df = md >> gr.ev_sample(n=1e2, df_det="nom")
        >>> df.describe()
        >>>
        >>> ## Use autoplot to visualize results
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=1e2, df_det="nom")
        >>>     >> gr.pt_auto()
        >>> )
        >>>
        >>> ## Cantilever beam examples
        >>> from grama.models import make_cantilever_beam
        >>> md_beam = make_cantilever_beam()
        >>>
        >>> ## Use iocorr to generate input/output correlation tile plot
        >>> (
        >>>     md_beam
        >>>     >> gr.ev_sample(n=1e3, df_det="nom", skip=True)
        >>>     # Generate input/output correlation summary
        >>>     >> gr.tf_iocorr()
        >>>     # Visualize
        >>>     >> gr.pt_auto()
        >>> )
        >>>
        >>> ## Use support points to reduce model runtime
        >>> (
        >>>     md_beam
        >>>     # Generate large input sample but don't evaluate outputs
        >>>     >> gr.ev_sample(n=1e5, df_det="nom", skip=True)
        >>>     # Reduce to a smaller---but representative---sample
        >>>     >> gr.tf_sp(n=50)
        >>>     # Evaluate the outputs
        >>>     >> gr.tf_md(md_beam)
        >>> )
        >>>
        >>> ## Estimate probabilities
        >>> (
        >>>     md_beam
        >>>     # Generate large
        >>>     >> gr.ev_sample(n=1e5, df_det="nom")
        >>>     # Estimate probabilities of failure
        >>>     >> gr.tf_summarize(
        >>>         pof_stress=gr.mean(DF.g_stress <= 0),
        >>>         pof_disp=gr.mean(DF.g_disp <= 0),
        >>>     )
        >>> )


    """
    ## A sample size is mandatory
    if n is None:
        raise ValueError("Must provide a valid n value.")

    ## Reseed only on request
    if seed is not None:
        set_seed(seed)

    ## Coerce the sample count to int
    if not isinstance(n, Integral):
        print("eval_sample() is rounding n...")
        n = int(n)

    ## Random draws, then outer product with the deterministic levels
    df_design = model.var_outer(
        model.density.sample(n=n, seed=seed),
        df_det=df_det,
    )

    if not skip:
        df_out = eval_df(model, df=df_design, append=append)
        ## Tag the results for autoplot
        with catch_warnings():
            simplefilter("ignore")
            df_out._plot_info = {
                "type": "sample_outputs",
                "var": model.var,
                "out": model.out,
            }
        return df_out

    ## Design only: report the expected cost of evaluating it
    estimate = model.runtime(df_design.shape[0])
    if estimate > 0:
        message = (
            "Estimated runtime for design with model ({0:1}):\n  {1:4.3} sec"
            .format(model.name, estimate))
    else:
        message = (
            "Design runtime estimates unavailable; model has no timing data."
        )
    print(message)

    ## Tag the design for autoplot
    with catch_warnings():
        simplefilter("ignore")
        df_design._plot_info = {
            "type": "sample_inputs",
            "var": model.var_rand,
        }

    return df_design
Пример #14
0
def eval_hybrid(
    model,
    n=1,
    plan="first",
    df_det=None,
    varname="hybrid_var",
    seed=None,
    append=True,
    skip=False,
):
    r"""Hybrid-point design for Sobol' indices

    Builds the "hybrid point" design (Sobol', 1999) used to estimate Sobol'
    indices, and evaluates the model on it unless skip is set. Post-process
    the results with gr.tran_sobol() to compute the estimates.

    The design stacks n base points followed by one block of n points per
    random variable; the block's variable name is recorded in the `varname`
    column ("_" marks the base points).

    Args:
        model (gr.Model): Model to evaluate; must have CopulaIndependence
        n (numeric): Number of points along each sweep; converted with int()
            when not already integral
        plan (str): Sobol' index to compute; plan={"first", "total"}
        seed (int): Random seed to use; set only when given
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        varname (str): Column name to give for sweep variable; default="hybrid_var"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Raises:
        ValueError: If the model copula is not CopulaIndependence, or if plan
            is neither "first" nor "total"

    References:
        I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models"
        (1999) MMCE, Vol 1.

    Examples:

        >>> import grama as gr
        >>> md = gr.make_cantilever_beam()
        >>> df_first = md >> gr.ev_hybrid(df_det="nom", plan="first")
        >>> df_first >> gr.tf_sobol()
        >>>
        >>> df_total = md >> gr.ev_hybrid(df_det="nom", plan="total")
        >>> df_total >> gr.tf_sobol()

    """

    def _tag(df):
        ## Record how the design was built; warnings are silenced while the
        ## attribute is attached
        with catch_warnings():
            simplefilter("ignore")
            df._meta = dict(
                type="eval_hybrid",
                varname=varname,
                plan=plan,
                var_rand=model.var_rand,
                out=model.out,
            )
        return df

    ## Sobol' indices need independent inputs
    if not isinstance(model.density.copula, CopulaIndependence):
        raise ValueError(
            "model must have CopulaIndependence structure;\n" +
            "Sobol' indices only defined for independent variables")

    ## Reseed only on request
    if seed is not None:
        set_seed(seed)

    if not isinstance(n, Integral):
        print("eval_hybrid() is rounding n...")
        n = int(n)

    n_var = model.n_var_rand
    n_total = n * (n_var + 1)

    ## Two independent sets of quantiles
    base = random((n, n_var))
    alt = random((n, n_var))

    ## Base points first, then one block per variable
    quantiles = zeros((n_total, n_var))
    quantiles[:n] = base
    labels = ["_"] * n_total

    for j in range(n_var):
        rows = slice((j + 1) * n, (j + 2) * n)

        if plan == "first":
            ## Everything resampled except variable j
            quantiles[rows, :] = alt
            quantiles[rows, j] = base[:, j]
        elif plan == "total":
            ## Only variable j resampled
            quantiles[rows, :] = base
            quantiles[rows, j] = alt[:, j]
        else:
            raise ValueError("plan must be `first` or `total`")

        labels[rows] = [model.var_rand[j]] * n

    ## Quantiles -> marginals, then outer product with deterministic levels
    df_rand = model.density.pr2sample(
        DataFrame(data=quantiles, columns=model.var_rand))
    df_rand[varname] = labels
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        return _tag(df_samp)

    return _tag(eval_df(model, df=df_samp, append=append))
Пример #15
0
def eval_monte_carlo(model,
                     n=1,
                     df_det=None,
                     seed=None,
                     append=True,
                     skip=False):
    r"""Monte Carlo evaluation

    Draws n observations from the model density, forms the outer product
    with the deterministic levels, and (unless skip is set) evaluates the
    model on the resulting design.

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): number of Monte Carlo samples to draw; converted with
            int() when not already integral
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        seed (int): random seed to use; set only when given
        append (bool): Append results to random values?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples:

        >>> import grama as gr
        >>> from grama.models import make_test
        >>> md = make_test()
        >>> df = md >> gr.ev_monte_carlo(n=1e2, df_det="nom")
        >>> df.describe()

    """
    ## Reseed only on request
    if seed is not None:
        set_seed(seed)

    ## Coerce the sample count to int
    if not isinstance(n, Integral):
        print("eval_monte_carlo() is rounding n...")
        n = int(n)

    ## Random draws, then outer product with the deterministic levels
    df_design = model.var_outer(
        model.density.sample(n=n, seed=seed),
        df_det=df_det,
    )

    if not skip:
        df_out = gr.eval_df(model, df=df_design, append=append)

        ## Tag the results
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            df_out._plot_info = {
                "type": "monte_carlo_outputs",
                "out": model.out
            }

        return df_out

    ## Design only: report the expected cost of evaluating it
    estimate = model.runtime(df_design.shape[0])
    if estimate > 0:
        message = (
            "Estimated runtime for design with model ({0:1}):\n  {1:4.3} sec"
            .format(model.name, estimate))
    else:
        message = (
            "Design runtime estimates unavailable; model has no timing data."
        )
    print(message)

    ## Tag the design
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        df_design._plot_info = {
            "type": "monte_carlo_inputs",
            "var": model.var_rand,
        }

    return df_design
Пример #16
0
# Experiment driver fragment: for every repetition, dataset and seed, fix the
# random seeds and load that dataset's ratings table.
# NOTE(review): the innermost loop body appears to continue beyond this
# excerpt; Test_Experiment, record_path and src_document_all are assigned
# here but not used in the visible lines.
Test_Experiment = "CNN_GRU_Dual"
record_path = "record.csv"

datasets = FLAGS.datasets
# override: the value read from FLAGS is replaced by a fixed dataset list
datasets = ["Musical_Instruments_5"]
seeds = FLAGS.seeds
# override (disabled): uncomment to replace the seeds read from FLAGS
# seeds = [55, 66, 77, 88, 99]
for get_experiment_result in range(FLAGS.get_result_times):
    for dataset in datasets:
        for seed in seeds:
            # Seed numpy's global generator for this run
            from numpy.random import seed as set_seed

            set_seed(seed=seed)
            # Seed tensorflow as well
            # (set_random_seed at the package top level is presumably the
            # TensorFlow 1.x API -- confirm against the installed version)
            from tensorflow import set_random_seed

            set_random_seed(seed=seed)

            # Input files live under <cwd>/data/<dataset>/
            src_document_all = os.path.join(os.getcwd(), "data", dataset,
                                            "document.all")
            src_ratings_data = os.path.join(os.getcwd(), "data", dataset,
                                            "ratings.dat")
            # Seeds numpy again (assuming np is numpy, this repeats the
            # set_seed call above)
            np.random.seed(seed)
            # "::"-separated file without a header row; column names are
            # supplied explicitly and the python parsing engine is requested
            ratings = pd.read_csv(
                src_ratings_data,
                sep="::",
                names=["user", "item", "rating", "timestamp"],
                engine='python')
            ratings_array = ratings.values
Пример #17
0
def tran_kfolds(
    df,
    k=None,
    ft=None,
    out=None,
    var_fold=None,
    suffix="_mean",
    summaries=None,
    tf=tf_summarize,
    shuffle=True,
    seed=None,
):
    r"""Perform k-fold CV

    Perform k-fold cross-validation (CV) using a given fitting procedure (ft).
    Optionally provide a fold identifier column, or (randomly) assign folds.

    Args:
        df (DataFrame): Data to pass to given fitting procedure
        ft (gr.ft_): Partially-evaluated grama fit function; defines model fitting
            procedure and outputs to aggregate
        tf (gr.tf_): Partially-evaluated grama transform function; evaluation of
            fitted model will be passed to tf and provided with keyword arguments
            from summaries
        out (list or None): Outputs for which to compute `summaries`; None uses ft.out
        var_fold (str or None): Column to treat as fold identifier; overrides `k`
        suffix (str): Suffix for predicted value; used to distinguish between predicted and actual
        summaries (dict of functions): Summary functions to pass to tf; will be evaluated
            for outputs of ft. Each summary must have signature summary(f_pred, f_meas).
            Grama includes builtin options: gr.mse, gr.rmse, gr.rel_mse, gr.rsq, gr.ndme
        k (int): Number of folds; k=5 to k=10 recommended [1]
        shuffle (bool): Shuffle the data before CV? True recommended [1]
        seed (int or None): Random seed for the shuffle; set only when given
            (any integer, including 0) and only used when folds are assigned
            randomly

    Notes:
        - Many grama functions support *partial evaluation*; this allows one to specify things like hyperparameters in fitting functions without providing data and executing the fit. You can take advantage of this functionality to easily do hyperparameter studies.

    Returns:
        DataFrame: Aggregated results within each of k-folds using given model and
            summary transform

    Raises:
        ValueError: If ft is not given, or var_fold is not a column of df

    References:
        [1] James, Witten, Hastie, and Tibshirani, "An introduction to statistical learning" (2017), Chapter 5. Resampling Methods

    Examples:

        >>> import grama as gr
        >>> from grama.data import df_stang
        >>> from grama.fit import ft_rf
        >>> df_kfolds = (
        >>>     df_stang
        >>>     >> gr.tf_kfolds(
        >>>         k=5,
        >>>         ft=ft_rf(out=["thick"], var=["E", "mu"]),
        >>>     )
        >>> )

    """
    ## Check invariants
    if ft is None:
        raise ValueError("Must provide ft keyword argument")
    if (k is None) and (var_fold is None):
        print("... tran_kfolds is using default k=5")
        k = 5
    if summaries is None:
        print("... tran_kfolds is using default summaries mse and rsq")
        summaries = dict(mse=mse, rsq=rsq)

    n = df.shape[0]
    ## Handle custom folds
    if var_fold is not None:
        ## Check for a valid var_fold
        if var_fold not in df.columns:
            raise ValueError("var_fold must be in df.columns or None")
        ## Build folds: one per distinct level of the fold column
        levels = unique(df[var_fold])
        k = len(levels)
        print("... tran_kfolds found {} levels via var_folds".format(k))
        Is = []
        for level in levels:
            Is.append(list(arange(n)[df[var_fold] == level]))

    else:
        ## Shuffle data indices
        if shuffle:
            ## Compare against None so that seed=0 is honoured
            if seed is not None:
                set_seed(seed)
            I = permutation(n)
        else:
            I = arange(n)
        ## Build folds: consecutive chunks of ceil(n / k) indices
        di = int(ceil(n / k))
        Is = [I[i * di:min((i + 1) * di, n)] for i in range(k)]

    ## Iterate over folds
    df_res = DataFrame()
    for i in range(k):
        ## Train by out-of-fold data
        md_fit = df >> tf_filter(~var_in(X.index, Is[i])) >> ft

        ## Determine predicted and actual
        if out is None:
            out = str_replace(md_fit.out, suffix, "")
        else:
            out = str_replace(out, suffix, "")

        ## Test by in-fold data
        df_pred = md_fit >> ev_df(df=df >> tf_filter(var_in(X.index, Is[i])),
                                  append=False)

        ## Specialize summaries for output names
        summaries_all = ChainMap(*[{
            key + "_" + o: fun(X[o + suffix], X[o])
            for key, fun in summaries.items()
        } for o in out])

        ## Aggregate: bind measured values next to predictions, then summarize
        df_summary_tmp = (
            df_pred >>
            tf_bind_cols(df[out] >> tf_filter(var_in(X.index, Is[i]))) >>
            tf(**summaries_all)
        )

        ## Label the fold: index for random folds, level for custom folds
        if var_fold is None:
            df_summary_tmp = df_summary_tmp >> tf_mutate(_kfold=i)
        else:
            df_summary_tmp[var_fold] = levels[i]

        df_res = concat((df_res, df_summary_tmp),
                        axis=0).reset_index(drop=True)

    return df_res
Пример #18
0
def fit_lolo(df,
             md=None,
             var=None,
             out=None,
             domain=None,
             density=None,
             seed=None,
             return_std=True,
             suppress_warnings=True,
             **kwargs):
    r"""Fit a random forest

    Fit a random forest to given data. Specify inputs and outputs, or inherit
    from an existing model. One forest is fitted per output.

    Args:
        df (DataFrame): Data for function fitting
        md (gr.Model): Model from which to inherit metadata; when given, its
            domain, density and out replace the corresponding arguments
        var (list(str) or None): List of features or None for all except outputs
        out (list(str)): List of outputs to fit
        domain (gr.Domain): Domain for new model
        density (gr.Density): Density for new model
        seed (int or None): Random seed for fitting process; set (before each
            fit) only when given
        return_std (bool): Return predictive standard deviations?
        suppress_warnings (bool): Suppress warnings when fitting? The
            suppression is limited to this call.

    Keyword Arguments:

        num_trees (int):
        use_jackknife (bool):
        bias_learner ():
        leaf_learner ():
        subset_strategy (str):
        min_leaf_instances (int):
        max_depth (int):
        uncertainty_calibration (bool):
        randomize_pivot_location (bool):
        randomly_rotate_features (bool):

    Returns:
        gr.Model: A grama model with fitted function(s)

    Raises:
        ValueError: If df has fewer than 8 rows, if no outputs are given, if
            out or var are not columns of df, or if out and var overlap

    Notes:
        - Wrapper for lolopy.learners.RandomForestRegressor

    """
    import warnings

    n_obs = df.shape[0]

    ## Check minimum rows
    if n_obs < 8:
        raise ValueError("The lolo random forest requires at least 8 rows")

    ## Infer fitting metadata, if available
    if md is not None:
        domain = md.domain
        density = md.density
        out = md.out

    ## Check invariants
    if out is None:
        raise ValueError("out must be provided, either directly or through md")
    if not set(out).issubset(set(df.columns)):
        raise ValueError("out must be subset of df.columns")
    ## Default input value
    if var is None:
        var = list(set(df.columns).difference(set(out)))
    ## Check more invariants
    set_inter = set(out).intersection(set(var))
    if len(set_inter) > 0:
        raise ValueError(
            "outputs and inputs must be disjoint; intersect = {}".format(
                set_inter))
    if not set(var).issubset(set(df.columns)):
        raise ValueError("var must be subset of df.columns")

    ## Construct a random forest for each output. The warning filter is
    ## installed inside catch_warnings() so it is removed again on exit
    ## instead of silencing warnings for the rest of the process.
    functions = []

    with warnings.catch_warnings():
        if suppress_warnings:
            warnings.simplefilter("ignore")

        for output in out:
            rf = RandomForestRegressor(**kwargs)
            ## Set seed only if given
            if seed is not None:
                set_seed(seed)
            rf.fit(df[var].values, df[output].values)
            name = "RF"

            fun = FunctionRFR(rf, var, [output], name, 0, return_std)
            functions.append(fun)

        ## Construct model
        return gr.Model(functions=functions, domain=domain, density=density)
Пример #19
0
 def set_state(self, random_state):
     """Restore random generators from a saved snapshot.

     Calls set_seed() with random_state.stats_seed, then hands
     random_state.random_state to random.setstate().
     """
     # Reseed first, then rewind `random` (same order as the attributes are read)
     set_seed(random_state.stats_seed)
     random.setstate(random_state.random_state)
Пример #20
0
def tran_bootstrap(df,
                   tran=None,
                   n_boot=500,
                   n_sub=25,
                   con=0.90,
                   col_sel=None,
                   seed=None):
    r"""Bootstrap-t confidence intervals for a transform

    Applies a transform to the data and to resampled copies of the data, then
    builds confidence bounds for every numeric column of the transform's
    output. Follows the "bootstrap-t" procedure discussed in Efron and
    Tibshirani (1993): each outer resample is itself resampled n_sub times to
    approximate a standard error, which is used to form an approximate pivot.

    Args:
        df (DataFrame): Data to resample
        tran (grama tran_ function): Transform that computes the statistic;
            called as tran(df)
        n_boot (numeric): Number of outer (Monte Carlo) resamples; converted
            with int() if not already integral
        n_sub (numeric): Number of nested resamples per outer resample, used
            to estimate the standard error; converted with int() if not
            already integral
        con (float): Confidence level
        col_sel (list(string)): If given, only numeric columns also named
            here are bootstrapped
        seed (int or None): Passed to set_seed() when not None

    Returns:
        DataFrame: tran(df) with additional `_lo` and `_up` columns for each
        bootstrapped numeric column

    References and notes:
       Efron and Tibshirani (1993) "The bootstrap-t procedure... is
       particularly applicable to location statistics like the sample mean....
       The bootstrap-t method, at least in its simple form, cannot be trusted
       for more general problems, like setting a confidence interval for a
       correlation coefficient."

    """
    ## Seed the generator only on request
    if seed is not None:
        set_seed(seed)

    ## Coerce resample counts to int
    if not isinstance(n_boot, Integral):
        print("tran_bootstrap() is rounding n_boot...")
        n_boot = int(n_boot)
    if not isinstance(n_sub, Integral):
        print("tran_bootstrap() is rounding n_sub...")
        n_sub = int(n_sub)

    ## Statistic evaluated on the original data
    df_base = tran(df)

    ## Numeric columns that take part in the bootstrap
    col_numeric = list(df_base.select_dtypes(include="number").columns)
    if col_sel is not None:
        col_numeric = list(set(col_numeric) & set(col_sel))

    ## Dimensions and storage
    n_samples = df.shape[0]
    stat_shape = (df_base.shape[0], len(col_numeric))
    alpha = (1 - con) / 2
    theta_hat = df_base[col_numeric].values

    theta_all = zeros((n_boot,) + stat_shape)
    se_boot_all = zeros((n_boot,) + stat_shape)
    z_all = zeros((n_boot,) + stat_shape)
    theta_sub = zeros((n_sub,) + stat_shape)

    ## Outer resampling loop
    for i_boot in range(n_boot):
        ## Resample rows with replacement and recompute the statistic
        rows_boot = choice(n_samples, size=n_samples, replace=True)
        df_boot = copy_meta(df, df.iloc[rows_boot, ])
        theta_all[i_boot] = tran(df_boot)[col_numeric].values

        ## Nested resamples of the outer resample approximate its SE
        for i_sub in range(n_sub):
            rows_sub = rows_boot[
                choice(n_samples, size=n_samples, replace=True)
            ]
            df_sub = copy_meta(df, df.iloc[rows_sub, ])
            theta_sub[i_sub] = tran(df_sub)[col_numeric].values
        se_boot_all[i_boot] = std(theta_sub, axis=0)

        ## Approximate pivot for this resample
        z_all[i_boot] = (theta_all[i_boot] - theta_hat) / se_boot_all[i_boot]

    ## Quantiles of the pivot; the upper quantile sets the lower bound
    t_lo, t_hi = quantile(z_all, q=[1 - alpha, alpha], axis=0)

    ## Scale by the spread of the outer-resample statistics
    se = std(theta_all, axis=0)
    theta_lo = theta_hat - t_lo * se
    theta_hi = theta_hat - t_hi * se

    ## Package bounds alongside the base results
    df_lo = DataFrame(
        data=theta_lo,
        columns=[name + "_lo" for name in col_numeric],
    )
    df_hi = DataFrame(
        data=theta_hi,
        columns=[name + "_up" for name in col_numeric],
    )

    df_ci = concat((df_lo, df_hi), axis=1).sort_index(axis=1)
    df_ci.index = df_base.index

    return concat((df_base, df_ci), axis=1)
Пример #21
0
 def set_state(self, random_state):
     """Reinstate a previously captured random-number state.

     ``random_state.stats_seed`` is given to ``set_seed`` and
     ``random_state.random_state`` is given to ``random.setstate``, in that
     order.
     """
     set_seed(random_state.stats_seed)
     random.setstate(random_state.random_state)
Пример #22
0
 def wrapper(*args, **kwargs):
     """Call ``func`` under a fixed seed, then restore the prior RNG state.

     The state returned by ``get_state()`` is captured before seeding and
     handed back to ``set_state()`` afterwards, so the seeding done here
     does not leak to the caller. ``seed`` and ``func`` come from the
     enclosing scope.

     Returns:
         Whatever ``func(*args, **kwargs)`` returns.
     """
     initial_state = get_state()
     set_seed(seed)
     try:
         return func(*args, **kwargs)
     finally:
         ## Restore even when func raises; otherwise the caller would be
         ## left with the temporarily seeded generator
         set_state(initial_state)
Пример #23
0
def normal(X, scale=1.0, seed=None, dt=None):
    """Add white noise

    Args:
        X (array-like): Values to perturb; noise is drawn with the same shape
        scale (float): Scale (standard deviation) passed to np.random.normal;
            the noise mean is 0
        seed (int or None): Passed to set_seed() before drawing. When None
            the current random state is left untouched.
        dt: Not used by this function.
            NOTE(review): presumably kept for a shared noise-function
            signature -- confirm against callers.

    Returns:
        X plus the drawn noise
    """
    ## Set seed only if given; seeding with None would re-seed the global
    ## generator and discard any seeding the caller did beforehand
    if seed is not None:
        set_seed(seed)

    ## np.shape also accepts plain sequences and scalars, unlike X.shape
    return X + np.random.normal(0, scale, size=np.shape(X))
Пример #24
0
def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False, index=None):
    r"""Evaluate a model at a random sample

    Draws n observations of the random inputs from the model's density, forms
    the outer product with the deterministic levels, and evaluates the model
    on the resulting design. With skip=True the design is returned without
    evaluation, and a runtime estimate is printed.

    For more expensive models, it can be helpful to tune n to achieve a reasonable runtime. An even more effective approach is to use skip evaluation along with tran_sp() to evaluate a small, representative sample. (See examples below.)

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): Number of observations to draw; required. Non-integral
            values are converted with int().
        df_det (DataFrame or None): Deterministic levels for evaluation; use
            "nom" for nominal deterministic levels. If the model has no
            deterministic variables (model.n_var_det == 0), df_det may be
            None.
        seed (int): Random seed; passed to set_seed() when not None and also
            forwarded to model.density.sample()
        append (bool): Append results to input values?
        skip (bool): Skip evaluation of the functions?
        index (str or None): Name of draw index column; not added if None

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Raises:
        ValueError: If n is None

    Examples::

        import grama as gr
        from grama.models import make_test
        DF = gr.Intention()

        # Simple random sample evaluation
        md = make_test()
        df = md >> gr.ev_sample(n=1e2, df_det="nom")
        df.describe()

        ## Use autoplot to visualize results
        (
            md
            >> gr.ev_sample(n=1e2, df_det="nom")
            >> gr.pt_auto()
        )

        ## Cantilever beam examples
        from grama.models import make_cantilever_beam
        md_beam = make_cantilever_beam()

        ## Use the draw index to facilitate plotting
        # Try running this without the `group` aesthetic in `geom_line()`;
        # without the group the plot will not have multiple lines.
        (
            md_beam
            >> gr.ev_sample(
                n=20,
                df_det=gr.df_make(w=3, t=gr.linspace(2, 4, 100)),
                index="idx",
            )

            >> gr.ggplot(gr.aes("t", "g_stress"))
            + gr.geom_line(gr.aes(color="w", group="idx"))
        )

        ## Use iocorr to generate input/output correlation tile plot
        (
            md_beam
            >> gr.ev_sample(n=1e3, df_det="nom", skip=True)
            # Generate input/output correlation summary
            >> gr.tf_iocorr()
            # Visualize
            >> gr.pt_auto()
        )

        ## Use support points to reduce model runtime
        (
            md_beam
            # Generate large input sample but don't evaluate outputs
            >> gr.ev_sample(n=1e5, df_det="nom", skip=True)
            # Reduce to a smaller---but representative---sample
            >> gr.tf_sp(n=50)
            # Evaluate the outputs
            >> gr.tf_md(md_beam)
        )

        ## Estimate probabilities
        (
            md_beam
            # Generate a large sample
            >> gr.ev_sample(n=1e5, df_det="nom")
            # Estimate probabilities of failure
            >> gr.tf_summarize(
                pof_stress=gr.mean(DF.g_stress <= 0),
                pof_disp=gr.mean(DF.g_disp <= 0),
            )
        )


    """
    ## Validate inputs
    invariants_eval_model(model, skip)
    invariants_eval_df(
        df_det,
        arg_name="df_det",
        valid_str=["nom"],
        acc_none=(model.n_var_det == 0),
    )
    if n is None:
        raise ValueError("Must provide a valid n value.")

    ## Seed the generator only on request
    if seed is not None:
        set_seed(seed)

    ## Coerce sample count to int
    if not isinstance(n, Integral):
        print("eval_sample() is rounding n...")
        n = int(n)

    ## Sample the random inputs; optionally record the draw index
    df_rand = model.density.sample(n=n, seed=seed)
    if index is not None:
        df_rand[index] = df_rand.index

    ## Outer product with the deterministic levels
    df_samp = model.var_outer(df_rand, df_det=df_det)

    ## Evaluate unless asked to skip
    if not skip:
        df_res = eval_df(model, df=df_samp, append=append)
        ## Tag the results for plotting; pandas may warn on attribute set
        with catch_warnings():
            simplefilter("ignore")
            df_res._plot_info = {
                "type": "sample_outputs",
                "var": model.var,
                "out": model.out,
            }

        return df_res

    ## Skipped evaluation: report the expected cost of the design
    runtime_est = model.runtime(df_samp.shape[0])
    if runtime_est > 0:
        print(
            "Estimated runtime for design with model ({0:1}):\n  {1:4.3} sec".format(
                model.name, runtime_est
            )
        )
    else:
        print("Design runtime estimates unavailable; model has no timing data.")

    ## Tag the design for plotting; pandas may warn on attribute set
    with catch_warnings():
        simplefilter("ignore")
        df_samp._plot_info = {
            "type": "sample_inputs",
            "var": model.var_rand,
        }

    return df_samp
Пример #25
0
def gamma(X, shape=2, scale=2, seed=None, dt=None):
    """Add gamma-distributed noise

    Args:
        X (array-like): Values to perturb; noise is drawn with the same shape
        shape (float): Shape parameter passed to np.random.gamma
        scale (float): Scale parameter passed to np.random.gamma
        seed (int or None): Passed to set_seed() before drawing. When None
            the current random state is left untouched.
        dt: Not used by this function.
            NOTE(review): presumably kept for a shared noise-function
            signature -- confirm against callers.

    Returns:
        X plus the drawn noise
    """
    ## Set seed only if given; seeding with None would re-seed the global
    ## generator and discard any seeding the caller did beforehand
    if seed is not None:
        set_seed(seed)

    ## np.shape also accepts plain sequences and scalars, unlike X.shape
    return X + np.random.gamma(shape, scale=scale, size=np.shape(X))