def rprop(n, Sigma, X_means, seed=None):
    r"""Draw a sample from a Gaussian mixture distribution

    Draw a sample from a Gaussian mixture distribution with a common
    covariance matrix and different means.

    Args:
        n (int): Number of observations in sample
        Sigma (2d numpy array): Common covariance matrix for mixture distribution
        X_means (2d numpy array): Means for mixture distribution

    Preconditions:
        Sigma.shape[0] == X_means.shape[1]
        Sigma.shape[1] == X_means.shape[1]

    Returns:
        2d numpy array: Sample of observations

    """
    if seed is not None:
        set_seed(seed)

    n_pareto, n_dim = X_means.shape
    X_sample = zeros((n, n_dim))

    for i in range(n):
        idx = choice(n_pareto)
        X_sample[i, :] = multivariate_normal(
            mean=X_means[idx, :],
            cov=Sigma,
            size=1,
        )

    return X_sample
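# A minimal usage sketch for rprop. The mixture means and shared covariance
# below are illustrative only; the numpy.random helpers used inside rprop
# (set_seed, zeros, choice, multivariate_normal) are assumed to be imported
# at module level as in the function body.
import numpy as np

X_means_demo = np.array([[0.0, 0.0], [5.0, 5.0]])  # two mixture components
Sigma_demo = 0.5 * np.eye(2)                        # common covariance
X_demo = rprop(100, Sigma_demo, X_means_demo, seed=101)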
def sample(self, n=1, seed=None):
    """Draw samples from copula

    Draw samples according to the Gaussian copula dependence structure.

    Args:
        self (gr.CopulaGaussian):
        n (int): Number of samples to draw
        seed (int): Random seed

    Returns:
        DataFrame: Copula samples

    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Generate correlated samples
    gaussian_samples = multivariate_normal(
        mean=[0] * len(self.var_rand), cov=self.Sigma, size=n
    )
    ## Convert to uniform marginals
    quantiles = valid_dist["norm"].cdf(gaussian_samples)

    return DataFrame(data=quantiles, columns=self.var_rand)
def param_sample_random(sample_space, count=100, seed=None):
    """
    :param sample_space: A dictionary with either
        - Scalar values, which will be used as-is.
        - List, tuple, set or array, which will be chosen from at random (uniform).
        - Scipy distributions, which will be sampled from (via .rvs()).
    :param count: How many samples
    :param seed: Optional random seed
    :return: An iterable of samples.
    """
    # possible improvement: remove duplicates if any
    if seed is not None:
        set_seed(seed)

    sampled = []
    for nr in range(count):
        sample = type(sample_space)()
        for key, value in sample_space.items():
            if isinstance(value, (int, float, str)):
                sample[key] = value
            elif isinstance(value, (list, tuple, set, ndarray)):
                # Convert to a list so sets are also supported by choice()
                sample[key] = choice(list(value))
            elif hasattr(value, 'rvs'):
                sample[key] = value.rvs()
            else:
                raise ValueError(
                    'param_sample_random does not know what to do with object of type {0:}'
                    .format(type(value)))
        sampled.append(sample)
    return sampled
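# A minimal usage sketch for param_sample_random. The search space below is
# hypothetical; scipy is assumed available for the distribution-valued entry.
from scipy import stats

space_demo = {
    "model": "rf",                                # scalar: used as-is
    "max_depth": [2, 4, 8, 16],                   # sequence: chosen at random
    "learning_rate": stats.uniform(0.01, 0.3),    # frozen distribution: .rvs() is called
}
samples_demo = param_sample_random(space_demo, count=5, seed=42)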
def random_covariance(X, cov=0.1, K=2, seed=None, dt=None):
    """Add covariance between K randomly selected electrode pairs."""
    set_seed(seed)

    # Note sizes
    M, N = X.shape

    # Init (will contain covar electrodes)
    noi = np.copy(X)

    # Pick K random pairs of M rows (electrodes)
    index = np.arange(M)
    np.random.shuffle(index)
    index = index[0:(K * 2)]

    # Add covar
    L = len(index)
    for i in range(0, L, 2):
        e1 = X[index[i], :].copy()
        e2 = X[index[i + 1], :]
        e1 += (e2 * cov)

        noi[index[i], :] = e1

    return noi
def random_Gabarit(form=None, seed=None):
    """
    Generate a random Gabarit

    Parameters:
    - form: (string) {None, 'lowpass', 'highpass', 'bandpass', 'bandstop'}.
      Gives the type of filter. If None, the type is randomized
    - seed: if not None, indicates the seed to use for the random part
      (in order to be reproducible, the seed is stored in the name of the gabarit)
    """
    # change the seed if asked (otherwise, set the seed)
    if not seed:
        set_seed(None)  # (from doc): If seed is omitted or None, current system time is used
        seed = randint(0, 16777215)  # between 0 and 2^24-1
    set_seed(seed)

    # choose a form if asked
    if form is None:
        # form = choice(("lowpass", "highpass", "bandpass", "bandstop"))
        form = choice(("lowpass",))

    Fs = randint(500, 100000)

    # lowpass
    if form == 'lowpass':
        Fpass = uniform(0.01, 0.9) * Fs / 2   # Wpass between 0.01 and 0.9
        Fstop = uniform(Fpass, Fs / 2)        # Wstop between Wpass and 1
        gp = uniform(-5, 5)                   # upperband for pass in [-5;5]
        gps = uniform(0.1, 5)                 # pass width in [0.1;5]
        gs = uniform(-80, 2 * (gp - gps))     # stop band in [-80 and 2*lowerband]
        bands = [(0, Fpass), (Fstop, None)]
        Gains = [(gp, gp - gps), gs]
    else:
        raise ValueError('The form is not valid')

    return Gabarit(Fs, bands, Gains, seed=seed)
def push_seed(seed=None):  # pragma no cover
    """
    Set a temporary seed to the numpy random number generator, restoring it at
    the end of the context. If seed is None, then get a seed from /dev/urandom
    (or the windows analogue).
    """
    from numpy.random import get_state, set_state, seed as set_seed

    state = get_state()
    set_seed(seed)
    yield
    set_state(state)
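# A minimal usage sketch, assuming push_seed is wrapped with
# contextlib.contextmanager (the generator above is written in that style;
# the wrapping step is an assumption, not shown in the original source).
from contextlib import contextmanager
import numpy as np

push_seed_cm = contextmanager(push_seed)

with push_seed_cm(seed=0):
    a = np.random.rand(3)  # reproducible draws inside the context
b = np.random.rand(3)      # outer RNG state is restored afterwards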
def sample(self, n=1, seed=None):
    """Draw samples from copula

    Args:
        n (int): Number of samples
        seed (int): Random seed

    Returns:
        DataFrame: Independent samples

    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    return DataFrame(
        data=random((n, len(self.var_rand))), columns=self.var_rand
    )
def eval_lhs(model, n=1, df_det=None, seed=None, append=True, skip=False, criterion=None):
    r"""Latin Hypercube evaluation

    Evaluates a given model on a Latin hypercube sample (LHS) using the
    model's density.

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): Number of LHS samples to draw
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        seed (int): Random seed to use
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?
        criterion (str): flag for LHS sample criterion
            allowable values: None, "center" ("c"), "maxmin" ("m"),
            "centermaxmin" ("cm"), "correlation" ("corr")

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Notes:
        - Wrapper on pyDOE.lhs

    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n, Integral):
        print("eval_lhs() is rounding n...")
        n = int(n)

    ## Draw samples; pass the requested LHS criterion through to pyDOE
    df_quant = DataFrame(
        data=lhs(model.n_var_rand, samples=n, criterion=criterion),
        columns=model.var_rand,
    )

    ## Convert samples to desired marginals
    df_rand = model.density.pr2sample(df_quant)
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        return df_samp
    else:
        return gr.eval_df(model, df=df_samp, append=append)
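# A brief usage sketch for eval_lhs, mirroring the eval_monte_carlo example
# below; make_test and the "nom" deterministic flag follow the other grama
# docstrings in this collection, and the criterion value is illustrative.
import grama as gr
from grama.models import make_test

md_demo = make_test()
df_lhs_demo = eval_lhs(md_demo, n=100, df_det="nom", seed=101, criterion="center")
print(df_lhs_demo.describe())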
def paired_covariance(X, cov=0.1, pairs=None, seed=None, dt=None):
    """Add covariance between the given electrode pairs."""
    set_seed(seed)

    # Note sizes
    M, N = X.shape

    # Init (will contain covar electrodes)
    noi = np.copy(X)

    # Add covar
    for pair in pairs:
        e1 = X[pair[0], :].copy()
        e2 = X[pair[1], :].copy()
        e1 += (e2 * cov)
        e2 += (e1 * cov)

        noi[pair[0], :] = e1
        noi[pair[1], :] = e2

    return noi
def brown(X, scale=0.5, seed=None, dt=None):
    """Add brown noise"""
    set_seed(seed)

    X = np.atleast_2d(X)
    M, N = X.shape

    noi = np.zeros_like(X)
    for j in range(M):
        d = np.random.normal(0, scale)
        rates = [d, ]
        for _ in range(N - 1):
            d += np.random.normal(0, scale)
            rates.append(d)

        noi[j, :] = rates

    return X + noi
def eval_sinews(
    model,
    n_density=10,
    n_sweeps=3,
    seed=None,
    df_det=None,
    varname="sweep_var",
    indname="sweep_ind",
    append=True,
    skip=False,
):
    r"""Sweep study

    Perform coordinate sweeps over each model random variable ("sinew" design).
    Use random starting points drawn from the joint density. Optionally sweep
    the deterministic variables.

    For more expensive models, it can be helpful to tune n_density and n_sweeps
    to achieve a reasonable runtime.

    Use gr.plot_auto() to construct a quick visualization of the output
    dataframe. Use the `skip` version to visualize the design, and the
    non-skipped version to visualize the results.

    Args:
        model (gr.Model): Model to evaluate
        n_density (numeric): Number of points along each sweep
        n_sweeps (numeric): Number of sweeps per random variable
        seed (int): Random seed to use
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels, use "swp" to sweep deterministic
            variables
        varname (str): Column name to give for sweep variable; default="sweep_var"
        indname (str): Column name to give for sweep index; default="sweep_ind"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples:
        >>> import grama as gr
        >>> md = gr.make_cantilever_beam()
        >>> # Skip evaluation, vis. design
        >>> df_design = md >> gr.ev_sinews(df_det="nom", skip=True)
        >>> df_design >> gr.pt_auto()
        >>> # Vis results
        >>> df_sinew = md >> gr.ev_sinews(df_det="nom")
        >>> df_sinew >> gr.pt_auto()

    """
    ## Override model if deterministic sweeps desired
    if df_det == "swp":
        ## Collect sweep-able deterministic variables
        var_sweep = list(
            filter(
                lambda v: isfinite(model.domain.get_width(v))
                & (model.domain.get_width(v) > 0),
                model.var_det,
            ))
        ## Generate pseudo-marginals
        dicts_var = {}
        for v in var_sweep:
            dicts_var[v] = {
                "dist": "uniform",
                "loc": model.domain.get_bound(v)[0],
                "scale": model.domain.get_width(v),
            }
        ## Overwrite model
        model = comp_marginals(model, **dicts_var)
        ## Restore flag
        df_det = "nom"

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n_density, Integral):
        print("eval_sinews() is rounding n_density...")
        n_density = int(n_density)
    if not isinstance(n_sweeps, Integral):
        print("eval_sinews() is rounding n_sweeps...")
        n_sweeps = int(n_sweeps)

    ## Build quantile sweep data
    q_random = tile(random((1, model.n_var_rand, n_sweeps)), (n_density, 1, 1))
    q_dense = linspace(0, 1, num=n_density)
    Q_all = zeros((n_density * n_sweeps * model.n_var_rand, model.n_var_rand))
    C_var = ["tmp"] * (n_density * n_sweeps * model.n_var_rand)
    C_ind = [0] * (n_density * n_sweeps * model.n_var_rand)

    ## Interlace
    for i_input in range(model.n_var_rand):
        ind_base = i_input * n_density * n_sweeps
        for i_sweep in range(n_sweeps):
            ind_start = ind_base + i_sweep * n_density
            ind_end = ind_base + (i_sweep + 1) * n_density

            Q_all[ind_start:ind_end] = q_random[:, :, i_sweep]
            Q_all[ind_start:ind_end, i_input] = q_dense
            C_var[ind_start:ind_end] = [model.var_rand[i_input]] * n_density
            C_ind[ind_start:ind_end] = [i_sweep] * n_density

            ## Modify endpoints for infinite support
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(0)):
                Q_all[ind_start, i_input] = 1 / n_density / 10
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(1)):
                Q_all[ind_end - 1, i_input] = 1 - 1 / n_density / 10

    ## Assemble sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    df_rand[indname] = C_ind

    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n {1:4.3} sec"
                .format(model.name, runtime_est))
        else:
            print(
                "Design runtime estimates unavailable; model has no timing data."
            )

        ## For autoplot
        with catch_warnings():
            simplefilter("ignore")
            df_samp._plot_info = {
                "type": "sinew_inputs",
                "var": model.var_rand,
            }

        ## Pass-through
        return df_samp

    ## Apply
    df_res = eval_df(model, df=df_samp, append=append)

    ## For autoplot
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sinew_outputs",
            "var": model.var_rand,
            "out": model.out,
        }

    return df_res
def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False):
    r"""Draw a random sample

    Evaluates a model with a random sample of the random model inputs.
    Generates outer product with deterministic samples.

    For more expensive models, it can be helpful to tune n to achieve a
    reasonable runtime. An even more effective approach is to use skip
    evaluation along with tran_sp() to evaluate a small, representative sample.
    (See examples below.)

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): number of observations to draw
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        seed (int): random seed to use
        append (bool): Append results to input values?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples:
        >>> import grama as gr
        >>> from grama.models import make_test
        >>> DF = gr.Intention()
        >>>
        >>> # Simple random sample evaluation
        >>> md = make_test()
        >>> df = md >> gr.ev_sample(n=1e2, df_det="nom")
        >>> df.describe()
        >>>
        >>> ## Use autoplot to visualize results
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=1e2, df_det="nom")
        >>>     >> gr.pt_auto()
        >>> )
        >>>
        >>> ## Cantilever beam examples
        >>> from grama.models import make_cantilever_beam
        >>> md_beam = make_cantilever_beam()
        >>>
        >>> ## Use iocorr to generate input/output correlation tile plot
        >>> (
        >>>     md_beam
        >>>     >> gr.ev_sample(n=1e3, df_det="nom", skip=True)
        >>>     # Generate input/output correlation summary
        >>>     >> gr.tf_iocorr()
        >>>     # Visualize
        >>>     >> gr.pt_auto()
        >>> )
        >>>
        >>> ## Use support points to reduce model runtime
        >>> (
        >>>     md_beam
        >>>     # Generate large input sample but don't evaluate outputs
        >>>     >> gr.ev_sample(n=1e5, df_det="nom", skip=True)
        >>>     # Reduce to a smaller---but representative---sample
        >>>     >> gr.tf_sp(n=50)
        >>>     # Evaluate the outputs
        >>>     >> gr.tf_md(md_beam)
        >>> )
        >>>
        >>> ## Estimate probabilities
        >>> (
        >>>     md_beam
        >>>     # Generate large
        >>>     >> gr.ev_sample(n=1e5, df_det="nom")
        >>>     # Estimate probabilities of failure
        >>>     >> gr.tf_summarize(
        >>>         pof_stress=gr.mean(DF.g_stress <= 0),
        >>>         pof_disp=gr.mean(DF.g_disp <= 0),
        >>>     )
        >>> )

    """
    ## Check invariants
    if n is None:
        raise ValueError("Must provide a valid n value.")

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n, Integral):
        print("eval_sample() is rounding n...")
        n = int(n)

    ## Draw samples
    df_rand = model.density.sample(n=n, seed=seed)
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n {1:4.3} sec"
                .format(model.name, runtime_est))
        else:
            print(
                "Design runtime estimates unavailable; model has no timing data."
            )

        ## Attach metadata
        with catch_warnings():
            simplefilter("ignore")
            df_samp._plot_info = {
                "type": "sample_inputs",
                "var": model.var_rand,
            }

        return df_samp

    df_res = eval_df(model, df=df_samp, append=append)

    ## Attach metadata
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sample_outputs",
            "var": model.var,
            "out": model.out,
        }

    return df_res
def eval_hybrid(
    model,
    n=1,
    plan="first",
    df_det=None,
    varname="hybrid_var",
    seed=None,
    append=True,
    skip=False,
):
    r"""Hybrid points for Sobol' indices

    Use the "hybrid point" design (Sobol', 1999) to support estimating Sobol'
    indices. Use gr.tran_sobol() to post-process the results and compute
    estimates.

    Args:
        model (gr.Model): Model to evaluate; must have CopulaIndependence
        n (numeric): Number of points along each sweep
        plan (str): Sobol' index to compute; plan={"first", "total"}
        seed (int): Random seed to use
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        varname (str): Column name to give for sweep variable; default="hybrid_var"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    References:
        I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models"
        (1999) MMCE, Vol 1.

    Examples:
        >>> import grama as gr
        >>> md = gr.make_cantilever_beam()
        >>> df_first = md >> gr.ev_hybrid(df_det="nom", plan="first")
        >>> df_first >> gr.tf_sobol()
        >>>
        >>> df_total = md >> gr.ev_hybrid(df_det="nom", plan="total")
        >>> df_total >> gr.tf_sobol()

    """
    ## Check invariants
    if not isinstance(model.density.copula, CopulaIndependence):
        raise ValueError(
            "model must have CopulaIndependence structure;\n"
            + "Sobol' indices only defined for independent variables")

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    if not isinstance(n, Integral):
        print("eval_hybrid() is rounding n...")
        n = int(n)

    ## Draw hybrid points
    X = random((n, model.n_var_rand))
    Z = random((n, model.n_var_rand))

    ## Reserve space
    Q_all = zeros((n * (model.n_var_rand + 1), model.n_var_rand))
    Q_all[:n] = X  # Base samples
    C_var = ["_"] * (n * (model.n_var_rand + 1))

    ## Interleave samples
    for i_in in range(model.n_var_rand):
        i_start = (i_in + 1) * n
        i_end = (i_in + 2) * n

        if plan == "first":
            Q_all[i_start:i_end, :] = Z
            Q_all[i_start:i_end, i_in] = X[:, i_in]
        elif plan == "total":
            Q_all[i_start:i_end, :] = X
            Q_all[i_start:i_end, i_in] = Z[:, i_in]
        else:
            raise ValueError("plan must be `first` or `total`")

        C_var[i_start:i_end] = [model.var_rand[i_in]] * n

    ## Construct sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    ## Convert samples to desired marginals
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        with catch_warnings():
            simplefilter("ignore")
            df_samp._meta = dict(
                type="eval_hybrid",
                varname=varname,
                plan=plan,
                var_rand=model.var_rand,
                out=model.out,
            )

        return df_samp

    df_res = eval_df(model, df=df_samp, append=append)

    with catch_warnings():
        simplefilter("ignore")
        df_res._meta = dict(
            type="eval_hybrid",
            varname=varname,
            plan=plan,
            var_rand=model.var_rand,
            out=model.out,
        )

    return df_res
def eval_monte_carlo(model, n=1, df_det=None, seed=None, append=True, skip=False):
    r"""Monte Carlo evaluation

    Evaluates a given model at a given dataframe. Generates outer product
    with deterministic samples.

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): number of Monte Carlo samples to draw
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        seed (int): random seed to use
        append (bool): Append results to random values?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples:
        >>> import grama as gr
        >>> from grama.models import make_test
        >>> md = make_test()
        >>> df = md >> gr.ev_monte_carlo(n=1e2, df_det="nom")
        >>> df.describe()

    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n, Integral):
        print("eval_monte_carlo() is rounding n...")
        n = int(n)

    ## Draw samples
    df_rand = model.density.sample(n=n, seed=seed)
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n {1:4.3} sec"
                .format(model.name, runtime_est))
        else:
            print(
                "Design runtime estimates unavailable; model has no timing data."
            )

        ## Attach metadata
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            df_samp._plot_info = {
                "type": "monte_carlo_inputs",
                "var": model.var_rand,
            }

        return df_samp
    else:
        df_res = gr.eval_df(model, df=df_samp, append=append)

        ## Attach metadata
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            df_res._plot_info = {
                "type": "monte_carlo_outputs",
                "out": model.out,
            }

        return df_res
Test_Experiment = "CNN_GRU_Dual"
record_path = "record.csv"

datasets = FLAGS.datasets
# override
datasets = ["Musical_Instruments_5"]
seeds = FLAGS.seeds
# override
# seeds = [55, 66, 77, 88, 99]

for get_experiment_result in range(FLAGS.get_result_times):
    for dataset in datasets:
        for seed in seeds:
            from numpy.random import seed as set_seed
            set_seed(seed=seed)
            from tensorflow import set_random_seed
            set_random_seed(seed=seed)

            src_document_all = os.path.join(os.getcwd(), "data", dataset, "document.all")
            src_ratings_data = os.path.join(os.getcwd(), "data", dataset, "ratings.dat")

            np.random.seed(seed)

            ratings = pd.read_csv(
                src_ratings_data,
                sep="::",
                names=["user", "item", "rating", "timestamp"],
                engine='python')
            ratings_array = ratings.values
def tran_kfolds(
    df,
    k=None,
    ft=None,
    out=None,
    var_fold=None,
    suffix="_mean",
    summaries=None,
    tf=tf_summarize,
    shuffle=True,
    seed=None,
):
    r"""Perform k-fold CV

    Perform k-fold cross-validation (CV) using a given fitting procedure (ft).
    Optionally provide a fold identifier column, or (randomly) assign folds.

    Args:
        df (DataFrame): Data to pass to given fitting procedure
        ft (gr.ft_): Partially-evaluated grama fit function; defines model
            fitting procedure and outputs to aggregate
        tf (gr.tf_): Partially-evaluated grama transform function; evaluation
            of fitted model will be passed to tf and provided with keyword
            arguments from summaries
        out (list or None): Outputs for which to compute `summaries`; None
            uses ft.out
        var_fold (str or None): Column to treat as fold identifier; overrides `k`
        suffix (str): Suffix for predicted value; used to distinguish between
            predicted and actual
        summaries (dict of functions): Summary functions to pass to tf; will be
            evaluated for outputs of ft. Each summary must have signature
            summary(f_pred, f_meas). Grama includes builtin options:
            gr.mse, gr.rmse, gr.rel_mse, gr.rsq, gr.ndme
        k (int): Number of folds; k=5 to k=10 recommended [1]
        shuffle (bool): Shuffle the data before CV? True recommended [1]
        seed (int or None): Random seed for the shuffle

    Notes:
        - Many grama functions support *partial evaluation*; this allows one to
          specify things like hyperparameters in fitting functions without
          providing data and executing the fit. You can take advantage of this
          functionality to easily do hyperparameter studies.

    Returns:
        DataFrame: Aggregated results within each of k-folds using given model
        and summary transform

    References:
        [1] James, Witten, Hastie, and Tibshirani, "An introduction to
        statistical learning" (2017), Chapter 5. Resampling Methods

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_stang
        >>> from grama.fit import ft_rf
        >>> df_kfolds = (
        >>>     df_stang
        >>>     >> gr.tf_kfolds(
        >>>         k=5,
        >>>         ft=ft_rf(out=["thick"], var=["E", "mu"]),
        >>>     )
        >>> )

    """
    ## Check invariants
    if ft is None:
        raise ValueError("Must provide ft keyword argument")
    if (k is None) and (var_fold is None):
        print("... tran_kfolds is using default k=5")
        k = 5
    if summaries is None:
        print("... tran_kfolds is using default summaries mse and rsq")
        summaries = dict(mse=mse, rsq=rsq)

    n = df.shape[0]
    ## Handle custom folds
    if not (var_fold is None):
        ## Check for a valid var_fold
        if not (var_fold in df.columns):
            raise ValueError("var_fold must be in df.columns or None")
        ## Build folds
        levels = unique(df[var_fold])
        k = len(levels)
        print("... tran_kfolds found {} levels via var_fold".format(k))
        Is = []
        for l in levels:
            Is.append(list(arange(n)[df[var_fold] == l]))

    else:
        ## Shuffle data indices
        if shuffle:
            if seed:
                set_seed(seed)
            I = permutation(n)
        else:
            I = arange(n)
        ## Build folds
        di = int(ceil(n / k))
        Is = [I[i * di:min((i + 1) * di, n)] for i in range(k)]

    ## Iterate over folds
    df_res = DataFrame()
    for i in range(k):
        ## Train by out-of-fold data
        md_fit = df >> tf_filter(~var_in(X.index, Is[i])) >> ft

        ## Determine predicted and actual
        if out is None:
            out = str_replace(md_fit.out, suffix, "")
        else:
            out = str_replace(out, suffix, "")

        ## Test by in-fold data
        df_pred = md_fit >> ev_df(
            df=df >> tf_filter(var_in(X.index, Is[i])), append=False
        )

        ## Specialize summaries for output names
        summaries_all = ChainMap(*[{
            key + "_" + o: fun(X[o + suffix], X[o])
            for key, fun in summaries.items()
        } for o in out])

        ## Aggregate
        df_summary_tmp = (
            df_pred
            >> tf_bind_cols(df[out] >> tf_filter(var_in(X.index, Is[i])))
            >> tf(**summaries_all)
            # >> tf_mutate(_kfold=i)
        )

        if var_fold is None:
            df_summary_tmp = df_summary_tmp >> tf_mutate(_kfold=i)
        else:
            df_summary_tmp[var_fold] = levels[i]

        df_res = concat(
            (df_res, df_summary_tmp),
            axis=0,
        ).reset_index(drop=True)

    return df_res
def fit_lolo(
    df,
    md=None,
    var=None,
    out=None,
    domain=None,
    density=None,
    seed=None,
    return_std=True,
    suppress_warnings=True,
    **kwargs
):
    r"""Fit a random forest

    Fit a random forest to given data. Specify inputs and outputs, or inherit
    from an existing model.

    Args:
        df (DataFrame): Data for function fitting
        md (gr.Model): Model from which to inherit metadata
        var (list(str) or None): List of features or None for all except outputs
        out (list(str)): List of outputs to fit
        domain (gr.Domain): Domain for new model
        density (gr.Density): Density for new model
        seed (int or None): Random seed for fitting process
        return_std (bool): Return predictive standard deviations?
        suppress_warnings (bool): Suppress warnings when fitting?

    Keyword Arguments:
        num_trees (int):
        use_jackknife (bool):
        bias_learner ():
        leaf_learner ():
        subset_strategy (str):
        min_leaf_instances (int):
        max_depth (int):
        uncertainty_calibration (bool):
        randomize_pivot_location (bool):
        randomly_rotate_features (bool):

    Returns:
        gr.Model: A grama model with fitted function(s)

    Notes:
        - Wrapper for lolopy.learners.RandomForestRegressor

    """
    if suppress_warnings:
        filterwarnings("ignore")

    n_obs, n_in = df.shape

    ## Check minimum rows
    if n_obs < 8:
        raise ValueError("The lolo random forest requires at least 8 rows")

    ## Infer fitting metadata, if available
    if not (md is None):
        domain = md.domain
        density = md.density
        out = md.out

    ## Check invariants
    if not set(out).issubset(set(df.columns)):
        raise ValueError("out must be subset of df.columns")
    ## Default input value
    if var is None:
        var = list(set(df.columns).difference(set(out)))
    ## Check more invariants
    set_inter = set(out).intersection(set(var))
    if len(set_inter) > 0:
        raise ValueError(
            "outputs and inputs must be disjoint; intersect = {}".format(
                set_inter))
    if not set(var).issubset(set(df.columns)):
        raise ValueError("var must be subset of df.columns")

    ## Construct a random forest for each output
    functions = []

    for output in out:
        rf = RandomForestRegressor(**kwargs)
        set_seed(seed)
        rf.fit(df[var].values, df[output].values)

        name = "RF"
        fun = FunctionRFR(rf, var, [output], name, 0, return_std)
        functions.append(fun)

    ## Construct model
    return gr.Model(functions=functions, domain=domain, density=density)
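# A brief usage sketch for fit_lolo. The df_stang data and the thick/E/mu
# column names follow the tran_kfolds example above; this assumes the lolopy
# backend is installed.
from grama.data import df_stang

md_rf_demo = fit_lolo(df_stang, out=["thick"], var=["E", "mu"], seed=101)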
def set_state(self, random_state):
    seed = random_state.stats_seed
    set_seed(seed)

    state = random_state.random_state
    random.setstate(state)
def tran_bootstrap(
    df, tran=None, n_boot=500, n_sub=25, con=0.90, col_sel=None, seed=None
):
    r"""Estimate bootstrap confidence intervals

    Estimate bootstrap confidence intervals for a given transform. Uses the
    "bootstrap-t" procedure discussed in Efron and Tibshirani (1993).

    Args:
        df (DataFrame): Data to bootstrap
        tran (grama tran_ function): Transform procedure which generates statistic
        n_boot (numeric): Monte Carlo resamples for bootstrap
        n_sub (numeric): Nested resamples to estimate SE
        con (float): Confidence level
        col_sel (list(string)): Columns to include in bootstrap calculation

    Returns:
        DataFrame: Results of tran(df), plus _lo and _up columns for numeric columns

    References and notes:
        Efron and Tibshirani (1993) "The bootstrap-t procedure... is
        particularly applicable to location statistics like the sample mean....
        The bootstrap-t method, at least in its simple form, cannot be trusted
        for more general problems, like setting a confidence interval for a
        correlation coefficient."

    Examples:

    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n_boot, Integral):
        print("tran_bootstrap() is rounding n_boot...")
        n_boot = int(n_boot)
    if not isinstance(n_sub, Integral):
        print("tran_bootstrap() is rounding n_sub...")
        n_sub = int(n_sub)

    ## Base results
    df_base = tran(df)

    ## Select columns for bootstrap
    col_numeric = list(df_base.select_dtypes(include="number").columns)
    if not (col_sel is None):
        col_numeric = list(set(col_numeric).intersection(set(col_sel)))

    ## Setup
    n_samples = df.shape[0]
    n_row = df_base.shape[0]
    n_col = len(col_numeric)
    alpha = (1 - con) / 2
    theta_hat = df_base[col_numeric].values

    theta_all = zeros((n_boot, n_row, n_col))
    se_boot_all = zeros((n_boot, n_row, n_col))
    z_all = zeros((n_boot, n_row, n_col))
    theta_sub = zeros((n_sub, n_row, n_col))

    ## Main loop
    for ind in range(n_boot):
        ## Construct resample
        Ib = choice(n_samples, size=n_samples, replace=True)
        df_tmp = copy_meta(df, df.iloc[Ib, ])
        theta_all[ind] = tran(df_tmp)[col_numeric].values

        ## Internal loop to approximate SE
        for jnd in range(n_sub):
            Isub = Ib[choice(n_samples, size=n_samples, replace=True)]
            df_tmp = copy_meta(df, df.iloc[Isub, ])
            theta_sub[jnd] = tran(df_tmp)[col_numeric].values
        se_boot_all[ind] = std(theta_sub, axis=0)

        ## Construct approximate pivot
        z_all[ind] = (theta_all[ind] - theta_hat) / se_boot_all[ind]

    ## Compute bootstrap table
    t_lo, t_hi = quantile(z_all, q=[1 - alpha, alpha], axis=0)

    ## Estimate bootstrap intervals
    se = std(theta_all, axis=0)
    theta_lo = theta_hat - t_lo * se
    theta_hi = theta_hat - t_hi * se

    ## Assemble output data
    col_lo = list(map(lambda s: s + "_lo", col_numeric))
    col_hi = list(map(lambda s: s + "_up", col_numeric))

    df_lo = DataFrame(data=theta_lo, columns=col_lo)
    df_hi = DataFrame(data=theta_hi, columns=col_hi)

    df_ci = concat((df_lo, df_hi), axis=1).sort_index(axis=1)
    df_ci.index = df_base.index

    return concat((df_base, df_ci), axis=1)
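# A brief usage sketch for tran_bootstrap: bootstrap-t intervals on a column
# mean. The df_stang data, the E column, and the gr.tf_summarize/gr.mean
# pipeline follow the examples used elsewhere in this collection; treat the
# exact transform as illustrative.
import grama as gr
from grama.data import df_stang

DF = gr.Intention()
df_ci_demo = tran_bootstrap(
    df_stang,
    tran=lambda df: df >> gr.tf_summarize(E_mean=gr.mean(DF.E)),
    n_boot=500,
    n_sub=25,
    seed=101,
)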
def wrapper(*args, **kwargs):
    initial_state = get_state()
    set_seed(seed)
    return_value = func(*args, **kwargs)
    set_state(initial_state)
    return return_value
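# For context: a minimal sketch of the kind of decorator this wrapper would
# sit inside. The decorator name `with_seed` and the functools.wraps usage are
# assumptions for illustration, not taken from the original source.
from functools import wraps
from numpy.random import get_state, set_state, seed as set_seed

def with_seed(seed):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            initial_state = get_state()   # save the caller's RNG state
            set_seed(seed)                # seed for a reproducible call
            return_value = func(*args, **kwargs)
            set_state(initial_state)      # restore the caller's RNG state
            return return_value
        return wrapper
    return decorator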
def normal(X, scale=1.0, seed=None, dt=None):
    """Add white noise"""
    set_seed(seed)
    return X + np.random.normal(0, scale, size=X.shape)
def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False, index=None):
    r"""Draw a random sample

    Evaluates a model with a random sample of the random model inputs.
    Generates outer product with deterministic samples.

    For more expensive models, it can be helpful to tune n to achieve a
    reasonable runtime. An even more effective approach is to use skip
    evaluation along with tran_sp() to evaluate a small, representative sample.
    (See examples below.)

    Args:
        model (gr.Model): Model to evaluate
        n (numeric): number of observations to draw
        df_det (DataFrame or None): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels. If provided model
            has no deterministic variables (model.n_var_det == 0), then
            df_det may equal None.
        seed (int): random seed to use
        append (bool): Append results to input values?
        skip (bool): Skip evaluation of the functions?
        index (str or None): Name of draw index column; not added if None

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples::

        import grama as gr
        from grama.models import make_test
        DF = gr.Intention()

        # Simple random sample evaluation
        md = make_test()
        df = md >> gr.ev_sample(n=1e2, df_det="nom")
        df.describe()

        ## Use autoplot to visualize results
        (
            md
            >> gr.ev_sample(n=1e2, df_det="nom")
            >> gr.pt_auto()
        )

        ## Cantilever beam examples
        from grama.models import make_cantilever_beam
        md_beam = make_cantilever_beam()

        ## Use the draw index to facilitate plotting
        # Try running this without the `group` aesthetic in `geom_line()`;
        # without the group the plot will not have multiple lines.
        (
            md_beam
            >> gr.ev_sample(
                n=20,
                df_det=gr.df_make(w=3, t=gr.linspace(2, 4, 100)),
                index="idx",
            )
            >> gr.ggplot(gr.aes("t", "g_stress"))
            + gr.geom_line(gr.aes(color="w", group="idx"))
        )

        ## Use iocorr to generate input/output correlation tile plot
        (
            md_beam
            >> gr.ev_sample(n=1e3, df_det="nom", skip=True)
            # Generate input/output correlation summary
            >> gr.tf_iocorr()
            # Visualize
            >> gr.pt_auto()
        )

        ## Use support points to reduce model runtime
        (
            md_beam
            # Generate large input sample but don't evaluate outputs
            >> gr.ev_sample(n=1e5, df_det="nom", skip=True)
            # Reduce to a smaller---but representative---sample
            >> gr.tf_sp(n=50)
            # Evaluate the outputs
            >> gr.tf_md(md_beam)
        )

        ## Estimate probabilities
        (
            md_beam
            # Generate large
            >> gr.ev_sample(n=1e5, df_det="nom")
            # Estimate probabilities of failure
            >> gr.tf_summarize(
                pof_stress=gr.mean(DF.g_stress <= 0),
                pof_disp=gr.mean(DF.g_disp <= 0),
            )
        )

    """
    ## Check invariants
    invariants_eval_model(model, skip)
    invariants_eval_df(
        df_det,
        arg_name="df_det",
        valid_str=["nom"],
        acc_none=(model.n_var_det == 0),
    )
    if n is None:
        raise ValueError("Must provide a valid n value.")

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n, Integral):
        print("eval_sample() is rounding n...")
        n = int(n)

    ## Draw samples
    df_rand = model.density.sample(n=n, seed=seed)
    if not index is None:
        df_rand[index] = df_rand.index

    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n {1:4.3} sec".format(
                    model.name, runtime_est
                )
            )
        else:
            print("Design runtime estimates unavailable; model has no timing data.")

        ## Attach metadata
        with catch_warnings():
            simplefilter("ignore")
            df_samp._plot_info = {
                "type": "sample_inputs",
                "var": model.var_rand,
            }

        return df_samp

    df_res = eval_df(model, df=df_samp, append=append)

    ## Attach metadata
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sample_outputs",
            "var": model.var,
            "out": model.out,
        }

    return df_res
def gamma(X, shape=2, scale=2, seed=None, dt=None):
    """Add gamma-distributed noise"""
    set_seed(seed)
    return X + np.random.gamma(shape, scale=scale, size=X.shape)
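# A small usage sketch for the noise helpers above (normal, gamma, brown).
# The toy "electrode" array shape is illustrative only.
import numpy as np

X_sig = np.zeros((4, 1000))                 # 4 electrodes x 1000 samples
X_white = normal(X_sig, scale=1.0, seed=42)
X_gamma = gamma(X_sig, shape=2, scale=2, seed=42)
X_brown = brown(X_sig, scale=0.5, seed=42)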