def _get_uniform_base_draws(n_draws, n_params, sampling_scheme):
    """Get uniform random draws.

    Questions:
    - Should we replace random by quasi-random?
    - Should we de-correlate the result as a finite-sample correction?

    Args:
        n_draws (int): Number of draws.
        n_params (int): Number of parameters.
        sampling_scheme (str): One of ["sobol", "random"].

    Returns:
        u_a, u_b (np.ndarray): Arrays of shape (n_draws, n_params) with i.i.d.
            draws from a uniform [0, 1] distribution.

    """
    if sampling_scheme == "sobol":
        u = cp.generate_samples(order=n_draws * 2 * n_params, rule="S").reshape(
            n_draws, -1,
        )
    elif sampling_scheme == "random":
        u = np.random.uniform(low=1e-5, high=1 - 1e-5, size=(n_draws, 2 * n_params))
    else:
        raise ValueError("Argument 'sampling_scheme' is not in {'sobol', 'random'}.")

    u_a = u[:, :n_params]
    u_b = u[:, n_params:]
    return u_a, u_b
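# Usage sketch for _get_uniform_base_draws (assumption: the surrounding module
# imports chaospy as cp and numpy as np, as the function body implies).
import chaospy as cp
import numpy as np

u_a, u_b = _get_uniform_base_draws(n_draws=100, n_params=3, sampling_scheme="random")
assert u_a.shape == (100, 3)
assert u_b.shape == (100, 3)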
def __init__(self, dist, samples, poly=None, rule="random"):
    """
    Initialize the matrix generator.

    dist (chaospy.Distribution):
        Distribution to sample from.
    samples (int):
        The number of samples to draw for each matrix.
    poly (numpoly.ndpoly):
        If provided, samples are evaluated through the polynomial before
        being returned.
    rule (str):
        Scheme for generating random samples.
    """
    self.dist = dist
    samples_ = chaospy.generate_samples(
        2 * samples, domain=len(dist), rule=rule)
    self.samples1 = samples_.T[:samples].T
    self.samples2 = samples_.T[samples:].T
    self.poly = poly
    self.buffer = {}
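# Usage sketch (the enclosing class is not shown above; "MatrixGenerator" is a
# hypothetical stand-in for its actual name). Two independent sample matrices
# are drawn from a joint distribution and stored on the generator.
import chaospy

dist = chaospy.J(chaospy.Uniform(0, 1), chaospy.Normal(0, 1))
gen = MatrixGenerator(dist, samples=500)
assert gen.samples1.shape == (len(dist), 500)
assert gen.samples2.shape == (len(dist), 500)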
def __init__(self,
             vary=None,
             count=0,
             polynomial_order=4,
             regression=False,
             rule="G",
             sparse=False,
             growth=False):
    """
    Create the sampler for the Polynomial Chaos Expansion using
    pseudo-spectral projection or regression (point collocation).

    Parameters
    ----------
    vary : dict or None
        keys = parameters to be sampled, values = distributions.
    count : int, optional
        Specified counter for fast forward, default is 0.
    polynomial_order : int, optional
        The polynomial order, default is 4.
    regression : bool, optional
        If True, the regression variant (point collocation) will be used,
        otherwise the projection variant (pseudo-spectral) will be used.
        Default value is False.
    rule : str, optional
        The quadrature method in case of projection (default is Gaussian, "G").
        The sequence sampler in case of regression (default is Hammersley, "M").
    sparse : bool, optional
        If True, use a Smolyak sparse grid instead of a normal tensor product
        grid. Default value is False.
    growth : bool, optional
        If True, quadrature points become nested.
    """
    if vary is None:
        msg = ("'vary' cannot be None. RandomSampler must be passed a "
               "dict of the names of the parameters you want to vary, "
               "and their corresponding distributions.")
        logging.error(msg)
        raise Exception(msg)
    if not isinstance(vary, dict):
        msg = ("'vary' must be a dictionary of the names of the "
               "parameters you want to vary, and their corresponding "
               "distributions.")
        logging.error(msg)
        raise Exception(msg)
    if len(vary) == 0:
        msg = "'vary' cannot be empty."
        logging.error(msg)
        raise Exception(msg)

    self.vary = Vary(vary)
    self.polynomial_order = polynomial_order

    # List of the probability distributions of the uncertain parameters
    params_distribution = list(vary.values())

    # Multivariate distribution
    self.distribution = cp.J(*params_distribution)

    # The orthogonal polynomials corresponding to the joint distribution
    self.P = cp.expansion.stieltjes(polynomial_order, self.distribution, normed=True)

    # The quadrature information
    self.quad_sparse = sparse
    self.rule = rule

    # Clenshaw-Curtis should be nested if sparse (#139 chaospy issue)
    self.quad_growth = growth
    cc = ['c', 'C', 'clenshaw_curtis', 'Clenshaw_Curtis']
    if sparse and rule in cc:
        self.quad_growth = True

    # To determine which PCE variant to use
    self.regression = regression

    # Regression variant (point collocation method)
    if regression:
        # Change the default rule
        if rule == "G":
            self.rule = "M"
        # Generate samples
        self._n_samples = 2 * len(self.P)
        self._nodes = cp.generate_samples(order=self._n_samples,
                                          domain=self.distribution,
                                          rule=self.rule)
        self._weights = None
    # Projection variant (pseudo-spectral method)
    else:
        # Nodes and weights for the integration
        self._nodes, self._weights = cp.generate_quadrature(
            order=polynomial_order,
            dist=self.distribution,
            rule=self.rule,
            sparse=sparse,
            growth=self.quad_growth)
        # Number of samples
        self._n_samples = len(self._nodes[0])

    # Fast forward to specified count, if possible
    self.count = 0
    if count >= self._n_samples:
        msg = (
            f"Attempt to start sampler fastforwarded to count {count}, "
            f"but sampler only has {self.n_samples} samples, therefore "
            f"this sampler will not provide any more samples.")
        logging.warning(msg)
    else:
        for i in range(count):
            self.__next__()
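# Usage sketch (the enclosing class is referred to here as PCESampler, a
# hypothetical stand-in for its actual name; the surrounding module is assumed
# to import chaospy as cp and logging, and to provide the Vary helper).
import chaospy as cp

vary = {
    "kappa": cp.Uniform(0.025, 0.075),
    "t_env": cp.Uniform(15.0, 25.0),
}
sampler = PCESampler(vary=vary, polynomial_order=3, regression=True)
# With regression=True the rule defaults to "M" (Hammersley) and 2 * len(P)
# nodes are drawn; with regression=False a quadrature grid is built instead.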
def radial_sample(
    n_rad: int, n_inputs: int, normal: bool = False, sequence: str = "S"
) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Generate sample in radial design as described in [1].

    For each subsample, there are `n_inputs + 1` rows and `n_inputs` columns.
    Below the first row, each row is identical to the first row except for one
    diagonal element.

    Parameters
    ----------
    n_rad
        Number of subsamples.
    n_inputs
        Number of input parameters / columns / rows - 1.
    normal
        Indicates whether to transform points by `scipy.stats.norm.ppf`.
    sequence
        Type of quasi-random sequence.

    Returns
    -------
    sample
        List of `n_rad` subsamples in radial design, each of dimension
        (`n_inputs + 1`) x `n_inputs`.
    trans_steps
        List of step vectors added to the base value point, sorted by
        parameter/column. Each has dimension `n_inputs` x 1.

    Notes
    -----
    See [2] for abbreviations of the different sequence types.

    In contrast to the trajectory design, the step size differs right from the
    start by design, and only one element changes in each row compared to the
    first row.

    All distinct elements in the whole sample are drawn at once because the
    default Sobol' sequence cannot be reseeded.

    References
    ----------
    [1] Ge, Q. and M. Menendez (2017). Extending Morris method for qualitative
    global sensitivity analysis of models with dependent inputs. Reliability
    Engineering & System Safety 100 (162), 28–39.
    [2] <https://github.com/jonathf/chaospy/blob/master/chaospy/distributions/sampler/
    generator.py#L62>

    """
    # Draw all elements at once.
    all_elements = cp.generate_samples(order=n_rad * 2 * n_inputs, rule=sequence)
    all_elements = all_elements.reshape(n_rad, 2 * n_inputs)

    rad_list = []
    steps_list = []
    for row in range(n_rad):
        # Copy the first row.
        rad_temp = np.tile(all_elements[row, 0:n_inputs], (n_inputs + 1, 1))

        # Fill the diagonal below the first row.
        diag_temp = all_elements[row, n_inputs:]
        rad_temp[1:, :].flat[::n_inputs + 1] = diag_temp

        # For standard normally distributed draws.
        if normal:
            rad_temp = np.apply_along_axis(
                transform_uniform_stnormal_uncorr, 1, rad_temp
            )

        rad_list.append(rad_temp)

        # Subtract the first row from the diagonal elements.
        steps_temp = rad_temp[1:, :].flat[::n_inputs + 1] - rad_temp[0, :]
        steps_list.append(steps_temp)

    return rad_list, steps_list
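# Usage sketch: three radial subsamples for a four-dimensional input space
# (assumes the surrounding module imports chaospy as cp and numpy as np; the
# transform_uniform_stnormal_uncorr helper is only needed for normal=True).
rad_list, steps_list = radial_sample(n_rad=3, n_inputs=4, normal=False)
assert len(rad_list) == 3
assert rad_list[0].shape == (5, 4)   # n_inputs + 1 rows, n_inputs columns
assert steps_list[0].shape == (4,)   # one step per input parameter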
def analyse(self, data_frame=None):
    """Perform PCE analysis on input `data_frame`.

    Parameters
    ----------
    data_frame : :obj:`pandas.DataFrame`
        Input data for analysis.

    Returns
    -------
    dict:
        Contains analysis results in sub-dicts with keys -
        ['statistical_moments', 'percentiles', 'sobols_first',
        'sobols_second', 'sobols_total', 'correlation_matrices',
        'output_distributions']
    """

    if data_frame is None:
        raise RuntimeError("Analysis element needs a data frame to "
                           "analyse")
    elif data_frame.empty:
        raise RuntimeError(
            "No data in data frame passed to analyse element")

    qoi_cols = self.qoi_cols

    results = {
        'statistical_moments': {},
        'percentiles': {},
        'sobols_first': {k: {} for k in qoi_cols},
        'sobols_second': {k: {} for k in qoi_cols},
        'sobols_total': {k: {} for k in qoi_cols},
        'correlation_matrices': {},
        'output_distributions': {},
    }

    # Get the polynomial
    P = self.sampler.P

    # Get the PCE variant to use (regression or projection)
    regression = self.sampler.regression

    # Compute nodes (and weights)
    if regression:
        nodes = cp.generate_samples(order=self.sampler.n_samples,
                                    domain=self.sampler.distribution,
                                    rule=self.sampler.rule)
    else:
        nodes, weights = cp.generate_quadrature(
            order=self.sampler.quad_order,
            dist=self.sampler.distribution,
            rule=self.sampler.rule,
            sparse=self.sampler.quad_sparse,
            growth=self.sampler.quad_growth)

    # Extract output values for each quantity of interest from the DataFrame
    samples = {k: [] for k in qoi_cols}
    for run_id in data_frame.run_id.unique():
        for k in qoi_cols:
            data = data_frame.loc[data_frame['run_id'] == run_id][k]
            samples[k].append(data.values)

    # Compute descriptive statistics for each quantity of interest
    for k in qoi_cols:
        # Approximation solver
        if regression:
            if samples[k][0].dtype == object:
                for i in range(self.sampler.count):
                    samples[k][i] = samples[k][i].astype("float64")
            fit = cp.fit_regression(P, nodes, samples[k], "T")
        else:
            fit = cp.fit_quadrature(P, nodes, weights, samples[k])

        # Statistical moments
        mean = cp.E(fit, self.sampler.distribution)
        var = cp.Var(fit, self.sampler.distribution)
        std = cp.Std(fit, self.sampler.distribution)
        results['statistical_moments'][k] = {'mean': mean,
                                             'var': var,
                                             'std': std}

        # Percentiles (Pxx)
        P10 = cp.Perc(fit, 10, self.sampler.distribution)
        P90 = cp.Perc(fit, 90, self.sampler.distribution)
        results['percentiles'][k] = {'p10': P10, 'p90': P90}

        # Sensitivity analysis: first, second and total Sobol indices
        sobols_first_narr = cp.Sens_m(fit, self.sampler.distribution)
        sobols_second_narr = cp.Sens_m2(fit, self.sampler.distribution)
        sobols_total_narr = cp.Sens_t(fit, self.sampler.distribution)

        sobols_first_dict = {}
        sobols_second_dict = {}
        sobols_total_dict = {}
        ipar = 0
        i = 0
        for param_name in self.sampler.vary.get_keys():
            j = self.sampler.params_size[ipar]
            sobols_first_dict[param_name] = sobols_first_narr[i:i + j]
            sobols_second_dict[param_name] = sobols_second_narr[i:i + j]
            sobols_total_dict[param_name] = sobols_total_narr[i:i + j]
            i += j
            ipar += 1

        results['sobols_first'][k] = sobols_first_dict
        results['sobols_second'][k] = sobols_second_dict
        results['sobols_total'][k] = sobols_total_dict

        # Correlation matrix
        results['correlation_matrices'][k] = cp.Corr(
            fit, self.sampler.distribution)

        # Output distributions
        results['output_distributions'][k] = cp.QoI_Dist(
            fit, self.sampler.distribution)

    return results
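# Minimal standalone sketch of the fitting and statistics pipeline the method
# above delegates to chaospy, for a single quantity of interest. The model
# below is an illustrative stand-in; "hammersley" is the full rule name
# corresponding to the sampler's short key "M".
import chaospy as cp
import numpy as np

dist = cp.J(cp.Uniform(0, 1), cp.Normal(0, 1))
P = cp.expansion.stieltjes(3, dist, normed=True)
nodes = cp.generate_samples(order=2 * len(P), domain=dist, rule="hammersley")
evals = [np.sum(node) ** 2 for node in nodes.T]   # stand-in model output
fit = cp.fit_regression(P, nodes, evals)
mean, var = cp.E(fit, dist), cp.Var(fit, dist)
sobols_first = cp.Sens_m(fit, dist)               # one first-order index per input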
def _unconditional_samples(
    n_draws,
    n_params,
    dist_type,
    loc,
    scale,
    sampling_scheme,
    seed=0,
    skip=0,
):
    """Generate two independent groups of sample points.

    Parameters
    ----------
    n_draws : int
        Number of Monte Carlo draws.
    n_params : int
        Number of parameters of objective function.
    dist_type : str
        The distribution type of input. Options are "Normal", "Exponential"
        and "Uniform".
    loc : float or np.ndarray
        The location (`loc`) keyword passed to the respective `scipy.stats`
        distribution to shift the location of the "standardized" distribution.
    scale : float or np.ndarray
        The `scale` keyword passed to the respective `scipy.stats` distribution
        to adjust the scale of the "standardized" distribution. For
        `dist_type="Normal"`, `scale` is interpreted as a covariance matrix
        whose Cholesky factor is applied to the standard normal draws.
    sampling_scheme : str, optional
        One of ["sobol", "random"].
    seed : int, optional
        Random number generator seed. Default is 0.
    skip : int, optional
        Number of values of the Sobol' sequence to skip. Default is 0.

    Returns
    -------
    x, x_prime : np.ndarray
        Two arrays of shape (n_draws, n_params) with i.i.d. draws from a joint
        distribution.

    """
    # Generate uniformly distributed samples.
    np.random.seed(seed)
    if sampling_scheme == "sobol":
        u = cp.generate_samples(
            order=n_draws + skip, domain=2 * n_params, rule="S",
        ).T
    elif sampling_scheme == "random":
        skip = 0
        u = np.random.uniform(size=(n_draws, 2 * n_params))
    else:
        raise ValueError("Argument 'sampling_scheme' is not in {'sobol', 'random'}.")

    u_1 = u[skip:, :n_params]
    u_2 = u[skip:, n_params:]

    # Transform uniform draws into a joint PDF.
    if dist_type == "Normal":
        z = norm.ppf(u_1)
        z_prime = norm.ppf(u_2)
        cholesky = np.linalg.cholesky(scale)
        x = loc + cholesky.dot(z.T).T
        x_prime = loc + cholesky.dot(z_prime.T).T
    elif dist_type == "Exponential":
        x = expon.ppf(u_1, loc, scale)
        x_prime = expon.ppf(u_2, loc, scale)
    elif dist_type == "Uniform":
        x = uniform.ppf(u_1, loc, scale)
        x_prime = uniform.ppf(u_2, loc, scale)
    else:
        raise NotImplementedError

    return x, x_prime
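# Usage sketch: two independent standard-normal sample blocks with a full
# covariance matrix (assumes the surrounding module imports numpy as np,
# chaospy as cp, and scipy.stats' norm/expon/uniform, as the body implies).
import numpy as np

cov = np.array([[1.0, 0.5], [0.5, 2.0]])
mean = np.zeros(2)
x, x_prime = _unconditional_samples(
    n_draws=1000, n_params=2, dist_type="Normal",
    loc=mean, scale=cov, sampling_scheme="random", seed=123,
)
assert x.shape == (1000, 2)
assert x_prime.shape == (1000, 2)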