def analytic_sobol_indices_from_gaussian_process(
        gp, variable, interaction_terms, ngp_realizations=1,
        stat_functions=(np.mean, np.median, np.min, np.max),
        ninterpolation_samples=500, nvalidation_samples=100,
        ncandidate_samples=1000, nquad_samples=50, use_cholesky=True,
        alpha=0):
    x_train, y_train, K_inv, lscale, kernel_var, transform_quad_rules = \
        extract_gaussian_process_attributes_for_integration(gp)

    if ngp_realizations > 0:
        gp_realizations = generate_gp_realizations(
            gp, ngp_realizations, ninterpolation_samples,
            nvalidation_samples, ncandidate_samples, variable,
            use_cholesky, alpha)

        # Diagnostics: check how accurate the realizations are by comparing
        # the GP mean and standard deviation with the realization ensemble
        validation_samples = generate_independent_random_samples(
            variable, 1000)
        mean_vals, std = gp(validation_samples, return_std=True)
        realization_vals = gp_realizations(validation_samples)
        print(mean_vals[:, 0].mean())
        # print(std, realization_vals.std(axis=1))
        print('std of realizations error',
              np.linalg.norm(std-realization_vals.std(axis=1)) /
              np.linalg.norm(std))
        print('var of realizations error',
              np.linalg.norm(std**2-realization_vals.var(axis=1)) /
              np.linalg.norm(std**2))
        print('mean interpolation error',
              np.linalg.norm(mean_vals[:, 0]-realization_vals[:, -1]) /
              np.linalg.norm(mean_vals[:, 0]))

        x_train = gp_realizations.selected_canonical_samples
        # gp_realizations.train_vals is normalized so unnormalize
        y_train = gp._y_train_std*gp_realizations.train_vals
        # kernel_var has already been adjusted by the call to
        # extract_gaussian_process_attributes_for_integration
        K_inv = np.linalg.inv(gp_realizations.L.dot(gp_realizations.L.T))
        K_inv /= gp._y_train_std**2

    sobol_values, total_values, means, variances = \
        _compute_expected_sobol_indices(
            gp, variable, interaction_terms, nquad_samples, x_train,
            y_train, K_inv, lscale, kernel_var, transform_quad_rules,
            gp._y_train_mean)
    sobol_values = sobol_values.T
    total_values = total_values.T

    result = dict()
    data = [sobol_values, total_values, variances, means]
    data_names = ['sobol_indices', 'total_effects', 'variance', 'mean']
    for item, name in zip(data, data_names):
        subdict = dict()
        for ii, sfun in enumerate(stat_functions):
            subdict[sfun.__name__] = sfun(item, axis=0)
        subdict['values'] = item
        result[name] = subdict
    return result
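
# Example usage (a minimal sketch, not part of the original source): it
# assumes `gp` is a fitted Gaussian process surrogate of the type expected
# by this module and `variable` the corresponding multivariate random
# variable; `interaction_terms` is a 2D array whose columns are the
# multi-indices of the Sobol indices to compute (here the two first-order
# terms of a two-dimensional problem).
#
#     interaction_terms = np.array([[1, 0], [0, 1]]).T
#     result = analytic_sobol_indices_from_gaussian_process(
#         gp, variable, interaction_terms, ngp_realizations=10)
#     mean_sobol_indices = result['sobol_indices']['mean']
#     mean_total_effects = result['total_effects']['mean']
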
def sampling_based_sobol_indices_from_gaussian_process(
        gp, variables, interaction_terms, nsamples,
        sampling_method='sobol', ngp_realizations=1, normalize=True,
        nsobol_realizations=1,
        stat_functions=(np.mean, np.median, np.min, np.max),
        ninterpolation_samples=500, nvalidation_samples=100,
        ncandidate_samples=1000, use_cholesky=True, alpha=0):
    """
    Compute Sobol indices from a Gaussian process using sampling.

    This function returns the mean and variance of these values with
    respect to the variability in the GP (i.e. its function error).

    Following Kennedy and O'Hagan we evaluate random realizations of each
    GP at a discrete set of points. To predict at larger sample sizes we
    interpolate these points and use the resulting approximation to make
    any subsequent predictions. This introduces an error, but the error
    can be made arbitrarily small by setting ninterpolation_samples large
    enough. The geometry of the interpolation samples can affect the
    accuracy of the interpolants. Consequently we use the pivoted Cholesky
    algorithm of Harbrecht et al. to choose the interpolation samples.

    Parameters
    ----------
    ngp_realizations : integer
        The number of random realizations of the Gaussian process. If
        ngp_realizations == 0 then the sensitivity indices will only be
        computed using the mean of the GP.

    nsobol_realizations : integer
        The number of random realizations of the random samples used to
        compute the sobol indices. This number should be similar to
        ngp_realizations, as the mean and standard deviation are taken
        over both these sources of randomness.

    stat_functions : list
        List of callable functions with signature fun(np.ndarray),
        e.g. np.mean. If fun has arguments then we must wrap it with
        partial and set a meaningful __name__, e.g.
        fun = partial(np.quantile, q=0.25); fun.__name__ = 'quantile-0.25'.
        Note: the __name__ attributes of np.min and np.max are 'amin'
        and 'amax'.

    ninterpolation_samples : integer
        The number of samples used to interpolate the discrete random
        realizations of a Gaussian process.

    nvalidation_samples : integer
        The number of samples used to assess the accuracy of the
        interpolants of the random realizations.

    ncandidate_samples : integer
        The number of candidate samples from which the interpolation
        samples of the random realizations are selected.

    Returns
    -------
    result : dictionary
        Result containing the numpy functions in stat_functions applied
        to the mean, variance, sobol_indices and total_effects of the
        Gaussian process. To access the data associated with a function
        fun in stat_functions use the key fun.__name__. For example, if
        the stat_function is np.mean the mean sobol indices are accessed
        via result['sobol_indices']['mean'].
        The raw values of each iteration are stored in
        result['sobol_indices']['values'].
    """
    assert nsobol_realizations > 0

    if ngp_realizations > 0:
        assert ncandidate_samples > ninterpolation_samples
        gp_realizations = generate_gp_realizations(
            gp, ngp_realizations, ninterpolation_samples,
            nvalidation_samples, ncandidate_samples, variables,
            use_cholesky, alpha)
        fun = gp_realizations
    else:
        fun = gp

    sobol_values, total_values, variances, means = \
        repeat_sampling_based_sobol_indices(
            fun, variables, interaction_terms, nsamples, sampling_method,
            nsobol_realizations)

    result = dict()
    data = [sobol_values, total_values, variances, means]
    data_names = ['sobol_indices', 'total_effects', 'variance', 'mean']
    for item, name in zip(data, data_names):
        subdict = dict()
        for ii, sfun in enumerate(stat_functions):
            # have to deal with averaging over axis=(0, 1) and axis=(0, 2)
            # for mean, variance and sobol_indices, total_effects
            # respectively
            subdict[sfun.__name__] = sfun(item, axis=(0, -1))
        subdict['values'] = item
        result[name] = subdict
    return result
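
# Example usage (a minimal sketch, not part of the original source),
# assuming `gp`, `variables` and `interaction_terms` are as in the example
# above. With the default stat_functions the keys of each sub-dictionary
# are 'mean', 'median', 'amin', 'amax' (the __name__ attributes of np.mean,
# np.median, np.min, np.max) plus 'values', which stores the raw values
# over the GP and Sobol realizations.
#
#     result = sampling_based_sobol_indices_from_gaussian_process(
#         gp, variables, interaction_terms, nsamples=1000,
#         sampling_method='sobol', ngp_realizations=10,
#         nsobol_realizations=5)
#     worst_case_total_effects = result['total_effects']['amax']
#     sobol_index_spread = (result['sobol_indices']['amax'] -
#                           result['sobol_indices']['amin'])
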