def plot_discrete_distribution_surface_2d(rv1, rv2, ax=None):
    """
    Plot the outer product of the probability masses of two discrete
    variables as a 3D bar chart.

    Only works if rv1 and rv2 are defined on consecutive integers

    Parameters
    ----------
    rv1, rv2 : random variables
        Variables accepted by ``get_probability_masses``.

    ax : matplotlib axes, optional
        Axes with a 3D projection to draw on. When None a new figure
        with a single 3D subplot is created.
    """
    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.variables import get_probability_masses
    if ax is None:
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')

    # Query the masses once per variable and split locations from weights
    masses_1d = [get_probability_masses(rv) for rv in [rv1, rv2]]
    x_1d = [masses[0] for masses in masses_1d]
    w_1d = [masses[1] for masses in masses_1d]
    samples = cartesian_product(x_1d)
    dz = outer_product(w_1d)

    # Get desired colormap - you can change this!
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap is not
    # available in recent matplotlib releases
    cmap = plt.get_cmap('jet')

    # Scale each height to [0, 1] using the full range of heights so the
    # tallest bar maps to the top of the colormap
    min_height = np.min(dz)
    height_range = np.max(dz) - min_height
    if height_range == 0:
        # all bars have the same height; avoid dividing by zero
        height_range = 1.0
    rgba = [cmap((k-min_height)/height_range) for k in dz]

    # Only works if rv1 and rv2 are defined on consecutive integers
    dx, dy = 1, 1
    ax.bar3d(samples[0, :], samples[1, :], 0, dx, dy, dz, color=rgba,
             zsort='average')

    angle = 45
    ax.view_init(10, angle)
    ax.set_axis_off()
def test_get_univariate_leja_rule_bounded_discrete(self):
    """
    Check that Leja quadrature rules built for bounded discrete variables
    reproduce the highest-degree monomial moment computed directly from
    the probability masses.
    """
    from scipy import stats
    from pyapprox.variables import get_probability_masses

    level = 3
    nmasses = 20
    growth_rule = partial(constant_increment_growth_rule, 2)

    # Uniform masses on the integers 0, ..., nmasses-1
    locations = np.array(range(0, nmasses), dtype='float')
    uniform_masses = np.ones(nmasses) / nmasses
    variables = [
        float_rv_discrete(
            name='discrete_chebyshev',
            values=(locations, uniform_masses))(),
        stats.binom(20, 0.5),
        stats.hypergeom(10 + 10, 10, 9),
    ]

    for variable in variables:
        # polys of binom, hypergeometric have no canonical domain [-1,1]
        quad_rule = get_univariate_leja_quadrature_rule(
            variable, growth_rule)
        x, w = quad_rule(level)

        xk, pk = get_probability_masses(variable)
        degree = x.shape[0] - 1
        true_moment = (xk**degree).dot(pk)
        # w[-1] holds the weights of the last level returned by the rule
        moment = (x**degree).dot(w[-1])
        assert np.allclose(moment, true_moment)
def test_get_univariate_leja_rule_bounded_discrete(self):
    """
    Check that Leja quadrature rules built for bounded discrete variables,
    once mapped back with the variable's loc and scale, reproduce the
    highest-degree monomial moment of the probability masses.
    """
    level = 3
    nmasses = 20
    growth_rule = partial(constant_increment_growth_rule, 2)

    # Uniform masses on the integers 0, ..., nmasses-1
    locations = np.array(range(0, nmasses), dtype='float')
    uniform_masses = np.ones(nmasses) / nmasses
    variables = [
        float_rv_discrete(
            name='discrete_chebyshev',
            values=(locations, uniform_masses))(),
        stats.binom(17, 0.5),
        stats.hypergeom(10 + 10, 10, 9),
    ]

    for variable in variables:
        quad_rule = get_univariate_leja_quadrature_rule(
            variable, growth_rule)
        x, w = quad_rule(level)

        # Map the quadrature points with the variable's loc and scale
        loc, scale = transform_scale_parameters(variable)
        x = x * scale + loc

        xk, pk = get_probability_masses(variable)
        print(x, xk, loc, scale)

        degree = x.shape[0] - 1
        true_moment = (xk**degree).dot(pk)
        moment = (x**degree).dot(w[-1])
        print(moment, true_moment, variable.dist.name)
        assert np.allclose(moment, true_moment)
def plot_discrete_distribution_heatmap_2d(rv1, rv2, ax=None, zero_tol=1e-4):
    """
    Draw the outer product of the probability masses of two discrete
    variables as a heatmap with a colorbar.

    Only works if rv1 and rv2 are defined on consecutive integers

    Parameters
    ----------
    rv1, rv2 : random variables
        Variables accepted by ``get_probability_masses``.

    ax : matplotlib axes, optional
        Axes to draw on. When None a new figure with a single subplot
        is created.

    zero_tol : float
        Entries of the mass table smaller than this value are replaced
        by ``np.inf`` before plotting; the colormap's "bad" color is set
        to gray.
    """
    import copy
    from pyapprox.utilities import outer_product
    from pyapprox.variables import get_probability_masses

    if ax is None:
        ax = plt.figure(figsize=(8, 6)).add_subplot(111)

    variables = [rv1, rv2]
    x_1d = [get_probability_masses(rv)[0] for rv in variables]
    w_1d = [get_probability_masses(rv)[1] for rv in variables]

    # Arrange the product weights as a (len(x1), len(x2)) table
    table_shape = (len(x_1d[0]), len(x_1d[1]))
    Z = np.reshape(outer_product(w_1d), table_shape, order='F')
    Z[Z < zero_tol] = np.inf

    # Copy the colormap so the shared viridis instance is not modified
    cmap = copy.copy(plt.cm.viridis)
    cmap.set_bad('gray', 1)

    def cell_edges(locations):
        # Append one extra location and shift by half a unit so every
        # integer location sits at the centre of its cell
        return np.hstack((locations, locations.max()+1))-0.5

    xx = cell_edges(x_1d[0])
    yy = cell_edges(x_1d[1])
    mesh = ax.pcolormesh(xx, yy, Z.T, cmap=cmap)
    plt.colorbar(mesh, ax=ax)
def univariate_pdf(var, x):
    """
    Evaluate the density of a frozen random variable at x.

    Parameters
    ----------
    var : frozen random variable
        Object exposing ``dist`` and either ``pdf`` or ``pmf``.

    x : float or array
        Locations at which to evaluate the density.

    Returns
    -------
    vals : float or array
        ``var.pdf(x)`` when the underlying distribution defines ``pdf``,
        otherwise ``var.pmf(x)``.
    """
    # The manual mass lookup that used to follow these returns could
    # never execute, so it has been dropped.
    if hasattr(var.dist, 'pdf'):
        return var.pdf(x)
    return var.pmf(x)
def inverse_transform_sampling_1d(var, ab, ii, u_samples):
    """
    Dispatch 1D induced-measure inverse transform sampling based on the
    type of random variable.

    Parameters
    ----------
    var : frozen random variable
        The variable defining the underlying measure.

    ab : recursion coefficients
        Forwarded unchanged to the sampling routine.

    ii : integer
        Forwarded unchanged to the sampling routine.

    u_samples : array
        Forwarded unchanged to the sampling routine.

    Returns
    -------
    samples
        The value returned by
        ``float_rv_discrete_inverse_transform_sampling_1d`` for bounded
        discrete variables or by ``continuous_induced_measure_ppf`` for
        scipy continuous distributions.

    Raises
    ------
    Exception
        If the variable is neither bounded discrete nor one of the scipy
        continuous distributions.
    """
    name = var.dist.name
    if is_bounded_discrete_variable(var):
        xk, pk = get_probability_masses(var)
        # Exact type match is intentional: only plain float_rv_discrete
        # variables, other than discrete_chebyshev, are rescaled
        if (type(var.dist) is float_rv_discrete and
                name != 'discrete_chebyshev'):
            # map the mass locations onto [-1, 1]
            lb, ub = xk.min(), xk.max()
            xk = (xk-lb)/(ub-lb)*2-1
        return float_rv_discrete_inverse_transform_sampling_1d(
            xk, pk, ab, ii, u_samples)

    if name in _continuous_distns._distn_names:
        return continuous_induced_measure_ppf(var, ab, ii, u_samples)

    msg = 'induced sampling not yet implemented for var type %s' % name
    raise Exception(msg)
def get_discrete_univariate_leja_quadrature_rule(
        variable, growth_rule, initial_points=None,
        orthonormality_tol=1e-12, return_weights_for_all_levels=True,
        recursion_opts=None):
    """
    Build a Christoffel-weighted Leja quadrature rule whose candidate
    points are the canonical probability-mass locations of a bounded
    discrete variable.

    Parameters
    ----------
    variable : frozen random variable
        Must satisfy ``is_bounded_discrete_variable``.

    growth_rule : callable
        Forwarded to ``candidate_based_christoffel_leja_rule_1d``.

    initial_points : np.ndarray, optional
        Initial points in the canonical domain [-1, 1]. Defaults to the
        median of the variable mapped to the canonical domain.

    orthonormality_tol : float
        Used to build the default ``recursion_opts``.

    return_weights_for_all_levels : boolean
        Forwarded to ``candidate_based_christoffel_leja_rule_1d``.

    recursion_opts : dict, optional
        Options passed to ``get_recursion_coefficients_from_variable``.
        Defaults to ``{"orthonormality_tol": orthonormality_tol}``.

    Returns
    -------
    quad_rule : callable
        Partial application of
        ``candidate_based_christoffel_leja_rule_1d``.

    Raises
    ------
    ValueError
        If the variable is not a bounded discrete variable.
    """
    from pyapprox.variables import (
        get_probability_masses, is_bounded_discrete_variable)

    var_name = get_distribution_info(variable)[0]
    if not is_bounded_discrete_variable(variable):
        raise ValueError('var_name %s not implemented' % var_name)

    # Express the mass locations in the canonical domain
    mass_locations, _ = get_probability_masses(variable)
    loc, scale = transform_scale_parameters(variable)
    xk = (mass_locations - loc) / scale

    if initial_points is None:
        median = np.atleast_2d([variable.ppf(0.5)])
        initial_points = (median - loc) / scale

    # initial samples must be in canonical space
    assert np.all((initial_points >= -1) & (initial_points <= 1))
    assert np.all((xk >= -1) & (xk <= 1))

    def generate_candidate_samples(num_samples):
        # The candidates are always the complete set of mass locations
        return xk[None, :]

    if recursion_opts is None:
        recursion_opts = {"orthonormality_tol": orthonormality_tol}
    num_masses = xk.shape[0]
    ab = get_recursion_coefficients_from_variable(
        variable, num_masses, recursion_opts)

    return partial(
        candidate_based_christoffel_leja_rule_1d, ab,
        generate_candidate_samples, num_masses,
        growth_rule=growth_rule, initial_points=initial_points,
        return_weights_for_all_levels=return_weights_for_all_levels)
def get_discrete_univariate_leja_quadrature_rule(
        variable, growth_rule, initial_points=None,
        numerically_generated_poly_accuracy_tolerance=1e-12):
    """
    Build a Christoffel-weighted Leja quadrature rule whose candidate
    points are the probability-mass locations of a bounded discrete
    variable.

    Parameters
    ----------
    variable : frozen random variable
        Must satisfy ``is_bounded_discrete_variable``.

    growth_rule : callable
        Forwarded to ``candidate_based_christoffel_leja_rule_1d``.

    initial_points : np.ndarray, optional
        Initial points of the sequence. Defaults to the median of the
        variable.

    numerically_generated_poly_accuracy_tolerance : float
        Forwarded to ``get_recursion_coefficients``.

    Returns
    -------
    quad_rule : callable
        Partial application of
        ``candidate_based_christoffel_leja_rule_1d``.

    Raises
    ------
    Exception
        If the variable is not a bounded discrete variable.
    """
    from pyapprox.variables import (
        get_probability_masses, is_bounded_discrete_variable)

    var_name, _, shapes = get_distribution_info(variable)
    if not is_bounded_discrete_variable(variable):
        raise Exception('var_name %s not implemented' % var_name)

    if initial_points is None:
        initial_points = np.atleast_2d([variable.ppf(0.5)])

    xk, _ = get_probability_masses(variable)
    num_masses = xk.shape[0]

    def generate_candidate_samples(num_samples):
        # The candidates are always the complete set of mass locations
        return xk[None, :]

    tol = numerically_generated_poly_accuracy_tolerance
    recursion_coeffs = get_recursion_coefficients(
        {'rv_type': var_name, 'shapes': shapes}, num_masses,
        numerically_generated_poly_accuracy_tolerance=tol)

    return partial(
        candidate_based_christoffel_leja_rule_1d, recursion_coeffs,
        generate_candidate_samples, num_masses, growth_rule=growth_rule,
        initial_points=initial_points)
def test_get_recursion_coefficients_from_variable_discrete(self):
    """
    Check that the recursion coefficients generated for discrete
    variables define polynomials that are orthonormal with respect to
    the probability masses mapped to the canonical domain.
    """
    degree = 4
    N = 10

    def assert_orthonormal_basis(var, opts):
        # Evaluate the basis at the canonical mass locations and verify
        # that the weighted Gram matrix is the identity
        xk, pk = get_probability_masses(var, 1e-15)
        loc, scale = transform_scale_parameters(var)
        xk = (xk - loc) / scale
        ab = get_recursion_coefficients_from_variable(
            var, degree + 1, opts)
        basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
        gram_mat = (basis_mat * pk[:, None]).T.dot(basis_mat)
        assert np.allclose(
            gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)

    # each distribution name is paired with valid shape parameters
    named_shapes = [
        ("binom", {"n": 10, "p": 0.5}),
        ("bernoulli", {"p": 0.5}),
        ("nbinom", {"n": 10, "p": 0.5}),
        ("geom", {"p": 0.5}),
        ("hypergeom", {"M": 20, "n": 7, "N": 12}),
        ("logser", {"p": 0.5}),
        ("poisson", {"mu": 1}),
        ("planck", {"lambda_": 1}),
        ("boltzmann", {"lambda_": 2, "N": 10}),
        ("randint", {"low": 0, "high": 10}),
        ("zipf", {"a": 2}),
        ("dlaplace", {"a": 1}),
        ("skellam", {"mu1": 1, "mu2": 3}),
        ("yulesimon", {"alpha": 1}),
    ]

    # every discrete distribution scipy lists must be accounted for
    known_names = [name for name, _ in named_shapes]
    for name in list(stats._discrete_distns._distn_names):
        assert name in known_names

    # do not support :
    # yulesimon as there is a bug when interval is called
    #   from a frozen variable
    # bernoulli which only has two masses
    # zipf unusual distribution and difficult to compute basis
    # crystallball is discontinuous and requires special integrator
    #   this can be developed if needed
    unsupported = ["bernoulli", "yulesimon", "zipf"]
    supported = [
        (name, shapes) for name, shapes in named_shapes
        if name not in unsupported
    ]

    for name, shapes in supported:
        var = getattr(stats, name)(**shapes)
        assert_orthonormal_basis(
            var, {
                "orthonormality_tol": 3e-14,
                "truncated_probability_tol": 1e-15,
                "numeric": False
            })

    # custom discrete variables
    uniform_pk = np.ones(N) / N
    custom_vars = [
        float_rv_discrete(
            name="discrete_chebyshev",
            values=(np.arange(N), uniform_pk))(),
        float_rv_discrete(
            name="float_rv_discrete",
            values=(np.arange(N)**2, np.ones(N) / N))()
    ]
    for var in custom_vars:
        assert_orthonormal_basis(
            var, {
                "orthonormality_tol": 1e-14,
                "truncated_probability_tol": 1e-15
            })
def get_recursion_coefficients_from_variable(var, num_coefs, opts):
    """
    Generate polynomial recursion coefficients by inspecting a random
    variable.

    Parameters
    ----------
    var : frozen random variable
        The variable whose distribution defines the polynomials.

    num_coefs : integer
        The number of recursion coefficients requested.

    opts : dict
        Options. The keys read here are "numeric" (default False),
        "orthonormality_tol" (default 1e-8) and
        "truncated_probability_tol" (default 0). The dictionary is also
        forwarded to ``predictor_corrector_known_pdf`` for continuous
        variables handled numerically.

    Returns
    -------
    ab
        The recursion coefficients produced by the routine selected for
        the variable, or None when the variable is named
        "continuous_monomial".
    """
    var_name, _, shapes = get_distribution_info(var)

    if var_name == "continuous_monomial":
        return None

    loc, scale = transform_scale_parameters(var)

    if var_name == "rv_function_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_function_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    if var_name == "rv_product_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_product_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    # Askey variables use dedicated coefficients unless the caller
    # explicitly requests numerical generation
    if (var_name in askey_variable_names and
            opts.get("numeric", False) is False):
        return get_askey_recursion_coefficients_from_variable(
            var, num_coefs)

    orthonormality_tol = opts.get("orthonormality_tol", 1e-8)
    truncated_probability_tol = opts.get("truncated_probability_tol", 0)
    if not is_continuous_variable(var):
        # The masses are always queried from the variable. An earlier
        # shortcut tested hasattr(shapes, "xk"), which is never true for
        # a mapping, so that path never ran and has been removed.
        xk, pk = get_probability_masses(var, truncated_probability_tol)
        xk = (xk - loc) / scale
        return get_numerically_generated_recursion_coefficients_from_samples(
            xk, pk, num_coefs, orthonormality_tol,
            truncated_probability_tol)

    # integration performed in canonical domain so need to map back to
    # domain of pdf
    lb, ub = var.interval(1)

    # Get version var.pdf without error checking which runs much faster
    pdf = get_pdf(var)

    def canonical_pdf(x):
        # density of the variable expressed in the canonical coordinates
        return pdf(x * scale + loc) * scale

    if (is_bounded_continuous_variable(var) or
            is_bounded_discrete_variable(var)):
        can_lb, can_ub = -1, 1
    else:
        # Only continuous variables reach this point, so the canonical
        # bounds are always defined
        can_lb = (lb - loc) / scale
        can_ub = (ub - loc) / scale

    return predictor_corrector_known_pdf(
        num_coefs, can_lb, can_ub, canonical_pdf, opts)
def help_discrete_induced_sampling(self, var1, var2, envelope_factor):
    """
    Compare three ways of drawing samples from the induced measure of a
    degree 3 total-degree polynomial chaos expansion in two discrete
    variables against the induced mean computed by exact quadrature.

    Parameters
    ----------
    var1, var2 : frozen random variables
        The discrete variables defining the tensor-product measure.

    envelope_factor : float
        Forwarded to ``random_induced_measure_sampling``.
    """
    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.polynomial_sampling import christoffel_function

    degree = 3
    var_trans = AffineRandomVariableTransformation([var1, var2])
    pce_opts = define_poly_options_from_variable_transformation(var_trans)

    pce = PolynomialChaosExpansion()
    pce.configure(pce_opts)
    indices = compute_hyperbolic_indices(pce.num_vars(), degree, 1.0)
    pce.set_indices(indices)

    num_samples = int(3e4)

    # Sampler 1: generic induced sampling in the canonical space
    np.random.seed(1)
    canonical_samples = generate_induced_samples(pce, num_samples)
    samples = var_trans.map_from_canonical_space(canonical_samples)

    # Sampler 2: induced sampling built from the discrete masses
    np.random.seed(1)
    xk = np.array(
        [get_probability_masses(var)[0]
         for var in var_trans.variable.all_variables()])
    pk = np.array(
        [get_probability_masses(var)[1]
         for var in var_trans.variable.all_variables()])
    canonical_xk = var_trans.map_to_canonical_space(xk)
    basis_matrix_generator = partial(
        basis_matrix_generator_1d, pce, degree)
    canonical_samples1 = discrete_induced_sampling(
        basis_matrix_generator, pce.indices, canonical_xk, pk,
        num_samples)
    samples1 = var_trans.map_from_canonical_space(canonical_samples1)

    def univariate_pdf(var, x):
        # Use pdf when the distribution defines one, otherwise the pmf
        if hasattr(var.dist, 'pdf'):
            return var.pdf(x)
        return var.pmf(x)

    def density(x):
        # Tensor-product density of the two variables
        return univariate_pdf(var1, x[0, :])*univariate_pdf(var2, x[1, :])

    def generate_proposal_samples(n):
        return np.vstack([var1.rvs(n), var2.rvs(n)])

    proposal_density = density

    # Sampler 3: rejection sampling.
    # unlike fekete and leja sampling can and should use
    # pce.basis_matrix here. If use canonical_basis_matrix then
    # densities must be mapped to this space also which can be difficult
    samples2 = random_induced_measure_sampling(
        num_samples, pce.num_vars(), pce.basis_matrix, density,
        proposal_density, generate_proposal_samples, envelope_factor)

    def induced_density(x):
        return density(x)*christoffel_function(x, pce.basis_matrix, True)

    # Tensor-product quadrature over all pairs of mass locations
    quad_samples = cartesian_product([xk[0], xk[1]])
    quad_weights = outer_product([pk[0], pk[1]])

    rtol = 1e-2
    assert np.allclose(quad_weights, density(quad_samples))
    assert np.allclose(density(quad_samples).sum(), 1)
    assert np.allclose(
        christoffel_function(quad_samples, pce.basis_matrix, True).dot(
            quad_weights), 1.0)

    true_induced_mean = quad_samples.dot(induced_density(quad_samples))
    assert np.allclose(samples.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples1.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples2.mean(axis=1), true_induced_mean, rtol=rtol)