def test_discrete_induced_sampling(self):
    nmasses1 = 10
    mass_locations1 = np.geomspace(1.0, 512.0, num=nmasses1)
    # mass_locations1 = np.arange(0, nmasses1)
    masses1 = np.ones(nmasses1, dtype=float)/nmasses1
    var1 = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locations1, masses1))()

    nmasses2 = 10
    mass_locations2 = np.arange(0, nmasses2)
    # if increased from 16 the unmodified recursion becomes ill conditioned
    masses2 = np.geomspace(1.0, 16.0, num=nmasses2)
    # masses2 = np.ones(nmasses2, dtype=float)/nmasses2
    masses2 /= masses2.sum()
    var2 = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locations2, masses2))()

    self.help_discrete_induced_sampling(var1, var2, 30)

    num_type1, num_type2, num_trials = 10, 10, 9
    var1 = stats.hypergeom(num_type1+num_type2, num_type1, num_trials)
    var2 = var1
    self.help_discrete_induced_sampling(var1, var2, 300)

    num_type1, num_type2, num_trials = 10, 10, 9
    var1 = stats.binom(10, 0.5)
    var2 = var1
    self.help_discrete_induced_sampling(var1, var2, 300)

    N = 10
    xk, pk = np.arange(N), np.ones(N)/N
    var1 = float_rv_discrete(
        name='discrete_chebyshev', values=(xk, pk))()
    var2 = var1
    self.help_discrete_induced_sampling(var1, var2, 30)

def test_variables_equivalent(self):
    nmasses = 10
    xk = np.array(range(nmasses), dtype='float')
    pk = np.ones(nmasses)/nmasses

    xk2 = np.array(range(nmasses), dtype='float')
    # pk2 = np.ones(nmasses)/nmasses
    pk2 = np.geomspace(1.0, 512.0, num=nmasses)
    pk2 /= pk2.sum()

    var1 = float_rv_discrete(
        name='float_rv_discrete', values=(xk, pk))()
    var2 = float_rv_discrete(
        name='float_rv_discrete', values=(xk2, pk2))()

    assert not variables_equivalent(var1, var2)

def setUp(self):
    uniform_var1 = {'var_type': 'uniform', 'range': [-1, 1]}
    uniform_var2 = {'var_type': 'uniform', 'range': [0, 1]}
    beta_var1 = {'var_type': 'beta', 'range': [-1, 1],
                 'alpha_stat': 1, 'beta_stat': 1}
    beta_var2 = {'var_type': 'beta', 'range': [-2, 1],
                 'alpha_stat': 2, 'beta_stat': 1}
    gaussian_var = {'var_type': 'gaussian', 'mean': -1., 'variance': 4.}

    # self.continuous_variables = [
    #     uniform_var1, beta_var1, gaussian_var, uniform_var2, uniform_var1,
    #     beta_var2]
    self.continuous_variables = [
        uniform(-1, 2), beta(1, 1, -1, 2), norm(-1, 2), uniform(),
        uniform(-1, 2), beta(2, 1, -2, 3)]
    self.continuous_mean = np.array(
        [0., 0., -1, 0.5, 0., beta.mean(a=2, b=1, loc=-2, scale=3)])

    nmasses1 = 10
    mass_locations1 = np.geomspace(1.0, 32.0, num=nmasses1)
    masses1 = np.ones(nmasses1, dtype=float)/nmasses1

    nmasses2 = 10
    mass_locations2 = np.arange(0, nmasses2)
    masses2 = np.geomspace(1.0, 32.0, num=nmasses2)
    masses2 /= masses2.sum()

    # the trailing () freezes the variable, which creates the var.dist
    # member variable
    var1 = float_rv_discrete(
        name='var1', values=(mass_locations1, masses1))()
    var2 = float_rv_discrete(
        name='var2', values=(mass_locations2, masses2))()
    self.discrete_variables = [var1, var2]
    self.discrete_mean = np.empty(len(self.discrete_variables))
    for ii, var in enumerate(self.discrete_variables):
        self.discrete_mean[ii] = var.moment(1)

def test_get_univariate_leja_rule_float_rv_discrete(self):
    nmasses = 20
    xk = np.array(range(1, nmasses+1), dtype='float')
    pk = np.ones(nmasses)/nmasses
    variable = float_rv_discrete(
        name='float_rv_discrete', values=(xk, pk))()
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_univariate_leja_quadrature_rule(variable, growth_rule)
    level = 3

    scales, shapes = get_distribution_info(variable)[1:]
    print(scales)
    x, w = quad_rule(level)
    # x is in [-1, 1]; rescale so that x is in [0, 1]
    loc, scale = scales['loc'], scales['scale']
    scale /= 2
    loc = loc + scale
    x = x*scale + loc

    true_moment = (xk**(x.shape[0]-1)).dot(pk)
    moment = (x**(x.shape[0]-1)).dot(w[-1])
    # print(moment)
    # print(true_moment)
    assert np.allclose(moment, true_moment)

def test_get_univariate_leja_rule_bounded_discrete(self):
    from scipy import stats
    from pyapprox.variables import get_probability_masses
    growth_rule = partial(constant_increment_growth_rule, 2)
    level = 3

    nmasses = 20
    xk = np.array(range(0, nmasses), dtype='float')
    pk = np.ones(nmasses)/nmasses
    var_cheb = float_rv_discrete(
        name='discrete_chebyshev', values=(xk, pk))()

    for variable in [var_cheb, stats.binom(20, 0.5),
                     stats.hypergeom(10+10, 10, 9)]:
        quad_rule = get_univariate_leja_quadrature_rule(
            variable, growth_rule)

        # the polynomials of binom and hypergeom variables have no
        # canonical domain [-1, 1]
        x, w = quad_rule(level)

        xk, pk = get_probability_masses(variable)
        true_moment = (xk**(x.shape[0]-1)).dot(pk)
        moment = (x**(x.shape[0]-1)).dot(w[-1])

        assert np.allclose(moment, true_moment)

def test_float_discrete_variable(self):
    nmasses1 = 10
    mass_locations1 = np.geomspace(1.0, 32.0, num=nmasses1)
    masses1 = np.ones(nmasses1, dtype=float)/nmasses1
    var1 = float_rv_discrete(
        name='var1', values=(mass_locations1, masses1))()

    for power in [1, 2, 3]:
        assert np.allclose(
            var1.moment(power), (mass_locations1**power).dot(masses1))

    np.random.seed(1)
    num_samples = int(1e6)
    samples = var1.rvs(size=(1, num_samples))
    assert np.allclose(samples.mean(), var1.moment(1), atol=1e-2)

    # import matplotlib.pyplot as plt
    # xx = np.linspace(0, 33, 301)
    # plt.plot(mass_locations1, np.cumsum(masses1), 'rs')
    # plt.plot(xx, var1.cdf(xx), '-'); plt.show()
    assert np.allclose(np.cumsum(masses1), var1.cdf(mass_locations1))

    # import matplotlib.pyplot as plt
    # yy = np.linspace(0, 1, 51)
    # plt.plot(mass_locations1, np.cumsum(masses1), 'rs')
    # plt.plot(var1.ppf(yy), yy, '-o', ms=2); plt.show()
    xx = mass_locations1
    assert np.allclose(xx, var1.ppf(var1.cdf(xx)))
    assert np.allclose(xx, var1.ppf(var1.cdf(xx+1e-1)))

def test_get_univariate_leja_rule_bounded_discrete(self):
    growth_rule = partial(constant_increment_growth_rule, 2)
    level = 3

    nmasses = 20
    xk = np.array(range(0, nmasses), dtype='float')
    pk = np.ones(nmasses)/nmasses
    var_cheb = float_rv_discrete(
        name='discrete_chebyshev', values=(xk, pk))()

    for variable in [var_cheb, stats.binom(17, 0.5),
                     stats.hypergeom(10+10, 10, 9)]:
        quad_rule = get_univariate_leja_quadrature_rule(
            variable, growth_rule)

        x, w = quad_rule(level)
        loc, scale = transform_scale_parameters(variable)
        x = x*scale + loc

        xk, pk = get_probability_masses(variable)
        print(x, xk, loc, scale)

        degree = x.shape[0]-1
        true_moment = (xk**degree).dot(pk)
        moment = (x**degree).dot(w[-1])

        print(moment, true_moment, variable.dist.name)
        assert np.allclose(moment, true_moment)

def test_get_univariate_leja_rule_float_rv_discrete(self):
    nmasses = 20
    xk = np.array(range(1, nmasses+1), dtype='float')
    pk = np.ones(nmasses)/nmasses
    variable = float_rv_discrete(
        name='float_rv_discrete', values=(xk, pk))()
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_univariate_leja_quadrature_rule(
        variable, growth_rule, orthonormality_tol=1e-10,
        return_weights_for_all_levels=False)
    level = 3

    x, w = quad_rule(level)
    loc, scale = transform_scale_parameters(variable)
    x = x*scale + loc

    degree = x.shape[0]-1
    true_moment = (xk**degree).dot(pk)
    moment = (x**degree).dot(w)

    # print(moment, true_moment)
    assert np.allclose(moment, true_moment)

def preconditioned_barycentric_weights():
    nmasses = 20
    xk = np.array(range(nmasses), dtype='float')
    pk = np.ones(nmasses)/nmasses
    var1 = float_rv_discrete(
        name='float_rv_discrete', values=(xk, pk))()
    univariate_variables = [var1]
    variable = IndependentMultivariateRandomVariable(univariate_variables)
    var_trans = AffineRandomVariableTransformation(variable)
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_univariate_leja_quadrature_rule(var1, growth_rule)
    samples = quad_rule(3)[0]
    num_samples = samples.shape[0]
    poly = PolynomialChaosExpansion()
    poly_opts = define_poly_options_from_variable_transformation(var_trans)
    poly_opts['numerically_generated_poly_accuracy_tolerance'] = 1e-5
    poly.configure(poly_opts)
    poly.set_indices(np.arange(num_samples))

    # precond_weights = np.sqrt(
    #     (poly.basis_matrix(samples[np.newaxis, :])**2).mean(axis=1))
    precond_weights = np.ones(num_samples)

    bary_weights = compute_barycentric_weights_1d(
        samples, interval_length=samples.max()-samples.min())

    # numerator/denominator sums of the second (true) form of the
    # barycentric interpolation formula,
    # p(x) = sum_j w_j f_j/(x-x_j) / sum_j w_j/(x-x_j)
    def barysum(x, y, w, f):
        x = x[:, np.newaxis]
        y = y[np.newaxis, :]
        temp = w*f/(x-y)
        return np.sum(temp, axis=1)

    def function(x):
        return np.cos(2*np.pi*x)

    y = samples
    print(samples)
    w = precond_weights*bary_weights
    # x = np.linspace(-3, 3, 301)
    x = np.linspace(-1, 1, 301)
    f = function(y)/precond_weights

    # the barycentric formula cannot be evaluated at the interpolation
    # nodes themselves, so remove them from the evaluation points
    II = []
    for ii, xx in enumerate(x):
        if xx in samples:
            II.append(ii)
    x = np.delete(x, II)

    r1 = barysum(x, y, w, f)
    r2 = barysum(x, y, w, 1/precond_weights)
    interp_vals = r1/r2
    # import matplotlib.pyplot as plt
    # plt.plot(x, interp_vals, 'k')
    # plt.plot(samples, function(samples), 'ro')
    # plt.plot(x, function(x), 'r--')
    # print(num_samples)
    # print(precond_weights)
    print(np.linalg.norm(interp_vals-function(x)))

def test_discrete_chebyshev(self):
    N, degree = 100, 5
    xk, pk = np.arange(N), np.ones(N)/N
    rv = float_rv_discrete(name='discrete_chebyshev', values=(xk, pk))
    ab = discrete_chebyshev_recurrence(degree+1, N)
    p = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
    w = rv.pmf(xk)
    assert np.allclose(np.dot(p.T*w, p), np.eye(degree+1))

def test_continuous_rv_sample(self):
    N, degree = int(1e6), 5
    xk, pk = np.random.normal(0, 1, N), np.ones(N)/N
    rv = float_rv_discrete(name='continuous_rv_sample', values=(xk, pk))
    ab = modified_chebyshev_orthonormal(degree+1, [xk, pk])

    hermite_ab = hermite_recurrence(degree+1, 0, True)
    x, w = gauss_quadrature(hermite_ab, degree+1)
    p = evaluate_orthonormal_polynomial_1d(x, degree, ab)
    gaussian_moments = np.zeros(degree+1)
    gaussian_moments[0] = 1
    assert np.allclose(p.T.dot(w), gaussian_moments, atol=1e-2)
    assert np.allclose(np.dot(p.T*w, p), np.eye(degree+1), atol=7e-2)

def test_float_rv_discrete_chebyshev(self):
    N, degree = 10, 5
    xk, pk = np.geomspace(1.0, 512.0, num=N), np.ones(N)/N
    rv = float_rv_discrete(name='float_rv_discrete', values=(xk, pk))()
    var_trans = AffineRandomVariableTransformation([rv])
    poly = PolynomialChaosExpansion()
    poly_opts = define_poly_options_from_variable_transformation(var_trans)
    poly_opts['numerically_generated_poly_accuracy_tolerance'] = 1e-9
    poly.configure(poly_opts)
    poly.set_indices(np.arange(degree+1)[np.newaxis, :])
    p = poly.basis_matrix(xk[np.newaxis, :])
    w = pk
    assert np.allclose(np.dot(p.T*w, p), np.eye(degree+1))

def test_discrete_chebyshev(self):
    N, degree = 10, 5
    xk, pk = np.arange(N), np.ones(N)/N
    rv = float_rv_discrete(name='discrete_chebyshev', values=(xk, pk))()
    var_trans = AffineRandomVariableTransformation([rv])
    poly = PolynomialChaosExpansion()
    poly_opts = define_poly_options_from_variable_transformation(var_trans)
    poly.configure(poly_opts)
    poly.set_indices(np.arange(degree+1)[np.newaxis, :])
    p = poly.basis_matrix(xk[np.newaxis, :])
    w = pk
    # print(np.dot(p.T*w, p), np.eye(degree+1))
    assert np.allclose(np.dot(p.T*w, p), np.eye(degree+1))

def test_rv_discrete_large_moments(self):
    """
    modified_chebyshev_orthonormal will fail when the moments of the
    discrete variable are very large. To avoid this, rescale the
    variable to [-1, 1], as is done for continuous random variables.
    """
    N, degree = 100, 5
    xk, pk = np.arange(N), np.ones(N)/N
    rv = float_rv_discrete(name='float_rv_discrete', values=(xk, pk))
    xk_canonical = xk/(N-1)*2 - 1
    ab = modified_chebyshev_orthonormal(degree+1, [xk_canonical, pk])
    p = evaluate_orthonormal_polynomial_1d(xk_canonical, degree, ab)
    w = rv.pmf(xk)
    assert np.allclose(np.dot(p.T*w, p), np.eye(degree+1))

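# Illustrative sketch (not part of the test suite): the rescaling used in
# test_rv_discrete_large_moments applies to any discrete variable with
# bounded support. The helper name below is hypothetical; it assumes
# modified_chebyshev_orthonormal and evaluate_orthonormal_polynomial_1d
# are imported at the top of this module, as the tests above use them.
def _canonical_discrete_recursion_sketch(xk, pk, degree):
    # map the support onto the canonical interval [-1, 1] so the modified
    # Chebyshev moments stay well scaled
    lb, ub = xk.min(), xk.max()
    xk_canonical = 2*(xk-lb)/(ub-lb) - 1
    ab = modified_chebyshev_orthonormal(degree+1, [xk_canonical, pk])
    # check orthonormality with respect to the discrete measure
    basis_mat = evaluate_orthonormal_polynomial_1d(xk_canonical, degree, ab)
    return np.allclose(
        (basis_mat*pk[:, None]).T.dot(basis_mat), np.eye(degree+1))
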
def test_float_rv_discrete_pdf(self):
    nmasses1 = 10
    mass_locations1 = np.geomspace(1.0, 32.0, num=nmasses1)
    masses1 = np.ones(nmasses1, dtype=float)/nmasses1
    var1 = float_rv_discrete(
        name='var1', values=(mass_locations1, masses1))()

    xk = var1.dist.xk.copy()
    II = np.random.permutation(xk.shape[0])[:3]
    xk[II] = -1
    pdf_vals = var1.pdf(xk)
    assert np.allclose(pdf_vals[II], np.zeros_like(II, dtype=float))
    assert np.allclose(
        np.delete(pdf_vals, II), np.delete(var1.dist.pk, II))

def test_sampled_based_christoffel_leja_quadrature_rule(self):
    nsamples = int(1e6)
    samples = np.random.normal(0, 1, (1, nsamples))
    variable = float_rv_discrete(
        name='continuous_rv_sample',
        values=(samples[0, :], np.ones(nsamples)/nsamples))()
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_univariate_leja_quadrature_rule(
        variable, growth_rule, method='christoffel',
        numerically_generated_poly_accuracy_tolerance=1e-8)
    level = 5
    quad_samples, weights = quad_rule(level)
    # print(quad_samples)
    # print((quad_samples**2).dot(weights[-1]))
    # print((samples**2).mean())
    assert np.allclose(
        (quad_samples**2).dot(weights[-1]), (samples**2).mean())

def test_get_univariate_leja_rule_discrete_chebyshev(self):
    nmasses = 20
    xk = np.array(range(0, nmasses), dtype='float')
    pk = np.ones(nmasses)/nmasses
    variable = float_rv_discrete(
        name='discrete_chebyshev', values=(xk, pk))()
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_univariate_leja_quadrature_rule(variable, growth_rule)
    level = 3

    scales, shapes = get_distribution_info(variable)[1:]
    x, w = quad_rule(level)
    true_moment = (xk**(x.shape[0]-1)).dot(pk)
    moment = (x**(x.shape[0]-1)).dot(w[-1])
    # print(moment)
    # print(true_moment)
    assert np.allclose(moment, true_moment)

def test_map_rv_discrete(self):
    nvars = 2
    mass_locs = np.arange(5, 501, step=50)
    nmasses = mass_locs.shape[0]
    mass_probs = np.ones(nmasses, dtype=float)/float(nmasses)
    univariate_variables = [
        float_rv_discrete(
            name='float_rv_discrete', values=(mass_locs, mass_probs))()
    ]*nvars

    variables = IndependentMultivariateRandomVariable(univariate_variables)
    var_trans = AffineRandomVariableTransformation(variables)

    samples = np.vstack(
        [mass_locs[np.newaxis, :], mass_locs[0]*np.ones((1, nmasses))])

    canonical_samples = var_trans.map_to_canonical_space(samples)
    assert canonical_samples[0].min() == -1
    assert canonical_samples[0].max() == 1

    recovered_samples = var_trans.map_from_canonical_space(
        canonical_samples)
    assert np.allclose(recovered_samples, samples)

def test_get_recursion_coefficients_from_variable_discrete(self):
    degree = 4
    N = 10
    scipy_discrete_var_names = [
        n for n in stats._discrete_distns._distn_names]
    discrete_var_names = [
        "binom", "bernoulli", "nbinom", "geom", "hypergeom", "logser",
        "poisson", "planck", "boltzmann", "randint", "zipf", "dlaplace",
        "skellam", "yulesimon"]
    # valid shape parameters for each distribution in discrete_var_names;
    # the entries of the two lists are in one-to-one correspondence
    discrete_var_shapes = [
        {"n": 10, "p": 0.5}, {"p": 0.5}, {"n": 10, "p": 0.5}, {"p": 0.5},
        {"M": 20, "n": 7, "N": 12}, {"p": 0.5}, {"mu": 1}, {"lambda_": 1},
        {"lambda_": 2, "N": 10}, {"low": 0, "high": 10}, {"a": 2},
        {"a": 1}, {"mu1": 1, "mu2": 3}, {"alpha": 1}]

    for name in scipy_discrete_var_names:
        assert name in discrete_var_names

    # The following distributions are not supported:
    # - yulesimon: there is a bug when interval is called from a frozen
    #   variable
    # - bernoulli: it only has two masses
    # - zipf: an unusual distribution for which the basis is difficult to
    #   compute
    # - crystallball: it is discontinuous and requires a special
    #   integrator; this can be developed if needed
    unsupported_discrete_var_names = ["bernoulli", "yulesimon", "zipf"]
    for name in unsupported_discrete_var_names:
        ii = discrete_var_names.index(name)
        del discrete_var_names[ii]
        del discrete_var_shapes[ii]

    for name, shapes in zip(discrete_var_names, discrete_var_shapes):
        # print(name)
        var = getattr(stats, name)(**shapes)
        xk, pk = get_probability_masses(var, 1e-15)
        loc, scale = transform_scale_parameters(var)
        xk = (xk-loc)/scale
        ab = get_recursion_coefficients_from_variable(
            var, degree+1,
            {"orthonormality_tol": 3e-14,
             "truncated_probability_tol": 1e-15,
             "numeric": False})
        basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
        gram_mat = (basis_mat*pk[:, None]).T.dot(basis_mat)
        assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)

    # custom discrete variables
    xk1, pk1 = np.arange(N), np.ones(N)/N
    xk2, pk2 = np.arange(N)**2, np.ones(N)/N
    custom_vars = [
        float_rv_discrete(name="discrete_chebyshev", values=(xk1, pk1))(),
        float_rv_discrete(name="float_rv_discrete", values=(xk2, pk2))()]
    for var in custom_vars:
        xk, pk = get_probability_masses(var, 1e-15)
        loc, scale = transform_scale_parameters(var)
        xk = (xk-loc)/scale
        ab = get_recursion_coefficients_from_variable(
            var, degree+1,
            {"orthonormality_tol": 1e-14,
             "truncated_probability_tol": 1e-15})
        basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
        gram_mat = (basis_mat*pk[:, None]).T.dot(basis_mat)
        assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)

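# Illustrative sketch (hypothetical helper, not part of the test suite):
# recursion coefficients computed as in the test above can be passed to
# gauss_quadrature (already used in test_continuous_rv_sample) to build a
# quadrature rule in the canonical coordinates of a supported scipy
# discrete variable; mapping the nodes back with loc and scale should
# reproduce, for example, the mean of the variable.
def _discrete_gauss_rule_sketch(degree=4):
    var = stats.binom(10, 0.5)
    xk, pk = get_probability_masses(var, 1e-15)
    loc, scale = transform_scale_parameters(var)
    ab = get_recursion_coefficients_from_variable(
        var, degree+1,
        {"orthonormality_tol": 3e-14, "truncated_probability_tol": 1e-15,
         "numeric": False})
    # Gauss rule in canonical coordinates; map nodes back to user space
    x, w = gauss_quadrature(ab, degree+1)
    return np.allclose((x*scale+loc).dot(w), (xk*pk).sum())
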
def test_discrete_induced_sampling(self):
    degree = 3

    nmasses1 = 10
    mass_locations1 = np.geomspace(1.0, 512.0, num=nmasses1)
    # mass_locations1 = np.arange(0, nmasses1)
    masses1 = np.ones(nmasses1, dtype=float)/nmasses1
    var1 = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locations1, masses1))()

    nmasses2 = 10
    mass_locations2 = np.arange(0, nmasses2)
    # if increased from 16 the unmodified recursion becomes ill conditioned
    masses2 = np.geomspace(1.0, 16.0, num=nmasses2)
    # masses2 = np.ones(nmasses2, dtype=float)/nmasses2
    masses2 /= masses2.sum()
    var2 = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locations2, masses2))()

    var_trans = AffineRandomVariableTransformation([var1, var2])
    pce_opts = define_poly_options_from_variable_transformation(var_trans)
    pce = PolynomialChaosExpansion()
    pce.configure(pce_opts)
    indices = compute_hyperbolic_indices(pce.num_vars(), degree, 1.0)
    pce.set_indices(indices)

    num_samples = int(1e4)
    np.random.seed(1)
    canonical_samples = generate_induced_samples(pce, num_samples)
    samples = var_trans.map_from_canonical_space(canonical_samples)

    np.random.seed(1)
    canonical_xk = [2*get_distribution_info(var1)[2]['xk']-1,
                    2*get_distribution_info(var2)[2]['xk']-1]
    basis_matrix_generator = partial(basis_matrix_generator_1d, pce, degree)
    canonical_samples1 = discrete_induced_sampling(
        basis_matrix_generator, pce.indices, canonical_xk,
        [var1.dist.pk, var2.dist.pk], num_samples)
    samples1 = var_trans.map_from_canonical_space(canonical_samples1)

    def density(x):
        return var1.pdf(x[0, :])*var2.pdf(x[1, :])

    envelope_factor = 30

    def generate_proposal_samples(n):
        samples = np.vstack([var1.rvs(n), var2.rvs(n)])
        return samples

    proposal_density = density

    # Unlike Fekete and Leja sampling, we can and should use
    # pce.basis_matrix here. If canonical_basis_matrix were used, the
    # densities would also have to be mapped to that space, which can be
    # difficult.
    samples2 = random_induced_measure_sampling(
        num_samples, pce.num_vars(), pce.basis_matrix, density,
        proposal_density, generate_proposal_samples, envelope_factor)

    def induced_density(x):
        vals = density(x)*christoffel_function(x, pce.basis_matrix, True)
        return vals

    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.polynomial_sampling import christoffel_function
    quad_samples = cartesian_product([var1.dist.xk, var2.dist.xk])
    quad_weights = outer_product([var1.dist.pk, var2.dist.pk])

    # print(canonical_samples.min(axis=1), canonical_samples.max(axis=1))
    # print(samples.min(axis=1), samples.max(axis=1))
    # print(canonical_samples1.min(axis=1), canonical_samples1.max(axis=1))
    # print(samples1.min(axis=1), samples1.max(axis=1))

    # import matplotlib.pyplot as plt
    # plt.plot(quad_samples[0, :], quad_samples[1, :], 's')
    # plt.plot(samples[0, :], samples[1, :], 'o')
    # plt.plot(samples1[0, :], samples1[1, :], '*')
    # plt.show()

    rtol = 1e-2
    assert np.allclose(quad_weights, density(quad_samples))
    assert np.allclose(density(quad_samples).sum(), 1)
    assert np.allclose(
        christoffel_function(
            quad_samples, pce.basis_matrix, True).dot(quad_weights), 1.0)

    true_induced_mean = quad_samples.dot(induced_density(quad_samples))
    print(true_induced_mean)
    print(samples.mean(axis=1))
    print(samples1.mean(axis=1))
    print(samples2.mean(axis=1))
    print(samples1.mean(axis=1)-true_induced_mean, true_induced_mean*rtol)
    assert np.allclose(samples.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples1.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples2.mean(axis=1), true_induced_mean, rtol=rtol)

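# Note (illustrative sketch, not part of the test suite): the induced
# measure targeted above has density equal to the base density multiplied
# by the normalized Christoffel function, i.e. the average of the squared
# orthonormal basis functions. The hypothetical helper below restates the
# normalization check from test_discrete_induced_sampling for two discrete
# variables and an already configured pce.
def _induced_density_sums_to_one_sketch(pce, var1, var2):
    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.polynomial_sampling import christoffel_function
    # tensor-product grid over the joint discrete support and its weights
    quad_samples = cartesian_product([var1.dist.xk, var2.dist.xk])
    quad_weights = outer_product([var1.dist.pk, var2.dist.pk])
    # christoffel_function(..., True) averages the squared basis values,
    # so the induced density sums to one over the support
    induced_vals = quad_weights*christoffel_function(
        quad_samples, pce.basis_matrix, True)
    return np.allclose(induced_vals.sum(), 1.0)
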