def test_values(
    distr: PiecewiseLinear,
    target: List[float],
    expected_target_cdf: List[float],
    expected_target_crps: List[float],
):
    """Check ``distr.cdf`` / ``distr.crps`` against expected values and samples.

    The three list arguments are flattened to 1-D arrays with one entry per
    list element.  The CDF and CRPS evaluated at ``target`` must be
    element-wise close to the expected values, and the CDF evaluated at the
    edges returned by ``empirical_cdf`` must agree with the empirical CDF of
    100,000 samples drawn from ``distr`` (absolute tolerance ``1e-2``).
    """
    target_nd = mx.nd.array(target).reshape(shape=(len(target),))
    cdf_reference = np.array(expected_target_cdf).reshape(
        (len(expected_target_cdf),)
    )
    crps_reference = np.array(expected_target_crps).reshape(
        (len(expected_target_crps),)
    )

    # Closed-form values must match the provided references element-wise.
    cdf_at_target = distr.cdf(target_nd).asnumpy()
    assert all(np.isclose(cdf_at_target, cdf_reference))

    crps_at_target = distr.crps(target_nd).asnumpy()
    assert all(np.isclose(crps_at_target, crps_reference))

    # compare with empirical cdf from samples
    draws = distr.sample(100_000).asnumpy()
    assert np.isfinite(draws).all()

    empirical, bin_edges = empirical_cdf(draws)
    analytic = distr.cdf(mx.nd.array(bin_edges)).asnumpy()
    # The first row of the analytic CDF is dropped before comparing --
    # presumably there is one more edge than empirical CDF value; confirm
    # against empirical_cdf.
    assert np.allclose(analytic[1:, :], empirical, atol=1e-2)
def test_sampling(distr_class, params, serialize_fn) -> None:
    """Check that samples of a serialized distribution match its statistics.

    Builds ``distr_class(**params)``, passes it through ``serialize_fn`` and
    verifies sample shapes, finiteness, and that the empirical mean and
    standard deviation are close to ``distr.mean`` / ``distr.stddev``.
    Classes listed in ``DISTRIBUTIONS_WITH_CDF`` additionally have their CDF
    compared with the empirical CDF; classes listed in
    ``DISTRIBUTIONS_WITH_QUANTILE_FUNCTION`` have their quantile function
    compared with empirical percentiles.
    """
    distr = serialize_fn(distr_class(**params))

    single_draw = distr.sample()
    assert single_draw.shape == (2,)

    n_draws = 1_000_000
    batch = distr.sample(n_draws)
    assert batch.shape == (n_draws, 2)

    # avoid accuracy issues with float32 when calculating std
    # see https://github.com/numpy/numpy/issues/8869
    draws = batch.asnumpy().astype(np.float64)
    assert np.isfinite(draws).all()

    empirical_mean = np.mean(draws, axis=0)
    assert np.allclose(
        empirical_mean, distr.mean.asnumpy(), atol=1e-2, rtol=1e-2
    )

    empirical_std = np.std(draws, axis=0)
    assert np.allclose(
        empirical_std, distr.stddev.asnumpy(), atol=1e-1, rtol=1e-1
    )

    if distr_class in DISTRIBUTIONS_WITH_CDF:
        empirical, bin_edges = empirical_cdf(draws)
        analytic_cdf = distr.cdf(mx.nd.array(bin_edges)).asnumpy()
        assert np.allclose(analytic_cdf[1:, :], empirical, atol=1e-2)

    if distr_class in DISTRIBUTIONS_WITH_QUANTILE_FUNCTION:
        # Probability levels strictly inside (0, 1); np.percentile expects
        # percentages, hence the factor of 100.
        levels = np.linspace(1.0e-3, 1.0 - 1.0e-3, 100)
        empirical_quantiles = np.percentile(draws, levels * 100, axis=0)
        analytic_quantiles = distr.quantile(mx.nd.array(levels)).asnumpy()
        assert np.allclose(analytic_quantiles, empirical_quantiles, rtol=1e-1)
def test_mixture(
    distr1: Distribution, distr2: Distribution, p: Tensor, serialize_fn
) -> None:
    """Check ``MixtureDistribution`` sampling against a hand-built reference.

    Reference samples are built by drawing from both components and picking
    the ``distr1`` sample wherever a uniform random draw is below ``p`` (and
    the ``distr2`` sample otherwise).  A ``MixtureDistribution`` with
    probabilities ``(p, 1 - p)`` is then passed through ``serialize_fn`` and
    sampled; its sample shape, mean, standard deviation and histogram are
    compared against the analytic values and the reference samples.  When
    both components are ``Gaussian`` the mixture CDF is also compared with
    the empirical CDF of the mixture samples.
    """
    # sample from component distributions, and select samples
    samples1 = distr1.sample(num_samples=NUM_SAMPLES_LARGE)
    samples2 = distr2.sample(num_samples=NUM_SAMPLES_LARGE)

    # TODO: for multivariate case, test should not sample elements from
    #  different components in the event_dim dimension
    rand = mx.nd.random.uniform(shape=(NUM_SAMPLES_LARGE, *p.shape))
    choice = (rand < p.expand_dims(axis=0)).broadcast_like(samples1)
    samples_ref = mx.nd.where(choice, samples1, samples2)

    # construct mixture distribution and sample from it
    mixture_probs = mx.nd.stack(p, 1.0 - p, axis=-1)
    mixture = MixtureDistribution(
        mixture_probs=mixture_probs, components=[distr1, distr2]
    )
    mixture = serialize_fn(mixture)

    samples_mix = mixture.sample(num_samples=NUM_SAMPLES_LARGE)

    # check that shapes are right
    assert (
        samples1.shape
        == samples2.shape
        == samples_mix.shape
        == samples_ref.shape
    )

    # Convert to NumPy once instead of re-copying the samples for every check.
    samples_mix_np = samples_mix.asnumpy()
    samples_ref_np = samples_ref.asnumpy()

    # check mean and stddev
    calc_mean = mixture.mean.asnumpy()
    calc_std = mixture.stddev.asnumpy()
    # Accumulate in float64: reducing a large number of samples along axis 0
    # in the samples' own (lower) precision can make mean/std inaccurate
    # (see https://github.com/numpy/numpy/issues/8869).
    sample_mean = samples_mix_np.mean(axis=0, dtype=np.float64)
    sample_std = samples_mix_np.std(axis=0, dtype=np.float64)

    assert np.allclose(calc_mean, sample_mean, atol=1e-1)
    assert np.allclose(calc_std, sample_std, atol=2e-1)

    # check that histograms are close
    assert diff(histogram(samples_mix_np), histogram(samples_ref_np)) < 0.05

    # can only calculate the cdf for gaussians currently
    if isinstance(distr1, Gaussian) and isinstance(distr2, Gaussian):
        emp_cdf, edges = empirical_cdf(samples_mix_np)
        calc_cdf = mixture.cdf(mx.nd.array(edges)).asnumpy()
        assert np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2)
def test_sampling(distr_class, params) -> None:
    """Check that samples of ``distr_class(**params)`` match its statistics.

    Verifies the shape of a single draw and of a batch of 100,000 draws,
    that all samples are finite, and that the empirical mean and standard
    deviation are close to ``distr.mean`` / ``distr.stddev``.  For classes
    listed in ``DISTRIBUTIONS_WITH_CDF`` the analytic CDF evaluated at the
    edges returned by ``empirical_cdf`` is also compared with the empirical
    CDF.
    """
    distr = distr_class(**params)
    samples = distr.sample()
    assert samples.shape == (2,)

    num_samples = 100_000
    samples = distr.sample(num_samples)
    assert samples.shape == (num_samples, 2)

    np_samples = samples.asnumpy()
    assert np.isfinite(np_samples).all()

    # Accumulate in float64: reducing many samples along axis 0 in the
    # samples' own (lower) precision can make mean/std inaccurate
    # (see https://github.com/numpy/numpy/issues/8869).
    emp_mean = np_samples.mean(axis=0, dtype=np.float64)
    assert np.allclose(emp_mean, distr.mean.asnumpy(), atol=1e-2, rtol=1e-2)

    emp_std = np_samples.std(axis=0, dtype=np.float64)
    assert np.allclose(emp_std, distr.stddev.asnumpy(), atol=1e-1, rtol=5e-2)

    if distr_class in DISTRIBUTIONS_WITH_CDF:
        emp_cdf, edges = empirical_cdf(np_samples)
        calc_cdf = distr.cdf(mx.nd.array(edges)).asnumpy()
        assert np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2)