def testEmpiricalCdfAgainstDirichletMultinomial(self):
  # This test is too slow for Eager mode.
  if tf.executing_eagerly():
    return
  seed_stream = test_util.test_seed_stream()
  n = 10
  c1 = self.evaluate(
      1. + 2. * tf.random.uniform(
          shape=[3], dtype=tf.float32, seed=seed_stream()))
  c0 = self.evaluate(
      1. + 2. * tf.random.uniform(
          shape=[3], dtype=tf.float32, seed=seed_stream()))
  beta_binomial = tfd.BetaBinomial(n, c1, c0, validate_args=True)
  dirichlet_multinomial = tfd.DirichletMultinomial(
      n, tf.stack([c1, c0], axis=-1), validate_args=True)

  num_samples_to_draw = tf.math.floor(
      1 + st.min_num_samples_for_dkwm_cdf_two_sample_test(.02)[0])

  beta_binomial_samples = beta_binomial.sample(num_samples_to_draw)

  dirichlet_multinomial_samples = dirichlet_multinomial.sample(
      num_samples_to_draw)
  dirichlet_multinomial_samples = tf.squeeze(
      dirichlet_multinomial_samples[..., 0])

  self.evaluate(
      st.assert_true_cdf_equal_by_dkwm_two_sample(
          beta_binomial_samples, dirichlet_multinomial_samples))
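# Exposition note (not part of the original test suite): the comparison above
# works because the first-category count of DirichletMultinomial(n, [c1, c0])
# is marginally BetaBinomial(n, c1, c0), so the two sample sets should share a
# common true CDF. The helper below is a minimal, hypothetical sketch of
# drawing the two comparable sample sets, assuming `tfd` and `tf` are imported
# as in these tests; its names and default arguments are illustrative only.
def _sketch_marginal_equivalence(n=10., c1=2., c0=3., num_draws=1000,
                                 seed=None):
  bb = tfd.BetaBinomial(n, c1, c0)
  dm = tfd.DirichletMultinomial(n, tf.stack([c1, c0], axis=-1))
  bb_samples = bb.sample(num_draws, seed=seed)
  # Keep only the first-category counts; these are marginally BetaBinomial.
  dm_samples = dm.sample(num_draws, seed=seed)[..., 0]
  return bb_samples, dm_samples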
def test_dkwm_cdf_two_sample_batch_discrete_assertion(self, dtype):
  rng = np.random.RandomState(seed=0)
  num_samples = 52000
  batch_shape = [3, 2]
  shape = [num_samples] + batch_shape

  probs = [0.1, 0.2, 0.3, 0.4]
  samples1 = rng.choice(4, size=shape, p=probs).astype(dtype=dtype)
  samples2 = rng.choice(4, size=shape, p=probs).astype(dtype=dtype)
  self.evaluate(st.assert_true_cdf_equal_by_dkwm_two_sample(
      samples1, samples2, false_fail_rate=1e-6))

  def check_catches_mistake(wrong_probs):
    wrong_samples = rng.choice(
        len(wrong_probs), size=shape, p=wrong_probs).astype(dtype=dtype)
    with self.assertRaisesOpError(
        'Empirical CDFs outside joint K-S envelope'):
      self.evaluate(st.assert_true_cdf_equal_by_dkwm_two_sample(
          samples1, wrong_samples, false_fail_rate=1e-6))

  n = tf.ones(batch_shape) * num_samples
  d = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm_two_sample(
      n, n, false_fail_rate=1e-6, false_pass_rate=1e-6)
  self.assertTrue(np.all(self.evaluate(d) < 0.05))

  check_catches_mistake([0.1, 0.2, 0.3, 0.3, 0.1])
  check_catches_mistake([0.2, 0.2, 0.3, 0.3])
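# Illustrative sketch (added for exposition, not part of the original tests):
# the two DKWM helpers exercised above answer dual questions. Given a sample
# budget, `min_discrepancy_of_true_cdfs_detectable_by_dkwm_two_sample` reports
# the smallest true-CDF discrepancy the two-sample assertion can reliably
# detect; conversely, `min_num_samples_for_dkwm_cdf_two_sample_test` (used in
# the BetaBinomial test above) reports how many samples are needed to detect a
# given discrepancy. The helper below is hypothetical and only reuses calls
# that appear in these tests, assuming `st` and `tf` are imported as above.
def _sketch_dkwm_budget(num_samples=52000, false_fail_rate=1e-6,
                        false_pass_rate=1e-6):
  # Smallest CDF discrepancy detectable with `num_samples` draws per sample set.
  n = tf.ones([]) * num_samples
  detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm_two_sample(
      n, n, false_fail_rate=false_fail_rate, false_pass_rate=false_pass_rate)
  # Number of samples needed (for the first sample set) to detect a true-CDF
  # discrepancy of 0.02, as in the BetaBinomial test above.
  needed = st.min_num_samples_for_dkwm_cdf_two_sample_test(0.02)[0]
  return detectable, needed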
def testMixtureTargetLogProb(self, make_kernel_fn, optimal_accept):
  seed = test_util.test_seed()
  # Generate a 2-component Gaussian mixture in 4 dimensions.
  n = 4
  mu = np.ones(n) * (1. / 2)
  w = 0.1

  proposal = tfd.Sample(tfd.Normal(0., 10.), sample_shape=n)
  init_state = proposal.sample(5000, seed=seed)

  likelihood_dist = tfd.MixtureSameFamily(
      mixture_distribution=tfd.Categorical(probs=[w, 1. - w]),
      components_distribution=tfd.MultivariateNormalDiag(
          loc=np.asarray([mu, -mu]).astype(np.float32),
          scale_identity_multiplier=[.1, .2]))

  # Uniform prior
  init_log_prob = tf.zeros_like(proposal.log_prob(init_state))

  [
      n_stage, final_state, _
  ] = tfp.experimental.mcmc.sample_sequential_monte_carlo(
      lambda x: init_log_prob,
      likelihood_dist.log_prob,
      init_state,
      make_kernel_fn=make_kernel_fn,
      tuning_fn=functools.partial(simple_heuristic_tuning,
                                  optimal_accept=optimal_accept),
      max_num_steps=50,
      parallel_iterations=1,
      seed=None if tf.executing_eagerly() else seed)

  assert_cdf_equal_sample = st.assert_true_cdf_equal_by_dkwm_two_sample(
      final_state, likelihood_dist.sample(5000, seed=seed))
  n_stage, _ = self.evaluate((n_stage, assert_cdf_equal_sample))
  self.assertLess(n_stage, 15)
def testLatentsOfMixedRank(self, batch_shape, num_steps):
  strm = test_util.test_seed_stream()

  init0 = [tf.ones(batch_shape + [6])]
  init1 = [
      tf.ones(batch_shape + []),
      tf.ones(batch_shape + [1]),
      tf.ones(batch_shape + [2, 2])
  ]

  @tf.function(autograph=False)
  def run_two_chains(init0, init1):

    def log_prob0(x):
      return tf.squeeze(
          tfd.Independent(
              tfd.Normal(tf.range(6, dtype=tf.float32), tf.constant(1.)),
              reinterpreted_batch_ndims=1).log_prob(x))

    kernel0 = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
        log_prob0, step_size=0.3)
    [results0] = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=10,
        current_state=init0,
        kernel=kernel0,
        trace_fn=None,
        seed=strm())

    def log_prob1(state0, state1, state2):
      return tf.squeeze(
          tfd.Normal(tf.constant(0.), tf.constant(1.)).log_prob(state0) +
          tfd.Independent(
              tfd.Normal(tf.constant([1.]), tf.constant(1.)),
              reinterpreted_batch_ndims=1).log_prob(state1) +
          tfd.Independent(
              tfd.Normal(tf.constant([[2., 3.], [4., 5.]]), tf.constant(1.)),
              reinterpreted_batch_ndims=2).log_prob(state2))

    kernel1 = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
        log_prob1, step_size=0.3)
    results1_ = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=10,
        current_state=init1,
        kernel=kernel1,
        trace_fn=None,
        seed=strm())

    results1 = tf.concat([
        tf.reshape(x, [num_steps] + batch_shape + [-1]) for x in results1_
    ], axis=-1)

    return results0, results1

  results0, results1 = run_two_chains(init0, init1)

  self.evaluate(
      st.assert_true_cdf_equal_by_dkwm_two_sample(results0, results1))
def testLatentsOfMixedRank(self, batch_shape, num_steps):
  strm = tfp.util.SeedStream(5, salt='LatentsOfMixedRankTest')

  init0 = [tf.ones(batch_shape + [6])]
  init1 = [
      tf.ones(batch_shape + []),
      tf.ones(batch_shape + [1]),
      tf.ones(batch_shape + [2, 2])
  ]

  def log_prob0(x):
    return tf.squeeze(
        tfd.Independent(
            tfd.Normal(tf.range(6, dtype=tf.float32), tf.constant(1.)),
            reinterpreted_batch_ndims=1).log_prob(x))

  kernel0 = tfp.mcmc.NoUTurnSampler(log_prob0, step_size=0.3, seed=strm())
  [results0] = tfp.mcmc.sample_chain(
      num_results=num_steps,
      num_burnin_steps=10,
      current_state=init0,
      kernel=kernel0,
      trace_fn=None,
      parallel_iterations=1)

  def log_prob1(state0, state1, state2):
    return tf.squeeze(
        tfd.Normal(tf.constant(0.), tf.constant(1.)).log_prob(state0) +
        tfd.Independent(
            tfd.Normal(tf.constant([1.]), tf.constant(1.)),
            reinterpreted_batch_ndims=1).log_prob(state1) +
        tfd.Independent(
            tfd.Normal(tf.constant([[2., 3.], [4., 5.]]), tf.constant(1.)),
            reinterpreted_batch_ndims=2).log_prob(state2))

  kernel1 = tfp.mcmc.NoUTurnSampler(log_prob1, step_size=0.3, seed=strm())
  results1_ = tfp.mcmc.sample_chain(
      num_results=num_steps,
      num_burnin_steps=10,
      current_state=init1,
      kernel=kernel1,
      trace_fn=None,
      parallel_iterations=1)

  results1 = tf.concat(
      [tf.reshape(x, [num_steps] + batch_shape + [-1]) for x in results1_],
      axis=-1)

  self.evaluate(
      st.assert_true_cdf_equal_by_dkwm_two_sample(results0, results1))
def testMixtureMultiBatch(self):
  seed = test_util.test_seed()
  # Generate 3 copies (batches) of a 2-component Gaussian mixture in
  # 2 dimensions.
  nd = 2
  n_batch = 3
  w = tf.constant([0.1, .25, .5], tf.float64)
  mixture_weight = tf.transpose(tf.stack([w, 1. - w]))
  mu = np.ones(nd) * .5
  loc = tf.cast(np.asarray([mu, -mu]), tf.float64)
  component_loc = tf.repeat(loc[tf.newaxis, ...], n_batch, axis=0)

  likelihood_dist = tfd.MixtureSameFamily(
      mixture_distribution=tfd.Categorical(probs=mixture_weight),
      components_distribution=tfd.MultivariateNormalDiag(
          loc=component_loc,
          scale_identity_multiplier=[.1, .2]))

  proposal = tfd.Sample(tfd.Normal(tf.constant(0., tf.float64), 10.),
                        sample_shape=nd)
  init_state = proposal.sample([5000, n_batch], seed=seed)
  log_prob_fn = likelihood_dist.log_prob

  # Uniform prior
  init_log_prob = tf.zeros_like(log_prob_fn(init_state))

  [
      n_stage, final_state, _
  ] = tfp.experimental.mcmc.sample_sequential_monte_carlo(
      lambda x: init_log_prob,
      log_prob_fn,
      init_state,
      make_kernel_fn=make_test_nuts_kernel_fn,
      tuning_fn=functools.partial(simple_heuristic_tuning,
                                  optimal_accept=0.8),
      max_num_steps=50,
      parallel_iterations=1,
      seed=None if tf.executing_eagerly() else seed)

  assert_cdf_equal_sample = st.assert_true_cdf_equal_by_dkwm_two_sample(
      final_state, likelihood_dist.sample(5000, seed=seed), 1e-5)
  n_stage, _ = self.evaluate((n_stage, assert_cdf_equal_sample))
  self.assertLess(n_stage, 15)
def testMixtureTargetLogProb(self, make_kernel_fn, optimal_accept):
  if tf.executing_eagerly():
    self.skipTest('Skipping eager-mode test to reduce test weight.')

  seed = test_util.test_seed()
  # Generate a 2-component Gaussian mixture in 3 dimensions.
  nd = 3
  w = 0.1
  mixture_weight = tf.constant([w, 1. - w], tf.float64)
  mu = np.ones(nd) * .5
  component_loc = tf.cast(np.asarray([mu, -mu]), tf.float64)

  proposal = tfd.Sample(tfd.Normal(tf.constant(0., tf.float64), 10.),
                        sample_shape=nd)
  init_state = proposal.sample(5000, seed=seed)

  likelihood_dist = tfd.MixtureSameFamily(
      mixture_distribution=tfd.Categorical(probs=mixture_weight),
      components_distribution=tfd.MultivariateNormalDiag(
          loc=component_loc,
          scale_identity_multiplier=[.1, .2]))

  # Uniform prior
  init_log_prob = tf.zeros_like(proposal.log_prob(init_state))

  [
      n_stage, final_state, _
  ] = tfp.experimental.mcmc.sample_sequential_monte_carlo(
      lambda x: init_log_prob,
      likelihood_dist.log_prob,
      init_state,
      make_kernel_fn=make_kernel_fn,
      tuning_fn=functools.partial(simple_heuristic_tuning,
                                  optimal_accept=optimal_accept),
      max_num_steps=50,
      seed=seed)

  assert_cdf_equal_sample = st.assert_true_cdf_equal_by_dkwm_two_sample(
      final_state, likelihood_dist.sample(5000, seed=seed), 1e-5)
  n_stage, _ = self.evaluate((n_stage, assert_cdf_equal_sample))
  self.assertLess(n_stage, 15)
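# Exposition note on the uniform prior used in the SMC tests above (this is an
# added sketch, not part of the original tests, and assumes the standard
# likelihood-tempering scheme): passing the constant `lambda x: init_log_prob`
# as the prior log-density makes the tempered targets proportional to
# likelihood(x)**beta_t, so the annealing runs from the flat prior at beta = 0
# toward the mixture target at beta = 1, with the initial particles taken from
# the proposal. The function and argument names below are hypothetical.
def _sketch_tempered_log_prob(x, beta, prior_log_prob_fn,
                              likelihood_log_prob_fn):
  # log p_beta(x) = log prior(x) + beta * log likelihood(x), up to a constant.
  return prior_log_prob_fn(x) + beta * likelihood_log_prob_fn(x)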