def test_can_specify_a_gaussian_proposal_distribution(net: BayesNet) -> None:
    algo = MetropolisHastingsSampler(proposal_distribution="gaussian",
                                     latents=net.iter_latent_vertices(),
                                     proposal_distribution_sigma=np.array(1.))
    generate_samples(net=net,
                     sample_from=net.iter_latent_vertices(),
                     sampling_algorithm=algo)
def test_sample_throws_if_vertices_in_sample_from_are_missing_labels() -> None:
    sigma = Gamma(1., 1.)
    vertex = Gaussian(0., sigma, label="gaussian")
    assert sigma.get_label() is None

    net = BayesNet([sigma, vertex])
    with pytest.raises(ValueError, match=r"Vertices in sample_from must be labelled."):
        sample(net=net, sample_from=net.iter_latent_vertices())
def test_probe_for_non_zero_probability_from_bayes_net() -> None:
    gamma = Gamma(1., 1.)
    poisson = Poisson(gamma)
    net = BayesNet([poisson, gamma])

    assert not gamma.has_value()
    assert not poisson.has_value()

    net.probe_for_non_zero_probability(100, KeanuRandom())

    assert gamma.has_value()
    assert poisson.has_value()
def test_iter_returns_same_result_as_sample(algo: str) -> None:
    draws = 100
    model = thermometers.model()
    net = BayesNet(model.temperature.get_connected_graph())

    set_starting_state(model)
    samples = sample(net=net, sample_from=net.get_latent_vertices(), algo=algo, draws=draws)

    set_starting_state(model)
    iter_samples = generate_samples(net=net, sample_from=net.get_latent_vertices(), algo=algo)

    samples_dataframe = pd.DataFrame()
    # DataFrame.append returns a new frame, so the result must be reassigned;
    # a bare list comprehension over append discards every row.
    for next_sample in islice(iter_samples, draws):
        samples_dataframe = samples_dataframe.append(next_sample, ignore_index=True)

    for vertex_id in samples_dataframe:
        np.testing.assert_almost_equal(samples_dataframe[vertex_id].mean(),
                                       np.average(samples[vertex_id]))
def test_construct_bayes_net() -> None:
    uniform = UniformInt(0, 1)
    graph = set(uniform.get_connected_graph())
    vertex_ids = [vertex.get_id() for vertex in graph]

    assert len(vertex_ids) == 3
    assert uniform.get_id() in vertex_ids

    net = BayesNet(graph)
    latent_vertex_ids = [vertex.get_id() for vertex in net.get_latent_vertices()]

    assert len(latent_vertex_ids) == 1
    assert uniform.get_id() in latent_vertex_ids
def test_coalmining() -> None:
    coal_mining = CoalMining()
    model = coal_mining.model()
    model.disasters.observe(coal_mining.training_data())

    net = BayesNet(model.switchpoint.iter_connected_graph())
    samples = sample(net=net,
                     sample_from=net.iter_latent_vertices(),
                     draws=2000,
                     drop=100,
                     down_sample_interval=5)

    vertex_samples: List[primitive_types] = samples["switchpoint"]
    vertex_samples_concatenated: np.ndarray = np.array(vertex_samples)

    switch_year = np.argmax(np.bincount(vertex_samples_concatenated))
    assert switch_year == 1890
def test_sampling_returns_multi_indexed_dict_of_list_of_scalars_for_mixed_net(
        algo: Callable[[BayesNet], PosteriorSamplingAlgorithm]) -> None:
    exp = Exponential(1.)
    add_rank_2 = exp + np.array([1., 2., 3., 4.]).reshape((2, 2))
    add_rank_3 = exp + np.array([1., 2., 3., 4., 1., 2., 3., 4.]).reshape((2, 2, 2))
    gaussian_rank_2 = Gaussian(add_rank_2, 2.)
    gaussian_rank_3 = Gaussian(add_rank_3, 1.)

    exp.set_label("exp")
    gaussian_rank_2.set_label("gaussian")
    gaussian_rank_3.set_label("gaussian2")

    mixed_net = BayesNet(exp.iter_connected_graph())
    draws = 5
    sample_from = list(mixed_net.iter_latent_vertices())
    vertex_labels = [vertex.get_label() for vertex in sample_from]

    samples = sample(net=mixed_net,
                     sample_from=sample_from,
                     sampling_algorithm=algo(mixed_net),
                     draws=draws)

    assert type(samples) == dict
    __assert_valid_samples(draws, samples)

    assert ('exp', (0,)) in samples
    for i in (0, 1):
        for j in (0, 1):
            assert ('gaussian', (i, j)) in samples

    df = pd.DataFrame(samples)
    expected_num_columns = {"exp": 1, "gaussian": 4, "gaussian2": 8}
    expected_tuple_size = {"exp": 1, "gaussian": 2, "gaussian2": 3}

    assert len(df.columns.levels[0]) == 3
    for parent_column in df.columns.levels[0]:
        assert parent_column in vertex_labels
        assert len(df[parent_column].columns) == expected_num_columns[parent_column]
        for child_column in df[parent_column].columns:
            assert type(child_column) == tuple
            assert len(child_column) == expected_tuple_size[parent_column]
            assert len(df[parent_column][child_column]) == draws
            assert type(df[parent_column][child_column][0]) == np.float64
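# A minimal sketch (hypothetical helper, not part of the original suite) of how the
# multi-indexed sample dict asserted on above can be explored once loaded into pandas.
# It relies only on behaviour the test itself verifies: pd.DataFrame(samples) yields a
# column MultiIndex of (vertex label, tensor index) pairs.
def _example_inspect_multi_indexed_samples(samples: dict) -> None:
    df = pd.DataFrame(samples)
    exp_column = df["exp"][(0,)]               # rank-0 vertex: a single scalar column
    cell_column = df["gaussian"][(0, 1)]       # rank-2 vertex: one column per tensor element
    per_element_means = df["gaussian"].mean()  # posterior mean for each tensor element
    assert len(exp_column) == len(cell_column)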
def test_iter_with_live_plot(net: BayesNet) -> None:
    KeanuRandom.set_default_random_seed(1)
    _, ax = plt.subplots(3, 1, squeeze=False)
    samples = generate_samples(net=net,
                               sample_from=net.get_latent_vertices(),
                               live_plot=True,
                               refresh_every=5,
                               ax=ax)

    for sample in islice(samples, 5):
        pass

    reorder_subplots(ax)

    assert len(ax) == 3
    assert all(len(ax[i][0].get_lines()) == 1 for i in range(3))
    assert np.allclose(ax[0][0].get_lines()[0].get_ydata(),
                       [0.49147822, 0.49147822, 0.49147822, 0.20033212, 0.20033212])
    assert np.allclose(ax[1][0].get_lines()[0].get_ydata(),
                       [0.87268333, 1.10409369, 1.10409369, 1.10409369, 0.69098161])
    assert np.allclose(ax[2][0].get_lines()[0].get_ydata(),
                       [-14.46166855, -14.46166855, 0.32305686, 0.32305686, 0.32305686])
def test_can_save_and_load(tmpdir) -> None:
    PROTO_FILE_NAME = str(tmpdir.join("test.proto"))
    JSON_FILE_NAME = str(tmpdir.join("test.json"))
    DOT_FILE_NAME = str(tmpdir.join("test.dot"))

    gamma = Gamma(1.0, 1.0)
    gamma.set_value(2.5)
    # %%SNIPPET_START%% PythonSaveSnippet
    net = BayesNet(gamma.iter_connected_graph())
    metadata = {"Author": "Documentation Team"}
    protobuf_saver = ProtobufSaver(net)
    protobuf_saver.save(PROTO_FILE_NAME, True, metadata)
    json_saver = JsonSaver(net)
    json_saver.save(JSON_FILE_NAME, True, metadata)
    dot_saver = DotSaver(net)
    dot_saver.save(DOT_FILE_NAME, True, metadata)
    # %%SNIPPET_END%% PythonSaveSnippet
    # %%SNIPPET_START%% PythonLoadSnippet
    protobuf_loader = ProtobufLoader()
    new_net_from_proto = protobuf_loader.load(PROTO_FILE_NAME)
    json_loader = JsonLoader()
    new_net_from_json = json_loader.load(JSON_FILE_NAME)
    # %%SNIPPET_END%% PythonLoadSnippet
def test_can_get_vertices_from_bayes_net(get_method: str, latent: bool, observed: bool,
                                         continuous: bool, discrete: bool) -> None:
    gamma = Gamma(1., 1.)
    gamma.observe(0.5)
    poisson = Poisson(gamma)
    cauchy = Cauchy(gamma, 1.)

    assert gamma.is_observed()
    assert not poisson.is_observed()
    assert not cauchy.is_observed()

    net = BayesNet([gamma, poisson, cauchy])
    vertex_ids = [vertex.get_id() for vertex in getattr(net, get_method)()]

    if observed and continuous:
        assert gamma.get_id() in vertex_ids
    if latent and discrete:
        assert poisson.get_id() in vertex_ids
    if latent and continuous:
        assert cauchy.get_id() in vertex_ids

    assert len(vertex_ids) == (observed and continuous) + (latent and discrete) + (latent and continuous)
def test_can_iter_through_samples(algo: str, net: BayesNet) -> None:
    draws = 10
    samples = generate_samples(net=net,
                               sample_from=net.get_latent_vertices(),
                               algo=algo,
                               down_sample_interval=1)
    count = 0
    for sample in islice(samples, draws):
        count += 1
    assert count == draws
def test_can_get_acceptance_rates(net: BayesNet) -> None:
    acceptance_rate_tracker = AcceptanceRateTracker()
    latents = list(net.iter_latent_vertices())

    algo = MetropolisHastingsSampler(proposal_distribution='prior',
                                     latents=net.iter_latent_vertices(),
                                     proposal_listeners=[acceptance_rate_tracker])
    sample(net=net, sample_from=latents, sampling_algorithm=algo, drop=3)

    for latent in latents:
        rate = acceptance_rate_tracker.get_acceptance_rate(latent)
        assert 0 <= rate <= 1
def test_dropping_samples(net: BayesNet) -> None:
    draws = 10
    drop = 3
    samples = sample(net=net, sample_from=net.get_latent_vertices(), draws=draws, drop=drop)

    expected_num_samples = draws - drop
    assert all(len(vertex_samples) == expected_num_samples
               for vertex_id, vertex_samples in samples.items())
def test_thermometers_max_likelihood_gradient(model: Model) -> None:
    net = BayesNet(model.temperature.get_connected_graph())
    gradient_optimizer = GradientOptimizer(net)
    log_prob = gradient_optimizer.max_likelihood()
    assert log_prob < 0.

    temperature = model.temperature.get_value()
    assert 20.995 < temperature < 21.005
def thermometers_max_likelihood_gradient(model: Model, algorithm) -> None:
    net = BayesNet(model.temperature.iter_connected_graph())
    gradient_optimizer = GradientOptimizer(net, algorithm)
    result = gradient_optimizer.max_likelihood()
    assert result.fitness() < 0.

    temperature = result.value_for(model.temperature)
    assert 20.99 < temperature < 21.01
def test_iter_returns_same_result_as_sample(algo: Callable) -> None:
    draws = 100
    model = thermometers.model()
    net = BayesNet(model.temperature.get_connected_graph())

    set_starting_state(model)
    sampler = algo()
    samples = sample(net=net,
                     sample_from=net.get_latent_vertices(),
                     sampling_algorithm=sampler,
                     draws=draws)

    set_starting_state(model)
    sampler = algo()
    iter_samples = generate_samples(net=net,
                                    sample_from=net.get_latent_vertices(),
                                    sampling_algorithm=sampler)

    samples_dataframe = pd.DataFrame()
    for iter_sample in islice(iter_samples, draws):
        samples_dataframe = samples_dataframe.append(iter_sample, ignore_index=True)

    for vertex_label in samples_dataframe:
        np.testing.assert_almost_equal(samples_dataframe[vertex_label].mean(),
                                       np.average(samples[vertex_label]))
def test_down_sample_interval(net: BayesNet) -> None:
    draws = 10
    down_sample_interval = 2
    samples = sample(net=net,
                     sample_from=net.get_latent_vertices(),
                     draws=draws,
                     down_sample_interval=down_sample_interval)

    expected_num_samples = draws // down_sample_interval
    assert all(len(vertex_samples) == expected_num_samples
               for vertex_id, vertex_samples in samples.items())
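# A minimal sketch (hypothetical values, not part of the original suite) combining the
# drop and down_sample_interval parameters that the tests above exercise separately;
# the coal-mining tests in this module use them together. With drop=d and
# down_sample_interval=k, roughly (draws - d) / k samples are retained per vertex.
def _example_drop_then_thin(net: BayesNet) -> dict:
    return sample(net=net,
                  sample_from=net.get_latent_vertices(),
                  draws=100,
                  drop=10,
                  down_sample_interval=5)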
def test_coalmining() -> None:
    KeanuRandom.set_default_random_seed(1)
    coal_mining = CoalMining()
    model = coal_mining.model()
    model.disasters.observe(coal_mining.training_data())

    net = BayesNet(model.switchpoint.get_connected_graph())
    samples = sample(net=net,
                     sample_from=net.get_latent_vertices(),
                     draws=50000,
                     drop=10000,
                     down_sample_interval=5)

    vertex_samples: List[numpy_types] = samples["switchpoint"]
    # Convert each 0-d array to a Python scalar, because 0-d arrays can't be concatenated.
    vertex_samples_primitive: List[primitive_types] = [a.tolist() for a in vertex_samples]
    vertex_samples_concatenated: np.ndarray = np.array(vertex_samples_primitive)

    switch_year = np.argmax(np.bincount(vertex_samples_concatenated))
    assert switch_year == 1890
def test_sample_with_plot(net: BayesNet) -> None:
    KeanuRandom.set_default_random_seed(1)
    _, ax = plt.subplots(3, 1, squeeze=False)
    sample(net=net, sample_from=net.get_latent_vertices(), draws=5, plot=True, ax=ax)

    reorder_subplots(ax)

    assert len(ax) == 3
    assert all(len(ax[i][0].get_lines()) == 1 for i in range(3))
    assert all(len(ax[i][0].get_lines()[0].get_ydata()) == 5 for i in range(3))
def test_it_throws_if_you_pass_in_a_proposal_listener_but_you_didnt_specify_the_proposal_type(
        net: BayesNet) -> None:
    with pytest.raises(TypeError) as excinfo:
        sample(net=net,
               sample_from=net.get_latent_vertices(),
               proposal_listeners=[AcceptanceRateTracker()],
               drop=3)
    assert str(excinfo.value) == "If you pass in proposal_listeners you must also specify proposal_distribution"
def test_it_throws_if_you_pass_in_a_proposal_listener_but_the_algo_isnt_metropolis(
        net: BayesNet) -> None:
    with pytest.raises(TypeError) as excinfo:
        sample(net=net,
               sample_from=net.get_latent_vertices(),
               algo="hamiltonian",
               proposal_listeners=[AcceptanceRateTracker()],
               drop=3)
    assert str(excinfo.value) == "Only Metropolis Hastings supports the proposal_listeners parameter"
def test_sampling_returns_multi_indexed_dict_of_list_of_scalars_for_tensor_in_sample_from(
        algo: Callable[[BayesNet], PosteriorSamplingAlgorithm], tensor_net: BayesNet) -> None:
    draws = 5
    sample_from = list(tensor_net.iter_latent_vertices())
    samples = sample(net=tensor_net,
                     sample_from=sample_from,
                     sampling_algorithm=algo(tensor_net),
                     draws=draws)

    assert type(samples) == dict
    __assert_valid_samples(draws, samples)
def test_it_throws_if_you_pass_in_a_proposal_distribution_but_the_algo_isnt_metropolis(
        net: BayesNet) -> None:
    with pytest.raises(TypeError) as excinfo:
        sample(net=net,
               sample_from=net.get_latent_vertices(),
               algo="hamiltonian",
               proposal_distribution="prior",
               drop=3)
    assert str(excinfo.value) == "Only Metropolis Hastings supports the proposal_distribution parameter"
def test_can_track_acceptance_rate_when_iterating(net: BayesNet) -> None:
    acceptance_rate_tracker = AcceptanceRateTracker()
    latents = list(net.get_latent_vertices())

    algo = MetropolisHastingsSampler(proposal_distribution='prior',
                                     latents=latents,
                                     proposal_listeners=[acceptance_rate_tracker])
    samples = generate_samples(net=net, sample_from=latents, sampling_algorithm=algo, drop=3)

    draws = 100
    for _ in islice(samples, draws):
        for latent in latents:
            rate = acceptance_rate_tracker.get_acceptance_rate(latent)
            assert 0 <= rate <= 1
def test_sampling_returns_dict_of_list_of_ndarrays_for_vertices_in_sample_from(
        algo: Callable[[BayesNet], PosteriorSamplingAlgorithm], net: BayesNet) -> None:
    draws = 5
    sample_from = list(net.iter_latent_vertices())
    samples = sample(net=net, sample_from=sample_from, sampling_algorithm=algo(net), draws=draws)

    assert len(samples) == len(sample_from)
    assert type(samples) == dict
    __assert_valid_samples(draws, samples)
def test_sample_dict_can_be_loaded_in_to_dataframe(net: BayesNet) -> None:
    sample_from = list(net.iter_latent_vertices())
    vertex_labels = [vertex.get_label() for vertex in sample_from]
    samples = sample(net=net, sample_from=sample_from, draws=5)
    df = pd.DataFrame(samples)

    for column in df:
        vertex_label = df[column].name
        assert vertex_label in vertex_labels
        assert len(df[column]) == 5
        assert type(df[column][0]) == np.float64
def test_can_iter_through_samples(algo: Callable[[BayesNet], PosteriorSamplingAlgorithm],
                                  net: BayesNet) -> None:
    draws = 10
    samples = generate_samples(net=net,
                               sample_from=net.iter_latent_vertices(),
                               sampling_algorithm=algo(net),
                               down_sample_interval=1)
    count = 0
    for sample in islice(samples, draws):
        count += 1
    assert count == draws
def test_can_get_acceptance_rates(net: BayesNet) -> None:
    acceptance_rate_tracker = AcceptanceRateTracker()
    latents = list(net.get_latent_vertices())

    sample(net=net,
           sample_from=latents,
           proposal_distribution='prior',
           proposal_listeners=[acceptance_rate_tracker],
           drop=3)

    for latent in latents:
        rate = acceptance_rate_tracker.get_acceptance_rate(latent)
        assert 0 <= rate <= 1
def test_can_specify_nuts_params(net: BayesNet) -> None:
    algo = NUTSSampler(adapt_count=1000,
                       target_acceptance_prob=0.65,
                       adapt_step_size_enabled=True,
                       adapt_potential_enabled=True,
                       initial_step_size=0.1,
                       max_tree_height=10)
    sample(net, list(net.iter_latent_vertices()), algo, draws=500, drop=100)
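# A minimal sketch (hypothetical helper, not part of the original suite) showing that a
# configured NUTSSampler can also drive the streaming API: generate_samples accepts a
# sampling_algorithm the same way it does elsewhere in this module, yielding one sample
# dict per draw.
def _example_stream_with_nuts(net: BayesNet, algo: NUTSSampler) -> None:
    samples = generate_samples(net=net,
                               sample_from=net.iter_latent_vertices(),
                               sampling_algorithm=algo)
    for next_sample in islice(samples, 10):
        pass  # each next_sample is keyed like the dict returned by sample()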
def test_iter_with_live_plot(net: BayesNet) -> None:
    KeanuRandom.set_default_random_seed(1)
    _, ax = plt.subplots(3, 1, squeeze=False)
    samples = generate_samples(net=net,
                               sample_from=net.get_latent_vertices(),
                               live_plot=True,
                               refresh_every=5,
                               ax=ax)

    for sample in islice(samples, 5):
        pass

    reorder_subplots(ax)

    assert len(ax) == 3
    assert all(len(ax[i][0].get_lines()) == 1 for i in range(3))
    # The length comparison must sit outside len(): len(get_ydata() == 5) would
    # measure an element-wise boolean array instead of checking for 5 points.
    assert all(len(ax[i][0].get_lines()[0].get_ydata()) == 5 for i in range(3))