def extend():
    """Demonstrate registering a user-defined leaf type for inference.

    Defines a ``Pareto`` leaf and its likelihood function, registers the
    pair through ``add_node_likelihood``, and prints the log-likelihood of
    a two-component Pareto mixture evaluated on the single value 1.5.
    """
    import numpy as np
    from scipy.stats import pareto
    from spn.algorithms.Inference import add_node_likelihood, log_likelihood
    from spn.structure.leaves.parametric.Parametric import Leaf

    class Pareto(Leaf):
        """Leaf node carrying the Pareto parameter ``a``."""

        def __init__(self, a, scope=None):
            Leaf.__init__(self, scope=scope)
            self.a = a

    def pareto_likelihood(node, data=None, dtype=np.float64):
        """Return a (rows, 1) array with the Pareto pdf of the node's scope column(s)."""
        densities = np.ones((data.shape[0], 1), dtype=dtype)
        densities[:] = pareto.pdf(data[:, node.scope], node.a)
        return densities

    add_node_likelihood(Pareto, pareto_likelihood)

    mixture = 0.3 * Pareto(2.0, scope=0) + 0.7 * Pareto(3.0, scope=0)
    evidence = np.array([1.5]).reshape(-1, 1)
    print("pareto", log_likelihood(mixture, evidence))
def test_hierarchical_sum_multiple_dimension(self):
    """Check a two-level sum hierarchy whose leaves span variables 0 and 1.

    First with ``identity_ll`` registered, then with ``multiply_ll`` and a
    distinct ``multiplier`` on each of the four leaves.
    """

    def two_level_mixture():
        # Inner mixtures weighted 0.2/0.8 and 0.4/0.6, combined with 0.3/0.7.
        left = 0.2 * Leaf(scope=[0, 1]) + 0.8 * Leaf(scope=[0, 1])
        right = 0.4 * Leaf(scope=[0, 1]) + 0.6 * Leaf(scope=[0, 1])
        return 0.3 * left + 0.7 * right

    # Basic computation: every leaf is handled by the same function.
    add_node_likelihood(Leaf, identity_ll)
    model = two_level_mixture()
    sample = np.random.rand(10, 3)
    self.assert_correct(model, sample, sample[:, 0] * sample[:, 1])

    # Different node contributions: give each leaf its own multiplier.
    add_node_likelihood(Leaf, multiply_ll)
    model = two_level_mixture()
    leaf_nodes = [node for branch in model.children[:2] for node in branch.children[:2]]
    for leaf_node, factor in zip(leaf_nodes, (2, 3, 4, 5)):
        leaf_node.multiplier = factor

    sample = np.random.rand(10, 3)
    dprod = sample[:, 0] * sample[:, 1]
    expected = 0.3 * (0.2 * 2 * dprod + 0.8 * 3 * dprod) + 0.7 * (0.4 * 4 * dprod + 0.6 * 5 * dprod)
    self.assert_correct(model, sample, expected)
def test_sum_one_dimension(self):
    """Exercise a two-leaf sum node over variable 0.

    Covers plain mixtures, data with extra columns outside the scope,
    rejection of unnormalized weights, and a repeat of the scoped check.
    """
    add_node_likelihood(Leaf, identity_ll)

    def mixture(first_weight, second_weight):
        return first_weight * Leaf(scope=0) + second_weight * Leaf(scope=0)

    def check_scoped_mixture():
        # Data has three columns; only column 0 is expected to contribute.
        model = mixture(0.1, 0.9)
        sample = np.random.rand(10, 3)
        expected = (0.1 * sample[:, 0] + 0.9 * sample[:, 0]).reshape(-1, 1)
        self.assert_correct(model, sample, expected)

    # Basic computations: the expected output is the input column itself.
    for weights in ((0.5, 0.5), (0.1, 0.9)):
        model = mixture(*weights)
        column = np.random.rand(10, 1)
        self.assert_correct(model, column, column)

    # Any dataset can be passed and the scopes are respected
    # (this is important for inner nodes).
    check_scoped_mixture()

    # Evaluation must fail when the weights are not normalized.
    unnormalized = mixture(0.1, 0.9)
    unnormalized.weights[1] = 0.2
    sample = np.random.rand(10, 3)
    for evaluate in (likelihood, log_likelihood):
        with self.assertRaises(AssertionError):
            evaluate(unnormalized, sample)

    # Originally labelled "test the log space": the same scoped check again.
    # NOTE(review): presumably assert_correct covers the log path - confirm.
    check_scoped_mixture()
def test_sample_spn_weights(self):
    """Chi-square check of the weights sampled for both inner sum nodes.

    For each inner sum node the expected counts are its observed edge
    counts plus the instances that did not reach it, split according to
    the node's original weights.
    """
    n = 100000
    left = 0.1 * leaf(0.0001) + 0.9 * leaf(0.99)
    right = 0.3 * leaf(0.5) + 0.7 * leaf(0.1)
    root = 0.1 * left + 0.9 * right

    add_node_likelihood(Leaf, node_fixed_ll)

    # Fixed seed so the sampled trees and weights are reproducible.
    rng = RandomState(12345)
    sample = rng.rand(n, 2)

    sample_induced_trees(root, sample, rng)
    sample_spn_weights(root, rng, omega_uninf_prior=0)

    for branch, branch_pos, priors in ((left, 0, (0.1, 0.9)), (right, 1, (0.3, 0.7))):
        # Instances that were routed to the other branch are unseen here.
        unseen = n - root.edge_counts[branch_pos]
        expected = np.array(branch.edge_counts)
        # Item-wise update keeps the array's dtype, exactly as before.
        for slot, prior in enumerate(priors):
            expected[slot] += prior * unseen
        observed = np.array(branch.weights) * n
        self.assertGreaterEqual(chisquare(observed, expected).pvalue, 0.05)
def test_ll_matrix(self):
    """Check the per-node likelihood matrix filled in by ``likelihood``/``log_likelihood``.

    Builds a small network of sum and product nodes, computes the expected
    value of every node by hand, and verifies that

    * the root output matches (via ``assert_correct``),
    * the matrix written by ``likelihood`` equals ``exp`` of the matrix
      written by ``log_likelihood``,
    * the column indexed by each node's id holds that node's expected value.

    Uses ``np.allclose`` instead of ``np.alltrue(np.isclose(...))`` because
    ``np.alltrue`` was removed in NumPy 2.0.
    """
    add_node_likelihood(Leaf, sum_and_multiplier_ll)

    # Left branch: a mixture over variable 2 times a leaf over variables 0 and 1.
    node_1_1_1_1 = leaf(2, 1)
    node_1_1_1_2 = leaf(2, 2)
    node_1_1_1 = 0.7 * node_1_1_1_1 + 0.3 * node_1_1_1_2
    node_1_1_2 = leaf([0, 1], 3)
    node_1_1 = node_1_1_1 * node_1_1_2

    # Right branch: a mixture over variables 0 and 1 times a leaf over variable 2.
    node_1_2_1_1_1 = leaf(0, 5)
    node_1_2_1_1_2 = leaf(1, 4)
    node_1_2_1_1 = node_1_2_1_1_1 * node_1_2_1_1_2
    node_1_2_1_2 = leaf([0, 1], 6)
    node_1_2_1 = 0.1 * node_1_2_1_1 + 0.9 * node_1_2_1_2
    node_1_2_2 = leaf(2, 3)
    node_1_2 = node_1_2_1 * node_1_2_2

    spn = 0.4 * node_1_1 + 0.6 * node_1_2

    assign_ids(spn)
    max_id = max(n.id for n in get_nodes_by_type(spn))

    data = np.random.rand(10, 10)

    # Hand-computed expected value of every node, mirroring the structure above.
    node_1_1_1_1_r = data[:, 2] * 1
    node_1_1_1_2_r = data[:, 2] * 2
    node_1_1_1_r = 0.7 * node_1_1_1_1_r + 0.3 * node_1_1_1_2_r
    node_1_1_2_r = 3 * (data[:, 0] + data[:, 1])
    node_1_1_r = node_1_1_1_r * node_1_1_2_r

    node_1_2_1_1_1_r = data[:, 0] * 5
    node_1_2_1_1_2_r = data[:, 1] * 4
    node_1_2_1_1_r = node_1_2_1_1_1_r * node_1_2_1_1_2_r
    node_1_2_1_2_r = 6 * (data[:, 0] + data[:, 1])
    node_1_2_1_r = 0.1 * node_1_2_1_1_r + 0.9 * node_1_2_1_2_r
    node_1_2_2_r = data[:, 2] * 3
    node_1_2_r = node_1_2_1_r * node_1_2_2_r

    spn_r = 0.4 * node_1_1_r + 0.6 * node_1_2_r

    self.assert_correct(spn, data, spn_r)

    # One column per node id; both calls fill their matrix in place.
    lls = np.zeros((data.shape[0], max_id + 1))
    likelihood(spn, data, lls_matrix=lls)
    llls = np.zeros((data.shape[0], max_id + 1))
    log_likelihood(spn, data, lls_matrix=llls)

    self.assertTrue(np.allclose(lls, np.exp(llls)))

    expected_by_node = (
        (spn, spn_r),
        (node_1_2, node_1_2_r),
        (node_1_2_2, node_1_2_2_r),
        (node_1_2_1, node_1_2_1_r),
        (node_1_2_1_2, node_1_2_1_2_r),
        (node_1_2_1_1, node_1_2_1_1_r),
        (node_1_2_1_1_2, node_1_2_1_1_2_r),
        (node_1_2_1_1_1, node_1_2_1_1_1_r),
        (node_1_1, node_1_1_r),
        (node_1_1_2, node_1_1_2_r),
        (node_1_1_1, node_1_1_1_r),
        (node_1_1_1_2, node_1_1_1_2_r),
        (node_1_1_1_1, node_1_1_1_1_r),
    )
    for node, expected in expected_by_node:
        self.assertTrue(np.allclose(expected, lls[:, node.id]))
def test_sum_multiple_dimension(self):
    """Check an even mixture of two leaves that both span variables 0 and 1."""
    add_node_likelihood(Leaf, identity_ll)

    # Basic computation in multiple dimensions.
    first = Leaf(scope=[0, 1])
    second = Leaf(scope=[0, 1])
    model = 0.5 * first + 0.5 * second

    sample = np.random.rand(10, 2)
    # Direct evaluation is kept as a smoke call; its result is not inspected.
    likelihood(model, sample)

    expected = sample[:, 0] * sample[:, 1]
    self.assert_correct(model, sample, expected)
def test_prod_multiple_dimension(self):
    """Check a product of two leaves with scopes [0, 1] and [2, 3] under ``sums_ll``."""
    add_node_likelihood(Leaf, sums_ll)

    # Basic computation in multiple dimensions.
    left = Leaf(scope=[0, 1])
    right = Leaf(scope=[2, 3])
    model = left * right

    sample = np.random.rand(10, 4)
    left_sum = sample[:, 0] + sample[:, 1]
    right_sum = sample[:, 2] + sample[:, 3]
    self.assert_correct(model, sample, left_sum * right_sum)
def test_induced_trees(self):
    """Validate induced-tree sampling on three networks.

    The first two networks go through ``check_induced_tree`` and
    ``check_counts``. The third uses fixed leaf likelihoods so the edge
    counts of the root and of both inner sum nodes can be compared with
    their expected proportions through chi-square tests.
    """
    add_node_likelihood(Leaf, constant_equal_ll)
    n = 100000

    def pair():
        # Product of two leaves built with positional arguments 0 and 1.
        return Leaf(0) * Leaf(1)

    def check_tree_and_counts(model):
        self.check_induced_tree(model, n)
        self.check_counts(model, n)

    def assert_counts_follow(observed, masses, total):
        # Normalize the two unnormalized masses, then scale to the total count.
        masses = np.array(masses)
        expected = masses / (masses[0] + masses[1]) * total
        self.assertGreaterEqual(chisquare(observed, expected).pvalue, 0.05)

    spn = 0.1 * ((0.1 * pair() + 0.9 * pair()) * Leaf(2)) + 0.9 * (
        (0.8 * pair() + 0.2 * pair()) * Leaf(2))
    check_tree_and_counts(spn)

    # So far the induced trees were checked to produce a proper map with
    # sound sampling and likelihoods. Still to verify: the right counts are
    # passed down, and the samples and probabilities are computed correctly.
    spn = 0.3 * (0.0001 * pair() + 0.9999 * pair()) + 0.7 * (
        0.1 * (0.3 * pair() + 0.7 * pair()) + 0.9 * (0.4 * pair() + 0.6 * pair()))
    check_tree_and_counts(spn)

    left = 0.1 * leaf(0.0001) + 0.9 * leaf(0.99)
    right = 0.1 * leaf(0.5) + 0.9 * leaf(0.1)
    spn = 0.1 * left + 0.9 * right

    add_node_likelihood(Leaf, node_fixed_ll)

    # Fixed seed keeps the sampled trees reproducible.
    rng = RandomState(12345)
    sample = rng.rand(n, 2)

    sample_induced_trees(spn, sample, rng)

    self.check_counts(spn, n)

    # Unnormalized mass of each inner mixture: weight * fixed leaf value.
    left_mass = 0.1 * 0.0001 + 0.9 * 0.99
    right_mass = 0.1 * 0.5 + 0.9 * 0.1

    # Root: branch weight times the branch's own mass.
    assert_counts_follow(spn.edge_counts, [0.1 * left_mass, 0.9 * right_mass], n)
    # Inner nodes: proportions relative to the instances each one received.
    assert_counts_follow(left.edge_counts, [0.1 * 0.0001, 0.9 * 0.99], spn.edge_counts[0])
    assert_counts_follow(right.edge_counts, [0.1 * 0.5, 0.9 * 0.1], spn.edge_counts[1])
def test_type(self):
    """Check that ``likelihood`` returns an array of the requested ``dtype``.

    ``np.longdouble`` replaces ``np.float128``: the latter is only defined
    on platforms whose C ``long double`` is 128 bits wide, so referencing
    it raises ``AttributeError`` elsewhere (e.g. on Windows). Where
    ``np.float128`` exists it is the same type as ``np.longdouble``.
    """
    add_node_likelihood(Leaf, identity_ll)

    # Mixture of a two-variable leaf and a product of two single-variable leaves.
    spn = 0.5 * Leaf(scope=[0, 1]) + 0.5 * (Leaf(scope=0) * Leaf(scope=1))
    data = np.random.rand(10, 4)

    for requested in (np.float32, np.longdouble):
        result = likelihood(spn, data, dtype=requested)
        self.assertEqual(result.dtype, requested)
def test_prod_one_dimension(self):
    """Check a product of two single-variable leaves over variables 0 and 1.

    Runs once with exactly two data columns (basic product) and once with
    a third column that lies outside both scopes.
    """
    add_node_likelihood(Leaf, identity_ll)

    for n_columns in (2, 3):
        model = Leaf(scope=0) * Leaf(scope=1)
        sample = np.random.rand(10, n_columns)
        expected = sample[:, 0] * sample[:, 1]
        self.assert_correct(model, sample, expected)
def test_handmade_multidim(self):
    """Check a hand-built network mixing product and sum nodes over variables 0-2."""
    add_node_likelihood(Leaf, sum_and_multiplier_ll)

    # Branch A: a mixture of two products over variables 0 and 1, times a leaf on variable 2.
    product_branch = (0.9 * (leaf(0, 1) * leaf(1, 2)) + 0.1 * (leaf(0, 3) * leaf(1, 4))) * leaf(2, 5)
    # Branch B: a mixture of two leaves that each span all three variables.
    joint_branch = 0.6 * leaf([0, 1, 2], 6) + 0.4 * leaf([0, 1, 2], 7)
    model = 0.3 * product_branch + 0.7 * joint_branch

    sample = np.random.rand(10, 10)
    x0, x1, x2 = sample[:, 0], sample[:, 1], sample[:, 2]
    total = x0 + x1 + x2

    expected = 0.3 * (
        (0.9 * (x0 * 2 * x1) + 0.1 * (3 * x0 * 4 * x1)) * 5 * x2
    ) + 0.7 * (0.6 * 6 * total + 0.4 * 7 * total)

    self.assert_correct(model, sample, expected)
def test_hierarchical_prod_multiple_dimension(self):
    """Check a two-level product hierarchy over four two-variable leaves.

    Runs first with ``identity_ll`` registered and then with ``sums_ll``.
    """

    def nested_product():
        # (leaf[0,1] * leaf[2,3]) * (leaf[4,5] * leaf[6,7])
        lower = Leaf(scope=[0, 1]) * Leaf(scope=[2, 3])
        upper = Leaf(scope=[4, 5]) * Leaf(scope=[6, 7])
        return lower * upper

    # Basic computation in a hierarchy: expected value is the product of all 8 columns.
    add_node_likelihood(Leaf, identity_ll)
    model = nested_product()
    sample = np.random.rand(10, 8)
    self.assert_correct(model, sample, np.prod(sample, axis=1))

    # Different node contributions: expected value is the product of the pairwise sums.
    add_node_likelihood(Leaf, sums_ll)
    model = nested_product()
    sample = np.random.rand(10, 10)
    sum_01 = sample[:, 0] + sample[:, 1]
    sum_23 = sample[:, 2] + sample[:, 3]
    sum_45 = sample[:, 4] + sample[:, 5]
    sum_67 = sample[:, 6] + sample[:, 7]
    self.assert_correct(model, sample, sum_01 * sum_23 * sum_45 * sum_67)
def test_hierarchical_sum_one_dimension(self):
    """Check a two-level sum hierarchy whose four leaves all cover variable 0.

    First with ``identity_ll`` registered, then with ``multiply_ll`` and a
    distinct ``multiplier`` on each leaf.
    """

    def two_level_mixture():
        # Inner mixtures weighted 0.2/0.8 and 0.4/0.6, combined with 0.3/0.7.
        left = 0.2 * Leaf(scope=0) + 0.8 * Leaf(scope=0)
        right = 0.4 * Leaf(scope=0) + 0.6 * Leaf(scope=0)
        return 0.3 * left + 0.7 * right

    # Basic computation in a hierarchy, respecting the scopes.
    add_node_likelihood(Leaf, identity_ll)
    model = two_level_mixture()
    sample = np.random.rand(10, 3)
    self.assert_correct(model, sample, sample[:, 0])

    # The different nodes must contribute differently.
    add_node_likelihood(Leaf, multiply_ll)
    model = two_level_mixture()
    leaf_nodes = [node for branch in model.children[:2] for node in branch.children[:2]]
    for leaf_node, factor in zip(leaf_nodes, (2, 3, 4, 5)):
        leaf_node.multiplier = factor

    sample = np.random.rand(10, 3)
    x0 = sample[:, 0]
    expected = 0.3 * (0.2 * 2 * x0 + 0.8 * 3 * x0) + 0.7 * (0.4 * 4 * x0 + 0.6 * 5 * x0)
    self.assert_correct(model, sample, expected)
def add_conditional_inference_support():
    """Register likelihood and MPE handlers for the three conditional leaf types."""
    conditional_leaves = (Conditional_Gaussian, Conditional_Poisson, Conditional_Bernoulli)

    # All likelihood registrations come first, then all MPE registrations.
    for leaf_type in conditional_leaves:
        add_node_likelihood(leaf_type, conditional_likelihood)
    for leaf_type in conditional_leaves:
        add_node_mpe_likelihood(leaf_type, conditional_mpe_log_likelihood)
def add_piecewise_inference_support():
    """Register the likelihood and MPE handlers for ``PiecewiseLinear`` nodes."""
    node_type = PiecewiseLinear
    add_node_likelihood(node_type, piecewise_log_likelihood)
    add_node_mpe_likelihood(node_type, piecewise_mpe_likelihood)
def add_cltree_inference_support():
    """Register ``cltree_log_likelihood`` as the ``log_lambda_func`` for ``CLTree`` nodes."""
    handlers = {"log_lambda_func": cltree_log_likelihood}
    add_node_likelihood(CLTree, **handlers)
def add_histogram_inference_support():
    """Register ``histogram_log_likelihood`` as the ``log_lambda_func`` for ``Histogram`` nodes."""
    handlers = {"log_lambda_func": histogram_log_likelihood}
    add_node_likelihood(Histogram, **handlers)
def add_parametric_inference_range_support():
    """Register ``categorical_likelihood_range`` for ``Categorical`` nodes."""
    node_type, handler = Categorical, categorical_likelihood_range
    add_node_likelihood(node_type, handler)
def add_parametric_inference_support():
    """Register the shared parametric likelihood and MPE handlers.

    Every listed leaf type gets ``parametric_likelihood``; all of them
    except ``Uniform`` and ``CategoricalDictionary`` also get
    ``parametric_mpe_log_likelihood``.
    """
    mpe_capable = (
        Gaussian,
        Gamma,
        LogNormal,
        Poisson,
        Bernoulli,
        Categorical,
        NegativeBinomial,
        Hypergeometric,
        Geometric,
        Exponential,
    )
    likelihood_only = (Uniform, CategoricalDictionary)

    # Same registration order as before: all likelihoods, then all MPE handlers.
    for leaf_type in mpe_capable + likelihood_only:
        add_node_likelihood(leaf_type, parametric_likelihood)
    for leaf_type in mpe_capable:
        add_node_mpe_likelihood(leaf_type, parametric_mpe_log_likelihood)
def add_utility_inference_support():
    """Register ``histogram_likelihood`` for ``Utility`` nodes."""
    node_type, handler = Utility, histogram_likelihood
    add_node_likelihood(node_type, handler)
def add_parametric_inference_support():
    """Register one likelihood handler per parametric leaf type."""
    handler_by_leaf = (
        (Gaussian, continuous_likelihood),
        (Gamma, gamma_likelihood),
        (LogNormal, lognormal_likelihood),
        (Poisson, discrete_likelihood),
        (Bernoulli, bernoulli_likelihood),
        (Categorical, categorical_likelihood),
        (Geometric, geometric_likelihood),
        (Exponential, exponential_likelihood),
        (Uniform, uniform_likelihood),
        (CategoricalDictionary, categorical_dictionary_likelihood),
    )
    for leaf_type, handler in handler_by_leaf:
        add_node_likelihood(leaf_type, handler)
def add_cltree_inference_support():
    """Register ``cltree_likelihood`` for ``CLTree`` nodes."""
    node_type, handler = CLTree, cltree_likelihood
    add_node_likelihood(node_type, handler)
def add_static_inference_range_support():
    """Register ``static_likelihood_range`` for ``StaticNumeric`` nodes."""
    node_type, handler = StaticNumeric, static_likelihood_range
    add_node_likelihood(node_type, handler)
def add_parametric_inference_support():
    """Register likelihood handlers for the parametric leaf types.

    Each entry lists the keyword arguments handed to
    ``add_node_likelihood``: ``lambda_func``, ``log_lambda_func``, or both.
    """
    registrations = (
        (MultivariateGaussian, {"lambda_func": continuous_multivariate_likelihood}),
        (Gaussian, {"lambda_func": continuous_likelihood, "log_lambda_func": continuous_log_likelihood}),
        (Hypergeometric, {"log_lambda_func": continuous_log_likelihood}),
        (Gamma, {"log_lambda_func": gamma_log_likelihood}),
        (LogNormal, {"log_lambda_func": continuous_log_likelihood}),
        (Poisson, {"log_lambda_func": discrete_log_likelihood}),
        (Bernoulli, {"lambda_func": discrete_likelihood, "log_lambda_func": discrete_log_likelihood}),
        (Categorical, {"log_lambda_func": categorical_log_likelihood}),
        (Geometric, {"log_lambda_func": discrete_log_likelihood}),
        (Exponential, {"log_lambda_func": continuous_log_likelihood}),
        (Uniform, {"log_lambda_func": uniform_log_likelihood}),
        (CategoricalDictionary, {"log_lambda_func": categorical_dictionary_log_likelihood}),
    )
    for leaf_type, handlers in registrations:
        add_node_likelihood(leaf_type, **handlers)
def add_histogram_inference_support():
    """Register the likelihood and MPE handlers for ``Histogram`` nodes."""
    node_type = Histogram
    add_node_likelihood(node_type, histogram_likelihood)
    add_node_mpe_likelihood(node_type, histogram_mpe_log_likelihood)
def add_piecewise_inference_range_support():
    """Register ``piecewise_likelihood_range`` for ``PiecewiseLinear`` nodes."""
    node_type, handler = PiecewiseLinear, piecewise_likelihood_range
    add_node_likelihood(node_type, handler)
def add_parametric_inference_support():
    """Register the parametric likelihood and MPE handlers for every supported form.

    Iterates ``SUPPORTED_PARAM_FORMS`` and registers both handlers for
    each entry before moving on to the next one.
    """
    for param_form in SUPPORTED_PARAM_FORMS:
        add_node_likelihood(param_form, parametric_log_likelihood)
        add_node_mpe_likelihood(param_form, parametric_mpe_log_likelihood)
def add_piecewise_inference_support():
    """Register ``piecewise_log_likelihood`` as the ``log_lambda_func`` for ``PiecewiseLinear`` nodes."""
    handlers = {"log_lambda_func": piecewise_log_likelihood}
    add_node_likelihood(PiecewiseLinear, **handlers)
def add_parametric_inference_range_support():
    """Register ``categorical_log_likelihood_range`` as the ``log_lambda_func`` for ``Categorical`` nodes."""
    handlers = {"log_lambda_func": categorical_log_likelihood_range}
    add_node_likelihood(Categorical, **handlers)
def add_conditional_inference_support():
    """Register likelihood handlers for ``SupervisedOr`` and ``SupervisedLeaf`` nodes."""
    # The same function is passed in both handler positions for SupervisedOr.
    # NOTE(review): confirm it is meant to serve as both handlers.
    or_handler = conditional_supervised_likelihood
    add_node_likelihood(SupervisedOr, or_handler, or_handler)

    add_node_likelihood(SupervisedLeaf, supervised_leaf_likelihood)