def test_leaf_bernoulli_bootstrap(self):
    """Check a conditional Bernoulli leaf on two separated clusters.

    Rows drawn around [10, 10] carry label 1 and rows drawn around [1, 1]
    carry label 0. For every row, the likelihood of the observed label and
    the likelihood of the flipped label must sum to one, and the observed
    label must get at least 0.5.
    """
    np.random.seed(17)
    cluster_one = np.random.multivariate_normal([10, 10], np.eye(2), 100)
    cluster_zero = np.random.multivariate_normal([1, 1], np.eye(2), 100)
    x = np.concatenate((cluster_one, cluster_zero), axis=0)
    y = np.array([1] * 100 + [0] * 100).reshape(-1, 1)

    observed = concatenate_yx(y, x)

    context = Context(parametric_types=[Bernoulli])
    context.feature_size = 2

    leaf = create_conditional_leaf(observed, context, [0])
    p_observed = likelihood(leaf, observed)

    # Same features, but with every label inverted.
    flipped = np.concatenate([1 - y, x], axis=1)
    p_flipped = likelihood(leaf, flipped)

    np.testing.assert_array_almost_equal(p_observed + p_flipped, 1.0)
    self.assertTrue(np.all(p_observed >= 0.5))
    self.assertTrue(np.all(p_flipped < 0.5))
def meu(
    node,
    input_data,
    node_top_down_meu=_node_top_down_meu,
    node_bottom_up_meu=_node_bottom_up_meu,
    in_place=False,
):
    """Run a bottom-up evaluation followed by a top-down decision pass.

    :param node: root node; it must pass ``is_valid``
    :param input_data: instances to evaluate; copied unless ``in_place``
    :param node_top_down_meu: handlers forwarded to ``eval_spn_top_down_meu``
    :param node_bottom_up_meu: handlers forwarded to ``likelihood`` as
        ``node_likelihood``
    :param in_place: when true, work directly on ``input_data``
    :return: tuple ``(meu_val, decisions)`` where ``meu_val`` is column 0 of
        the per-node matrix filled by the bottom-up pass and ``decisions``
        is the output of ``merge_rows_for_decisions``
    """
    is_ok, message = is_valid(node)
    assert is_ok, message

    data = input_data if in_place else np.array(input_data)

    every_node = get_nodes_by_type(node)
    values_per_node = np.zeros((data.shape[0], len(every_node)))

    # Bottom-up pass: one column of values_per_node is filled per node.
    likelihood(
        node,
        data,
        dtype=data.dtype,
        node_likelihood=node_bottom_up_meu,
        lls_matrix=values_per_node,
    )
    meu_val = values_per_node[:, 0]

    # Top-down pass: follows the max branch until a leaf is reached and
    # reports the decision taken at each max node for each instance.
    row_ids = np.arange(data.shape[0])
    _, decisions_per_node = eval_spn_top_down_meu(
        node,
        node_top_down_meu,
        parent_result=row_ids,
        data=data,
        lls_per_node=values_per_node,
    )

    return meu_val, merge_rows_for_decisions(decisions_per_node)
def test_leaf_no_variance_gaussian(self):
    """Check a conditional Gaussian leaf fitted on a constant target.

    The target column is constant in each fit, so the likelihood must be
    the same for every row. The expected constants are the standard normal
    density at 0 (target equals the fitted constant) and at 1 (leaf fitted
    on target 3, queried with target 2).
    """
    np.random.seed(17)
    features = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 500),
            np.random.multivariate_normal([1, 1], np.eye(2), 500),
        ),
        axis=0,
    )
    target = np.array([1] * 1000).reshape(-1, 1)
    data = concatenate_yx(target, features)

    context = Context(parametric_types=[Gaussian])
    context.feature_size = 2

    def first_column(train, query):
        # Fit a fresh leaf on `train` and score `query` with it.
        fitted = create_conditional_leaf(train, context, [0])
        return likelihood(fitted, query)[:, 0]

    density_at_zero = 0.398942280401432
    density_at_one = 0.241970724519143

    # Target constant at 1, queried with the training data.
    scores = first_column(data, data)
    self.assertEqual(np.var(scores), 0)
    self.assertAlmostEqual(scores[0], density_at_zero)

    # Target constant at 2 (data is modified in place).
    data[:, 0] = 2
    scores = first_column(data, data)
    self.assertEqual(np.var(scores), 0)
    self.assertAlmostEqual(scores[0], density_at_zero)

    # Leaf fitted on target 3 but queried with target 2.
    shifted = np.array(data)
    shifted[:, 0] = 3
    scores = first_column(shifted, data)
    self.assertAlmostEqual(np.var(scores), 0)
    self.assertAlmostEqual(scores[0], density_at_one)
def test_leaf_categorical(self):
    """Check a conditional Categorical leaf on three separated clusters.

    Rows 0-499 are drawn around [20, 20] and labelled 2, rows 500-999
    around [10, 10] and labelled 1, rows 1000-1499 around [1, 1] and
    labelled 0. For every row the likelihoods of the three labels must sum
    to one, and the label of the row's own cluster must dominate.
    """
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([20, 20], np.eye(2), 500),
            np.random.multivariate_normal([10, 10], np.eye(2), 500),
            np.random.multivariate_normal([1, 1], np.eye(2), 500),
        ),
        axis=0,
    )
    y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)
    data = concatenate_yx(y, x)

    ds_context = Context(parametric_types=[Categorical])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])

    # Likelihood of each candidate label, evaluated on every feature row.
    l0 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 0, x))
    l1 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 1, x))
    l2 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 2, x))

    np.testing.assert_array_almost_equal(l0 + l1 + l2, 1.0)

    # Label 0 belongs to the last cluster only.
    self.assertTrue(np.all(l0[1000:1500] > 0.85))
    self.assertTrue(np.all(l0[0:1000] < 0.15))

    # Label 1 belongs to the middle cluster only.
    self.assertTrue(np.all(l1[500:1000] > 0.85))
    self.assertTrue(np.all(l1[0:500] < 0.15))
    self.assertTrue(np.all(l1[1000:1500] < 0.15))

    # Label 2 belongs to the first cluster only; the upper bound is the
    # number of rows (1500).
    self.assertTrue(np.all(l2[0:500] > 0.85))
    self.assertTrue(np.all(l2[500:1500] < 0.15))
def meu(root, input_data, node_bottom_up_meu=_node_bottom_up_meu, in_place=False):
    """Evaluate the structure bottom-up and return the root's MEU column.

    :param root: root node of the structure
    :param input_data: 2-d array of instances; the columns belonging to
        ``Utility`` nodes must be NaN. Copied unless ``in_place``.
    :param node_bottom_up_meu: handlers forwarded to
        ``eval_spmn_bottom_up_meu``
    :param in_place: when true, work directly on ``input_data``
    :return: column ``root.id`` of the per-node MEU matrix
    :raises AssertionError: if a utility column holds a non-NaN value
    """
    # NOTE(review): the is_valid(root) check is commented out in the
    # original code; confirm whether it should be re-enabled.
    data = input_data if in_place else np.copy(input_data)

    nodes = get_nodes_by_type(root)
    utility_scope = {node.scope[0] for node in nodes if type(node) is Utility}
    assert np.all(np.isnan(data[:, list(utility_scope)])), "Please specify all utility values as np.nan"

    likelihood_per_node = np.zeros((data.shape[0], len(nodes)))
    meu_per_node = np.full((data.shape[0], len(nodes)), np.nan)

    # One bottom-up pass evaluating the likelihoods of every node.
    likelihood(root, data, dtype=data.dtype, lls_matrix=likelihood_per_node)

    # Use the handlers supplied by the caller rather than always falling
    # back to the module-level default.
    eval_spmn_bottom_up_meu(
        root,
        node_bottom_up_meu,
        meu_per_node=meu_per_node,
        data=data,
        lls_per_node=likelihood_per_node,
    )
    return meu_per_node[:, root.id]
def Expectation(spn, feature_id, ranges, node_expectation, node_likelihood):
    """Compute the expectation of one feature, normalized by the likelihood.

    The SPN is evaluated once with leaves of ``feature_id`` replaced by
    their expectation, and the result is divided by the plain likelihood
    of ``ranges``.

    :param spn: root of the SPN
    :param feature_id: scope index of the feature whose expectation is taken
    :param ranges: input passed to ``likelihood`` and to the leaf handlers
    :param node_expectation: mapping from leaf type to a function
        ``f(node)`` returning the leaf's expectation
    :param node_likelihood: mapping from node type to a likelihood function
    :return: the ratio of the two evaluations
    :raises Exception: if a leaf type has no entry in the mapping it needs
    """

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        t_node = type(node)

        if node.scope[0] == feature_id:
            if t_node not in node_expectation:
                raise Exception("Node type unknown for expectation: " + str(t_node))
            exps = np.zeros((data.shape[0], 1), dtype=dtype)
            exps[:] = node_expectation[t_node](node)
            return exps

        # Fail loudly instead of silently returning None when the leaf
        # type has no registered likelihood.
        if t_node not in node_likelihood:
            raise Exception("Node type unknown for likelihood: " + str(t_node))
        return node_likelihood[t_node](node, ranges, node_likelihood=node_likelihood)

    node_expectations = {
        type(leaf): leaf_expectation for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_expectations.update({Sum: sum_likelihood, Product: prod_likelihood})

    expectation = likelihood(spn, ranges, node_likelihood=node_expectations)
    return expectation / likelihood(spn, ranges, node_likelihood=node_likelihood)
def test_ll_matrix(self):
    """Check that ``lls_matrix`` is filled with the value of every node.

    A small SPN is built from ``leaf(scope, multiplier)`` nodes and the
    same structure is mirrored with plain NumPy arithmetic. Every column
    of the matrix filled by ``likelihood`` must match the reference value
    of the node with that id, and the matrix filled by ``log_likelihood``
    must be its logarithm.
    """
    add_node_likelihood(Leaf, sum_and_multiplier_ll)

    # SPN under test.
    node_1_1_1_1 = leaf(2, 1)
    node_1_1_1_2 = leaf(2, 2)
    node_1_1_1 = 0.7 * node_1_1_1_1 + 0.3 * node_1_1_1_2
    node_1_1_2 = leaf([0, 1], 3)
    node_1_1 = node_1_1_1 * node_1_1_2
    node_1_2_1_1_1 = leaf(0, 5)
    node_1_2_1_1_2 = leaf(1, 4)
    node_1_2_1_1 = node_1_2_1_1_1 * node_1_2_1_1_2
    node_1_2_1_2 = leaf([0, 1], 6)
    node_1_2_1 = 0.1 * node_1_2_1_1 + 0.9 * node_1_2_1_2
    node_1_2_2 = leaf(2, 3)
    node_1_2 = node_1_2_1 * node_1_2_2
    spn = 0.4 * node_1_1 + 0.6 * node_1_2

    assign_ids(spn)
    max_id = max([n.id for n in get_nodes_by_type(spn)])

    data = np.random.rand(10, 10)

    # Reference values, one per node, following the same structure.
    node_1_1_1_1_r = data[:, 2] * 1
    node_1_1_1_2_r = data[:, 2] * 2
    node_1_1_1_r = 0.7 * node_1_1_1_1_r + 0.3 * node_1_1_1_2_r
    node_1_1_2_r = 3 * (data[:, 0] + data[:, 1])
    node_1_1_r = node_1_1_1_r * node_1_1_2_r
    node_1_2_1_1_1_r = data[:, 0] * 5
    node_1_2_1_1_2_r = data[:, 1] * 4
    node_1_2_1_1_r = node_1_2_1_1_1_r * node_1_2_1_1_2_r
    node_1_2_1_2_r = 6 * (data[:, 0] + data[:, 1])
    node_1_2_1_r = 0.1 * node_1_2_1_1_r + 0.9 * node_1_2_1_2_r
    node_1_2_2_r = data[:, 2] * 3
    node_1_2_r = node_1_2_1_r * node_1_2_2_r
    spn_r = 0.4 * node_1_1_r + 0.6 * node_1_2_r

    self.assert_correct(spn, data, spn_r)

    lls = np.zeros((data.shape[0], max_id + 1))
    likelihood(spn, data, lls_matrix=lls)
    llls = np.zeros((data.shape[0], max_id + 1))
    log_likelihood(spn, data, lls_matrix=llls)

    # np.all replaces np.alltrue, which no longer exists in NumPy 2.0.
    self.assertTrue(np.all(np.isclose(lls, np.exp(llls))))

    expected_per_node = [
        (spn, spn_r),
        (node_1_2, node_1_2_r),
        (node_1_2_2, node_1_2_2_r),
        (node_1_2_1, node_1_2_1_r),
        (node_1_2_1_2, node_1_2_1_2_r),
        (node_1_2_1_1, node_1_2_1_1_r),
        (node_1_2_1_1_2, node_1_2_1_1_2_r),
        (node_1_2_1_1_1, node_1_2_1_1_1_r),
        (node_1_1, node_1_1_r),
        (node_1_1_2, node_1_1_2_r),
        (node_1_1_1, node_1_1_1_r),
        (node_1_1_1_2, node_1_1_1_2_r),
        (node_1_1_1_1, node_1_1_1_1_r),
    ]
    for inner_node, expected in expected_per_node:
        self.assertTrue(np.all(np.isclose(expected, lls[:, inner_node.id])))
def Expectation(spn, feature_scope, evidence_scope, evidence, node_expectation=_node_expectation):
    """Compute the expectation E[X_feature_scope | X_evidence_scope].

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get
        the expectation from
    evidence_scope -- set() of integers, the scope of the evidence
        features; None is treated as the empty set
    evidence -- numpy 2d array of the evidence data, or None
    node_expectation -- mapping from leaf type to a function returning the
        expectation of a leaf of that type
    """
    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)
    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        # Leaves outside the feature scope contribute their likelihood.
        if node.scope[0] not in feature_scope:
            return likelihood(node, evidence)

        leaf_type = type(node)
        if leaf_type not in node_expectation:
            raise Exception('Node type unknown: ' + str(leaf_type))

        exps = np.zeros((data.shape[0], 1), dtype=dtype)
        exps[:] = node_expectation[leaf_type](node)
        return exps

    leaf_types = (type(leaf) for leaf in get_nodes_by_type(marg_spn, Leaf))
    node_expectations = dict.fromkeys(leaf_types, leaf_expectation)
    node_expectations.update({Sum: sum_likelihood, Product: prod_likelihood})

    if evidence is None:
        # The placeholder values are never read; only the row count matters.
        placeholder = np.zeros((1, len(spn.scope)))
        return likelihood(marg_spn, placeholder, node_likelihood=node_expectations)

    # With evidence, return the conditional expectation: the evaluation
    # with expectation leaves divided by the likelihood of the evidence.
    numerator = likelihood(marg_spn, evidence, node_likelihood=node_expectations)
    denominator = likelihood(marginalize(marg_spn, keep=evidence_scope), evidence)
    return numerator / denominator
def get_mutual_information_correlation(spn, context):
    """Build a matrix of normalized mutual information between categoricals.

    For every pair of categorical features (x, y) the entry is
    ``sum(p(x, y) * log(p(x, y) / (p(x) * p(y)))) / sqrt(H(x) * H(y))``,
    with all probabilities obtained from marginalized copies of the SPN.
    Diagonal entries of categorical features are 1; rows and entries of
    non-categorical features are NaN.

    :param spn: root of the SPN
    :param context: object providing ``get_domains_by_scope``
    :return: 2-d array with one row and one column per feature in
        ``spn.scope``
    """
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)

    correlation_matrix = []
    for x in range(num_features):
        if x not in categoricals:
            correlation_matrix.append(np.full((num_features), np.nan))
            continue

        x_correlation = [np.nan] * num_features
        x_range = context.get_domains_by_scope([x])[0]
        spn_x = marginalize(spn, [x])
        query_x = np.full((len(x_range), num_features), np.nan)
        query_x[:, x] = x_range

        for y in categoricals:
            if x == y:
                x_correlation[x] = 1
                continue

            spn_y = marginalize(spn, [y])
            spn_xy = marginalize(spn, [x, y])
            y_range = context.get_domains_by_scope([y])[0]
            query_y = np.full((len(y_range), num_features), np.nan)
            query_y[:, y] = y_range

            # One query row per (x value, y value) combination.
            query_xy = np.full((len(x_range) * len(y_range), num_features), np.nan)
            xy = np.mgrid[x_range[0]:x_range[-1]:len(x_range) * 1j,
                          y_range[0]:y_range[-1]:len(y_range) * 1j]
            xy = xy.reshape(2, -1)
            query_xy[:, x] = xy[0, :]
            query_xy[:, y] = xy[1, :]

            results_xy = likelihood(spn_xy, query_xy)
            results_xy = results_xy.reshape(len(x_range), len(y_range))
            results_x = likelihood(spn_x, query_x)
            results_y = likelihood(spn_y, query_y)

            # Index grids that broadcast the marginals to the joint's shape.
            xx, yy = np.mgrid[0:len(x_range) - 1:len(x_range) * 1j,
                              0:len(y_range) - 1:len(y_range) * 1j]
            xx = xx.astype(int)
            yy = yy.astype(int)
            grid_results_x = results_x[xx]
            grid_results_y = results_y[yy]
            grid_results_xy = results_xy

            log = np.log(
                grid_results_xy / (np.multiply(grid_results_x, grid_results_y).squeeze()))
            prod = np.prod(np.array([log, grid_results_xy]), axis=0)

            log_x = np.log(results_x)
            log_y = np.log(results_y)
            entropy_x = -1 * np.sum(np.multiply(log_x, results_x))
            entropy_y = -1 * np.sum(np.multiply(log_y, results_y))

            x_correlation[y] = (np.sum(prod) / np.sqrt(entropy_x * entropy_y))

        correlation_matrix.append(np.array(x_correlation))

    return np.array(correlation_matrix)
def assert_correct(self, spn, data, result):
    """Assert that ``likelihood`` and ``log_likelihood`` agree with ``result``.

    :param spn: structure under test
    :param data: 2-d input array
    :param result: expected likelihood per row; reshaped to one column
    """
    l = likelihood(spn, data)
    self.assertEqual(l.shape[0], data.shape[0])
    self.assertEqual(l.shape[1], 1)

    # np.all replaces np.alltrue, which no longer exists in NumPy 2.0.
    self.assertTrue(np.all(np.isclose(result.reshape(-1, 1), l)))
    self.assertTrue(np.all(np.isclose(np.log(l), log_likelihood(spn, data))))

    # The debug code paths must produce the same values.
    self.assertTrue(np.all(np.isclose(np.log(l), log_likelihood(spn, data, debug=True))))
    self.assertTrue(np.all(np.isclose(l, likelihood(spn, data, debug=True))))
def test_type(self):
    """Check that ``likelihood`` returns an array of the requested dtype."""
    add_node_likelihood(Leaf, identity_ll)

    # A small mixture of a joint leaf and a product of two leaves.
    mixture = 0.5 * Leaf(scope=[0, 1]) + 0.5 * (Leaf(scope=0) * Leaf(scope=1))
    batch = np.random.rand(10, 4)

    single = likelihood(mixture, batch, dtype=np.float32)
    self.assertEqual(single.dtype, np.float32)

    extended = likelihood(mixture, batch, dtype=np.float128)
    self.assertEqual(extended.dtype, np.float128)
def conditional_probability(spn, y_index, x_instance):
    """Compute the conditional probability P(Y | X) as P(Y, X) / P(X).

    :param spn: structure passed to ``likelihood``
    :param y_index: column index of Y inside ``x_instance``
    :param x_instance: 2-d array with one query per row, holding the values
        of the requested variables (including Y) and NaN for the
        non-requested ones
    :return: ratio of the two ``likelihood`` results, one entry per row
    """
    x_without_y = np.copy(x_instance)
    # Blank out Y in every row; touching only the first row would leave the
    # ratio at 1 for all remaining rows.
    x_without_y[:, y_index] = np.nan

    return likelihood(spn, x_instance) / likelihood(spn, x_without_y)
def test_piecewise_linear_multiplied(self):
    """Check the product of two identical piecewise-linear mixtures.

    Each factor is a 50/50 mixture of two triangular functions on one
    scope; the expected product is the squared single-factor value.
    """

    def triangle_mixture(scope_index):
        return (
            0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[scope_index]) +
            0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[scope_index]))

    piecewise_spn = triangle_mixture(0) * triangle_mixture(1)

    evidence = np.array([
        [-2, -2],
        [-1.5, -1.5],
        [-1, -1],
        [-0.5, -0.5],
        [0, 0],
        [0.5, 0.5],
        [1, 1],
        [1.5, 1.5],
        [2, 2],
        [3, 3],
        [-3, -3],
        [0, 100],
    ])

    single_factor = np.array(
        [[0], [0.25], [0.5], [0.25], [0], [0.25], [0.5], [0.25], [0], [0], [0], [0]])
    expected_results = single_factor**2

    results = likelihood(piecewise_spn, evidence)
    matches = np.equal(results, expected_results)
    self.assertTrue(np.all(matches))
def _density(self, x):
    """Evaluate the SPN likelihood for one point.

    ``x`` is modified in place: categorical entries are replaced by their
    integer code and entries of integer-typed columns are rounded. The
    values are then written into a copy of ``self._density_mask``:
    positions flagged 2 take the next value of ``x``, positions flagged 1
    take the stored condition, and all other positions are left as they
    are (NaN by initialization, meaning the variable is marginalized).

    :param x: mutable sequence of values
    :return: the scalar at ``[0][0]`` of the ``likelihood`` result
    """
    # Map categorical inputs to numeric codes; round integer variables.
    for idx in range(len(x)):
        if self.names[idx] in self._categorical_variables:
            name_to_int = self._categorical_variables[self.names[idx]]['name_to_int']
            x[idx] = name_to_int[x[idx]]
        elif self.data.dtypes[idx] == int:
            x[idx] = round(x[idx])

    # Work on a copy of the current state of the network.
    query = self._density_mask.copy()
    next_free = 0
    for col in range(query.shape[1]):
        flag = query[0, col]
        if flag == 2:
            # Neither conditioned nor marginalized: take the next input.
            query[0, col] = x[next_free]
            next_free += 1
        elif flag == 1:
            # Conditioned: use the stored condition value.
            query[0, col] = self._condition[0, col]

    return likelihood(self._spn, query)[0][0]
def assert_correct_node_sampling_continuous(self, node, samples, plot):
    """Assert that samples follow the distribution of a continuous node.

    Both the externally supplied ``samples`` and 1,000,000 samples drawn
    with ``sample_parametric_node`` are compared against the node's scipy
    CDF with a Kolmogorov-Smirnov test.

    :param node: parametric node; its scope is set to ``[0]``
    :param samples: externally generated samples
    :param plot: when true, show the node's pdf over a histogram of
        ``samples``
    """
    node.scope = [0]
    rand_gen = np.random.RandomState(1234)
    samples_gen = sample_parametric_node(node, 1000000, rand_gen)

    if plot:
        import matplotlib.pyplot as plt

        _, ax = plt.subplots(1, 1)
        x = np.linspace(np.min(samples), np.max(samples), 1000)
        ax.plot(x, likelihood(node, x.reshape(-1, 1)), 'r-', lw=2, alpha=0.6,
                label=node.__class__.__name__ + ' pdf')
        # `density` is the supported name of the former `normed` argument.
        ax.hist(samples, density=True, histtype='stepfilled', alpha=0.7, bins=1000)
        ax.legend(loc='best', frameon=False)
        plt.show()

    scipy_obj, params = get_scipy_obj_params(node)

    def node_cdf(values):
        return scipy_obj.cdf(values, **params)

    # H_0: the distributions are identical. H_0 is rejected if p < 0.05,
    # so the check passes when p >= 0.05.
    test_outside_samples = kstest(samples, node_cdf)
    self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

    test_generated_samples = kstest(samples_gen, node_cdf)
    self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
def execute_python():
    """Time a single ``likelihood(spn, test_data)`` call.

    :return: tuple of the likelihood result and the elapsed time measured
        with ``time.perf_counter``, converted from seconds to nanoseconds
    """
    t_begin = time.perf_counter()
    py_ll = likelihood(spn, test_data)
    elapsed_seconds = time.perf_counter() - t_begin
    return py_ll, elapsed_seconds * 1000000000
def plot_density(spn, data):
    """Show the likelihood of the first two features as a heat map.

    A grid spanning the range of columns 0 and 1 of ``data`` is evaluated
    on the SPN marginalized to scope {0, 1}.

    :param spn: root of the SPN
    :param data: 2-d array; only columns 0 and 1 are read
    """
    import matplotlib.pyplot as plt
    import numpy as np

    first, second = data[:, 0], data[:, 1]
    x_max, x_min = first.max(), first.min()
    y_max, y_min = second.max(), second.min()

    # A complex step asks mgrid for that many points, one per unit of range.
    steps_x = int(x_max - x_min) * 1j
    steps_y = int(y_max - y_min) * 1j
    xi, yi = np.mgrid[x_min:x_max:steps_x, y_min:y_max:steps_y]

    grid_points = np.vstack([xi.flatten(), yi.flatten()]).T
    marg_spn = marginalize(spn, {0, 1})
    z = likelihood(marg_spn, grid_points).reshape(xi.shape)

    # Make the plot
    plt.imshow(z + 1, extent=(x_min, x_max, y_min, y_max), cmap=cm.hot, norm=PowerNorm(gamma=0.2))
    plt.colorbar()
    plt.show()
def test_leaf_gaussian(self):
    """Check a conditional Gaussian leaf on two separated clusters.

    Targets around 20 are paired with features around [10, 10] and
    targets around 60 with features around [1, 1]. The leaf must give a
    higher likelihood to the matching pairing than to the swapped one.
    """
    np.random.seed(17)
    features = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 5000),
            np.random.multivariate_normal([1, 1], np.eye(2), 5000),
        ),
        axis=0,
    )
    # y around 20 goes with X=[10, 10]; y around 60 goes with X=[1, 1].
    target = np.concatenate(
        (
            np.random.normal(20, 2, 5000),
            np.random.normal(60, 2, 5000),
        )
    ).reshape(-1, 1)

    data = concatenate_yx(target, features)

    context = Context(parametric_types=[Gaussian])
    context.feature_size = 2
    leaf = create_conditional_leaf(data, context, [0])

    has_nan = np.any(np.isnan(likelihood(leaf, data)))
    self.assertFalse(has_nan)

    self.assertGreater(get_ll(leaf, [20, 10, 10]), get_ll(leaf, [20, 1, 1]))
    self.assertGreater(get_ll(leaf, [60, 1, 1]), get_ll(leaf, [60, 10, 10]))

    self.assertAlmostEqual(get_ll(leaf, [60, 1, 1]), 0.3476232862652)
    self.assertAlmostEqual(get_ll(leaf, [20, 10, 10]), 0.3628922322773634)
def test_sum_one_dimension(self):
    """Check sum nodes whose children all share scope 0."""
    add_node_likelihood(Leaf, identity_ll)

    def two_leaf_sum(first_weight, second_weight):
        return first_weight * Leaf(scope=0) + second_weight * Leaf(scope=0)

    # Basic computations: a mixture of identical leaves returns the input.
    spn = two_leaf_sum(0.5, 0.5)
    data = np.random.rand(10, 1)
    self.assert_correct(spn, data, data)

    spn = two_leaf_sum(0.1, 0.9)
    data = np.random.rand(10, 1)
    self.assert_correct(spn, data, data)

    # Any dataset can be passed and the scopes are respected; this is
    # important for inner nodes.
    spn = two_leaf_sum(0.1, 0.9)
    data = np.random.rand(10, 3)
    expected = (0.1 * data[:, 0] + 0.9 * data[:, 0]).reshape(-1, 1)
    self.assert_correct(spn, data, expected)

    # Weights that are not normalized must be rejected.
    spn = two_leaf_sum(0.1, 0.9)
    spn.weights[1] = 0.2
    data = np.random.rand(10, 3)
    with self.assertRaises(AssertionError):
        likelihood(spn, data)
    with self.assertRaises(AssertionError):
        log_likelihood(spn, data)

    # Log space.
    spn = two_leaf_sum(0.1, 0.9)
    data = np.random.rand(10, 3)
    expected = (0.1 * data[:, 0] + 0.9 * data[:, 0]).reshape(-1, 1)
    self.assert_correct(spn, data, expected)
def test_piecewise_linear_constant(self):
    """Check a mixture of two constant piecewise-linear leaves.

    The evidence value -3000 lies outside both x-ranges and the expected
    likelihood is exactly 1.
    """
    piecewise_spn = (
        0.5 * PiecewiseLinear([1, 2], [1, 1], [], scope=[0]) +
        0.5 * PiecewiseLinear([-2, -1], [1, 1], [], scope=[0]))

    evidence = np.array([[-3000]])
    expected_results = np.array([[1]])

    results = likelihood(piecewise_spn, evidence)
    matches = np.equal(results, expected_results)
    self.assertTrue(np.all(matches))
def test_piecewise_linear_simple(self):
    """Check a 50/50 mixture of two triangular piecewise-linear leaves."""
    piecewise_spn = (
        0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) +
        0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0]))

    # One evidence value per row, covering both triangles and points
    # outside of them.
    evidence = np.array(
        [[-2], [-1.5], [-1], [-0.5], [0], [0.5], [1], [1.5], [2], [3], [-3]])
    expected_results = np.array(
        [[0], [0.25], [0.5], [0.25], [0], [0.25], [0.5], [0.25], [0], [0], [0]])

    results = likelihood(piecewise_spn, evidence)
    matches = np.equal(results, expected_results)
    self.assertTrue(np.all(matches))
def test_sum_multiple_dimension(self):
    """Check a sum node whose leaves span two dimensions."""
    add_node_likelihood(Leaf, identity_ll)

    # Basic computations in multiple dimensions.
    mixture = 0.5 * Leaf(scope=[0, 1]) + 0.5 * Leaf(scope=[0, 1])
    batch = np.random.rand(10, 2)

    # Plain evaluation; the returned values are checked by assert_correct.
    likelihood(mixture, batch)

    expected = batch[:, 0] * batch[:, 1]
    self.assert_correct(mixture, batch, expected)
def plot_likelihoods(spn, classes, plot_pdf, res=100, test_sample=None):
    """Generates for each class a 2D heightmap of likelihoods.

    A ``res`` x ``res`` grid over [0.5, 127.5] in both dimensions is
    evaluated once per class, with the class value appended as a third
    column. Each class gets its own figure, which is saved to
    ``plot_pdf`` and shown.

    :param spn: structure passed to ``likelihood``
    :param classes: sequence of at most 10 class values
    :param plot_pdf: object whose ``savefig(fig)`` receives every figure
    :param res: number of grid points per axis
    :param test_sample: optional point (indexable with 0 and 1) that is
        marked in every figure
    :raises Exception: if more than 10 classes are given
    """
    if len(classes) > 10:
        raise Exception(
            "Not more than 10 distinct classes allowed for likelihood "
            "plot, but given %d." % len(classes))

    # Samples
    lin = np.linspace(0.5, 127.5, res)
    xgrid, ygrid = np.meshgrid(lin, lin)
    samples = np.asarray(list(itertools.product(lin, lin)), dtype=np.float32)

    # Compute likelihood values for each sample regarding each class.
    # The rows are collected first and stacked once instead of growing the
    # array with np.append on every iteration.
    n = res**2
    rows = [
        likelihood(spn, np.column_stack((samples, c * np.ones(n)))).reshape((1, n)) * 100000
        for c in classes
    ]
    likelihoods = np.vstack([np.empty((0, n))] + rows)

    # Determine colormap levels
    l_max = np.max(likelihoods)
    l_min = np.min(likelihoods)
    levels = np.linspace(l_min, l_max, 15, endpoint=True)

    # Get default colormap for default colors
    def_cmap = plt.get_cmap("tab10")

    # Plot the likelihoods
    for i, c in enumerate(classes):
        fig, ax = plt.subplots(figsize=(4, 3.5))
        zgrid = griddata(samples, likelihoods[i], (xgrid, ygrid))

        # Fade from transparent white to the class colour.
        cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
            "", [(1, 1, 1, 0), def_cmap(i)])
        cont = plt.contourf(xgrid, ygrid, zgrid, levels, cmap=cmap)

        if test_sample is not None:
            # The line width is numeric, as Matplotlib documents it.
            ax.scatter(test_sample[0], test_sample[1], c='gold', s=200,
                       edgecolors='w', linewidth=3,
                       label=r'$z_{\mathrm{test}}$')
            plt.legend()

        plt.xlabel('x')
        plt.ylabel('y')
        plt.axis('equal')
        ax.set_xlim([0, 128])
        ax.set_ylim([0, 128])
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])

        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size=0.3, pad=0.1)
        fig.colorbar(cont, cax=cax, ticks=[])

        plot_pdf.savefig(fig)
        plt.show()
def assert_correct(self, node, x, result):
    """Assert the likelihood of a single-scope node at value ``x``.

    The node is evaluated twice: on a one-cell dataset with scope 0, and
    on a random 10x10 dataset whose column 5 is set to ``x`` with scope 5.

    :param node: node under test; its ``scope`` is overwritten
    :param x: value at which the node is evaluated
    :param result: expected likelihood
    """
    self.tested.add(type(node))

    # The builtin float replaces the np.float alias removed in NumPy 1.24.
    data = np.array([x], dtype=float).reshape(-1, 1)
    node.scope = [0]
    l = likelihood(node, data)
    self.assertAlmostEqual(result, l[0, 0], 5)
    # np.all replaces np.alltrue, which no longer exists in NumPy 2.0.
    self.assertTrue(np.all(np.isclose(np.log(l), log_likelihood(node, data))))

    # Only the column named by the scope may influence the result.
    data = np.random.rand(10, 10)
    data[:, 5] = x
    node.scope = [5]
    l = likelihood(node, data)
    self.assertEqual(l.shape[0], data.shape[0])
    self.assertEqual(l.shape[1], 1)
    self.assertTrue(np.isclose(np.var(l), 0))
    self.assertTrue(np.all(np.isclose(result, l[0, 0])))
    self.assertTrue(np.all(np.isclose(np.log(l), log_likelihood(node, data))))
def assert_correct_node_sampling_discrete(self, node, samples, plot):
    """Assert that samples follow the distribution of a discrete node.

    Both the externally supplied ``samples`` and 1,000,000 samples drawn
    with ``sample_parametric_node`` are compared against the node's
    likelihood with a chi-square test.

    :param node: parametric node; its ``scope`` is set to ``[0]``
    :param samples: externally generated samples
    :param plot: unused
    """
    node.scope = [0]
    rand_gen = np.random.RandomState(1234)
    samples_gen = sample_parametric_node(node, 1000000, rand_gen)

    def goodness_of_fit(drawn):
        # Expected counts are the node's likelihood of each observed value
        # scaled by the size of the sample set being tested.
        fvals, fobs = np.unique(drawn, return_counts=True)
        expected = (likelihood(node, fvals.reshape(-1, 1)) * drawn.shape[0])[:, 0]
        return chisquare(fobs, expected)

    # H_0: the data comes from the node's distribution. H_0 is rejected if
    # p < 0.05, so the check passes when p >= 0.05.
    test_outside_samples = goodness_of_fit(samples)
    self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

    test_generated_samples = goodness_of_fit(samples_gen)
    self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
def approximate_density(dist_node, X, bins=100):
    """Evaluate a node's likelihood on a grid spanning the range of ``X``.

    :param dist_node: node whose ``type.meta_type`` selects the grid: every
        integer between the truncated NaN-ignoring min and max of ``X`` for
        ``MetaType.DISCRETE``, otherwise ``bins`` evenly spaced points
    :param X: array of observed values; NaNs are ignored for the range
    :param bins: number of grid points for non-discrete nodes
    :return: tuple ``(x, y)`` of grid positions and likelihood values
    """
    if dist_node.type.meta_type == MetaType.DISCRETE:
        first = int(np.nanmin(X))
        last = int(np.nanmax(X))
        grid = np.arange(first, last + 1)
    else:
        grid = np.linspace(np.nanmin(X), np.nanmax(X), bins)

    column = grid.reshape(-1, 1)
    density = likelihood(dist_node, column)
    return column[:, 0], density[:, 0]
def leaf_expectation(node, data, dtype=np.float64, **kwargs):
    """Return a leaf's expectation, or its likelihood outside the features.

    Reads ``feature_scope``, ``node_expectation`` and ``evidence`` from
    the enclosing scope.

    :param node: leaf node
    :param data: array whose row count sizes the returned column
    :param dtype: dtype of the returned expectation column
    :raises Exception: if the leaf type has no entry in ``node_expectation``
    """
    # Leaves outside the feature scope contribute their likelihood.
    if node.scope[0] not in feature_scope:
        return likelihood(node, evidence)

    leaf_type = type(node)
    if leaf_type not in node_expectation:
        raise Exception('Node type unknown: ' + str(leaf_type))

    # Broadcast the leaf's expectation over one row per data instance.
    exps = np.zeros((data.shape[0], 1), dtype=dtype)
    exps[:] = node_expectation[leaf_type](node)
    return exps
def expectation_recursive_batch(node, feature_scope, inverted_features, relevant_scope, evidence,
                                node_expectation, node_likelihoods):
    """Recursively evaluate expectations for a batch of evidence rows.

    Product nodes multiply the results of the children that overlap
    ``relevant_scope`` (NaNs are ignored). Sum nodes mix the children
    whose first-row result is not NaN, with weights re-normalized over
    those children. Leaves in ``feature_scope`` return their expectation;
    all other leaves return their likelihood.

    :param node: current node
    :param feature_scope: sequence of feature indices (``.index`` is used)
    :param inverted_features: per-feature flags, aligned with
        ``feature_scope``, forwarded as ``inverted`` to the handlers
    :param relevant_scope: set of scope indices that matter for the query
    :param evidence: 2-d evidence array
    :param node_expectation: mapping from leaf type to a function
        ``f(node, evidence, inverted=...)``
    :param node_likelihoods: mapping forwarded to ``likelihood``
    :return: array of shape ``(evidence.shape[0], 1)``; all NaN when a sum
        node has nothing relevant to contribute
    :raises Exception: if a feature leaf type has no expectation handler
    """

    def recurse(child):
        return expectation_recursive_batch(child, feature_scope, inverted_features, relevant_scope,
                                           evidence, node_expectation, node_likelihoods)

    if isinstance(node, Product):
        llchildren = np.concatenate(
            [recurse(child) for child in node.children
             if len(relevant_scope.intersection(child.scope)) > 0],
            axis=1)
        return np.nanprod(llchildren, axis=1).reshape(-1, 1)

    if isinstance(node, Sum):
        if len(relevant_scope.intersection(node.scope)) == 0:
            return np.full((evidence.shape[0], 1), np.nan)

        llchildren = np.concatenate([recurse(child) for child in node.children], axis=1)

        # Relevance of a child is decided from the first row only.
        relevant_children_idx = np.where(~np.isnan(llchildren[0]))[0]
        if len(relevant_children_idx) == 0:
            # Same column shape as every other return path, so that a
            # parent can concatenate the result along axis 1.
            return np.full((evidence.shape[0], 1), np.nan)

        weights_normalizer = sum(node.weights[j] for j in relevant_children_idx)
        b = np.array(node.weights)[relevant_children_idx] / weights_normalizer
        return np.dot(llchildren[:, relevant_children_idx], b).reshape(-1, 1)

    # Leaf node.
    if node.scope[0] not in feature_scope:
        return likelihood(node, evidence, node_likelihood=node_likelihoods)

    t_node = type(node)
    if t_node not in node_expectation:
        raise Exception('Node type unknown: ' + str(t_node))

    exps = np.zeros((evidence.shape[0], 1))
    feature_idx = feature_scope.index(node.scope[0])
    inverted = inverted_features[feature_idx]
    exps[:] = node_expectation[t_node](node, evidence, inverted=inverted)
    return exps
def leaf_expectation(node, data, dtype=np.float64, **kwargs):
    """Return a leaf's distinct-value result, or its likelihood.

    Reads ``feature_scope``, ``node_distinct_vals``, ``node_likelihoods``
    and ``evidence`` from the enclosing scope; ``data`` and ``dtype`` are
    accepted but not used.

    :param node: leaf node
    :raises Exception: if the leaf type has no entry in
        ``node_distinct_vals``
    """
    # Leaves outside the feature scope contribute their likelihood.
    if node.scope[0] not in feature_scope:
        return likelihood(node, evidence, node_likelihood=node_likelihoods)

    leaf_type = type(node)
    if leaf_type not in node_distinct_vals:
        raise Exception('Node type unknown: ' + str(leaf_type))

    return node_distinct_vals[leaf_type](node, evidence)
def assert_correct(self, node, x, result):
    """Assert the likelihood of a (possibly multi-scope) node at ``x``.

    The node is evaluated twice: on a single row holding ``x`` with scope
    ``0..len(x)-1``, and on a random 10-row dataset in which the columns
    of the scope shifted by 5 are set to ``x``.

    :param node: node under test; its ``scope`` is overwritten
    :param x: value(s) at which the node is evaluated
    :param result: expected likelihood
    """
    self.tested.add(type(node))

    # The builtin float replaces the np.float alias removed in NumPy 1.24.
    data = np.array([x], dtype=float).reshape(1, -1)
    node.scope = list(range(data.shape[1]))
    l = likelihood(node, data)
    self.assertAlmostEqual(result, l[0, 0], 5)
    # np.all replaces np.alltrue, which no longer exists in NumPy 2.0.
    self.assertTrue(np.all(np.isclose(np.log(l), log_likelihood(node, data))))

    # Shift the scope so that only the columns it names may matter.
    new_scope = (np.array(node.scope) + 5).tolist()
    data = np.random.rand(10, max(new_scope) + 2)
    data[:, new_scope] = x
    node.scope = new_scope
    l = likelihood(node, data)
    self.assertEqual(l.shape[0], data.shape[0])
    self.assertEqual(l.shape[1], 1)
    self.assertTrue(np.isclose(np.var(l), 0))
    self.assertTrue(np.all(np.isclose(result, l[0, 0])))
    self.assertTrue(np.all(np.isclose(np.log(l), log_likelihood(node, data))))