def create_disj(data, scope, assignments, alpha):
    """
    Builds a smoothed disjunction over the given variable assignments:
    a sum node whose children are indicator products, one per assignment,
    weighted by the Laplace-smoothed empirical frequency of that
    assignment in the data.
    """
    unq_data, counts = np.unique(data, axis=0, return_counts=True)
    probs = np.zeros(assignments.shape[0])
    for i in range(assignments.shape[0]):
        index = np.where(np.all(assignments[i] == unq_data, axis=1))[0]
        if len(index):
            probs[i] = counts[index[0]]
    # Laplace smoothing: unseen assignments keep non-zero mass.
    probs = (probs + alpha) / (probs + alpha).sum()

    # One pair of deterministic indicator leaves (p=0 / p=1) per variable,
    # shared across all products.
    indicators = {
        var: [Bernoulli(scope=[var], p=0),
              Bernoulli(scope=[var], p=1)]
        for var in scope
    }

    prods = []
    for i in range(assignments.shape[0]):
        children = []
        for j in range(assignments.shape[1]):
            children.append(indicators[scope[j]][assignments[i, j]])
            # children.append(Bernoulli(scope=[scope[j]], p=assignments[i, j]))
        prods.append(Product(children=children))

    if len(prods) > 1:
        disj = Sum(children=prods, weights=probs)
    else:
        disj = prods[0]

    assign_ids(disj)
    rebuild_scopes_bottom_up(disj)
    return disj
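# Usage sketch for create_disj (the toy arrays below are made-up inputs,
# not from the original code): both assignments of two binary variables
# are enumerated, and the smoothed counts define the mixture weights.
# Note the p=0/p=1 indicator leaves can emit -inf leaf log-likelihoods
# for non-matching assignments; the sum node handles this via logsumexp.
import numpy as np
from spn.algorithms.Inference import log_likelihood

toy_data = np.array([[0, 1], [0, 1], [1, 1]])
toy_assignments = np.array([[0, 1], [1, 1]])
disj = create_disj(toy_data, scope=[0, 1], assignments=toy_assignments, alpha=1.0)
print(log_likelihood(disj, np.array([[0.0, 1.0]])))  # log((2+1)/5) for the frequent assignment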
def create_spflow_spn(n_feats, ctype=Gaussian):
    """
    Builds a small random SPN over `n_feats` features (assumed even):
    two randomly initialized leaves per feature, pairwise products,
    a uniform-weight sum over each feature pair, and a root product.
    """
    children1 = []
    children2 = []
    for i in range(n_feats):
        if ctype == Gaussian:
            c1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
            c2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        else:
            c1 = Bernoulli(p=np.random.rand(), scope=i)
            c2 = Bernoulli(p=np.random.rand(), scope=i)
        children1.append(c1)
        children2.append(c2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        prods1.append(Product([children1[i], children1[i + 1]]))
        prods2.append(Product([children2[i], children2[i + 1]]))

    sums = []
    for i in range(n_feats // 2):
        sums.append(Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]]))

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
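# Minimal usage sketch for create_spflow_spn: build a 4-feature Gaussian
# SPN and evaluate it on random inputs using SPFlow's standard inference.
import numpy as np
from spn.algorithms.Inference import log_likelihood

spn_example = create_spflow_spn(4)               # n_feats must be even
x = np.random.randn(8, 4).astype(np.float32)
print(log_likelihood(spn_example, x).shape)      # -> (8, 1), one LL per row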
def test_eval_parametric(self):
    data = np.array([1, 1, 1, 1, 1, 1, 1], dtype=np.float32).reshape((1, 7))

    spn = (Gaussian(mean=1.0, stdev=1.0, scope=[0])
           * Exponential(l=1.0, scope=[1])
           * Gamma(alpha=1.0, beta=1.0, scope=[2])
           * LogNormal(mean=1.0, stdev=1.0, scope=[3])
           * Poisson(mean=1.0, scope=[4])
           * Bernoulli(p=0.6, scope=[5])
           * Categorical(p=[0.1, 0.2, 0.7], scope=[6]))

    # The TensorFlow evaluation must match the native log-likelihood.
    ll = log_likelihood(spn, data)
    tf_ll = eval_tf(spn, data)
    self.assertTrue(np.all(np.isclose(ll, tf_ll)))

    # Round-trip: export a copy to a TF graph, read the parameters back
    # into the SPN, and check that the reconstruction is unchanged.
    spn_copy = Copy(spn)
    tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn_copy, data, 1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_graph_to_spn(variable_dict)

    str_val = spn_to_str_equation(spn)
    str_val2 = spn_to_str_equation(spn_copy)
    self.assertEqual(str_val, str_val2)
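# Beyond evaluation, the same TF export path can fit leaf parameters by
# gradient descent. A minimal sketch, assuming SPFlow's optimize_tf
# helper is available in spn.gpu.TensorFlow; the toy SPN, data, and
# epochs value here are illustrative, not from the test above.
import numpy as np
from spn.structure.leaves.parametric.Parametric import Gaussian
from spn.gpu.TensorFlow import optimize_tf
from spn.algorithms.Inference import log_likelihood

toy_spn = (Gaussian(mean=0.0, stdev=1.0, scope=[0])
           * Gaussian(mean=0.0, stdev=1.0, scope=[1]))
train = np.random.randn(100, 2).astype(np.float32)
fitted = optimize_tf(toy_spn, train, epochs=200)
print(log_likelihood(fitted, train).mean())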
def create_conj(data, scope, alpha):
    """
    Builds a smoothed conjunction: a product of Bernoulli leaves whose
    parameters concentrate on the first row of `data`, with Laplace
    smoothing `alpha` keeping every probability strictly inside (0, 1).
    """
    conj = Product(children=[
        Bernoulli(scope=[scope[k]],
                  p=(data[0][k] * data.shape[0] + alpha) / (data.shape[0] + 2 * alpha))
        for k in range(len(scope))
    ])
    assign_ids(conj)
    rebuild_scopes_bottom_up(conj)
    return conj
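# Usage sketch for create_conj (toy inputs): the resulting product puts
# almost all of its mass on the assignment given by the first data row.
import numpy as np
from spn.algorithms.Inference import log_likelihood

rows = np.array([[1, 0, 1]] * 10)
conj = create_conj(rows, scope=[0, 1, 2], alpha=0.1)
print(log_likelihood(conj, np.array([[1.0, 0.0, 1.0]])))  # close to 0, i.e. prob close to 1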
def create_naive_fact(data, scope, alpha):
    """
    Returns a naive factorization of the data: a product of independent
    Bernoulli leaves with Laplace-smoothed marginal probabilities.
    Smoothing with `alpha` is optional in principle, but without it a
    zero empirical marginal yields a zero probability and the
    log-likelihood underflows to -inf.
    """
    probs = (np.sum(data, axis=0) + alpha) / (data.shape[0] + 2 * alpha)
    naive_fact = Product(children=[
        Bernoulli(p=probs[k], scope=[scope[k]]) for k in range(len(scope))
    ])
    assign_ids(naive_fact)
    rebuild_scopes_bottom_up(naive_fact)
    return naive_fact
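# Usage sketch for create_naive_fact: the leaf parameters are just the
# smoothed column means of the binary data matrix.
import numpy as np

binary_data = np.random.binomial(1, 0.3, size=(50, 4))
nf = create_naive_fact(binary_data, scope=[0, 1, 2, 3], alpha=1.0)
print([c.p for c in nf.children])  # approximately the empirical marginals (~0.3)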
def test_binary(self):
    A = 0.4 * (Bernoulli(p=0.8, scope=0) *
               (0.3 * (Bernoulli(p=0.7, scope=1) * Bernoulli(p=0.6, scope=2)) +
                0.7 * (Bernoulli(p=0.5, scope=1) * Bernoulli(p=0.4, scope=2)))) \
        + 0.6 * (Bernoulli(p=0.8, scope=0) *
                 Bernoulli(p=0.7, scope=1) *
                 Bernoulli(p=0.6, scope=2))

    setup_cpp_bridge(A)
    spn_cc_eval_func_bernoulli = get_cpp_function(A)

    num_data = 200000
    data = (np.random.binomial(1, 0.3, size=(num_data)).astype("float32").tolist()
            + np.random.binomial(1, 0.3, size=(num_data)).astype("float32").tolist()
            + np.random.binomial(1, 0.3, size=(num_data)).astype("float32").tolist())
    data = np.array(data).reshape((-1, 3))

    num_nodes = len(get_nodes_by_type(A))
    lls_matrix = np.zeros((num_data, num_nodes))

    # Test every single lls_matrix element against the C++ evaluation.
    _ = log_likelihood(A, data, lls_matrix=lls_matrix)
    c_ll = spn_cc_eval_func_bernoulli(data)
    self.assertTrue(np.allclose(lls_matrix, c_ll))

    # Testing MPE: drop one random entry per row (marked as np.nan) and
    # check that the C++ completion matches the Python one.
    spn_cc_mpe_func_bernoulli = get_cpp_mpe_function(A)
    for i in range(data.shape[0]):
        drop_col = np.random.binomial(data.shape[1] - 1, 0.5)
        data[i, drop_col] = np.nan

    cc_completion = spn_cc_mpe_func_bernoulli(data)
    py_completion = mpe(A, data)
    self.assertTrue(np.allclose(py_completion, cc_completion))
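# Minimal sketch of the NaN-as-missing convention used in the test:
# mpe() fills np.nan entries with their most probable values given the
# observed ones. Reuses the SPN `A` from above purely for illustration.
import numpy as np
from spn.algorithms.MPE import mpe

query = np.array([[np.nan, 1.0, 0.0]])
print(mpe(A, query))  # nan replaced by the most likely value of scope 0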
#
# poisson
poisson = Poisson(mean=5, scope=[0])
pdf_x, pdf_y = approximate_density(poisson, x_range)
fig, ax = plt.subplots(1, 1)
ax.plot(pdf_x, pdf_y, label="poisson")
print('Poisson Mode:', poisson.mode)
plt.axvline(x=poisson.mode, color='r')
if show_plots:
    plt.show()

#
# bernoulli
bernoulli = Bernoulli(p=.7, scope=[0])
pdf_x, pdf_y = approximate_density(bernoulli, [0.0, 1.0])
fig, ax = plt.subplots(1, 1)
ax.plot(pdf_x, pdf_y, label="bernoulli")
print('Bernoulli Mode:', bernoulli.mode)
plt.axvline(x=bernoulli.mode, color='r')
if show_plots:
    plt.show()

#
# NegativeBinomial
# negativebinomial = NegativeBinomial(n=5, p=0.7, scope=[0])
# pdf_x, pdf_y = approximate_density(negativebinomial, x_range)
# fig, ax = plt.subplots(1, 1)
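# A quick numerical cross-check (a sketch, assuming approximate_density
# returns aligned x/density arrays as used above): the argmax of the
# approximated density should coincide with the leaf's .mode attribute.
import numpy as np

mode_numeric = pdf_x[np.argmax(pdf_y)]
print('Bernoulli mode (numeric vs. attribute):', mode_numeric, bernoulli.mode)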
import matplotlib.pyplot as plt

from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up

# data1 = [1.0, 5.0] * 100
# data2 = [10.0, 12.0] * 100
# data = data1 + data2
# data = np.array(data).reshape((-1, 2))
# data = data.astype(np.float32)

# g0 = Gaussian(mean=0, stdev=1, scope=0)
# g1 = Gaussian(mean=0, stdev=1, scope=1)
# p0 = Product(children=[g0, g1])
# p1 = Product(children=[g0, g1])
# spn1 = Sum(weights=[0.5, 0.5], children=[p0, p1])

x = Bernoulli(p=0.9, scope=0)
y = Bernoulli(p=0.3, scope=1)
a1 = Bernoulli(p=0.5, scope=2)
a2 = Bernoulli(p=0.01, scope=2)
b1 = Bernoulli(p=0.09, scope=3)
b2 = Bernoulli(p=0.03, scope=3)

# s1 shares its weight vector with its sibling s0 instead of owning one.
s0 = Sum_sharedWeights(weights=[0.34, 0.66], children=[a1, a2])
s1 = Sum_sharedWeights(sibling=s0, children=[b1, b2])
# s1 = Sum_sharedWeights(weights=[0.1, 0.9], children=[b1, b2])

spn = Product(children=[s0, s1, x, y])
assign_ids(spn)
rebuild_scopes_bottom_up(spn)

valid, err = is_valid(spn)
print(f"Model is valid: {valid}\n")
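# Evaluation sketch for the shared-weight SPN above: since s0 and s1
# read the same weight vector, updating it in one place changes both
# mixtures. Sum_sharedWeights is project-specific; the call below only
# assumes SPFlow's standard inference API.
import numpy as np
from spn.algorithms.Inference import log_likelihood

sample = np.array([[1.0, 0.0, 1.0, 0.0]])
print(log_likelihood(spn, sample))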