def valid():
    # create_SPN() and marginalize() are the demo constructors defined
    # elsewhere in this section
    spn = create_SPN()
    spn_marg = marginalize()
    from spn.algorithms.Validity import is_valid
    print(is_valid(spn))
    print(is_valid(spn_marg))
def SPN_Reshape(node, max_children=2):
    v, err = is_valid(node)
    assert v, err

    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        if len(n.children) <= max_children:
            continue

        # node has more than max_children children: group them into chunks
        # and insert an intermediate layer, yielding a max_children-ary hierarchy
        new_children = []
        new_weights = []
        for i in range(0, len(n.children), max_children):
            children = n.children[i:i + max_children]

            if len(children) > 1:
                if isinstance(n, Product):
                    newChild = Product()
                    for c in children:
                        newChild.scope.extend(c.scope)
                    newChild.children.extend(children)
                    new_children.append(newChild)
                else:  # Sum
                    weights = n.weights[i:i + max_children]
                    branch_weight = sum(weights)
                    new_weights.append(branch_weight)

                    newChild = Sum()
                    newChild.scope.extend(children[0].scope)
                    newChild.children.extend(children)
                    newChild.weights.extend([w / branch_weight for w in weights])
                    # absorb floating-point drift so the weights sum to exactly 1
                    newChild.weights[0] = 1.0 - sum(newChild.weights[1:])
                    new_children.append(newChild)
            else:
                new_children.extend(children)

        # a trailing singleton chunk was added without a weight; give it the
        # remaining mass (the guard avoids appending a spurious weight when
        # every chunk already carried one)
        if isinstance(n, Sum) and len(new_weights) < len(new_children):
            new_weights.append(1.0 - sum(new_weights))

        n.children = new_children
        if isinstance(n, Sum):
            n.weights = new_weights
        # re-queue n: the new layer may still have more than max_children children
        nodes.append(n)

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
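# Hedged usage sketch for SPN_Reshape, assuming the SPFlow Base and parametric
# leaf imports below; the wide three-child sum is illustrative, not from the
# original code.
from spn.structure.Base import Sum, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

wide = Sum(weights=[0.2, 0.3, 0.5],
           children=[Gaussian(mean=m, stdev=1, scope=0) for m in (-1, 0, 1)])
assign_ids(wide)
rebuild_scopes_bottom_up(wide)
binary = SPN_Reshape(wide, max_children=2)  # root now has at most 2 children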
def meu(node,
        input_data,
        node_top_down_meu=_node_top_down_meu,
        node_bottom_up_meu=_node_bottom_up_meu,
        in_place=False):
    valid, err = is_valid(node)
    assert valid, err

    if in_place:
        data = input_data
    else:
        data = np.array(input_data)

    nodes = get_nodes_by_type(node)
    lls_per_node = np.zeros((data.shape[0], len(nodes)))

    # one pass bottom up evaluating the likelihoods
    # log_likelihood(node, data, dtype=data.dtype, node_log_likelihood=node_bottom_up_meu, lls_matrix=lls_per_node)
    likelihood(node, data, dtype=data.dtype, node_likelihood=node_bottom_up_meu, lls_matrix=lls_per_node)
    meu_val = lls_per_node[:, 0]

    instance_ids = np.arange(data.shape[0])

    # one pass top down, following the max branch until a leaf is reached;
    # returns all_result and the decision taken at each max node per instance
    all_result, all_decisions = eval_spn_top_down_meu(
        node, node_top_down_meu, parent_result=instance_ids, data=data,
        lls_per_node=lls_per_node)
    decisions = merge_rows_for_decisions(all_decisions)

    return meu_val, decisions
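# Hedged usage sketch: `spmn_root` stands in for a decision network (SPMN)
# containing Max nodes over the decision variable; it is hypothetical here and
# would come from an SPMN learner.
import numpy as np

evidence = np.array([[1.0, np.nan, 0.0]])  # NaN marks the unfixed decision slot
meu_vals, decisions = meu(spmn_root, evidence)
print(meu_vals[0], decisions)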
def test_piecewise_linear_simple(self):
    piecewise_spn = 0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) + \
                    0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])
    # is_valid returns (bool, err); unpack it so the assertion can actually fail
    self.assertTrue(*is_valid(piecewise_spn))
    mean = get_means(piecewise_spn)
    self.assertTrue(np.all(mean == np.array([[0]])))
def test_piecewise_leaf(self):
    piecewise1 = PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0])
    piecewise2 = PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])
    self.assertTrue(*is_valid(piecewise1))
    self.assertTrue(*is_valid(piecewise2))
    self.assertTrue(
        np.array_equal(mpe(piecewise1, np.array([[np.nan]])), np.array([[1]])),
        "mpe should be 1")
    self.assertTrue(
        np.array_equal(mpe(piecewise2, np.array([[np.nan]])), np.array([[-1]])),
        "mpe should be -1")

    # mpe requires at least one NaN per row, so full evidence must raise
    with self.assertRaises(AssertionError):
        mpe(piecewise1, np.array([[1]]))
def sample_instances(node, input_data, rand_gen, node_sampling=_node_sampling, in_place=False):
    """
    Implementing hierarchical sampling
    """
    # first, we do a bottom-up pass to compute the likelihood, taking into account marginals.
    # then we do a top-down pass to sample, taking into account the likelihoods.
    if in_place:
        data = input_data
    else:
        data = np.array(input_data)

    valid, err = is_valid(node)
    assert valid, err

    assert np.all(np.any(np.isnan(data), axis=1)), \
        "each row must have at least one NaN value where the samples will be substituted"

    nodes = get_nodes_by_type(node)

    lls_per_node = np.zeros((data.shape[0], len(nodes)))

    log_likelihood(node, data, dtype=data.dtype, lls_matrix=lls_per_node)

    instance_ids = np.arange(data.shape[0])

    eval_spn_top_down(node, node_sampling, input_vals=instance_ids, data=data,
                      lls_per_node=lls_per_node, rand_gen=rand_gen)

    return data
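# Hedged usage sketch, following the SPFlow API: sample the NaN cells of each
# row conditioned on the observed values; create_SPN2() is defined below.
import numpy as np
from numpy.random.mtrand import RandomState

spn = create_SPN2()
placeholders = np.array([np.nan, 0, 0] * 5).reshape(-1, 3)
samples = sample_instances(spn, placeholders, RandomState(123))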
def mpe(
    node,
    input_data,
    node_top_down_mpe=_node_top_down_mpe,
    node_bottom_up_mpe_log=_node_bottom_up_mpe_log,
    in_place=False,
):
    valid, err = is_valid(node)
    assert valid, err

    assert np.all(np.any(np.isnan(input_data), axis=1)), \
        "each row must have at least one NaN value where the samples will be substituted"

    if in_place:
        data = input_data
    else:
        data = np.array(input_data)

    nodes = get_nodes_by_type(node)

    lls_per_node = np.zeros((data.shape[0], len(nodes)))

    # one pass bottom up evaluating the likelihoods
    log_likelihood(node, data, dtype=data.dtype,
                   node_log_likelihood=node_bottom_up_mpe_log, lls_matrix=lls_per_node)

    instance_ids = np.arange(data.shape[0])

    # one pass top down, following the max branch until a leaf is reached,
    # then the NaN slot is filled with the leaf's mode
    eval_spn_top_down(node, node_top_down_mpe, parent_result=instance_ids,
                      data=data, lls_per_node=lls_per_node)

    return data
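# Hedged usage sketch: most-probable-explanation completion of missing cells,
# mirroring the SPFlow README; create_SPN2() is defined below.
import numpy as np

spn = create_SPN2()
completed = mpe(spn, np.array([[np.nan, 0.0, 0.0],
                               [np.nan, 1.0, 1.0]]))
print(completed)  # NaNs replaced by the MPE value of variable 0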
def marginalize(node, scope):
    # here `scope` is the set of variables to sum out: any node whose scope
    # lies entirely inside it is removed
    assert isinstance(scope, set), "scope must be a set"

    def marg_recursive(node):
        node_scope = set(node.scope)
        if node_scope.issubset(scope):
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')
            return node

        newNode = node.__class__()

        # a sum node gets copied with all its children, or gets removed completely
        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)
        for i, c in enumerate(node.children):
            newChildren = marg_recursive(c)
            if newChildren is None:
                continue
            newNode.children.append(newChildren)
        return newNode

    newNode = marg_recursive(node)
    rebuild_scopes_bottom_up(newNode)
    newNode = prune(newNode)
    # is_valid returns (bool, err); unpack it so the assertion can actually fail
    v, err = is_valid(newNode)
    assert v, err
    # assign ids on the new structure, not the original root
    assign_ids(newNode)
    return newNode
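# Hedged usage sketch: with this variant the passed set is summed OUT (an
# assumption read off the issubset test above); create_SPN2() is defined below.
spn = create_SPN2()
spn_marg = marginalize(spn, {0})  # sums out variable 0, keeping {1, 2}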
def EM_optimization(spn, data, iterations=5, node_updates=_node_updates, skip_validation=False, **kwargs):
    if not skip_validation:
        valid, err = is_valid(spn)
        assert valid, "invalid spn: " + err

    lls_per_node = np.zeros((data.shape[0], get_number_of_nodes(spn)))

    for _ in range(iterations):
        # one pass bottom up evaluating the likelihoods
        log_likelihood(spn, data, dtype=data.dtype, lls_matrix=lls_per_node)

        gradients = gradient_backward(spn, lls_per_node)

        R = lls_per_node[:, 0]

        for node_type, func in node_updates.items():
            for node in get_nodes_by_type(spn, node_type):
                func(
                    node,
                    node_lls=lls_per_node[:, node.id],
                    node_gradients=gradients[:, node.id],
                    root_lls=R,
                    all_lls=lls_per_node,
                    all_gradients=gradients,
                    data=data,
                    **kwargs
                )
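# Hedged usage sketch: a few EM steps on synthetic data; the two-component
# Gaussian mixture below is illustrative and assumes the SPFlow shorthand
# operators (as used in the compression tests in this section).
import numpy as np
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

mix = 0.5 * Gaussian(mean=-1.0, stdev=1.0, scope=0) + \
      0.5 * Gaussian(mean=1.0, stdev=1.0, scope=0)
assign_ids(mix)
rebuild_scopes_bottom_up(mix)
train = np.random.randn(500, 1) + 2.0
EM_optimization(mix, train, iterations=10)  # parameters move toward the data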
def create_SPN2():
    from spn.structure.Base import assign_ids
    from spn.structure.Base import rebuild_scopes_bottom_up
    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical
    from spn.structure.Base import Sum, Product

    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
    spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    val, msg = is_valid(spn)
    assert val, msg

    return spn
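# Hedged usage sketch, mirroring the SPFlow README: evaluate the joint
# probability of a complete assignment under the SPN built above.
import numpy as np
from spn.algorithms.Inference import log_likelihood

spn = create_SPN2()
test_data = np.array([1.0, 0.0, 1.0]).reshape(-1, 3)
ll = log_likelihood(spn, test_data)
print(ll, np.exp(ll))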
def test_piecewise_linear_simple(self):
    piecewise_spn = 0.5 * PiecewiseLinear(
        [0, 1, 2], [0, 1, 0], [], scope=[0]) + 0.5 * PiecewiseLinear(
        [-2, -1, 0], [0, 1, 0], [], scope=[0])
    self.assertTrue(*is_valid(piecewise_spn))
    mean = get_mean(piecewise_spn)
    # compare the scalar entry; assertAlmostEqual does not handle arrays
    self.assertAlmostEqual(0.0, mean[0, 0], 5)
def Prune(node):
    v, err = is_valid(node)
    assert v, err

    nodes = get_nodes_by_type(node, (Product, Sum, Max))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child of its own, bypass it and
            # link directly to the grandchild
            if not (isinstance(c, Leaf) or isinstance(c, Max)) and len(c.children) == 1:
                n.children[i] = c.children[0]
                continue

            # merge a child of the same type into this node
            if n_type == type(c):
                del n.children[i]

                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]

                    n.weights.extend([cw * w for cw in c.weights])

                continue

            i += 1
        if is_sum and i > 0:
            # absorb floating-point drift so the weights sum to exactly 1
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if isinstance(node, (Product, Sum)) and len(node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
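# Hedged usage sketch: Prune flattens a product nested inside another product;
# the two-level structure below is illustrative.
from spn.structure.Base import Product, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

inner = Product(children=[Gaussian(mean=0, stdev=1, scope=0),
                          Gaussian(mean=0, stdev=1, scope=1)])
outer = Product(children=[inner, Gaussian(mean=0, stdev=1, scope=2)])
assign_ids(outer)
rebuild_scopes_bottom_up(outer)
flat = Prune(outer)
print(len(flat.children))  # 3: the nested product was merged away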
def Prune(node, contract_single_parents=True, ds_context=None):
    v, err = is_valid(node)
    assert v, err

    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child of its own, bypass it and
            # link directly to the grandchild
            if contract_single_parents and not isinstance(c, Leaf) and len(c.children) == 1:
                n.children[i] = c.children[0]
                continue

            if n_type == type(c):
                del n.children[i]

                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]

                    # #merge rules
                    # n.rule = n.rule.merge(c.rule, ds_context)

                    n.weights.extend([cw * w for cw in c.weights])

                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if contract_single_parents and isinstance(node, (Product, Sum)) and len(node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
def test_histogram_combined(self):
    piecewise_spn = ((0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) +
                      0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])) *
                     (0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[1]) +
                      0.5 * PiecewiseLinear([-1, 0, 1], [0, 1, 0], [], scope=[1])))
    self.assertTrue(*is_valid(piecewise_spn))
    mean = get_means(piecewise_spn)
    self.assertTrue(np.all(mean == np.array([[0., 0.5]])))
def test_compression_leaves(self):
    C1 = Gaussian(mean=1, stdev=0, scope=0)
    C2 = Gaussian(mean=1, stdev=0, scope=0)
    A = 0.7 * C1 + 0.3 * C2
    Compress(A)
    self.assertTrue(*is_valid(A))
    # identical leaves over the same scope are merged into one shared object
    self.assertEqual(id(A.children[0]), id(A.children[1]))

    C1 = Gaussian(mean=1, stdev=0, scope=0)
    C2 = Gaussian(mean=1, stdev=0, scope=1)
    B = C1 * C2
    Compress(B)
    self.assertTrue(*is_valid(B))
    # same parameters but different scopes must not be merged
    self.assertNotEqual(id(B.children[0]), id(B.children[1]))
def test_histogram_combined(self):
    piecewise_spn = (
        0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) +
        0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])) * (
        0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[1]) +
        0.5 * PiecewiseLinear([-1, 0, 1], [0, 1, 0], [], scope=[1]))
    self.assertTrue(*is_valid(piecewise_spn))
    mean = get_mean(piecewise_spn)
    self.assertAlmostEqual(0.0, mean[0, 0], 5)
    self.assertAlmostEqual(0.5, mean[0, 1], 5)
def learn_classifier(data, ds_context, spn_learn_wrapper, label_idx, **kwargs):
    spn = Sum()
    for label, count in zip(*np.unique(data[:, label_idx], return_counts=True)):
        # learn one branch per class on the rows carrying that label,
        # weighted by the empirical class frequency
        branch = spn_learn_wrapper(data[data[:, label_idx] == label, :], ds_context, **kwargs)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])
        spn.scope.extend(branch.scope)

    assign_ids(spn)

    valid, err = is_valid(spn)
    assert valid, "invalid spn: " + err

    return spn
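# Hedged usage sketch, patterned on the SPFlow README classifier example:
# two Gaussian features plus a Categorical label in column 2 (label_idx=2);
# the random training data is illustrative.
import numpy as np
from spn.algorithms.LearningWrappers import learn_parametric
from spn.structure.Base import Context
from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian

train_data = np.c_[np.random.randn(100, 2), np.random.randint(0, 2, 100)]
ds_context = Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(train_data)
classifier = learn_classifier(train_data, ds_context, learn_parametric, 2)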
def test_compression_internal_nodes(self):
    C1 = Gaussian(mean=1, stdev=0, scope=0)
    C2 = Gaussian(mean=1, stdev=1, scope=1)
    C3 = Gaussian(mean=1, stdev=0, scope=0)
    C4 = Gaussian(mean=1, stdev=1, scope=1)

    R = 0.4 * (C1 * C2) + 0.6 * (C3 * C4)
    Compress(R)
    self.assertTrue(*is_valid(R))
    self.assertEqual(id(R.children[0]), id(R.children[1]))
    self.assertEqual(id(R.children[0].children[0]), id(C1))
    self.assertEqual(id(R.children[0].children[1]), id(C2))
def create_SPN():
    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0) *
                 (0.3 * (Categorical(p=[0.3, 0.7], scope=1) *
                         Categorical(p=[0.4, 0.6], scope=2)) +
                  0.7 * (Categorical(p=[0.5, 0.5], scope=1) *
                         Categorical(p=[0.6, 0.4], scope=2)))) \
        + 0.6 * (Categorical(p=[0.2, 0.8], scope=0) *
                 Categorical(p=[0.3, 0.7], scope=1) *
                 Categorical(p=[0.4, 0.6], scope=2))

    # is_valid returns (bool, err); unpack it so the assertion can actually fail
    v, err = is_valid(spn)
    assert v, err
    return spn
def _deserialize_model(self, model):
    rootID = model.rootNode
    featureType = model.featureType
    name = model.name
    if name == "":
        name = None
    rootNodes = self._binary_deserialize_graph(model.nodes)
    for root in rootNodes:
        rebuild_scopes_bottom_up(root)
        # is_valid returns (bool, err); unpack it so the assertion can fail
        valid, err = is_valid(root)
        assert valid, "SPN invalid after deserialization: " + str(err)
    rootNode = next((root for root in rootNodes if root.id == rootID), None)
    if rootNode is None:
        logger.error(f"Did not find serialized root node {rootID}")
    return SPNModel(rootNode, featureType, name)
def test_piecewise_linear_simple(self):
    piecewise_spn = 0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) + \
                    0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])
    self.assertTrue(*is_valid(piecewise_spn))

    evidence = np.array([[0.5], [1.5], [-0.5], [-1.5]])
    results = gradient_forward(piecewise_spn, evidence)
    expected_results = np.array([[0.5], [-0.5], [-0.5], [0.5]])
    for i, _ in enumerate(evidence):
        self.assertTrue(
            results[i] == expected_results[i],
            'Expected result was {}, but computed result was {}'.format(
                expected_results[i], results[i]))
def test_compression_leaves_deeper(self):
    C1 = Gaussian(mean=1, stdev=0, scope=0)
    C2 = Gaussian(mean=1, stdev=1, scope=1)
    C3 = Gaussian(mean=1, stdev=0, scope=0)
    C4 = Gaussian(mean=2, stdev=0, scope=1)

    R = 0.4 * (C1 * C2) + 0.6 * (C3 * C4)
    Compress(R)
    self.assertTrue(*is_valid(R))
    self.assertNotEqual(id(R.children[0]), id(R.children[1]))
    self.assertEqual(id(R.children[0].children[0]), id(C1))
    self.assertEqual(id(R.children[0].children[1]), id(C2))
    self.assertEqual(id(R.children[1].children[0]), id(C1))
    self.assertEqual(id(R.children[1].children[1]), id(C4))
def test_piecewise_linear_combined(self):
    piecewise_spn = (
        0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) +
        0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])) * (
        0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[1]) +
        0.5 * PiecewiseLinear([-1, 0, 1], [0, 1, 0], [], scope=[1]))
    self.assertTrue(*is_valid(piecewise_spn))

    evidence = np.array([[0.5, 0], [-0.5, -0.5], [-1.5, 0.5]])
    results = feature_gradient(piecewise_spn, evidence)
    expected_results = np.array([[0.25, 0.125], [-0.125, 0.125], [0.25, 0]])
    self.assertTrue(
        np.all(np.isclose(results, expected_results, atol=0.000001)),
        "Expected result was {}, but computed result was {}".format(
            expected_results, results),
    )
def test_piecewise_linear_combined(self):
    piecewise_spn = (
        (0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) +
         0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])) *
        (0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[1]) +
         0.5 * PiecewiseLinear([-1, 0, 1], [0, 1, 0], [], scope=[1])))
    self.assertTrue(*is_valid(piecewise_spn))

    evidence = np.array([[0.5, 0], [100, 36], [-0.5, -0.5], [-1.5, 0.5]])
    results = gradient_forward(piecewise_spn, evidence)
    expected_results = np.array([[0.25, 0.125], [0, 0], [-0.125, 0.125], [0.25, 0]])
    for i, _ in enumerate(evidence):
        self.assertTrue(
            np.all(np.equal(results[i], expected_results[i])),
            'Expected result was {}, but computed result was {}'.format(
                expected_results[i], results[i]))
def Compress(node):
    all_parents = get_parents(node)

    cache = {}

    # children are visited before parents, so shared sub-structures are
    # deduplicated bottom-up
    for n in get_topological_order(node):
        params = (n.parameters, tuple(sorted(n.scope)))

        cached_node = cache.get(params, None)
        if cached_node is None:
            cache[params] = n
        else:
            # rewire every parent of the duplicate to the cached instance
            for parent, pos in all_parents[n]:
                parent.children[pos] = cached_node

    assign_ids(node)
    val, msg = is_valid(node)
    assert val, msg
    return node
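# Hedged usage sketch, mirroring test_compression_leaves above: identical
# leaves under a sum collapse into one shared child after Compress.
from spn.structure.leaves.parametric.Parametric import Gaussian

A = 0.7 * Gaussian(mean=1, stdev=0, scope=0) + 0.3 * Gaussian(mean=1, stdev=0, scope=0)
Compress(A)
assert A.children[0] is A.children[1]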
def marginalize(node, keep):
    # keep must be a set of features that you want to keep
    keep = set(keep)

    def marg_recursive(node):
        new_node_scope = keep.intersection(set(node.scope))

        if len(new_node_scope) == 0:
            # we are summing out this node
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')
            return deepcopy(node)

        newNode = node.__class__()

        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)

        for c in node.children:
            new_c = marg_recursive(c)
            if new_c is None:
                continue
            newNode.children.append(new_c)

        newNode.scope.extend(new_node_scope)
        return newNode

    newNode = marg_recursive(node)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    return newNode
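# Hedged usage sketch, mirroring the SPFlow README: keep variables 1 and 2,
# summing out everything else; create_SPN2() is defined above.
spn = create_SPN2()
spn_marg = marginalize(spn, [1, 2])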
def EM_optimization(spn, data, iterations=5, node_updates=_node_updates, skip_validation=False, **kwargs):
    if not skip_validation:
        valid, err = is_valid(spn)
        assert valid, "invalid spn: " + err

    lls_per_node = np.zeros((data.shape[0], get_number_of_nodes(spn)))

    # node_updates = {Sum_sharedWeights: sum_em_update_shared}

    for _ in range(iterations):
        # one pass bottom up evaluating the likelihoods
        log_likelihood(spn, data, lls_matrix=lls_per_node)  # dtype=data.dtype

        gradients = gradient_backward(spn, lls_per_node)

        # collect the current weights of shared-weight sum nodes (None otherwise)
        weights = [
            node.weights if isinstance(node, Sum_sharedWeights) else None
            for node in get_nodes_by_type(spn)
        ]

        R = lls_per_node[:, 0]

        for node_type, func in node_updates.items():
            for node in get_nodes_by_type(spn, node_type):
                func(node,
                     node_lls=lls_per_node[:, node.id],
                     node_gradients=gradients[:, node.id],
                     root_lls=R,
                     all_lls=lls_per_node,
                     all_gradients=gradients,
                     data=data,
                     spn=spn,
                     weights=weights,
                     **kwargs)
def _serialize_model(self, model):
    msg = spflow_capnp.Model.new_message()
    # is_valid returns (bool, err); unpack it so the assertion can fail
    valid, err = is_valid(model.root)
    assert valid, "SPN invalid before serialization: " + str(err)
    # Assign (new) IDs to the nodes.
    # Keep track of already assigned IDs, so the IDs are unique for the whole file.
    assign_ids(model.root, self.assignedIDs)
    # Rebuild scopes bottom-up
    rebuild_scopes_bottom_up(model.root)
    msg.rootNode = model.root.id
    msg.numFeatures = len(model.root.scope)
    msg.featureType = model.featureType
    scope = msg.init("scope", len(model.root.scope))
    for i, v in enumerate(model.root.scope):
        scope[i] = self._unwrap_value(v)
    name = ""
    if model.name is not None:
        name = model.name
    msg.name = name
    numNodes = get_number_of_nodes(model.root)
    nodes = msg.init("nodes", numNodes)
    nodeList = ListHandler(nodes)
    self._serialize_graph([model.root], nodeList)
    return msg
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
        num_conditional_cols = None
    elif len(initial_scope) < dataset.shape[1]:
        num_conditional_cols = dataset.shape[1] - len(initial_scope)
    else:
        num_conditional_cols = None
        # the scope can never cover more variables than there are columns
        assert len(initial_scope) <= dataset.shape[1], "check initial scope: %s" % initial_scope

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope, False, False))

    while tasks:
        local_data, parent, children_pos, scope, no_clusters, no_independencies = tasks.popleft()

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False
            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]

            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))

            continue

        elif operation == Operation.SPLIT_ROWS:

            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append((local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope, False, False))

            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append((local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1, scope_slice, False, False))

            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            for col in range(len(scope)):
                node.children.append(None)
                # tasks.append((data_slicer(local_data, [col], num_conditional_cols), node, len(node.children) - 1, [scope[col]], True, True))
                local_tasks.append(len(node.children) - 1)
                child_data_slice = data_slicer(local_data, [col], num_conditional_cols)
                local_children_params.append((child_data_slice, ds_context, [scope[col]]))

            # `pool` is assumed to be a module-level multiprocessing pool here:
            # the leaves of the naive factorization are created in parallel
            result_nodes = pool.starmap(create_leaf, local_children_params)
            # result_nodes = []
            # for l in tqdm(local_children_params):
            #     result_nodes.append(create_leaf(*l))
            # result_nodes = [create_leaf(*l) for l in local_children_params]

            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()
            logging.debug("\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                len(scope), split_end_t - split_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()
            logging.debug("\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            raise Exception("Invalid operation: " + str(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    return node
def spn_for_evidence(spn, evidence_ranges, node_likelihood=None, distribution_update_ranges=None):
    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    def spn_for_evidence_recursive(node):
        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            if evidence_ranges[node.scope[0]] is not None:
                t_node = type(node)
                if t_node in node_likelihood:
                    ranges = np.array([evidence_ranges])
                    prob = node_likelihood[t_node](node, ranges, node_likelihood=node_likelihood)[0][0]
                    newNode = deepcopy(node)
                    # only condition the leaf's distribution on the evidence
                    # if it has positive probability mass there
                    if prob != 0:
                        distribution_update_ranges[t_node](newNode, evidence_ranges[node.scope[0]])
                else:
                    raise Exception('No log-likelihood method specified for node type: ' + str(type(node)))
            else:
                prob = 1
                newNode = deepcopy(node)

            return prob, newNode

        newNode = node.__class__()
        newNode.scope = node.scope

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)

            new_weights = np.array(new_weights)
            newNode.weights = new_weights / np.sum(new_weights)
            newNode.children = new_childs
            return np.sum(new_weights), newNode

        elif isinstance(node, Product):
            new_childs = []
            new_prob = 1.
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)

            newNode.children = new_childs
            return new_prob, newNode

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    return prob, newNode