def to_layers(spn, sparse=True, copy=True):
    with elapsed_timer() as e:
        if copy:
            spn = Copy(spn)
        print('copy', e())
        spn = Prune(spn, contract_single_parents=False)
        print('prune', e())
        complete_layers([spn], type(spn))
        print('complete layers', e())
        node_layers = get_topological_order_layers(spn)
        print('topo search', e())
        print('nr layers', len(node_layers))

        layers = [LeafLayer(node_layers[0])]
        for i in tqdm(range(1, len(node_layers))):
            cur_layer = node_layers[i]
            prev_layer = node_layers[i - 1]

            scope = get_scope(cur_layer, prev_layer, sparse)

            if isinstance(cur_layer[0], Sum):
                weights = np.concatenate(list(map(lambda x: x.weights, cur_layer)))
                layers.append(SumLayer(cur_layer, scope, weights))
            else:
                layers.append(ProductLayer(cur_layer, scope))
        print('to layer objects', e())
    return layers
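
# Usage sketch (hedged): builds a tiny two-feature SPN with SPFlow parametric
# leaves and converts it to layer objects. The Base helpers and the Gaussian
# leaf are imports from a standard SPFlow installation and an assumption about
# the surrounding codebase; LeafLayer/SumLayer/ProductLayer come from this module.
def _demo_to_layers():
    from spn.structure.Base import Sum, Product, assign_ids, rebuild_scopes_bottom_up
    from spn.structure.leaves.parametric.Parametric import Gaussian

    spn = Sum(weights=[0.5, 0.5], children=[
        Product(children=[Gaussian(mean=0.0, stdev=1.0, scope=0),
                          Gaussian(mean=1.0, stdev=1.0, scope=1)]),
        Product(children=[Gaussian(mean=2.0, stdev=1.0, scope=0),
                          Gaussian(mean=3.0, stdev=1.0, scope=1)]),
    ])
    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    # expected: LeafLayer, ProductLayer, SumLayer (bottom to top)
    layers = to_layers(spn)
    print([type(l).__name__ for l in layers])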
def condition(spn, evidence):
    # condition on the variables with observed (non-NaN) evidence; computing
    # the NaN mask once avoids re-evaluating np.isnan per scope index
    observed = ~np.isnan(evidence)[0]
    scope = set(i for i in range(len(spn.scope)) if observed[i])

    node_conditions = {
        type(leaf): leaf_condition
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_conditions.update({Sum: sum_condition, Product: prod_condition})

    new_root, val = eval_spn_bottom_up(spn, node_conditions, input_vals=evidence, scope=scope)
    assign_ids(new_root)
    return Prune(new_root)
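
# Usage sketch (hedged): conditions a toy two-variable SPN on observing
# feature 1 = 0 while leaving feature 0 free (NaN marks free variables).
# The Categorical leaf and Base helpers are standard SPFlow imports and an
# assumption about the surrounding codebase; leaf_condition, sum_condition
# and prod_condition must be defined in this module as used above.
def _demo_condition():
    import numpy as np
    from spn.structure.Base import Sum, Product, assign_ids, rebuild_scopes_bottom_up
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn = Sum(weights=[0.3, 0.7], children=[
        Product(children=[Categorical(p=[0.2, 0.8], scope=0),
                          Categorical(p=[0.5, 0.5], scope=1)]),
        Product(children=[Categorical(p=[0.7, 0.3], scope=0),
                          Categorical(p=[0.1, 0.9], scope=1)]),
    ])
    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    evidence = np.array([[np.nan, 0.0]])  # feature 0 free, feature 1 observed as 0
    return condition(spn, evidence)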
def learn_spmn(self, data):
    """Learn the structure of an SPMN from data.

    :param data: training data with one column per variable in the partial order
    :return: root node of the learned SPMN structure
    """
    index = 0
    curr_information_set_scope = list(range(len(self.params.partial_order[0])))
    remaining_vars_scope = list(range(len(self.params.feature_names)))
    self.set_next_operation('Any')
    self.spmn_structure = self.__learn_spmn_structure(
        data, remaining_vars_scope, curr_information_set_scope, index)
    # Prune returns the (possibly replaced) root, so keep the result instead
    # of discarding it
    self.spmn_structure = Prune(self.spmn_structure)
    return self.spmn_structure
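
# Usage sketch (hedged): learn_spmn is a method, so it needs the surrounding
# SPMN wrapper class holding params.partial_order, params.feature_names and
# set_next_operation. The constructor arguments below follow the SPFlow SPMN
# interface but are assumptions about this codebase; illustrative only.
#
# spmn = SPMN(partial_order, decision_nodes, utility_nodes, feature_names, meta_types)
# spmn_root = spmn.learn_spmn(train_data)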
def to_compressed_layers(spn):
    with elapsed_timer() as e:
        spn = Copy(spn)
        print('copy', e())
        spn = Prune(spn, contract_single_parents=False)
        print('prune', e())
        complete_layers([spn], type(spn))
        print('complete layers', e())
        node_layers = get_topological_order_layers(spn)
        print('topo search', e())
        print('nr layers', len(node_layers))

        layers = [LeafLayer(node_layers[0])]
        for i in range(1, len(node_layers)):
            cur_layer = node_layers[i]
            prev_layer = node_layers[i - 1]

            cur_is_sum = isinstance(cur_layer[0], Sum)
            prev_is_prod = isinstance(prev_layer[0], Product)
            # print(i, cur_is_sum, prev_is_prod)

            if cur_is_sum:
                weights = list(map(lambda x: x.weights, cur_layer))

            if cur_is_sum and prev_is_prod:
                # build sp layer: remove prod from previous layer
                layers.pop()
                scopes = get_two_layer_scopes(cur_layer, node_layers[i - 2], True)
                layers.append(SumProductLayer(cur_layer, scopes, weights))
            else:
                scope = get_scope(cur_layer, prev_layer, True)
                if cur_is_sum:
                    layers.append(SumLayer(cur_layer, scope, weights))
                else:
                    layers.append(ProductLayer(cur_layer, scope))
        print('to layer objects', e())
    return layers
def marginalize(node, keep):
    # keep must be a set of features that you want to keep
    keep = set(keep)

    def marg_recursive(node):
        new_node_scope = keep.intersection(set(node.scope))

        if len(new_node_scope) == 0:
            # we are summing out this node
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')
            return deepcopy(node)

        newNode = node.__class__()

        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)

        for c in node.children:
            new_c = marg_recursive(c)
            if new_c is None:
                continue
            newNode.children.append(new_c)

        newNode.scope.extend(new_node_scope)
        return newNode

    newNode = marg_recursive(node)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    return newNode
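
# Usage sketch (hedged): keeps feature 0 of a toy two-feature SPN and sums out
# feature 1. The Categorical leaf and Base helpers are standard SPFlow imports
# and an assumption about the surrounding codebase.
def _demo_marginalize():
    from spn.structure.Base import Sum, Product, assign_ids, rebuild_scopes_bottom_up
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn = Sum(weights=[0.4, 0.6], children=[
        Product(children=[Categorical(p=[0.2, 0.8], scope=0),
                          Categorical(p=[0.5, 0.5], scope=1)]),
        Product(children=[Categorical(p=[0.7, 0.3], scope=0),
                          Categorical(p=[0.1, 0.9], scope=1)]),
    ])
    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    # the result is a mixture over feature 0 only
    return marginalize(spn, keep={0})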
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
        num_conditional_cols = None
    elif len(initial_scope) < dataset.shape[1]:
        num_conditional_cols = dataset.shape[1] - len(initial_scope)
    else:
        num_conditional_cols = None
        # the scope can never exceed the number of columns; the original
        # comparison (>) was inverted and failed in the only valid case
        assert len(initial_scope) == dataset.shape[1], "check initial scope: %s" % initial_scope

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope, False, False))

    while tasks:
        local_data, parent, children_pos, scope, no_clusters, no_independencies = tasks.popleft()

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False
            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]
            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))
            continue

        elif operation == Operation.SPLIT_ROWS:
            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append((local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"
                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope, False, False))
            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append((local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"
                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1, scope_slice, False, False))
            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            for col in range(len(scope)):
                node.children.append(None)
                # tasks.append((data_slicer(local_data, [col], num_conditional_cols), node, len(node.children) - 1, [scope[col]], True, True))
                local_tasks.append(len(node.children) - 1)
                child_data_slice = data_slicer(local_data, [col], num_conditional_cols)
                local_children_params.append((child_data_slice, ds_context, [scope[col]]))

            result_nodes = pool.starmap(create_leaf, local_children_params)
            # result_nodes = []
            # for l in tqdm(local_children_params):
            #     result_nodes.append(create_leaf(*l))
            # result_nodes = [create_leaf(*l) for l in local_children_params]
            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()
            logging.debug("\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                len(scope), split_end_t - split_start_t))
            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()
            logging.debug("\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            raise Exception("Invalid operation: " + str(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    return node
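
# Usage sketch (hedged): drives learn_structure with SPFlow's stock splitting
# functions and parametric leaves. get_split_rows_KMeans, get_split_cols_RDC_py,
# create_parametric_leaf and Context are imports from a standard SPFlow
# installation and an assumption about this environment; note the
# NAIVE_FACTORIZATION branch above also requires the module-level `pool`.
def _demo_learn_structure():
    import numpy as np
    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Gaussian, create_parametric_leaf
    from spn.algorithms.splitting.Clustering import get_split_rows_KMeans
    from spn.algorithms.splitting.RDC import get_split_cols_RDC_py

    data = np.random.randn(200, 3)
    ds_context = Context(parametric_types=[Gaussian, Gaussian, Gaussian]).add_domains(data)

    spn = learn_structure(data, ds_context,
                          split_rows=get_split_rows_KMeans(),
                          split_cols=get_split_cols_RDC_py(),
                          create_leaf=create_parametric_leaf)
    return spn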
rg_layers = rg.make_layers()
print("random graph built in ", (time.perf_counter() - start))

start = time.perf_counter()
vector_list, root = Make_SPN_from_RegionGraph(
    rg_layers, np.random.RandomState(100), num_classes=1, num_gauss=20, num_sums=20)
print("Make_SPN_from_RegionGraph in ", (time.perf_counter() - start))

start = time.perf_counter()
print(get_structure_stats(root))
print("get_structure_stats in ", (time.perf_counter() - start))

old_root = Copy(root)

start = time.perf_counter()
root = Prune(root)
print("Prune in ", (time.perf_counter() - start))

start = time.perf_counter()
root = SPN_Reshape(root, 2)
print("SPN_Reshape in ", (time.perf_counter() - start))

start = time.perf_counter()
print(get_structure_stats(root))
print("get_structure_stats in ", (time.perf_counter() - start))

start = time.perf_counter()
layers, layer_types = get_execution_layers(root)
print("get_execution_layers in ", (time.perf_counter() - start))
for i, lt in enumerate(layer_types):
def spn_for_evidence(spn, evidence_ranges, node_likelihood=None, distribution_update_ranges=None):
    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    def spn_for_evidence_recursive(node):
        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            if evidence_ranges[node.scope[0]] is not None:
                t_node = type(node)
                if t_node in node_likelihood:
                    ranges = np.array([evidence_ranges])
                    prob = node_likelihood[t_node](node, ranges, node_likelihood=node_likelihood)[0][0]
                    newNode = deepcopy(node)
                    # only restrict the leaf distribution if the evidence has
                    # non-zero mass under it (both branches copied the node)
                    if prob != 0:
                        distribution_update_ranges[t_node](newNode, evidence_ranges[node.scope[0]])
                else:
                    raise Exception('No log-likelihood method specified for node type: ' + str(type(node)))
            else:
                prob = 1
                newNode = deepcopy(node)
            return prob, newNode

        newNode = node.__class__()
        newNode.scope = node.scope

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)
            new_weights = np.array(new_weights)
            newNode.weights = new_weights / np.sum(new_weights)
            newNode.children = new_childs
            return np.sum(new_weights), newNode

        elif isinstance(node, Product):
            new_childs = []
            new_prob = 1.
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)
            newNode.children = new_childs
            return new_prob, newNode

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    return prob, newNode
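
# Usage sketch (hedged): restricts an SPN to a box of evidence ranges.
# node_likelihood maps each leaf type to a function computing the probability
# mass of a range under that leaf, and distribution_update_ranges maps each
# leaf type to an in-place parameter update. The names below
# (categorical_range_likelihood, update_categorical_range, NominalRange) are
# hypothetical stand-ins for whatever the surrounding codebase provides.
#
# evidence_ranges = [None, NominalRange([0])]  # feature 0 free, feature 1 in {0}
# prob, cond_spn = spn_for_evidence(
#     spn, evidence_ranges,
#     node_likelihood={Categorical: categorical_range_likelihood},
#     distribution_update_ranges={Categorical: update_categorical_range})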
def learn_structure_cnet(
    dataset,
    ds_context,
    conditioning,
    create_leaf,
    next_operation_cnet=get_next_operation_cnet(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert create_leaf is not None
    assert next_operation_cnet is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope))

    while tasks:
        local_data, parent, children_pos, scope = tasks.popleft()

        operation, op_params = next_operation_cnet(local_data, scope)
        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.CONDITIONING:
            from spn.algorithms.splitting.Base import split_data_by_clusters

            conditioning_start_t = perf_counter()
            col_conditioning, found_conditioning = conditioning(local_data)

            if not found_conditioning:
                node = create_leaf(local_data, ds_context, scope)
                parent.children[children_pos] = node
                continue

            clusters = (local_data[:, col_conditioning] == 1).astype(int)
            data_slices = split_data_by_clusters(local_data, clusters, scope, rows=True)

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.weights.append(proportion)
                product_node = Product()
                node.children.append(product_node)
                node.children[-1].scope.extend(scope)

                # right branch: all columns except the conditioning column
                right_data_slice = np.hstack(
                    (data_slice[:, :col_conditioning], data_slice[:, (col_conditioning + 1):])
                ).reshape(data_slice.shape[0], data_slice.shape[1] - 1)
                product_node.children.append(None)
                tasks.append((
                    right_data_slice,
                    product_node,
                    len(product_node.children) - 1,
                    scope_slice[:col_conditioning] + scope_slice[col_conditioning + 1:],
                ))

                # left branch: the conditioning column itself
                left_data_slice = data_slice[:, col_conditioning].reshape(data_slice.shape[0], 1)
                product_node.children.append(None)
                tasks.append((left_data_slice, product_node,
                              len(product_node.children) - 1, [scope_slice[col_conditioning]]))

            conditioning_end_t = perf_counter()
            logging.debug("\t\tconditioning (in {:.5f} secs)".format(
                conditioning_end_t - conditioning_start_t))
            continue

        elif operation == Operation.CREATE_LEAF:
            cltree_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            cltree_end_t = perf_counter()

        else:
            raise Exception("Invalid operation: " + str(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    return node
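
# Usage sketch (hedged): learn_structure_cnet expects binary data and a
# `conditioning` function returning (column_index, found_flag). Both
# naive_conditioning and create_cltree_leaf below are hypothetical stand-ins
# for the heuristics the surrounding codebase provides; Bernoulli and Context
# are standard SPFlow imports.
#
# from spn.structure.Base import Context
# from spn.structure.leaves.parametric.Parametric import Bernoulli
# ds_context = Context(parametric_types=[Bernoulli] * data.shape[1]).add_domains(data)
# cnet = learn_structure_cnet(data, ds_context, conditioning=naive_conditioning,
#                             create_leaf=create_cltree_leaf)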
def marg_rang_special(spn, rang, node_likelihood, distribution_update_ranges=distribution_update_ranges):

    def spn_for_evidence_recursive(node):
        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            if rang[node.scope[0]] is not None:
                t_node = type(node)
                if t_node in node_likelihood:
                    ranges = np.array([rang])
                    prob = node_likelihood[t_node](node, ranges, node_likelihood=node_likelihood)[0][0]
                    newNode = deepcopy(node)
                    # only restrict the leaf distribution if the range has
                    # non-zero mass under it (both branches copied the node)
                    if prob != 0:
                        distribution_update_ranges[t_node](newNode, rang[node.scope[0]])
                else:
                    raise Exception('No log-likelihood method specified for node type: ' + str(type(node)))
            else:
                prob = 1
                newNode = deepcopy(node)
            return prob, newNode

        newNode = node.__class__()
        newNode.scope = node.scope

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)
            new_weights = np.array(new_weights)
            newNode.weights = new_weights / np.sum(new_weights)
            newNode.children = new_childs
            return np.sum(new_weights), newNode

        elif isinstance(node, Product):
            new_childs = []
            new_prob = 1.
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)
            newNode.children = new_childs
            return new_prob, newNode

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newSPN = Prune(newNode)
    valid, err = is_valid(newSPN)
    assert valid, err
    return prob, newSPN
def get_flat_spn(spn, target_id):
    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    flat_spn = Sum()
    flat_spn.scope = spn.scope

    def create_flat_spn_recursive(node, distribution_mix, prob=1.0, independent_nodes=None):
        # avoid a shared mutable default argument across calls
        if independent_nodes is None:
            independent_nodes = []

        if isinstance(node, Sum):
            for i, c in enumerate(node.children):
                forwarded_weight = node.weights[i] * prob
                create_flat_spn_recursive(c, distribution_mix, forwarded_weight, independent_nodes.copy())

        elif isinstance(node, Product):
            stop = False
            next_node = None

            for c in node.children:
                if target_id in c.scope:
                    if len(c.scope) == 1:
                        stop = True
                        independent_nodes.append(deepcopy(c))
                    else:
                        next_node = c
                else:
                    for feature_id in c.scope:
                        weighted_nodes = get_nodes_with_weight(c, feature_id)
                        t_node = type(weighted_nodes[0][1])
                        mixed_node = distribution_mix[t_node](weighted_nodes)
                        independent_nodes.append(mixed_node)

            if stop:
                flat_spn.weights.append(prob)
                prod = Product(children=independent_nodes)
                prod.scope = spn.scope
                flat_spn.children.append(prod)
            else:
                create_flat_spn_recursive(next_node, distribution_mix, prob, independent_nodes)

        else:
            raise Exception("Can only iterate over Sum and Product nodes")

    from simple_spn.internal.MixDistributions import mix_categorical

    distribution_mix = {Categorical: mix_categorical}
    create_flat_spn_recursive(spn, distribution_mix)
    assign_ids(flat_spn)
    flat_spn = Prune(flat_spn)
    valid, err = is_valid(flat_spn)
    assert valid, err
    return flat_spn
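
# Usage sketch (hedged): flattens an SPN into a single weighted sum of fully
# factored products with respect to one target feature, e.g. for inspecting
# what each mixture component believes about the other features. Assumes a
# Categorical-leaf SPN, since the distribution mix above only covers Categorical.
#
# flat = get_flat_spn(spn, target_id=0)
# from spn.io.Text import spn_to_str_ref_graph  # standard SPFlow pretty-printer
# print(spn_to_str_ref_graph(flat))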
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    num_conditional_cols=None,
    data_slicer=default_slicer,
    l_rfft=None,
    is_2d=False,
):
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
    ##     num_conditional_cols = None
    ## elif len(initial_scope) < dataset.shape[1]:
    ##     num_conditional_cols = dataset.shape[1] - len(initial_scope)
    ## else:
    ##     num_conditional_cols = None
    ##     assert len(initial_scope) > dataset.shape[1], "check initial scope: %s" % initial_scope

    tasks = deque()
    ## tasks.append((dataset, root, 0, initial_scope, False, False))
    tasks.append((default_slicer(dataset, initial_scope), root, 0, initial_scope, False, False))

    while tasks:
        local_data, parent, children_pos, scope, no_clusters, no_independencies = tasks.popleft()
        assert local_data.shape[1] == len(scope)

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False
            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]
            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))
            continue

        elif operation == Operation.SPLIT_ROWS:
            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append((local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"
                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope, False, False))
            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope, l_rfft, is_2d)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append((local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"
                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1, scope_slice, False, False))
            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()

            # modified by zhongjie on 04.10.2019
            # 1. if is_2d == False, no multivariate Gaussian is used here:
            #    model all RVs with univariate Gaussians. Likewise, if
            #    |scope| == 1 the factorization ends with a univariate Gaussian
            if not is_2d or len(scope) == 1:
                for col in range(len(scope)):
                    node.children.append(None)
                    # tasks.append((data_slicer(local_data, [col], num_conditional_cols), node, len(node.children) - 1, [scope[col]], True, True))
                    local_tasks.append(len(node.children) - 1)
                    child_data_slice = data_slicer(local_data, [col], num_conditional_cols)
                    local_children_params.append((child_data_slice, ds_context, [scope[col]]))
                result_nodes = pool.starmap(create_leaf, local_children_params)
            # 2. if is_2d == True and |scope| > 1, create multivariate Gaussian
            #    leaves: the factorization ends with pairs of coefficients
            else:
                if local_data.shape[0] == 1:
                    # trick: duplicate the row to avoid a single-instance slice (zhongjie)
                    local_data = np.concatenate([local_data, local_data], axis=0)
                for col in range(len(scope)):
                    # if it is not freq 0 or freq pi, consider first only the
                    # real part of the coefficient
                    if l_rfft - 1 > scope[col] % (l_rfft * 2) > 0:
                        node.children.append(None)
                        local_tasks.append(len(node.children) - 1)
                        # then pair it with the corresponding imaginary
                        # coefficient at scope[real] + l_rfft
                        child_data_slice = data_slicer(
                            local_data,
                            [col, scope.index(scope[col] + l_rfft)],
                            num_conditional_cols)
                        local_children_params.append(
                            (child_data_slice, ds_context, [scope[col], scope[col] + l_rfft]))
                    # freq 0 and freq pi have no imaginary part, so handle them normally
                    elif scope[col] % (l_rfft * 2) == 0 or scope[col] % (l_rfft * 2) == l_rfft - 1:
                        node.children.append(None)
                        # tasks.append((data_slicer(local_data, [col], num_conditional_cols), node, len(node.children) - 1, [scope[col]], True, True))
                        local_tasks.append(len(node.children) - 1)
                        child_data_slice = data_slicer(local_data, [col], num_conditional_cols)
                        local_children_params.append((child_data_slice, ds_context, [scope[col]]))
                result_nodes = pool.starmap(create_leaf, local_children_params)

            # result_nodes = []
            # for l in tqdm(local_children_params):
            #     result_nodes.append(create_leaf(*l))
            # result_nodes = [create_leaf(*l) for l in local_children_params]
            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()
            logging.debug("\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                len(scope), split_end_t - split_start_t))
            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()
            logging.debug("\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            raise Exception("Invalid operation: " + str(operation))

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    return node