def likelihood(node, data, dtype=np.float64, node_likelihood=_node_likelihood, lls_matrix=None, debug=False):
    """Evaluate the SPN rooted at ``node`` bottom-up on ``data``.

    :param node: root node handed to ``eval_spn_bottom_up``
    :param data: 2D array; anything else fails the leading assertion
    :param dtype: forwarded to ``eval_spn_bottom_up`` as ``dtype``
    :param node_likelihood: evaluation functions handed to ``eval_spn_bottom_up``
    :param lls_matrix: optional 2D output buffer; column ``n.id`` receives
        column 0 of the result recorded for each evaluated node ``n``
    :param debug: when truthy, a per-node validation callback is installed
        and ``debug`` is forwarded to ``eval_spn_bottom_up``
    :return: whatever ``eval_spn_bottom_up`` returns for ``node``
    """
    assert len(data.shape) == 2, "data must be 2D, found: {}".format(data.shape)

    def _check_node_result(node, ll):
        # Each node must yield one value per data row, and not only NaNs.
        expected_shape = (data.shape[0], 1)
        assert ll.shape == expected_shape, "node %s result has to match dimensions (N,1)" % (node.id)
        assert not np.all(np.isnan(ll)), "ll is nan %s " % (node.id)

    per_node_results = {}
    result = eval_spn_bottom_up(
        node,
        node_likelihood,
        all_results=per_node_results,
        input_vals=data,
        after_eval_function=_check_node_result if debug else None,
        debug=debug,
        dtype=dtype,
    )

    if lls_matrix is not None:
        # Copy every node's column vector into its id-indexed column.
        for evaluated_node, ll in per_node_results.items():
            lls_matrix[:, evaluated_node.id] = ll[:, 0]
    return result
def Moment(spn, feature_scope, evidence_scope, evidence, node_moment=_node_moment, order=1):
    """Compute the moment: E[X_feature_scope | X_evidence_scope] given the spn and the evidence data

    Only the unconditional case (``evidence is None``) is implemented; passing
    evidence raises ``NotImplementedError``.

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the moment from
    evidence_scope -- set() of integers, the scope of the evidence features
                      (``None`` is treated as the empty set)
    evidence -- numpy 2d array of the evidence data, or ``None``
    node_moment -- mapping from node type to moment function; it is copied,
                   so neither the caller's mapping nor the module default is
                   modified
    order -- forwarded to ``eval_spn_bottom_up`` as ``order``

    Raises:
    AssertionError -- if an evidence scope is given without evidence, or the
                      feature and evidence scopes overlap
    NotImplementedError -- if ``evidence`` is not ``None``
    """
    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)
    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    # Work on a private copy of the mapping that was actually passed in,
    # instead of mutating the shared module-level registry.
    node_moments = dict(node_moment)
    node_moments.update({Sum: sum_moment, Product: prod_moment})

    if evidence is not None:
        # if we have evidence, we want to compute the conditional moment
        raise NotImplementedError('Please use a conditional SPN to calculated conditional moments')

    return eval_spn_bottom_up(marg_spn, node_moments, order=order)
def likelihood(node, data, dtype=np.float64, node_likelihood=_node_likelihood, lls_matrix=None, debug=False, **kwargs):
    """Evaluate the SPN rooted at ``node`` bottom-up on ``data``.

    :param node: root node handed to ``eval_spn_bottom_up``
    :param data: input array, forwarded as ``data``; only checked to be 2D
        when ``debug`` is truthy
    :param dtype: forwarded to ``eval_spn_bottom_up`` as ``dtype``
    :param node_likelihood: mapping from node type to likelihood function
    :param lls_matrix: optional 2D output buffer; column ``n.id`` receives
        column 0 of the result recorded for each evaluated node ``n``
    :param debug: when truthy, every likelihood function is wrapped in a
        validator that checks the result shape and rejects any NaN
    :param kwargs: extra keyword arguments forwarded to ``eval_spn_bottom_up``
    :return: whatever ``eval_spn_bottom_up`` returns for ``node``
    """
    eval_functions = node_likelihood

    if debug:
        assert len(data.shape) == 2, "data must be 2D, found: {}".format(data.shape)

        def checked_likelihood(node, *args, **kwargs):
            # Dispatch on the node type at call time, then validate the result.
            assert node is not None, "node is nan "
            ll = node_likelihood[type(node)](node, *args, **kwargs)
            assert ll.shape == (data.shape[0], 1), "node %s result has to match dimensions (N,1)" % node.id
            assert not np.any(np.isnan(ll)), "ll is nan %s " % node.id
            return ll

        # One shared validator serves every registered node type.
        eval_functions = dict.fromkeys(node_likelihood, checked_likelihood)

    per_node_results = {}
    result = eval_spn_bottom_up(
        node,
        eval_functions,
        all_results=per_node_results,
        debug=debug,
        dtype=dtype,
        data=data,
        **kwargs
    )

    if lls_matrix is not None:
        for evaluated_node, ll in per_node_results.items():
            lls_matrix[:, evaluated_node.id] = ll[:, 0]
    return result
def spn_to_sympy(spn, node_to_sympy=_node_to_sympy, log=False):
    """Translate ``spn`` into a sympy expression via bottom-up evaluation.

    :param spn: root node; ``len(spn.scope)`` decides how many symbols are made
    :param node_to_sympy: evaluation functions handed to ``eval_spn_bottom_up``
    :param log: forwarded to ``eval_spn_bottom_up`` as ``log``
    :return: whatever ``eval_spn_bottom_up`` returns for ``spn``
    """
    n_vars = len(spn.scope)
    # sympy range syntax: "x:N" yields the symbols x0 .. x(N-1).
    symbols = sp.symbols("x:%s" % n_vars)
    return eval_spn_bottom_up(spn, node_to_sympy, input_vars=symbols, log=log)
def condition(spn, evidence):
    """Condition ``spn`` on the non-NaN entries of the first evidence row.

    :param spn: root node of the SPN; ``len(spn.scope)`` fixes how many
        leading columns of ``evidence`` are inspected
    :param evidence: 2D numeric array; only row 0 is used to decide which
        column indices are observed (not NaN)
    :return: the result of ``Prune`` applied to the new root produced by the
        bottom-up evaluation, after ``assign_ids`` has been called on it
    """
    # Compute the NaN mask once; the original rebuilt it for every index.
    missing = np.isnan(evidence)[0]
    scope = {i for i in range(len(spn.scope)) if not missing[i]}

    node_conditions = {type(leaf): leaf_condition for leaf in get_nodes_by_type(spn, Leaf)}
    node_conditions.update({Sum: sum_condition, Product: prod_condition})

    # The evaluation yields a (new_root, value) pair; only the root is needed.
    new_root, _ = eval_spn_bottom_up(spn, node_conditions, input_vals=evidence, scope=scope)
    assign_ids(new_root)
    return Prune(new_root)
def spn_to_tf_graph(node, data, node_tf_graph=_node_log_tf_graph, log_space=True):
    """Build a TensorFlow graph for the SPN rooted at ``node``.

    Calls ``tf.reset_default_graph()`` before anything else.

    :param node: root node handed to ``eval_spn_bottom_up``
    :param data: array whose ``dtype`` and second dimension define the placeholder
    :param node_tf_graph: evaluation functions handed to ``eval_spn_bottom_up``
    :param log_space: forwarded to ``eval_spn_bottom_up`` as ``log_space``
    :return: tuple ``(tf_graph, data_placeholder, variable_dict)``
    """
    tf.reset_default_graph()

    # The placeholder mirrors the numpy data: same dtype and column count,
    # with the first dimension left open.
    np_dtype = data.dtype
    n_columns = data.shape[1]
    placeholder = tf.placeholder(np_dtype, (None, n_columns))

    variables = {}
    graph = eval_spn_bottom_up(
        node,
        node_tf_graph,
        input_vals=placeholder,
        log_space=log_space,
        variable_dict=variables,
        dtype=np_dtype,
    )
    return graph, placeholder, variables
def gradient(spn, evidence):
    """
    Computes a forward propagated gradient through the spn.

    This function currently assumes a tree structured SPN!

    Note: the Sum and Product handlers are written into the module-level
    ``_node_gradients`` mapping on every call.

    :param spn: root node handed to ``eval_spn_bottom_up``
    :param evidence: forwarded to ``eval_spn_bottom_up`` as ``input_vals``
    :return: whatever ``eval_spn_bottom_up`` returns for ``spn``
    """
    forward_handlers = (
        (Sum, sum_gradient_forward),
        (Product, prod_gradient_forward),
    )
    for node_type, handler in forward_handlers:
        _node_gradients[node_type] = handler

    return eval_spn_bottom_up(spn, _node_gradients, input_vals=evidence)
def group_by_combinations(spn, ds_context, feature_scope, ranges, node_distinct_vals=None, node_likelihoods=None):
    """
    Computes the distinct value combinations for features given the range conditions.

    :param spn: root node; it is marginalized to the relevant scope first
    :param ds_context: object exposing ``no_unique_values``, compared
        element-wise against 1200
    :param feature_scope: list of feature indices; NOTE it is sorted in place
    :param ranges: evidence; the non-None entries of ``ranges[0]`` define the
        evidence scope
    :param node_distinct_vals: mapping from leaf type to a function called as
        ``f(node, evidence)`` for leaves whose first scope entry is a feature
    :param node_likelihoods: forwarded to ``likelihood`` for all other leaves
    :return: the raw bottom-up result when the requested feature order is
        already the sorted, extended scope; otherwise a tuple of the
        originally requested scope and a set of re-ordered value tuples
    """
    evidence = ranges
    evidence_scope = {idx for idx, rng in enumerate(ranges[0]) if rng is not None}

    # Remember the caller's ordering, then sort the passed-in list in place.
    requested_order = copy.copy(feature_scope)
    feature_scope.sort()

    # add range conditions to feature scope (makes checking with bloom filters easier)
    below_threshold = np.where(ds_context.no_unique_values <= 1200)[0]
    extended_scope = sorted(set(feature_scope).union(evidence_scope.intersection(below_threshold)))

    # Position of every requested feature inside the sorted, extended scope.
    inverted_order = [extended_scope.index(feature) for feature in requested_order]

    assert not (len(evidence_scope) > 0 and evidence is None)

    relevant_scope = set(evidence_scope)
    relevant_scope.update(extended_scope)
    marg_spn = marginalize(spn, relevant_scope)

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        # Leaves outside the feature scope only contribute their likelihood.
        if node.scope[0] not in extended_scope:
            return likelihood(node, evidence, node_likelihood=node_likelihoods)
        leaf_type = type(node)
        if leaf_type not in node_distinct_vals:
            raise Exception('Node type unknown: ' + str(leaf_type))
        return node_distinct_vals[leaf_type](node, evidence)

    node_expectations = dict.fromkeys(
        (type(leaf) for leaf in get_nodes_by_type(marg_spn, Leaf)), leaf_expectation
    )
    node_expectations[Sum] = sum_group_by
    node_expectations[Product] = prod_group_by

    result = eval_spn_bottom_up(marg_spn, node_expectations, all_results={}, data=evidence, dtype=np.float64)

    if requested_order == extended_scope:
        return result

    # Project each tuple back onto the order the caller asked for.
    _, grouped_tuples = result
    reordered = {tuple(group[pos] for pos in inverted_order) for group in grouped_tuples}
    return requested_order, reordered
def Moment(spn, feature_scope=None, node_moment=_node_moment, node_likelihoods=_node_likelihood, order=1):
    """
    Computes moments from an spn

    :param spn: a valid spn
    :param feature_scope: optional list of features on which to compute the
        moments; defaults to ``spn.scope``. Must not contain duplicates.
    :param node_moment: optional mapping of leaf type to node moment function
    :param node_likelihoods: optional mapping of leaf type to node likelihood function
    :param order: the order of the moment to compute
    :return: an np array of computed moments, restricted to the columns in
        ``feature_scope``
    :raises AssertionError: if ``feature_scope`` has duplicate entries, or a
        leaf type lacks a moment or likelihood function
    """
    if feature_scope is None:
        feature_scope = spn.scope

    feature_scope = list(feature_scope)
    # Compare against the de-duplicated size; the previous comparison against
    # len(list(...)) was always true and never detected duplicates.
    assert len(feature_scope) == len(set(feature_scope)), "Found double entries in feature list"

    marg_spn = marginalize(spn, feature_scope)

    node_moments = {Sum: sum_moment, Product: prod_moment}

    for node_type in get_node_types(marg_spn, Leaf):
        try:
            moment_func = node_moment[node_type]
            ll_func = node_likelihoods[node_type]
        except KeyError:
            raise AssertionError(
                "Node type {} doe not have associated moment and likelihoods".format(node_type))
        node_moments[node_type] = leaf_moment(moment_func, ll_func)

    # One row with a column for every index up to the largest scope entry,
    # pre-filled with NaN.
    results = np.full((1, max(spn.scope) + 1), np.nan)

    moment = eval_spn_bottom_up(marg_spn, node_moments, order=order, result_array=results)

    return moment[:, feature_scope]
def spn_to_tf_graph(node, data, batch_size=None, node_tf_graph=_node_log_tf_graph, log_space=True, dtype=None):
    """Build a TensorFlow graph for the SPN rooted at ``node``.

    Calls ``tf.reset_default_graph()`` before anything else.

    :param node: root node handed to ``eval_spn_bottom_up``
    :param data: array whose second dimension defines the placeholder width
        and whose ``dtype`` is the fallback when ``dtype`` is None
    :param batch_size: first dimension of the placeholder (None leaves it open)
    :param node_tf_graph: evaluation functions handed to ``eval_spn_bottom_up``
    :param log_space: forwarded to ``eval_spn_bottom_up`` as ``log_space``
    :param dtype: dtype of the placeholder; None means "use ``data.dtype``"
    :return: tuple ``(tf_graph, data_placeholder, variable_dict)``
    """
    tf.reset_default_graph()

    # Test identity with None rather than truthiness: a non-structured
    # numpy dtype instance has len() == 0 and is therefore falsy, so
    # `not dtype` would silently discard an explicitly requested dtype.
    if dtype is None:
        dtype = data.dtype

    # data is a placeholder, with shape same as numpy data
    data_placeholder = tf.placeholder(dtype, (batch_size, data.shape[1]))
    variable_dict = {}
    tf_graph = eval_spn_bottom_up(
        node,
        node_tf_graph,
        data_placeholder=data_placeholder,
        log_space=log_space,
        variable_dict=variable_dict,
        dtype=dtype,
    )
    return tf_graph, data_placeholder, variable_dict
def likelihood(node, data, dtype=np.float64, node_likelihood=_node_likelihood, lls_matrix=None, debug=False, bmarg=None, ibm=None):
    """Evaluate the SPN rooted at ``node`` bottom-up on ``data``.

    :param node: root node handed to ``eval_spn_bottom_up``
    :param data: 2D array; anything else fails the leading assertion
    :param dtype: forwarded to ``eval_spn_bottom_up`` as ``dtype``
    :param node_likelihood: mapping from node type to likelihood function
    :param lls_matrix: optional 2D output buffer; column ``n.id`` receives
        column 0 of the result recorded for each evaluated node ``n``
    :param debug: when truthy, each likelihood function is wrapped so that
        its result shape is checked and an all-NaN result is rejected
    :param bmarg: forwarded to ``eval_spn_bottom_up`` as ``bmarg``
    :param ibm: forwarded to ``eval_spn_bottom_up`` as ``ibm``
    :return: whatever ``eval_spn_bottom_up`` returns for ``node``
    """
    assert len(data.shape) == 2, "data must be 2D, found: {}".format(data.shape)

    all_results = {}

    if debug:
        def with_validation(funct):
            # A factory binds `funct` per node type. Defining the wrapper
            # directly in the loop made every wrapper call the last function.
            def exec_funct(node, children, data=None, dtype=np.float64):
                ll = funct(node, children, data=data, dtype=dtype)
                assert ll.shape == (data.shape[0], 1), "node %s result has to match dimensions (N,1)" % (node.id)
                assert not np.all(np.isnan(ll)), "ll is nan %s " % (node.id)
                return ll

            return exec_funct

        node_likelihood = {k: with_validation(funct) for k, funct in node_likelihood.items()}

    result = eval_spn_bottom_up(
        node,
        node_likelihood,
        all_results=all_results,
        debug=debug,
        dtype=dtype,
        data=data,
        bmarg=bmarg,
        ibm=ibm,
    )

    if lls_matrix is not None:
        for n, ll in all_results.items():
            lls_matrix[:, n.id] = ll[:, 0]

    return result
def joined_means(spn):
    """Compute the joined mean: E[XY]

    TODO: Currently, only unconditional correlations are implemented

    Keyword arguments:
    spn -- the spn to compute the probabilities from; ``len(spn.scope)`` is
           forwarded to ``eval_spn_bottom_up`` as ``full_scope``
    """
    # Every leaf type present in the SPN shares the same handler.
    leaf_types = (type(leaf) for leaf in get_nodes_by_type(spn, Leaf))
    node_functions = dict.fromkeys(leaf_types, node_correlation)
    node_functions[Sum] = sum_correlation
    node_functions[Product] = prod_correlation

    return eval_spn_bottom_up(spn, node_functions, full_scope=len(spn.scope))