def EM_optimization(spn, data, iterations=5, node_updates=_node_updates, skip_validation=False, **kwargs):
    if not skip_validation:
        valid, err = is_valid(spn)
        assert valid, "invalid spn: " + err

    lls_per_node = np.zeros((data.shape[0], get_number_of_nodes(spn)))

    for _ in range(iterations):
        # one pass bottom up evaluating the likelihoods
        log_likelihood(spn, data, dtype=data.dtype, lls_matrix=lls_per_node)

        # one pass top down computing the log-gradients of the root w.r.t. every node
        gradients = gradient_backward(spn, lls_per_node)

        # log-likelihood of the root node for every data point
        R = lls_per_node[:, 0]

        for node_type, func in node_updates.items():
            for node in get_nodes_by_type(spn, node_type):
                func(
                    node,
                    node_lls=lls_per_node[:, node.id],
                    node_gradients=gradients[:, node.id],
                    root_lls=R,
                    all_lls=lls_per_node,
                    all_gradients=gradients,
                    data=data,
                    **kwargs
                )
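# --- Usage sketch for EM_optimization (illustrative, not part of the module).
# Assumes the standard SPFlow imports below; the two-component Gaussian mixture
# and the synthetic data are made up for demonstration.
import numpy as np
from spn.structure.Base import Sum, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian
from spn.algorithms.EM import EM_optimization

def example_em_run():
    # two-component mixture over a single feature, expressed as a Sum node
    spn = Sum(weights=[0.5, 0.5],
              children=[Gaussian(mean=0.0, stdev=1.0, scope=0),
                        Gaussian(mean=1.0, stdev=1.0, scope=0)])
    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    # synthetic data drawn from two well-separated modes
    data = np.vstack([np.random.normal(-2.0, 1.0, (500, 1)),
                      np.random.normal(4.0, 1.0, (500, 1))]).astype(np.float64)

    EM_optimization(spn, data, iterations=10)
    return spn.weights  # mixture weights after the EM iterations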
def EM_optimization(spn, data, iterations=5, node_updates=_node_updates, **kwargs):
    for _ in range(iterations):
        lls_per_node = np.zeros((data.shape[0], get_number_of_nodes(spn)))

        # one pass bottom up evaluating the likelihoods
        log_likelihood(spn, data, dtype=data.dtype, lls_matrix=lls_per_node)

        gradients = gradient_backward(spn, lls_per_node)

        R = lls_per_node[:, 0]

        for node_type, func in node_updates.items():
            # TODO: do in parallel
            for node in get_nodes_by_type(spn, node_type):
                func(
                    node,
                    node_lls=lls_per_node[:, node.id],
                    node_gradients=gradients[:, node.id],
                    root_lls=R,
                    all_lls=lls_per_node,
                    all_gradients=gradients,
                    data=data,
                    **kwargs
                )
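# --- Sketch of a node_updates entry (illustrative). Every update function is
# called with the keyword arguments shown above; this one implements the
# standard EM re-estimation of sum-node weights,
#   w_k  proportional to  sum_n  w_k * S_k(x_n) * (dS/dS_node)(x_n) / S(x_n),
# computed in log-space for numerical stability. It sketches the idea and is
# not necessarily identical to the module's own _node_updates entries.
import numpy as np
from scipy.special import logsumexp

def sum_em_update_sketch(node, node_gradients=None, root_lls=None, all_lls=None, **kwargs):
    # log of  (dS/dS_node)(x_n) / S(x_n)  for every data point n;
    # gradient_backward returns log-gradients, so this is a plain subtraction
    log_resp_base = node_gradients - root_lls
    new_log_weights = np.zeros(len(node.children))
    for k, child in enumerate(node.children):
        # accumulate the responsibility of child k over all data points
        new_log_weights[k] = logsumexp(
            log_resp_base + np.log(node.weights[k]) + all_lls[:, child.id])
    # renormalize so the weights sum to one
    node.weights = np.exp(new_log_weights - logsumexp(new_log_weights)).tolist()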
def feature_gradient(node, data, node_gradient_functions=_node_feature_gradients, lls_per_node=None):
    r"""
    Feature gradients are computed for the input query and each feature using
    backward automatic differentiation. In mathematical terms, this computes
    the partial derivatives \partial P(X) / \partial X_i.

    :param node: Node for the gradient calculation
    :param data: data for the computation. NaN values are implicitly marginalized out
    :param node_gradient_functions: dictionary mapping leaf types to their gradient functions
    :param lls_per_node: optional matrix for storing the intermediate results
    """
    all_leaves = get_nodes_by_type(node, Leaf)

    if lls_per_node is None:
        lls_per_node = np.full((data.shape[0], get_number_of_nodes(node)), np.nan)

    log_likelihood(node, data, lls_matrix=lls_per_node)
    gradients = np.exp(gradient_backward(node, lls_per_node))

    node_gradients = []
    for spn_node in all_leaves:
        i = spn_node.id
        result = node_gradient_functions[type(spn_node)](spn_node, data)
        node_gradients.append(result * gradients[:, i].reshape(-1, 1))

    node_gradients = np.array(node_gradients)

    return np.nansum(node_gradients, axis=0)
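# --- Usage sketch for feature_gradient (illustrative). Uses the function
# defined above; assumes gradient functions for Gaussian leaves are registered
# in _node_feature_gradients. The two-feature product SPN and the query values
# are made up.
import numpy as np
from spn.structure.Base import Product, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

def example_feature_gradient():
    spn = Product(children=[Gaussian(mean=0.0, stdev=1.0, scope=0),
                            Gaussian(mean=1.0, stdev=2.0, scope=1)])
    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    # one fully observed query and one with feature 1 marginalized out (NaN)
    data = np.array([[0.5, 1.0],
                     [0.5, np.nan]])
    grads = feature_gradient(spn, data)
    # per the docstring, grads approximates dP(x)/dx_i for each query and feature
    return grads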
def generate_code(spn_id, spn, meta_types, floating_data_type):
    """
    Generates inference code for an SPN.

    :param spn_id: identifier of the SPN, used to name the generated function
    :param spn: root node of an SPN
    :param meta_types: list of MetaType entries, one per feature
    :param floating_data_type: data type floating-point numbers are represented in in the generated C++ code
    :return: tuple of the generated method code and the matching registration code
    """
    # make sure we have ids
    assign_ids(spn)

    # fill method body according to SPN structure
    method_body = generate_method_body(spn, spn, floating_data_type, 0)

    # build parameters used in generated c++ function
    method_params = []
    passed_params = []
    for i, mtype in enumerate(meta_types):
        if mtype == MetaType.DISCRETE:
            method_params += [
                f'vector <int> possibleValues{i}',
                f'int nullValueIdx{i}'
            ]
            passed_params += [
                f'py::arg("possibleValues{i}")',
                f'py::arg("nullValueIdx{i}")'
            ]
        elif mtype == MetaType.REAL:
            method_params += [
                f'bool inverse{i}',
                f'bool leftMinusInf{i}',
                f'float leftCondition{i}',
                f'bool rightMinusInf{i}',
                f'float rightCondition{i}',
                f'bool leftIncluded{i}',
                f'bool rightIncluded{i}',
                f'float nullValue{i}'
            ]
            passed_params += [
                f'py::arg("inverse{i}")',
                f'py::arg("leftMinusInf{i}")',
                f'py::arg("leftCondition{i}")',
                f'py::arg("rightMinusInf{i}")',
                f'py::arg("rightCondition{i}")',
                f'py::arg("leftIncluded{i}")',
                f'py::arg("rightIncluded{i}")',
                f'py::arg("nullValue{i}")'
            ]

    value_dictionary = {
        'spn_id': spn_id,
        'method_body': method_body,
        'method_params': ', '.join(method_params),
        'node_count': get_number_of_nodes(spn),
        'passed_params': ', '.join(passed_params),
        'floating_data_type': floating_data_type
    }
    generated_method = replace_template(TemplatePath.METHOD_MASTER, value_dictionary, 0)
    registration_method = replace_template(TemplatePath.REGISTRATION_MASTER, value_dictionary, 0)

    return generated_method, registration_method
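# --- Usage sketch for generate_code (illustrative). MetaType is assumed to come
# from spn.structure.StatisticalTypes; the SPN, its id, and the feature layout
# are made up. The returned strings are meant to be spliced into the C++/pybind11
# templates referenced by TemplatePath.
from spn.structure.StatisticalTypes import MetaType

def example_generate_code(spn):
    meta_types = [MetaType.DISCRETE, MetaType.REAL]  # one entry per feature
    method_code, registration_code = generate_code(
        spn_id=0, spn=spn, meta_types=meta_types, floating_data_type='double')
    # method_code holds the C++ inference function, registration_code the
    # matching pybind11 registration snippet
    return method_code, registration_code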
def get_node_description(spn, parent_node, size):
    parent_type = type(parent_node).__name__
    node_descriptions = dict()
    node_descriptions['num'] = len(parent_node.children)
    nodes = list()
    for i, node in enumerate(parent_node.children):
        node_spn = Copy(node)
        assign_ids(node_spn)
        node_dir = dict()
        node_dir['weight'] = parent_node.weights[i] if parent_type == 'Sum' else 1
        node_dir['size'] = get_number_of_nodes(node) - 1
        node_dir['num_children'] = len(node.children) if not isinstance(node, Leaf) else 0
        node_dir['leaf'] = isinstance(node, Leaf)
        node_dir['type'] = type(node).__name__ + ' Node'
        if isinstance(node, Leaf):
            node_dir['split_features'] = node.scope
        else:
            # the children's scopes, sorted by their size
            node_dir['split_features'] = sorted((list(c.scope) for c in node.children), key=len)
        node_dir['depth'] = get_depth(node)
        # leaves have no children, so their child-depth list is empty
        node_dir['child_depths'] = [get_depth(c) for c in node.children] if not isinstance(node, Leaf) else []

        descriptor = node_dir['type']
        if all(d == 0 for d in node_dir['child_depths']):
            descriptor = 'shallow ' + descriptor
            node_dir['quick'] = 'shallow'
        elif len([d for d in node_dir['child_depths'] if d == 0]) == 1:
            node_dir['quick'] = 'split_one'
            descriptor += ', which separates one feature'
        else:
            node_dir['quick'] = 'deep'
            descriptor = 'deep ' + descriptor
        descriptor = 'a ' + descriptor
        node_dir['descriptor'] = descriptor
        node_dir['short_descriptor'] = descriptor
        node_dir['representative'] = mpe(node_spn, np.array([[np.nan] * size]))
        nodes.append(node_dir)

    node_descriptions['shallow'] = len([d for d in nodes if d['quick'] == 'shallow'])
    node_descriptions['split_one'] = len([d for d in nodes if d['quick'] == 'split_one'])
    node_descriptions['deep'] = len([d for d in nodes if d['quick'] == 'deep'])
    nodes.sort(key=lambda x: x['weight'])
    nodes.reverse()
    node_descriptions['nodes'] = nodes
    return node_descriptions
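# --- Usage sketch for get_node_description (illustrative). `size` is the number
# of features, needed to build the all-NaN query for the MPE representative; the
# call below assumes the root itself is the parent whose children are described.
def example_describe_root(spn, num_features):
    desc = get_node_description(spn, spn, num_features)
    for child in desc['nodes']:
        # e.g. "a shallow Product Node" with its weight and depth
        print(child['descriptor'], child['weight'], child['depth'])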
def test_binary_serialization_roundtrip(tmpdir):
    """Tests the binary serialization for SPFlow SPNs by round-tripping a simple SPN
    through serialization and de-serialization and comparing the graph structure
    before and after serialization & de-serialization."""
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=1)
    h2 = Histogram([0., 1., 2.], [0.45, 0.55], [1, 1], scope=2)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=1)
    h4 = Histogram([0., 1., 2.], [0.875, 0.125], [1, 1], scope=2)

    p0 = Product(children=[h1, h2])
    p1 = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [p0, p1])

    model = SPNModel(spn, featureValueType="uint32")
    query = JointProbability(model)

    binary_file = os.path.join(tmpdir, "test.bin")
    print(f"Test binary file: {binary_file}")

    BinarySerializer(binary_file).serialize_to_file(query)

    deserialized = BinaryDeserializer(binary_file).deserialize_from_file()

    assert isinstance(deserialized, JointProbability)
    assert deserialized.batchSize == query.batchSize
    assert deserialized.errorModel.error == query.errorModel.error
    assert deserialized.errorModel.kind == query.errorModel.kind
    assert deserialized.graph.featureType == model.featureType
    assert deserialized.graph.name == model.name

    deserialized = deserialized.graph.root

    assert get_number_of_nodes(spn) == get_number_of_nodes(deserialized)
    assert get_number_of_nodes(spn, Sum) == get_number_of_nodes(deserialized, Sum)
    assert get_number_of_nodes(spn, Product) == get_number_of_nodes(deserialized, Product)
    assert get_number_of_nodes(spn, Histogram) == get_number_of_nodes(deserialized, Histogram)
    assert get_number_of_edges(spn) == get_number_of_edges(deserialized)
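# --- Optional extension (illustrative): the structural checks above can be
# complemented by a semantic check that evaluates both graphs on the same
# queries. Assumes SPFlow's log_likelihood; the random queries are made up and
# scaled so that columns 1 and 2 (the scopes used in the test) fall inside the
# histograms' [0, 2) range.
import numpy as np
from spn.algorithms.Inference import log_likelihood

def assert_same_likelihood(spn, deserialized, num_features=3, num_queries=10):
    data = np.random.rand(num_queries, num_features) * 2.0
    assert np.allclose(log_likelihood(spn, data), log_likelihood(deserialized, data))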
def EM_optimization(spn, data, iterations=5, node_updates=_node_updates, skip_validation=False, **kwargs):
    if not skip_validation:
        valid, err = is_valid(spn)
        assert valid, "invalid spn: " + err

    lls_per_node = np.zeros((data.shape[0], get_number_of_nodes(spn)))

    # node_updates = {Sum_sharedWeights: sum_em_update_shared}
    for _ in range(iterations):
        # one pass bottom up evaluating the likelihoods
        log_likelihood(spn, data, lls_matrix=lls_per_node)  # dtype=data.dtype

        gradients = gradient_backward(spn, lls_per_node)

        # snapshot the weight vectors of shared-weight sum nodes before the updates run
        weights = [
            node.weights if isinstance(node, Sum_sharedWeights) else None
            for node in get_nodes_by_type(spn)
        ]

        R = lls_per_node[:, 0]

        for node_type, func in node_updates.items():
            for node in get_nodes_by_type(spn, node_type):
                func(
                    node,
                    node_lls=lls_per_node[:, node.id],
                    node_gradients=gradients[:, node.id],
                    root_lls=R,
                    all_lls=lls_per_node,
                    all_gradients=gradients,
                    data=data,
                    spn=spn,
                    weights=weights,
                    **kwargs
                )
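# --- Usage sketch (illustrative): the commented-out line above hints at how the
# shared-weights update is wired in. A caller could register it explicitly via a
# custom update table; Sum_sharedWeights and sum_em_update_shared are assumed to
# exist in the surrounding codebase.
def example_shared_weight_em(spn, data):
    custom_updates = dict(_node_updates)
    custom_updates[Sum_sharedWeights] = sum_em_update_shared
    EM_optimization(spn, data, iterations=10, node_updates=custom_updates)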
def _serialize_model(self, model):
    msg = spflow_capnp.Model.new_message()
    valid, err = is_valid(model.root)
    assert valid, "SPN invalid before serialization: " + str(err)
    # Assign (new) IDs to the nodes.
    # Keep track of already assigned IDs, so the IDs are
    # unique for the whole file.
    assign_ids(model.root, self.assignedIDs)
    # Rebuild scopes bottom-up.
    rebuild_scopes_bottom_up(model.root)
    msg.rootNode = model.root.id
    msg.numFeatures = len(model.root.scope)
    msg.featureType = model.featureType
    scope = msg.init("scope", len(model.root.scope))
    for i, v in enumerate(model.root.scope):
        scope[i] = self._unwrap_value(v)
    name = ""
    if model.name is not None:
        name = model.name
    msg.name = name
    numNodes = get_number_of_nodes(model.root)
    nodes = msg.init("nodes", numNodes)
    nodeList = ListHandler(nodes)
    self._serialize_graph([model.root], nodeList)
    return msg