def test_accumulate_graph_edges(self):
    """Check hg.accumulate_graph_edges on a 2x3 4-adjacency graph.

    Two cases are exercised: scalar edge weights with the ``max``
    accumulator, and 2-component edge weights with the ``sum`` accumulator.
    """
    graph = hg.get_4_adjacency_graph((2, 3))

    # Case 1: one scalar per edge (7 weights in, 6 accumulated values expected).
    scalar_weights = np.asarray((1, 2, 3, 4, 6, 5, 7))
    expected_max = (2, 4, 6, 5, 7, 7)
    observed_max = hg.accumulate_graph_edges(
        graph, scalar_weights, hg.Accumulators.max)
    self.assertTrue(np.all(observed_max == expected_max))

    # Case 2: one 2-component vector per edge, accumulated with a sum.
    vector_weights = np.asarray((
        (1, 6),
        (2, 5),
        (3, 4),
        (4, 3),
        (5, 2),
        (6, 1),
        (7, 9),
    ))
    expected_sum = np.asarray((
        (3, 11),
        (8, 13),
        (8, 6),
        (8, 6),
        (17, 13),
        (12, 11),
    ))
    observed_sum = hg.accumulate_graph_edges(
        graph, vector_weights, hg.Accumulators.sum)
    self.assertTrue(np.all(observed_sum == expected_sum))
def attribute_contour_strength(tree, edge_weights, vertex_perimeter=None, edge_length=None, leaf_graph=None):
    """
    Contour strength of every node of the given tree.

    For a node, the contour strength is the mean of the edge weights lying on its contour:
    the accumulated edge weight along the contour divided by the contour length.

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param edge_weights: edge_weights of the leaf graph
    :param vertex_perimeter: perimeter of each vertex of the leaf graph
        (provided by :func:`~higra.attribute_vertex_perimeter` on `leaf_graph`)
    :param edge_length: length of each edge of the leaf graph
        (provided by :func:`~higra.attribute_edge_length` on `leaf_graph`)
    :param leaf_graph: (deduced from :class:`~higra.CptHierarchy`)
    :return: a 1d array
    """
    # Fall back on the attributes of the leaf graph when they are not supplied.
    if vertex_perimeter is None:
        vertex_perimeter = hg.attribute_vertex_perimeter(leaf_graph)

    if edge_length is None:
        edge_length = hg.attribute_edge_length(leaf_graph)

    contour_length = attribute_contour_length(tree, vertex_perimeter, edge_length, leaf_graph)

    # The last node (the root) may have a contour of length zero:
    # use 1 as its divisor so the final division stays well defined.
    if np.isclose(contour_length[-1], 0):
        contour_length[-1] = 1

    # When the leaf graph is a region adjacency graph, the provided weights are
    # first accumulated (sum) onto its edges with rag_accumulate_on_edges.
    weights = edge_weights
    if hg.CptRegionAdjacencyGraph.validate(leaf_graph):
        weights = hg.rag_accumulate_on_edges(leaf_graph, hg.Accumulators.sum, weights)

    # Reuse the contour-length computation with weights in place of lengths:
    # the per-vertex sums play the role of the vertex perimeters.
    per_vertex_weight = hg.accumulate_graph_edges(leaf_graph, weights, hg.Accumulators.sum)
    contour_weight = attribute_contour_length(tree, per_vertex_weight, weights, leaf_graph)

    return contour_weight / contour_length
def attribute_vertex_perimeter(graph, edge_length=None):
    """
    Perimeter of each vertex of the given graph.

    The perimeter of a vertex is the sum of the lengths of its out-edges.

    When the graph carries an attribute named `no_border_vertex_out_degree`, every vertex
    perimeter is set to that attribute value instead. This is convenient for image type
    graphs where an outer border has to be taken into account.

    :param graph: input graph
    :param edge_length: length of the edges of the input graph
        (provided by :func:`~higra.attribute_edge_length` on `graph`)
    :return: a nd array
    """
    if edge_length is None:
        edge_length = hg.attribute_edge_length(graph)

    constant_perimeter = hg.get_attribute(graph, "no_border_vertex_out_degree")

    if constant_perimeter is None:
        # General case: sum the lengths of the edges incident to each vertex.
        perimeter = hg.accumulate_graph_edges(graph, edge_length, hg.Accumulators.sum)
    else:
        # Special case: the same perimeter value is assigned to every vertex.
        perimeter = np.full((graph.num_vertices(),), constant_perimeter, dtype=np.float64)

    return hg.delinearize_vertex_weights(perimeter, graph)
def attribute_tree_sampling_probability(tree, leaf_graph, leaf_graph_edge_weights, model='edge'):
    r"""
    Given a tree :math:`T`, estimate the probability that a node :math:`n` of the tree represents the smallest
    cluster containing a pair of vertices :math:`\{a, b\}` of the graph :math:`G=(V, E)`
    with edge weights :math:`w`.

    This method is defined in [1]_.

    We define the probability :math:`P(\{a,b\})` of a pair of vertices :math:`\{a,b\}` as :math:`w(\{a,b\}) / Z`
    with :math:`Z=\sum_{e\in E}w(e)` if :math:`\{a,b\}` is an edge of :math:`G` and 0 otherwise.
    Then the probability :math:`P(a)` of a vertex :math:`a` is defined as :math:`\sum_{b\in V}P(\{a, b\})`

    Two sampling strategies are proposed for sampling pairs of vertices to compute the probability of a node of the tree:

    - *edge*: the probability of sampling the pair :math:`\{a, b\}` is given by :math:`P(\{a, b\})`; and
    - *null*: the probability of sampling the pair :math:`\{a, b\}` is given by the product of the probabilities
      of :math:`a` and :math:`b`: :math:`P(a)*P(b)`.

    Assuming that the edge weights on the leaf graph of a hierarchy represents similarities:

    .. epigraph::

        *We expect these distributions to differ significantly if the tree indeed represents the hierarchical
        structure of the graph. Specifically, we expect [the edge distribution] to be mostly concentrated on
        deep nodes of the tree (far from the root), as two nodes* :math:`u`, :math:`v` *connected with high
        weight* :math:`w(\{u, v\})` *in the graph typically belong to a small cluster, representative of the
        clustering structure of the graph; on the contrary, we expect [the null distribution] to be concentrated
        over shallow nodes (close to the root) as two nodes* :math:`u`, :math:`v` *sampled independently at
        random typically belong to large clusters, less representative of the clustering structure
        of the graph*. [1]_

    .. [1] Charpentier, B. & Bonald, T. (2019). `"Tree Sampling Divergence: An Information-Theoretic Metric for
           Hierarchical Graph Clustering." <https://hal.telecom-paristech.fr/hal-02144394/document>`_
           Proceedings of IJCAI.

    :Complexity:

    The tree sampling divergence runtime complexity depends on the sampling model:

    - *edge*: :math:`\mathcal{O}(N\log(N) + M)` with :math:`N` the number of nodes in the tree and
      :math:`M` the number of edges in the leaf graph.
    - *null*: :math:`\mathcal{O}(N\times C^2)` with :math:`N` the number of nodes in the tree and
      :math:`C` the maximal number of children of a node in the tree.

    :see:

    The :func:`~higra.tree_sampling_divergence` is an unsupervised hierarchical cost function defined as the
    Kullback-Leibler divergence between the edge sampling model and the independent (null) sampling model.

    :param tree: Input tree
    :param leaf_graph: Graph defined on the leaves of the input tree
    :param leaf_graph_edge_weights: Edge weights of the leaf graphs (similarities)
    :param model: defines the edge sampling strategy, either "edge" or "null"
    :return: a 1d array
    :raises ValueError: if `model` is neither "edge" nor "null"
    """
    # NOTE: the docstring is a raw string on purpose: it is full of LaTeX
    # backslashes that would otherwise be invalid escape sequences.
    if model not in ("edge", "null"):
        raise ValueError("Parameter 'model' must be either 'edge' or 'null'.")

    # Normalization constant Z: total weight of the edges of the leaf graph.
    total_weight = np.sum(leaf_graph_edge_weights)

    if model == 'edge':
        # Each edge contributes its probability to the lowest common ancestor
        # of its two extremities.
        lca_map = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
        edge_probabilities = leaf_graph_edge_weights / total_weight
        return hg.accumulate_at(lca_map, edge_probabilities, hg.Accumulators.sum)

    # model == 'null': vertex probabilities are the normalized sums of incident
    # edge weights; they are accumulated up the tree, then combined over the
    # pairs of children of each node.
    vertex_weights = hg.accumulate_graph_edges(leaf_graph, leaf_graph_edge_weights, hg.Accumulators.sum)
    vertex_probabilities = vertex_weights / total_weight
    tree_node_weights = hg.accumulate_sequential(tree, vertex_probabilities, hg.Accumulators.sum)
    return hg.attribute_children_pair_sum_product(tree, tree_node_weights)