Example #1
    def test_area(self):
        tree, altitudes = TestAttributes.get_test_tree()

        ref_area = [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 4, 7, 9]
        area = hg.attribute_area(tree)
        self.assertTrue(np.allclose(ref_area, area))

        leaf_area = np.asarray([1, 2, 1, 1, 2, 1, 1, 1, 3])
        ref_area = [1, 2, 1, 1, 2, 1, 1, 1, 3, 3, 2, 3, 2, 5, 5, 10, 13]
        area = hg.attribute_area(tree, vertex_area=leaf_area)
        self.assertTrue(np.allclose(ref_area, area))
Example #2
    def test_area_default_param(self):
        g = hg.get_4_adjacency_graph((2, 3))
        edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3))

        ref_area = (1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 6)

        tree, altitudes = hg.bpt_canonical(g, edge_weights)
        area = hg.attribute_area(tree)
        self.assertTrue(np.all(ref_area == area))

        tree2 = hg.Tree(tree.parents())
        area2 = hg.attribute_area(tree2)
        self.assertTrue(np.all(ref_area == area2))
Example #3
def dasgupta_cost(tree, edge_weights, leaf_graph):
    """
    Dasgupta's cost is an unsupervised measure of the quality of a hierarchical clustering of an edge weighted graph.

    Let :math:`T` be a tree representing a hierarchical clustering of the graph :math:`G=(V, E)`.
    Let :math:`w` be a dissimilarity function on the edges :math:`E` of the graph.

    Dasgupta's cost is defined as:

    .. math::

        dasgupta(T, V, E, w) = \sum_{\{x,y\}\in E} \\frac{area(lca_T(x,y))}{w(\{x,y\})}

    :See:

        S. Dasgupta. "`A cost function for similarity-based hierarchical clustering <https://arxiv.org/pdf/1510.05043.pdf>`_ ."
        In Proc. STOC, pages 118–127, Cambridge, MA, USA, 2016

    :Complexity:

    The runtime complexity is :math:`\mathcal{O}(n\log(n) + m)` with :math:`n` the number of nodes in :math:`T` and
    :math:`m` the number of edges in :math:`E`.

    :param tree: Input tree
    :param edge_weights: Edge weights on the leaf graph (dissimilarities)
    :param leaf_graph: Leaf graph of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :return: a real number
    """
    area = hg.attribute_area(tree, leaf_graph=leaf_graph)

    lcaf = hg.make_lca_fast(tree)
    lca = lcaf.lca(leaf_graph)

    return np.sum(area[lca] / edge_weights)
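A minimal usage sketch, assuming the function above is exposed as hg.dasgupta_cost; since the hierarchy is built with hg.bpt_canonical, the leaf graph is deduced from the CptHierarchy concept and does not need to be passed explicitly:

import numpy as np
import higra as hg

# small 4-adjacency graph with dissimilarity edge weights (same values as Example #2)
g = hg.get_4_adjacency_graph((2, 3))
edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3), dtype=np.float64)

# hierarchical clustering of the graph
tree, altitudes = hg.bpt_canonical(g, edge_weights)

# lower cost indicates a better fit to the dissimilarities
cost = hg.dasgupta_cost(tree, edge_weights)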
Example #4
def dendrogram_purity_naif(tree, leaf_labels):
    from itertools import combinations

    tree.lowest_common_ancestor_preprocess()
    area = hg.attribute_area(tree)
    max_label = np.max(leaf_labels)
    label_histo = np.zeros((tree.num_leaves(), max_label + 1), dtype=np.int64)
    label_histo[np.arange(tree.num_leaves()), leaf_labels] = 1
    label_histo = hg.accumulate_sequential(tree, label_histo,
                                           hg.Accumulators.sum)
    class_purity = label_histo / area[:, None]

    count = 0
    total = 0
    for label in set(leaf_labels):
        same = leaf_labels == label
        same_indices, = same.nonzero()

        if len(same_indices) < 2:
            continue

        pairs = list(combinations(same_indices, 2))
        count += len(pairs)

        pairs = np.asarray(pairs, dtype=np.int64)
        lcas = tree.lowest_common_ancestor(pairs[:, 0], pairs[:, 1])
        total += np.sum(class_purity[lcas, label])

    return total / count
Example #5
def watershed_hierarchy_by_volume(graph, edge_weights, vertex_area=None):
    """
    Watershed hierarchy by volume.

    The definition of hierarchical watershed follows the one given in:

        J. Cousty, L. Najman.
        `Incremental algorithm for hierarchical minimum spanning forests and saliency of watershed cuts <https://hal-upec-upem.archives-ouvertes.fr/hal-00622505/document>`_.
        ISMM 2011: 272-283.

    The algorithm used is described in:

        Laurent Najman, Jean Cousty, Benjamin Perret.
        `Playing with Kruskal: Algorithms for Morphological Trees in Edge-Weighted Graphs <https://hal.archives-ouvertes.fr/file/index/docid/798621/filename/ismm2013-algo.pdf>`_.
        ISMM 2013: 135-146.

    :param graph: input graph
    :param edge_weights: input graph edge weights
    :param vertex_area: area of the input graph vertices (provided by :func:`~higra.attribute_vertex_area`)
    :return: a tree (Concept :class:`~higra.CptHierarchy`) and its node altitudes
    """
    if vertex_area is None:
        vertex_area = hg.attribute_vertex_area(graph)

    vertex_area = hg.linearize_vertex_weights(vertex_area, graph)

    return watershed_hierarchy_by_attribute(
        graph, edge_weights, lambda tree, altitudes: hg.attribute_volume(
            tree, altitudes, hg.attribute_area(tree, vertex_area)))
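A minimal usage sketch, assuming the function above is exposed as hg.watershed_hierarchy_by_volume; when vertex_area is omitted, every vertex gets an area of 1:

import numpy as np
import higra as hg

g = hg.get_4_adjacency_graph((2, 3))
edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3), dtype=np.float64)

# hierarchy whose node ordering reflects volume extinction values
tree, altitudes = hg.watershed_hierarchy_by_volume(g, edge_weights)
area = hg.attribute_area(tree)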
Example #6
def hierarchy_to_optimal_MumfordShah_energy_cut_hierarchy(tree,
                                                          vertex_weights,
                                                          leaf_graph,
                                                          approximation_piecewise_linear_function=10):
    """
    Transform the given hierarchy into an optimal energy cut hierarchy using the piecewise constant Mumford-Shah energy
    (see function :func:`~higra.hierarchy_to_optimal_energy_cut_hierarchy`).

    In this context:

        - the data fidelity energy assumes a piecewise constant model in each node and is given by the variance of the vertex values inside the node  (see function :func:`~higra.attribute_gaussian_region_weights_model`) multiplied by its area,
        - the regularity energy is given by the length of the contour of the node (see function :func:`~higra.attribute_contour_length`).

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param vertex_weights: vertex weights of the leaf graph of the input tree
    :param leaf_graph: leaf graph of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :param approximation_piecewise_linear_function: Maximum number of pieces used in the approximated piecewise linear model for the energy function (default 10).
    :return: a tree (Concept :class:`~higra.CptHierarchy`) and its node altitudes
    """
    area = hg.attribute_area(tree, leaf_graph=leaf_graph)
    _, variance = hg.attribute_gaussian_region_weights_model(tree, vertex_weights, leaf_graph)
    perimeter = hg.attribute_contour_length(tree, leaf_graph=leaf_graph)

    if variance.ndim > 1:
        variance = np.trace(variance, axis1=1, axis2=2)

    return hierarchy_to_optimal_energy_cut_hierarchy(tree, variance * area, perimeter,
                                                     int(approximation_piecewise_linear_function))
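A minimal usage sketch, assuming the function above is exposed as hg.hierarchy_to_optimal_MumfordShah_energy_cut_hierarchy; weighting the graph with hg.weight_graph and hg.WeightFunction.L1 is just one possible way to build the initial hierarchy from an image:

import numpy as np
import higra as hg

# toy image seen as scalar vertex weights on a 4-adjacency grid graph
image = np.random.rand(5, 5)
graph = hg.get_4_adjacency_graph((5, 5))
edge_weights = hg.weight_graph(graph, image, hg.WeightFunction.L1)

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
# leaf graph is deduced from the hierarchy, pixel values act as vertex weights
new_tree, new_altitudes = hg.hierarchy_to_optimal_MumfordShah_energy_cut_hierarchy(tree, image)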
Example #7
def patches_batch_tos_area_p(batch_image, i, j, l):
    tree, altitudes = hg.component_tree_tree_of_shapes_image2d(
        batch_image[i, :, :, 0])
    area = hg.attribute_area(tree)
    batch_image[i, :, :,
                j + 1] = hg.reconstruct_leaf_data(tree, altitudes, area < l)
    return batch_image[i, :, :, j + 1]
Example #8
def attribute_mean_vertex_weights(tree, vertex_weights, area=None, leaf_graph=None):
    """
    Mean vertex weights of the leaf graph vertices inside each node of the given tree.

    For any node :math:`n`, the mean vertex weights :math:`a(n)` of :math:`n` is

    .. math::

        a(n) = \\frac{\sum_{x\in n} vertex\_weights(x)}{area(n)}

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param vertex_weights: vertex weights of the leaf graph of the input tree
    :param area: area of the tree nodes  (provided by :func:`~higra.attribute_area`)
    :param leaf_graph: leaf graph of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :return: a nd array
    """
    if area is None:
        area = hg.attribute_area(tree)

    if leaf_graph is not None:
        vertex_weights = hg.linearize_vertex_weights(vertex_weights, leaf_graph)

    attribute = hg.accumulate_sequential(
        tree,
        vertex_weights.astype(np.float64),
        hg.Accumulators.sum) / area.reshape([-1] + [1] * (vertex_weights.ndim - 1))
    return attribute
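A minimal usage sketch, assuming the attribute above is registered as hg.attribute_mean_vertex_weights:

import numpy as np
import higra as hg

graph = hg.get_4_adjacency_graph((2, 3))
vertex_weights = np.asarray((1., 2., 3., 4., 5., 6.))
edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3), dtype=np.float64)

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
# mean of the leaf values contained in each node of the hierarchy
mean_weights = hg.attribute_mean_vertex_weights(tree, vertex_weights)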
Example #9
def attribute_compactness(tree,
                          area=None,
                          contour_length=None,
                          normalize=True,
                          leaf_graph=None):
    """
    The compactness of a node is defined as its area divided by the square of its perimeter length.

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param area: node area of the input tree (provided by :func:`~higra.attribute_area` on `tree`)
    :param contour_length: node contour length of the input tree (provided by :func:`~higra.attribute_contour_length` on `tree`)
    :param normalize: if True the result is divided by the maximal compactness value in the tree
    :param leaf_graph: (deduced from :class:`~higra.CptHierarchy`)
    :return: a 1d array
    """
    if area is None:
        area = hg.attribute_area(tree)

    if contour_length is None:
        contour_length = hg.attribute_contour_length(tree,
                                                     leaf_graph=leaf_graph)

    compactness = area / (contour_length * contour_length)
    if normalize:
        max_compactness = np.nanmax(compactness)
        compactness = compactness / max_compactness

    return compactness
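A minimal usage sketch, assuming the attribute above is registered as hg.attribute_compactness; with normalize=True the most compact node in the tree gets the value 1:

import numpy as np
import higra as hg

graph = hg.get_4_adjacency_graph((3, 3))
edge_weights = np.arange(12, dtype=np.float64)  # a 3x3 4-adjacency grid has 12 edges

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
# area / (contour length)^2, normalized by the maximum value in the tree
compactness = hg.attribute_compactness(tree)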
Example #10
def binary_hierarchy_to_scipy_linkage_matrix(tree, altitudes=None, area=None):
    """
    Converts a Higra binary hierarchy to a SciPy linkage matrix.

    From SciPy documentation:

        An :math:`n-1` by 4 matrix :math:`Z` is returned.
        At the :math:`i`-th iteration, clusters with indices :math:`Z[i, 0]` and :math:`Z[i, 1]` are combined to
        form cluster :math:`n+i`.
        A cluster with an index  less than :math:`n` corresponds to one of the :math:`n` original observations.
        The distance between clusters :math:`Z[i, 0]` and :math:`Z[i, 1]` is given by :math:`Z[i, 2]`.
        The fourth value :math:`Z[i, 3]` represents the number of original observations in the newly formed cluster.

    If :attr:`altitudes` is not specified, the value provided by :func:`~higra.attribute_regular_altitudes`
    on :attr:`tree` is used.

    If :attr:`area` is not specified, the value provided by :func:`~higra.attribute_area` on :attr:`tree` is used.

    :param tree: Input tree
    :param altitudes: Tree nodes altitudes (should be increasing w.r.t tree)
    :param area: Tree nodes area (should be increasing w.r.t tree)
    :return: A linkage matrix
    """

    if altitudes is None:
        altitudes = hg.attribute_regular_altitudes(tree)

    if area is None:
        area = hg.attribute_area(tree)

    area = hg.cast_to_dtype(area, np.int64)
    return hg.cpp._binary_hierarchy_to_scipy_linkage_matrix(
        tree, altitudes, area)
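A minimal usage sketch, assuming the function above is exposed as hg.binary_hierarchy_to_scipy_linkage_matrix; hg.bpt_canonical produces the binary hierarchy required here, and the resulting matrix can be fed directly to SciPy (plotting the dendrogram needs matplotlib):

import numpy as np
import higra as hg
from scipy.cluster.hierarchy import dendrogram

graph = hg.get_4_adjacency_graph((2, 3))
edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3), dtype=np.float64)

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
Z = hg.binary_hierarchy_to_scipy_linkage_matrix(tree, altitudes=altitudes)
dendrogram(Z)  # standard SciPy dendrogram plot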
Example #11
def attribute_piecewise_constant_Mumford_Shah_energy(tree, vertex_weights,
                                                     gamma, leaf_graph):
    """
    Piecewise constant Mumford-Shah energy of each node of the input tree.
    The energy of a node is equal to its data fidelity energy plus gamma times its regularization energy.

    For the piecewise constant Mumford-Shah model:

        - the data fidelity energy assumes a piecewise constant model in each node and is given by the variance of the vertex values inside the node  (see function :func:`~higra.attribute_gaussian_region_weights_model`) multiplied by its area,
        - the regularity energy is given by the length of the contour of the node (see function :func:`~higra.attribute_contour_length`).

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param vertex_weights: vertex weights of the leaf graph of the input tree
    :param gamma: weighting of the regularization term (should be a positive value)
    :param leaf_graph: leaf graph of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :return: a 1d array measuring the energy of each node the input tree
    """
    area = hg.attribute_area(tree, leaf_graph=leaf_graph)
    _, variance = hg.attribute_gaussian_region_weights_model(
        tree, vertex_weights, leaf_graph)
    perimeter = hg.attribute_contour_length(tree, leaf_graph=leaf_graph)

    if variance.ndim > 1:
        variance = np.trace(variance, axis1=1, axis2=2)

    return variance * area + gamma * perimeter
Example #12
def dendrogram_purity(tree, leaf_labels):
    """
    Weighted average of the purity of each node of the tree with respect to a ground truth
    labeling of the tree leaves.
    
    Let :math:`T` be a tree with leaves :math:`V=\{1, \ldots, n\}`.
    Let :math:`C=\{C_1, \ldots, C_K\}` be a partition of :math:`V` into :math:`K` (label) sets.
    
    The purity of a subset :math:`X` of :math:`V` with respect to class :math:`C_\ell\in C` is the fraction of
    elements of :math:`X` that belongs to class :math:`C_\ell`:
    
    .. math::
    
         pur(X, C_\ell) = \\frac{| X \cap C_\ell |}{| X |}.
    
    The purity of :math:`T` is then defined as:
    
    .. math::
    
         pur(T) = \\frac{1}{Z}\sum_{k=1}^{K}\sum_{x,y\in C_k, x\\neq y} pur(lca_T(x,y), C_k)
    
    with :math:`Z=| \{\{x,y\} \subseteq V \mid x\\neq y, \exists k, \{x,y\}\subseteq C_k\} |`.
    
    :See:
    
         Heller, Katherine A., and Zoubin Ghahramani. "`Bayesian hierarchical clustering <https://www2.stat.duke.edu/~kheller/bhcnew.pdf>`_ ."
         Proc. ICML. ACM, 2005.
    
    :Complexity:
    
    The dendrogram purity is computed in :math:`\mathcal{O}(N\\times K \\times C^2)` with :math:`N` the number of nodes
    in the tree, :math:`K` the number of classes, and :math:`C` the maximal number of children of a node in the tree.

    :param tree: input tree
    :param leaf_labels: a 1d integral array of length `tree.num_leaves()`
    :return:  a score between 0 and 1 (higher is better)
    """
    if leaf_labels.ndim != 1 or leaf_labels.size != tree.num_leaves(
    ) or leaf_labels.dtype.kind != 'i':
        raise ValueError(
            "leaf_labels must be a 1d integral array of length `tree.num_leaves()`"
        )

    num_l = tree.num_leaves()
    area = hg.attribute_area(tree)

    max_label = np.max(leaf_labels)
    num_labels = max_label + 1
    label_histo_leaves = np.zeros((num_l, num_labels), dtype=np.float64)
    label_histo_leaves[np.arange(num_l), leaf_labels] = 1

    label_histo = hg.accumulate_sequential(tree, label_histo_leaves,
                                           hg.Accumulators.sum)
    class_purity = label_histo / area[:, np.newaxis]

    weights = hg.attribute_children_pair_sum_product(tree, label_histo)
    total = np.sum(class_purity[num_l:, :] * weights[num_l:, :])

    return total / np.sum(weights[num_l:])
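A minimal usage sketch, assuming the function above is exposed as hg.dendrogram_purity; the labels must form an integral array with one entry per leaf:

import numpy as np
import higra as hg

graph = hg.get_4_adjacency_graph((2, 3))
edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3), dtype=np.float64)
tree, altitudes = hg.bpt_canonical(graph, edge_weights)

# ground-truth class of each of the 6 leaves
leaf_labels = np.asarray((0, 0, 1, 0, 1, 1), dtype=np.int64)
purity = hg.dendrogram_purity(tree, leaf_labels)  # score between 0 and 1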
Example #13
    def test_reconstruct_leaf_data_component_tree_default(self):
        g = hg.get_4_adjacency_implicit_graph((1, 6))
        vertex_values = np.asarray((1, 5, 4, 3, 3, 6), dtype=np.int32)
        tree, altitudes = hg.component_tree_max_tree(g, vertex_values)

        area = hg.attribute_area(tree)
        output = hg.reconstruct_leaf_data(tree, area)
        ref = np.asarray((6, 1, 2, 5, 5, 1), dtype=np.int32)

        self.assertTrue(np.all(ref == output))
Example #14
def patches_batch_tos_area(batch_image, batch_size, lambdas):
    for i in range(batch_size):
        tree, altitudes = hg.component_tree_tree_of_shapes_image2d(
            batch_image[i, :, :, 0])
        area = hg.attribute_area(tree)

        for j in range(len(lambdas)):
            batch_image[i, :, :, j + 1] = hg.reconstruct_leaf_data(
                tree, altitudes, area < lambdas[j])

    return batch_image
Example #15
def attribute_gaussian_region_weights_model(tree,
                                            vertex_weights,
                                            leaf_graph=None):
    """
    Estimates a gaussian model (mean, (co-)variance) for leaf weights inside a node.

    The result is composed of two arrays:

        - the first one contains the mean value inside each node, scalar if vertex weights are scalar and vectorial otherwise,
        - the second one contains the variance of the values inside each node, scalar if vertex weights are scalar and a (biased) covariance matrix otherwise.

    Vertex weights must be scalar or 1 dimensional.

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param vertex_weights: vertex weights of the leaf graph of the input tree
    :param leaf_graph: leaf graph of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :return: two arrays mean and variance
    """
    if leaf_graph is not None:
        vertex_weights = hg.linearize_vertex_weights(vertex_weights,
                                                     leaf_graph)

    if vertex_weights.ndim > 2:
        raise ValueError(
            "Vertex weight can either be scalar or 1 dimensional.")

    if vertex_weights.dtype not in (np.float32, np.float64):
        vertex_weights = vertex_weights.astype(np.float64)

    area = hg.attribute_area(tree, leaf_graph=leaf_graph)
    mean = hg.accumulate_sequential(tree, vertex_weights, hg.Accumulators.sum,
                                    leaf_graph)

    if vertex_weights.ndim == 1:
        # general case below would work but this is simpler
        mean /= area
        mean2 = hg.accumulate_sequential(tree, vertex_weights * vertex_weights,
                                         hg.Accumulators.sum, leaf_graph)
        mean2 /= area
        variance = mean2 - mean * mean
    else:
        mean /= area[:, None]
        tmp = vertex_weights[:, :, None] * vertex_weights[:, None, :]
        mean2 = hg.accumulate_sequential(tree, tmp, hg.Accumulators.sum,
                                         leaf_graph)
        mean2 /= area[:, None, None]

        variance = mean2 - mean[:, :, None] * mean[:, None, :]

    return mean, variance
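A minimal usage sketch, assuming the attribute above is registered as hg.attribute_gaussian_region_weights_model; with scalar pixel values both returned arrays are one-dimensional:

import numpy as np
import higra as hg

image = np.random.rand(4, 4)
graph = hg.get_4_adjacency_graph((4, 4))
edge_weights = hg.weight_graph(graph, image, hg.WeightFunction.L1)

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
# per-node mean and (biased) variance of the pixel values
mean, variance = hg.attribute_gaussian_region_weights_model(tree, image)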
Example #16
    def test_watershed_hierarchy_by_attribute(self):
        g = hg.get_4_adjacency_graph((1, 19))
        edge_weights = np.asarray(
            (0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0))
        # watershed hierarchy by area...
        t, altitudes = hg.watershed_hierarchy_by_attribute(
            g, edge_weights, lambda tree, _: hg.attribute_area(tree))

        ref_parents = np.asarray(
            (19, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22,
             23, 23, 23, 24, 24, 25, 26, 26, 25, 27, 27, 27),
            dtype=np.int64)
        ref_tree = hg.Tree(ref_parents)
        ref_altitudes = np.asarray((0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 5))

        self.assertTrue(hg.test_tree_isomorphism(t, ref_tree))
        self.assertTrue(np.allclose(altitudes, ref_altitudes))
Example #17
    def test_area_filter_max_tree(self):
        graph = hg.get_4_adjacency_implicit_graph((5, 5))
        vertex_weights = np.asarray(
            ((-5, 2, 2, 5, 5), (-4, 2, 2, 6, 5), (3, 3, 3, 3, 3),
             (-2, -2, -2, 9, 7), (-1, 0, -2, 8, 9)),
            dtype=np.float64)
        tree, altitudes = hg.component_tree_max_tree(graph, vertex_weights)
        area = hg.attribute_area(tree)

        filtered_weights = hg.reconstruct_leaf_data(tree, altitudes, area <= 4)

        expected_filtered_weights = \
            np.asarray(((-5, 2, 2, 3, 3),
                        (-4, 2, 2, 3, 3),
                        (3, 3, 3, 3, 3),
                        (-2, -2, -2, 3, 3),
                        (-2, -2, -2, 3, 3)), dtype=np.float64)

        self.assertTrue(np.all(filtered_weights == expected_filtered_weights))
Example #18
def attribute_volume(tree, altitudes, area=None):
    """
    Volume of each node of the given tree.
    The volume :math:`V(n)` of a node :math:`n` is defined recursively as:

    .. math::

        V(n) = area(n) * | altitude(n) - altitude(parent(n)) | +  \sum_{c \in children(n)} V(c)

    :param tree: input tree
    :param altitudes: node altitudes of the input tree
    :param area: area of the nodes of the input hierarchy (provided by :func:`~higra.attribute_area` on `tree`)
    :return: a 1d array
    """
    if area is None:
        area = hg.attribute_area(tree)

    height = np.abs(altitudes[tree.parents()] - altitudes)
    height = height * area
    volume_leaves = np.zeros(tree.num_leaves(), dtype=np.float64)
    return hg.accumulate_and_add_sequential(tree, height, volume_leaves, hg.Accumulators.sum)
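A minimal usage sketch, assuming the attribute above is registered as hg.attribute_volume; the area argument is computed automatically when omitted:

import numpy as np
import higra as hg

graph = hg.get_4_adjacency_graph((2, 3))
edge_weights = np.asarray((1, 4, 6, 5, 2, 7, 3), dtype=np.float64)

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
# recursive sum of area * altitude difference along each subtree
volume = hg.attribute_volume(tree, altitudes)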
Example #19
    def test_attribute_extinction_value2(self):
        graph = hg.get_4_adjacency_implicit_graph((4, 4))
        vertex_weights = np.asarray(
            (0, 1, 4, 4, 7, 5, 6, 8, 2, 3, 4, 1, 9, 8, 6, 7))

        tree, altitudes = hg.component_tree_max_tree(graph, vertex_weights)
        area = hg.attribute_area(tree)

        expected_ext = np.asarray(
            (0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 16, 0, 0, 1, 16, 16, 4, 1, 1,
             16, 4, 4, 16, 16, 16, 16, 16))

        ext = hg.attribute_extinction_value(tree, altitudes, area)
        self.assertTrue(np.all(expected_ext == ext))

        ext = hg.attribute_extinction_value(tree, altitudes, area, False)
        self.assertTrue(np.all(expected_ext == ext))

        ext = hg.attribute_extinction_value(tree, altitudes, area,
                                            "decreasing")
        self.assertTrue(np.all(expected_ext == ext))
Example #20
def loss_cluster_size(graph,
                      edge_weights,
                      ultrametric,
                      hierarchy,
                      top_nodes=0,
                      dtype=tc.float64):
    """
    Cluster size regularization:
    
     .. math::
    
        loss = \\frac{1}{|E|}\sum_{e_{xy}\in E}\\frac{ultrametric(e_{xy})}{\min\{|c|\, | \, c\in Children(lca(x,y))\}}
    
    :param graph: input graph (``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (``torch.Tensor``, autograd is supported)
    :param ultrametric: ultrametric on the input graph (``torch.Tensor``, autograd is supported)
    :param hierarchy: optional, if provided must be a tuple ``(tree, altitudes)`` corresponding to the result of ``higra.bpt_canonical`` on the input edge weighted graph
    :param top_nodes: if different from 0, only the top ``top_nodes`` of the hierarchy are taken into account in the cluster size regularization
    :return: loss value as a pytorch scalar
    
    """
    tree, altitudes = hierarchy
    lca_map = hg.attribute_lca_map(tree)

    if top_nodes <= 0:
        top_nodes = tree.num_vertices()
    top_nodes = max(tree.num_vertices() - top_nodes, tree.num_leaves())
    top_edges, = np.nonzero(lca_map >= top_nodes)

    area = hg.attribute_area(tree)
    min_area = hg.accumulate_parallel(tree, area, hg.Accumulators.min)
    min_area = min_area[lca_map[top_edges]]
    min_area = tc.tensor(min_area, dtype=dtype)

    cluster_size_loss = ultrametric[top_edges] / min_area

    return cluster_size_loss.mean()
Example #21
def print_partition_tree(tree,
                         *,
                         altitudes=None,
                         attribute=None,
                         float_size=4,
                         ordering="area",
                         scale="linear",
                         return_string=False):
    """
    Print a partition tree in ASCII format.

    The tree is represented as a dendrogram oriented horizontally with the leaves on the left and the root on the right.
    Node positions are proportional to their altitudes.

    This function can be used for debugging and illustrations: it is not meant to handle large trees.

    :param tree: Input tree
    :param altitudes: Tree node altitudes (will default to :func:`~higra.attribute_regular_altitudes(tree)` if ``None``)
    :param attribute: Optional tree node attributes. If provided, the node attribute value will be printed instead
            of its altitude.
    :param float_size: Number of characters reserved for number printing.
    :param ordering: determine how the children of a node are ordered. Possible values are
            'altitudes', 'area', 'none', 'leaves'
    :param scale: scale of the x axis: 'linear' (default) or 'log'
    :param return_string: if ``True``, the string is returned instead of being printed (default ``False``)
    :return: A string if :attr:`return_string` is ``True``, ``None`` otherwise
    """
    # arbitrary !
    nleaves = tree.num_leaves()
    assert nleaves < 100, "Tree has too many leaves for pretty print!"
    if nleaves >= 10:
        leaf_format = "{:2d}"
    else:
        leaf_format = "{:1d}"

    # number printing
    float_size = max(3, int(float_size))
    half_float = max(1, float_size // 2)
    prec = max(1, float_size - 1)
    float_format = "{0:" + str(float_size) + "." + str(prec) + "g}"

    # space between two leaves
    y_spacing = 3

    # normalized altitudes determines parent/child spacing
    if altitudes is None:
        normalized_altitudes = altitudes = hg.attribute_regular_altitudes(tree)
    else:
        min_a = np.min(altitudes[tree.num_leaves():])
        normalized_altitudes = (altitudes - min_a) / (np.max(altitudes) -
                                                      min_a)
        normalized_altitudes[:tree.num_leaves()] = 0

    if scale == "log":
        normalized_altitudes = np.log(1 + normalized_altitudes) / np.log(2)
    elif scale == "linear":
        pass
    else:
        raise ValueError("Invalid scale parameter '" + scale + "'")

    # attribute is what is printed
    if attribute is None:
        attribute = altitudes

    # the minimum difference of altitudes between a child and its parent will determine the total size of the graph
    diff_altitudes = normalized_altitudes[
        tree.parents()] - normalized_altitudes
    min_diff_altitudes = np.min(diff_altitudes[np.nonzero(diff_altitudes)])

    # spacing between two successors cannot be less than float_size + 3
    total_width = int((float_size + 3) / min_diff_altitudes + 1)
    total_height = (1 + y_spacing) * nleaves - y_spacing

    # arbitrary !
    assert total_width < 1000, "Tree is too deep for pretty print!"

    # "drawing" area
    screen = np.full((total_height, total_width + 10), ' ')

    # y positions
    yy = np.zeros((tree.num_vertices(), ))

    # area is necessary to determine how much space must be "reserved" for each child of a node
    area = hg.attribute_area(tree)

    # how leaves are sorted
    ordering_modes = {
        'altitudes': lambda cl: sorted(cl, key=lambda c: altitudes[c]),
        'area': lambda cl: sorted(cl, key=lambda c: area[c]),
        'none': lambda cl: cl,
        'leaves': None
    }

    if ordering not in ordering_modes:
        raise ValueError('Invalid ordering mode.')
    else:
        ordering = ordering_modes[ordering]

    # special case, note that the branches of the tree might self-intersect...
    if ordering is None:
        yy[:nleaves] = np.arange(0, total_height, y_spacing + 1)
        for n in tree.leaves_to_root_iterator(include_leaves=False):
            yy[n] = np.mean(yy[tree.children(n)])
    else:

        def compute_yy_rec(n, left, right):
            if tree.is_leaf(n):
                yy[n] = (left + right) / 2
            else:
                cl = ordering(tree.children(n))

                r = right - left
                ys = []
                tarea = 0
                narea = area[n]
                for i, c in enumerate(cl):
                    y = compute_yy_rec(c, left + r * tarea / narea,
                                       left + r * (tarea + area[c]) / narea)
                    ys.append(y)
                    tarea += area[c]

                yy[n] = np.mean(ys)

            return yy[n]

        compute_yy_rec(tree.root(), 0, total_height)

    # final scaling along x axis
    # because we subtract the minimal non-zero value in normalized altitudes,
    # we shift non-leaf nodes to separate them from leaves
    xshift = half_float + 1
    x0_util = 0
    x1_util = total_width - xshift
    xr_util = x1_util - x0_util

    xx = np.round(xr_util * normalized_altitudes)
    xx[tree.num_leaves():] += xshift

    def write_string(y, x, s):
        for i, c in enumerate(s):
            screen[y, x + i] = c

    def draw_hline(y, x1, x2):
        if x1 > x2:
            x1, x2 = x2, x1
        for x in range(x1, x2 + 1):
            screen[y, x] = "-"

    def draw_vline(x, y1, y2):
        if y1 > y2:
            y1, y2 = y2, y1
        for y in range(y1, y2 + 1):
            screen[y, x] = "|"

    yy -= 1
    xx = xx.astype(np.int32)
    yy = yy.astype(np.int32)

    for n in tree.leaves_to_root_iterator(include_leaves=False):
        nx = xx[n] + half_float + 1
        ny = yy[n]
        for c in tree.children(n):
            cx = xx[c]
            if not tree.is_leaf(c):
                cx += half_float + 1
            cy = yy[c]
            draw_vline(nx, cy, ny)
            draw_hline(cy, cx, nx)

    for n in tree.leaves():
        s = leaf_format.format(n)
        write_string(yy[n], xx[n], s)

    for n in tree.leaves_to_root_iterator(include_leaves=False):
        s = float_format.format(attribute[n])
        write_string(yy[n], xx[n], s)

    r = []
    for i in range(screen.shape[0]):
        s = screen[i, :].astype('|S1').tobytes().decode('utf-8')
        s = s.rstrip()
        if s != "":
            r.append(s)

    r = "\n".join(r)
    if not return_string:
        print(r)
    else:
        return r
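A minimal usage sketch, assuming the helper above is exposed as hg.print_partition_tree; the ASCII dendrogram is printed with children ordered by area (the default):

import numpy as np
import higra as hg

graph = hg.get_4_adjacency_graph((1, 5))
edge_weights = np.asarray((1, 4, 2, 3), dtype=np.float64)

tree, altitudes = hg.bpt_canonical(graph, edge_weights)
hg.print_partition_tree(tree, altitudes=altitudes)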
Example #22
 def functor(tree, altitudes):
     area = hg.attribute_area(tree)
     area_min_children = hg.accumulate_parallel(tree, area,
                                                hg.Accumulators.min)
     return area_min_children < 3
Example #23
 def non_relevant_functor(tree, _):
     area = hg.attribute_area(tree)
     return hg.accumulate_parallel(tree, area,
                                   hg.Accumulators.min) < size_threshold