Example #1
def remap_bricks(partition_bricks):
    domain, codomain = mapping_pairs.transpose()
    mapper = LabelMapper(domain, codomain)

    partition_bricks = list(partition_bricks)
    for brick in partition_bricks:
        # TODO: Apparently LabelMapper can't handle non-contiguous arrays right now.
        #       (It yields incorrect results)
        #       Check to see if this is still a problem in the latest version of xtensor-python.
        brick.volume = np.asarray(brick.volume, order='C')

        mapper.apply_inplace(brick.volume, allow_unmapped=True)
    return partition_bricks
Example #2
    def remap_bricks(partition_bricks):
        domain, codomain = mapping_pairs.transpose()
        mapper = LabelMapper(domain, codomain)

        partition_bricks = list(partition_bricks)
        for brick in partition_bricks:
            # TODO: Apparently LabelMapper can't handle non-contiguous arrays right now.
            #       (It yields incorrect results)
            #       Check to see if this is still a problem in the latest version of xtensor-python.
            brick.volume = np.asarray(brick.volume, order='C')

            mapper.apply_inplace(brick.volume, allow_unmapped=True)
            brick.compress()
        return partition_bricks
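In both examples above, mapping_pairs is a closure variable from the enclosing function: an (N,2) array of (original_label, new_label) pairs. Below is a minimal, self-contained sketch of the same remapping pattern on a toy array; it assumes LabelMapper comes from the dvidutils package, and the mapping and volume values are invented for illustration.

import numpy as np
from dvidutils import LabelMapper  # assumed source of LabelMapper

# Each row is (original_label, new_label)
mapping_pairs = np.array([[1, 10],
                          [2, 20]], dtype=np.uint64)

volume = np.array([[0, 1],
                   [2, 2]], dtype=np.uint64)

domain, codomain = mapping_pairs.transpose()
mapper = LabelMapper(domain, codomain)

# allow_unmapped=True leaves labels absent from 'domain' (here, 0) untouched.
mapper.apply_inplace(volume, allow_unmapped=True)
print(volume)  # [[ 0 10]
               #  [20 20]]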
Example #3
def split_disconnected_bodies(labels_orig):
    """
    Produces a 3D volume split into connected components.

    This function identifies bodies that share the same label
    but are not connected.  It splits these bodies and
    produces a dict that maps the newly split bodies to
    the original body label.

    Special exception: Segments with label 0 are not relabeled.
    
    Note:
        Requires scikit-image (which, currently, is not otherwise
        listed as a dependency of neuclease's conda-recipe).

    Args:
        labels_orig (numpy.array): 3D array of labels

    Returns:
        (labels_new, new_to_orig, new_unique_labels)

        labels_new:
            The partially relabeled array.
            Segments that were not split will keep their original IDs.
            Among split segments, the largest 'child' of a split segment retains the original ID.
            The smaller segments are assigned new labels in the range (N+1)..(N+1+S), where N is
            the highest original label and S is the number of new segments after splitting.
        
        new_to_orig:
            A pseudo-minimal (but not quite minimal) mapping of labels
            (N+1)..(N+1+S) -> some subset of (1..N),
            which maps new segment IDs to the segments they came from.
            Segments that were not split at all are not mentioned in this mapping.
            For split segments, every mapping pair for the split is returned, including the k->k (identity) pair.
        
        new_unique_labels:
            An array of all label IDs in the newly relabeled volume.
            The original label set can be selected via:
            
                new_unique_labels[new_unique_labels < min(new_to_orig.keys())]
        
    """
    import skimage.measure as skm
    # Compute connected components and cast back to original dtype
    labels_cc = skm.label(labels_orig, background=0, connectivity=1)
    assert labels_cc.dtype == np.int64
    if labels_orig.dtype == np.uint64:
        labels_cc = labels_cc.view(np.uint64)
    else:
        labels_cc = labels_cc.astype(labels_orig.dtype, copy=False)

    # Find overlapping segments between orig and CC volumes
    overlap_table_df = contingency_table(labels_orig, labels_cc).reset_index()
    assert overlap_table_df.columns.tolist() == [
        'left', 'right', 'voxel_count'
    ]
    overlap_table_df.columns = ['orig', 'cc', 'voxels']
    overlap_table_df.sort_values('voxels', ascending=False, inplace=True)

    # If a label in 'orig' is duplicated, it has multiple components in labels_cc.
    # The largest component gets to keep the original ID;
    # the other components must take on new values.
    # (The new values must not conflict with any of the IDs in the original, so start at orig_max+1)
    new_cc_pos = overlap_table_df['orig'].duplicated()
    orig_max = overlap_table_df['orig'].max()
    new_cc_values = np.arange(orig_max + 1,
                              orig_max + 1 + new_cc_pos.sum(),
                              dtype=labels_orig.dtype)

    overlap_table_df['final_cc'] = overlap_table_df['orig'].copy()
    overlap_table_df.loc[new_cc_pos, 'final_cc'] = new_cc_values

    # Relabel the CC volume to use the 'final_cc' labels
    mapper = LabelMapper(overlap_table_df['cc'].values,
                         overlap_table_df['final_cc'].values)
    mapper.apply_inplace(labels_cc)

    # Generate the mapping that could (if desired) convert the new
    # volume into the original one, as described in the docstring above.
    emitted_mapping_rows = overlap_table_df['orig'].duplicated(keep=False)
    emitted_mapping_pairs = overlap_table_df.loc[emitted_mapping_rows,
                                                 ['final_cc', 'orig']].values

    new_to_orig = dict(emitted_mapping_pairs)

    new_unique_labels = pd.unique(overlap_table_df['final_cc'].values)
    new_unique_labels = new_unique_labels.astype(
        overlap_table_df['final_cc'].dtype)
    new_unique_labels.sort()

    return labels_cc, new_to_orig, new_unique_labels
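A hypothetical call to the function above on a toy volume, to make the return values concrete. It assumes split_disconnected_bodies (and its contingency_table/LabelMapper dependencies) is already importable; the new ID mentioned in the comments follows from the 'orig_max + 1' rule described in the docstring.

import numpy as np

# Label 1 appears as two disconnected voxels; label 2 is a single voxel.
vol = np.zeros((1, 3, 5), dtype=np.uint64)
vol[0, 0, 0] = 1
vol[0, 0, 4] = 1
vol[0, 2, 2] = 2

labels_new, new_to_orig, new_unique_labels = split_disconnected_bodies(vol)

# One of the two label-1 components keeps ID 1; the other should get a new ID
# above the original maximum (2), i.e. 3.
# new_to_orig should then contain every pair involved in the split,
# including the identity pair, e.g. {1: 1, 3: 1}.
print(new_to_orig)
print(new_unique_labels)   # all label IDs present in labels_new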
Example #4
def cleave(edges,
           edge_weights,
           seeds_dict,
           node_ids,
           node_sizes=None,
           method='seeded-mst'):
    """
    Cleave the graph with the given edges and edge weights.
    
    Args:
        
        edges:
            array, (E,2), uint32
        
        edge_weights:
            array, (E,), float32
        
        seeds_dict:
            dict, { seed_class : [node_id, node_id, ...] }
        
        node_ids:
            The complete list of node IDs in the graph. Must contain a superset of the ids given in edges.
            Extra ids in node_ids (i.e. not mentioned in 'edges') will be included
            in the results as disconnected components.
        
        node_sizes:
            Optional. Array (N,) of node sizes (same length as node_ids).
            Only required by cleave methods that need node sizes
            (see the requires_sizes check below).
        
        method:
            One of: 'seeded-mst', 'seeded-watershed', 'agglomerative-clustering', 'echo-seeds'

    Returns:
    
        CleaveResults, namedtuple with fields:
        (output_labels, disconnected_components, contains_unlabeled_components)
        
        Where:
            output_labels:
                array (N,), uint32
                Agglomerated node labeling, in the same order as node_ids.
                
            disconnected_components:
                A set of seeds which ended up with more than one component in the result.
            
            contains_unlabeled_components:
                True if the input contains one or more disjoint components that were not seeded
                and thus not labeled during agglomeration. False otherwise.
        
    """
    assert isinstance(node_ids, np.ndarray)
    assert node_ids.dtype in (np.uint32, np.uint64)
    assert node_ids.ndim == 1
    assert node_sizes is None or node_sizes.shape == node_ids.shape

    cleave_func, requires_sizes = get_cleave_method(method)
    assert not requires_sizes or node_sizes is not None, \
        f"The specified cleave method ({method}) requires node sizes but none were provided."

    # Relabel node ids consecutively
    cons_node_ids = np.arange(len(node_ids), dtype=np.uint32)
    mapper = LabelMapper(node_ids, cons_node_ids)

    # Initialize sparse seed label array
    seed_labels = np.zeros_like(cons_node_ids)
    for seed_class, seed_nodes in seeds_dict.items():
        seed_nodes = np.asarray(seed_nodes, dtype=np.uint64)
        mapper.apply_inplace(seed_nodes)
        seed_labels[seed_nodes] = seed_class

    if len(edges) == 0:
        # No edges: Return empty results (just seeds)
        return CleaveResults(seed_labels, set(seeds_dict.keys()),
                             not seed_labels.all())

    # Clean the edges (normalized form, no duplicates, no loops)
    edges.sort(axis=1)
    edges_df = pd.DataFrame({
        'u': edges[:, 0],
        'v': edges[:, 1],
        'weight': edge_weights
    })
    edges_df.drop_duplicates(['u', 'v'], keep='last', inplace=True)
    edges_df = edges_df.query('u != v')
    edges = edges_df[['u', 'v']].values
    edge_weights = edges_df['weight'].values

    # Relabel edges for consecutive nodes
    cons_edges = mapper.apply(edges)
    assert cons_edges.dtype == np.uint32

    cleave_results = cleave_func(cons_edges, edge_weights, seed_labels,
                                 node_sizes)
    assert isinstance(cleave_results, CleaveResults)
    return cleave_results
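A hypothetical call to cleave() above, on a tiny 4-node chain. The node IDs, edges, and weights are invented; which side of the chain the middle nodes end up on depends on how the chosen method interprets edge_weights, so the comments only describe the shape of the result.

import numpy as np

node_ids = np.array([10, 20, 30, 40], dtype=np.uint64)
edges = np.array([[10, 20],
                  [20, 30],
                  [30, 40]], dtype=np.uint64)
edge_weights = np.array([0.9, 0.1, 0.9], dtype=np.float32)

# Seed class 1 on one end of the chain, seed class 2 on the other.
seeds = {1: [10], 2: [40]}

results = cleave(edges, edge_weights, seeds, node_ids, method='seeded-mst')

# results.output_labels is a (4,) uint32 array aligned with node_ids;
# every node receives one of the seed classes (1 or 2), so
# contains_unlabeled_components should be False for this connected graph.
print(results.output_labels)
print(results.contains_unlabeled_components)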
Example #5
def agglomerative_clustering(cleaned_edges,
                             edge_weights,
                             seed_labels,
                             node_sizes=None,
                             num_classes=None):
    """
    Run vigra.graphs.agglomerativeClustering() on the given graph with N nodes and E edges.
    The graph node IDs must be consecutive, starting with zero, dtype=np.uint32
    
    
    Args:
        cleaned_edges:
            array, (E,2), uint32
            Node IDs should be consecutive (more-or-less).
            To avoid segfaults:
                - Must not contain duplicates.
                - Must not contain 'loops' (no self-edges).
        
        edge_weights:
            array, (E,), float32
        
        seed_labels:
            array (N,), uint32
            All un-seeded nodes should be marked as 0.
        
        node_sizes:
            Not used by this function (vigra's nodeSizes argument is left at its default;
            see the notes below).
        
        num_classes:
            Passed to vigra as nodeNumStop.
            If None, it is inferred as the number of distinct non-zero seed classes.
        
    Returns:
        (output_labels, disconnected_components, contains_unlabeled_components)
        
        Where:
        
            output_labels:
                array (N,), uint32
                Agglomerated node labeling.
                
            disconnected_components:
                A set of seeds which ended up with more than one component in the result.
            
            contains_unlabeled_components:
                True if the input contains one or more disjoint components that were not seeded
                and thus not labeled during agglomeration. False otherwise.
    """
    #
    # Notes:
    #
    # vigra.graphs.agglomerativeClustering() is somewhat sophisticated.
    #
    # During agglomeration, edges are selected for 'contraction' and the corresponding nodes are merged.
    # The newly merged node contains the superset of the edges from its constituent nodes, with duplicate
    # edges combined via weighted average according to their relative 'edgeLengths'.
    #
    # The edge weights used in the optimization are adjusted dynamically after every merge.
    # The dynamic edge weight is computed as a weighted average of its original 'edgeWeight'
    # and the similarity of its two nodes (by distance between 'nodeFeatures',
    # using the distance measure defined by 'metric').
    #
    # The relative importance of the original edgeWeight and the node similarity is determined by 'beta'.
    # To ignore node feature similarity completely, use beta=0.0.  To ignore edgeWeights completely, use beta=1.0.
    #
    # After computing that weighted average, the dynamic edge weight is then scaled by a 'Ward factor',
    # which seems to give priority to edges that connect smaller components.
    # The importance of the 'Ward factor' is determined by 'wardness'. To disable it, set wardness=0.0.
    #
    #
    # For reference, here are the relevant lines from vigra/hierarchical_clustering.hxx:
    #
    #    ValueType getEdgeWeight(const Edge & e){
    #        ...
    #        const ValueType wardFac = 2.0 / ( 1.0/std::pow(sizeU,wardness_) + 1/std::pow(sizeV,wardness_) );
    #        const ValueType fromEdgeIndicator = edgeIndicatorMap_[ee];
    #        ValueType fromNodeDist = metric_(nodeFeatureMap_[uu],nodeFeatureMap_[vv]);
    #        ValueType totalWeight = ((1.0-beta_)*fromEdgeIndicator + beta_*fromNodeDist)*wardFac;
    #        ...
    #    }
    #
    #
    # To achieve the "most naive" version of hierarchical clustering,
    # i.e. based purely on pre-computed edge weights (and no node features),
    # use beta=0.0, wardness=0.0.
    #
    # (Ideally, we would also set nodeSizes=[0,...], but unfortunately,
    # setting nodeSizes of 0.0 seems to result in strange bugs.
    # Therefore, we can't avoid the effect of using cumulative node sizes during the agglomeration.)

    assert cleaned_edges.dtype == np.uint32
    assert cleaned_edges.ndim == 2
    assert cleaned_edges.shape[1] == 2
    assert edge_weights.shape == (len(cleaned_edges), )
    assert seed_labels.ndim == 1
    assert cleaned_edges.max() < len(seed_labels)

    # Initialize graph
    # (These params merely reserve RAM in advance. They don't initialize actual graph state.)
    g = vg.AdjacencyListGraph(len(seed_labels), len(cleaned_edges))

    # Make sure there are the correct number of nodes.
    # (Internally, AdjacencyListGraph ensures contiguous nodes are created
    # up to the max id it has seen, so adding the max node is sufficient to
    # ensure all nodes are present.)
    g.addNode(len(seed_labels) - 1)

    # Insert edges.
    g.addEdges(cleaned_edges)

    if num_classes is None:
        num_classes = len(set(pd.unique(seed_labels)) - set([0]))

    output_labels = vg.agglomerativeClustering(
        graph=g,
        edgeWeights=edge_weights,
        #edgeLengths=...,
        #nodeFeatures=...,
        #nodeSizes=...,
        nodeLabels=seed_labels,
        nodeNumStop=num_classes,
        beta=0.0,
        #metric='l1',
        wardness=0.0)

    # For some reason, the output labels do not necessarily
    # have the same values as the seed labels. We have to relabel them ourselves.
    #
    # Furthermore, there are some special cases to consider:
    #
    # 1. It is possible that some seeds will map to disconnected components,
    #    if one of the following is true:
    #      - The input contains disconnected components with identical seeds
    #      - The input contains no disconnected components, but it failed to
    #        connect two components with identical seeds (some other seeded
    #        component ended up blocking the path between the two disconnected
    #        components).
    #    In those cases, we should ensure that the disconnected components are
    #    still labeled with the right input seed, but add the seed to the returned
    #    'disconnected components' set.
    #
    # 2. If the input contains any disconnected components that were NOT seeded,
    #    we should relabel those as 0, and return contains_unlabeled_components=True

    # Get mapping of seeds -> corresponding agg values.
    # (There might be more than one agg value for a given seed, as explained in point 1 above)
    df = pd.DataFrame({'seed': seed_labels, 'agg': output_labels})
    df.drop_duplicates(inplace=True)

    # How many unique agg values are there for each seed class?
    seed_mapping_df = df.query('seed != 0')
    seed_component_counts = seed_mapping_df.groupby(['seed']).agg({'agg': 'size'})
    seed_component_counts.columns = ['component_count']

    # More than one agg value for a seed class implies that it wasn't fully agglomerated.
    disconnected_components = set(
        seed_component_counts.query('component_count > 1').index)

    # If there are 'extra' agg values (not corresponding to seeds),
    # then some component(s) are unlabeled. (Point 2 above.)
    _seeded_agg_ids = set(seed_mapping_df['agg'])
    nonseeded_agg_ids = df.query('agg not in @_seeded_agg_ids')['agg']
    contains_unlabeled_components = (len(nonseeded_agg_ids) > 0)

    # Map from output agg values back to original seed classes.
    agg_values = seed_mapping_df['agg'].values
    seed_values = seed_mapping_df['seed'].values
    if len(nonseeded_agg_ids) > 0:
        nonseeded_agg_ids = np.fromiter(nonseeded_agg_ids, np.uint32)
        agg_values = np.concatenate((agg_values, nonseeded_agg_ids))
        seed_values = np.concatenate(
            (seed_values, np.zeros((len(nonseeded_agg_ids), ), np.uint32)))

    mapper = LabelMapper(agg_values, seed_values)
    mapper.apply_inplace(output_labels)

    return CleaveResults(output_labels, disconnected_components,
                         contains_unlabeled_components)
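The comments above describe two corner cases (a seed split across components, and unseeded components). The toy walk-through below reproduces just that post-processing logic with invented seed/agg values and plain pandas, with no vigra call involved.

import numpy as np
import pandas as pd

# 6 nodes: seed classes 1 and 2, plus two unseeded nodes.
seed_labels   = np.array([1, 1, 2, 2, 0, 0], dtype=np.uint32)
# Pretend the clustering produced these agg labels:
#  - seed 1 ended up split across agg values 7 and 8  -> disconnected component
#  - the unseeded nodes got their own agg value 9     -> unlabeled component
output_labels = np.array([7, 8, 5, 5, 9, 9], dtype=np.uint32)

df = pd.DataFrame({'seed': seed_labels, 'agg': output_labels}).drop_duplicates()
seed_mapping_df = df.query('seed != 0')

component_counts = seed_mapping_df.groupby('seed')['agg'].size()
disconnected_components = set(component_counts[component_counts > 1].index)
print(disconnected_components)          # {1}

_seeded_agg_ids = set(seed_mapping_df['agg'])
unlabeled = df.query('agg not in @_seeded_agg_ids')
print(not unlabeled.empty)              # True -> contains_unlabeled_components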
Example #6
def cleave(edges,
           edge_weights,
           seeds_dict,
           node_ids=None,
           method='seeded-watershed'):
    """
    Cleave the graph with the given edges and edge weights.
    If node_ids is given, it must contain a superset of the ids given in edges.
    Extra ids in node_ids (i.e. not mentioned in 'edges') will be included
    in the results as disconnected components.
    
    Args:
        
        edges:
            array, (E,2), uint32
        
        edge_weights:
            array, (E,), float32
        
        seeds_dict:
            dict, { seed_class : [node_id, node_id, ...] }
        
        node_ids:
            The complete list of node IDs in the graph.
        
        method:
            Either 'seeded-watershed' or 'agglomerative-clustering'

    Returns:
    
        CleaveResults, namedtuple with fields:
        (node_ids, output_labels, disconnected_components, contains_unlabeled_components)
        
        Where:
            node_ids:
                The graph node_ids.
                
            output_labels:
                array (N,), uint32
                Agglomerated node labeling, in the same order as node_ids.
                
            disconnected_components:
                A set of seeds which ended up with more than one component in the result.
            
            contains_unlabeled_components:
                True if the input contains one or more disjoint components that were not seeded
                and thus not labeled during agglomeration. False otherwise.
        
    """
    if node_ids is None:
        node_ids = pd.unique(edges.flat)
        node_ids.sort()

    assert isinstance(node_ids, np.ndarray)
    assert node_ids.dtype in (np.uint32, np.uint64)
    assert node_ids.ndim == 1

    assert method in ('seeded-watershed', 'agglomerative-clustering')

    # Clean the edges (normalized form, no duplicates, no loops)
    edges.sort(axis=1)
    edges_df = pd.DataFrame({
        'u': edges[:, 0],
        'v': edges[:, 1],
        'weight': edge_weights
    })
    edges_df.drop_duplicates(['u', 'v'], keep='last', inplace=True)
    edges_df = edges_df.query('u != v')
    edges = edges_df[['u', 'v']].values
    edge_weights = edges_df['weight'].values

    # Relabel node ids consecutively
    cons_node_ids = np.arange(len(node_ids), dtype=np.uint32)
    mapper = LabelMapper(node_ids, cons_node_ids)
    cons_edges = mapper.apply(edges)
    assert cons_edges.dtype == np.uint32

    # Initialize sparse seed label array
    seed_labels = np.zeros_like(cons_node_ids)
    for seed_class, seed_nodes in seeds_dict.items():
        seed_nodes = np.asarray(seed_nodes, dtype=np.uint64)
        mapper.apply_inplace(seed_nodes)
        seed_labels[seed_nodes] = seed_class

    if method == 'agglomerative-clustering':
        output_labels, disconnected_components, contains_unlabeled_components = agglomerative_clustering(
            cons_edges, edge_weights, seed_labels)
    elif method == 'seeded-watershed':
        output_labels, disconnected_components, contains_unlabeled_components = edge_weighted_watershed(
            cons_edges, edge_weights, seed_labels)

    return CleaveResults(node_ids, output_labels, disconnected_components,
                         contains_unlabeled_components)
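Both versions of cleave() share the same preprocessing: map arbitrary node IDs to consecutive IDs, then scatter the seed classes into a dense array. Below is a minimal sketch of just that step, with invented IDs, assuming LabelMapper comes from dvidutils.

import numpy as np
from dvidutils import LabelMapper  # assumed source of LabelMapper

node_ids = np.array([100, 205, 999], dtype=np.uint64)      # arbitrary (sorted) IDs
cons_node_ids = np.arange(len(node_ids), dtype=np.uint32)  # [0, 1, 2]
mapper = LabelMapper(node_ids, cons_node_ids)

seeds_dict = {1: [100], 2: [999]}
seed_labels = np.zeros_like(cons_node_ids)
for seed_class, seed_nodes in seeds_dict.items():
    seed_nodes = np.asarray(seed_nodes, dtype=np.uint64)
    mapper.apply_inplace(seed_nodes)      # e.g. 100 -> 0, 999 -> 2
    seed_labels[seed_nodes] = seed_class

print(seed_labels)  # [1 0 2]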
Example #7
    def stitch_adjacent_faces(self, drop_unused_vertices=True, drop_duplicate_faces=True):
        """
        Search for duplicate vertices and remove all references to them in self.faces,
        by replacing them with the index of the first matching vertex in the list.
        Works in-place.
        
        Note: Normals are recomputed iff they were present originally.
        
        Args:
            drop_unused_vertices:
                If True, drop the unused (duplicate) vertices from self.vertices_zyx
                (since no faces refer to them any more, this saves some RAM).
            
            drop_duplicate_faces:
                If True, remove faces with an identical
                vertex list to any previous face.
        
        Returns:
            False if no stitching was performed (none was needed),
            or True otherwise.
        
        """
        need_normals = (self.normals_zyx.shape[0] > 0)

        mapping_pairs = remap_duplicates(self.vertices_zyx)
        
        dup_indices, orig_indices = mapping_pairs.transpose()
        if len(dup_indices) == 0:
            if need_normals:
                self.recompute_normals(True)
            return False # No stitching was needed.

        # Discard old normals
        self.drop_normals()

        # Remap faces to no longer refer to the duplicates
        mapper = LabelMapper(dup_indices, orig_indices)
        mapper.apply_inplace(self.faces, allow_unmapped=True)
        del mapper
        del orig_indices
        
        # Now the faces have been stitched, but the duplicate
        # vertices are still unnecessarily present,
        # and the face vertex indexes still reflect that.
        # Also, we may have uncovered duplicate faces now that the
        # vertexes have been canonicalized.

        if drop_unused_vertices:
            self.drop_unused_vertices()

        def _drop_duplicate_faces():
            # Normalize face vertex order before checking for duplicates.
            # Technically, this means we don't distinguish
            # between clockwise/counter-clockwise ordering,
            # but that seems unlikely to be a problem in practice.
            sorted_faces = pd.DataFrame(np.sort(self.faces, axis=1))
            duplicate_faces_mask = sorted_faces.duplicated()
            faces_df = pd.DataFrame(self.faces)
            faces_df.drop(duplicate_faces_mask.values.nonzero()[0], inplace=True)
            self.faces = np.asarray(faces_df.values, order='C')

        if drop_duplicate_faces:
            _drop_duplicate_faces()

        if need_normals:
            self.recompute_normals(True)

        return True # stitching was needed.
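An illustrative sketch of the stitching step above on a tiny mesh. remap_duplicates() is not shown here, so the (duplicate_index -> original_index) pairs it would return are written out by hand; LabelMapper is assumed to come from dvidutils.

import numpy as np
from dvidutils import LabelMapper  # assumed source of LabelMapper

vertices_zyx = np.array([[0., 0., 0.],
                         [0., 0., 1.],
                         [0., 1., 0.],
                         [0., 0., 1.]], dtype=np.float32)  # vertex 3 duplicates vertex 1

faces = np.array([[0, 1, 2],
                  [2, 3, 0]], dtype=np.uint32)             # second face uses the duplicate

dup_indices  = np.array([3], dtype=np.uint32)   # hand-written stand-in for remap_duplicates()
orig_indices = np.array([1], dtype=np.uint32)

mapper = LabelMapper(dup_indices, orig_indices)
mapper.apply_inplace(faces, allow_unmapped=True)  # indices not in dup_indices pass through
print(faces)  # [[0 1 2]
              #  [2 1 0]]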
Example #8
def split_disconnected_bodies(labels_orig):
    """
    Produces a 3D volume split into connected components.

    This function identifies bodies that share the same label
    but are not connected.  It splits these bodies and
    produces a dict that maps the newly split bodies to
    the original body label.

    Special exception: Segments with label 0 are not relabeled.

    Args:
        labels_orig (numpy.array): 3D array of labels

    Returns:
        (labels_new, new_to_orig)

        labels_new:
            The partially relabeled array.
            Segments that were not split will keep their original IDs.
            Among split segments, the largest 'child' of a split segment retains the original ID.
            The smaller segments are assigned new labels in the range (N+1)..(N+1+S), where N is
            the highest original label and S is the number of new segments after splitting.
        
        new_to_orig:
            A pseudo-minimal (but not quite minimal) mapping of labels
            (N+1)..(N+1+S) -> some subset of (1..N),
            which maps new segment IDs to the segments they came from.
            Segments that were not split at all are not mentioned in this mapping.
            For split segments, every mapping pair for the split is returned, including the k->k (identity) pair.
    """
    # Compute connected components and cast back to original dtype
    labels_cc = skm.label(labels_orig, background=0, connectivity=1)
    assert labels_cc.dtype == np.int64
    if labels_orig.dtype == np.uint64:
        labels_cc = labels_cc.view(np.uint64)
    else:
        labels_cc = labels_cc.astype(labels_orig.dtype, copy=False)

    # Find overlapping segments between orig and CC volumes
    overlap_table_df = contingency_table(labels_orig, labels_cc)
    overlap_table_df.columns = ['orig', 'cc', 'voxels']
    overlap_table_df.sort_values('voxels', ascending=False, inplace=True)
    
    # If a label in 'orig' is duplicated, it has multiple components in labels_cc.
    # The largest component gets to keep the original ID;
    # the other components must take on new values.
    # (The new values must not conflict with any of the IDs in the original, so start at orig_max+1)
    new_cc_pos = overlap_table_df['orig'].duplicated()
    orig_max = overlap_table_df['orig'].max()
    new_cc_values = np.arange(orig_max+1, orig_max+1+new_cc_pos.sum(), dtype=labels_orig.dtype)

    overlap_table_df['final_cc'] = overlap_table_df['orig'].copy()
    overlap_table_df.loc[new_cc_pos, 'final_cc'] = new_cc_values
    
    # Relabel the CC volume to use the 'final_cc' labels
    mapper = LabelMapper(overlap_table_df['cc'].values, overlap_table_df['final_cc'].values)
    mapper.apply_inplace(labels_cc)

    # Generate the mapping that could (if desired) convert the new
    # volume into the original one, as described in the docstring above.
    emitted_mapping_rows = overlap_table_df['orig'].duplicated(keep=False)
    emitted_mapping_pairs = overlap_table_df.loc[emitted_mapping_rows, ['final_cc', 'orig']].values

    # Use tolist() to ensure plain Python int types
    # (This is required by some client code in Evaluate.py)
    new_to_orig = dict(emitted_mapping_pairs.tolist())
    
    return labels_cc, new_to_orig
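A toy walk-through of the relabeling step above, using a hand-written overlap table in place of contingency_table()'s output (here, original label 1 was split into two connected components, 'cc' 1 and 3):

import numpy as np
import pandas as pd

overlap = pd.DataFrame({'orig':   [1, 2, 1],
                        'cc':     [1, 2, 3],
                        'voxels': [50, 40, 10]})
overlap.sort_values('voxels', ascending=False, inplace=True)

# Rows whose 'orig' already appeared (the smaller children of a split) get new IDs,
# starting just above the largest original label.
new_cc_pos = overlap['orig'].duplicated()
orig_max = overlap['orig'].max()
overlap['final_cc'] = overlap['orig'].copy()
overlap.loc[new_cc_pos, 'final_cc'] = np.arange(orig_max + 1, orig_max + 1 + new_cc_pos.sum())

print(overlap[['orig', 'cc', 'final_cc']])
#    orig  cc  final_cc
# 0     1   1         1   <- the largest child keeps the original ID
# 1     2   2         2
# 2     1   3         3   <- the smaller child gets orig_max + 1 == 3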