def testGridGraphAgglomerativeClustering():
    """Check agglomerative clustering on 2-D and 3-D grid graphs.

    For each dimensionality a random multi-channel node-feature image and a
    random edge image of shape ``2 * 10 - 1`` per axis are generated.  The
    grid graph is clustered down to 5 nodes, once with node features and once
    without, and the region adjacency graph built from the resulting labels
    must contain exactly 5 nodes.
    """
    side = 10
    for axistags in ('xyc', 'xyzc'):
        # The trailing 'c' is the channel axis; everything before it spans
        # the grid.
        ndim = len(axistags) - 1
        nodeShape = [side] * ndim
        edgeShape = [side * 2 - 1] * ndim

        dataRGB = vigra.taggedView(
            numpy.random.random(nodeShape + [3]).astype(numpy.float32),
            axistags)
        data = numpy.random.random(nodeShape).astype(numpy.float32)
        edata = numpy.random.random(edgeShape).astype(numpy.float32)

        g0 = graphs.gridGraph(data.shape)
        ew = graphs.edgeFeaturesFromInterpolatedImage(graph=g0, image=edata)
        #ew = taggedView(ew,'xyz')

        # First run uses the node features, second run uses edge weights only.
        for extraArgs in ({'nodeFeatures': dataRGB}, {}):
            labels = graphs.agglomerativeClustering(graph=g0,
                                                    edgeWeights=ew,
                                                    nodeNumStop=5,
                                                    **extraArgs)
            g1 = graphs.regionAdjacencyGraph(graph=g0, labels=labels)
            assert g1.nodeNum == 5
def testGridGraphAgglomerativeClustering():
    """Cluster random 2-D and 3-D grid graphs down to 5 nodes.

    Each case is run twice -- with and without node features -- and the
    region adjacency graph built from the returned labels is asserted to
    have exactly 5 nodes.
    """

    def _clusterAndCheck(gridShape, axistags):
        # Random inputs: a 3-channel node-feature image, a scalar image that
        # only supplies the grid shape, and an edge image with 2*n-1 samples
        # per axis.
        gridShape = list(gridShape)
        interpolatedShape = [2 * n - 1 for n in gridShape]

        dataRGB = numpy.random.random(gridShape + [3]).astype(numpy.float32)
        dataRGB = vigra.taggedView(dataRGB, axistags)
        data = numpy.random.random(gridShape).astype(numpy.float32)
        edata = numpy.random.random(interpolatedShape).astype(numpy.float32)

        g0 = graphs.gridGraph(data.shape)
        ew = graphs.edgeFeaturesFromInterpolatedImage(graph=g0, image=edata)
        #ew = taggedView(ew,'xyz')

        withFeatures = graphs.agglomerativeClustering(
            graph=g0, edgeWeights=ew, nodeFeatures=dataRGB, nodeNumStop=5)
        assert graphs.regionAdjacencyGraph(
            graph=g0, labels=withFeatures).nodeNum == 5

        withoutFeatures = graphs.agglomerativeClustering(
            graph=g0, edgeWeights=ew, nodeNumStop=5)
        assert graphs.regionAdjacencyGraph(
            graph=g0, labels=withoutFeatures).nodeNum == 5

    _clusterAndCheck((10, 10), 'xyc')
    _clusterAndCheck((10, 10, 10), 'xyzc')
def get_segmentation(predict, pmin=0.5, minMemb=10, minSeg=10, sigMin=6, sigWeights=1, sigSmooth=0.1,
                     cleanCloseSeeds=True, returnSeedsOnly=False, edgeLengths=None, nodeFeatures=None,
                     nodeSizes=None, nodeLabels=None, nodeNumStop=None, beta=0, metric='l1', wardness=0.2,
                     out=None):
    """
    Get segmentation through watershed and agglomerative clustering

    The prediction map is over-segmented into superpixels with
    ``wsDtSegmentation``; a region adjacency graph is built on those
    superpixels, its edges are weighted with the Gaussian-smoothed prediction
    accumulated per edge, and the graph is merged with
    ``vg.agglomerativeClustering``.

    :param predict: prediction map
    :param pmin, minMemb, minSeg, sigMin, sigWeights, cleanCloseSeeds,
        returnSeedsOnly: forwarded positionally, in this order, to
        ``wsDtSegmentation``
    :param sigSmooth: sigma passed to ``vf.gaussianSmoothing`` for smoothing
        the prediction map before it is used as edge indicator
    :param edgeLengths, nodeFeatures, nodeSizes, nodeLabels, nodeNumStop,
        beta, metric, wardness, out: forwarded positionally, in this order,
        to ``vg.agglomerativeClustering``
    :return: tuple ``(segmentation, super_pixels, wsDt_data, agglCl_data)``
        where ``segmentation`` is the clustering result projected back to the
        base graph, ``super_pixels`` is the watershed result, ``wsDt_data`` is
        an ``(8, 1)`` float array of the watershed settings and
        ``agglCl_data`` is a tuple describing the clustering settings
    """
    # use watershed and save superpixels map
    super_pixels = wsDtSegmentation(predict, pmin, minMemb, minSeg, sigMin, sigWeights,
                                    cleanCloseSeeds, returnSeedsOnly)

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3; the former multi-argument print statements were Python 2
    # only.
    print("")
    print("#Nodes in superpixels %d" % len(np.unique(super_pixels)))

    # smooth prediction map
    probs = vf.gaussianSmoothing(predict, sigSmooth)

    # make grid graph
    grid_graph = vg.gridGraph(super_pixels.shape, False)
    grid_graph_edge_indicator = vg.edgeFeaturesFromImage(grid_graph, probs)

    # make region adjacency graph
    rag = vg.regionAdjacencyGraph(grid_graph, super_pixels)

    # accumulate edge features from grid graph node map
    edge_weights = rag.accumulateEdgeFeatures(grid_graph_edge_indicator)
    edge_weights_tag = "mean of the probabilities"

    # do agglomerative clustering
    labels = vg.agglomerativeClustering(rag, edge_weights, edgeLengths, nodeFeatures, nodeSizes, nodeLabels,
                                        nodeNumStop, beta, metric, wardness, out)

    # segmentation data: one column holding the eight watershed settings
    # (booleans are stored as 0.0 / 1.0)
    wsDt_data = np.zeros((8, 1))
    wsDt_data[:, 0] = (pmin, minMemb, minSeg, sigMin, sigWeights, sigSmooth, cleanCloseSeeds, returnSeedsOnly)
    agglCl_data = edge_weights_tag, str(edgeLengths), str(nodeFeatures), str(nodeSizes), str(nodeLabels), \
        str(nodeNumStop), str(beta), metric, str(wardness), str(out)

    # project labels back to data
    segmentation = rag.projectLabelsToBaseGraph(labels)
    unique_segments = np.unique(segmentation)
    print("#nodes in segmentation %d" % len(unique_segments))
    print("seg %s" % (unique_segments,))

    return segmentation, super_pixels, wsDt_data, agglCl_data
#(5000, 1.0, 0.70), (10000, 1.0, 0.50), (20000, 1.0, 0.25), (20000, 1.0, 0.50) #(30000, 1.0, 0.70) ] for s in settings: print s edgeWeights = (1.0 - s[2]) * ewDmap + s[2] * ewPmap labelsRag = graphs.agglomerativeClustering(graph=rag, edgeWeights=edgeWeights, beta=0.000, nodeFeatures=None, nodeNumStop=int(s[0]), wardness=float(s[1]), edgeLengths=edgeLengths, nodeSizes=nodeSizes) print "project back" labels = rag.projectLabelsToGridGraph(labelsRag) print "labels.shape", labels.shape print "project back done" vigra.impex.writeHDF5(labels, opt['oversegL1'], "data") #segData.append([rag.labels, "segmentation-L0"]) name = "segmentation-L1_NODES_%d_W_%f_BETA_%f" % (s[0], s[1], s[2]) print name segData.append([labels.copy(), name])
# Script fragment: merge a super-pixel over-segmentation with agglomerative
# clustering and start a 2x2 figure showing the inputs.
# Relies on `img`, `gradMag`, `labels`, `imgLab`, `beta` and `nodeNumStop`
# being defined earlier in the script.

# grid graph over the first two axes of `img`, with edge values taken from
# `gradMag`
gridGraph = graphs.gridGraph(img.shape[0:2])
gridGraphEdgeIndicator = graphs.edgeFeaturesFromInterpolatedImage(gridGraph, gradMag)

# get region adjacency graph from super-pixel labels
rag = graphs.regionAdjacencyGraph(gridGraph, labels)

# accumulate edge weights from gradient magnitude
edgeWeights = rag.accumulateEdgeFeatures(gridGraphEdgeIndicator)

# accumulate node features from grid graph node map
# which is just a plain image (with channels)
nodeFeatures = rag.accumulateNodeFeatures(imgLab)

# do agglomerativeClustering
# (note: `labels` is rebound here from the super-pixel labels to the
# clustering result)
labels = graphs.agglomerativeClustering(graph=rag, edgeWeights=edgeWeights,
                                        beta=beta, nodeFeatures=nodeFeatures,
                                        nodeNumStop=nodeNumStop)

# show result
f = pylab.figure()

# panel 1: displays `gradMag`
# NOTE(review): the title says "Input Image" although `gradMag` is what is
# shown -- confirm which one is intended.
ax1 = f.add_subplot(2, 2, 1)
vigra.imshow(gradMag,show=False)
ax1.set_title("Input Image")
pylab.axis('off')

# panel 2: the region adjacency graph drawn on top of `img`
ax2 = f.add_subplot(2, 2, 2)
rag.show(img)
ax2.set_title("Over-Segmentation")
pylab.axis('off')

# panel 3: created here; its content is filled in by code outside this
# fragment
ax3 = f.add_subplot(2, 2, 3)
(10000, 1.0, 0.50), (20000, 1.0, 0.25), (20000, 1.0, 0.50) #(30000, 1.0, 0.70) ] for s in settings: print s edgeWeights = (1.0-s[2])*ewDmap + s[2]*ewPmap labelsRag = graphs.agglomerativeClustering(graph=rag, edgeWeights=edgeWeights, beta=0.000, nodeFeatures=None, nodeNumStop=int(s[0]), wardness=float(s[1]), edgeLengths=edgeLengths, nodeSizes=nodeSizes) print "project back" labels = rag.projectLabelsToGridGraph(labelsRag) print "labels.shape", labels.shape print "project back done" vigra.impex.writeHDF5(labels, opt['oversegL1'], "data") #segData.append([rag.labels, "segmentation-L0"]) name = "segmentation-L1_NODES_%d_W_%f_BETA_%f" % (s[0], s[1], s[2]) print name segData.append([labels.copy(), name])
gridGraphEdgeIndicator = graphs.edgeFeaturesFromInterpolatedImage( gridGraph, gradMag) # get region adjacency graph from super-pixel labels rag = graphs.regionAdjacencyGraph(gridGraph, labels) # accumulate edge weights from gradient magnitude edgeWeights = rag.accumulateEdgeFeatures(gridGraphEdgeIndicator) # accumulate node features from grid graph node map # which is just a plain image (with channels) nodeFeatures = rag.accumulateNodeFeatures(imgLab) # do agglomerativeClustering labels = graphs.agglomerativeClustering(graph=rag, edgeWeights=edgeWeights, beta=beta, nodeFeatures=nodeFeatures, nodeNumStop=nodeNumStop) # show result f = pylab.figure() ax1 = f.add_subplot(2, 2, 1) vigra.imshow(gradMag, show=False) ax1.set_title("Input Image") pylab.axis('off') ax2 = f.add_subplot(2, 2, 2) rag.show(img) ax2.set_title("Over-Segmentation") pylab.axis('off')
# Script fragment: for every (name, dataset name, file path) entry in `pmaps`,
# read the map, turn it into RAG edge weights, run agglomerative clustering
# and collect the result for viewing.
# Relies on `pmaps`, `gridGraph`, `rag`, `grayData`, `segData` and
# `aggloSegPath` being defined earlier in the script.
for name, dsetname, pmapPath in pmaps:
    print "read pmap"
    # keep only index 0 of the last axis of the 4-D dataset
    pmap = vigra.impex.readHDF5(pmapPath, dsetname)[:, :, :, 0]
    pmap = numpy.require(pmap, dtype=numpy.float32)
    grayData.append([pmap, "pmap"])

    print "extract"
    # grid-graph edge map derived from `pmap`, accumulated onto the RAG edges
    ggPmap = graphs.implicitMeanEdgeMap(gridGraph, pmap)
    edgeWeights = rag.accumulateEdgeFeatures(ggPmap)

    print "do agglomerative clustering"
    # do agglomerativeClustering
    labelsRag = graphs.agglomerativeClustering(graph=rag, edgeWeights=edgeWeights,
                                               beta=0.005, nodeFeatures=None,
                                               nodeNumStop=75000, wardness=0.95)

    # project the RAG labels back onto the grid graph and store them;
    # every iteration writes to the same `aggloSegPath` / "data" dataset
    labels = rag.projectLabelsToGridGraph(labelsRag)
    vigra.impex.writeHDF5(labels, aggloSegPath, "data")
    segData.append([labels + 10, "segmentation-" + name])

# NOTE(review): the original indentation was lost; the viewer call is assumed
# to run once after the loop rather than once per pmap -- confirm.
skneuro.addHocViewer(grayData, segData)
#bla = array[20:40, 60:80, 60:90]
def agglomerative_clustering(cleaned_edges, edge_weights, seed_labels, node_sizes=None, num_classes=None):
    """
    Agglomerate a seeded graph with vigra.graphs.agglomerativeClustering().

    The graph has N nodes and E edges.  Node IDs must be consecutive,
    starting with zero, dtype=np.uint32.

    Args:
        cleaned_edges:
            array, (E,2), uint32
            Node IDs should be consecutive (more-or-less).
            To avoid segfaults:
                - Must not contain duplicates.
                - Must not contain 'loops' (no self-edges).

        edge_weights:
            array, (E,), float32

        seed_labels:
            array (N,), uint32
            All un-seeded nodes should be marked as 0.

        node_sizes:
            Accepted, but not used by this function.

        num_classes:
            Passed to vigra as 'nodeNumStop'.  If None, the number of
            distinct non-zero values in seed_labels is used.

    Returns:
        CleaveResults(output_labels, disconnected_components, contains_unlabeled_components)

        Where:
            output_labels:
                array (N,), uint32
                Agglomerated node labeling.

            disconnected_components:
                A set of seeds which ended up with more than one component in the result.

            contains_unlabeled_components:
                True if the input contains one or more disjoint components that were not seeded
                and thus not labeled during agglomeration. False otherwise.
    """
    #
    # Notes on vigra.graphs.agglomerativeClustering():
    #
    # The routine is somewhat sophisticated.
    #
    # While agglomerating, edges are chosen for 'contraction' and their two nodes are merged.
    # The merged node carries the superset of the edges of its constituents; duplicate edges
    # are combined via a weighted average according to their relative 'edgeLengths'.
    #
    # The edge weights used in the optimization are re-adjusted after every merge.
    # The dynamic weight of an edge is a weighted average of its original 'edgeWeight'
    # and the similarity of its two nodes (the distance between their 'nodeFeatures',
    # measured with 'metric').
    #
    # 'beta' sets the relative importance of the two terms:
    # beta=0.0 ignores node feature similarity completely,
    # beta=1.0 ignores edgeWeights completely.
    #
    # After that weighted average, the dynamic edge weight is scaled by a 'Ward factor',
    # which seems to give priority to edges that connect smaller components.
    # 'wardness' sets the importance of that factor; wardness=0.0 disables it.
    #
    # For reference, the relevant lines from vigra/hierarchical_clustering.hxx:
    #
    #    ValueType getEdgeWeight(const Edge & e){
    #        ...
    #        const ValueType wardFac = 2.0 / ( 1.0/std::pow(sizeU,wardness_) + 1/std::pow(sizeV,wardness_) );
    #        const ValueType fromEdgeIndicator = edgeIndicatorMap_[ee];
    #        ValueType fromNodeDist = metric_(nodeFeatureMap_[uu],nodeFeatureMap_[vv]);
    #        ValueType totalWeight = ((1.0-beta_)*fromEdgeIndicator + beta_*fromNodeDist)*wardFac;
    #        ...
    #    }
    #
    # The "most naive" hierarchical clustering, i.e. one based purely on pre-computed
    # edge weights (and no node features), is obtained with beta=0.0, wardness=0.0.
    #
    # (Ideally, we would also set nodeSizes=[0,...], but unfortunately,
    #  setting nodeSizes of 0.0 seems to result in strange bugs.
    #  Therefore, we can't avoid the effect of using cumulative node size during the agglomeration.)

    # Input sanity checks
    assert cleaned_edges.dtype == np.uint32
    assert cleaned_edges.ndim == 2
    assert cleaned_edges.shape[1] == 2
    assert edge_weights.shape == (len(cleaned_edges), )
    assert seed_labels.ndim == 1
    assert cleaned_edges.max() < len(seed_labels)

    num_nodes = len(seed_labels)
    num_edges = len(cleaned_edges)

    # Build the graph.
    # (The constructor arguments merely reserve RAM in advance;
    #  they don't initialize actual graph state.)
    graph = vg.AdjacencyListGraph(num_nodes, num_edges)

    # Internally, AdjacencyListGraph creates contiguous nodes up to the max id
    # it has seen, so adding the last node is enough to make all nodes exist.
    graph.addNode(num_nodes - 1)
    graph.addEdges(cleaned_edges)

    if num_classes is None:
        # One class per distinct seed value; 0 means 'unseeded'.
        num_classes = len(set(pd.unique(seed_labels)) - {0})

    output_labels = vg.agglomerativeClustering(
        graph=graph,
        edgeWeights=edge_weights,
        #edgeLengths=...,
        #nodeFeatures=...,
        #nodeSizes=...,
        nodeLabels=seed_labels,
        nodeNumStop=num_classes,
        beta=0.0,
        #metric='l1',
        wardness=0.0)

    # For some reason, the output labels do not necessarily carry the same
    # values as the seed labels, so they have to be relabeled here.
    #
    # Special cases to consider:
    #
    # 1. Some seeds may map to disconnected components, if either
    #    - the input contains disconnected components with identical seeds, or
    #    - the input contains no disconnected components, but agglomeration failed
    #      to connect two components with identical seeds (some other seeded
    #      component ended up blocking the path between them).
    #    Those components must still get the right input seed, and the seed is
    #    reported in the returned 'disconnected components' set.
    #
    # 2. Disconnected components that were NOT seeded are relabeled as 0, and
    #    contains_unlabeled_components=True is returned.

    # Unique (seed, agg) pairs.
    # (A seed can have more than one agg value, see point 1 above.)
    label_pairs = pd.DataFrame({'seed': seed_labels, 'agg': output_labels}).drop_duplicates()
    seeded_pairs = label_pairs.query('seed != 0')

    # Number of distinct agg values per seed class.
    agg_counts = seeded_pairs.groupby(['seed']).agg({'agg': 'size'})
    agg_counts.columns = ['component_count']

    # A seed class with more than one agg value wasn't fully agglomerated.
    disconnected_components = set(agg_counts.query('component_count > 1').index)

    # Agg values that belong to no seed mean that some component(s) stayed
    # unlabeled. (Point 2 above.)
    # NOTE: the local name '_seeded_agg_ids' is referenced from the query string below.
    _seeded_agg_ids = set(seeded_pairs['agg'])
    nonseeded_agg_ids = label_pairs.query('agg not in @_seeded_agg_ids')['agg']
    contains_unlabeled_components = (len(nonseeded_agg_ids) > 0)

    # Lookup table from output agg values back to original seed classes;
    # unseeded agg values are sent to 0.
    agg_values = seeded_pairs['agg'].values
    seed_values = seeded_pairs['seed'].values
    if contains_unlabeled_components:
        unseeded = np.fromiter(nonseeded_agg_ids, np.uint32)
        agg_values = np.concatenate((agg_values, unseeded))
        seed_values = np.concatenate((seed_values, np.zeros(len(unseeded), np.uint32)))

    LabelMapper(agg_values, seed_values).apply_inplace(output_labels)

    return CleaveResults(output_labels, disconnected_components, contains_unlabeled_components)