class OpArrayPiperWithAccessCount(Operator):
    """
    Array piper that counts how many times its execute function has been
    called.
    """
    Input = InputSlot(allow_mask=True)
    Output = OutputSlot(allow_mask=True)

    def __init__(self, *args, **kwargs):
        super(OpArrayPiperWithAccessCount, self).__init__(*args, **kwargs)
        self.clear()
        self._lock = threading.Lock()

    def setupOutputs(self):
        self.Output.meta.assignFrom(self.Input.meta)

    def execute(self, slot, subindex, roi, result):
        with self._lock:
            self.accessCount += 1
            self.requests.append(roi)
        req = self.Input.get(roi)
        req.writeInto(result)
        req.block()

    def propagateDirty(self, slot, subindex, roi):
        self.Output.setDirty(roi)

    def clear(self):
        self.requests = []
        self.accessCount = 0
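

# A minimal usage sketch (not part of the original module): wiring the piper
# into a small graph to count how many requests actually reach execute().
# It assumes the usual lazyflow Graph/slot API (Graph, setValue, slicing an
# OutputSlot and waiting on the request); the function name is hypothetical.
def _example_access_count():
    import numpy
    import vigra
    from lazyflow.graph import Graph

    data = vigra.taggedView(numpy.zeros((10, 10, 10), dtype=numpy.uint8),
                            'xyz')
    op = OpArrayPiperWithAccessCount(graph=Graph())
    op.Input.setValue(data)

    op.Output[0:5, 0:5, 0:5].wait()  # first request reaches execute()
    op.Output[0:5, 0:5, 0:5].wait()  # no cache in between, so it counts again
    assert op.accessCount == 2
    op.clear()
    assert op.accessCount == 0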
class OpCallWhenDirty(Operator):
    """
    Calls the attribute 'function' when Input gets dirty.

    The parameters of the dirty call are stored in the attributes
    'slot', 'subindex' and 'roi'.
    """
    Input = InputSlot(allow_mask=True)
    Output = OutputSlot(allow_mask=True)

    function = lambda: None
    slot = None
    roi = None

    def setupOutputs(self):
        self.Output.meta.assignFrom(self.Input.meta)

    def execute(self, slot, subindex, roi, result):
        req = self.Input.get(roi)
        req.writeInto(result)
        req.block()

    def propagateDirty(self, slot, subindex, roi):
        try:
            self.slot = slot
            self.subindex = subindex
            self.roi = roi
            self.function()
        finally:
            self.Output.setDirty(roi)
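

# A minimal sketch (assumption, not from the original source): hooking a
# callback to dirty notifications. Assumes InputSlot.setDirty() forwards to
# propagateDirty(); the function and variable names are hypothetical.
def _example_call_when_dirty():
    import numpy
    import vigra
    from lazyflow.graph import Graph

    op = OpCallWhenDirty(graph=Graph())
    notifications = []
    op.function = lambda: notifications.append((op.slot, op.roi))

    data = vigra.taggedView(numpy.zeros((4, 4, 4), dtype=numpy.uint8), 'xyz')
    op.Input.setValue(data)
    op.Input.setDirty(slice(None))  # triggers propagateDirty -> op.function
    assert len(notifications) > 0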
class DirtyAssert(Operator):
    Input = InputSlot()

    class WasSetDirty(Exception):
        pass

    def propagateDirty(self, slot, subindex, roi):
        raise DirtyAssert.WasSetDirty()
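

# A minimal sketch (assumption): DirtyAssert converts a dirty notification
# into an exception, so a test can prove that an upstream setDirty() call
# actually propagated. The helper below is hypothetical; 'graph' and
# 'producer_slot' stand for any lazyflow graph and OutputSlot under test.
def _example_dirty_assert(graph, producer_slot):
    asserter = DirtyAssert(graph=graph)
    asserter.Input.connect(producer_slot)
    try:
        producer_slot.setDirty(slice(None))
    except DirtyAssert.WasSetDirty:
        return True  # dirty propagation reached the asserter, as expected
    return False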
class OpArrayPiper2(Operator):
    name = "ArrayPiper"
    description = "simple piping operator"

    # Inputs
    Input = InputSlot()

    # Outputs
    Output = OutputSlot()

    def setupOutputs(self):
        inputSlot = self.inputs["Input"]
        self.outputs["Output"].meta.assignFrom(inputSlot.meta)
        self.Output.meta.axistags = vigra.AxisTags([
            vigra.AxisInfo("t"),
            vigra.AxisInfo("x"),
            vigra.AxisInfo("y"),
            vigra.AxisInfo("z"),
            vigra.AxisInfo("c")])

    def execute(self, slot, subindex, roi, result):
        key = roi.toSlice()
        req = self.inputs["Input"][key].writeInto(result)
        req.wait()
        return result

    def propagateDirty(self, slot, subindex, roi):
        key = roi.toSlice()
        # Check for proper name because subclasses may define extra inputs
        # (but decline to override propagateDirty).
        if slot.name == 'Input':
            self.outputs["Output"].setDirty(key)
        else:
            # If some input we don't know about is dirty (i.e. we are
            # subclassed by an operator with extra inputs), then mark the
            # entire output dirty.  This is the correct behavior for
            # e.g. 'sigma' inputs.
            self.outputs["Output"].setDirty(slice(None))

    def setInSlot(self, slot, subindex, roi, value):
        # Forward to output
        assert subindex == ()
        assert slot == self.Input
        key = roi.toSlice()
        self.outputs["Output"][key] = value
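

# A minimal sketch (assumption): OpArrayPiper2 forwards data unchanged but
# stamps a fixed 'txyzc' interpretation onto the output metadata, so it is
# fed with an already 5D, vigra-tagged array here. The function name is
# hypothetical.
def _example_array_piper2():
    import numpy
    import vigra
    from lazyflow.graph import Graph

    data = numpy.random.randint(0, 255, (1, 8, 8, 8, 1)).astype(numpy.uint8)
    data = vigra.taggedView(data, 'txyzc')
    op = OpArrayPiper2(graph=Graph())
    op.Input.setValue(data)

    out = op.Output[:].wait()
    assert out.shape == data.shape
    assert [tag.key for tag in op.Output.meta.axistags] == list('txyzc')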
class OpLazyConnectedComponents(Operator):
    name = "OpLazyConnectedComponents"
    supportedDtypes = [np.uint8, np.uint32, np.float32]

    # input data (usually segmented)
    Input = InputSlot()

    # the spatial shape of one chunk, in 'xyz' order
    # (even if the input does lack some axis, you *have* to provide a
    # 3-tuple here)
    ChunkShape = InputSlot(optional=True)

    # background with axes 'txyzc', spatial axes must be singletons
    # (this layout is needed to be compatible with OpLabelVolume)
    Background = InputSlot(optional=True)

    # the labeled output, internally cached (the two slots are the same)
    Output = OutputSlot()
    CachedOutput = OutputSlot()

    # cache access slots, see OpCompressedCache

    # fill the cache from an HDF5 group
    InputHdf5 = InputSlot(optional=True)

    # returns an object array of length 1 that contains a list of 2-tuples;
    # first item is block start, second item is block stop (exclusive)
    CleanBlocks = OutputSlot()

    # fills an HDF5 group with data from cache, requests must be for exactly
    # one block
    OutputHdf5 = OutputSlot()

    ### INTERNALS -- DO NOT USE ###
    _Input = OutputSlot()
    _Output = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpLazyConnectedComponents, self).__init__(*args, **kwargs)
        self._lock = HardLock()

        # reordering operators - we want to handle txyzc inside this operator
        self._opIn = OpReorderAxes(parent=self)
        self._opIn.AxisOrder.setValue('txyzc')
        self._opIn.Input.connect(self.Input)
        self._Input.connect(self._opIn.Output)

        self._opOut = OpReorderAxes(parent=self)
        self._opOut.Input.connect(self._Output)
        self.Output.connect(self._opOut.Output)
        self.CachedOutput.connect(self.Output)

    def setupOutputs(self):
        self.Output.meta.assignFrom(self.Input.meta)
        self.Output.meta.dtype = _LABEL_TYPE
        self._Output.meta.assignFrom(self._Input.meta)
        self._Output.meta.dtype = _LABEL_TYPE
        if self.Input.meta.dtype not in self.supportedDtypes:
            raise ValueError(
                "Cannot label data type {}".format(self.Input.meta.dtype))

        self.OutputHdf5.meta.assignFrom(self.Input.meta)
        self.CleanBlocks.meta.shape = (1,)
        self.CleanBlocks.meta.dtype = np.object

        self._setDefaultInternals()

        # go back to original order
        self._opOut.AxisOrder.setValue(self.Input.meta.getAxisKeys())

    def execute(self, slot, subindex, roi, result):
        if slot is self._Output:
            logger.debug("Execute for {}".format(roi))
            self._manager.hello()
            othersToWaitFor = set()
            chunks = self._roiToChunkIndex(roi)
            for chunk in chunks:
                othersToWaitFor |= self.growRegion(chunk)
            self._manager.waitFor(othersToWaitFor)
            self._manager.goodbye()

            self._mapArray(roi, result)
            self._report()
        elif slot == self.OutputHdf5:
            self._executeOutputHdf5(roi, result)
        elif slot == self.CleanBlocks:
            self._executeCleanBlocks(result)
        else:
            raise ValueError("Request to invalid slot {}".format(str(slot)))

    def propagateDirty(self, slot, subindex, roi):
        # Dirty handling is not trivial with this operator. The worst
        # case happens when an object disappears entirely, meaning that
        # the assigned labels would not be contiguous anymore. We could
        # check for that here, and set everything dirty if it's the
        # case, but this would require us to run the entire algorithm
        # once again, which is not desirable in propagateDirty(). The
        # simplest valid decision is to set the whole output dirty in
        # every case.
        self._setDefaultInternals()
        self.Output.setDirty(slice(None))

    def setInSlot(self, slot, subindex, key, value):
        if slot == self.InputHdf5:
            self._setInSlotInputHdf5(slot, subindex, key, value)
        else:
            raise ValueError(
                "setInSlot() not supported for slot {}".format(slot))

    # grow the requested region such that all labels inside that region are
    # final
    # @param chunkIndex the index of the chunk to finalize
    def growRegion(self, chunkIndex):
        ticket = self._manager.register()
        othersToWaitFor = set()

        # label this chunk
        self._label(chunkIndex)

        # we want to finalize every label in our first chunk
        localLabels = np.arange(1, self._numIndices[chunkIndex] + 1)
        localLabels = localLabels.astype(_LABEL_TYPE)
        chunksToProcess = [(chunkIndex, localLabels)]

        while chunksToProcess:
            # Breadth-First-Search, using list as FIFO
            currentChunk, localLabels = chunksToProcess.pop(0)

            # get the labels in use by this chunk
            # (no need to label this chunk, has been done already because it
            # was labeled as a neighbour of the last chunk, and the first
            # chunk was labeled above)
            localLabels = np.arange(1, self._numIndices[currentChunk] + 1)
            localLabels = localLabels.astype(_LABEL_TYPE)

            # tell the label manager that we are about to finalize some labels
            actualLabels, others = self._manager.checkoutLabels(
                currentChunk, localLabels, ticket)
            othersToWaitFor |= others

            # now we have got a list of local labels for this chunk, which no
            # other process is going to finalize

            # start merging adjacent regions
            otherChunks = self._generateNeighbours(currentChunk)
            for other in otherChunks:
                self._label(other)
                a, b = self._orderPair(currentChunk, other)
                me = 0 if a == currentChunk else 1
                res = self._merge(a, b)
                myLabels, otherLabels = res[me], res[1 - me]

                # determine which objects from this chunk continue in the
                # neighbouring chunk
                extendingLabels = [
                    b for a, b in zip(myLabels, otherLabels)
                    if a in actualLabels]
                extendingLabels = np.unique(extendingLabels).astype(
                    _LABEL_TYPE)

                # add the neighbour to our processing queue only if it
                # actually shares objects
                if extendingLabels.size > 0:
                    # check if already in queue
                    found = False
                    for i in xrange(len(chunksToProcess)):
                        if chunksToProcess[i][0] == other:
                            extendingLabels = np.union1d(
                                chunksToProcess[i][1], extendingLabels)
                            chunksToProcess[i] = (other, extendingLabels)
                            found = True
                            break
                    if not found:
                        chunksToProcess.append((other, extendingLabels))

        self._manager.unregister(ticket)
        return othersToWaitFor

    # label a chunk and store information
    @_chunksynchronized
    def _label(self, chunkIndex):
        if self._numIndices[chunkIndex] >= 0:
            # this chunk is already labeled
            return

        logger.debug("labeling chunk {} ({})".format(
            chunkIndex, self._chunkIndexToRoi(chunkIndex)))

        # get the raw data
        roi = self._chunkIndexToRoi(chunkIndex)
        inputChunk = self._Input.get(roi).wait()
        inputChunk = vigra.taggedView(inputChunk, axistags='txyzc')
        inputChunk = inputChunk.withAxes(*'xyz')

        # label the raw data
        assert self._background_valid, \
            "Background values are configured incorrectly"
        bg = self._background[chunkIndex[0], chunkIndex[4]]
        # a vigra bug forces us to convert to int here
        bg = int(bg)
        # TODO use labelMultiArray once available
        labeled = vigra.analysis.labelVolumeWithBackground(
            inputChunk, background_value=bg)
        labeled = vigra.taggedView(
            labeled, axistags='xyz').withAxes(*'txyzc')
        del inputChunk

        # TODO this could be more efficiently combined with merging

        # store the labeled data in cache
        self._cache[roi.toSlice()] = labeled

        # update the labeling information
        numLabels = labeled.max()  # we ignore 0 here
        self._numIndices[chunkIndex] = numLabels
        if numLabels > 0:
            with self._lock:
                # determine the offset
                # localLabel + offset = globalLabel (for localLabel > 0)
                offset = self._uf.makeNewIndex()
                self._globalLabelOffset[chunkIndex] = offset - 1

                # get n-1 more labels
                for i in range(numLabels - 1):
                    self._uf.makeNewIndex()

    # merge the labels of two adjacent chunks
    # the chunks have to be ordered lexicographically, e.g. by
    # self._orderPair
    @_chunksynchronized
    def _merge(self, chunkA, chunkB):
        if chunkB in self._mergeMap[chunkA]:
            return (np.zeros((0,), dtype=_LABEL_TYPE),) * 2
        assert not self._isFinal[chunkA]
        assert not self._isFinal[chunkB]

        self._mergeMap[chunkA].append(chunkB)

        hyperplane_roi_a, hyperplane_roi_b = \
            self._chunkIndexToHyperplane(chunkA, chunkB)
        hyperplane_index_a = hyperplane_roi_a.toSlice()
        hyperplane_index_b = hyperplane_roi_b.toSlice()

        label_hyperplane_a = self._cache[hyperplane_index_a]
        label_hyperplane_b = self._cache[hyperplane_index_b]

        # see if we have border labels at all
        adjacent_bool_inds = np.logical_and(label_hyperplane_a > 0,
                                            label_hyperplane_b > 0)
        if not np.any(adjacent_bool_inds):
            return (np.zeros((0,), dtype=_LABEL_TYPE),) * 2

        # check if the labels do actually belong to the same component
        hyperplane_a = self._Input[hyperplane_index_a].wait()
        hyperplane_b = self._Input[hyperplane_index_b].wait()
        adjacent_bool_inds = np.logical_and(adjacent_bool_inds,
                                            hyperplane_a == hyperplane_b)

        # union find manipulations are critical
        with self._lock:
            map_a = self.localToGlobal(chunkA)
            map_b = self.localToGlobal(chunkB)
            labels_a = map_a[label_hyperplane_a[adjacent_bool_inds]]
            labels_b = map_b[label_hyperplane_b[adjacent_bool_inds]]
            for a, b in zip(labels_a, labels_b):
                assert a not in self._globalToFinal, "Invalid merge"
                assert b not in self._globalToFinal, "Invalid merge"
                self._uf.makeUnion(a, b)

        logger.debug("merged chunks {} and {}".format(chunkA, chunkB))

        correspondingLabelsA = label_hyperplane_a[adjacent_bool_inds]
        correspondingLabelsB = label_hyperplane_b[adjacent_bool_inds]
        return correspondingLabelsA, correspondingLabelsB

    # get a rectangular region with final global labels
    # @param roi region of interest
    # @param result array of shape roi.stop - roi.start, will be filled
    def _mapArray(self, roi, result):
        assert np.all(roi.stop - roi.start == result.shape)
        logger.debug("mapping roi {}".format(roi))
        indices = self._roiToChunkIndex(roi)
        for idx in indices:
            newroi = self._chunkIndexToRoi(idx)
            newroi.stop = np.minimum(newroi.stop, roi.stop)
            newroi.start = np.maximum(newroi.start, roi.start)
            self._mapChunk(idx)
            chunk = self._cache[newroi.toSlice()]
            newroi.start -= roi.start
            newroi.stop -= roi.start
            s = newroi.toSlice()
            result[s] = chunk

    # store a chunk with final labels in cache
    @_chunksynchronized
    def _mapChunk(self, chunkIndex):
        if self._isFinal[chunkIndex]:
            return

        newroi = self._chunkIndexToRoi(chunkIndex)
        s = newroi.toSlice()
        chunk = self._cache[s]
        labels = self.localToGlobal(chunkIndex)
        labels = self.globalToFinal(chunkIndex[0], chunkIndex[4], labels)
        self._cache[s] = labels[chunk]

        self._isFinal[chunkIndex] = True

    # returns an array of global labels in use by this chunk. This array can
    # be used as a mapping via
    #   mapping = localToGlobal(...)
    #   mapped = mapping[locallyLabeledArray]
    # The global labels are updated to their current state according to the
    # global UnionFind structure.
    def localToGlobal(self, chunkIndex):
        offset = self._globalLabelOffset[chunkIndex]
        numLabels = self._numIndices[chunkIndex]
        labels = np.arange(1, numLabels + 1, dtype=_LABEL_TYPE) + offset

        labels = np.asarray(map(self._uf.findIndex, labels),
                            dtype=_LABEL_TYPE)

        # we got 'numLabels' real labels, and one label '0', so our
        # output has to have numLabels+1 elements
        out = np.zeros((numLabels + 1,), dtype=_LABEL_TYPE)
        out[1:] = labels
        return out

    # map an array of global indices to final labels
    # after calling this function, the labels passed in may not be used with
    # UnionFind.makeUnion any more!
    @threadsafe
    def globalToFinal(self, t, c, labels):
        newlabels = labels.copy()
        d = self._globalToFinal[(t, c)]
        labeler = self._labelIterators[(t, c)]
        for k in np.unique(labels):
            l = self._uf.findIndex(k)
            if l == 0:
                continue

            if l not in d:
                nextLabel = labeler.next()
                d[l] = nextLabel
            newlabels[labels == k] = d[l]
        return newlabels

    ##########################################################################
    ##################### HELPER METHODS #####################################
    ##########################################################################

    # create roi object from chunk index
    def _chunkIndexToRoi(self, index):
        shape = self._shape
        start = self._chunkShape * np.asarray(index)
        stop = self._chunkShape * (np.asarray(index) + 1)
        stop = np.where(stop > shape, shape, stop)
        roi = SubRegion(self.Input,
                        start=tuple(start), stop=tuple(stop))
        return roi

    # create a list of chunk indices needed for a particular roi
    def _roiToChunkIndex(self, roi):
        cs = self._chunkShape
        start = np.asarray(roi.start)
        stop = np.asarray(roi.stop)
        start_cs = start / cs
        stop_cs = stop / cs
        # add one if division was not even
        stop_cs += np.where(stop % cs, 1, 0)
        iters = [xrange(start_cs[i], stop_cs[i]) for i in range(5)]
        chunks = list(itertools.product(*iters))
        return chunks

    # compute the adjacent hyperplanes of two chunks (1 pix wide)
    # @return 2-tuple of roi's for the respective chunk
    def _chunkIndexToHyperplane(self, chunkA, chunkB):
        rev = False
        assert chunkA[0] == chunkB[0] and chunkA[4] == chunkB[4], \
            "these chunks are not spatially adjacent"

        # just iterate over spatial axes
        for i in range(1, 4):
            if chunkA[i] > chunkB[i]:
                rev = True
                chunkA, chunkB = chunkB, chunkA
            if chunkA[i] < chunkB[i]:
                roiA = self._chunkIndexToRoi(chunkA)
                roiB = self._chunkIndexToRoi(chunkB)
                start = np.asarray(roiA.start)
                start[i] = roiA.stop[i] - 1
                roiA.start = tuple(start)
                stop = np.asarray(roiB.stop)
                stop[i] = roiB.start[i] + 1
                roiB.stop = tuple(stop)
        if rev:
            return roiB, roiA
        else:
            return roiA, roiB

    # generate a list of adjacent chunks
    def _generateNeighbours(self, chunkIndex):
        n = []
        idx = np.asarray(chunkIndex, dtype=np.int)
        # only spatial neighbours are considered
        for i in range(1, 4):
            if idx[i] > 0:
                new = idx.copy()
                new[i] -= 1
                n.append(tuple(new))
            if idx[i] + 1 < self._chunkArrayShape[i]:
                new = idx.copy()
                new[i] += 1
                n.append(tuple(new))
        return n

    # fills attributes with standard values, call on each setupOutputs
    def _setDefaultInternals(self):
        # chunk array shape calculation
        shape = self._Input.meta.shape
        if self.ChunkShape.ready():
            chunkShape = (1,) + self.ChunkShape.value + (1,)
        elif self._Input.meta.ideal_blockshape is not None and \
                np.prod(self._Input.meta.ideal_blockshape) > 0:
            chunkShape = self._Input.meta.ideal_blockshape
        else:
            chunkShape = self._automaticChunkShape(self._Input.meta.shape)
        assert len(shape) == len(chunkShape), \
            "Encountered an invalid chunkShape"
        chunkShape = np.minimum(shape, chunkShape)
        f = lambda i: shape[i] // chunkShape[i] + \
            (1 if shape[i] % chunkShape[i] else 0)
        self._chunkArrayShape = tuple(map(f, range(len(shape))))
        self._chunkShape = np.asarray(chunkShape, dtype=np.int)
        self._shape = shape

        # determine the background values
        self._background = np.zeros((shape[0], shape[4]),
                                    dtype=self.Input.meta.dtype)
        if self.Background.ready():
            bg = self.Background[...].wait()
            bg = vigra.taggedView(bg, axistags="txyzc").withAxes('t', 'c')
            # we might have an old value set for the background value;
            # ignore it until it is configured correctly, or execute is
            # called
            if bg.size > 1 and \
                    (shape[0] != bg.shape[0] or shape[4] != bg.shape[1]):
                self._background_valid = False
            else:
                self._background_valid = True
                self._background[:] = bg
        else:
            self._background_valid = True

        # manager object
        self._manager = _LabelManager()

        ### local labels ###
        # cache for local labels
        # adjust cache chunk shape to our chunk shape
        cs = tuple(map(_get_next_power, self._chunkShape))
        logger.debug("Creating cache with chunk shape {}".format(cs))
        self._cache = vigra.ChunkedArrayCompressed(
            shape, dtype=_LABEL_TYPE, chunk_shape=cs)

        ### global indices ###
        # offset (global labels - local labels) per chunk
        self._globalLabelOffset = np.ones(self._chunkArrayShape,
                                          dtype=_LABEL_TYPE)
        # keep track of number of indices in chunk (-1 == not labeled yet)
        self._numIndices = -np.ones(self._chunkArrayShape, dtype=np.int32)

        # union find data structure, tells us for every global index to
        # which label it belongs
        self._uf = UnionFindArray(_LABEL_TYPE(1))

        ### global labels ###
        # keep track of assigned global labels
        gen = partial(InfiniteLabelIterator, 1, dtype=_LABEL_TYPE)
        self._labelIterators = defaultdict(gen)
        self._globalToFinal = defaultdict(dict)
        self._isFinal = np.zeros(self._chunkArrayShape, dtype=np.bool)

        ### algorithmic ###
        # keep track of merged regions
        self._mergeMap = defaultdict(list)

        # locks that keep threads from changing a specific chunk
        self._chunk_locks = defaultdict(HardLock)

    def _executeCleanBlocks(self, destination):
        assert destination.shape == (1,)
        finalIndices = np.where(self._isFinal)

        def ind2tup(ind):
            roi = self._chunkIndexToRoi(ind)
            return (roi.start, roi.stop)

        destination[0] = list(map(ind2tup, zip(*finalIndices)))

    def _executeOutputHdf5(self, roi, destination):
        logger.debug("Servicing request for hdf5 block {}".format(roi))

        assert isinstance(destination, h5py.Group), \
            "OutputHdf5 slot requires an hdf5 GROUP to copy into " \
            "(not a numpy array)."
        index = self._roiToChunkIndex(roi)[0]
        block_roi = self._chunkIndexToRoi(index)
        valid = np.all(roi.start == block_roi.start)
        valid = valid and np.all(roi.stop == block_roi.stop)
        assert valid, "OutputHdf5 slot requires roi to be exactly one block."

        name = str([block_roi.start, block_roi.stop])
        assert name not in destination, \
            "destination hdf5 group already has a dataset " \
            "with this block's name"
        destination.create_dataset(name, shape=self._chunkShape,
                                   dtype=_LABEL_TYPE,
                                   data=self._cache[block_roi.toSlice()])

    def _setInSlotInputHdf5(self, slot, subindex, roi, value):
        logger.debug("Setting block {} from hdf5".format(roi))
        assert isinstance(value, h5py.Dataset), \
            "InputHdf5 slot requires an hdf5 Dataset to copy from " \
            "(not a numpy array)."
        indices = self._roiToChunkIndex(roi)
        for idx in indices:
            cacheroi = self._chunkIndexToRoi(idx)
            cacheroi.stop = np.minimum(cacheroi.stop, roi.stop)
            cacheroi.start = np.maximum(cacheroi.start, roi.start)
            dsroi = cacheroi.copy()
            dsroi.start -= roi.start
            dsroi.stop -= roi.start
            self._cache[cacheroi.toSlice()] = value[dsroi.toSlice()]
            self._isFinal[idx] = True

    # print a summary of blocks in use and their storage volume
    def _report(self):
        m = {np.uint8: 1, np.uint16: 2, np.uint32: 4, np.uint64: 8}
        nStoredChunks = self._isFinal.sum()
        nChunks = self._isFinal.size
        cachedMB = self._cache.data_bytes / 1024.0**2
        rawMB = self._cache.size * m[_LABEL_TYPE]
        logger.debug("Currently stored chunks: {}/{} ({:.1f} MB)".format(
            nStoredChunks, nChunks, cachedMB))

    # order a pair of chunk indices lexicographically
    # (ret[0] is top-left-in-front-of ret[1])
    @staticmethod
    def _orderPair(tupA, tupB):
        for a, b in zip(tupA, tupB):
            if a < b:
                return tupA, tupB
            if a > b:
                return tupB, tupA
        raise ValueError(
            "tupA={} and tupB={} are the same".format(tupA, tupB))

    # choose chunk shape appropriate for a particular dataset
    # TODO: this is by no means an optimal decision -> extend
    @staticmethod
    def _automaticChunkShape(shape):
        # use about 16 million pixels per chunk
        default = (1, 256, 256, 256, 1)
        if np.prod(shape) < 2 * np.prod(default):
            return (1,) + shape[1:4] + (1,)
        else:
            return default
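

# A minimal usage sketch (assumption, not part of the original source):
# lazily labeling a small binary volume. Only the chunks touched by the
# request are labeled and merged; untouched chunks stay unlabeled. The
# function name and the concrete chunk shape are hypothetical.
def _example_lazy_connected_components():
    import numpy
    import vigra
    from lazyflow.graph import Graph

    vol = numpy.zeros((100, 100, 100), dtype=numpy.uint8)
    vol[10:20, 10:20, 10:20] = 1  # a single foreground object
    vol = vigra.taggedView(vol, 'xyz')

    op = OpLazyConnectedComponents(graph=Graph())
    op.ChunkShape.setValue((50, 50, 50))  # spatial chunk shape, 'xyz' order
    op.Input.setValue(vol)

    # requesting a sub-block only finalizes the chunks it overlaps
    labeled = op.Output[0:50, 0:50, 0:50].wait()
    assert labeled.max() == 1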