Example #1
    def execute(self, slot, subindex, roi, result):
        t1 = time.perf_counter()
        key = roi.toSlice()
        nlabels = self.inputs["LabelsCount"].value

        traceLogger.debug(
            "OpPredictRandomForest: Requesting classifier. roi={}".format(roi))
        forests = self.inputs["Classifier"][:].wait()

        if any(forest is None for forest in forests):
            # Training operator may return 'None' if there was no data to train with
            return np.zeros(np.subtract(roi.stop, roi.start),
                            dtype=np.float32)[...]

        traceLogger.debug("OpPredictRandomForest: Got classifier")
        #assert RF.labelCount() == nlabels, "ERROR: OpPredictRandomForest, labelCount differs from true labelCount! %r vs. %r" % (RF.labelCount(), nlabels)

        newKey = key[:-1]
        newKey += (slice(0, self.inputs["Image"].meta.shape[-1], None), )

        res = self.inputs["Image"][newKey].wait()

        shape = res.shape
        prod = np.prod(shape[:-1])
        res.shape = (prod, shape[-1])
        features = res

        predictions = [0] * len(forests)

        t2 = time.perf_counter()

        pool = RequestPool()

        def predict_forest(i):
            predictions[i] = forests[i].predict(
                np.asarray(features, dtype=np.float32))
            predictions[i] = predictions[i].reshape(result.shape[:-1])

        for i, f in enumerate(forests):
            req = pool.request(partial(predict_forest, i))

        pool.wait()
        pool.clean()
        #predictions[0] = forests[0].predict(np.asarray(features, dtype = np.float32), normalize = False)
        #predictions[0] = predictions[0].reshape(result.shape)
        prediction = np.dstack(predictions)
        result[...] = prediction

        # If our LabelsCount is higher than the number of labels in the training set,
        # then our results aren't really valid.  FIXME !!!
        # Duplicate the last label's predictions
        #for c in range(result.shape[-1]):
        #    result[...,c] = prediction[...,min(c+key[-1].start, prediction.shape[-1]-1)]

        t3 = time.perf_counter()

        logger.debug(
            "Predict took %fseconds, actual RF time was %fs, feature time was %fs"
            % (t3 - t1, t3 - t2, t2 - t1))
        return result
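The pattern shared by most of the examples on this page: pre-allocate a result container, wrap a per-index worker with functools.partial, hand every worker to a RequestPool, then wait() and clean(). Below is a minimal, self-contained sketch of that pattern, assuming lazyflow's Request/RequestPool API as used above; the worker function and data are purely illustrative.

    from functools import partial

    import numpy as np
    from lazyflow.request import Request, RequestPool

    def parallel_map(fun, chunks):
        """Apply fun to each chunk in parallel; each request owns one result slot."""
        results = [None] * len(chunks)

        def work(i):
            # Requests write to disjoint indices, so no locking is needed.
            results[i] = fun(chunks[i])

        pool = RequestPool()
        for i in range(len(chunks)):
            pool.add(Request(partial(work, i)))
        pool.wait()   # block until every request has finished
        pool.clean()  # drop internal references to the finished requests
        return results

    # e.g. parallel_map(np.mean, np.array_split(np.arange(100.0), 7))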
Example #2
    def execute(self, slot, subindex, roi, result):
        assert slot == self._ReorderedOutput
        pool = RequestPool()

        t_ind = 0
        for t in range(roi.start[0], roi.stop[0]):
            c_ind = 0
            for c in range(roi.start[-1], roi.stop[-1]):
                newroi = roi.copy()
                newroi.start[0] = t
                newroi.stop[0] = t+1
                newroi.start[-1] = c
                newroi.stop[-1] = c+1

                req = self._op.Output.get(newroi)
                resView = result[t_ind:t_ind+1, ..., c_ind:c_ind+1]
                req.writeInto(resView)

                pool.add(req)

                c_ind += 1

            t_ind += 1

        pool.wait()
        pool.clean()
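Example #2 takes a different route: rather than collecting results in a Python list, each request streams its output directly into a view of the destination array via writeInto(), avoiding an intermediate copy. A condensed sketch of that idiom, assuming a lazyflow output slot with .get(roi) and roi objects shaped like the ones above (all names illustrative):

    from lazyflow.request import RequestPool

    def fetch_timesteps(output_slot, roi, result):
        """Fetch one timestep per request, each writing straight into result."""
        pool = RequestPool()
        for t_ind, t in enumerate(range(roi.start[0], roi.stop[0])):
            newroi = roi.copy()
            newroi.start[0], newroi.stop[0] = t, t + 1
            req = output_slot.get(newroi)
            req.writeInto(result[t_ind:t_ind + 1, ...])  # request fills this view
            pool.add(req)
        pool.wait()
        pool.clean()
        return result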
Example #3
    def _label(self, roi, result):
        result = vigra.taggedView(result, axistags=self.Output.meta.axistags)
        # get the background values
        bg = self.Background[...].wait()
        bg = vigra.taggedView(bg, axistags=self.Background.meta.axistags)
        bg = bg.withAxes(*'ct')
        assert np.all(self.Background.meta.shape[3:] ==
                      self.Input.meta.shape[3:]),\
            "Shape of background values incompatible to shape of Input"

        # do labeling in parallel over channels and time slices
        pool = RequestPool()

        start = np.asarray(roi.start, dtype=int)
        stop = np.asarray(roi.stop, dtype=int)
        for ti, t in enumerate(range(roi.start[4], roi.stop[4])):
            start[4], stop[4] = t, t+1
            for ci, c in enumerate(range(roi.start[3], roi.stop[3])):
                start[3], stop[3] = c, c+1
                newRoi = SubRegion(self.Output,
                                   start=tuple(start), stop=tuple(stop))
                resView = result[..., ci, ti].withAxes(*'xyz')
                req = Request(partial(self._label3d, newRoi,
                                      bg[c, t], resView))
                pool.add(req)

        logger.debug(
            "{}: Computing connected components for ROI {} ...".format(
                self.name, roi))
        pool.wait()
        pool.clean()
        logger.debug("{}: Connected components computed.".format(
            self.name))
Example #4
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                fullBlockShape = numpy.array([self.blockShape.value for i in self.Input.meta.shape])
                fun = self.inputs["Function"].value
                #data = self.inputs["Input"][:].wait()
                #split up requests into blocks
                shape = self.Input.meta.shape
                numBlocks = numpy.ceil(shape/(1.0*fullBlockShape)).astype("int")
                blockCache = numpy.ndarray(shape = numpy.prod(numBlocks), dtype=self.Output.meta.dtype)
                pool = RequestPool()
                #blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(*[range(i) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)
                
                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)
                    
                for i,f in enumerate(blockCache):
                    req = pool.request(partial(predict_block,i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
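Example #4 (and its reformatted twins further down) reduces over an image in two stages: split the input shape into block ROIs, apply the function to every block in parallel, then apply it once more to the per-block results. The block decomposition itself is plain numpy; here it is as a standalone sketch, assuming roiToSlice from lazyflow.roi as used above:

    import itertools

    import numpy
    from lazyflow.roi import roiToSlice

    def block_keys(shape, block_shape):
        """Slice keys tiling shape in blocks of block_shape, clipped at the edges."""
        shape = numpy.asarray(shape)
        block_shape = numpy.asarray(block_shape)
        num_blocks = numpy.ceil(shape / (1.0 * block_shape)).astype(int)
        keys = []
        for b in itertools.product(*[range(n) for n in num_blocks]):
            start = numpy.asarray(b) * block_shape
            stop = numpy.minimum(start + block_shape, shape)  # clip at the border
            keys.append(roiToSlice(start, stop))
        return keys

    # block_keys((10, 10), (4, 4)) yields nine keys covering the 10x10 array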
Example #5
    def _label(self, roi, result):
        result = vigra.taggedView(result, axistags=self.Output.meta.axistags)
        # get the background values
        bg = self.Background[...].wait()
        bg = vigra.taggedView(bg, axistags=self.Background.meta.axistags)
        bg = bg.withAxes(*"ct")
        assert np.all(
            self.Background.meta.shape[0] == self.Input.meta.shape[0]
        ), "Shape of background values incompatible with shape of Input"
        assert np.all(
            self.Background.meta.shape[4] == self.Input.meta.shape[4]
        ), "Shape of background values incompatible with shape of Input"

        # do labeling in parallel over channels and time slices
        pool = RequestPool()

        start = np.asarray(roi.start, dtype=int)
        stop = np.asarray(roi.stop, dtype=int)
        for ti, t in enumerate(range(roi.start[0], roi.stop[0])):
            start[0], stop[0] = t, t + 1
            for ci, c in enumerate(range(roi.start[4], roi.stop[4])):
                start[4], stop[4] = c, c + 1
                newRoi = SubRegion(self.Output, start=tuple(start), stop=tuple(stop))
                resView = result[ti, ..., ci].withAxes(*"xyz")
                req = Request(partial(self._label3d, newRoi, bg[c, t], resView))
                pool.add(req)

        logger.debug("{}: Computing connected components for ROI {} ...".format(self.name, roi))
        pool.wait()
        pool.clean()
        logger.debug("{}: Connected components computed.".format(self.name))
Example #6
    def execute(self, slot, subindex, rroi, result):
        key = roiToSlice(rroi.start,rroi.stop)

        cnt = 0
        written = 0
        start, stop = roi.sliceToRoi(key, self.outputs["Output"].meta.shape)
        assert (stop<=self.outputs["Output"].meta.shape).all()
        #axisindex = self.inputs["AxisIndex"].value
        flag = self.inputs["AxisFlag"].value
        axisindex = self.outputs["Output"].meta.axistags.index(flag)
        #ugly-ugly-ugly
        oldkey = list(key)
        oldkey.pop(axisindex)
        
        #print "STACKER: ", flag, axisindex
        #print "requesting an outslot from stacker:", key, result.shape
        #print "input slots total: ", len(self.inputs['Images'])
        requests = []
        
        pool = RequestPool()

        for i, inSlot in enumerate(self.inputs['Images']):
            req = None
            inTagKeys = [ax.key for ax in inSlot.meta.axistags]
            if flag in inTagKeys:
                slices = inSlot.meta.shape[axisindex]
                if (cnt + slices >= start[axisindex] and start[axisindex] - cnt < slices
                        and start[axisindex] + written < stop[axisindex]):
                    begin = 0
                    if cnt < start[axisindex]:
                        begin = start[axisindex] - cnt
                    end = slices
                    if cnt + end > stop[axisindex]:
                        end -= cnt + end - stop[axisindex]
                    key_ = copy.copy(oldkey)
                    key_.insert(axisindex, slice(begin, end, None))
                    reskey = [slice(None, None, None) for x in range(len(result.shape))]
                    reskey[axisindex] = slice(written, written+end-begin, None)

                    req = inSlot[tuple(key_)].writeInto(result[tuple(reskey)])
                    written += end - begin
                cnt += slices
            else:
                if cnt >= start[axisindex] and start[axisindex] + written < stop[axisindex]:
                    #print "key: ", key, "reskey: ", reskey, "oldkey: ", oldkey
                    #print "result: ", result.shape, "inslot:", inSlot.meta.shape
                    reskey = [slice(None, None, None) for s in oldkey]
                    reskey.insert(axisindex, written)
                    destArea = result[tuple(reskey)]
                    req = inSlot[tuple(oldkey)].writeInto(destArea)
                    written += 1
                cnt += 1

            if req is not None:
                pool.add(req)

        pool.wait()
        pool.clean()
Example #8
    def execute(self, slot, subindex, roi, result):
        t1 = time.time()
        key = roi.toSlice()
        nlabels=self.inputs["LabelsCount"].value

        traceLogger.debug("OpPredictRandomForest: Requesting classifier. roi={}".format(roi))
        forests=self.inputs["Classifier"][:].wait()

        if forests is None or any(x is None for x in forests):
            # Training operator may return 'None' if there was no data to train with
            return numpy.zeros(numpy.subtract(roi.stop, roi.start), dtype=numpy.float32)[...]

        traceLogger.debug("OpPredictRandomForest: Got classifier")
        #assert RF.labelCount() == nlabels, "ERROR: OpPredictRandomForest, labelCount differs from true labelCount! %r vs. %r" % (RF.labelCount(), nlabels)

        newKey = key[:-1]
        newKey += (slice(0,self.inputs["Image"].meta.shape[-1],None),)

        res = self.inputs["Image"][newKey].wait()

        shape=res.shape
        prod = numpy.prod(shape[:-1])
        res.shape = (prod, shape[-1])
        features=res

        predictions = [0]*len(forests)

        def predict_forest(number):
            predictions[number] = forests[number].predictProbabilities(numpy.asarray(features, dtype=numpy.float32))

        t2 = time.time()

        # predict the data with all the forests in parallel
        pool = RequestPool()

        for i,f in enumerate(forests):
            req = pool.request(partial(predict_forest, i))

        pool.wait()
        pool.clean()

        prediction=numpy.dstack(predictions)
        prediction = numpy.average(prediction, axis=2)
        prediction.shape =  shape[:-1] + (forests[0].labelCount(),)
        #prediction = prediction.reshape(*(shape[:-1] + (forests[0].labelCount(),)))

        # If our LabelsCount is higher than the number of labels in the training set,
        # then our results aren't really valid.  FIXME !!!
        # Duplicate the last label's predictions
        for c in range(result.shape[-1]):
            result[...,c] = prediction[...,min(c+key[-1].start, prediction.shape[-1]-1)]

        t3 = time.time()

        self.logger.debug("predict roi=%r took %fseconds, actual RF time was %fs, feature time was %fs" % (key, t3-t1, t3-t2, t2-t1))
        
        return result
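Example #8 combines the per-forest outputs by stacking them along a new trailing axis and averaging across it; the long comment in Example #19 below explains why the probabilities, not the label votes, must be averaged. The combination step in isolation (pure numpy, shapes illustrative):

    import numpy

    def average_probabilities(per_forest_probs):
        """per_forest_probs: list of (nSamples, nClasses) arrays, one per forest."""
        stacked = numpy.dstack(per_forest_probs)  # (nSamples, nClasses, nForests)
        return numpy.average(stacked, axis=2)     # (nSamples, nClasses)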
Example #9
    def execute(self, slot, subindex, roi, result):

        featList = []
        labelsList = []

        for i in range(len(self.Labels)):
            feats = self.Features[i]([]).wait()

            # TODO: we should be able to use self.Labels[i].value,
            # but the current implementation of Slot.value() does not
            # do the right thing.
            labels = self.Labels[i]([]).wait()

            featstmp, labelstmp = make_feature_array(feats, labels)
            featList.append(featstmp)
            labelsList.append(labelstmp)

        featMatrix = _concatenate(featList, axis=0)
        labelsMatrix = _concatenate(labelsList, axis=0)
        print "training on matrix:", featMatrix.shape, featMatrix.dtype

        if len(featMatrix) == 0 or len(labelsMatrix) == 0:
            result[:] = None
            return
        oob = [0] * self.ForestCount.value
        try:
            # Ensure there are no NaNs in the feature matrix
            # TODO: There should probably be a better way to fix this...
            featMatrix = numpy.asarray(featMatrix, dtype=numpy.float32)
            nanFeatMatrix = numpy.isnan(featMatrix)
            if nanFeatMatrix.any():
                warnings.warn("Feature matrix has NaN values!  Replacing with 0.0...")
                featMatrix[numpy.where(nanFeatMatrix)] = 0.0
            # train and store forests in parallel
            pool = RequestPool()
            for i in range(self.ForestCount.value):
                def train_and_store(number):
                    result[number] = vigra.learning.RandomForest(self._tree_count)
                    oob[number] = result[number].learnRF(featMatrix, numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                    print "intermediate oob:", oob[number]
                req = Request( partial(train_and_store, i) )
                pool.add( req )
            pool.wait()
            pool.clean()
        except:
            print("couldn't learn classifier")
            raise
        oob_total = numpy.mean(oob)
        print("training finished, out of bag error:", oob_total)
        return result
Example #10
    def predict(cls, X, method="classic"):
        """
        predict if the histograms in X correspond to missing regions
        do this for subsets of X in parallel
        """

        if cls._manager is None:
            cls._manager = SVMManager()

        assert len(
            X.shape
        ) == 2, "Prediction data must have shape (nSamples, nHistogramBins)."

        nBins = X.shape[1]

        if method == "classic":
            svm = PseudoSVC()
        else:
            try:
                svm = cls._manager.get(nBins)
            except SVMManager.NotTrainedError:
                # fail gracefully if not trained => responsibility of user!
                svm = PseudoSVC()

        y = np.zeros((len(X), )) * np.nan

        pool = RequestPool()

        chunkSize = 1000  # FIXME magic number??
        nChunks = len(X) // chunkSize + (1 if len(X) % chunkSize > 0 else 0)

        s = [
            slice(k * chunkSize, min((k + 1) * chunkSize, len(X)))
            for k in range(nChunks)
        ]

        def partFun(i):
            y[s[i]] = svm.predict(X[s[i]])

        for i in range(nChunks):
            req = Request(partial(partFun, i))
            pool.add(req)

        pool.wait()
        pool.clean()

        # not necessary
        # assert not np.any(np.isnan(y))
        return np.asarray(y)
Example #11
    def predict(cls, X, method='classic'):
        """
        predict if the histograms in X correspond to missing regions
        do this for subsets of X in parallel
        """

        if cls._manager is None:
            cls._manager = SVMManager()

        # svm input has to be (nSamples, nFeatures) -> for us: (nSamples = len(X), nFeatures = # of histogram bins)
        X_reshaped = np.zeros((len(X), len(X[0])))
        for i in range(len(X)):
            X_reshaped[i, :] = X[i]

        n_bins = len(X[0])

        if method == 'classic' or not have_sklearn:
            logger.warning("no real svm used! -> PseudoSVC")
            svm = PseudoSVC()
        else:
            # load samples for histograms of labeled regions
            try:
                svm = cls._manager.get(n_bins)
            except SVMManager.NotTrainedError:
                # fail gracefully if not trained => responsibility of user!
                svm = PseudoSVC()

        y = np.zeros((len(X),)) * np.nan

        pool = RequestPool()

        # chunk up all samples from X into chunks that will be predicted in parallel
        chunk_size = 1000  # FIXME magic number??
        n_chunks = len(X) // chunk_size + (1 if len(X) % chunk_size > 0 else 0)

        s = [slice(k * chunk_size, min((k + 1) * chunk_size, len(X)))
             for k in range(n_chunks)]

        def partFun(i):
            y[s[i]] = svm.predict(X_reshaped[s[i], :])

        for i in range(n_chunks):
            req = Request(partial(partFun, i))
            pool.add(req)

        pool.wait()
        pool.clean()
        return np.asarray(y)
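The chunking arithmetic shared by Examples #10, #11, and #13 is worth isolating: fixed-size slices over the sample axis, one request per slice, with the last chunk clipped to the array length. A pure-Python sketch:

    def chunk_slices(n_samples, chunk_size=1000):
        """Slices of at most chunk_size items covering range(n_samples)."""
        n_chunks = n_samples // chunk_size + (1 if n_samples % chunk_size else 0)
        return [slice(k * chunk_size, min((k + 1) * chunk_size, n_samples))
                for k in range(n_chunks)]

    # chunk_slices(2500) covers [0:1000], [1000:2000], [2000:2500]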
Example #12
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                shape = self.Input.meta.shape
                # self.blockshape has None in the last dimension to indicate that it should not be
                # handled block-wise. None is replaced with the image shape in the respective axis.
                fullBlockShape = []
                for u, v in zip(self.blockShape.value, shape):
                    if u is not None:
                        fullBlockShape.append(u)
                    else:
                        fullBlockShape.append(v)
                fullBlockShape = numpy.array(fullBlockShape,
                                             dtype=numpy.float64)

                # data = self.inputs["Input"][:].wait()
                # split up requests into blocks

                numBlocks = numpy.ceil(shape / fullBlockShape).astype("int")
                blockCache = numpy.ndarray(shape=numpy.prod(numBlocks),
                                           dtype=self.Output.meta.dtype)
                pool = RequestPool()
                # blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(
                    *[list(range(i)) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)

                fun = self.inputs["Function"].value

                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)

                for i, f in enumerate(blockCache):
                    req = pool.request(partial(predict_block, i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
Example #13
    def predict(cls, X, method='classic'):
        '''
        predict if the histograms in X correspond to missing regions
        do this for subsets of X in parallel
        '''

        if cls._manager is None:
            cls._manager = SVMManager()

        assert len(X.shape) == 2, \
            "Prediction data must have shape (nSamples, nHistogramBins)."

        nBins = X.shape[1]

        if method == 'classic' or not havesklearn:
            svm = PseudoSVC()
        else:
            try:
                svm = cls._manager.get(nBins)
            except SVMManager.NotTrainedError:
                # fail gracefully if not trained => responsibility of user!
                svm = PseudoSVC()

        y = np.zeros((len(X),))*np.nan

        pool = RequestPool()

        chunkSize = 1000  # FIXME magic number??
        nChunks = len(X)//chunkSize + (1 if len(X) % chunkSize > 0 else 0)

        s = [slice(k*chunkSize, min((k+1)*chunkSize, len(X)))
             for k in range(nChunks)]

        def partFun(i):
            y[s[i]] = svm.predict(X[s[i]])

        for i in range(nChunks):
            req = Request(partial(partFun, i))
            pool.add(req)

        pool.wait()
        pool.clean()

        # not necessary
        # assert not np.any(np.isnan(y))
        return np.asarray(y)
Example #14
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                shape = self.Input.meta.shape
                # self.blockshape has None in the last dimension to indicate that it should not be
                # handled block-wise. None is replaced with the image shape in the respective axis.
                fullBlockShape = []
                for u, v in zip(self.blockShape.value, shape):
                    if u is not None:
                        fullBlockShape.append(u)
                    else:
                        fullBlockShape.append(v)
                fullBlockShape = numpy.array(fullBlockShape, dtype=numpy.float64)

                #data = self.inputs["Input"][:].wait()
                #split up requests into blocks

                numBlocks = numpy.ceil(shape / fullBlockShape).astype("int")
                blockCache = numpy.ndarray(shape = numpy.prod(numBlocks), dtype=self.Output.meta.dtype)
                pool = RequestPool()
                #blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(*[list(range(i)) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)

                fun = self.inputs["Function"].value
                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)

                for i,f in enumerate(blockCache):
                    req = pool.request(partial(predict_block,i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
Example #15
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                fullBlockShape = numpy.array(
                    [self.blockShape.value for i in self.Input.meta.shape])
                fun = self.inputs["Function"].value
                #data = self.inputs["Input"][:].wait()
                #split up requests into blocks
                shape = self.Input.meta.shape
                numBlocks = numpy.ceil(shape /
                                       (1.0 * fullBlockShape)).astype("int")
                blockCache = numpy.ndarray(shape=numpy.prod(numBlocks),
                                           dtype=self.Output.meta.dtype)
                pool = RequestPool()
                #blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(*[range(i) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)

                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)

                for i, f in enumerate(blockCache):
                    req = pool.request(partial(predict_block, i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
Example #16
    def execute(self, slot, subindex, rroi, result):
        assert slot == self.Features or slot == self.Output
        if slot == self.Features:
            key = roiToSlice(rroi.start, rroi.stop)
            index = subindex[0]
            key = list(key)
            channelIndex = self.Input.meta.axistags.index('c')
            
            # Translate channel slice to the correct location for the output slot.
            key[channelIndex] = slice(self.featureOutputChannels[index][0] + key[channelIndex].start,
                                      self.featureOutputChannels[index][0] + key[channelIndex].stop)
            rroi = SubRegion(self.Output, pslice=key)

            # Get output slot region for this channel
            return self.execute(self.Output, (), rroi, result)
        elif slot == self.outputs["Output"]:
            key = rroi.toSlice()
            
            logger.debug("OpPixelFeaturesPresmoothed: request %s" % (rroi.pprint(),))
            
            cnt = 0
            written = 0
            assert (rroi.stop<=self.outputs["Output"].meta.shape).all()
            flag = 'c'
            channelAxis=self.inputs["Input"].meta.axistags.index('c')
            axisindex = channelAxis
            oldkey = list(key)
            oldkey.pop(axisindex)


            inShape  = self.inputs["Input"].meta.shape
            hasChannelAxis = (self.Input.meta.axistags.axisTypeCount(vigra.AxisType.Channels) > 0)
            #if (self.Input.meta.axistags.axisTypeCount(vigra.AxisType.Channels) == 0):
            #    noChannels = True
            inAxistags = self.inputs["Input"].meta.axistags
                
            shape = self.outputs["Output"].meta.shape
            axistags = self.outputs["Output"].meta.axistags

            result = result.view(vigra.VigraArray)
            result.axistags = copy.copy(axistags)


            hasTimeAxis = self.inputs["Input"].meta.axistags.axisTypeCount(vigra.AxisType.Time)
            timeAxis=self.inputs["Input"].meta.axistags.index('t')

            subkey = popFlagsFromTheKey(key,axistags,'c')
            subshape=popFlagsFromTheKey(shape,axistags,'c')
            at2 = copy.copy(axistags)
            at2.dropChannelAxis()
            subshape=popFlagsFromTheKey(subshape,at2,'t')
            subkey = popFlagsFromTheKey(subkey,at2,'t')

            oldstart, oldstop = roi.sliceToRoi(key, shape)

            start, stop = roi.sliceToRoi(subkey, subshape)
            maxSigma = max(0.7, self.maxSigma)  # we use 0.7 as an approximation of not doing any smoothing
            # (smoothing was already applied previously)
            
            # The region of the smoothed image we need to give to the feature filter (in terms of INPUT coordinates)
            # 0.7, because the features receive a pre-smoothed array and don't need much of a neighborhood 
            vigOpSourceStart, vigOpSourceStop = roi.enlargeRoiForHalo(start, stop, subshape, 0.7, self.WINDOW_SIZE)
            
            
            # The region of the input that we need to give to the smoothing operator (in terms of INPUT coordinates)
            newStart, newStop = roi.enlargeRoiForHalo(vigOpSourceStart, vigOpSourceStop, subshape, maxSigma, self.WINDOW_SIZE)
            
            newStartSmoother = roi.TinyVector(start - vigOpSourceStart)
            newStopSmoother = roi.TinyVector(stop - vigOpSourceStart)
            roiSmoother = roi.roiToSlice(newStartSmoother, newStopSmoother)

            # Translate coordinates (now in terms of smoothed image coordinates)
            vigOpSourceStart = roi.TinyVector(vigOpSourceStart - newStart)
            vigOpSourceStop = roi.TinyVector(vigOpSourceStop - newStart)

            readKey = roi.roiToSlice(newStart, newStop)

            writeNewStart = start - newStart
            writeNewStop = writeNewStart +  stop - start

            treadKey=list(readKey)

            if hasTimeAxis:
                if timeAxis < channelAxis:
                    treadKey.insert(timeAxis, key[timeAxis])
                else:
                    treadKey.insert(timeAxis-1, key[timeAxis])
            if  self.inputs["Input"].meta.axistags.axisTypeCount(vigra.AxisType.Channels) == 0:
                treadKey =  popFlagsFromTheKey(treadKey,axistags,'c')
            else:
                treadKey.insert(channelAxis, slice(None,None,None))

            treadKey=tuple(treadKey)

            req = self.inputs["Input"][treadKey]
            
            sourceArray = req.wait()
            req.clean()
            #req.result = None
            req.destination = None
            if sourceArray.dtype != numpy.float32:
                sourceArrayF = sourceArray.astype(numpy.float32)
                try:
                    sourceArray.resize((1,), refcheck = False)
                except:
                    pass
                del sourceArray
                sourceArray = sourceArrayF
                
            #if (self.Input.meta.axistags.axisTypeCount(vigra.AxisType.Channels) == 0):
                #add a channel dimension to make the code afterwards more uniform
            #    sourceArray = sourceArray.view(numpy.ndarray)
            #    sourceArray = sourceArray.reshape(sourceArray.shape+(1,))
            sourceArrayV = sourceArray.view(vigra.VigraArray)
            sourceArrayV.axistags =  copy.copy(inAxistags)
            
            dimCol = len(self.scales)
            dimRow = self.matrix.shape[0]

            sourceArraysForSigmas = [None]*dimCol

            #connect individual operators
            try:
                for j in range(dimCol):
                    hasScale = False
                    for i in range(dimRow):
                        if self.matrix[i,j]:
                            hasScale = True
                    if not hasScale:
                        continue
                    destSigma = 1.0
                    if self.scales[j] > destSigma:
                        tempSigma = math.sqrt(self.scales[j]**2 - destSigma**2)
                    else:
                        destSigma = 0.0
                        tempSigma = self.scales[j]
                    vigOpSourceShape = list(vigOpSourceStop - vigOpSourceStart)
                                        
                    if hasTimeAxis:
                        if timeAxis < channelAxis:
                            vigOpSourceShape.insert(timeAxis, ( oldstop - oldstart)[timeAxis])
                        else:
                            vigOpSourceShape.insert(timeAxis-1, ( oldstop - oldstart)[timeAxis])
                        vigOpSourceShape.insert(channelAxis, inShape[channelAxis])
    
                        sourceArraysForSigmas[j] = numpy.ndarray(tuple(vigOpSourceShape),numpy.float32)
                        
                        for i,vsa in enumerate(sourceArrayV.timeIter()):
                            droi = (tuple(vigOpSourceStart._asint()), tuple(vigOpSourceStop._asint()))
                            tmp_key = getAllExceptAxis(len(sourceArraysForSigmas[j].shape),timeAxis, i) 
                            sourceArraysForSigmas[j][tmp_key] = self._computeGaussianSmoothing(vsa, tempSigma, droi)

                    else:
                        droi = (tuple(vigOpSourceStart._asint()), tuple(vigOpSourceStop._asint()))                            
                        sourceArraysForSigmas[j] = self._computeGaussianSmoothing(sourceArrayV, tempSigma, droi)
            
            except RuntimeError as e:
                if 'kernel longer than line' in str(e):
                    message = "Feature computation error:\nYour image is too small to apply a filter with sigma=%.1f. Please select features with smaller sigmas." % self.scales[j]
                    raise RuntimeError(message)
                else:
                    raise

            del sourceArrayV
            try:
                sourceArray.resize((1,), refcheck = False)
            except ValueError:
                # Sometimes this fails, but that's okay.
                logger.debug("Failed to free array memory.")                
            del sourceArray

            closures = []

            #connect individual operators
            for i in range(dimRow):
                for j in range(dimCol):
                    val=self.matrix[i,j]
                    if val:
                        vop= self.featureOps[i][j]
                        oslot = vop.outputs["Output"]
                        req = None
                        #inTagKeys = [ax.key for ax in oslot.meta.axistags]
                        #print inTagKeys, flag
                        if hasChannelAxis:
                            slices = oslot.meta.shape[axisindex]
                            if (cnt + slices >= rroi.start[axisindex]
                                    and rroi.start[axisindex] - cnt < slices
                                    and rroi.start[axisindex] + written < rroi.stop[axisindex]):
                                begin = 0
                                if cnt < rroi.start[axisindex]:
                                    begin = rroi.start[axisindex] - cnt
                                end = slices
                                if cnt + end > rroi.stop[axisindex]:
                                    end -= cnt + end - rroi.stop[axisindex]
                                key_ = copy.copy(oldkey)
                                key_.insert(axisindex, slice(begin, end, None))
                                reskey = [slice(None, None, None) for x in range(len(result.shape))]
                                reskey[axisindex] = slice(written, written+end-begin, None)
                                
                                destArea = result[tuple(reskey)]
                                #readjust the roi for the new source array
                                roiSmootherList = list(roiSmoother)
                                
                                roiSmootherList.insert(axisindex, slice(begin, end, None))
                                
                                if hasTimeAxis:
                                    # The time slice in the ROI doesn't matter:
                                    # The sourceArrayParameter below overrides the input data to be used.
                                    roiSmootherList.insert(timeAxis, 0)
                                roiSmootherRegion = SubRegion(oslot, pslice=roiSmootherList)
                                
                                closure = partial(oslot.operator.execute, oslot, (), roiSmootherRegion, destArea, sourceArray = sourceArraysForSigmas[j])
                                closures.append(closure)

                                written += end - begin
                            cnt += slices
                        else:
                            if cnt >= rroi.start[axisindex] and rroi.start[axisindex] + written < rroi.stop[axisindex]:
                                reskey = [slice(None, None, None) for x in range(len(result.shape))]
                                slices = oslot.meta.shape[axisindex]
                                reskey[axisindex]=slice(written, written+slices, None)
                                #print "key: ", key, "reskey: ", reskey, "oldkey: ", oldkey, "resshape:", result.shape
                                #print "roiSmoother:", roiSmoother
                                destArea = result[tuple(reskey)]
                                #print "destination area:", destArea.shape
                                logger.debug("{} {} {}".format(oldkey, destArea.shape, sourceArraysForSigmas[j].shape))
                                oldroi = SubRegion(oslot, pslice=oldkey)
                                #print "passing roi:", oldroi
                                closure = partial(oslot.operator.execute, oslot, (), oldroi, destArea, sourceArray = sourceArraysForSigmas[j])
                                closures.append(closure)

                                written += 1
                            cnt += 1
            pool = RequestPool()
            for c in closures:
                r = pool.request(c)
            pool.wait()
            pool.clean()

            for i in range(len(sourceArraysForSigmas)):
                if sourceArraysForSigmas[i] is not None:
                    try:
                        sourceArraysForSigmas[i].resize((1,))
                    except:
                        sourceArraysForSigmas[i] = None
Example #17
    def execute(self, slot, subindex, roi, result):

        progress = 0
        numImages = len(self.Images)
        self.progressSignal(progress)
        featMatrix=[]
        labelsMatrix=[]
        tagList = []

        
        #result[0] = self._svr

        for i,labels in enumerate(self.inputs["ForegroundLabels"]):
            if labels.meta.shape is not None:
                opGaussian = OpGaussianSmoothing(parent = self, graph = self.graph)
                opGaussian.Sigma.setValue(self.Sigma.value)
                opGaussian.Input.connect(self.ForegroundLabels[i])
                blocks = self.inputs["nonzeroLabelBlocks"][i][0].wait()
                
                reqlistlabels = []
                reqlistbg = []
                reqlistfeat = []
                progress += 10 / numImages
                self.progressSignal(progress)
                
                for b in blocks[0]:
                    request = opGaussian.Output[b]
                    #request = labels[b]
                    featurekey = list(b)
                    featurekey[-1] = slice(None, None, None)
                    request2 = self.Images[i][featurekey]
                    request3 = self.inputs["BackgroundLabels"][i][b]
                    reqlistlabels.append(request)
                    reqlistfeat.append(request2)
                    reqlistbg.append(request3)

                traceLogger.debug("Requests prepared")

                numLabelBlocks = len(reqlistlabels)
                progress_outer = [progress]
                if numLabelBlocks > 0:
                    progressInc = (80 - 10)/(numLabelBlocks * numImages)

                def progressNotify(req):
                    progress_outer[0] += progressInc/2
                    self.progressSignal(progress_outer[0])

                for ir, req in enumerate(reqlistfeat):
                    req.notify_finished(progressNotify)
                    req.submit()

                for ir, req in enumerate(reqlistlabels):
                    req.notify_finished(progressNotify)
                    req.submit()

                for ir, req in enumerate(reqlistbg):
                    req.notify_finished(progressNotify)
                    req.submit()
                
                traceLogger.debug("Requests fired")
                

                #Fixme: Maybe later request only part of the region?

                #image=self.inputs["Images"][i][:].wait()
                for ir, req in enumerate(reqlistlabels):
                    
                    labblock = req.wait()
                    
                    image = reqlistfeat[ir].wait()
                    labbgblock = reqlistbg[ir].wait()
                    labblock = labblock.reshape((image.shape[:-1]))
                    image = image.reshape((-1, image.shape[-1]))
                    labbgindices = np.where(labbgblock == 2)            
                    labbgindices = np.ravel_multi_index(labbgindices, labbgblock.shape)
                    
                    newDot, mapping, tags = \
                        self._svr.prepareDataRefactored(labblock, labbgindices)
                    #self._svr.prepareData(labblock, smooth = True)

                    labels   = newDot[mapping]
                    features = image[mapping]

                    featMatrix.append(features)
                    labelsMatrix.append(labels)
                    tagList.append(tags)
                
                progress = progress_outer[0]

                traceLogger.debug("Requests processed")


        self.progressSignal(80 / numImages)
        if len(featMatrix) == 0 or len(labelsMatrix) == 0:
            result[:] = None

        else:
            posTags = [tag[0] for tag in tagList]
            negTags = [tag[1] for tag in tagList]
            numPosTags = np.sum(posTags)
            numTags = np.sum(posTags) + np.sum(negTags)
            fullFeatMatrix = np.ndarray((numTags, self.Images[0].meta.shape[-1]), dtype = np.float64)
            fullLabelsMatrix = np.ndarray((numTags), dtype = np.float64)
            fullFeatMatrix[:] = np.nan
            fullLabelsMatrix[:] = np.nan
            currPosCount = 0
            currNegCount = numPosTags
            for i, posCount in enumerate(posTags):
                fullFeatMatrix[currPosCount:currPosCount + posTags[i],:] = featMatrix[i][:posCount,:]
                fullLabelsMatrix[currPosCount:currPosCount + posTags[i]] = labelsMatrix[i][:posCount]
                fullFeatMatrix[currNegCount:currNegCount + negTags[i],:] = featMatrix[i][posCount:,:]
                fullLabelsMatrix[currNegCount:currNegCount + negTags[i]] = labelsMatrix[i][posCount:]
                currPosCount += posTags[i]
                currNegCount += negTags[i]


            assert(not np.isnan(np.sum(fullFeatMatrix)))

            fullTags = [np.sum(posTags), np.sum(negTags)]
            #pool = RequestPool()

            maxima = np.max(fullFeatMatrix, axis=0)
            minima = np.min(fullFeatMatrix, axis=0)
            normalizationFactors = (minima,maxima)
            



            boxConstraintList = []
            boxConstraints = None
            if self.BoxConstraintRois.ready() and self.BoxConstraintValues.ready():
                for i, slot in enumerate(zip(self.BoxConstraintRois,self.BoxConstraintValues)):
                    for constr, val in zip(slot[0].value, slot[1].value):
                        boxConstraintList.append((i, constr, val))
                if len(boxConstraintList) > 0:
                    boxConstraints = self.constructBoxConstraints(boxConstraintList)

            params = self._svr.get_params() 
            try:
                pool = RequestPool()
                def train_and_store(i):
                    result[i] = SVR(minmax = normalizationFactors, **params)
                    result[i].fitPrepared(fullFeatMatrix, fullLabelsMatrix, tags=fullTags,
                                          boxConstraints=boxConstraints, numRegressors=self.numRegressors, trainAll=False)
                for i in range(self.numRegressors):
                    req = pool.request(partial(train_and_store, i))
                
                pool.wait()
                pool.clean()
            
            except:
                logger.error("ERROR: could not learn regressor")
                logger.error("fullFeatMatrix shape = {}, dtype = {}".format(fullFeatMatrix.shape, fullFeatMatrix.dtype) )
                logger.error("fullLabelsMatrix shape = {}, dtype = {}".format(fullLabelsMatrix.shape, fullLabelsMatrix.dtype) )
                raise
            finally:
                self.progressSignal(100) 

        return result
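Example #17 drives its progress bar from request callbacks: requests are submitted explicitly, and a notify_finished hook bumps a counter kept in a one-element list so the closure can mutate it. A minimal sketch of that idiom, assuming lazyflow Request objects as above; progress_signal is any callable taking a percentage:

    def submit_with_progress(requests, progress_signal, span=70.0):
        """Submit lazyflow requests, reporting progress as each one finishes."""
        progress = [0.0]  # one-element list so the closure can mutate it
        inc = span / max(len(requests), 1)

        def notify(req):
            # Unlocked increments may race; Example #17 tolerates the same imprecision.
            progress[0] += inc
            progress_signal(progress[0])

        for req in requests:
            req.notify_finished(notify)
            req.submit()
        for req in requests:
            req.wait()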
Example #18
    def _execute_graphcut(self, roi, result):
        for i in (0, 4):
            assert roi.stop[i] - roi.start[i] == 1,\
                "Invalid roi for graph-cut: {}".format(str(roi))
        t = roi.start[0]
        c = roi.start[4]

        margin = self.Margin.value
        beta = self.Beta.value
        MAXBOXSIZE = 10000000  # FIXME justification??

        ## request the bounding box coordinates ##
        # the trailing index brackets give us the dictionary (instead of an
        # array of size 1)
        feats = self.BoundingBoxes.get(roi).wait()
        mins = feats["Coord<Minimum>"]
        maxs = feats["Coord<Maximum>"]
        nobj = mins.shape[0]
        # these are indices, so they should have an index datatype
        mins = mins.astype(np.uint32)
        maxs = maxs.astype(np.uint32)

        ## request the prediction image ##
        pred = self.Prediction.get(roi).wait()
        pred = vigra.taggedView(pred, axistags=self.Prediction.meta.axistags)
        pred = pred.withAxes(*'xyz')

        ## request the connected components image ##
        cc = self.LabelImage.get(roi).wait()
        cc = vigra.taggedView(cc, axistags=self.LabelImage.meta.axistags)
        cc = cc.withAxes(*'xyz')

        # provide xyz view for the output (just need 8 bit for the segmentation)
        resultXYZ = vigra.taggedView(np.zeros(cc.shape, dtype=np.uint8),
                                     axistags='xyz')

        def processSingleObject(i):
            logger.debug("processing object {}".format(i))
            # maxs are inclusive, so we need to add 1
            xmin = max(mins[i][0] - margin[0], 0)
            ymin = max(mins[i][1] - margin[1], 0)
            zmin = max(mins[i][2] - margin[2], 0)
            xmax = min(maxs[i][0] + margin[0] + 1, cc.shape[0])
            ymax = min(maxs[i][1] + margin[1] + 1, cc.shape[1])
            zmax = min(maxs[i][2] + margin[2] + 1, cc.shape[2])
            ccbox = cc[xmin:xmax, ymin:ymax, zmin:zmax]
            resbox = resultXYZ[xmin:xmax, ymin:ymax, zmin:zmax]

            nVoxels = ccbox.size
            if nVoxels > MAXBOXSIZE:
                #problem too large to run graph cut, assign to seed
                logger.warn("Object {} too large for graph cut.".format(i))
                resbox[ccbox == i] = 1
                return

            probbox = pred[xmin:xmax, ymin:ymax, zmin:zmax]
            gcsegm = segmentGC(probbox, beta)
            gcsegm = vigra.taggedView(gcsegm, axistags='xyz')
            ccsegm = vigra.analysis.labelVolumeWithBackground(
                gcsegm.astype(np.uint8))

            # Extended bboxes of different objects might overlap.
            # To avoid conflicting segmentations, we find all connected
            # components in the results and only take the one, which
            # overlaps with the object "core" or "seed", defined by the
            # pre-thresholding
            seed = ccbox == i
            filtered = seed * ccsegm
            passed = vigra.analysis.unique(filtered.astype(np.uint32))
            assert len(passed.shape) == 1
            if passed.size > 2:
                logger.warn("ambiguous label assignment for region {}".format(
                    (xmin, xmax, ymin, ymax, zmin, zmax)))
                resbox[ccbox == i] = 1
            elif passed.size <= 1:
                logger.warn("box {} segmented out with beta {}".format(
                    i, beta))
            else:
                # assign to the overlap region
                label = passed[1]  # 0 is background
                resbox[ccsegm == label] = 1

        pool = RequestPool()
        #FIXME make sure that the parallel computations fit into memory
        for i in range(1, nobj):
            req = Request(functools.partial(processSingleObject, i))
            pool.add(req)

        logger.info("Processing {} objects ...".format(nobj - 1))

        pool.wait()
        pool.clean()

        logger.info("object loop done")

        # prepare result
        resView = vigra.taggedView(result, axistags=self.Output.meta.axistags)
        resView = resView.withAxes(*'xyz')

        # some labels could have been removed => relabel
        vigra.analysis.labelVolumeWithBackground(resultXYZ, out=resView)
Example #19
    def execute(self, slot, subindex, roi, result):
        assert slot in [self.Predictions,
                        self.Probabilities,
                        self.CachedProbabilities,
                        self.ProbabilityChannels,
                        self.BadObjects]

        times = roi._l
        if len(times) == 0:
            # we assume that 0-length requests are requesting everything
            times = range(self.Predictions.meta.shape[0])

        if slot is self.CachedProbabilities:
            return {t: self.prob_cache[t] for t in times if t in self.prob_cache}

        forests=self.inputs["Classifier"][:].wait()
        if forests is None or forests[0] is None:
            # this happens if there was no data to train with
            return dict((t, numpy.array([])) for t in times)

        feats = {}
        prob_predictions = {}

        selected = self.SelectedFeatures([]).wait()

        # FIXME: self.prob_cache is shared, so we need to block.
        # However, this makes prediction single-threaded.
        self.lock.acquire()
        try:
            for t in times:
                if t in self.prob_cache:
                    continue

                tmpfeats = self.Features([t]).wait()
                ftmatrix, _, col_names = make_feature_array(tmpfeats, selected)
                rows, cols = replace_missing(ftmatrix)
                self.bad_objects[t] = numpy.zeros((ftmatrix.shape[0],))
                self.bad_objects[t][rows] = 1
                feats[t] = ftmatrix
                prob_predictions[t] = [0] * len(forests)

            def predict_forest(_t, forest_index):
                # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
                #        and we have to average the PROBABILITIES from all forests.
                #       Averaging the label predictions from each forest is NOT equivalent.
                #       For details please see wikipedia:
                #       http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
                #       (^-^)
                prob_predictions[_t][forest_index] = forests[forest_index].predictProbabilities(feats[_t].astype(numpy.float32))

            # predict the data with all the forests in parallel
            pool = RequestPool()
            for t in times:
                if t in self.prob_cache:
                    continue
                for i, f in enumerate(forests):
                    req = Request( partial(predict_forest, t, i) )
                    pool.add(req)

            pool.wait()
            pool.clean()

            for t in times:
                if t not in self.prob_cache:
                    # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
                    # prob_predictions[t][forest_index][object_index, class_index]

                    # Stack the forests together and average them.
                    stacked_predictions = numpy.array( prob_predictions[t] )
                    averaged_predictions = numpy.average( stacked_predictions, axis=0 )
                    assert averaged_predictions.shape[0] == len(feats[t])
                    self.prob_cache[t] = averaged_predictions

                    self.prob_cache[t][0] = 0 # Background probability is always zero


            if slot == self.Probabilities:
                return { t : self.prob_cache[t] for t in times }
            elif slot == self.Predictions:
                # FIXME: Support SegmentationThreshold again...
                labels = dict()
                for t in times:
                    prob_sum = numpy.sum(self.prob_cache[t], axis=1)
                    labels[t] = 1 + numpy.argmax(self.prob_cache[t], axis=1)
                    labels[t][0] = 0 # Background gets the zero label
                
                return labels

            elif slot == self.ProbabilityChannels:
                try:
                    prob_single_channel = {t: self.prob_cache[t][:, subindex[0]]
                                           for t in times}
                except:
                    # no probabilities available for this class; return zeros
                    prob_single_channel = {t: numpy.zeros((self.prob_cache[t].shape[0], 1))
                                           for t in times}
                return prob_single_channel

            elif slot == self.BadObjects:
                return { t : self.bad_objects[t] for t in times }

            else:
                assert False, "Unknown input slot"
        finally:
            self.lock.release()
Example #20
    def execute(self, slot, subindex, roi, result):
        featList = []
        all_col_names = []
        labelsList = []

        # will be available at slot self.Warnings
        all_bad_objects = defaultdict(lambda: defaultdict(list))
        all_bad_feats = set()

        selected = self.SelectedFeatures([]).wait()
        if len(selected)==0:
            # no features - no predictions
            self.Classifier.setValue(None)
            return
        
        for i in range(len(self.Labels)):
            # this loop is by image, not time! 

            # TODO: we should be able to use self.Labels[i].value,
            # but the current implementation of Slot.value() does not
            # do the right thing.
            labels_image = self.Labels[i]([]).wait()
            labels_image_filtered = {}
            nztimes = []
            for timestep, labels_time in labels_image.items():
                nz = numpy.nonzero(labels_time)
                if len(nz[0])==0:
                    continue
                else:
                    nztimes.append(timestep)
                    labels_image_filtered[timestep] = labels_time

            if len(nztimes)==0:
                continue
            # compute the features if there are nonzero labels in this image
            # and only for the time steps, which have labels
            feats = self.Features[i](nztimes).wait()

            featstmp, row_names, col_names, labelstmp = make_feature_array(feats, selected, labels_image_filtered)
            if labelstmp.size == 0 or featstmp.size == 0:
                continue

            rows, cols = replace_missing(featstmp)

            featList.append(featstmp)
            all_col_names.append(tuple(col_names))
            labelsList.append(labelstmp)

            for idx in rows:
                t, obj = row_names[idx]
                all_bad_objects[i][t].append(obj)

            for c in cols:
                all_bad_feats.add(col_names[c])


        if len(labelsList)==0:
            #no labels, return here
            self.Classifier.setValue(None)
            return
        
        
        self._warnBadObjects(all_bad_objects, all_bad_feats)

        if len(set(all_col_names)) != 1:
            raise Exception('different time slices did not have the same features.')

        featMatrix = _concatenate(featList, axis=0)
        labelsMatrix = _concatenate(labelsList, axis=0)
        
        logger.info("training on matrix of shape {}".format(featMatrix.shape))

        if featMatrix.size == 0 or labelsMatrix.size == 0:
            result[:] = None
            return
        oob = [0] * self.ForestCount.value
        try:
            # train and store forests in parallel
            pool = RequestPool()
            for i in range(self.ForestCount.value):
                def train_and_store(number):
                    result[number] = vigra.learning.RandomForest(self._tree_count)
                    oob[number] = result[number].learnRF(featMatrix.astype(numpy.float32), numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                req = Request( partial(train_and_store, i) )
                pool.add( req )
            pool.wait()
            pool.clean()
        except Exception:
            logger.warning("couldn't learn classifier")
            raise
        oob_total = numpy.mean(oob)
        logger.info("training finished, out of bag error: {}".format(oob_total))
        return result
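
# A condensed sketch of the parallel-training pattern in the example above, assuming
# lazyflow's request framework (Request/RequestPool) and vigra are importable:
from functools import partial

import numpy
import vigra
from lazyflow.request import Request, RequestPool

def train_forests_in_parallel(features, labels, forest_count=8, tree_count=100):
    """Train `forest_count` random forests concurrently; return (forests, mean OOB)."""
    forests = [None] * forest_count
    oob = [0.0] * forest_count

    def train_one(number):
        forests[number] = vigra.learning.RandomForest(tree_count)
        oob[number] = forests[number].learnRF(
            features.astype(numpy.float32),
            numpy.asarray(labels, dtype=numpy.uint32))

    pool = RequestPool()
    for i in range(forest_count):
        pool.add(Request(partial(train_one, i)))
    pool.wait()   # block until every request has finished
    pool.clean()  # release the pool's internal references
    return forests, numpy.mean(oob)
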
Example #21
    def execute(self, slot, subindex, roi, result):
        progress = 0
        self.progressSignal(progress)
        numImages = len(self.Images)

        featMatrix=[]
        labelsMatrix=[]
        tagsMatrix = []

        result[0] = SVR(self.UnderMult.value, self.OverMult.value, limitDensity = True, **self.SelectedOption.value)
        for i,labels in enumerate(self.inputs["Labels"]):
            if labels.meta.shape is not None:
                #labels=labels[:].wait()
                blocks = self.inputs["nonzeroLabelBlocks"][i][0].wait()

                progress += 10/numImages
                self.progressSignal(progress)

                reqlistlabels = []
                reqlistfeat = []
                traceLogger.debug("Sending requests for {} non-zero blocks (labels and data)".format( len(blocks[0])) )
                for b in blocks[0]:

                    request = labels[b]
                    featurekey = list(b)
                    featurekey[-1] = slice(None, None, None)
                    request2 = self.inputs["Images"][i][featurekey]

                    reqlistlabels.append(request)
                    reqlistfeat.append(request2)

                traceLogger.debug("Requests prepared")

                numLabelBlocks = len(reqlistlabels)
                progress_outer = [progress] # Store in list for closure access
                if numLabelBlocks > 0:
                    progressInc = (80-10)/numLabelBlocks/numImages

                def progressNotify(req):
                    # Note: If we wanted perfect progress reporting, we could use lock here
                    #       to protect the progress from being incremented simultaneously.
                    #       But that would slow things down and imperfect reporting is okay for our purposes.
                    progress_outer[0] += progressInc/2
                    self.progressSignal(progress_outer[0])

                for ir, req in enumerate(reqlistfeat):
                    req.notify_finished(progressNotify)

                for ir, req in enumerate(reqlistlabels):
                    req.notify_finished(progressNotify)

                traceLogger.debug("Requests fired")

                for ir, req in enumerate(reqlistlabels):
                    traceLogger.debug("Waiting for a label block...")
                    labblock = req.wait()

                    traceLogger.debug("Waiting for an image block...")
                    image = reqlistfeat[ir].wait()

                    newImg, newDot, mapping, tags = \
                    result[0].prepareData(image, labblock, sigma = self.Sigma.value, smooth = True, normalize = False)

                    features = newImg[mapping]
                    labbla = newDot[mapping]

                    #indexes=np.nonzero(labblock[...,0].view(np.ndarray))
                    #features=image[indexes]
                    #labbla=labblock[indexes]

                    featMatrix.append(features)
                    labelsMatrix.append(labbla)
                    tagsMatrix.append(tags)

                progress = progress_outer[0]

                traceLogger.debug("Requests processed")

        self.progressSignal(80/numImages)

        if len(featMatrix) == 0 or len(labelsMatrix) == 0:
            # If there was no actual data for the random forest to train with, we return None
            result[:] = None
        else:
            featMatrix=np.concatenate(featMatrix,axis=0)
            labelsMatrix=np.concatenate(labelsMatrix,axis=0)
            tagsMatrix=np.concatenate(tagsMatrix,axis=0)

            try:
                logger.debug("Learning with Vigra...")

                pool = RequestPool()

                #result[0].fitPrepared(featMatrix, labelsMatrix, tagsMatrix, self.Epsilon.value)
                req = pool.request(partial(result[0].fitPrepared, featMatrix, labelsMatrix, tagsMatrix, self.Epsilon.value))
                pool.wait()
                pool.clean()

                logger.debug("Vigra finished")
            except Exception:
                logger.error( "ERROR: could not learn classifier" )
                logger.error( "featMatrix shape={}, max={}, dtype={}".format(featMatrix.shape, featMatrix.max(), featMatrix.dtype) )
                logger.error( "labelsMatrix shape={}, max={}, dtype={}".format(labelsMatrix.shape, labelsMatrix.max(), labelsMatrix.dtype ) )
                raise
            finally:
                self.progressSignal(100)

        return result
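
# The `progress_outer = [progress]` trick above is a Python 2 idiom for mutating an
# enclosing-scope variable from a callback; in Python 3, `nonlocal` states the intent
# directly. A minimal sketch with hypothetical names (no lazyflow required):
def make_progress_notifier(progress_signal, start, increment):
    progress = start

    def on_finished(request_result):
        nonlocal progress
        # deliberately lock-free: as the original comment notes, slightly
        # imperfect progress reporting is acceptable here
        progress += increment
        progress_signal(progress)

    return on_finished

# usage sketch:
#   notify = make_progress_notifier(self.progressSignal, progress, progressInc / 2)
#   req.notify_finished(notify)
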
Example #22
def extractHistograms(volume, labels, patchSize=64, haloSize=0,
                      nBins=30, intRange=(0, 255), appendPositions=False):
    '''
    extracts histograms from 3d-volume
     - labels are
        0       ignore
        1       positive
        2       negative
     - histogram extraction is attempted to be done in parallel
     - patches that intersect with the volume border are discarded
     - volume and labels must be 3d, and in order 'zyx' (if not VigraArrays)
     - returns: np.ndarray, shape: (nSamples,nBins+1), last column is the label
    '''

    # progress reporter class, histogram extraction can take quite a long time
    class ProgressReporter(object):

        lock = None

        def __init__(self, nThreads):
            self.lock = ThreadLock()
            self.nThreads = nThreads
            self.status = np.zeros((nThreads,))

        def report(self, index):
            self.lock.acquire()
            self.status[index] = 1
            logger.debug("Finished threads: %d/%d." %
                         (self.status.sum(), len(self.status)))
            self.lock.release()

    # sanity checks
    assert len(volume.shape) == 3, "Volume must be 3d data"
    assert volume.shape == labels.shape,\
        "Volume and labels must have the same shape"

    try:
        volumeZYX = volume.withAxes(*'zyx')
        labelsZYX = labels.withAxes(*'zyx')
    except AttributeError:
        # plain ndarrays without axistags; assume they are already in 'zyx' order
        volumeZYX = volume
        labelsZYX = labels

    # compute actual patch size
    patchSize = patchSize + 2*haloSize

    # fill list of patch centers (VigraArray does not support bitwise_or)
    ind_z, ind_y, ind_x = np.where(
        (labelsZYX == 1).view(np.ndarray) | (labelsZYX == 2).view(np.ndarray))
    index = np.arange(len(ind_z))

    # prepare chunking of histogram centers
    chunkSize = 10000  # FIXME magic number??
    nChunks = len(index)//chunkSize + (1 if len(index) % chunkSize > 0 else 0)
    sliceList = [slice(k*chunkSize, min((k+1)*chunkSize, len(index)))
                 for k in range(nChunks)]
    histoList = [None]*nChunks

    # prepare subroutine for parallel extraction
    reporter = ProgressReporter(nChunks)

    #BEGIN subroutine
    def _extractHistogramsSub(itemList):

        xs = ind_x[itemList]
        ys = ind_y[itemList]
        zs = ind_z[itemList]

        ymin = ys - patchSize//2
        ymax = ymin + patchSize

        xmin = xs - patchSize//2
        xmax = xmin + patchSize

        validPatchIndices = np.where(
            np.all(
                (ymin >= 0,
                 xmin >= 0,
                 xmax <= volumeZYX.shape[2],
                 ymax <= volumeZYX.shape[1]),
                axis=0))[0]

        if appendPositions:
            out = np.zeros((len(validPatchIndices), nBins+4))
        else:
            out = np.zeros((len(validPatchIndices), nBins+1))

        for k, patchInd in enumerate(validPatchIndices):
            x = xs[patchInd]
            y = ys[patchInd]
            z = zs[patchInd]

            vol = volumeZYX[z, ymin[patchInd]:ymax[patchInd],
                            xmin[patchInd]:xmax[patchInd]]
            (out[k, :nBins], _) = np.histogram(
                vol, bins=nBins, range=intRange, density=True)
            out[k, nBins] = 1 if labelsZYX[z, y, x] == 1 else 0
            if appendPositions:
                out[k, nBins+1:] = [z, y, x]

        return out

    def partFun(i):
        itemList = index[sliceList[i]]
        histos = _extractHistogramsSub(itemList)
        histoList[i] = histos

        reporter.report(i)
    #END subroutine

    # pool the extraction requests
    pool = RequestPool()

    for i in range(nChunks):
        req = Request(partial(partFun, i))
        pool.add(req)

    pool.wait()
    pool.clean()

    return np.vstack(histoList)
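
# A usage sketch for extractHistograms on synthetic data, run in the context of the
# example above (its lazyflow imports available). Plain ndarrays are accepted because
# of the AttributeError fallback, but they must already be in 'zyx' order:
import numpy as np

rng = np.random.default_rng(0)
volume = rng.integers(0, 256, size=(5, 256, 256)).astype(np.uint8)
labels = np.zeros_like(volume)
labels[2, 128, 128] = 1   # one positive patch center
labels[2, 64, 200] = 2    # one negative patch center

histos = extractHistograms(volume, labels, patchSize=64, nBins=30, intRange=(0, 255))
print(histos.shape)  # (2, 31): one row per valid patch, last column is the label
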
Example #23
    def _felzenszwalbTraining(self, negative, positive):
        '''
        we want to train on a 'hard' subset of the training data, see
        FELZENSZWALB ET AL.: OBJECT DETECTION WITH DISCRIMINATIVELY TRAINED PART-BASED MODELS (4.4), PAMI 32/9
        '''

        #TODO sanity checks

        n = (self.PatchSize.value + self.HaloSize.value)**2
        method = self.DetectionMethod.value

        # set options for Felzenszwalb training
        firstSamples = self._felzenOpts["firstSamples"]
        maxRemovePerStep = self._felzenOpts["maxRemovePerStep"]
        maxAddPerStep = self._felzenOpts["maxAddPerStep"]
        maxSamples = self._felzenOpts["maxSamples"]
        nTrainingSteps = self._felzenOpts["nTrainingSteps"]

        # initial choice of training samples
        (initNegative, choiceNegative, _, _) = \
            _chooseRandomSubset(negative, min(firstSamples, len(negative)))
        (initPositive, choicePositive, _, _) = \
            _chooseRandomSubset(positive, min(firstSamples, len(positive)))

        # setup for parallel training
        samples = [negative, positive]
        choice = [choiceNegative, choicePositive]
        S_t = [initNegative, initPositive]

        finished = [False, False]

        ### BEGIN SUBROUTINE ###
        def felzenstep(x, cache, ind):

            case = ("positive" if ind > 0 else "negative") + " set"
            pred = self.predict(x, method=method)

            hard = np.where(pred != ind)[0]
            easy = np.setdiff1d(range(len(x)), hard)
            logger.debug(" {}: currently {} hard and {} easy samples".format(
                case, len(hard), len(easy)))

            # shrink the cache
            easyInCache = np.intersect1d(easy, cache) if len(easy) > 0 else []
            if len(easyInCache) > 0:
                (removeFromCache, _, _, _) = _chooseRandomSubset(
                    easyInCache, min(len(easyInCache), maxRemovePerStep))
                cache = np.setdiff1d(cache, removeFromCache)
                logger.debug(" {}: shrunk the cache by {} elements".format(
                    case, len(removeFromCache)))

            # grow the cache
            temp = len(cache)
            addToCache = _chooseRandomSubset(
                hard, min(len(hard), maxAddPerStep))[0]
            cache = np.union1d(cache, addToCache)
            addedHard = len(cache)-temp
            logger.debug(" {}: grown the cache by {} elements".format(
                case, addedHard))

            if len(cache) > maxSamples:
                logger.debug(
                    " {}: Cache to big, removing elements.".format(case))
                cache = _chooseRandomSubset(cache, maxSamples)[0]

            # apply the cache
            C = x[cache]

            return (C, cache, addedHard == 0)
        ### END SUBROUTINE ###

        ### BEGIN PARALLELIZATION FUNCTION ###
        def partFun(i):
            (C, newChoice, newFinished) = felzenstep(samples[i], choice[i], i)
            S_t[i] = C
            choice[i] = newChoice
            finished[i] = newFinished
        ### END PARALLELIZATION FUNCTION ###

        for k in range(nTrainingSteps):

            logger.debug(
                "Felzenszwalb Training " +
                "(step {}/{}): {} hard negative samples, {}".format(
                    k+1, nTrainingSteps, len(S_t[0]), len(S_t[1])) +
                "hard positive samples.")
            self.fit(S_t[0], S_t[1], method=method)

            pool = RequestPool()

            for i in range(len(S_t)):
                req = Request(partial(partFun, i))
                pool.add(req)

            pool.wait()
            pool.clean()

            if np.all(finished):
                #already have all hard examples in training set
                break

        self.fit(S_t[0], S_t[1], method=method)

        logger.debug(" Finished Felzenszwalb Training.")
    def execute(self, slot, subindex, roi, result):
        assert slot in [
            self.Predictions, self.Probabilities, self.CachedProbabilities,
            self.ProbabilityChannels, self.BadObjects
        ]

        times = roi._l
        if len(times) == 0:
            # we assume that 0-length requests are requesting everything
            times = range(self.Predictions.meta.shape[0])

        if slot is self.CachedProbabilities:
            return {
                t: self.prob_cache[t]
                for t in times if t in self.prob_cache
            }

        forests = self.inputs["Classifier"][:].wait()
        if forests is None or forests[0] is None:
            # this happens if there was no data to train with
            return dict((t, numpy.array([])) for t in times)

        feats = {}
        prob_predictions = {}

        selected = self.SelectedFeatures([]).wait()

        # FIXME: self.prob_cache is shared, so we need to block.
        # However, this makes prediction single-threaded.
        self.lock.acquire()
        try:
            for t in times:
                if t in self.prob_cache:
                    continue

                tmpfeats = self.Features([t]).wait()
                ftmatrix, _, col_names = make_feature_array(tmpfeats, selected)
                rows, cols = replace_missing(ftmatrix)
                self.bad_objects[t] = numpy.zeros((ftmatrix.shape[0], ))
                self.bad_objects[t][rows] = 1
                feats[t] = ftmatrix
                prob_predictions[t] = [0] * len(forests)

            def predict_forest(_t, forest_index):
                # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
                #        and we have to average the PROBABILITIES from all forests.
                #       Averaging the label predictions from each forest is NOT equivalent.
                #       For details please see wikipedia:
                #       http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
                #       (^-^)
                prob_predictions[_t][forest_index] = forests[
                    forest_index].predictProbabilities(feats[_t].astype(
                        numpy.float32))

            # predict the data with all the forests in parallel
            pool = RequestPool()
            for t in times:
                if t in self.prob_cache:
                    continue
                for i, f in enumerate(forests):
                    req = Request(partial(predict_forest, t, i))
                    pool.add(req)

            pool.wait()
            pool.clean()

            for t in times:
                if t not in self.prob_cache:
                    # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
                    # prob_predictions[t][forest_index][object_index, class_index]

                    # Stack the forests together and average them.
                    stacked_predictions = numpy.array(prob_predictions[t])
                    averaged_predictions = numpy.average(stacked_predictions,
                                                         axis=0)
                    assert averaged_predictions.shape[0] == len(feats[t])
                    self.prob_cache[t] = averaged_predictions

                    self.prob_cache[t][
                        0] = 0  # Background probability is always zero

            if slot == self.Probabilities:
                return {t: self.prob_cache[t] for t in times}
            elif slot == self.Predictions:
                # FIXME: Support SegmentationThreshold again...
                labels = dict()
                for t in times:
                    prob_sum = numpy.sum(self.prob_cache[t], axis=1)
                    labels[t] = 1 + numpy.argmax(self.prob_cache[t], axis=1)
                    labels[t][0] = 0  # Background gets the zero label

                return labels

            elif slot == self.ProbabilityChannels:
                try:
                    prob_single_channel = {
                        t: self.prob_cache[t][:, subindex[0]]
                        for t in times
                    }
                except Exception:
                    # no probabilities available for this class; return zeros
                    prob_single_channel = {
                        t: numpy.zeros((self.prob_cache[t].shape[0], 1))
                        for t in times
                    }
                return prob_single_channel

            elif slot == self.BadObjects:
                return {t: self.bad_objects[t] for t in times}

            else:
                assert False, "Unknown input slot"
        finally:
            self.lock.release()
    def execute(self, slot, subindex, slot_roi, target):
        assert slot == self.Features or slot == self.Output
        if slot == self.Features:
            feature_slice = roiToSlice(slot_roi.start, slot_roi.stop)
            index = subindex[0]
            feature_slice = list(feature_slice)

            # Translate channel slice of this feature to the channel slice of the output slot.
            output_channel_offset = self.featureOutputChannels[index][0]
            feature_slice[1] = slice(
                output_channel_offset + feature_slice[1].start,
                output_channel_offset + feature_slice[1].stop)
            slot_roi = SubRegion(self.Output, pslice=feature_slice)

            # Get output slot region for this channel
            return self.execute(self.Output, (), slot_roi, target)
        elif slot == self.Output:
            # Correlation of variable 'families' representing reference frames:
            #  ______________________________
            # | input/output frame           |  input/output shape given by slots
            # |  _________________________   |
            # | | smooth frame            |  |  pre-smoothing op needs halo around filter roi
            # | |  ____________________   |  |
            # | | |filter frame        |  |  |  filter needs halo around target roi
            # | | |  _______________   |  |  |
            # | | | | target frame  |  |  |  |  target is given by output_roi

            # note: The 'full_' variable prefix refers to the full 5D shape (tczyx), without 'full_' variables mostly
            #       refer to the 3D space subregion (zyx)

            full_output_slice = slot_roi.toSlice()

            logger.debug(
                f"OpPixelFeaturesPresmoothed: request {slot_roi.pprint()}")

            assert (slot_roi.stop <= self.Output.meta.shape).all()

            full_output_shape = self.Output.meta.shape
            full_output_start, full_output_stop = sliceToRoi(
                full_output_slice, full_output_shape)
            assert len(full_output_shape) == 5
            if all(self.ComputeIn2d.value):  # todo: check for this particular slice
                axes2enlarge = (0, 1, 1)
            else:
                axes2enlarge = (1, 1, 1)

            output_shape = full_output_shape[2:]
            output_start = full_output_start[2:]
            output_stop = full_output_stop[2:]

            axistags = self.Output.meta.axistags
            target = target.view(vigra.VigraArray)
            target.axistags = copy.copy(axistags)

            # filter roi in input frame
            # sigma = 0.7, because the features receive a pre-smoothed array and don't need much of a neighborhood
            input_filter_start, input_filter_stop = roi.enlargeRoiForHalo(
                output_start,
                output_stop,
                output_shape,
                0.7,
                self.WINDOW_SIZE,
                enlarge_axes=axes2enlarge)

            # smooth roi in input frame
            input_smooth_start, input_smooth_stop = roi.enlargeRoiForHalo(
                input_filter_start,
                input_filter_stop,
                output_shape,
                self.max_sigma,
                self.WINDOW_SIZE,
                enlarge_axes=axes2enlarge,
            )

            # target roi in filter frame
            filter_target_start = roi.TinyVector(output_start -
                                                 input_filter_start)
            filter_target_stop = roi.TinyVector(output_stop -
                                                input_filter_start)

            # filter roi in smooth frame
            smooth_filter_start = roi.TinyVector(input_filter_start -
                                                 input_smooth_start)
            smooth_filter_stop = roi.TinyVector(input_filter_stop -
                                                input_smooth_start)

            filter_target_slice = roi.roiToSlice(filter_target_start,
                                                 filter_target_stop)
            input_smooth_slice = roi.roiToSlice(input_smooth_start,
                                                input_smooth_stop)

            # pre-smooth for all requested time slices and all channels
            full_input_smooth_slice = (full_output_slice[0], slice(None),
                                       *input_smooth_slice)
            req = self.Input[full_input_smooth_slice]
            source = req.wait()
            req.clean()
            req.destination = None
            if source.dtype != numpy.float32:
                sourceF = source.astype(numpy.float32)
                try:
                    source.resize((1, ), refcheck=False)
                except Exception:
                    pass
                del source
                source = sourceF

            sourceV = source.view(vigra.VigraArray)
            sourceV.axistags = copy.copy(self.Input.meta.axistags)

            dimCol = len(self.scales)
            dimRow = self.matrix.shape[0]

            presmoothed_source = [None] * dimCol

            source_smooth_shape = tuple(smooth_filter_stop -
                                        smooth_filter_start)
            full_source_smooth_shape = (
                full_output_stop[0] - full_output_start[0],
                self.Input.meta.shape[1],
            ) + source_smooth_shape
            try:
                for j in range(dimCol):
                    for i in range(dimRow):
                        if self.matrix[i, j]:
                            # There is at least one filter op with this scale
                            break
                    else:
                        # There is no filter op at this scale
                        continue

                    if self.scales[j] > 1.0:
                        tempSigma = math.sqrt(self.scales[j]**2 - 1.0)
                    else:
                        tempSigma = self.scales[j]

                    presmoothed_source[j] = numpy.ndarray(
                        full_source_smooth_shape, numpy.float32)

                    droi = (
                        (0, *tuple(smooth_filter_start._asint())),
                        (sourceV.shape[1],
                         *tuple(smooth_filter_stop._asint())),
                    )
                    for i, vsa in enumerate(sourceV.timeIter()):
                        presmoothed_source[j][
                            i, ...] = self._computeGaussianSmoothing(
                                vsa,
                                tempSigma,
                                droi,
                                in2d=self.ComputeIn2d.value[j])

            except RuntimeError as e:
                if "kernel longer than line" in str(e):
                    raise RuntimeError(
                        "Feature computation error:\nYour image is too small to apply a filter with "
                        f"sigma={self.scales[j]:.1f}. Please select features with smaller sigmas."
                    )
                else:
                    raise e

            del sourceV
            try:
                source.resize((1, ), refcheck=False)
            except ValueError:
                # Sometimes this fails, but that's okay.
                logger.debug("Failed to free array memory.")
            del source

            cnt = 0
            written = 0
            closures = []
            # connect individual operators
            for i in range(dimRow):
                for j in range(dimCol):
                    if self.matrix[i, j]:
                        oslot = self.featureOps[i][j].Output
                        req = None
                        slices = oslot.meta.shape[1]
                        if (cnt + slices >= slot_roi.start[1]
                                and slot_roi.start[1] - cnt < slices
                                and slot_roi.start[1] + written <
                                slot_roi.stop[1]):
                            begin = 0
                            if cnt < slot_roi.start[1]:
                                begin = slot_roi.start[1] - cnt
                            end = slices
                            if cnt + end > slot_roi.stop[1]:
                                end = slot_roi.stop[1] - cnt

                            # feature slice in output frame
                            feature_slice = (slice(None),
                                             slice(
                                                 written, written + end -
                                                 begin)) + (slice(None), ) * 3

                            subtarget = target[feature_slice]
                            # readjust the roi for the new source array
                            full_filter_target_slice = [
                                full_output_slice[0],
                                slice(begin, end), *filter_target_slice
                            ]
                            filter_target_roi = SubRegion(
                                oslot, pslice=full_filter_target_slice)

                            closure = partial(
                                oslot.operator.execute,
                                oslot,
                                (),
                                filter_target_roi,
                                subtarget,
                                sourceArray=presmoothed_source[j],
                            )
                            closures.append(closure)

                            written += end - begin
                        cnt += slices
            pool = RequestPool()
            for c in closures:
                pool.request(c)
            pool.wait()
            pool.clean()

            for i in range(len(presmoothed_source)):
                if presmoothed_source[i] is not None:
                    try:
                        presmoothed_source[i].resize((1, ))
                    except Exception:
                        presmoothed_source[i] = None
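
# A stripped-down sketch of the hard-example mining loop from _felzenszwalbTraining
# (Felzenszwalb et al., PAMI 32/9, sec. 4.4). A generic fit/predict classifier stands
# in for self.fit/self.predict, and cache shrinking/size capping is omitted for
# brevity; all names are illustrative:
import numpy as np

def mine_hard_examples(clf, X_neg, X_pos, first_samples=100, n_steps=4, seed=0):
    rng = np.random.default_rng(seed)
    X = [X_neg, X_pos]
    # start from a random subset of each class (cf. _chooseRandomSubset)
    cache = [rng.choice(len(x), size=min(first_samples, len(x)), replace=False)
             for x in X]

    for _ in range(n_steps):
        train_X = np.vstack([X[0][cache[0]], X[1][cache[1]]])
        train_y = np.concatenate([np.zeros(len(cache[0])), np.ones(len(cache[1]))])
        clf.fit(train_X, train_y)

        grew = False
        for label in (0, 1):
            hard = np.where(clf.predict(X[label]) != label)[0]  # misclassified samples
            new = np.setdiff1d(hard, cache[label])
            if len(new) > 0:
                cache[label] = np.union1d(cache[label], new)
                grew = True
        if not grew:
            break  # every hard example is already in the training cache
    return clf
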
Example #26
    def execute(self, slot, subindex, roi, result):
        featMatrix = []
        labelsMatrix = []

        for i in range(len(self.Labels)):
            feats = self.Features[i]([]).wait()

            # TODO: we should be able to use self.Labels[i].value,
            # but the current implementation of Slot.value() does not
            # do the right thing.
            labels = self.Labels[i]([]).wait()

            for t in sorted(feats.keys()):
                featsMatrix_tmp = []
                labelsMatrix_tmp = []
                lab = labels[t].squeeze()
                index = numpy.nonzero(lab)
                labelsMatrix_tmp.append(lab[index])

                #check that all requested features are present
                for featname in config.selected_features:
                    for channel in feats[t]:
                        if featname not in channel:
                            print("Feature", featname, "has not been computed in the previous step")
                            print("We only have the following features now:", list(channel.keys()))
                            result[:] = None
                            return
                        else:
                            value = channel[featname]
                            ft = numpy.asarray(value.squeeze())
                            featsMatrix_tmp.append(ft[index])
                
                featMatrix.append(_concatenate(featsMatrix_tmp, axis=1))
                labelsMatrix.append(_concatenate(labelsMatrix_tmp, axis=1))

        featMatrix = _concatenate(featMatrix, axis=0)
        labelsMatrix = _concatenate(labelsMatrix, axis=0)
        print "training on matrix:", featMatrix.shape
        
        if len(featMatrix) == 0 or len(labelsMatrix) == 0:
            result[:] = None
            return
        oob = [0]*self.ForestCount.value
        try:
            # Ensure there are no NaNs in the feature matrix
            # TODO: There should probably be a better way to fix this...
            featMatrix = numpy.asarray(featMatrix, dtype=numpy.float32)
            nanFeatMatrix = numpy.isnan(featMatrix)
            if nanFeatMatrix.any():
                warnings.warn("Feature matrix has NaN values!  Replacing with 0.0...")
                featMatrix[numpy.where(nanFeatMatrix)] = 0.0
            # train and store forests in parallel
            pool = RequestPool()
            for i in range(self.ForestCount.value):
                def train_and_store(number):
                    result[number] = vigra.learning.RandomForest(self._tree_count)
                    oob[number] = result[number].learnRF(featMatrix, numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                    print "intermediate oob:", oob[number]
                req = Request( partial(train_and_store, i) )
                pool.add( req )
            pool.wait()
            pool.clean()
        except Exception:
            print("couldn't learn classifier")
            raise
        oob_total = numpy.mean(oob)
        print "training finished, out of bag error:", oob_total
        return result
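
# The NaN scrubbing above in compact form; a small standalone helper with the same
# effect as the explicit isnan/where replacement (infinities are left untouched):
import warnings
import numpy as np

def sanitize_features(featMatrix):
    featMatrix = np.asarray(featMatrix, dtype=np.float32)
    nan_mask = np.isnan(featMatrix)
    if nan_mask.any():
        warnings.warn("Feature matrix has NaN values!  Replacing with 0.0...")
        featMatrix[nan_mask] = 0.0
    return featMatrix
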
Example #27
    def _executeOutput(self, slot, subindex, roi, result):
        t = time.time()
        key = roi.toSlice()

        shape = self.Output.meta.shape
        start, stop = sliceToRoi(key, shape)

        self._lock.acquire()

        self._cacheHits += 1

        self._running += 1

        if self._cache is None:
            self._allocateCache()

        cacheView = self._cache[:]  # keep a reference so the cache cannot be freed while this function runs

        blockStart = (1.0 * start / self._blockShape).floor()
        blockStop = (1.0 * stop / self._blockShape).ceil()
        blockKey = roiToSlice(blockStart, blockStop)

        blockSet = self._blockState[blockKey]

        # this is a little optimization to shortcut
        # many lines of python code when all data
        # is already in the cache:
        if numpy.logical_or(blockSet == OpArrayCache.CLEAN,
                            blockSet == OpArrayCache.FIXED_DIRTY).all():
            result[:] = self._cache[roiToSlice(start, stop)]
            self._running -= 1
            self._updatePriority()
            cacheView = None
            self._lock.release()
            return

        inProcessQueries = numpy.unique(
            numpy.extract(blockSet == OpArrayCache.IN_PROCESS,
                          self._blockQuery[blockKey]))

        cond = (blockSet == OpArrayCache.DIRTY)
        tileWeights = fastWhere(cond, 1, 128**3, numpy.uint32)
        trueDirtyIndices = numpy.nonzero(cond)

        tileArray = drtile.test_DRTILE(tileWeights, 128**3).swapaxes(0, 1)

        dirtyRois = []
        half = tileArray.shape[0] // 2  # integer division: tile midpoint used as an index
        dirtyPool = RequestPool()

        for i in range(tileArray.shape[1]):

            drStart3 = tileArray[:half, i]
            drStop3 = tileArray[half:, i]
            drStart2 = drStart3 + blockStart
            drStop2 = drStop3 + blockStart
            drStart = drStart2 * self._blockShape
            drStop = drStop2 * self._blockShape

            shape = self.Output.meta.shape
            drStop = numpy.minimum(drStop, shape)
            drStart = numpy.minimum(drStart, shape)

            key3 = roiToSlice(drStart3, drStop3)
            key2 = roiToSlice(drStart2, drStop2)

            key = roiToSlice(drStart, drStop)

            if not self._fixed:
                dirtyRois.append([drStart, drStop])

                req = self.inputs["Input"][key].writeInto(self._cache[key])

                req.uncancellable = True  #FIXME

                dirtyPool.add(req)

                self._blockQuery[key2] = weakref.ref(req)

                #sanity check:
                if (self._blockState[key2] != OpArrayCache.DIRTY).any():
                    logger.warning("original condition" + str(cond))
                    logger.warning("original tilearray {} {}".format(
                        tileArray, tileArray.shape))
                    logger.warning("original tileWeights {} {}".format(
                        tileWeights, tileWeights.shape))
                    logger.warning("sub condition {}".format(
                        self._blockState[key2] == OpArrayCache.DIRTY))
                    logger.warning("START={}, STOP={}".format(
                        drStart2, drStop2))
                    import h5py
                    with h5py.File("test.h5", "w") as f:
                        f.create_dataset("data", data=tileWeights)
                        logger.warning(
                            "%r \n %r \n %r\n %r\n %r \n%r" %
                            (key2, blockKey, self._blockState[key2],
                             self._blockState[blockKey][trueDirtyIndices],
                             self._blockState[blockKey], tileWeights))
                    assert False
                self._blockState[key2] = OpArrayCache.IN_PROCESS

        # indicate the in-process state by setting the block flags to IN_PROCESS
        if not self._fixed:
            blockSet[:] = fastWhere(cond, OpArrayCache.IN_PROCESS, blockSet,
                                    numpy.uint8)
        else:
            # Someone asked for some dirty blocks while we were fixed.
            # Mark these blocks to be signaled as dirty when we become unfixed
            blockSet[:] = fastWhere(cond, OpArrayCache.FIXED_DIRTY, blockSet,
                                    numpy.uint8)
            self._has_fixed_dirty_blocks = True
        self._lock.release()

        temp = itertools.count(0)

        #wait for all requests to finish
        dirtyPool.wait()
        if len(dirtyPool) > 0:
            # Signal that something was updated.
            # Note that we don't need to do this for the 'in process' queries (below)
            #  because they are already in the dirtyPool in some other thread
            self.Output._sig_value_changed()
        dirtyPool.clean()

        # indicate the finished inprocess state (i.e. CLEAN)
        if not self._fixed and next(temp) == 0:
            with self._lock:
                blockSet[:] = fastWhere(cond, OpArrayCache.CLEAN, blockSet,
                                        numpy.uint8)
                self._blockQuery[blockKey] = fastWhere(
                    cond, None, self._blockQuery[blockKey], object)

        inProcessPool = RequestPool()
        #wait for all in process queries
        for req in inProcessQueries:
            req = req()  # get original req object from weakref
            if req is not None:
                inProcessPool.add(req)

        inProcessPool.wait()
        inProcessPool.clean()

        # finally, store results in result area
        self._lock.acquire()
        if self._cache is not None:
            result[:] = self._cache[roiToSlice(start, stop)]
        else:
            self.inputs["Input"][roiToSlice(start,
                                            stop)].writeInto(result).wait()
        self._running -= 1
        self._updatePriority()
        cacheView = None

        self._lock.release()
        self.logger.debug("read %s took %f sec." %
                          (roi.pprint(), time.time() - t))
    def execute(self, slot, subindex, roi, result):
        featList = []
        all_col_names = []
        labelsList = []

        # will be available at slot self.Warnings
        all_bad_objects = defaultdict(lambda: defaultdict(list))
        all_bad_feats = set()

        selected = self.SelectedFeatures([]).wait()
        if len(selected) == 0:
            # no features - no predictions
            self.Classifier.setValue(None)
            return

        for i in range(len(self.Labels)):
            # FIXME: we should only compute the features if there are nonzero labels in this image
            feats = self.Features[i]([]).wait()

            # TODO: we should be able to use self.Labels[i].value,
            # but the current implementation of Slot.value() does not
            # do the right thing.
            labels = self.Labels[i]([]).wait()

            featstmp, row_names, col_names, labelstmp = make_feature_array(
                feats, selected, labels)
            if labelstmp.size == 0 or featstmp.size == 0:
                continue

            rows, cols = replace_missing(featstmp)

            featList.append(featstmp)
            all_col_names.append(tuple(col_names))
            labelsList.append(labelstmp)

            for idx in rows:
                t, obj = row_names[idx]
                all_bad_objects[i][t].append(obj)

            for c in cols:
                all_bad_feats.add(col_names[c])

        if len(labelsList) == 0:
            #no labels, return here
            self.Classifier.setValue(None)
            return

        self._warnBadObjects(all_bad_objects, all_bad_feats)

        if len(set(all_col_names)) != 1:
            raise Exception(
                'different time slices did not have the same features.')

        featMatrix = _concatenate(featList, axis=0)
        labelsMatrix = _concatenate(labelsList, axis=0)

        logger.info("training on matrix of shape {}".format(featMatrix.shape))

        if featMatrix.size == 0 or labelsMatrix.size == 0:
            result[:] = None
            return
        oob = [0] * self.ForestCount.value
        try:
            # train and store forests in parallel
            pool = RequestPool()
            for i in range(self.ForestCount.value):

                def train_and_store(number):
                    result[number] = vigra.learning.RandomForest(
                        self._tree_count)
                    oob[number] = result[number].learnRF(
                        featMatrix.astype(numpy.float32),
                        numpy.asarray(labelsMatrix, dtype=numpy.uint32))

                req = Request(partial(train_and_store, i))
                pool.add(req)
            pool.wait()
            pool.clean()
        except Exception:
            logger.warning("couldn't learn classifier")
            raise
        oob_total = numpy.mean(oob)
        logger.info(
            "training finished, out of bag error: {}".format(oob_total))
        return result
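
# A toy sketch of the per-block state bookkeeping that the cache examples rely on;
# the constants and the 4x4 block grid are illustrative, not OpArrayCache's actual
# values:
import numpy as np

CLEAN, DIRTY, IN_PROCESS, FIXED_DIRTY = 0, 1, 2, 3

block_state = np.full((4, 4), DIRTY, dtype=np.uint8)  # one state byte per block

roi = (slice(0, 2), slice(0, 2))      # a request touches blocks [0:2, 0:2]
sub = block_state[roi]                # basic slicing returns a view into block_state
sub[sub == DIRTY] = IN_PROCESS        # flag the dirty blocks as being fetched
# ... issue requests and wait for them, as the dirtyPool does above ...
sub[sub == IN_PROCESS] = CLEAN        # flip them to CLEAN once the fetch finished

assert (block_state[roi] == CLEAN).all()
assert (block_state[2:, 2:] == DIRTY).all()  # untouched blocks stay dirty
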
Example #29
    def _executeOutput(self, slot, subindex, roi, result):
        t = time.time()
        key = roi.toSlice()

        shape = self.Output.meta.shape
        start, stop = sliceToRoi(key, shape)

        with self._lock:
            self._cacheHits += 1
    
            self._running += 1
    
            if self._cache is None:
                self._allocateCache()
    
            cacheView = self._cache[:]  # keep a reference so the cache cannot be freed while this function runs

            blockStart = (1.0 * start / self._blockShape).floor()
            blockStop = (1.0 * stop / self._blockShape).ceil()
            blockKey = roiToSlice(blockStart,blockStop)
    
            blockSet = self._blockState[blockKey]
    
            # this is a little optimization to shortcut
            # many lines of python code when all data
            # is already in the cache:
            if numpy.logical_or(blockSet == OpArrayCache.CLEAN, blockSet == OpArrayCache.FIXED_DIRTY).all():
                result[:] = self._cache[roiToSlice(start, stop)]
                self._running -= 1
                self._updatePriority()
                cacheView = None
                return
    
            inProcessQueries = numpy.unique(numpy.extract( blockSet == OpArrayCache.IN_PROCESS, self._blockQuery[blockKey]))
    
            cond = (blockSet == OpArrayCache.DIRTY)
            tileWeights = fastWhere(cond, 1, 128**3, numpy.uint32)
            trueDirtyIndices = numpy.nonzero(cond)
    
            tileArray = drtile.test_DRTILE(tileWeights, 128**3).swapaxes(0,1)
    
            dirtyRois = []
            half = tileArray.shape[0] // 2  # integer division: tile midpoint used as an index
            dirtyPool = RequestPool()
    
            for i in range(tileArray.shape[1]):
    
                drStart3 = tileArray[:half,i]
                drStop3 = tileArray[half:,i]
                drStart2 = drStart3 + blockStart
                drStop2 = drStop3 + blockStart
                drStart = drStart2*self._blockShape
                drStop = drStop2*self._blockShape
    
                shape = self.Output.meta.shape
                drStop = numpy.minimum(drStop, shape)
                drStart = numpy.minimum(drStart, shape)
    
                key3 = roiToSlice(drStart3,drStop3)
                key2 = roiToSlice(drStart2,drStop2)
    
                key = roiToSlice(drStart,drStop)
    
                if not self._fixed:
                    dirtyRois.append([drStart,drStop])
    
                    req = self.inputs["Input"][key].writeInto(self._cache[key])
    
                    req.uncancellable = True #FIXME
                    
                    dirtyPool.add(req)
    
                    self._blockQuery[key2] = weakref.ref(req)
    
                    #sanity check:
                    if (self._blockState[key2] != OpArrayCache.DIRTY).any():
                        logger.warning( "original condition" + str(cond) )
                        logger.warning( "original tilearray {} {}".format( tileArray, tileArray.shape ) )
                        logger.warning( "original tileWeights {} {}".format( tileWeights, tileWeights.shape ) )
                        logger.warning( "sub condition {}".format( self._blockState[key2] == OpArrayCache.DIRTY ) )
                        logger.warning( "START={}, STOP={}".format( drStart2, drStop2 ) )
                        import h5py
                        with h5py.File("test.h5", "w") as f:
                            f.create_dataset("data",data = tileWeights)
                            logger.warning( "%r \n %r \n %r\n %r\n %r \n%r" % (key2, blockKey,self._blockState[key2], self._blockState[blockKey][trueDirtyIndices],self._blockState[blockKey],tileWeights) )
                        assert False
                    self._blockState[key2] = OpArrayCache.IN_PROCESS
    
            # indicate the in-process state by setting the block flags to IN_PROCESS
            if not self._fixed:
                blockSet[:]  = fastWhere(cond, OpArrayCache.IN_PROCESS, blockSet, numpy.uint8)
            else:
                # Someone asked for some dirty blocks while we were fixed.
                # Mark these blocks to be signaled as dirty when we become unfixed
                blockSet[:]  = fastWhere(cond, OpArrayCache.FIXED_DIRTY, blockSet, numpy.uint8)
                self._has_fixed_dirty_blocks = True

        temp = itertools.count(0)

        #wait for all requests to finish
        dirtyPool.wait()
        if len( dirtyPool ) > 0:
            # Signal that something was updated.
            # Note that we don't need to do this for the 'in process' queries (below)  
            #  because they are already in the dirtyPool in some other thread
            self.Output._sig_value_changed()
        dirtyPool.clean()

        # indicate the finished inprocess state (i.e. CLEAN)
        if not self._fixed and next(temp) == 0:
            with self._lock:
                blockSet[:] = fastWhere(cond, OpArrayCache.CLEAN, blockSet, numpy.uint8)
                self._blockQuery[blockKey] = fastWhere(cond, None, self._blockQuery[blockKey], object)

        inProcessPool = RequestPool()
        #wait for all in process queries
        for req in inProcessQueries:
            req = req() # get original req object from weakref
            if req is not None:
                inProcessPool.add(req) 

        inProcessPool.wait()
        inProcessPool.clean()

        # finally, store results in result area
        with self._lock:
            if self._cache is not None:
                result[:] = self._cache[roiToSlice(start, stop)]
            else:
                self.inputs["Input"][roiToSlice(start, stop)].writeInto(result).wait()
            self._running -= 1
            self._updatePriority()
            cacheView = None
        self.logger.debug("read %s took %f sec." % (roi.pprint(), time.time()-t))
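
# Example #29 is the same cache read as Example #27, rewritten to hold the lock
# through `with self._lock:` blocks instead of paired acquire()/release() calls.
# A minimal illustration (hypothetical names) of why the context-manager form is safer:
import threading

lock = threading.Lock()

def risky(work):
    lock.acquire()
    work()          # if this raises, release() is never reached -> deadlock later
    lock.release()

def safe(work):
    with lock:      # the lock is released on normal exit *and* on exception
        work()
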
Example #30
    def execute(self, slot, subindex, roi, result):
        progress = 0
        self.progressSignal(progress)
        numImages = len(self.Images)

        key = roi.toSlice()
        featMatrix=[]
        labelsMatrix=[]
        for i,labels in enumerate(self.inputs["Labels"]):
            if labels.meta.shape is not None:
                #labels=labels[:].wait()
                blocks = self.inputs["nonzeroLabelBlocks"][i][0].wait()

                progress += 10/numImages
                self.progressSignal(progress)

                reqlistlabels = []
                reqlistfeat = []
                traceLogger.debug("Sending requests for {} non-zero blocks (labels and data)".format( len(blocks[0])) )
                for b in blocks[0]:

                    request = labels[b]
                    featurekey = list(b)
                    featurekey[-1] = slice(None, None, None)
                    request2 = self.inputs["Images"][i][featurekey]

                    reqlistlabels.append(request)
                    reqlistfeat.append(request2)

                traceLogger.debug("Requests prepared")

                numLabelBlocks = len(reqlistlabels)
                progress_outer = [progress] # Store in list for closure access
                if numLabelBlocks > 0:
                    progressInc = (80-10)/numLabelBlocks/numImages

                def progressNotify(req):
                    # Note: If we wanted perfect progress reporting, we could use lock here
                    #       to protect the progress from being incremented simultaneously.
                    #       But that would slow things down and imperfect reporting is okay for our purposes.
                    progress_outer[0] += progressInc/2
                    self.progressSignal(progress_outer[0])

                for ir, req in enumerate(reqlistfeat):
                    req.notify_finished(progressNotify)

                for ir, req in enumerate(reqlistlabels):
                    req.notify_finished(progressNotify)

                traceLogger.debug("Requests fired")

                for ir, req in enumerate(reqlistlabels):
                    traceLogger.debug("Waiting for a label block...")
                    labblock = req.wait()

                    traceLogger.debug("Waiting for an image block...")
                    image = reqlistfeat[ir].wait()

                    indexes=numpy.nonzero(labblock[...,0].view(numpy.ndarray))
                    features=image[indexes]
                    labbla=labblock[indexes]

                    featMatrix.append(features)
                    labelsMatrix.append(labbla)

                progress = progress_outer[0]

                traceLogger.debug("Requests processed")

        self.progressSignal(80/numImages)

        if len(featMatrix) == 0 or len(labelsMatrix) == 0:
            # If there was no actual data for the random forest to train with, we return None
            result[:] = None
        else:
            featMatrix=numpy.concatenate(featMatrix,axis=0)
            labelsMatrix=numpy.concatenate(labelsMatrix,axis=0)
            maxLabel = self.inputs["MaxLabel"].value
            labelList = list(range(1, maxLabel+1)) if maxLabel > 0 else list()

            try:
                logger.debug("Learning with Vigra...")
                # train and store self._forest_count forests in parallel
                pool = RequestPool()

                for i in range(self._forest_count):
                    def train_and_store(number):
                        result[number] = vigra.learning.RandomForest(self._tree_count, labels=labelList)
                        result[number].learnRF( numpy.asarray(featMatrix, dtype=numpy.float32),
                                                numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                    req = pool.request(partial(train_and_store, i))

                pool.wait()
                pool.clean()

                logger.debug("Vigra finished")
            except Exception:
                logger.error( "ERROR: could not learn classifier" )
                logger.error( "featMatrix shape={}, max={}, dtype={}".format(featMatrix.shape, featMatrix.max(), featMatrix.dtype) )
                logger.error( "labelsMatrix shape={}, max={}, dtype={}".format(labelsMatrix.shape, labelsMatrix.max(), labelsMatrix.dtype ) )
                raise
            finally:
                self.progressSignal(100)

        return result
    def execute(self, slot, subindex, slot_roi, target):
        assert slot == self.Features or slot == self.Output
        if slot == self.Features:
            feature_slice = roiToSlice(slot_roi.start, slot_roi.stop)
            index = subindex[0]
            feature_slice = list(feature_slice)

            # Translate channel slice of this feature to the channel slice of the output slot.
            output_channel_offset = self.featureOutputChannels[index][0]
            feature_slice[1] = slice(
                output_channel_offset + feature_slice[1].start, output_channel_offset + feature_slice[1].stop
            )
            slot_roi = SubRegion(self.Output, pslice=feature_slice)

            # Get output slot region for this channel
            return self.execute(self.Output, (), slot_roi, target)
        elif slot == self.Output:
            # Correlation of variable 'families' representing reference frames:
            #  ______________________________
            # | input/output frame           |  input/output shape given by slots
            # |  _________________________   |
            # | | smooth frame            |  |  pre-smoothing op needs halo around filter roi
            # | |  ____________________   |  |
            # | | |filter frame        |  |  |  filter needs halo around target roi
            # | | |  _______________   |  |  |
            # | | | | target frame  |  |  |  |  target is given by output_roi

            # note: The 'full_' variable prefix refers to the full 5D shape (tczyx), without 'full_' variables mostly
            #       refer to the 3D space subregion (zyx)

            full_output_slice = slot_roi.toSlice()

            logger.debug(f"OpPixelFeaturesPresmoothed: request {slot_roi.pprint()}")

            assert (slot_roi.stop <= self.Output.meta.shape).all()

            full_output_shape = self.Output.meta.shape
            full_output_start, full_output_stop = sliceToRoi(full_output_slice, full_output_shape)
            assert len(full_output_shape) == 5
            if all(self.ComputeIn2d.value):  # todo: check for this particular slice
                axes2enlarge = (0, 1, 1)
            else:
                axes2enlarge = (1, 1, 1)

            output_shape = full_output_shape[2:]
            output_start = full_output_start[2:]
            output_stop = full_output_stop[2:]

            axistags = self.Output.meta.axistags
            target = target.view(vigra.VigraArray)
            target.axistags = copy.copy(axistags)

            # filter roi in input frame
            # sigma = 0.7, because the features receive a pre-smoothed array and don't need much of a neighborhood
            input_filter_start, input_filter_stop = roi.enlargeRoiForHalo(
                output_start, output_stop, output_shape, 0.7, self.WINDOW_SIZE, enlarge_axes=axes2enlarge
            )

            # smooth roi in input frame
            input_smooth_start, input_smooth_stop = roi.enlargeRoiForHalo(
                input_filter_start,
                input_filter_stop,
                output_shape,
                self.max_sigma,
                self.WINDOW_SIZE,
                enlarge_axes=axes2enlarge,
            )

            # target roi in filter frame
            filter_target_start = roi.TinyVector(output_start - input_filter_start)
            filter_target_stop = roi.TinyVector(output_stop - input_filter_start)

            # filter roi in smooth frame
            smooth_filter_start = roi.TinyVector(input_filter_start - input_smooth_start)
            smooth_filter_stop = roi.TinyVector(input_filter_stop - input_smooth_start)

            filter_target_slice = roi.roiToSlice(filter_target_start, filter_target_stop)
            input_smooth_slice = roi.roiToSlice(input_smooth_start, input_smooth_stop)

            # pre-smooth for all requested time slices and all channels
            full_input_smooth_slice = (full_output_slice[0], slice(None), *input_smooth_slice)
            req = self.Input[full_input_smooth_slice]
            source = req.wait()
            req.clean()
            req.destination = None
            if source.dtype != numpy.float32:
                sourceF = source.astype(numpy.float32)
                try:
                    source.resize((1,), refcheck=False)
                except Exception:
                    pass
                del source
                source = sourceF

            sourceV = source.view(vigra.VigraArray)
            sourceV.axistags = copy.copy(self.Input.meta.axistags)

            dimCol = len(self.scales)
            dimRow = self.matrix.shape[0]

            presmoothed_source = [None] * dimCol

            source_smooth_shape = tuple(smooth_filter_stop - smooth_filter_start)
            full_source_smooth_shape = (
                full_output_stop[0] - full_output_start[0],
                self.Input.meta.shape[1],
            ) + source_smooth_shape
            try:
                for j in range(dimCol):
                    for i in range(dimRow):
                        if self.matrix[i, j]:
                            # There is at least one filter op with this scale
                            break
                    else:
                        # There is no filter op at this scale
                        continue

                    if self.scales[j] > 1.0:
                        tempSigma = math.sqrt(self.scales[j] ** 2 - 1.0)
                    else:
                        tempSigma = self.scales[j]

                    presmoothed_source[j] = numpy.empty(full_source_smooth_shape, dtype=numpy.float32)

                    droi = (
                        (0, *tuple(smooth_filter_start._asint())),
                        (sourceV.shape[1], *tuple(smooth_filter_stop._asint())),
                    )
                    for i, vsa in enumerate(sourceV.timeIter()):
                        presmoothed_source[j][i, ...] = self._computeGaussianSmoothing(
                            vsa, tempSigma, droi, in2d=self.ComputeIn2d.value[j]
                        )

            except RuntimeError as e:
                if "kernel longer than line" in str(e):
                    raise RuntimeError(
                        "Feature computation error:\nYour image is too small to apply a filter with "
                        f"sigma={self.scales[j]:.1f}. Please select features with smaller sigmas."
                    ) from e
                else:
                    raise

            del sourceV
            try:
                source.resize((1,), refcheck=False)
            except ValueError:
                # Sometimes this fails, but that's okay.
                logger.debug("Failed to free array memory.")
            del source

            cnt = 0
            written = 0
            closures = []
            # connect individual operators
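            # `cnt` walks over the global output-channel offset of each filter
            # operator; `written` counts the channels already copied into the
            # target; only operators whose channel range intersects
            # [slot_roi.start[1], slot_roi.stop[1]) get a closure below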
            for i in range(dimRow):
                for j in range(dimCol):
                    if self.matrix[i, j]:
                        oslot = self.featureOps[i][j].Output
                        req = None
                        slices = oslot.meta.shape[1]
                        if (
                            cnt + slices >= slot_roi.start[1]
                            and slot_roi.start[1] - cnt < slices
                            and slot_roi.start[1] + written < slot_roi.stop[1]
                        ):
                            begin = 0
                            if cnt < slot_roi.start[1]:
                                begin = slot_roi.start[1] - cnt
                            end = slices
                            if cnt + end > slot_roi.stop[1]:
                                end = slot_roi.stop[1] - cnt

                            # feature slice in output frame
                            feature_slice = (slice(None), slice(written, written + end - begin)) + (slice(None),) * 3

                            subtarget = target[feature_slice]
                            # readjust the roi for the new source array
                            full_filter_target_slice = [full_output_slice[0], slice(begin, end), *filter_target_slice]
                            filter_target_roi = SubRegion(oslot, pslice=full_filter_target_slice)

                            closure = partial(
                                oslot.operator.execute,
                                oslot,
                                (),
                                filter_target_roi,
                                subtarget,
                                sourceArray=presmoothed_source[j],
                            )
                            closures.append(closure)

                            written += end - begin
                        cnt += slices
            pool = RequestPool()
            for c in closures:
                pool.request(c)
            pool.wait()
            pool.clean()

            for i in range(len(presmoothed_source)):
                if presmoothed_source[i] is not None:
                    try:
                        presmoothed_source[i].resize((1,))
                    except Exception:
                        presmoothed_source[i] = None
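
The incremental smoothing above relies on the Gaussian semigroup property: filtering with sigma_a and then sigma_b is equivalent to a single pass with sqrt(sigma_a**2 + sigma_b**2), which is why a scale above 1.0 only needs the remainder sqrt(scale**2 - 1.0) on an input pre-smoothed with sigma 1.0. A minimal sketch of that identity, assuming scipy is available (none of these names belong to the operator):

import numpy as np
from scipy.ndimage import gaussian_filter

rng = np.random.default_rng(0)
image = rng.random((64, 64)).astype(np.float32)

scale = 2.0                                    # total sigma a feature needs
pre_sigma = 1.0                                # sigma of the pre-smoothed input
temp_sigma = np.sqrt(scale ** 2 - pre_sigma ** 2)

direct = gaussian_filter(image, scale)
incremental = gaussian_filter(gaussian_filter(image, pre_sigma), temp_sigma)

# agreement up to kernel truncation and boundary handling, so compare interiors
print(np.abs(direct - incremental)[8:-8, 8:-8].max())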
    def execute(self, slot, subindex, roi, result):
        assert slot in (self.Predictions, self.Probabilities, self.ProbabilityChannels)

        times = roi._l
        if len(times) == 0:
            # we assume that 0-length requests are requesting everything
            times = range(self.Predictions.meta.shape[0])

        forests = self.inputs["Classifier"][:].wait()
        if forests is None or forests[0] is None:
            # this happens if there was no data to train with
            return {t: numpy.array([]) for t in times}

        feats = {}
        prob_predictions = {}
        for t in times:
            if t in self.prob_cache:
                continue

            tmpfeats = self.Features([t]).wait()
            feats[t] = make_feature_array(tmpfeats)
            prob_predictions[t] = [0] * len(forests)

        def predict_forest(_t, forest_index):
            # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
            #        and we have to average the PROBABILITIES from all forests.
            #       Averaging the label predictions from each forest is NOT equivalent.
            #       For details please see wikipedia:
            #       http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
            #       (^-^)
            prob_predictions[_t][forest_index] = forests[forest_index].predictProbabilities(feats[_t])

        # predict the data with all the forests in parallel
        pool = RequestPool()
        for t in times:
            if t in self.prob_cache:
                continue
            for i in range(len(forests)):
                req = Request(partial(predict_forest, t, i))
                pool.add(req)

        pool.wait()
        pool.clean()

        for t in times:
            if t not in self.prob_cache:
                # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
                # prob_predictions[t][forest_index][object_index, class_index]

                # Stack the forests together and average them.
                stacked_predictions = numpy.array(prob_predictions[t])
                averaged_predictions = numpy.average(stacked_predictions, axis=0)
                assert averaged_predictions.shape[0] == len(feats[t])
                self.prob_cache[t] = averaged_predictions
                self.prob_cache[t][0] = 0  # Background probability is always zero

        if slot == self.Probabilities:
            return {t: self.prob_cache[t] for t in times}
        elif slot == self.Predictions:
            # FIXME: Support SegmentationThreshold again...
            labels = {t: 1 + numpy.argmax(self.prob_cache[t], axis=1) for t in times}
            for t in times:
                labels[t][0] = 0  # Background gets the zero label
            return labels
        elif slot == self.ProbabilityChannels:
            prob_single_channel = {t: self.prob_cache[t][:, subindex[0]] for t in times}
            return prob_single_channel
        else:
            assert False, "Unknown input slot"
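
The comment in predict_forest deserves a concrete demonstration: taking the argmax of averaged probabilities can disagree with majority-voting the per-forest argmax labels, which is exactly why the probabilities are averaged here. A hypothetical numpy-only toy example (not part of the operator):

import numpy as np

# three "forests", one object, two classes: two forests barely prefer
# class 0, one forest is confident about class 1
probs = np.array([
    [[0.51, 0.49]],
    [[0.51, 0.49]],
    [[0.10, 0.90]],
])

votes = np.argmax(probs, axis=2)          # per-forest labels: [[0], [0], [1]]
majority = np.round(votes.mean(axis=0))   # majority vote picks class 0

averaged = probs.mean(axis=0)             # [[0.373, 0.627]]
soft = np.argmax(averaged, axis=1)        # averaged probabilities pick class 1

print(majority, soft)                     # the two schemes disagree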
    def _execute_graphcut(self, roi, result):
        for i in (0, 4):
            assert roi.stop[i] - roi.start[i] == 1,\
                "Invalid roi for graph-cut: {}".format(str(roi))
        t = roi.start[0]
        c = roi.start[4]
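        # the roi spans exactly one time step and one channel (asserted above),
        # so everything below operates on a single 3d volume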

        margin = self.Margin.value
        beta = self.Beta.value
        MAXBOXSIZE = 10000000  # FIXME justification??

        ## request the bounding box coordinates ##
        # the trailing index brackets give us the dictionary (instead of an
        # array of size 1)
        feats = self.BoundingBoxes.get(roi).wait()
        mins = feats["Coord<Minimum>"]
        maxs = feats["Coord<Maximum>"]
        nobj = mins.shape[0]
        # these are indices, so they should have an index datatype
        mins = mins.astype(np.uint32)
        maxs = maxs.astype(np.uint32)

        ## request the prediction image ##
        pred = self.Prediction.get(roi).wait()
        pred = vigra.taggedView(pred, axistags=self.Prediction.meta.axistags)
        pred = pred.withAxes(*'xyz')

        ## request the connected components image ##
        cc = self.LabelImage.get(roi).wait()
        cc = vigra.taggedView(cc, axistags=self.LabelImage.meta.axistags)
        cc = cc.withAxes(*'xyz')

        # provide an xyz view for the output (8 bit suffices for segmentation)
        resultXYZ = vigra.taggedView(np.zeros(cc.shape, dtype=np.uint8),
                                     axistags='xyz')

        def processSingleObject(i):
            logger.debug("processing object {}".format(i))
            # maxs are inclusive, so we need to add 1
            xmin = max(mins[i][0]-margin[0], 0)
            ymin = max(mins[i][1]-margin[1], 0)
            zmin = max(mins[i][2]-margin[2], 0)
            xmax = min(maxs[i][0]+margin[0]+1, cc.shape[0])
            ymax = min(maxs[i][1]+margin[1]+1, cc.shape[1])
            zmax = min(maxs[i][2]+margin[2]+1, cc.shape[2])
            ccbox = cc[xmin:xmax, ymin:ymax, zmin:zmax]
            resbox = resultXYZ[xmin:xmax, ymin:ymax, zmin:zmax]

            nVoxels = ccbox.size
            if nVoxels > MAXBOXSIZE:
                # problem too large to run graph cut, assign to seed
                logger.warning("Object {} too large for graph cut.".format(i))
                resbox[ccbox == i] = 1
                return

            probbox = pred[xmin:xmax, ymin:ymax, zmin:zmax]
            gcsegm = segmentGC(probbox, beta)
            gcsegm = vigra.taggedView(gcsegm, axistags='xyz')
            ccsegm = vigra.analysis.labelVolumeWithBackground(
                gcsegm.astype(np.uint8))

            # Extended bboxes of different objects might overlap.
            # To avoid conflicting segmentations, we find all connected
            # components in the results and only take the one, which
            # overlaps with the object "core" or "seed", defined by the
            # pre-thresholding
            seed = ccbox == i
            filtered = seed * ccsegm
            passed = vigra.analysis.unique(filtered.astype(np.uint32))
            assert len(passed.shape) == 1
            if passed.size > 2:
                logger.warn("ambiguous label assignment for region {}".format(
                    (xmin, xmax, ymin, ymax, zmin, zmax)))
                resbox[ccbox == i] = 1
            elif passed.size <= 1:
                logger.warning(
                    "box {} segmented out with beta {}".format(i, beta))
            else:
                # assign to the overlap region
                label = passed[1]  # 0 is background
                resbox[ccsegm == label] = 1

        pool = RequestPool()
        #FIXME make sure that the parallel computations fit into memory
        for i in range(1, nobj):
            req = Request(functools.partial(processSingleObject, i))
            pool.add(req)

        logger.info("Processing {} objects ...".format(nobj-1))

        pool.wait()
        pool.clean()

        logger.info("object loop done")

        # prepare result
        resView = vigra.taggedView(result, axistags=self.Output.meta.axistags)
        resView = resView.withAxes(*'xyz')

        # some labels could have been removed => relabel
        vigra.analysis.labelVolumeWithBackground(resultXYZ, out=resView)
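
The margin handling inside processSingleObject is a reusable pattern: extend the inclusive Coord<Minimum>/Coord<Maximum> box by a margin, add 1 to the maximum, and clip against the volume shape. A standalone sketch of just that arithmetic (the helper name is hypothetical):

import numpy as np

def clipped_box(cmin, cmax, margin, shape):
    """Extend an inclusive bounding box by `margin` and clip it to `shape`."""
    start = np.maximum(np.asarray(cmin) - margin, 0)
    stop = np.minimum(np.asarray(cmax) + margin + 1, shape)  # +1: max is inclusive
    return tuple(slice(int(a), int(b)) for a, b in zip(start, stop))

box = clipped_box((5, 5, 5), (9, 9, 9), np.array([3, 3, 3]), (10, 64, 64))
print(box)  # slices (2, 10), (2, 13), (2, 13) in z, y, x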
Example #34
def extractHistograms(volume, labels, patchSize=64, haloSize=0,
                      nBins=30, intRange=(0, 255), appendPositions=False):
    '''
    Extracts histograms from a 3d volume.
     - labels are
        0       ignore
        1       positive
        2       negative
     - histogram extraction is parallelized where possible
     - patches that intersect the volume border are discarded
     - volume and labels must be 3d, in 'zyx' order (if they are not VigraArrays)
     - returns: np.ndarray of shape (nSamples, nBins+1); the last column is the label
    '''

    # progress reporter class; histogram extraction can take quite a long time
    class ProgressReporter(object):

        def __init__(self, nThreads):
            self.lock = ThreadLock()
            self.nThreads = nThreads
            self.status = np.zeros((nThreads,))

        def report(self, index):
            with self.lock:
                self.status[index] = 1
                logger.debug("Finished threads: %d/%d." %
                             (self.status.sum(), len(self.status)))

    # sanity checks
    assert len(volume.shape) == 3, "Volume must be 3d data"
    assert volume.shape == labels.shape,\
        "Volume and labels must have the same shape"

    try:
        volumeZYX = volume.withAxes(*'zyx')
        labelsZYX = labels.withAxes(*'zyx')
    except AttributeError:
        # plain ndarrays have no withAxes(); assume the data is already zyx
        volumeZYX = volume
        labelsZYX = labels

    # compute actual patch size
    patchSize = patchSize + 2*haloSize

    # fill list of patch centers (VigraArray does not support bitwise_or)
    ind_z, ind_y, ind_x = np.where(
        (labelsZYX == 1).view(np.ndarray) | (labelsZYX == 2).view(np.ndarray))
    index = np.arange(len(ind_z))

    # prepare chunking of histogram centers
    chunkSize = 10000  # FIXME magic number??
    nChunks = len(index)//chunkSize + (1 if len(index) % chunkSize > 0 else 0)
    sliceList = [slice(k*chunkSize, min((k+1)*chunkSize, len(index)))
                 for k in range(nChunks)]
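    # e.g. 25000 patch centers with chunkSize 10000 give nChunks == 3:
    # slices [0:10000], [10000:20000] and [20000:25000]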
    histoList = [None]*nChunks

    # prepare subroutine for parallel extraction
    reporter = ProgressReporter(nChunks)

    #BEGIN subroutine
    def _extractHistogramsSub(itemList):

        xs = ind_x[itemList]
        ys = ind_y[itemList]
        zs = ind_z[itemList]

        ymin = ys - patchSize//2
        ymax = ymin + patchSize

        xmin = xs - patchSize//2
        xmax = xmin + patchSize

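        # keep only patches that lie fully inside the slice; patches crossing
        # the volume border are discarded, as promised in the docstring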
        validPatchIndices = np.where(
            np.all(
                (ymin >= 0,
                 xmin >= 0,
                 xmax <= volumeZYX.shape[2],
                 ymax <= volumeZYX.shape[1]),
                axis=0))[0]

        if appendPositions:
            out = np.zeros((len(validPatchIndices), nBins+4))
        else:
            out = np.zeros((len(validPatchIndices), nBins+1))

        for k, patchInd in enumerate(validPatchIndices):
            x = xs[patchInd]
            y = ys[patchInd]
            z = zs[patchInd]

            vol = volumeZYX[z, ymin[patchInd]:ymax[patchInd],
                            xmin[patchInd]:xmax[patchInd]]
            (out[k, :nBins], _) = np.histogram(
                vol, bins=nBins, range=intRange, density=True)
            out[k, nBins] = 1 if labelsZYX[z, y, x] == 1 else 0
            if appendPositions:
                out[k, nBins+1:] = [z, y, x]

        return out

    def partFun(i):
        itemList = index[sliceList[i]]
        histos = _extractHistogramsSub(itemList)
        histoList[i] = histos

        reporter.report(i)
    #END subroutine

    # pool the extraction requests
    pool = RequestPool()

    for i in range(nChunks):
        req = Request(partial(partFun, i))
        pool.add(req)

    pool.wait()
    pool.clean()

    return np.vstack(histoList)
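
A hypothetical invocation on synthetic data, following the label convention from the docstring (shapes and values are made up for illustration, and the module's own imports such as Request, RequestPool and ThreadLock are assumed to be available):

import numpy as np

volume = (np.random.rand(4, 256, 256) * 255).astype(np.uint8)
labels = np.zeros(volume.shape, dtype=np.uint8)
labels[2, 100, 100] = 1  # positive patch center
labels[2, 50, 200] = 2   # negative patch center

histograms = extractHistograms(volume, labels, patchSize=32, nBins=30)
print(histograms.shape)  # (2, 31): 30 bins plus the label column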