Exemplo n.º 1
0
    def execute(self, slot, subindex, roi, result):
        """
        Read the requested region of the HDF5/N5 dataset into ``result``.

        The dataset is looked up in ``self._h5N5File`` under the path held by
        the ``InternalPath`` slot and indexed with ``roi.toSlice()``.
        When DEBUG logging is enabled, the read is timed and reported.

        Args:
            slot: The slot being executed (not used by this method).
            subindex: Sub-slot index (not used by this method).
            roi: Requested region; must provide ``toSlice()``, ``start`` and ``stop``.
            result: Pre-allocated destination array; it is filled in place.
        """
        start_time = time.time()
        assert self._h5N5File is not None
        # Read the desired data directly from the hdf5File
        key = roi.toSlice()
        h5N5File = self._h5N5File
        internalPath = self.InternalPath.value

        # Decide once whether DEBUG output is wanted, so the timing code
        # below only runs when its messages will actually be emitted.
        debug_enabled = logger.isEnabledFor(logging.DEBUG)
        timer = None
        if debug_enabled:
            logger.debug(f"Reading HDF5/N5 block: [{roi.start}, {roi.stop}]")
            timer = Timer()
            timer.unpause()

        dataset = h5N5File[internalPath]
        if result.flags.c_contiguous:
            # read_direct() is only used when the destination is C-contiguous.
            dataset.read_direct(result[...], key)
        else:
            result[...] = dataset[key]

        if debug_enabled:
            elapsed_msec = 1000.0 * (time.time() - start_time)
            logger.debug("took %f msec.", elapsed_msec)

            timer.pause()
            logger.debug(f"Completed HDF5 read in {timer.seconds()} seconds: [{roi.start}, {roi.stop}]")
Exemplo n.º 2
0
    def execute(self, slot, subindex, roi, result):
        """
        Gaussian-smooth the requested roi, one (time, channel) slice at a time.

        The input is fetched for the roi returned by
        ``self._getInputComputeRois`` and viewed in 'tzyxc' order.  Spatial
        axes whose tagged input extent is 1 are dropped before filtering.
        If any remaining sigma is below 0.1, the input is copied to
        ``result`` unfiltered.

        Args:
            slot: The slot being executed (not used by this method).
            subindex: Sub-slot index (not used by this method).
            roi: Requested region (``start`` / ``stop`` are indexed per axis).
            result: Pre-allocated destination array; it is filled in place.
        """
        assert all(roi.stop <= self.Input.meta.shape),\
            "Requested roi {} is too large for this input image of shape {}.".format(roi, self.Input.meta.shape)

        # input_roi: what has to be requested upstream (a 5d roi).
        # compute_roi: where the result lies relative to input_roi; its
        # dimensionality depends on the number of singleton axes (at most 3d).
        input_roi, compute_roi = self._getInputComputeRois(roi)

        # Fetch the upstream data and report how long it took
        with Timer() as fetch_timer:
            raw = self.Input(*input_roi).wait()
        fetch_seconds = fetch_timer.seconds()
        logger.debug("Obtaining input data took {} seconds for roi {}".format(
            fetch_seconds, input_roi))

        # The data is in tzyxc order; the filter below needs float32
        volume = vigra.taggedView(raw, axistags='tzyxc')
        if volume.dtype != numpy.float32:
            volume = volume.astype(numpy.float32)

        # Singleton spatial axes must be removed, otherwise the filter
        # fails with 'kernel longer than line' errors
        tagged_shape = self.Input.meta.getTaggedShape()
        active_axes = [axis for axis in 'zyx' if tagged_shape[axis] > 1]
        sigmas = [self._sigmas[axis] for axis in active_axes]

        # Nothing to smooth: just pipe the input through
        if any(s < 0.1 for s in sigmas):
            result[...] = volume
            return

        t_axis, c_axis = 0, 4
        num_times = roi.stop[t_axis] - roi.start[t_axis]
        num_channels = roi.stop[c_axis] - roi.start[c_axis]

        for t_off in range(num_times):
            for c_off in range(num_channels):
                # Views on the output and input for this (t, c) pair,
                # reduced to the non-singleton spatial axes.
                # TODO make this general, not just for z axis
                out_view = vigra.taggedView(result[t_off, ..., c_off],
                                            axistags='zyx')
                out_view = out_view.withAxes(*active_axes)
                in_view = volume[t_off, ..., c_off].withAxes(*active_axes)

                # Smooth directly into the output view
                vigra.filters.gaussianSmoothing(in_view,
                                                sigmas,
                                                window_size=2.0,
                                                roi=compute_roi,
                                                out=out_view)
Exemplo n.º 3
0
    def execute(self, slot, subindex, roi, result):
        """
        Run the multicut agglomeration and write the labels into ``result``.

        The edge probabilities, RAG, beta and solver name are read from the
        corresponding input slots and passed to
        ``self.agglomerate_with_multicut``.  The elapsed time is logged at
        INFO level.

        Args:
            slot: The slot being executed (not used by this method).
            subindex: Sub-slot index (not used by this method).
            roi: Requested region (not used by this method).
            result: Destination array; it is filled in place.
        """
        probabilities = self.EdgeProbabilities.value
        region_graph = self.Rag.value
        beta_value = self.Beta.value
        solver = self.SolverName.value

        with Timer() as multicut_timer:
            labels = self.agglomerate_with_multicut(region_graph,
                                                    probabilities,
                                                    beta_value,
                                                    solver)

        elapsed = multicut_timer.seconds()
        logger.info("'{}' Multicut took {} seconds".format(solver, elapsed))

        # Append a trailing singleton axis before copying into the result
        result[:] = labels[..., None]
    def _train_forests_with_feature_importance(forests,
                                               X,
                                               y,
                                               feature_names,
                                               export_path=None):
        """
        Train every forest via a RequestPool and collect feature importances.

        Each forest is trained with ``learnRFWithFeatureSelection(X, y)``.
        The per-forest importances are averaged, weighted by each forest's
        tree count, and rendered with ``generate_importance_table``.  The
        table is logged at INFO level; ``export_path`` is forwarded to
        ``generate_importance_table``.

        Returns:
            (oobs, named_importances): the list of per-forest oob results and
            an OrderedDict mapping each feature name to its averaged importance.
        """
        num_forests = len(forests)
        oobs = [None] * num_forests
        importances = [None] * num_forests

        def make_recorder(index):
            # Build the completion callback for the forest at ``index``
            def record(training_results):
                oobs[index], importances[index] = training_results

            return record

        with Timer() as train_timer:
            pool = RequestPool()
            for index, forest in enumerate(forests):
                training_request = Request(
                    partial(forest.learnRFWithFeatureSelection, X, y))
                training_request.notify_finished(make_recorder(index))
                pool.add(training_request)
            pool.wait()

        logger.info("Training took, {} seconds".format(train_timer.seconds()))

        # Forests may hold different numbers of trees, so weight each
        # forest's importances by its share of the total tree count
        weights = numpy.asarray([forest.treeCount() for forest in forests],
                                dtype=float)
        weights = weights / weights.sum()

        averaged = numpy.average(importances, weights=weights, axis=0)
        named_importances = collections.OrderedDict(zip(feature_names, averaged))

        importance_table = generate_importance_table(named_importances,
                                                     sort="overall",
                                                     export_path=export_path)

        logger.info(
            "Feature importance measurements during training: \n{}".format(
                importance_table))

        return oobs, named_importances
Exemplo n.º 5
0
    def execute(self, slot, subindex, roi, result):
        """
        Gaussian-smooth the requested roi of the input into ``result``.

        The input is fetched for the roi returned by
        ``self._getInputComputeRois``.  A singleton z axis is removed before
        filtering.  If any sigma is below 0.1 the input is copied to
        ``result`` without smoothing.

        Args:
            slot: The slot being executed (not used by this method).
            subindex: Sub-slot index (not used by this method).
            roi: Requested region.
            result: Pre-allocated destination array; it is filled in place.

        Returns:
            ``result``.
        """
        input_meta = self.Input.meta
        assert all(roi.stop <= input_meta.shape), "Requested roi {} is too large for this input image of shape {}.".format( roi, input_meta.shape )
        # Determine how much input data we'll need, and where the result will be relative to that input roi
        inputRoi, computeRoi = self._getInputComputeRois(roi)
        # Obtain the input data
        with Timer() as resultTimer:
            data = self.Input( *inputRoi ).wait()
        logger.debug("Obtaining input data took {} seconds for roi {}".format( resultTimer.seconds(), inputRoi ))

        axistags = input_meta.axistags
        ndim = len(input_meta.shape)
        xIndex = axistags.index('x')
        yIndex = axistags.index('y')
        # An index that is not smaller than the number of axes is treated
        # as "axis not present".
        zIndex = axistags.index('z')
        if zIndex >= ndim:
            zIndex = None
        cIndex = axistags.index('c')
        if cIndex >= ndim:
            cIndex = None

        # Must be float32
        if data.dtype != numpy.float32:
            data = data.astype(numpy.float32)

        axiskeys = input_meta.getAxisKeys()
        # Real lists (not one-shot filter/map iterators): sigma is read twice
        # below, once for the threshold check and once by the filter.
        spatialkeys = [k for k in axiskeys if k in 'xyz']

        # we need to remove a singleton z axis, otherwise we get
        # 'kernel longer than line' errors
        reskey = [slice(None, None, None)] * ndim
        if cIndex is not None:
            reskey[cIndex] = 0
        # Compare against None explicitly: a z axis at index 0 is valid.
        removedZ = zIndex is not None and input_meta.shape[zIndex] == 1
        if removedZ:
            # NOTE(review): this reshape presumably relies on x preceding y
            # in the axis order and on a singleton channel -- confirm.
            data = data.reshape((data.shape[xIndex], data.shape[yIndex]))
            reskey[zIndex] = 0
            spatialkeys = [k for k in axiskeys if k in 'xy']

        sigma = [self._sigmas.get(k) for k in spatialkeys]
        # Check if we need to smooth
        if any(x < 0.1 for x in sigma):
            if removedZ:
                resultXY = vigra.taggedView(result, axistags="".join(axiskeys))
                resultXY = resultXY.withAxes(*'xy')
                resultXY[:] = data
            else:
                result[:] = data
            return result

        # Smooth the input data
        smoothed = vigra.filters.gaussianSmoothing(data, sigma, window_size=2.0, roi=computeRoi, out=result[tuple(reskey)]) # FIXME: Assumes channel is last axis
        expectedShape = tuple(TinyVector(computeRoi[1]) - TinyVector(computeRoi[0]))
        # Report the shape that was actually compared against.
        assert tuple(smoothed.shape) == expectedShape, "Smoothed data shape {} didn't match expected shape {}".format( smoothed.shape, expectedShape )

        return result
def export_from_tiled_volume(tiles_description_json_path, roi,
                             output_hdf5_path, output_dataset_name):
    """
    Read a cutout from a TiledVolume and store it as an hdf5 dataset.

    Args:
        tiles_description_json_path: Path to the json file describing the TiledVolume.
        roi: (start, stop) corners of the region to export, as a tuple-of-tuples.
        output_hdf5_path: HDF5 file to write to (opened in append mode).
        output_dataset_name: Name of the dataset to write.  An existing
            dataset of that name is deleted first.

    Raises:
        Exception: If the description file does not exist.
    """
    description_exists = os.path.exists(tiles_description_json_path)
    if not description_exists:
        raise Exception("Description file does not exist: " +
                        tiles_description_json_path)

    roi_start, roi_stop = numpy.array(roi)
    cutout_shape = tuple(roi_stop - roi_start)

    volume = TiledVolume(tiles_description_json_path)

    with Timer() as export_timer:
        # The buffer is left uninitialized; read() fills it.
        cutout = numpy.empty(cutout_shape, dtype=volume.description.dtype)

        logger.info("Reading cutout volume of shape: {}".format(cutout_shape))
        volume.read((roi_start, roi_stop), result_out=cutout)

        logger.info("Writing data to: {}/{}".format(output_hdf5_path,
                                                    output_dataset_name))
        with h5py.File(output_hdf5_path, 'a') as h5_file:
            # Replace any previous export stored under the same name
            if output_dataset_name in h5_file:
                del h5_file[output_dataset_name]
            dataset = h5_file.create_dataset(output_dataset_name,
                                             cutout_shape,
                                             cutout.dtype,
                                             chunks=True,
                                             data=cutout)

            # vigra is optional here: without it the axistags are not written
            try:
                import vigra
            except ImportError:
                vigra = None

            if vigra is not None:
                # Attach axistags to the exported dataset, so ilastik
                #  automatically interprets the volume correctly.
                axes = volume.description.output_axes
                dataset.attrs['axistags'] = vigra.defaultAxistags(axes).toJSON()

        pixel_count = numpy.prod(cutout_shape)
        logger.info("Exported {:.1e} pixels in {:.1f} seconds.".format(
            pixel_count, export_timer.seconds()))
Exemplo n.º 7
0
    def _train_forests(forests, X, y):
        """
        Train every forest with ``learnRF(X, y)`` via a RequestPool.

        The total training time is logged at INFO level.

        Returns:
            A list holding the result of each forest's ``learnRF`` call
            (the oobs), in the same order as ``forests``.
        """
        oobs = [None] * len(forests)

        with Timer() as train_timer:
            pool = RequestPool()
            for index, forest in enumerate(forests):
                training_request = Request(partial(forest.learnRF, X, y))
                # When the request finishes, store its result at this forest's position
                training_request.notify_finished(partial(oobs.__setitem__, index))
                pool.add(training_request)
            pool.wait()

        elapsed = train_timer.seconds()
        logger.info("Training took, {} seconds".format(elapsed))
        return oobs
Exemplo n.º 8
0
    def create_and_train(self, X, y, feature_names=None):
        """
        Create several vigra random forests, train them in parallel and wrap
        them in a ParallelVigraRfLazyflowClassifier.

        ``self._num_trees`` trees are distributed as evenly as possible over
        ``self._num_forests`` forests; forests that would receive no trees
        are not created.

        Args:
            X: Feature matrix; converted to a 2D float32 array.
            y: Labels; converted to uint32 and given a trailing axis if 1D.
            feature_names: Passed through to the returned classifier.

        Returns:
            A ParallelVigraRfLazyflowClassifier built from the trained forests,
            their oobs, the unique labels found in ``y`` and ``feature_names``.
        """
        # Distribute trees as evenly as possible
        tree_counts = numpy.array([self._num_trees // self._num_forests] *
                                  self._num_forests)
        tree_counts[:self._num_trees % self._num_forests] += 1
        assert tree_counts.sum() == self._num_trees
        # Build a real list of plain ints and drop empty forests.
        # (A map object cannot be slice-assigned on Python 3.)
        tree_counts = [int(tree_count) for tree_count in tree_counts
                       if tree_count != 0]

        logger.debug("Training parallel vigra RF")
        # Save for future reference
        known_labels = numpy.unique(y)

        X = numpy.asarray(X, numpy.float32)
        y = numpy.asarray(y, numpy.uint32)
        if y.ndim == 1:
            y = y[:, numpy.newaxis]

        assert X.ndim == 2
        assert len(X) == len(y)

        # Create N forests
        forests = [
            vigra.learning.RandomForest(tree_count, **self._kwargs)
            for tree_count in tree_counts
        ]

        # Train them all in parallel
        oobs = [None] * len(forests)
        pool = RequestPool()
        for i, forest in enumerate(forests):
            req = Request(partial(forest.learnRF, X, y))
            # save the oobs
            req.notify_finished(partial(oobs.__setitem__, i))
            pool.add(req)

        with Timer() as timer:
            pool.wait()
        logger.info("Training completed in {} seconds. Average OOB: {}".format(
            timer.seconds(), numpy.average(oobs)))
        return ParallelVigraRfLazyflowClassifier(forests, oobs, known_labels,
                                                 feature_names)
Exemplo n.º 9
0
        def execute(self, slot, subindex, roi, result):
            """
            Compute the multicut node labeling and store it in ``result[0]``.

            If no edge probabilities are available yet (the slot value is
            None), an all-zero uint32 labeling of length ``rag.max_sp + 1``
            is stored instead, which merges everything.

            Args:
                slot: The slot being executed (not used by this method).
                subindex: Sub-slot index (not used by this method).
                roi: Requested region (not used by this method).
                result: Destination container; element 0 is assigned.
            """
            region_graph = self.Rag.value
            beta_value = self.Beta.value
            solver = self.SolverName.value
            probabilities = self.EdgeProbabilities.value

            if probabilities is None:
                # No probabilities cached yet. Merge everything
                merged_size = region_graph.max_sp + 1
                result[0] = np.zeros((merged_size,), dtype=np.uint32)
                return

            with Timer() as multicut_timer:
                labeling = self.agglomerate_with_multicut(region_graph,
                                                          probabilities,
                                                          beta_value,
                                                          solver)
            elapsed = multicut_timer.seconds()
            logger.info("'{}' Multicut took {} seconds".format(solver, elapsed))

            # FIXME: Is it okay to produce 0-based supervoxels?
            # (RAG labels are 0-based; the labeling is stored without an offset.)
            result[0] = labeling
Exemplo n.º 10
0
    def execute(self, slot, subindex, roi, result):
        """
        Produce a binary image that is nonzero where the input equals the
        selected label.

        A selected label of 0 selects nothing: ``result`` is zeroed and the
        method returns None.  Otherwise ``result`` is filled and returned.

        Args:
            slot: Must be ``self.Output``.
            subindex: Sub-slot index (not used by this method).
            roi: Requested region; ``start`` / ``stop`` are forwarded to the input.
            result: Pre-allocated destination array; it is filled in place.
        """
        assert slot == self.Output, "Unknown output slot: {}".format(slot.name)

        selected_label = self.SelectedLabel.value
        if selected_label == 0:
            # Special case: Label zero selects nothing.
            result[:] = 0
            return

        with Timer() as select_timer:
            # Can't use writeInto() here because dtypes don't match.
            labels = self.Input(roi.start, roi.stop).wait()

            # Two in-place operations instead of numpy.where(labels == x, 1, 0),
            # which avoids the temporary created by the comparison:
            #   xor leaves 0 exactly where labels equal the selected label,
            #   logical_not then turns those zeros into ones.
            numpy.bitwise_xor(labels, selected_label, out=labels)
            numpy.logical_not(labels, out=labels)

            # Copy into the result array (converts the dtype)
            result[:] = labels

        elapsed = select_timer.seconds()
        logger.debug("OpSelectLabel took {} seconds for roi {}".format(
            elapsed, roi))
        return result
Exemplo n.º 11
0
    def execute(self, slot, subindex, roi, result):
        """
        Predict class probabilities from the supervoxel features and write
        the requested channels into ``result``.

        The request is answered with zeros when no classifier is available
        or when the prediction mask for the requested region is entirely zero.

        Args:
            slot: The slot being executed (not used by this method).
            subindex: Sub-slot index (not used by this method).
            roi: Requested region; its last axis selects prediction channels.
            result: Pre-allocated destination array; it is filled in place.

        Returns:
            ``result``.
        """
        classifier = self.Classifier.value

        # Training operator may return 'None' if there was no data to train with
        skip_prediction = classifier is None

        # Shortcut: If the mask is totally zero, skip this request entirely
        if not skip_prediction and self.PredictionMask.ready():
            mask_roi = numpy.array((roi.start, roi.stop))
            mask_roi[:, -1:] = [[0], [1]]
            start, stop = list(map(tuple, mask_roi))
            mask = self.PredictionMask(start, stop).wait()
            skip_prediction = not numpy.any(mask)
            del mask

        if skip_prediction:
            result[:] = 0.0
            return result

        assert isinstance(
            classifier, LazyflowVectorwiseClassifierABC
        ), "Classifier is of type {}, which does not satisfy the LazyflowVectorwiseClassifierABC interface.".format(
            type(classifier))

        # Request all channels of the image, whatever channels the roi selects
        key = roi.toSlice()
        newKey = key[:-1]
        newKey += (slice(0, self.Image.meta.shape[-1], None), )

        with Timer() as features_timer:
            input_data = self.Image[newKey].wait()

        input_data = numpy.asarray(input_data, numpy.float32)

        # Only the shape of the image data is used below; the classifier
        # input is the supervoxel feature matrix.
        shape = input_data.shape
        features = self.SupervoxelFeatures.value

        with Timer() as prediction_timer:
            probabilities = classifier.predict_probabilities(features)

        # Map the predictions through the supervoxel segmentation and
        # flatten to (n, classes).
        # NOTE(review): presumably per-supervoxel -> per-pixel; confirm against slic_to_mask.
        probabilities = slic_to_mask(self.SupervoxelSegmentation.value,
                                     probabilities).reshape(
                                         -1, probabilities.shape[-1])
        logger.debug(
            "Features took {} seconds, Prediction took {} seconds for roi: {} : {}"
            .format(features_timer.seconds(), prediction_timer.seconds(),
                    roi.start, roi.stop))

        assert probabilities.shape[1] <= self.PMaps.meta.shape[-1], (
            "Error: Somehow the classifier has more label classes than expected:"
            " Got {} classes, expected {} classes".format(
                probabilities.shape[1], self.PMaps.meta.shape[-1]))

        # We're expecting a channel for each label class.
        # If we didn't provide at least one sample for each label,
        #  we may get back fewer channels.
        if probabilities.shape[1] < self.PMaps.meta.shape[-1]:
            # Copy to an array of the correct shape
            # This is slow, but it's an unusual case
            assert probabilities.shape[-1] == len(classifier.known_classes)
            full_probabilities = numpy.zeros(probabilities.shape[:-1] +
                                             (self.PMaps.meta.shape[-1], ),
                                             dtype=numpy.float32)
            # Label values are 1-based, channel indices are 0-based
            for i, label in enumerate(classifier.known_classes):
                full_probabilities[:, label - 1] = probabilities[:, i]

            probabilities = full_probabilities

        # Reshape to image
        probabilities.shape = shape[:-1] + (self.PMaps.meta.shape[-1], )

        # Copy only the prediction channels the client requested.
        result[...] = probabilities[..., roi.start[-1]:roi.stop[-1]]
        return result
    def predict_probabilities_pixelwise(self, X, roi, axistags=None):
        """
        Predict per-pixel class probabilities frame by frame, restricting the
        classifier to a dilated foreground mask between mask refreshes.

        Every ``FRAME_SPAN``-th frame is predicted in full.  From that
        prediction a mask of non-background pixels is built, dilated by
        ``DILATION_RADIUS`` along each of the two frame axes, and used to
        limit prediction on the following frames.  Pixels outside the mask
        get probability 1.0 in channel 0.

        Args:
            X: Feature array; the first axis is iterated as frames and the
               last axis holds the features.
            roi: (start, stop) used to crop ``X`` (all axes except the last).
            axistags: Not used by this method.

        Returns:
            Array of probabilities with the shape of the cropped ``X`` except
            for the last axis, which holds the probability channels.
        """
        logger.debug("predicting PIXELWISE vigra RF")

        # This classifier doesn't benefit from any context around the input, (does it?)
        #  so just strip it off and only use the given roi.
        assert len(roi[0]) == len(roi[1]) == X.ndim - 1
        X = X[roi_to_slice(*roi)]

        FRAME_SPAN = 10  # Number of frames to wait until the mask is recalculated
        DILATION_RADIUS = 50  # In pixels
        BACKGROUND_LABEL = 1

        num_labels = len(self._known_labels)
        pixels_per_frame = bigintprod(X.shape[1:-1])

        # Allocate memory for the probability volume
        prob_vol = numpy.zeros(X.shape[:-1] + (num_labels,), dtype=numpy.float32)

        # 1D structuring element, applied once per frame axis below
        kernel = numpy.ones((DILATION_RADIUS * 2 + 1), dtype=bool)

        mask = None
        for frm_cnt, X_t in enumerate(X):
            refresh_mask = frm_cnt % FRAME_SPAN == 0
            if refresh_mask:
                # Predict this whole frame; the mask is rebuilt from it below
                mask = numpy.ones(pixels_per_frame, dtype=bool)

            prob_mat = numpy.zeros((pixels_per_frame, num_labels), dtype=numpy.float32)

            # Reshape the image into a 2D feature matrix
            mat_shape = (bigintprod(X_t.shape[:-1]), X_t.shape[-1])
            feature_mat = numpy.reshape(X_t, mat_shape)

            # Mask the feature matrix
            feature_mat_masked = feature_mat[mask, :]

            # Run classifier
            prob_mat_masked = self._vigra_rf.predictProbabilities(feature_mat_masked.view(numpy.ndarray))

            prob_mat[mask, :] = prob_mat_masked
            prob_mat[~mask, 0] = 1.0  # Fill background

            prob_frame = prob_mat.reshape(X_t.shape[:-1] + (prob_mat.shape[-1],))

            # Recalculate the mask every FRAME_SPAN frames
            if refresh_mask:
                predicted_labels = numpy.argmax(prob_frame, axis=-1) + 1
                prob_slice = predicted_labels != BACKGROUND_LABEL

                with Timer() as morpho_timer:
                    prob_slice_dilated = scipy.ndimage.binary_dilation(prob_slice, kernel[None, :])
                    prob_slice_dilated = scipy.ndimage.binary_dilation(prob_slice_dilated, kernel[:, None])

                logger.debug("[PROF] Morphology took {} ".format(morpho_timer.seconds()))

                mask = prob_slice_dilated.reshape(bigintprod(prob_slice_dilated.shape))

            prob_vol[frm_cnt] = prob_frame

        # Reshape into an image.
        # Choose the prediction image shape carefully:
        #
        # Most classifiers omit a channel entirely if there are no labels given for a particular class,
        # So the number of prediction channels we got is the same as the number of known_classes
        # But if the classifier attempts to "help us out" by including channels for "missing" labels,
        #  then we want to just return the whole thing.
        num_probability_channels = max(len(self.known_classes), prob_vol.shape[-1])

        prediction_shape = X.shape[:-1] + (num_probability_channels,)
        return numpy.reshape(prob_vol, prediction_shape)
Exemplo n.º 13
0
        data = prediction_file['volume/predictions'][:]  #[0:50,0:50,0:50,:]

    # Scale and convert to uint8, then add axistags and drange
    data = (data * 255).astype(numpy.uint8)
    data = vigra.taggedView(data, 'xyzc')
    data.drange = (0, 255)

    graph = Graph()
    op = OpVigraWatershedViewer(graph=graph)
    op.InputImage.setValue(data)
    op.InputChannelIndexes.setValue([0])
    op.WatershedPadding.setValue(0)
    op.FreezeCache.setValue(False)
    op.CacheBlockShape.setValue((520, 520))
    op.OverrideLabels.setValue({})
    op.SeedThresholdValue.setValue(0)
    op.MinSeedSize.setValue(5)

    assert op.WatershedLabels.ready()

    print "Computing watershed..."
    with Timer() as timer:
        watershed_labels = op.opWatershed.Output[:].wait()
    print "Computing watershed took {} seconds".format(timer.seconds())

    print "Saving watershed..."
    with h5py.File('/tmp/watershed_output.h5', 'w') as output_file:
        output_file.create_dataset('watershed_labels', data=watershed_labels)

    print "DONE."