Пример #1
0
    def test_Writer(self):
        """Write test data through OpH5N5WriterBigDataset into an HDF5 and an
        N5 container simultaneously, then re-open both files and verify the
        stored datasets match the source data."""
        # Create the h5/n5 files.  "a" matches h5py's historical default mode;
        # h5py >= 3 requires the mode to be given explicitly.
        hdf5File = h5py.File(self.testDataH5FileName, "a")
        n5File = z5py.N5File(self.testDataN5FileName)

        opPiper = OpArrayPiper(graph=self.graph)
        opPiper.Input.setValue(self.testData)

        # Force extra metadata onto the output
        opPiper.Output.meta.ideal_blockshape = (1, 1, 0, 0, 1)
        # Pretend the RAM usage will be really high to force lots of tiny blocks
        opPiper.Output.meta.ram_usage_per_requested_pixel = 1000000.0

        h5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)
        n5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)

        # This checks that you can give a preexisting group as the file
        h5_g = hdf5File.create_group("volume")
        n5_g = n5File.create_group("volume")
        h5_opWriter.h5N5File.setValue(h5_g)
        n5_opWriter.h5N5File.setValue(n5_g)
        h5_opWriter.h5N5Path.setValue("data")
        n5_opWriter.h5N5Path.setValue("data")
        h5_opWriter.Image.connect(opPiper.Output)
        n5_opWriter.Image.connect(opPiper.Output)

        # Force the operator to execute by asking for the output (a bool)
        h5_success = h5_opWriter.WriteImage.value
        n5_success = n5_opWriter.WriteImage.value
        assert h5_success
        assert n5_success

        hdf5File.close()
        n5File.close()

        # Check the files.
        hdf5File = h5py.File(self.testDataH5FileName, "r")
        # BUGFIX: this previously re-opened the HDF5 file a second time, so
        # the N5 output was never actually verified.
        n5File = z5py.N5File(self.testDataN5FileName, mode="r")
        h5_dataset = hdf5File[self.datasetInternalPath]
        n5_dataset = n5File[self.datasetInternalPath]
        assert h5_dataset.shape == self.dataShape
        assert n5_dataset.shape == self.dataShape
        # numpy.all() already reduces to a single bool; no trailing .all() needed.
        assert numpy.all(h5_dataset[...] == self.testData.view(numpy.ndarray))
        assert numpy.all(n5_dataset[...] == self.testData.view(numpy.ndarray))
        hdf5File.close()
        n5File.close()
Пример #2
0
    def _export_h5n5(self, compress=False):
        """Export self.Input to the hdf5/n5 location given by self.ExportPath.

        Progress is reported through self.progressSignal (0..100).  Any
        IOError raised during the export is echoed to stderr and re-raised.
        """
        self.progressSignal(0)

        path_parts = PathComponents(self.ExportPath.value)
        try:
            with OpStreamingH5N5Reader.get_h5_n5_file(
                    path_parts.externalPath, mode="a") as h5n5_file:
                # Temporary operator that performs the actual chunked write.
                writer = OpH5N5WriterBigDataset(parent=self)
                # Drop any stale dataset already stored at the target path.
                with contextlib.suppress(KeyError):
                    del h5n5_file[path_parts.internalPath]
                try:
                    writer.CompressionEnabled.setValue(compress)
                    writer.h5N5File.setValue(h5n5_file)
                    writer.h5N5Path.setValue(path_parts.internalPath)
                    writer.Image.connect(self.Input)

                    # Forward the writer's own progress reports through ours.
                    writer.progressSignal.subscribe(self.progressSignal)

                    # Trigger the export and block in THIS thread until done.
                    writer.WriteImage[:].wait()
                finally:
                    writer.cleanUp()
                    self.progressSignal(100)
        except IOError as ex:
            import sys

            msg = "\nException raised when attempting to export to {}: {}\n".format(
                path_parts.externalPath, str(ex))
            sys.stderr.write(msg)
            raise
Пример #3
0
    def test_Writer(self):
        """Write test data through OpH5N5WriterBigDataset into an HDF5 and an
        N5 container (using preexisting groups as targets), then re-open both
        files and verify the stored datasets match the source data."""
        # Create the h5/n5 files.  "a" matches h5py's historical default mode;
        # h5py >= 3 requires the mode to be given explicitly.
        hdf5File = h5py.File(self.testDataH5FileName, "a")
        n5File = z5py.N5File(self.testDataN5FileName)

        opPiper = OpArrayPiper(graph=self.graph)
        opPiper.Input.setValue(self.testData)

        h5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)
        n5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)

        # This checks that you can give a preexisting group as the file
        h5_g = hdf5File.create_group("volume")
        n5_g = n5File.create_group("volume")
        h5_opWriter.h5N5File.setValue(h5_g)
        n5_opWriter.h5N5File.setValue(n5_g)
        h5_opWriter.h5N5Path.setValue("data")
        n5_opWriter.h5N5Path.setValue("data")
        h5_opWriter.Image.connect(opPiper.Output)
        n5_opWriter.Image.connect(opPiper.Output)

        # Force the operator to execute by asking for the output (a bool)
        h5_success = h5_opWriter.WriteImage.value
        n5_success = n5_opWriter.WriteImage.value
        assert h5_success
        assert n5_success

        hdf5File.close()
        n5File.close()

        # Check the files.
        hdf5File = h5py.File(self.testDataH5FileName, "r")
        # BUGFIX: this previously re-opened the HDF5 file a second time, so
        # the N5 output was never actually verified.
        n5File = z5py.N5File(self.testDataN5FileName, mode="r")
        h5_dataset = hdf5File[self.datasetInternalPath]
        n5_dataset = n5File[self.datasetInternalPath]
        assert h5_dataset.shape == self.dataShape
        assert n5_dataset.shape == self.dataShape
        # numpy.all() already reduces to a single bool; no trailing .all() needed.
        assert numpy.all(h5_dataset[...] == self.testData.view(numpy.ndarray))
        assert numpy.all(n5_dataset[...] == self.testData.view(numpy.ndarray))
        hdf5File.close()
        n5File.close()
Пример #4
0
    def _export_h5n5(self, compress=False):
        """Export self.Input to the hdf5/n5 location given by self.ExportPath,
        replacing any previous export at that path.

        Progress is reported through self.progressSignal (0..100).  Any
        IOError raised during the export is echoed to stderr and re-raised.
        """
        self.progressSignal(0)

        # Create and open the hdf5/n5 file
        export_components = PathComponents(self.ExportPath.value)
        # Remove any previous export at the target path before writing.
        try:
            if os.path.isdir(export_components.externalPath):
                # externalPath leads to a n5 file:
                # n5 is stored as a directory structure, so remove the tree.
                shutil.rmtree(export_components.externalPath)
            else:
                os.remove(export_components.externalPath)
        except FileNotFoundError:
            # It's okay if the file isn't there.
            # (FileNotFoundError is exactly OSError with errno == ENOENT,
            # replacing the old magic-number check `ex.errno != 2`.)
            pass
        try:
            with OpStreamingH5N5Reader.get_h5_n5_file(
                    export_components.externalPath, "w") as h5N5File:
                # Create a temporary operator to do the work for us
                opH5N5Writer = OpH5N5WriterBigDataset(parent=self)
                try:
                    opH5N5Writer.CompressionEnabled.setValue(compress)
                    opH5N5Writer.h5N5File.setValue(h5N5File)
                    opH5N5Writer.h5N5Path.setValue(
                        export_components.internalPath)
                    opH5N5Writer.Image.connect(self.Input)

                    # The H5 Writer provides its own progress signal, so just connect ours to it.
                    opH5N5Writer.progressSignal.subscribe(self.progressSignal)

                    # Perform the export and block for it in THIS THREAD.
                    opH5N5Writer.WriteImage[:].wait()
                finally:
                    opH5N5Writer.cleanUp()
                    self.progressSignal(100)
        except IOError as ex:
            import sys

            msg = "\nException raised when attempting to export to {}: {}\n".format(
                export_components.externalPath, str(ex))
            sys.stderr.write(msg)
            raise
Пример #5
0
 def dumpToHdf5(
     self, h5_file: h5py.File, inner_path: str, progress_signal: Callable[[int], None] = lambda x: None
 ) -> str:
     """Write this dataset into *h5_file* at *inner_path* and return *inner_path*.

     Progress (0..100) is reported through *progress_signal*; 100 is always
     sent, even on failure.
     """
     progress_signal(0)
     try:
         # Make sure the parent group of the target dataset exists.
         h5_file.require_group(Path("/").joinpath(inner_path).parent.as_posix())
         graph = Graph()
         op_writer = OpH5N5WriterBigDataset(
             graph=graph,
             h5N5File=h5_file,
             h5N5Path=inner_path,
             CompressionEnabled=False,
             BatchSize=1,
             Image=self.get_provider_slot(graph=graph),
         )
         op_writer.progressSignal.subscribe(progress_signal)
         success = op_writer.WriteImage.value  # reading this slot triggers the write
         # BUGFIX: the signature promises `-> str`, but nothing was returned
         # (callers received None).  Return the internal path as declared.
         return inner_path
     finally:
         progress_signal(100)
Пример #6
0
 def importStackAsLocalDataset(self,
                               abs_paths: List[str],
                               sequence_axis: str = "z",
                               progress_signal: Callable[[int],
                                                         None] = None):
     """Read an image stack from *abs_paths* and copy it into the project file.

     Returns the internal path of the newly written dataset.  Progress
     (0..100) is reported through *progress_signal*, which defaults to
     self.progressSignal.
     """
     progress_signal = progress_signal or self.progressSignal
     progress_signal(0)
     op_reader = None
     op_writer = None
     try:
         # Join the stack file names with the platform path separator so the
         # reader treats them as a single sequence.
         colon_paths = os.path.pathsep.join(abs_paths)
         op_reader = OpInputDataReader(graph=self.topLevelOperator.graph,
                                       FilePath=colon_paths,
                                       SequenceAxis=sequence_axis)
         axistags = op_reader.Output.meta.axistags
         # Store under a freshly generated id inside the project's local data area.
         inner_path = self.local_data_path.joinpath(
             DatasetInfo.generate_id()).as_posix()
         project_file = self.topLevelOperator.ProjectFile.value
         op_writer = OpH5N5WriterBigDataset(
             graph=self.topLevelOperator.graph,
             h5N5File=project_file,
             h5N5Path=inner_path,
             CompressionEnabled=False,
             BatchSize=1,
             Image=op_reader.Output,
         )
         op_writer.progressSignal.subscribe(progress_signal)
         # Reading WriteImage triggers the actual (blocking) copy.
         success = op_writer.WriteImage.value
         # Label the dataset's dimension scales and record axistags/drange
         # metadata on the written dataset.
         for index, tag in enumerate(axistags):
             project_file[inner_path].dims[index].label = tag.key
         project_file[inner_path].attrs["axistags"] = axistags.toJSON()
         if op_reader.Output.meta.get("drange"):
             project_file[inner_path].attrs[
                 "drange"] = op_reader.Output.meta.get("drange")
         return inner_path
     finally:
         # Disconnect/clean up whatever was actually created before reporting done.
         if op_writer:
             op_writer.Image.disconnect()
         if op_reader:
             op_reader.cleanUp()
         progress_signal(100)
Пример #7
0
    def _serialize(self, group, name, slot):
        """Called when the currently stored predictions are dirty. If
        prediction storage is currently enabled, store them to the
        file. Otherwise, just delete them/

        (Avoid inconsistent project states, e.g. don't allow old
        predictions to be stored with a new classifier.)

        """
        predictionDir = group.create_group(self.name)

        # Disconnect the operators that might be using the old data.
        self.deserialize(group)
        
        failedToSave = False
        opWriter = None
        try:
            num = len(slot)
            # `increment` is only needed (and only defined) when the loop
            # below will actually run.
            if num > 0:
                increment = 100 / float(num)

            progress = 0
            for imageIndex in range(num):
                # Have we been cancelled?
                if not self.predictionStorageEnabled:
                    break

                datasetName = self.subname.format(imageIndex)

                # Use a big dataset writer to do this in chunks
                opWriter = OpH5N5WriterBigDataset(graph=self.operator.graph, parent = self.operator.parent)
                opWriter.h5N5File.setValue(predictionDir)
                opWriter.h5N5Path.setValue(datasetName)
                opWriter.Image.connect(slot[imageIndex])

                # Scale the writer's 0..100 progress into this image's slice
                # of the overall progress.  `progress` is read late-bound, so
                # the closure sees the current per-image offset.
                def handleProgress(percent):
                    # Stop sending progress if we were cancelled
                    if self.predictionStorageEnabled:
                        curprogress = progress + percent * (increment / 100.0)
                        self.progressSignal(curprogress)
                opWriter.progressSignal.subscribe(handleProgress)

                # Create the request
                self._predictionStorageRequest = opWriter.WriteImage[...]

                # Must use a threading event here because if we wait on the 
                # request from within a "real" thread, it refuses to be cancelled.
                finishedEvent = threading.Event()
                def handleFinish(result):
                    finishedEvent.set()

                def handleCancel():
                    logger.info("Full volume prediction save CANCELLED.")
                    self._predictionStorageRequest = None
                    finishedEvent.set()

                # Trigger the write and wait for it to complete or cancel.
                self._predictionStorageRequest.notify_finished(handleFinish)
                self._predictionStorageRequest.notify_cancelled(handleCancel)
                self._predictionStorageRequest.submit() # Can't call wait().  See note above.
                finishedEvent.wait()
                progress += increment
                opWriter.cleanUp()
                opWriter = None
        except:
            # Deliberately bare: mark the failure for the cleanup below, then
            # re-raise whatever happened (including BaseException).
            failedToSave = True
            raise
        finally:
            # Clean up a writer left over from an aborted iteration.
            if opWriter is not None:
                opWriter.cleanUp()

            # If we were cancelled, delete the predictions we just started
            if not self.predictionStorageEnabled or failedToSave:
                deleteIfPresent(group, name)
Пример #8
0
    def _serializeToHdf5(self, topGroup, hdf5File, projectFilePath):
        """Serialize all dataset infos (and any project-internal image data)
        into *topGroup* of the project file."""
        # Write any missing local datasets to the local_data group
        localDataGroup = getOrCreateGroup(topGroup, 'local_data')
        wroteInternalData = False
        for laneIndex, multislot in enumerate(
                self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if not slot.ready():
                    continue
                info = slot.value
                # If this dataset should be stored in the project, but it isn't there yet
                if  info.location == DatasetInfo.Location.ProjectInternal \
                and info.datasetId not in list(localDataGroup.keys()):
                    # Obtain the data from the corresponding output and store it to the project.
                    dataSlot = self.topLevelOperator._NonTransposedImageGroup[
                        laneIndex][roleIndex]

                    # NOTE(review): if the OpH5N5WriterBigDataset constructor
                    # itself raises, `opWriter` is unbound and the `finally`
                    # below raises NameError, masking the original error.
                    try:
                        opWriter = OpH5N5WriterBigDataset(
                            parent=self.topLevelOperator.parent,
                            graph=self.topLevelOperator.graph)
                        # Compression slows down browsing a lot, and raw data tends
                        # to be noisy and doesn't compress very well, anyway.
                        opWriter.CompressionEnabled.setValue(False)
                        opWriter.h5N5File.setValue(localDataGroup)
                        opWriter.h5N5Path.setValue(info.datasetId)
                        opWriter.Image.connect(dataSlot)

                        # Trigger the copy
                        success = opWriter.WriteImage.value
                        assert success
                    finally:
                        opWriter.cleanUp()

                    # Add axistags and drange attributes, in case someone uses this dataset outside ilastik
                    localDataGroup[info.datasetId].attrs[
                        'axistags'] = dataSlot.meta.axistags.toJSON().encode(
                            'utf-8')
                    if dataSlot.meta.drange is not None:
                        localDataGroup[info.datasetId].attrs[
                            'drange'] = dataSlot.meta.drange

                    # Make sure the dataSlot's axistags are updated with the dataset as we just wrote it
                    # (The top-level operator may use an OpReorderAxes, which changed the axisorder)
                    info.axistags = dataSlot.meta.axistags

                    wroteInternalData = True

        # Construct a list of all the local dataset ids we want to keep
        localDatasetIds = set()
        for laneIndex, multislot in enumerate(
                self.topLevelOperator.DatasetGroup):
            for roleIndex, slot in enumerate(multislot):
                if slot.ready(
                ) and slot.value.location == DatasetInfo.Location.ProjectInternal:
                    localDatasetIds.add(slot.value.datasetId)

        # Delete any datasets in the project that aren't needed any more
        for datasetName in list(localDataGroup.keys()):
            if datasetName not in localDatasetIds:
                del localDataGroup[datasetName]

        if wroteInternalData:
            # We can only re-configure the operator if we're not saving a snapshot
            # We know we're saving a snapshot if the project file isn't the one we deserialized with.
            if self._projectFilePath is None or self._projectFilePath == projectFilePath:
                # Force the operator to setupOutputs() again so it gets data from the project, not external files
                firstInfo = self.topLevelOperator.DatasetGroup[0][0].value
                self.topLevelOperator.DatasetGroup[0][0].setValue(
                    firstInfo, check_changed=False)

        deleteIfPresent(topGroup, 'Role Names')
        role_names = [
            name.encode('utf-8')
            for name in self.topLevelOperator.DatasetRoles.value
        ]
        topGroup.create_dataset('Role Names', data=role_names)

        # Access the info group
        infoDir = getOrCreateGroup(topGroup, 'infos')

        # Delete all infos
        for infoName in list(infoDir.keys()):
            del infoDir[infoName]

        # Rebuild the list of infos
        roleNames = self.topLevelOperator.DatasetRoles.value
        for laneIndex, multislot in enumerate(
                self.topLevelOperator.DatasetGroup):
            laneGroupName = 'lane{:04d}'.format(laneIndex)
            laneGroup = infoDir.create_group(laneGroupName)

            for roleIndex, slot in enumerate(multislot):
                infoGroup = laneGroup.create_group(roleNames[roleIndex])
                if slot.ready():
                    datasetInfo = slot.value
                    locationString = self.LocationStrings[datasetInfo.location]
                    infoGroup.create_dataset(
                        'location', data=locationString.encode('utf-8'))
                    infoGroup.create_dataset(
                        'filePath', data=datasetInfo.filePath.encode('utf-8'))
                    infoGroup.create_dataset(
                        'datasetId',
                        data=datasetInfo.datasetId.encode('utf-8'))
                    infoGroup.create_dataset('allowLabels',
                                             data=datasetInfo.allowLabels)
                    infoGroup.create_dataset(
                        'nickname', data=datasetInfo.nickname.encode('utf-8'))
                    infoGroup.create_dataset('fromstack',
                                             data=datasetInfo.fromstack)
                    infoGroup.create_dataset(
                        'display_mode',
                        data=datasetInfo.display_mode.encode('utf-8'))
                    if datasetInfo.drange is not None:
                        infoGroup.create_dataset('drange',
                                                 data=datasetInfo.drange)

                    # Pull the axistags from the NonTransposedImage,
                    #  which is what the image looks like before 'forceAxisOrder' is applied,
                    #  and before 'c' is automatically appended
                    image_group_meta = self.topLevelOperator._NonTransposedImageGroup[
                        laneIndex][roleIndex].meta
                    axistags = image_group_meta.axistags
                    infoGroup.create_dataset(
                        'axistags', data=axistags.toJSON().encode('utf-8'))
                    axisorder = "".join(tag.key
                                        for tag in axistags).encode('utf-8')
                    infoGroup.create_dataset('axisorder', data=axisorder)
                    # serialize shape/dtype so that we could re-create the metadata
                    # for the raw data in the headless mode -> no need for raw data in headless
                    infoGroup.create_dataset('shape',
                                             data=image_group_meta.shape)
                    infoGroup.create_dataset(
                        'dtype',
                        data=str(numpy.dtype(
                            image_group_meta.dtype)).encode('utf-8'))
                    if datasetInfo.subvolume_roi is not None:
                        infoGroup.create_dataset(
                            'subvolume_roi', data=datasetInfo.subvolume_roi)

        self._dirty = False
Пример #9
0
    def importStackAsLocalDataset(self, info, sequence_axis='t'):
        """
        Add the given stack data to the project file as a local dataset.
        Does not update the topLevelOperator.

        :param info: A DatasetInfo object.
                     Note: info.filePath must be a str which lists the stack files, delimited with os.path.pathsep
                     Note: info will be MODIFIED by this function.  Use the modified info when assigning it to a dataset.
        :param sequence_axis: the axis along which the stack files are concatenated.
        :return: the writer's WriteImage value (truthy on success).
        """
        self.progressSignal(0)

        projectFileHdf5 = self.topLevelOperator.ProjectFile.value

        globstring = info.filePath
        info.location = DatasetInfo.Location.ProjectInternal
        firstPathParts = PathComponents(
            info.filePath.split(os.path.pathsep)[0])
        info.filePath = firstPathParts.externalDirectory + '/??' + firstPathParts.extension
        info.fromstack = True

        # Use absolute path
        cwd = self.topLevelOperator.WorkingDirectory
        if os.path.pathsep not in globstring and not os.path.isabs(globstring):
            globstring = os.path.normpath(os.path.join(cwd, globstring))

        if firstPathParts.extension.lower() in OpTiffReader.TIFF_EXTS:
            # Special loader for TIFFs
            opLoader = OpTiffSequenceReader(
                parent=self.topLevelOperator.parent)
            opLoader.SequenceAxis.setValue(sequence_axis)
            opLoader.GlobString.setValue(globstring)
            data_slot = opLoader.Output
        elif firstPathParts.extension.lower() in (
                OpStreamingH5N5SequenceReaderM.H5EXTS +
                OpStreamingH5N5SequenceReaderM.N5EXTS):
            # Now use the .checkGlobString method of the stack readers
            # to decide whether the stack lives in one file or many.
            isSingleFile = True
            try:
                OpStreamingH5N5SequenceReaderS.checkGlobString(globstring)
            except (OpStreamingH5N5SequenceReaderS.NoInternalPlaceholderError,
                    OpStreamingH5N5SequenceReaderS.NotTheSameFileError,
                    OpStreamingH5N5SequenceReaderS.ExternalPlaceholderError):
                isSingleFile = False

            isMultiFile = True
            try:
                OpStreamingH5N5SequenceReaderM.checkGlobString(globstring)
            except (OpStreamingH5N5SequenceReaderM.NoExternalPlaceholderError,
                    OpStreamingH5N5SequenceReaderM.SameFileError,
                    OpStreamingH5N5SequenceReaderM.InternalPlaceholderError):
                isMultiFile = False

            assert (not (isMultiFile and isSingleFile)), (
                "Something is wrong, glob string shouldn't allow both")
            assert (isMultiFile or isSingleFile), (
                "Glob string doesn't conform to h5 stack glob string rules")

            if isSingleFile:
                opLoader = OpStreamingH5N5SequenceReaderS(
                    parent=self.topLevelOperator.parent)
            elif isMultiFile:
                opLoader = OpStreamingH5N5SequenceReaderM(
                    parent=self.topLevelOperator.parent)

            opLoader.SequenceAxis.setValue(sequence_axis)
            opLoader.GlobString.setValue(globstring)
            data_slot = opLoader.OutputImage
        else:
            # All other sequences (e.g. pngs, jpegs, etc.)
            opLoader = OpStackLoader(parent=self.topLevelOperator.parent)
            opLoader.SequenceAxis.setValue(sequence_axis)
            opLoader.globstring.setValue(globstring)
            data_slot = opLoader.stack

        # BUGFIX: initialize before the try-block.  Previously, if the
        # OpH5N5WriterBigDataset constructor raised, the `finally` clause hit
        # a NameError on `opWriter.cleanUp()`, masking the original error.
        opWriter = None
        try:
            opWriter = OpH5N5WriterBigDataset(
                parent=self.topLevelOperator.parent)
            opWriter.h5N5File.setValue(projectFileHdf5)
            opWriter.h5N5Path.setValue(self.topGroupName + '/local_data/' +
                                       info.datasetId)
            opWriter.CompressionEnabled.setValue(False)
            # We assume that the main bottleneck is the hard disk,
            #  so adding lots of threads to access it at once seems like a bad idea.
            opWriter.BatchSize.setValue(1)
            opWriter.Image.connect(data_slot)

            # Forward progress from the writer directly to our applet
            opWriter.progressSignal.subscribe(self.progressSignal)

            # Reading WriteImage triggers the actual (blocking) copy.
            success = opWriter.WriteImage.value
        finally:
            if opWriter is not None:
                opWriter.cleanUp()
            opLoader.cleanUp()
            self.progressSignal(100)

        return success