def test_Writer(self):
    """Write the test volume through OpH5N5WriterBigDataset into both an HDF5
    file and an N5 file, then re-open both files and verify shape and content.

    Also forces `ideal_blockshape` / `ram_usage_per_requested_pixel` metadata
    onto the source slot to exercise the writer's tiny-block code path.
    """
    # Create the h5/n5 files
    hdf5File = h5py.File(self.testDataH5FileName)
    n5File = z5py.N5File(self.testDataN5FileName)

    opPiper = OpArrayPiper(graph=self.graph)
    opPiper.Input.setValue(self.testData)

    # Force extra metadata onto the output
    opPiper.Output.meta.ideal_blockshape = (1, 1, 0, 0, 1)
    # Pretend the RAM usage will be really high to force lots of tiny blocks
    opPiper.Output.meta.ram_usage_per_requested_pixel = 1000000.0

    h5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)
    n5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)

    # This checks that you can give a preexisting group as the file
    h5_g = hdf5File.create_group("volume")
    n5_g = n5File.create_group("volume")
    h5_opWriter.h5N5File.setValue(h5_g)
    n5_opWriter.h5N5File.setValue(n5_g)
    h5_opWriter.h5N5Path.setValue("data")
    n5_opWriter.h5N5Path.setValue("data")
    h5_opWriter.Image.connect(opPiper.Output)
    n5_opWriter.Image.connect(opPiper.Output)

    # Force the operator to execute by asking for the output (a bool)
    h5_success = h5_opWriter.WriteImage.value
    n5_success = n5_opWriter.WriteImage.value
    assert h5_success
    assert n5_success

    hdf5File.close()
    n5File.close()

    # Check the files.
    hdf5File = h5py.File(self.testDataH5FileName, "r")
    # BUGFIX: this used to re-open the *HDF5* file with h5py a second time,
    # so the N5 output was never actually verified.  Re-open the N5 file.
    n5File = z5py.N5File(self.testDataN5FileName, "r")
    h5_dataset = hdf5File[self.datasetInternalPath]
    n5_dataset = n5File[self.datasetInternalPath]
    assert h5_dataset.shape == self.dataShape
    assert n5_dataset.shape == self.dataShape
    # numpy.all() already reduces to a scalar; the old extra .all() was redundant.
    assert numpy.all(h5_dataset[...] == self.testData.view(numpy.ndarray)[...])
    assert numpy.all(n5_dataset[...] == self.testData.view(numpy.ndarray)[...])
    hdf5File.close()
    n5File.close()
def _export_h5n5(self, compress=False):
    """Export self.Input into the hdf5/n5 file named by self.ExportPath.

    Any pre-existing dataset at the target internal path is deleted first.
    The write happens synchronously in this thread; progress is forwarded to
    our own progressSignal.  IOErrors are logged to stderr and re-raised.
    """
    self.progressSignal(0)

    # Create and open the hdf5/n5 file
    path_parts = PathComponents(self.ExportPath.value)
    try:
        with OpStreamingH5N5Reader.get_h5_n5_file(path_parts.externalPath, mode="a") as h5N5File:
            # Temporary operator that performs the actual blockwise write.
            writer = OpH5N5WriterBigDataset(parent=self)

            # Drop any stale dataset already sitting at the destination path.
            with contextlib.suppress(KeyError):
                del h5N5File[path_parts.internalPath]

            try:
                writer.CompressionEnabled.setValue(compress)
                writer.h5N5File.setValue(h5N5File)
                writer.h5N5Path.setValue(path_parts.internalPath)
                writer.Image.connect(self.Input)

                # The H5 Writer provides its own progress signal, so just connect ours to it.
                writer.progressSignal.subscribe(self.progressSignal)

                # Perform the export and block for it in THIS THREAD.
                writer.WriteImage[:].wait()
            finally:
                writer.cleanUp()
                self.progressSignal(100)
    except IOError as ex:
        import sys

        msg = "\nException raised when attempting to export to {}: {}\n".format(
            path_parts.externalPath, str(ex)
        )
        sys.stderr.write(msg)
        raise
def test_Writer(self):
    """Write the test volume through OpH5N5WriterBigDataset into both an HDF5
    file and an N5 file, then re-open both files and verify shape and content.
    """
    # Create the h5/n5 files
    hdf5File = h5py.File(self.testDataH5FileName)
    n5File = z5py.N5File(self.testDataN5FileName)

    opPiper = OpArrayPiper(graph=self.graph)
    opPiper.Input.setValue(self.testData)

    h5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)
    n5_opWriter = OpH5N5WriterBigDataset(graph=self.graph)

    # This checks that you can give a preexisting group as the file
    h5_g = hdf5File.create_group("volume")
    n5_g = n5File.create_group("volume")
    h5_opWriter.h5N5File.setValue(h5_g)
    n5_opWriter.h5N5File.setValue(n5_g)
    h5_opWriter.h5N5Path.setValue("data")
    n5_opWriter.h5N5Path.setValue("data")
    h5_opWriter.Image.connect(opPiper.Output)
    n5_opWriter.Image.connect(opPiper.Output)

    # Force the operator to execute by asking for the output (a bool)
    h5_success = h5_opWriter.WriteImage.value
    n5_success = n5_opWriter.WriteImage.value
    assert h5_success
    assert n5_success

    hdf5File.close()
    n5File.close()

    # Check the files.
    hdf5File = h5py.File(self.testDataH5FileName, "r")
    # BUGFIX: this used to re-open the *HDF5* file with h5py a second time,
    # so the N5 output was never actually verified.  Re-open the N5 file.
    n5File = z5py.N5File(self.testDataN5FileName, "r")
    h5_dataset = hdf5File[self.datasetInternalPath]
    n5_dataset = n5File[self.datasetInternalPath]
    assert h5_dataset.shape == self.dataShape
    assert n5_dataset.shape == self.dataShape
    # numpy.all() already reduces to a scalar; the old extra .all() was redundant.
    assert numpy.all(h5_dataset[...] == self.testData.view(numpy.ndarray)[...])
    assert numpy.all(n5_dataset[...] == self.testData.view(numpy.ndarray)[...])
    hdf5File.close()
    n5File.close()
def _export_h5n5(self, compress=False):
    """Export self.Input into the hdf5/n5 file named by self.ExportPath.

    The destination is removed first (n5 "files" are directory trees, hdf5
    files are single files) and re-created from scratch with mode "w".
    The write happens synchronously in this thread; progress is forwarded
    to our progressSignal.  IOErrors are logged to stderr and re-raised.
    """
    self.progressSignal(0)

    # Remove any previous export at the destination path.
    try:
        if os.path.isdir(export_components.externalPath if False else PathComponents(self.ExportPath.value).externalPath):
            pass
    except Exception:
        pass
    export_components = PathComponents(self.ExportPath.value)
    try:
        if os.path.isdir(export_components.externalPath):  # externalPath leads to a n5 file
            shutil.rmtree(export_components.externalPath)  # n5 is stored as a directory structure
        else:
            os.remove(export_components.externalPath)
    except FileNotFoundError:
        # It's okay if the file isn't there.  (FileNotFoundError is exactly
        # the old `ex.errno != 2` magic-number check: errno.ENOENT.  Any
        # other OSError, e.g. a permission problem, still propagates.)
        pass

    # Create and open the hdf5/n5 file
    try:
        with OpStreamingH5N5Reader.get_h5_n5_file(export_components.externalPath, "w") as h5N5File:
            # Create a temporary operator to do the work for us
            opH5N5Writer = OpH5N5WriterBigDataset(parent=self)
            try:
                opH5N5Writer.CompressionEnabled.setValue(compress)
                opH5N5Writer.h5N5File.setValue(h5N5File)
                opH5N5Writer.h5N5Path.setValue(export_components.internalPath)
                opH5N5Writer.Image.connect(self.Input)

                # The H5 Writer provides its own progress signal, so just connect ours to it.
                opH5N5Writer.progressSignal.subscribe(self.progressSignal)

                # Perform the export and block for it in THIS THREAD.
                opH5N5Writer.WriteImage[:].wait()
            finally:
                opH5N5Writer.cleanUp()
                self.progressSignal(100)
    except IOError as ex:
        import sys

        msg = "\nException raised when attempting to export to {}: {}\n".format(
            export_components.externalPath, str(ex)
        )
        sys.stderr.write(msg)
        raise
def dumpToHdf5(
    self, h5_file: h5py.File, inner_path: str, progress_signal: Callable[[int], None] = lambda x: None
) -> str:
    """Write this dataset's data into ``h5_file`` at ``inner_path``.

    :param h5_file: an open, writable h5py.File to dump into
    :param inner_path: internal hdf5 path where the dataset is created
    :param progress_signal: callable receiving progress percentages (0-100);
        always called with 0 first and 100 last, even on failure
    :return: ``inner_path`` of the newly written dataset
    :raises AssertionError: if the writer reports failure
    """
    progress_signal(0)
    try:
        # Make sure the dataset's parent group exists before writing into it.
        h5_file.require_group(Path("/").joinpath(inner_path).parent.as_posix())
        graph = Graph()
        op_writer = OpH5N5WriterBigDataset(
            graph=graph,
            h5N5File=h5_file,
            h5N5Path=inner_path,
            CompressionEnabled=False,
            BatchSize=1,
            Image=self.get_provider_slot(graph=graph),
        )
        op_writer.progressSignal.subscribe(progress_signal)
        success = op_writer.WriteImage.value  # reading this slot triggers the write
        assert success
        # BUGFIX: the signature promises `-> str`, but the original returned
        # nothing (implicit None) and ignored `success`.  Return the path.
        return inner_path
    finally:
        progress_signal(100)
def importStackAsLocalDataset(self, abs_paths: List[str], sequence_axis: str = "z",
                              progress_signal: Callable[[int], None] = None):
    """Read a stack of files and copy it into the project file as one local dataset.

    :param abs_paths: absolute paths of the stack's member files
    :param sequence_axis: axis along which the files are stacked
    :param progress_signal: optional progress callback; defaults to self.progressSignal
    :return: the internal path of the newly written dataset
    """
    progress_signal = progress_signal or self.progressSignal
    progress_signal(0)

    op_reader = None
    op_writer = None
    try:
        # OpInputDataReader expects the member paths joined by os.path.pathsep.
        joined_paths = os.path.pathsep.join(abs_paths)
        op_reader = OpInputDataReader(
            graph=self.topLevelOperator.graph, FilePath=joined_paths, SequenceAxis=sequence_axis
        )
        axistags = op_reader.Output.meta.axistags

        inner_path = self.local_data_path.joinpath(DatasetInfo.generate_id()).as_posix()
        project_file = self.topLevelOperator.ProjectFile.value
        op_writer = OpH5N5WriterBigDataset(
            graph=self.topLevelOperator.graph,
            h5N5File=project_file,
            h5N5Path=inner_path,
            CompressionEnabled=False,
            BatchSize=1,
            Image=op_reader.Output,
        )
        op_writer.progressSignal.subscribe(progress_signal)
        success = op_writer.WriteImage.value  # reading the slot performs the write

        # Label the hdf5 dimensions and attach the axis/drange metadata.
        dataset = project_file[inner_path]
        for dim_index, axis_tag in enumerate(axistags):
            dataset.dims[dim_index].label = axis_tag.key
        dataset.attrs["axistags"] = axistags.toJSON()
        if op_reader.Output.meta.get("drange"):
            dataset.attrs["drange"] = op_reader.Output.meta.get("drange")
        return inner_path
    finally:
        # Tear down the temporary operators even if the write failed.
        if op_writer:
            op_writer.Image.disconnect()
        if op_reader:
            op_reader.cleanUp()
        progress_signal(100)
def _serialize(self, group, name, slot):
    """Called when the currently stored predictions are dirty. If prediction
    storage is currently enabled, store them to the file. Otherwise, just
    delete them.
    (Avoid inconsistent project states, e.g. don't allow old predictions
    to be stored with a new classifier.)
    """
    # Group that will hold one dataset per image lane.
    predictionDir = group.create_group(self.name)
    # Disconnect the operators that might be using the old data.
    self.deserialize(group)

    failedToSave = False
    opWriter = None
    try:
        num = len(slot)
        if num > 0:
            # Each lane contributes an equal share of the 0-100 progress range.
            increment = 100 / float(num)
        progress = 0
        for imageIndex in range(num):
            # Have we been cancelled?
            if not self.predictionStorageEnabled:
                break

            datasetName = self.subname.format(imageIndex)

            # Use a big dataset writer to do this in chunks
            opWriter = OpH5N5WriterBigDataset(graph=self.operator.graph, parent=self.operator.parent)
            opWriter.h5N5File.setValue(predictionDir)
            opWriter.h5N5Path.setValue(datasetName)
            opWriter.Image.connect(slot[imageIndex])

            def handleProgress(percent):
                # Stop sending progress if we were cancelled.
                # NOTE: closes over `progress`/`increment`; `progress` is only
                # advanced *after* each lane finishes, so this maps the
                # writer's 0-100 into this lane's slice of the overall range.
                if self.predictionStorageEnabled:
                    curprogress = progress + percent * (increment / 100.0)
                    self.progressSignal(curprogress)

            opWriter.progressSignal.subscribe(handleProgress)

            # Create the request
            self._predictionStorageRequest = opWriter.WriteImage[...]

            # Must use a threading event here because if we wait on the
            # request from within a "real" thread, it refuses to be cancelled.
            finishedEvent = threading.Event()

            def handleFinish(result):
                finishedEvent.set()

            def handleCancel():
                # Cancellation path: drop the request handle so other code can
                # see that no save is in flight, then release the waiter.
                logger.info("Full volume prediction save CANCELLED.")
                self._predictionStorageRequest = None
                finishedEvent.set()

            # Trigger the write and wait for it to complete or cancel.
            self._predictionStorageRequest.notify_finished(handleFinish)
            self._predictionStorageRequest.notify_cancelled(handleCancel)
            self._predictionStorageRequest.submit()

            # Can't call wait(). See note above.
            finishedEvent.wait()
            progress += increment

            opWriter.cleanUp()
            # Null out so the finally-block doesn't clean up twice.
            opWriter = None
    except:
        # Bare except is intentional: record the failure for the cleanup
        # below, then re-raise whatever happened (including BaseException).
        failedToSave = True
        raise
    finally:
        if opWriter is not None:
            opWriter.cleanUp()

    # If we were cancelled, delete the predictions we just started
    # NOTE(review): deletion uses the `name` argument while the data was
    # written under `self.name` — presumably callers pass name == self.name;
    # verify at the call site.
    if not self.predictionStorageEnabled or failedToSave:
        deleteIfPresent(group, name)
def _serializeToHdf5(self, topGroup, hdf5File, projectFilePath):
    """Serialize all dataset infos (and any project-internal data) to the project file.

    Writes missing project-internal datasets into topGroup/'local_data',
    prunes local datasets that are no longer referenced, then rebuilds the
    topGroup/'infos' metadata tree from scratch.
    """
    # Write any missing local datasets to the local_data group
    localDataGroup = getOrCreateGroup(topGroup, 'local_data')
    wroteInternalData = False
    for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
        for roleIndex, slot in enumerate(multislot):
            if not slot.ready():
                continue
            info = slot.value
            # If this dataset should be stored in the project, but it isn't there yet
            if info.location == DatasetInfo.Location.ProjectInternal \
                    and info.datasetId not in list(localDataGroup.keys()):
                # Obtain the data from the corresponding output and store it to the project.
                dataSlot = self.topLevelOperator._NonTransposedImageGroup[laneIndex][roleIndex]
                try:
                    opWriter = OpH5N5WriterBigDataset(
                        parent=self.topLevelOperator.parent,
                        graph=self.topLevelOperator.graph)
                    # Compression slows down browsing a lot, and raw data tends
                    # to be noisy and doesn't compress very well, anyway.
                    opWriter.CompressionEnabled.setValue(False)
                    opWriter.h5N5File.setValue(localDataGroup)
                    opWriter.h5N5Path.setValue(info.datasetId)
                    opWriter.Image.connect(dataSlot)

                    # Trigger the copy
                    success = opWriter.WriteImage.value
                    assert success
                finally:
                    opWriter.cleanUp()

                # Add axistags and drange attributes, in case someone uses this dataset outside ilastik
                localDataGroup[info.datasetId].attrs['axistags'] = \
                    dataSlot.meta.axistags.toJSON().encode('utf-8')
                if dataSlot.meta.drange is not None:
                    localDataGroup[info.datasetId].attrs['drange'] = dataSlot.meta.drange

                # Make sure the dataSlot's axistags are updated with the dataset as we just wrote it
                # (The top-level operator may use an OpReorderAxes, which changed the axisorder)
                info.axistags = dataSlot.meta.axistags

                wroteInternalData = True

    # Construct a list of all the local dataset ids we want to keep
    localDatasetIds = set()
    for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
        for roleIndex, slot in enumerate(multislot):
            if slot.ready() and slot.value.location == DatasetInfo.Location.ProjectInternal:
                localDatasetIds.add(slot.value.datasetId)

    # Delete any datasets in the project that aren't needed any more
    for datasetName in list(localDataGroup.keys()):
        if datasetName not in localDatasetIds:
            del localDataGroup[datasetName]

    if wroteInternalData:
        # We can only re-configure the operator if we're not saving a snapshot
        # We know we're saving a snapshot if the project file isn't the one we deserialized with.
        if self._projectFilePath is None or self._projectFilePath == projectFilePath:
            # Force the operator to setupOutputs() again so it gets data from the project, not external files
            firstInfo = self.topLevelOperator.DatasetGroup[0][0].value
            self.topLevelOperator.DatasetGroup[0][0].setValue(firstInfo, check_changed=False)

    # Replace the stored role-name list wholesale.
    deleteIfPresent(topGroup, 'Role Names')
    role_names = [name.encode('utf-8') for name in self.topLevelOperator.DatasetRoles.value]
    topGroup.create_dataset('Role Names', data=role_names)

    # Access the info group
    infoDir = getOrCreateGroup(topGroup, 'infos')

    # Delete all infos
    for infoName in list(infoDir.keys()):
        del infoDir[infoName]

    # Rebuild the list of infos
    roleNames = self.topLevelOperator.DatasetRoles.value
    for laneIndex, multislot in enumerate(self.topLevelOperator.DatasetGroup):
        laneGroupName = 'lane{:04d}'.format(laneIndex)
        laneGroup = infoDir.create_group(laneGroupName)

        for roleIndex, slot in enumerate(multislot):
            infoGroup = laneGroup.create_group(roleNames[roleIndex])
            # An empty group is intentionally left behind for unready slots.
            if slot.ready():
                datasetInfo = slot.value
                locationString = self.LocationStrings[datasetInfo.location]
                infoGroup.create_dataset('location', data=locationString.encode('utf-8'))
                infoGroup.create_dataset('filePath', data=datasetInfo.filePath.encode('utf-8'))
                infoGroup.create_dataset('datasetId', data=datasetInfo.datasetId.encode('utf-8'))
                infoGroup.create_dataset('allowLabels', data=datasetInfo.allowLabels)
                infoGroup.create_dataset('nickname', data=datasetInfo.nickname.encode('utf-8'))
                infoGroup.create_dataset('fromstack', data=datasetInfo.fromstack)
                infoGroup.create_dataset('display_mode', data=datasetInfo.display_mode.encode('utf-8'))

                if datasetInfo.drange is not None:
                    infoGroup.create_dataset('drange', data=datasetInfo.drange)

                # Pull the axistags from the NonTransposedImage,
                # which is what the image looks like before 'forceAxisOrder' is applied,
                # and before 'c' is automatically appended
                image_group_meta = self.topLevelOperator._NonTransposedImageGroup[laneIndex][roleIndex].meta
                axistags = image_group_meta.axistags
                infoGroup.create_dataset('axistags', data=axistags.toJSON().encode('utf-8'))
                axisorder = "".join(tag.key for tag in axistags).encode('utf-8')
                infoGroup.create_dataset('axisorder', data=axisorder)

                # serialize shape/dtype so that we could re-create the metadata
                # for the raw data in the headless mode -> no need for raw data in headless
                infoGroup.create_dataset('shape', data=image_group_meta.shape)
                infoGroup.create_dataset(
                    'dtype', data=str(numpy.dtype(image_group_meta.dtype)).encode('utf-8'))
                if datasetInfo.subvolume_roi is not None:
                    infoGroup.create_dataset('subvolume_roi', data=datasetInfo.subvolume_roi)

    self._dirty = False
def importStackAsLocalDataset(self, info, sequence_axis='t'):
    """
    Add the given stack data to the project file as a local dataset.
    Does not update the topLevelOperator.

    :param info: A DatasetInfo object.
                 Note: info.filePath must be a str which lists the stack files,
                 delimited with os.path.pathsep
                 Note: info will be MODIFIED by this function.  Use the modified
                 info when assigning it to a dataset.
    :param sequence_axis: axis along which the stack files are concatenated.
    :return: the writer's success flag (read from WriteImage).
    """
    self.progressSignal(0)

    projectFileHdf5 = self.topLevelOperator.ProjectFile.value

    globstring = info.filePath
    # Rewrite the info in place: it will now live inside the project file,
    # and its filePath becomes a '??'-glob over the original directory.
    info.location = DatasetInfo.Location.ProjectInternal
    firstPathParts = PathComponents(info.filePath.split(os.path.pathsep)[0])
    info.filePath = firstPathParts.externalDirectory + '/??' + firstPathParts.extension
    info.fromstack = True

    # Use absolute path
    cwd = self.topLevelOperator.WorkingDirectory
    if os.path.pathsep not in globstring and not os.path.isabs(globstring):
        globstring = os.path.normpath(os.path.join(cwd, globstring))

    if firstPathParts.extension.lower() in OpTiffReader.TIFF_EXTS:
        # Special loader for TIFFs
        opLoader = OpTiffSequenceReader(parent=self.topLevelOperator.parent)
        opLoader.SequenceAxis.setValue(sequence_axis)
        opLoader.GlobString.setValue(globstring)
        data_slot = opLoader.Output
    elif firstPathParts.extension.lower() in (OpStreamingH5N5SequenceReaderM.H5EXTS +
                                              OpStreamingH5N5SequenceReaderM.N5EXTS):
        # Now use the .checkGlobString method of the stack readers
        # to decide between the single-file (S) and multi-file (M) reader:
        # each checkGlobString raises if the glob doesn't fit its scheme.
        isSingleFile = True
        try:
            OpStreamingH5N5SequenceReaderS.checkGlobString(globstring)
        except (OpStreamingH5N5SequenceReaderS.NoInternalPlaceholderError,
                OpStreamingH5N5SequenceReaderS.NotTheSameFileError,
                OpStreamingH5N5SequenceReaderS.ExternalPlaceholderError):
            isSingleFile = False

        isMultiFile = True
        try:
            OpStreamingH5N5SequenceReaderM.checkGlobString(globstring)
        except (OpStreamingH5N5SequenceReaderM.NoExternalPlaceholderError,
                OpStreamingH5N5SequenceReaderM.SameFileError,
                OpStreamingH5N5SequenceReaderM.InternalPlaceholderError):
            isMultiFile = False

        assert (not (isMultiFile and isSingleFile)), (
            "Something is wrong, glob string shouldn't allow both")
        assert (isMultiFile or isSingleFile), (
            "Glob string doesn't conform to h5 stack glob string rules")

        if isSingleFile:
            opLoader = OpStreamingH5N5SequenceReaderS(parent=self.topLevelOperator.parent)
        elif isMultiFile:
            opLoader = OpStreamingH5N5SequenceReaderM(parent=self.topLevelOperator.parent)

        opLoader.SequenceAxis.setValue(sequence_axis)
        opLoader.GlobString.setValue(globstring)
        data_slot = opLoader.OutputImage
    else:
        # All other sequences (e.g. pngs, jpegs, etc.)
        opLoader = OpStackLoader(parent=self.topLevelOperator.parent)
        opLoader.SequenceAxis.setValue(sequence_axis)
        opLoader.globstring.setValue(globstring)
        data_slot = opLoader.stack

    try:
        opWriter = OpH5N5WriterBigDataset(parent=self.topLevelOperator.parent)
        opWriter.h5N5File.setValue(projectFileHdf5)
        opWriter.h5N5Path.setValue(self.topGroupName + '/local_data/' + info.datasetId)
        opWriter.CompressionEnabled.setValue(False)
        # We assume that the main bottleneck is the hard disk,
        # so adding lots of threads to access it at once seems like a bad idea.
        opWriter.BatchSize.setValue(1)
        opWriter.Image.connect(data_slot)

        # Forward progress from the writer directly to our applet
        opWriter.progressSignal.subscribe(self.progressSignal)

        # Reading this slot performs the write.
        # NOTE(review): if the OpH5N5WriterBigDataset constructor itself
        # raised, `opWriter` would be unbound in the finally block — presumed
        # never to happen in practice; verify.
        success = opWriter.WriteImage.value
    finally:
        opWriter.cleanUp()
        opLoader.cleanUp()
        self.progressSignal(100)

    return success