def test_expandGlobStrings(self):
    """expandGlobStrings must resolve an internal-path glob against both an
    open h5py.File handle and a plain filename string."""
    expected_datasets = ['g1/g2/data2', 'g1/g2/data3']

    with tempdir() as d:
        file_name = '{}/test.h5'.format(d)
        # Open BEFORE the try-block: if h5py.File() itself raises, `f` would
        # be unbound and the `finally: f.close()` would mask the real error
        # with a NameError.
        f = h5py.File(file_name, mode='w')
        try:
            g1 = f.create_group('g1')
            g2 = g1.create_group('g2')
            g3 = f.create_group('g3')
            g1.create_dataset('data1', data=numpy.ones((10, 10)))
            g2.create_dataset('data2', data=numpy.ones((10, 10)))
            g2.create_dataset('data3', data=numpy.ones((10, 10)))
            g3.create_dataset('data4', data=numpy.ones((10, 10)))
            f.flush()

            # Variant 1: pass the open file handle.
            glob_res1 = OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                f, '{}/g1/g2/data*'.format(file_name))
            self.assertEqual(glob_res1, expected_datasets)
        finally:
            f.close()

        # Variant 2: pass the file name string; expandGlobStrings is expected
        # to open (and close) the file itself.
        glob_res2 = OpStreamingHdf5SequenceReaderS.expandGlobStrings(
            file_name, '{}/g1/g2/data*'.format(file_name))
        self.assertEqual(glob_res2, expected_datasets)
def test_globStringValidity(self):
    """Check whether globStrings are correctly verified"""
    # Each invalid glob string must trigger its specific validation error.
    invalid_cases = [
        ('/tmp/test.h5',
         OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError),
        ('/tmp/test.h5/a' + os.pathsep + '/tmp/test2.h5/a',
         OpStreamingHdf5SequenceReaderS.NotTheSameFileError),
        ('/tmp/test*.h5/a' + os.pathsep + '/tmp/test*.h5/a',
         OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError),
        ('/tmp/test.jpg/*',
         OpStreamingHdf5SequenceReaderS.WrongFileTypeError),
    ]
    for testGlobString, expected_error in invalid_cases:
        with self.assertRaises(expected_error):
            OpStreamingHdf5SequenceReaderS.checkGlobString(testGlobString)

    validGlobStrings = [
        '/tmp/test.h5/*',
        '/tmp/test.h5/data1' + os.pathsep + '/tmp/test.h5/data2',
        '/tmp/test.h5/data*'
    ]

    # Implicit test for validity; test fails if an exception is raised
    for testGlobString in validGlobStrings:
        OpStreamingHdf5SequenceReaderS.checkGlobString(testGlobString)
    self.assertTrue(True)
def _applyPattern(self):
    """Expand the user-entered glob pattern into a concrete file list.

    Plain image globs are expanded via OpStackLoader; glob strings that point
    into HDF5 files are expanded via the single-file (S) or multi-file (M)
    HDF5 sequence readers. The combined list is pushed to the file-list UI.
    """
    globStrings = encode_from_qstring(self.patternEdit.text())
    H5EXTS = OpStreamingHdf5SequenceReaderM.H5EXTS
    filenames = []
    # see if some glob strings include HDF5 files
    globStrings = globStrings.split(os.path.pathsep)
    pcs = [PathComponents(x) for x in globStrings]
    ish5 = [x.extension in H5EXTS for x in pcs]

    # Split the entries into HDF5 globs and plain (stack) globs.
    h5GlobStrings = os.path.pathsep.join(
        [x for x, y in zip(globStrings, ish5) if y is True])
    globStrings = os.path.pathsep.join(
        [x for x, y in zip(globStrings, ish5) if y is False])

    filenames.extend(OpStackLoader.expandGlobStrings(globStrings))

    try:
        OpStreamingHdf5SequenceReaderS.checkGlobString(h5GlobStrings)
        # OK, if nothing raised there is a single h5 file in h5GlobStrings:
        pathComponents = PathComponents(
            h5GlobStrings.split(os.path.pathsep)[0])
        # Fix: use a context manager so the HDF5 handle is closed again
        # (it was previously left open, leaking the file handle).
        with h5py.File(pathComponents.externalPath, mode='r') as h5file:
            filenames.extend(
                "{}/{}".format(pathComponents.externalPath, internal)
                for internal in OpStreamingHdf5SequenceReaderS.
                expandGlobStrings(h5file, h5GlobStrings))
    except (OpStreamingHdf5SequenceReaderS.WrongFileTypeError,
            OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
            OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
            OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
        # Not a single-file HDF5 glob; try the multi-file reader below.
        pass

    try:
        OpStreamingHdf5SequenceReaderM.checkGlobString(h5GlobStrings)
        filenames.extend(
            "{}/{}".format(external, internal)
            for external, internal in zip(
                *OpStreamingHdf5SequenceReaderM.expandGlobStrings(
                    h5GlobStrings)))
    except (OpStreamingHdf5SequenceReaderM.WrongFileTypeError,
            OpStreamingHdf5SequenceReaderM.SameFileError,
            OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
            OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
        # Not a multi-file HDF5 glob either; nothing more to add.
        pass

    self._updateFileList(filenames)
def _applyPattern(self):
    """Expand the user-entered glob pattern into a concrete file list.

    Plain image globs are expanded via OpStackLoader; glob strings that point
    into HDF5 files are expanded via the single-file (S) or multi-file (M)
    HDF5 sequence readers. The combined list is pushed to the file-list UI.
    """
    globStrings = self.patternEdit.text()
    H5EXTS = OpStreamingHdf5SequenceReaderM.H5EXTS
    filenames = []
    # see if some glob strings include HDF5 files
    globStrings = globStrings.split(os.path.pathsep)
    pcs = [PathComponents(x) for x in globStrings]
    ish5 = [x.extension in H5EXTS for x in pcs]

    # Split the entries into HDF5 globs and plain (stack) globs.
    h5GlobStrings = os.path.pathsep.join([x for x, y in zip(globStrings, ish5) if y is True])
    globStrings = os.path.pathsep.join([x for x, y in zip(globStrings, ish5) if y is False])

    filenames.extend(OpStackLoader.expandGlobStrings(globStrings))

    try:
        OpStreamingHdf5SequenceReaderS.checkGlobString(h5GlobStrings)
        # OK, if nothing raised there is a single h5 file in h5GlobStrings:
        pathComponents = PathComponents(h5GlobStrings.split(os.path.pathsep)[0])
        # Fix: use a context manager so the HDF5 handle is closed again
        # (it was previously left open, leaking the file handle).
        with h5py.File(pathComponents.externalPath, mode='r') as h5file:
            filenames.extend(
                "{}/{}".format(pathComponents.externalPath, internal)
                for internal in OpStreamingHdf5SequenceReaderS.expandGlobStrings(h5file, h5GlobStrings))
    except (
            OpStreamingHdf5SequenceReaderS.WrongFileTypeError,
            OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
            OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
            OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
        # Not a single-file HDF5 glob; try the multi-file reader below.
        pass

    try:
        OpStreamingHdf5SequenceReaderM.checkGlobString(h5GlobStrings)
        filenames.extend(
            "{}/{}".format(external, internal)
            for external, internal in zip(*OpStreamingHdf5SequenceReaderM.expandGlobStrings(h5GlobStrings))
        )
    except (
            OpStreamingHdf5SequenceReaderM.WrongFileTypeError,
            OpStreamingHdf5SequenceReaderM.SameFileError,
            OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
            OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
        # Not a multi-file HDF5 glob either; nothing more to add.
        pass

    self._updateFileList(filenames)
def _attemptOpenAsHdf5Stack(self, filePath):
    """Try to interpret filePath as an HDF5 sequence glob string.

    Returns a tuple ([readers], outputSlot) on success, or ([], None) when
    filePath is not an HDF5 stack glob at all.
    """
    # Only glob strings ('*') or pathsep-separated lists can be stacks.
    if not ('*' in filePath or os.path.pathsep in filePath):
        return ([], None)

    # Now use the .checkGlobString method of the stack readers
    isSingleFile = True
    try:
        OpStreamingHdf5SequenceReaderS.checkGlobString(filePath)
    except OpStreamingHdf5SequenceReaderS.WrongFileTypeError:
        # Not an HDF5 file at all — neither reader can apply.
        return ([], None)
    except (OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
            OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
            OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
        isSingleFile = False

    isMultiFile = True
    try:
        OpStreamingHdf5SequenceReaderM.checkGlobString(filePath)
    except (OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
            OpStreamingHdf5SequenceReaderM.SameFileError,
            OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
        isMultiFile = False

    # The glob string cannot simultaneously satisfy both readers.
    assert (not (isMultiFile and isSingleFile))

    if isSingleFile is True:
        opReader = OpStreamingHdf5SequenceReaderS(parent=self)
    elif isMultiFile is True:
        opReader = OpStreamingHdf5SequenceReaderM(parent=self)
    else:
        # Fix: neither reader accepted the glob string — previously
        # `opReader` could remain unbound here, raising NameError below.
        return ([], None)

    try:
        opReader.SequenceAxis.connect(self.SequenceAxis)
        opReader.GlobString.setValue(filePath)
        return ([opReader], opReader.OutputImage)
    except (OpStreamingHdf5SequenceReaderM.WrongFileTypeError,
            OpStreamingHdf5SequenceReaderS.WrongFileTypeError):
        return ([], None)
def test_2d_vigra_along_t(self):
    """Test if 2d files generated through vigra are recognized correctly"""
    # Prepare some data set for this case
    data = numpy.random.randint(0, 255, (20, 100, 200, 3)).astype(numpy.uint8)
    axistags = vigra.defaultAxistags('yxc')
    expected_axistags = vigra.defaultAxistags('tyxc')

    op = OpStreamingHdf5SequenceReaderS(graph=self.graph)

    with tempdir() as d:
        try:
            testDataFileName = '{}/test.h5'.format(d)
            # Write the dataset to an hdf5 file
            # (Note: Don't use vigra to do this, which may reorder the axes)
            # Fix: pass mode='w' explicitly — h5py's historical default
            # mode is deprecated/removed and could open an existing file
            # for append instead of truncating it.
            h5File = h5py.File(testDataFileName, mode='w')
            try:
                h5File.create_group('volumes')

                internalPathString = "subvolume-{sliceIndex:02d}"
                for sliceIndex, zSlice in enumerate(data):
                    subpath = internalPathString.format(
                        sliceIndex=sliceIndex)
                    h5File['volumes'].create_dataset(subpath, data=zSlice)
                    # Write the axistags attribute
                    current_path = 'volumes/{}'.format(subpath)
                    h5File[current_path].attrs[
                        'axistags'] = axistags.toJSON()
            finally:
                h5File.close()

            # Read the data with an operator
            hdf5GlobString = "{}/volumes/subvolume-*".format(
                testDataFileName)
            op.SequenceAxis.setValue('t')
            op.GlobString.setValue(hdf5GlobString)

            assert op.OutputImage.ready()
            assert op.OutputImage.meta.axistags == expected_axistags
            assert (op.OutputImage[5:10, 50:100, 100:150].wait() ==
                    data[5:10, 50:100, 100:150]).all()
        finally:
            op.cleanUp()
def __init__(self, filepath=None, jsonNamespace=None, cwd=None,
             preloaded_array=None, sequence_axis=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath
        is applied

    cwd: The working directory for interpreting relative paths.  If not
        provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a
        pre-loaded array can be directly provided.  In that case, you'll
        probably want to configure the axistags member, or provide a tagged
        vigra.VigraArray.

    sequence_axis: Axis along which to stack (only applicable for stacks).
    """
    assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._filePath = ""
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.allowLabels = True
    self.drange = None
    self.normalizeDisplay = True
    self.sequenceAxis = None
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.original_axistags = None
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.nickname = "preloaded-{}-array".format(
            self.preloaded_array.dtype.name)
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths),
        file_list = None

        # To support h5 sequences, filepath may contain external and
        # internal path components
        if not isUrl(filepath):
            file_list = filepath.split(os.path.pathsep)
            pathComponents = [PathComponents(x) for x in file_list]
            externalPaths = [pc.externalPath for pc in pathComponents]
            internalPaths = [pc.internalPath for pc in pathComponents]

            if len(file_list) > 0:
                if len(externalPaths) == 1:
                    if '*' in externalPaths[0]:
                        # External glob, e.g. /data/slice*.h5 — internal
                        # paths must then be literal (no '*').
                        if internalPaths[0] is not None:
                            assert ('*' not in internalPaths[0]), (
                                "Only internal OR external glob placeholder supported"
                            )
                        file_list = sorted(glob.glob(filepath))
                    else:
                        file_list = [externalPaths[0]]
                        if internalPaths[0] is not None:
                            if '*' in internalPaths[0]:
                                # Internal glob, e.g. /data/file.h5/slice*:
                                # overwrite internalPaths, will be assembled further down
                                glob_string = "{}{}".format(
                                    externalPaths[0], internalPaths[0])
                                internalPaths = \
                                    OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                                        externalPaths[0], glob_string)
                                if internalPaths:
                                    # One external-path entry per matched
                                    # internal dataset.
                                    file_list = [externalPaths[0]
                                                 ] * len(internalPaths)
                                else:
                                    # Glob matched nothing.
                                    file_list = None
                else:
                    # Multiple pathsep-separated files: globs not allowed.
                    assert (not any('*' in ep for ep in externalPaths)), (
                        "Multiple glob paths shouldn't be happening")
                    file_list = [ex for ex in externalPaths]

                assert all(
                    pc.extension == pathComponents[0].extension
                    for pc in pathComponents[1::]), (
                    "Supplied multiple files with multiple extensions")
                # The following is necessary for h5 as well as npz-files
                internalPathExts = (OpInputDataReader.h5Exts +
                                    OpInputDataReader.npzExts)
                internalPathExts = [
                    ".{}".format(ipx) for ipx in internalPathExts
                ]
                if pathComponents[
                        0].extension in internalPathExts and internalPaths:
                    if len(file_list) == len(internalPaths):
                        # assuming a matching internal paths to external paths
                        file_list_with_internal = []
                        for external, internal in zip(
                                file_list, internalPaths):
                            if internal:
                                file_list_with_internal.append(
                                    '{}/{}'.format(external, internal))
                            else:
                                file_list_with_internal.append(external)
                        file_list = file_list_with_internal
                    else:
                        # sort of fallback, in case of a mismatch in lengths
                        for i in range(len(file_list)):
                            file_list[i] += '/' + internalPaths[0]

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True
            # Convert all paths to absolute
            file_list = [make_absolute(f, cwd) for f in file_list]
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(file_list)

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - len(
                os.path.splitext(file_list[-1])[1])
            nickname = PathComponents(prefix).filenameBase + (
                "_" * num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack
        self.sequenceAxis = sequence_axis

    if jsonNamespace is not None:
        self.updateFromJson(jsonNamespace)
def __init__(self, filepath=None, jsonNamespace=None, cwd=None,
             preloaded_array=None):
    """
    filepath: may be a globstring or a full hdf5 path+dataset

    jsonNamespace: If provided, overrides default settings after filepath
        is applied

    cwd: The working directory for interpreting relative paths.  If not
        provided, os.getcwd() is used.

    preloaded_array: Instead of providing a filePath to read from, a
        pre-loaded array can be directly provided.  In that case, you'll
        probably want to configure the axistags member, or provide a tagged
        vigra.VigraArray.
    """
    assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
    cwd = cwd or os.getcwd()
    self.preloaded_array = preloaded_array  # See description above.
    Location = DatasetInfo.Location
    self._filePath = ""  # The original path to the data (also used as a fallback if the data isn't in the project yet)
    self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
    self.allowLabels = True  # OBSOLETE: Whether or not this dataset should be used for training a classifier.
    self.drange = None
    self.normalizeDisplay = True
    self.fromstack = False
    self.nickname = ""
    self.axistags = None
    self.subvolume_roi = None
    self.location = Location.FileSystem
    self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

    if self.preloaded_array is not None:
        self.filePath = ""  # set property to ensure unique _datasetId
        self.location = Location.PreloadedArray
        self.fromstack = False
        self.nickname = "preloaded-{}-array".format(
            self.preloaded_array.dtype.name
        )
        if hasattr(self.preloaded_array, 'axistags'):
            self.axistags = self.preloaded_array.axistags

    # Set defaults for location, nickname, filepath, and fromstack
    if filepath:
        # Check for sequences (either globstring or separated paths),
        file_list = None

        # To support h5 sequences, filepath may contain external and
        # internal path components
        if not isUrl(filepath):
            file_list = filepath.split(os.path.pathsep)
            pathComponents = [PathComponents(x) for x in file_list]
            externalPaths = [pc.externalPath for pc in pathComponents]
            internalPaths = [pc.internalPath for pc in pathComponents]

            if len(file_list) > 0:
                if len(externalPaths) == 1:
                    if '*' in externalPaths[0]:
                        # External glob, e.g. /data/slice*.h5 — internal
                        # paths must then be literal (no '*').
                        if internalPaths[0] is not None:
                            assert ('*' not in internalPaths[0]), (
                                "Only internal OR external glob placeholder supported"
                            )
                        file_list = sorted(glob.glob(filepath))
                    else:
                        file_list = [externalPaths[0]]
                        if internalPaths[0] is not None:
                            if '*' in internalPaths[0]:
                                # Internal glob, e.g. /data/file.h5/slice*:
                                # overwrite internalPaths, will be assembled further down
                                glob_string = "{}{}".format(externalPaths[0], internalPaths[0])
                                internalPaths = \
                                    OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                                        externalPaths[0], glob_string)
                                if internalPaths:
                                    # One external-path entry per matched
                                    # internal dataset.
                                    file_list = [externalPaths[0]] * len(internalPaths)
                                else:
                                    # Glob matched nothing.
                                    file_list = None
                else:
                    # Multiple pathsep-separated files: globs not allowed.
                    assert (not any('*' in ep for ep in externalPaths)), (
                        "Multiple glob paths shouldn't be happening"
                    )
                    file_list = [ex for ex in externalPaths]

                assert all(pc.extension == pathComponents[0].extension
                           for pc in pathComponents[1::]), (
                    "Supplied multiple files with multiple extensions"
                )
                # The following is necessary for h5 as well as npz-files
                internalPathExts = (
                    OpInputDataReader.h5Exts + OpInputDataReader.npzExts
                )
                internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]
                if pathComponents[0].extension in internalPathExts and internalPaths:
                    if len(file_list) == len(internalPaths):
                        # assuming a matching internal paths to external paths
                        file_list_with_internal = []
                        for external, internal in zip(file_list, internalPaths):
                            if internal:
                                file_list_with_internal.append('{}/{}'.format(external, internal))
                            else:
                                file_list_with_internal.append(external)
                        file_list = file_list_with_internal
                    else:
                        # sort of fallback, in case of a mismatch in lengths
                        for i in range(len(file_list)):
                            file_list[i] += '/' + internalPaths[0]

        # For stacks, choose nickname based on a common prefix
        if file_list:
            fromstack = True
            # Convert all paths to absolute
            file_list = [make_absolute(f, cwd) for f in file_list]
            if '*' in filepath:
                filepath = make_absolute(filepath, cwd)
            else:
                filepath = os.path.pathsep.join(
                    file_list
                )

            # Add an underscore for each wildcard digit
            prefix = os.path.commonprefix(file_list)
            num_wildcards = len(file_list[-1]) - len(prefix) - len(
                os.path.splitext(file_list[-1])[1]
            )
            nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
        else:
            fromstack = False
            if not isUrl(filepath):
                # Convert all (non-url) paths to absolute
                filepath = make_absolute(filepath, cwd)
            nickname = PathComponents(filepath).filenameBase

        self.location = DatasetInfo.Location.FileSystem
        self.nickname = nickname
        self.filePath = filepath
        self.fromstack = fromstack

    if jsonNamespace is not None:
        self.updateFromJson( jsonNamespace )