def test_expandGlobStrings(self):
        """Check that an internal glob string expands to the matching datasets.

        A small HDF5 file with datasets spread over nested groups is written
        to a temporary directory. ``expandGlobStrings`` is then called twice
        with the same glob string: once with the still-open file handle and
        once, after the file has been closed, with the file name. Both calls
        must return only the datasets located below ``g1/g2``.
        """
        expected_datasets = ['g1/g2/data2', 'g1/g2/data3']

        with tempdir() as d:
            file_name = '{}/test.h5'.format(d)
            glob_string = '{}/g1/g2/data*'.format(file_name)

            # Use the file as a context manager: it is closed on every exit
            # path, and a failure to create the file surfaces as the original
            # error instead of a NameError from a ``finally: f.close()``.
            with h5py.File(file_name, mode='w') as f:
                g1 = f.create_group('g1')
                g2 = g1.create_group('g2')
                g3 = f.create_group('g3')
                # data1 and data4 live outside g1/g2 and must not be matched.
                g1.create_dataset('data1', data=numpy.ones((10, 10)))
                g2.create_dataset('data2', data=numpy.ones((10, 10)))
                g2.create_dataset('data3', data=numpy.ones((10, 10)))
                g3.create_dataset('data4', data=numpy.ones((10, 10)))
                f.flush()

                # Variant 1: expansion through the open file handle.
                glob_res1 = OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                    f, glob_string)
                self.assertEqual(glob_res1, expected_datasets)

            # Variant 2: expansion through the file name, file already closed.
            glob_res2 = OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                file_name, glob_string)
            self.assertEqual(glob_res2, expected_datasets)
    def test_globStringValidity(self):
        """Check whether globStrings are correctly verified.

        Each malformed glob string must make ``checkGlobString`` raise the
        paired exception; each well-formed one must pass without raising.
        """
        reader = OpStreamingHdf5SequenceReaderS
        sep = os.pathsep

        # (glob string, exception that checkGlobString is expected to raise)
        rejected = [
            ('/tmp/test.h5',
             reader.NoInternalPlaceholderError),
            ('/tmp/test.h5/a' + sep + '/tmp/test2.h5/a',
             reader.NotTheSameFileError),
            ('/tmp/test*.h5/a' + sep + '/tmp/test*.h5/a',
             reader.ExternalPlaceholderError),
            ('/tmp/test.jpg/*',
             reader.WrongFileTypeError),
        ]
        for bad_glob, expected_error in rejected:
            with self.assertRaises(expected_error):
                reader.checkGlobString(bad_glob)

        accepted = (
            '/tmp/test.h5/*',
            sep.join(['/tmp/test.h5/data1', '/tmp/test.h5/data2']),
            '/tmp/test.h5/data*',
        )

        # Implicit test for validity; test fails if an exception is raised
        for good_glob in accepted:
            reader.checkGlobString(good_glob)

        self.assertTrue(True)
    def _applyPattern(self):
        """Expand the pattern from ``patternEdit`` and update the file list.

        The text is split on ``os.path.pathsep``. Patterns whose extension is
        in ``H5EXTS`` are treated as HDF5 patterns, all others are expanded
        with ``OpStackLoader.expandGlobStrings``. The HDF5 patterns are then
        tried as a single-file stack and as a multi-file stack; a pattern
        that the respective ``checkGlobString`` rejects contributes nothing.
        The collected names are handed to ``self._updateFileList``.
        """
        globStrings = encode_from_qstring(self.patternEdit.text())
        H5EXTS = OpStreamingHdf5SequenceReaderM.H5EXTS
        filenames = []

        # see if some glob strings include HDF5 files
        h5Patterns = []
        otherPatterns = []
        for pattern in globStrings.split(os.path.pathsep):
            if PathComponents(pattern).extension in H5EXTS:
                h5Patterns.append(pattern)
            else:
                otherPatterns.append(pattern)

        h5GlobStrings = os.path.pathsep.join(h5Patterns)
        globStrings = os.path.pathsep.join(otherPatterns)

        filenames.extend(OpStackLoader.expandGlobStrings(globStrings))

        try:
            OpStreamingHdf5SequenceReaderS.checkGlobString(h5GlobStrings)
            # OK, if nothing raised there is a single h5 file in h5GlobStrings:
            pathComponents = PathComponents(
                h5GlobStrings.split(os.path.pathsep)[0])
            # Close the file as soon as the names are collected; the handle
            # was previously left open after every call.
            with h5py.File(pathComponents.externalPath, mode='r') as h5file:
                filenames.extend(
                    "{}/{}".format(pathComponents.externalPath, internal)
                    for internal in OpStreamingHdf5SequenceReaderS.
                    expandGlobStrings(h5file, h5GlobStrings))
        except (OpStreamingHdf5SequenceReaderS.WrongFileTypeError,
                OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
                OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
                OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
            # Deliberate: the pattern is simply not a single-file h5 stack.
            pass

        try:
            OpStreamingHdf5SequenceReaderM.checkGlobString(h5GlobStrings)
            filenames.extend(
                "{}/{}".format(external, internal)
                for external, internal in zip(
                    *OpStreamingHdf5SequenceReaderM.expandGlobStrings(
                        h5GlobStrings)))
        except (OpStreamingHdf5SequenceReaderM.WrongFileTypeError,
                OpStreamingHdf5SequenceReaderM.SameFileError,
                OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
                OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
            # Deliberate: the pattern is simply not a multi-file h5 stack.
            pass

        self._updateFileList(filenames)
    def _applyPattern(self):
        """Expand the pattern from ``patternEdit`` and update the file list.

        The text is split on ``os.path.pathsep``. Patterns whose extension is
        in ``H5EXTS`` are treated as HDF5 patterns, all others are expanded
        with ``OpStackLoader.expandGlobStrings``. The HDF5 patterns are then
        tried as a single-file stack and as a multi-file stack; a pattern
        that the respective ``checkGlobString`` rejects contributes nothing.
        The collected names are handed to ``self._updateFileList``.
        """
        globStrings = self.patternEdit.text()
        H5EXTS = OpStreamingHdf5SequenceReaderM.H5EXTS
        filenames = []

        # see if some glob strings include HDF5 files
        h5Patterns = []
        otherPatterns = []
        for pattern in globStrings.split(os.path.pathsep):
            if PathComponents(pattern).extension in H5EXTS:
                h5Patterns.append(pattern)
            else:
                otherPatterns.append(pattern)

        h5GlobStrings = os.path.pathsep.join(h5Patterns)
        globStrings = os.path.pathsep.join(otherPatterns)

        filenames.extend(OpStackLoader.expandGlobStrings(globStrings))

        try:
            OpStreamingHdf5SequenceReaderS.checkGlobString(h5GlobStrings)
            # OK, if nothing raised there is a single h5 file in h5GlobStrings:
            pathComponents = PathComponents(h5GlobStrings.split(os.path.pathsep)[0])
            # Close the file as soon as the names are collected; the handle
            # was previously left open after every call.
            with h5py.File(pathComponents.externalPath, mode='r') as h5file:
                filenames.extend(
                    "{}/{}".format(pathComponents.externalPath, internal)
                    for internal in OpStreamingHdf5SequenceReaderS.expandGlobStrings(h5file, h5GlobStrings))
        except (
                OpStreamingHdf5SequenceReaderS.WrongFileTypeError,
                OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
                OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
                OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
            # Deliberate: the pattern is simply not a single-file h5 stack.
            pass

        try:
            OpStreamingHdf5SequenceReaderM.checkGlobString(h5GlobStrings)
            filenames.extend(
                "{}/{}".format(external, internal)
                for external, internal
                in zip(*OpStreamingHdf5SequenceReaderM.expandGlobStrings(h5GlobStrings))
            )
        except (
                OpStreamingHdf5SequenceReaderM.WrongFileTypeError,
                OpStreamingHdf5SequenceReaderM.SameFileError,
                OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
                OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
            # Deliberate: the pattern is simply not a multi-file h5 stack.
            pass

        self._updateFileList(filenames)
    def _attemptOpenAsHdf5Stack(self, filePath):
        """Try to open ``filePath`` as a stack of HDF5 datasets.

        ``filePath`` is only considered when it contains a ``*`` placeholder
        or several paths joined by ``os.path.pathsep``. The ``checkGlobString``
        methods of the single-file and the multi-file sequence readers decide
        which reader, if any, is responsible.

        Returns a tuple ``(operators, output_slot)``: ``([opReader],
        opReader.OutputImage)`` on success, ``([], None)`` when ``filePath``
        cannot be handled as an HDF5 stack.
        """
        if not ('*' in filePath or os.path.pathsep in filePath):
            return ([], None)

        # Now use the .checkGlobString method of the stack readers
        isSingleFile = True
        try:
            OpStreamingHdf5SequenceReaderS.checkGlobString(filePath)
        except OpStreamingHdf5SequenceReaderS.WrongFileTypeError:
            return ([], None)
        except (OpStreamingHdf5SequenceReaderS.NoInternalPlaceholderError,
                OpStreamingHdf5SequenceReaderS.NotTheSameFileError,
                OpStreamingHdf5SequenceReaderS.ExternalPlaceholderError):
            isSingleFile = False

        isMultiFile = True
        try:
            OpStreamingHdf5SequenceReaderM.checkGlobString(filePath)
        except (OpStreamingHdf5SequenceReaderM.NoExternalPlaceholderError,
                OpStreamingHdf5SequenceReaderM.SameFileError,
                OpStreamingHdf5SequenceReaderM.InternalPlaceholderError):
            isMultiFile = False

        assert not (isMultiFile and isSingleFile)

        if isSingleFile:
            opReader = OpStreamingHdf5SequenceReaderS(parent=self)
        elif isMultiFile:
            opReader = OpStreamingHdf5SequenceReaderM(parent=self)
        else:
            # Neither reader accepts the pattern. Without this branch
            # opReader would be unbound below and raise UnboundLocalError.
            return ([], None)

        try:
            opReader.SequenceAxis.connect(self.SequenceAxis)
            opReader.GlobString.setValue(filePath)
            return ([opReader], opReader.OutputImage)
        except (OpStreamingHdf5SequenceReaderM.WrongFileTypeError,
                OpStreamingHdf5SequenceReaderS.WrongFileTypeError):
            return ([], None)
    def test_2d_vigra_along_t(self):
        """Test if 2d files generated through vigra are recognized correctly.

        Twenty 'yxc'-tagged slices are written as separate datasets of one
        HDF5 file and read back through the operator, stacked along 't'. The
        output must be ready, carry 'tyxc' axistags, and a sub-region of it
        must equal the corresponding region of the source data.
        """
        # Prepare some data set for this case
        data = numpy.random.randint(0, 255,
                                    (20, 100, 200, 3)).astype(numpy.uint8)
        axistags = vigra.defaultAxistags('yxc')

        expected_axistags = vigra.defaultAxistags('tyxc')

        op = OpStreamingHdf5SequenceReaderS(graph=self.graph)

        with tempdir() as d:
            try:
                testDataFileName = '{}/test.h5'.format(d)
                # Write the dataset to an hdf5 file
                # (Note: Don't use vigra to do this, which may reorder the axes)
                # The mode is given explicitly: h5py 3.x defaults to
                # read-only, which fails for a file that does not exist yet.
                with h5py.File(testDataFileName, 'w') as h5File:
                    volumes = h5File.create_group('volumes')

                    internalPathString = "subvolume-{sliceIndex:02d}"
                    for sliceIndex, tSlice in enumerate(data):
                        subpath = internalPathString.format(
                            sliceIndex=sliceIndex)
                        dataset = volumes.create_dataset(subpath, data=tSlice)
                        # Write the axistags attribute
                        dataset.attrs['axistags'] = axistags.toJSON()

                # Read the data with an operator
                hdf5GlobString = "{}/volumes/subvolume-*".format(
                    testDataFileName)
                op.SequenceAxis.setValue('t')
                op.GlobString.setValue(hdf5GlobString)

                assert op.OutputImage.ready()
                assert op.OutputImage.meta.axistags == expected_axistags
                assert (op.OutputImage[5:10, 50:100,
                                       100:150].wait() == data[5:10, 50:100,
                                                               100:150]).all()
            finally:
                # Clean up the operator while the temp directory still exists.
                op.cleanUp()
# Example #7
# 0
    def __init__(self,
                 filepath=None,
                 jsonNamespace=None,
                 cwd=None,
                 preloaded_array=None,
                 sequence_axis=None):
        """
        Describe one dataset, given either as a path or as a pre-loaded array.

        filepath: may be a globstring or a full hdf5 path+dataset. Several paths may be joined with
                  os.path.pathsep. A filepath that expands to a non-empty file list is marked as a
                  stack (fromstack=True).

        jsonNamespace: If provided, overrides default settings after filepath is applied

        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged
                         vigra.VigraArray.

        sequence_axis: Axis along which to stack (only applicable for stacks).
                       Only stored when filepath is given.

        Raises AssertionError if both filepath and preloaded_array are given, if an external and an
        internal glob placeholder are combined, if one of several separated paths contains a
        placeholder, or if the supplied files have differing extensions.
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array  # See description above.
        Location = DatasetInfo.Location
        # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._filePath = ""
        self._datasetId = ""  # The name of the data within the project file (if it is stored locally)
        # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.allowLabels = True
        self.drange = None
        self.normalizeDisplay = True
        self.sequenceAxis = None
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.original_axistags = None
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default'  # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = ""  # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.nickname = "preloaded-{}-array".format(
                self.preloaded_array.dtype.name)
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either globstring or separated paths),
            file_list = None

            # To support h5 sequences, filepath may contain external and
            # internal path components
            if not isUrl(filepath):
                file_list = filepath.split(os.path.pathsep)

                pathComponents = [PathComponents(x) for x in file_list]
                externalPaths = [pc.externalPath for pc in pathComponents]
                internalPaths = [pc.internalPath for pc in pathComponents]

                if len(file_list) > 0:
                    if len(externalPaths) == 1:
                        if '*' in externalPaths[0]:
                            # Placeholder in the external (file system) part
                            if internalPaths[0] is not None:
                                assert ('*' not in internalPaths[0]), (
                                    "Only internal OR external glob placeholder supported"
                                )
                            file_list = sorted(glob.glob(filepath))
                        else:
                            file_list = [externalPaths[0]]
                            if internalPaths[0] is not None:
                                if '*' in internalPaths[0]:
                                    # Placeholder in the internal part:
                                    # overwrite internalPaths, will be assembled further down
                                    glob_string = "{}{}".format(
                                        externalPaths[0], internalPaths[0])
                                    internalPaths = \
                                        OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                                            externalPaths[0], glob_string)
                                    if internalPaths:
                                        # One entry per matched internal path
                                        file_list = [externalPaths[0]
                                                     ] * len(internalPaths)
                                    else:
                                        # Nothing matched: not a stack
                                        file_list = None

                    else:
                        assert (not any('*' in ep for ep in externalPaths)), (
                            "Multiple glob paths shouldn't be happening")
                        file_list = list(externalPaths)

                    assert all(
                        pc.extension == pathComponents[0].extension
                        for pc in pathComponents[1:]), (
                            "Supplied multiple files with multiple extensions")
                    # The following is necessary for h5 as well as npz-files
                    internalPathExts = (OpInputDataReader.h5Exts +
                                        OpInputDataReader.npzExts)
                    internalPathExts = [
                        ".{}".format(ipx) for ipx in internalPathExts
                    ]

                    if pathComponents[
                            0].extension in internalPathExts and internalPaths:
                        if len(file_list) == len(internalPaths):
                            # assuming a matching internal paths to external paths
                            file_list_with_internal = []
                            for external, internal in zip(
                                    file_list, internalPaths):
                                if internal:
                                    file_list_with_internal.append(
                                        '{}/{}'.format(external, internal))
                                else:
                                    file_list_with_internal.append(external)
                            file_list = file_list_with_internal
                        else:
                            # sort of fallback, in case of a mismatch in lengths:
                            # every file gets the first internal path appended.
                            # The first path may have no internal component
                            # (None); then there is nothing to append. The
                            # unguarded concatenation raised a TypeError.
                            shared_internal = internalPaths[0]
                            if shared_internal is not None:
                                file_list = [
                                    external + '/' + shared_internal
                                    for external in file_list
                                ]

            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
                # Convert all paths to absolute
                file_list = [make_absolute(f, cwd) for f in file_list]
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join(file_list)

                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len(
                    os.path.splitext(file_list[-1])[1])
                nickname = PathComponents(prefix).filenameBase + (
                    "_" * num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack
            self.sequenceAxis = sequence_axis

        if jsonNamespace is not None:
            self.updateFromJson(jsonNamespace)
# Example #8
# 0
    def __init__(self, filepath=None, jsonNamespace=None, cwd=None, preloaded_array=None):
        """
        Describe one dataset, given either as a path or as a pre-loaded array.

        filepath: may be a globstring or a full hdf5 path+dataset. Several paths may be joined with
                  os.path.pathsep. A filepath that expands to a non-empty file list is marked as a
                  stack (fromstack=True).

        jsonNamespace: If provided, overrides default settings after filepath is applied

        cwd: The working directory for interpreting relative paths.  If not provided, os.getcwd() is used.

        preloaded_array: Instead of providing a filePath to read from, a pre-loaded array can be directly provided.
                         In that case, you'll probably want to configure the axistags member, or provide a tagged vigra.VigraArray.

        Raises AssertionError if both filepath and preloaded_array are given, if an external and an
        internal glob placeholder are combined, if one of several separated paths contains a
        placeholder, or if the supplied files have differing extensions.
        """
        assert preloaded_array is None or not filepath, "You can't provide filepath and a preloaded_array"
        cwd = cwd or os.getcwd()
        self.preloaded_array = preloaded_array # See description above.
        Location = DatasetInfo.Location
        self._filePath = ""                 # The original path to the data (also used as a fallback if the data isn't in the project yet)
        self._datasetId = ""                # The name of the data within the project file (if it is stored locally)
        self.allowLabels = True             # OBSOLETE: Whether or not this dataset should be used for training a classifier.
        self.drange = None
        self.normalizeDisplay = True
        self.fromstack = False
        self.nickname = ""
        self.axistags = None
        self.subvolume_roi = None
        self.location = Location.FileSystem
        self.display_mode = 'default' # choices: default, grayscale, rgba, random-colortable, binary-mask.

        if self.preloaded_array is not None:
            self.filePath = "" # set property to ensure unique _datasetId
            self.location = Location.PreloadedArray
            self.fromstack = False
            self.nickname = "preloaded-{}-array".format( self.preloaded_array.dtype.name )
            if hasattr(self.preloaded_array, 'axistags'):
                self.axistags = self.preloaded_array.axistags

        # Set defaults for location, nickname, filepath, and fromstack
        if filepath:
            # Check for sequences (either globstring or separated paths),
            file_list = None

            # To support h5 sequences, filepath may contain external and
            # internal path components
            if not isUrl(filepath):
                file_list = filepath.split(os.path.pathsep)

                pathComponents = [PathComponents(x) for x in file_list]
                externalPaths = [pc.externalPath for pc in pathComponents]
                internalPaths = [pc.internalPath for pc in pathComponents]

                if len(file_list) > 0:
                    if len(externalPaths) == 1:
                        if '*' in externalPaths[0]:
                            # Placeholder in the external (file system) part
                            if internalPaths[0] is not None:
                                assert ('*' not in internalPaths[0]), (
                                    "Only internal OR external glob placeholder supported"
                                )
                            file_list = sorted(glob.glob(filepath))
                        else:
                            file_list = [externalPaths[0]]
                            if internalPaths[0] is not None:
                                if '*' in internalPaths[0]:
                                    # Placeholder in the internal part:
                                    # overwrite internalPaths, will be assembled further down
                                    glob_string = "{}{}".format(externalPaths[0], internalPaths[0])
                                    internalPaths = \
                                        OpStreamingHdf5SequenceReaderS.expandGlobStrings(
                                            externalPaths[0], glob_string)
                                    if internalPaths:
                                        # One entry per matched internal path
                                        file_list = [externalPaths[0]] * len(internalPaths)
                                    else:
                                        # Nothing matched: not a stack
                                        file_list = None

                    else:
                        assert (not any('*' in ep for ep in externalPaths)), (
                            "Multiple glob paths shouldn't be happening"
                        )
                        file_list = list(externalPaths)

                    assert all(pc.extension == pathComponents[0].extension
                               for pc in pathComponents[1:]), (
                        "Supplied multiple files with multiple extensions"
                    )
                    # The following is necessary for h5 as well as npz-files
                    internalPathExts = (
                        OpInputDataReader.h5Exts +
                        OpInputDataReader.npzExts
                    )
                    internalPathExts = [".{}".format(ipx) for ipx in internalPathExts]

                    if pathComponents[0].extension in internalPathExts and internalPaths:
                        if len(file_list) == len(internalPaths):
                            # assuming a matching internal paths to external paths
                            file_list_with_internal = []
                            for external, internal in zip(file_list, internalPaths):
                                if internal:
                                    file_list_with_internal.append('{}/{}'.format(external, internal))
                                else:
                                    file_list_with_internal.append(external)
                            file_list = file_list_with_internal
                        else:
                            # sort of fallback, in case of a mismatch in lengths:
                            # every file gets the first internal path appended.
                            # The first path may have no internal component
                            # (None); then there is nothing to append. The
                            # unguarded concatenation raised a TypeError.
                            shared_internal = internalPaths[0]
                            if shared_internal is not None:
                                file_list = [external + '/' + shared_internal for external in file_list]

            # For stacks, choose nickname based on a common prefix
            if file_list:
                fromstack = True
                # Convert all paths to absolute
                file_list = [make_absolute(f, cwd) for f in file_list]
                if '*' in filepath:
                    filepath = make_absolute(filepath, cwd)
                else:
                    filepath = os.path.pathsep.join( file_list )

                # Add an underscore for each wildcard digit
                prefix = os.path.commonprefix(file_list)
                num_wildcards = len(file_list[-1]) - len(prefix) - len( os.path.splitext(file_list[-1])[1] )
                nickname = PathComponents(prefix).filenameBase + ("_"*num_wildcards)
            else:
                fromstack = False
                if not isUrl(filepath):
                    # Convert all (non-url) paths to absolute
                    filepath = make_absolute(filepath, cwd)
                nickname = PathComponents(filepath).filenameBase

            self.location = DatasetInfo.Location.FileSystem
            self.nickname = nickname
            self.filePath = filepath
            self.fromstack = fromstack

        if jsonNamespace is not None:
            self.updateFromJson( jsonNamespace )