def setUp(self):
        """
        Create a blockwise fileset to test with.
        """
        if platform.system() == 'Windows':
            # On windows, there are errors, and we make no attempt to solve them (at the moment).
            raise nose.SkipTest

        try:
            BlockwiseFileset._prepare_system()
        except ValueError:
            # If the system isn't configured to allow lots of open files, we can't run this test.
            raise nose.SkipTest

        testConfig = \
        """
        {
            "_schema_name" : "blockwise-fileset-description",
            "_schema_version" : 1.0,

            "name" : "synapse_small",
            "format" : "hdf5",
            "axes" : "txyzc",
            "shape" : [1,400,400,100,1],
            "dtype" : "numpy.uint8",
            "block_shape" : [1, 50, 50, 50, 100],
            "block_file_name_format" : "cube{roiString}.h5/volume/data"
        }
        """
        self.tempDir = tempfile.mkdtemp()
        self.configpath = os.path.join(self.tempDir, "config.json")

        logger.debug("Loading config file...")
        with open(self.configpath, 'w') as f:
            f.write(testConfig)

        logger.debug("Creating random test data...")
        bfs = BlockwiseFileset(self.configpath, 'a')
        dataShape = tuple(bfs.description.shape)
        self.data = numpy.random.randint(255,
                                         size=dataShape).astype(numpy.uint8)

        logger.debug("Writing test data...")
        datasetRoi = ([0, 0, 0, 0, 0], dataShape)
        bfs.writeData(datasetRoi, self.data)
        block_starts = getIntersectingBlocks(bfs.description.block_shape,
                                             datasetRoi)
        for block_start in block_starts:
            bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
        bfs.close()
    def setUp(self):
        """
        Create a blockwise fileset to test with.
        """
        if platform.system() == 'Windows':
            # On windows, there are errors, and we make no attempt to solve them (at the moment).
            raise nose.SkipTest
        
        try:
            BlockwiseFileset._prepare_system()
        except ValueError:
            # If the system isn't configured to allow lots of open files, we can't run this test.
            raise nose.SkipTest
        
        testConfig = \
        """
        {
            "_schema_name" : "blockwise-fileset-description",
            "_schema_version" : 1.0,

            "name" : "synapse_small",
            "format" : "hdf5",
            "axes" : "txyzc",
            "shape" : [1,400,400,100,1],
            "dtype" : "numpy.uint8",
            "block_shape" : [1, 50, 50, 50, 100],
            "block_file_name_format" : "cube{roiString}.h5/volume/data"
        }
        """
        self.tempDir = tempfile.mkdtemp()
        self.configpath = os.path.join(self.tempDir, "config.json")

        logger.debug( "Loading config file..." )
        with open(self.configpath, 'w') as f:
            f.write(testConfig)
        
        logger.debug( "Creating random test data..." )
        bfs = BlockwiseFileset( self.configpath, 'a' )
        dataShape = tuple(bfs.description.shape)
        self.data = numpy.random.randint( 255, size=dataShape ).astype(numpy.uint8)
        
        logger.debug( "Writing test data..." )
        datasetRoi = ([0,0,0,0,0], dataShape)
        bfs.writeData( datasetRoi, self.data )
        block_starts = getIntersectingBlocks(bfs.description.block_shape, datasetRoi)
        for block_start in block_starts:
            bfs.setBlockStatus(block_start, BlockwiseFileset.BLOCK_AVAILABLE)
        bfs.close()
示例#3
0
    def _prepareDestination(self):
        """
        - If the result file doesn't exist yet, create it (and the dataset)
        - If the result file already exists, return a list of the rois that
        are NOT needed (their data already exists in the final output)
        """
        originalDescription = BlockwiseFileset.readDescription(self.OutputDatasetDescription.value)
        datasetDescription = copy.deepcopy(originalDescription)

        # Modify description fields as needed
        # -- axes
        datasetDescription.axes = "".join(list(self.Input.meta.getTaggedShape().keys()))
        assert set(originalDescription.axes) == set(datasetDescription.axes), (
            "Can't prepare destination dataset: original dataset description listed "
            "axes as {}, but actual output axes are {}".format(originalDescription.axes, datasetDescription.axes)
        )

        # -- shape
        datasetDescription.view_shape = list(self.Input.meta.shape)
        # -- block_shape
        assert originalDescription.block_shape is not None
        originalBlockDims = collections.OrderedDict(
            list(zip(originalDescription.axes, originalDescription.block_shape))
        )
        datasetDescription.block_shape = [originalBlockDims[a] for a in datasetDescription.axes]
        datasetDescription.block_shape = list(
            map(min, list(zip(datasetDescription.block_shape, self.Input.meta.shape)))
        )
        # -- chunks
        if originalDescription.chunks is not None:
            originalChunkDims = collections.OrderedDict(list(zip(originalDescription.axes, originalDescription.chunks)))
            datasetDescription.chunks = [originalChunkDims[a] for a in datasetDescription.axes]
            datasetDescription.chunks = list(map(min, list(zip(datasetDescription.chunks, self.Input.meta.shape))))
        # -- dtype
        if datasetDescription.dtype != self.Input.meta.dtype:
            dtype = self.Input.meta.dtype
            if type(dtype) is numpy.dtype:
                dtype = dtype.type
            datasetDescription.dtype = dtype().__class__.__name__

        # Create a unique hash for this blocking scheme.
        # If it changes, we can't use any previous data.
        sha = hashlib.sha1()
        sha.update(str(tuple(datasetDescription.block_shape)))
        sha.update(datasetDescription.axes)
        sha.update(datasetDescription.block_file_name_format)

        datasetDescription.hash_id = sha.hexdigest()

        if datasetDescription != originalDescription:
            descriptionFilePath = self.OutputDatasetDescription.value
            logger.info("Overwriting dataset description: {}".format(descriptionFilePath))
            BlockwiseFileset.writeDescription(descriptionFilePath, datasetDescription)
            with open(descriptionFilePath, "r") as f:
                logger.info(f.read())

        # Now open the dataset
        blockwiseFileset = BlockwiseFileset(self.OutputDatasetDescription.value)

        taskInfos = self._prepareTaskInfos(blockwiseFileset.getAllBlockRois())

        if blockwiseFileset.description.hash_id != originalDescription.hash_id:
            # Something about our blocking scheme changed.
            # Make sure all blocks are marked as NOT available.
            # (Just in case some were left over from a previous run.)
            for roi in list(taskInfos.keys()):
                blockwiseFileset.setBlockStatus(roi[0], BlockwiseFileset.BLOCK_NOT_AVAILABLE)

        return blockwiseFileset, taskInfos
示例#4
0
    def _prepareDestination(self):
        """
        - If the result file doesn't exist yet, create it (and the dataset)
        - If the result file already exists, return a list of the rois that 
        are NOT needed (their data already exists in the final output)
        """
        originalDescription = BlockwiseFileset.readDescription(self.OutputDatasetDescription.value)
        datasetDescription = copy.deepcopy(originalDescription)

        # Modify description fields as needed
        # -- axes
        datasetDescription.axes = "".join( list(self.Input.meta.getTaggedShape().keys()) )
        assert set(originalDescription.axes) == set( datasetDescription.axes ), \
            "Can't prepare destination dataset: original dataset description listed " \
            "axes as {}, but actual output axes are {}".format( originalDescription.axes, datasetDescription.axes )

        # -- shape
        datasetDescription.view_shape = list(self.Input.meta.shape)
        # -- block_shape
        assert originalDescription.block_shape is not None
        originalBlockDims = collections.OrderedDict( list(zip( originalDescription.axes, originalDescription.block_shape )) )
        datasetDescription.block_shape = [originalBlockDims[a] for a in datasetDescription.axes]
        datasetDescription.block_shape = list(map( min, list(zip( datasetDescription.block_shape, self.Input.meta.shape )) ))
        # -- chunks
        if originalDescription.chunks is not None:
            originalChunkDims = collections.OrderedDict( list(zip( originalDescription.axes, originalDescription.chunks )) )
            datasetDescription.chunks = [originalChunkDims[a] for a in datasetDescription.axes]
            datasetDescription.chunks = list(map( min, list(zip( datasetDescription.chunks, self.Input.meta.shape )) ))
        # -- dtype
        if datasetDescription.dtype != self.Input.meta.dtype:
            dtype = self.Input.meta.dtype
            if type(dtype) is numpy.dtype:
                dtype = dtype.type
            datasetDescription.dtype = dtype().__class__.__name__

        # Create a unique hash for this blocking scheme.
        # If it changes, we can't use any previous data.
        sha = hashlib.sha1()
        sha.update( str( tuple( datasetDescription.block_shape) ) )
        sha.update( datasetDescription.axes )
        sha.update( datasetDescription.block_file_name_format )

        datasetDescription.hash_id = sha.hexdigest()

        if datasetDescription != originalDescription:
            descriptionFilePath = self.OutputDatasetDescription.value
            logger.info( "Overwriting dataset description: {}".format( descriptionFilePath ) )
            BlockwiseFileset.writeDescription(descriptionFilePath, datasetDescription)
            with open( descriptionFilePath, 'r' ) as f:
                logger.info( f.read() )

        # Now open the dataset
        blockwiseFileset = BlockwiseFileset( self.OutputDatasetDescription.value )
        
        taskInfos = self._prepareTaskInfos( blockwiseFileset.getAllBlockRois() )
        
        if blockwiseFileset.description.hash_id != originalDescription.hash_id:
            # Something about our blocking scheme changed.
            # Make sure all blocks are marked as NOT available.
            # (Just in case some were left over from a previous run.)
            for roi in list(taskInfos.keys()):
                blockwiseFileset.setBlockStatus( roi[0], BlockwiseFileset.BLOCK_NOT_AVAILABLE )

        return blockwiseFileset, taskInfos