Exemplo n.º 1
0
 def _to_input_list(v):
     # TODO - move this fcn definition??
     if isinstance(v, list):
         return v
     else:
         # value is a string glob
         name, serverfilter = unifiedIO.split_cluster_url(v)
         return clusterIO.cglob(name, serverfilter)
Exemplo n.º 2
0
    def __init__(self,
                 storage_directory,
                 pyramid_tile_size=256,
                 mdh=None,
                 n_tiles_x=0,
                 n_tiles_y=0,
                 depth=0,
                 x0=0,
                 y0=0,
                 pixel_size=1,
                 backend=PZFTileIO):
        """
        Set up a tile pyramid stored under `storage_directory`.

        Parameters
        ----------
        storage_directory : str or tempfile.TemporaryDirectory
            Where the tiles live. A `tempfile.TemporaryDirectory` is kept
            referenced on the instance (as `_temp_directory`) and its `.name`
            is used as the directory. A cluster URI is reduced to its name
            component via `unifiedIO.split_cluster_url` and requires
            `backend` to be `ClusterPZFTileIO`.
        pyramid_tile_size : int
            Stored as `tile_size` and written to metadata as
            'Pyramid.TileSize'.
        mdh : optional
            Metadata passed to `NestedClassMDHandler` to build the pyramid's
            own metadata handler.
        n_tiles_x, n_tiles_y, depth : int
            Stored unchanged on the instance.
        x0, y0, pixel_size :
            Stored on the instance and written to metadata as 'Pyramid.x0',
            'Pyramid.y0' and 'Pyramid.PixelSize'.
        backend : callable or None
            Called as `backend(base_dir=..., suff=...)` to create the 'img',
            'acc' and 'occ' tile stores. If None, the backend is chosen by
            `infer_tileio_backend(self.base_dir)`.

        Raises
        ------
        AssertionError
            If `storage_directory` is a cluster URI and `backend` is not
            `ClusterPZFTileIO`.
        """

        if isinstance(storage_directory, tempfile.TemporaryDirectory):
            # If the storage directory is a temporary directory, keep a reference and cleanup the directory when we delete the pyramid
            # used to support transitory pyramids.
            self._temp_directory = storage_directory
            storage_directory = storage_directory.name

        if unifiedIO.is_cluster_uri(storage_directory):
            assert backend == ClusterPZFTileIO, \
                'cluster URIs require the ClusterPZFTileIO backend'

            storage_directory, _ = unifiedIO.split_cluster_url(
                storage_directory)

        self.base_dir = storage_directory
        self.tile_size = pyramid_tile_size

        self.pyramid_valid = False

        self._mdh = NestedClassMDHandler(mdh)
        self._mdh['Pyramid.TileSize'] = self.tile_size

        self.n_tiles_x = n_tiles_x
        self.n_tiles_y = n_tiles_y
        self.depth = depth

        self.x0 = x0
        self.y0 = y0
        self.pixel_size = pixel_size
        # TODO - should we be re-assigning these on load, not just when we create a new pyramid?
        self._mdh['Pyramid.x0'] = x0
        self._mdh['Pyramid.y0'] = y0
        self._mdh['Pyramid.PixelSize'] = pixel_size

        if backend != ClusterPZFTileIO and not os.path.exists(self.base_dir):
            # exist_ok guards against another process creating the directory
            # between the existence check above and this call.
            os.makedirs(self.base_dir, exist_ok=True)

        #self._tilecache = TileCache()

        if backend is None:
            backend = infer_tileio_backend(self.base_dir)

        # one tile store per suffix, all rooted at the same directory
        self._imgs = backend(base_dir=self.base_dir, suff='img')
        self._acc = backend(base_dir=self.base_dir, suff='acc')
        self._occ = backend(base_dir=self.base_dir, suff='occ')
Exemplo n.º 3
0
    def loadInput(self, filename, key='input'):
        """
        Read `filename` and place its contents in the namespace.

        The file extension selects the loader: '.h5r' / '.hdf' files are
        opened with pytables and handed to `_inject_tables_from_hdf5`; '.csv',
        '.xls' and '.xlsx' are not supported and raise NotImplementedError;
        anything else is loaded as an `ImageStack` and stored under `key`.
        """
        from PYME.IO import unifiedIO
        import os

        extension = os.path.splitext(filename)[1]

        if extension == '.csv':
            logger.error('loading .csv not supported yet')
            raise NotImplementedError

        if extension in ('.xls', '.xlsx'):
            logger.error('loading .xls not supported yet')
            raise NotImplementedError

        if extension not in ('.h5r', '.hdf'):
            # everything that is not tabular is treated as image data
            self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
            return

        import tables
        from PYME.IO import h5rFile

        try:
            with unifiedIO.local_or_temp_filename(filename) as local_name:
                with h5rFile.openH5R(local_name, mode='r')._h5file as h5f:
                    self._inject_tables_from_hdf5(key, h5f, local_name,
                                                  extension)
        except tables.exceptions.HDF5ExtError:
            # access issue, likely due to multiple processes
            if not unifiedIO.is_cluster_uri(filename):
                # not a cluster file, so a retry through the cluster makes no
                # sense. Propagate the exception to the user.
                raise

            # Retry, this time forcing access through the dataserver.
            # NOTE: it is unclear why this should succeed where
            # local_or_temp_filename() fails, as the file is still opened /
            # copied independently, albeit in the same process that is doing
            # the writing. It relies on one of: a quirk of the GIL, a quirk in
            # HDF5 locking, or copying the file to a stream being much faster
            # than opening it with pytables. The copy-vs-open explanation
            # matches what was observed with old style spooling analysis,
            # where copying a file before opening it in VisGUI was more
            # reliable than opening it directly. An inherent race condition
            # remains, so we risk swapping a predictable failure for a less
            # frequent one.
            # TODO - consider whether h5r_part might be a better choice.
            # FIXME: (DB) I'm not comfortable with this kind of special case
            # retry logic here, and would much prefer an alternative
            # workaround, a refactor into something like
            # h5rFile.open_robust(), or just letting this fail. Left in place
            # for now to get chained recipes working, but should be revisited.
            from PYME.IO import clusterIO

            relative_filename, server_filter = unifiedIO.split_cluster_url(
                filename)
            file_as_bytes = clusterIO.get_file(relative_filename,
                                               serverfilter=server_filter,
                                               local_short_circuit=False)

            # open the downloaded bytes as an in-memory HDF5 image
            in_memory_file = tables.open_file('in-memory.h5',
                                              driver='H5FD_CORE',
                                              driver_core_image=file_as_bytes,
                                              driver_core_backing_store=0)
            with in_memory_file as h5f:
                self._inject_tables_from_hdf5(key, h5f, filename, extension)
Exemplo n.º 4
0
def local_or_named_temp_filename(url):
    """ riff of PYME.IO.unifiedIO.local_or_temp_filename, but one which returns
    a filename with a matching file stub to the original rather than a random
    temporary filename

    Resolution order: an existing local file is used directly; for a cluster
    URI the local path reported by `clusterIO.get_local_path` is used if there
    is one; otherwise the file is fetched with `clusterIO.get_file` into a
    temporary directory, under its original base name. The temporary
    directory (and the copy inside it) is removed once the generator is
    resumed or closed after the yield.

    Parameters
    ----------
    url : str
        local path or pyme-cluster url

    Yields
    ------
    str
        path to a (temporary) file so `url` can be loaded using modules which
        expect a local filename

    Raises
    ------
    IOError
        if `url` is neither an existing local file nor a cluster URI
    """
    from PYME.IO.FileUtils import nameUtils
    from PYME.IO import unifiedIO
    import tempfile

    filename = nameUtils.getFullExistingFilename(url)

    if os.path.exists(filename):
        yield filename
    elif unifiedIO.is_cluster_uri(url):
        from PYME.IO import clusterIO

        name, clusterfilter = unifiedIO.split_cluster_url(filename)

        localpath = clusterIO.get_local_path(name, clusterfilter)
        if localpath:
            yield localpath
        else:
            with tempfile.TemporaryDirectory() as temp_dir:
                # keep the original base name so the stub matches `url`
                temp_path = os.path.join(temp_dir, os.path.split(name)[-1])
                with open(temp_path, 'wb') as f:
                    f.write(clusterIO.get_file(name, clusterfilter))

                # The file is closed (and therefore fully flushed) before its
                # name is handed out, so the consumer never has to open a file
                # which this process still holds open for writing.
                yield temp_path

    else:
        raise IOError('Path "%s" could not be found' % url)