Example #1
    def __init__(self, h5fFile, tablename='FitResults'):
        """ Data source for use with h5r files as saved by the PYME analysis
        component. Takes either an open h5r file or a string filename to be
        opened."""
        from PYME.IO import h5rFile
        import tables  # needed for the isinstance() check below; logger and unNestNames are module-level in the original source
        self.tablename = tablename

        if isinstance(h5fFile, tables.file.File):
            try:
                self.fitResults = getattr(h5fFile.root, tablename)[:]
            except (AttributeError, tables.NoSuchNodeError):
                logger.exception('Was expecting to find a "%s" table' %
                                 tablename)
                raise

            #allow access using unnested original names
            self._keys = unNestNames(
                getattr(h5fFile.root, tablename).description._v_nested_names)

        else:
            if isinstance(h5fFile, h5rFile.H5RFile):
                h5f = h5fFile
            else:
                h5f = h5rFile.openH5R(h5fFile)

            with h5f:
                self.fitResults = h5f.getTableData(tablename, slice(None))
                if (len(self.fitResults) == 0):
                    raise RuntimeError('Was expecting to find a "%s" table' %
                                       tablename)

                #allow access using unnested original names
                self._keys = unNestNames(
                    getattr(h5f._h5file.root,
                            tablename).description._v_nested_names)
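
For context, a minimal usage sketch; 'DataSource' is a stand-in name for the class that owns this __init__ (the snippet shows only the constructor), and the filename is illustrative:

    # hypothetical usage - open by filename; the default 'FitResults' table is read
    ds = DataSource('results.h5r')
    # or name a different table in the same file
    drift = DataSource('results.h5r', tablename='DriftResults')
    print(ds._keys[:5])  # un-nested column names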
Example #2
    def to_hdf(self, filename, tablename='Data', keys=None, metadata=None,
               keep_alive_timeout=0):
        """
        Writes data to a table in an HDF5 file
        
        Parameters
        ----------
        
        filename: string
            the name of the file to save to
        tablename: string [optional]
            the name of the table within the file to save to. Defaults to "Data"
        keys: list [optional]
            a list of column names to save (if keys == None, all columns are saved)
        metadata: a MetaDataHandler instance [optional]
            associated metadata to write to the file
        keep_alive_timeout: float
            a timeout in seconds. If non-zero, the file is held open after we have finished writing to it until the
            timeout elapses. Useful as a performance optimisation when making multiple writes to a single file,
            potentially across multiple threads. NOTE: the keep_alive_timeout is not guaranteed to be observed - it
            is set by the first open call of a given session, so if the file is already open due to a previous openH5R
            call, the timeout requested by that call will be used.
            
        """
        from PYME.IO import h5rFile

        with h5rFile.openH5R(filename, 'a', keep_alive_timeout=keep_alive_timeout) as f:
            f.appendToTable(tablename, self.to_recarray(keys))

            if metadata is not None:
                f.updateMetadata(metadata)
                
            #wait until data is written
            f.flush()
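
A sketch of how the keep-alive can be used to batch writes; the filename, loop, and 'chunks' iterable are illustrative:

    # hypothetical batched-write usage; each element of 'chunks' exposes to_hdf()
    for chunk in chunks:
        # the first call sets a 5 s keep-alive, so the file stays open between
        # iterations instead of being re-opened for every write
        chunk.to_hdf('pooled_results.hdf', tablename='Data', keep_alive_timeout=5.0)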
Example #3
    def to_hdf(self, filename, tablename='Data', keys=None, metadata=None):
        from PYME.IO import h5rFile

        with h5rFile.openH5R(filename, 'a') as f:
            f.appendToTable(tablename, self.to_recarray(keys))

            if metadata is not None:
                f.updateMetadata(metadata)
Example #4
    def loadInput(self, filename, key='input'):
        """
        Load input data from a file and inject into namespace
        """
        from PYME.IO import unifiedIO
        from PYME.IO.image import ImageStack  # used below; imported at module level in the original source
        import os

        extension = os.path.splitext(filename)[1]
        if extension in ['.h5r', '.hdf']:
            import tables
            from PYME.IO import h5rFile
            try:
                with unifiedIO.local_or_temp_filename(
                        filename) as fn, h5rFile.openH5R(
                            fn, mode='r')._h5file as h5f:
                    self._inject_tables_from_hdf5(key, h5f, fn, extension)
            except tables.exceptions.HDF5ExtError:  # access issue likely due to multiple processes
                if unifiedIO.is_cluster_uri(filename):
                    # try again, this time forcing access through the dataserver
                    # NOTE: it is unclear why this should work when local_or_temp_filename() doesn't
                    # as this still opens / copies the file independently, albeit in the same process as is doing the writing.
                    # The fact that this works relies on one of: a quirk of the GIL, a quirk in HDF5 locking, or the fact
                    # that copying the file to a stream is much faster than opening it with pytables. The copy vs pytables open
                    # scenario would match what has been observed with old style spooling analysis where copying a file
                    # prior to opening in VisGUI would work more reliably than opening directly. This retains, however,
                    # an inherent race condition so we risk replacing a predictable failure with a less frequent one.
                    # TODO - consider whether h5r_part might be a better choice.
                    # FIXME: (DB) I'm not comfortable with having this kind of special case retry logic here, and would
                    # much prefer if we could find an alternative workaround, refactor into something like h5rFile.open_robust(),
                    # or just let this fail. Leaving it for the meantime to get chained recipes working, but we should revisit.
                    from PYME.IO import clusterIO
                    relative_filename, server_filter = unifiedIO.split_cluster_url(
                        filename)
                    file_as_bytes = clusterIO.get_file(
                        relative_filename,
                        serverfilter=server_filter,
                        local_short_circuit=False)
                    with tables.open_file('in-memory.h5',
                                          driver='H5FD_CORE',
                                          driver_core_image=file_as_bytes,
                                          driver_core_backing_store=0) as h5f:
                        self._inject_tables_from_hdf5(key, h5f, filename,
                                                      extension)
                else:
                    #not a cluster file, doesn't make sense to retry with cluster. Propagate exception to user.
                    raise

        elif extension == '.csv':
            logger.error('loading .csv not supported yet')
            raise NotImplementedError
        elif extension in ['.xls', '.xlsx']:
            logger.error('loading .xls not supported yet')
            raise NotImplementedError
        else:
            self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
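
A short usage sketch; 'recipe' is a stand-in for the object that owns loadInput() (anything with a namespace dict):

    # hypothetical usage
    recipe.loadInput('results.h5r', key='fit_results')  # HDF5 tables are injected into the namespace
    recipe.loadInput('image.tif')                       # anything else is loaded as an ImageStack
    print(recipe.namespace.keys())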
Example #5
    def _aggregate_h5r(self, path, data):
        """
        Support for results aggregation into an HDF5 file, using pytables.
        We treat any path components after the .h5r as locations within the file (i.e. table names),
        e.g. /path/to/data.h5r/<tablename>
        A few special cases / table names are accommodated:

        MetaData: assumes we have been sent PYME metadata in JSON format, which is saved to the file using the appropriate metadata handler
        No table name: assumes we have a fitResults object (as returned by remFitBuf) and saves to the appropriate tables (as the HDF task queue would)
        """
        import json  # used below; imported at module level in the original source
        import numpy as np
        from io import BytesIO
        from six.moves import cPickle
        from PYME.IO import MetaDataHandler
        from PYME.IO import h5rFile

        path = self.translate_path(path.lstrip('/')[len('__aggregate_h5r'):])
        filename, tablename = path.split('.h5r')
        filename += '.h5r'

        #logging.debug('opening h5r file')
        with h5rFile.openH5R(filename, 'a') as h5f:
            if tablename == '/MetaData':
                mdh_in = MetaDataHandler.CachingMDHandler(json.loads(data))
                h5f.updateMetadata(mdh_in)
            elif tablename == '':
                #legacy fitResults structure
                fitResults = cPickle.loads(data)
                h5f.fileFitResult(fitResults)
            else:
                try:
                    #try to read data as if it was numpy binary formatted
                    data = np.load(BytesIO(data))
                except IOError:
                    #it's not numpy formatted - try json
                    import pandas as pd
                    #FIXME!! - this will work, but will likely be really slow!
                    data = pd.read_json(data).to_records(False)

                #logging.debug('adding data to table')
                h5f.appendToTable(tablename.lstrip('/'), data)
                #logging.debug('added data to table')

        #logging.debug('left h5r file')
        return ResponseOK()
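
A sketch of what a client might send to this endpoint, exercising the numpy-binary branch above; the host, port, and table name are hypothetical, and PUT is assumed as the HTTP verb:

    import numpy as np
    import requests
    from io import BytesIO

    rows = np.zeros(10, dtype=[('x', 'f4'), ('y', 'f4')])  # toy record array
    buf = BytesIO()
    np.save(buf, rows)  # np.load() in the handler reads this back
    requests.put('http://127.0.0.1:8080/__aggregate_h5r/path/to/data.h5r/MyTable',
                 data=buf.getvalue())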
Example #6
    def getQueueData(self, fieldName, *args):
        """Get data, defined by fieldName and potntially additional arguments,  ascociated with queue"""
        if fieldName == 'FitResults':
            startingAt, = args
            #with self.fileResultsLock.rlock:
            #    if self.h5ResultsFile.__contains__('/FitResults'):
            #        res = self.h5ResultsFile.root.FitResults[startingAt:]
            #    else:
            #        res = []
            with h5rFile.openH5R(self.resultsFilename, 'a') as h5f:
                res = h5f.getTableData('FitResults', slice(startingAt, None))

            return res
        elif fieldName == 'PSF':
            # getFullExistingFilename is imported at module level in the original source
            from PYME.IO.load_psf import load_psf

            modName = self.metaData.getEntry('PSFFile')
            res = load_psf(getFullExistingFilename(modName))

            return res
        elif fieldName == 'MAP':
            mapName, = args
            from PYME.IO.image import ImageStack

            print('Serving map: %s' % mapName)
            fn = getFullExistingFilename(mapName)
            # this should handle .tif, .h5, and a few others
            varmap = ImageStack(filename=fn, haveGUI=False).data[:, :, 0].squeeze()

            return varmap
        else:
            return None
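
For context, how a consumer might call this; the 'queue' object and map filename are illustrative:

    # hypothetical usage
    new_fits = queue.getQueueData('FitResults', 100)    # fit results from index 100 onwards
    psf = queue.getQueueData('PSF')                     # the PSF named in the queue metadata
    varmap = queue.getQueueData('MAP', 'dark_map.tif')  # a variance/dark map image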
Example #7
    def fileResults(self, ress):
        """
        File/save the results of fitting multiple frames

        Args:
            ress: list of fit results

        Returns:
            None
        """

        with h5rFile.openH5R(self.resultsFilename, 'a') as h5f:
            for res in ress:
                if res is None:
                    logging.warning('got a None result')
                else:
                    if (len(res.results) > 0):
                        h5f.appendToTable('FitResults', res.results)

                    if (len(res.driftResults) > 0):
                        h5f.appendToTable('DriftResults', res.driftResults)

        self.numClosedTasks += len(ress)
Example #8
    def _aggregate_h5r(self):
        """
        Support for results aggregation into an HDF5 file, using pytables.
        We treat any path components after the .h5r as locations within the file (i.e. table names),
        e.g. /path/to/data.h5r/<tablename>
        A few special cases / table names are accommodated:

        MetaData: assumes we have been sent PYME metadata in JSON format, which is saved to the file using the appropriate metadata handler
        No table name: assumes we have a fitResults object (as returned by remFitBuf) and saves to the appropriate tables (as the HDF task queue would)
        """
        import json  # json and os are imported at module level in the original source
        import os
        import numpy as np
        from io import BytesIO
        from six.moves import cPickle
        from PYME.IO import MetaDataHandler
        from PYME.IO import h5rFile

        # path = self.translate_path(self.path.lstrip('/')[len('__aggregate_h5r'):])
        # filename, tablename = path.split('.h5r')
        # filename += '.h5r'

        filename, tablename = self.path.lstrip(
            '/')[len('__aggregate_h5r'):].split('.h5r')
        filename = self.translate_path(filename + '.h5r')

        data = self._get_data()

        dirname = os.path.dirname(filename)
        #if not os.path.exists(dirname):
        #    os.makedirs(dirname)
        makedirs_safe(dirname)

        #logging.debug('opening h5r file')
        with h5rFile.openH5R(filename, 'a') as h5f:
            if tablename == '/MetaData':
                mdh_in = MetaDataHandler.CachingMDHandler(json.loads(data))
                h5f.updateMetadata(mdh_in)
            elif tablename == '':
                #legacy fitResults structure
                fitResults = cPickle.loads(data)
                h5f.fileFitResult(fitResults)
            else:
                try:
                    # pickle is much faster than the numpy array format (despite the array format being simpler),
                    # so we reluctantly use pickles
                    data = cPickle.loads(data)  # np.loads in the original was a deprecated alias for pickle.loads
                except cPickle.UnpicklingError:
                    try:
                        #try to read data as if it was numpy binary formatted
                        data = np.load(BytesIO(data))
                    except IOError:
                        #it's not numpy formatted - try json
                        import pandas as pd
                        #FIXME!! - this will work, but will likely be really slow!
                        data = pd.read_json(data).to_records(False)

                #logging.debug('adding data to table')
                h5f.appendToTable(tablename.lstrip('/'), data)
                #logging.debug('added data to table')

        #logging.debug('left h5r file')
        if USE_DIR_CACHE:
            cl.dir_cache.update_cache(filename, int(len(data)))

        self.send_response(200)
        self.send_header("Content-Length", "0")
        self.end_headers()
        return
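
The fallback chain above accepts three wire formats; a quick sketch of producing each on the client side (the dtype is illustrative):

    import pickle
    from io import BytesIO
    import numpy as np
    import pandas as pd

    rows = np.zeros(5, dtype=[('A', 'f4'), ('error_x', 'f4')])

    pickled = pickle.dumps(rows)             # first branch - fastest
    buf = BytesIO()
    np.save(buf, rows)                       # second branch - numpy binary
    npy_bytes = buf.getvalue()
    json_str = pd.DataFrame(rows).to_json()  # final fallback - slow but portable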
Example #9
    def get_tabular_part(self, path):
        """

        Parameters
        ----------
        path: str
            OS-translated path to an hdf or h5r file on the dataserver computer.
            Append the part of the file to read after the file extension, e.g.
            .h5r/Events. For arrays, the return format and slices can
            additionally be specified using the following syntax:
            test.h5r/FitResults.json?from=0&to=100. Supported array formats
            include json and npy.

        Returns
        -------
        f: BytesIO
            Requested part of the file encoded as bytes

        """
        import os
        import urllib.parse as urlparse  # os and urlparse are module-level in the original source
        from PYME.IO import h5rFile, clusterResults

        # parse path
        ext = '.h5r' if '.h5r' in path else '.hdf'
        # TODO - should we just use the untranslated path?
        filename, details = path.split(ext + os.sep)
        filename = filename + ext  # path to file on dataserver disk
        query = urlparse.urlparse(details).query
        details = details.split('?')[0]  # str.strip() removes characters, not a suffix, so split the query off instead
        if '.' in details:
            part, return_type = details.split('.')
        else:
            part, return_type = details, ''

        try:
            with h5rFile.openH5R(filename) as h5f:
                if part == 'Metadata':
                    wire_data, output_format = clusterResults.format_results(
                        h5f.mdh, return_type)
                else:
                    # figure out if we have any slicing to do
                    query = urlparse.parse_qs(query)
                    start = int(query.get('from', [0])[0])
                    end = None if 'to' not in query.keys() else int(
                        query['to'][0])
                    wire_data, output_format = clusterResults.format_results(
                        h5f.getTableData(part, slice(start, end)),
                        '.' + return_type)

            f, length = self._string_to_file(wire_data)
            self.send_response(200)
            self.send_header(
                "Content-Type",
                output_format if output_format else 'application/octet-stream')
            self.send_header("Content-Length", length)
            #self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f

        except IOError:
            self.send_error(404,
                            "File not found - %s, [%s]" % (self.path, path))
Example #10
    def getNumQueueEvents(self):
        with h5rFile.openH5R(self.resultsFilename, 'a') as h5f:
            res = len(h5f.events)

        return res
Example #11
    def addQueueEvents(self, events):
        with h5rFile.openH5R(self.resultsFilename, 'a') as h5f:
            h5f.addEvents(events)
Example #12
    def setQueueMetaDataEntries(self, mdh):
        with h5rFile.openH5R(self.resultsFilename, 'a') as h5f:
            h5f.updateMetadata(mdh)

        self.metaData.update(mdh)
Example #13
    def flushMetaData(self):
        if len(self.MDHCache) > 0:
            new_md = dict(self.MDHCache)
            self.MDHCache = []
            with h5rFile.openH5R(self.resultsFilename, 'a') as h5f:
                h5f.updateMetadata(new_md)