    def getSlice(self, ind):
        """Load a single PZF-compressed frame from the cluster and return it
        as a 2D array."""
        frameName = '%s/frame%05d.pzf' % (self.sequenceName, ind)
        sl = PZFFormat.loads(clusterIO.get_file(frameName,
                                                self.clusterfilter))[0]
        return sl.squeeze()
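
A hypothetical usage sketch for the accessor above; `ds` stands in for an instance of the datasource class and the frame count is illustrative:

import numpy as np

frames = [ds.getSlice(i) for i in range(3)]  # first three frames of the series
stack = np.stack(frames)                     # (3, height, width) array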
Example #2
    def sendData(self, wfile, bpoint=0, epoint=0):
        """Send the file to the client, optionally restricted to the
        inclusive byte range [bpoint, epoint]."""
        from io import BytesIO

        data = clusterIO.get_file(self.fsname)
        f_size = len(data)
        # get_file() returns the whole file as bytes, so wrap it in a
        # BytesIO to get cheap seek/read semantics
        f = BytesIO(data)
        writ = 0
        # honour a partial-content (Range) request if one was given
        if 0 < bpoint < f_size:
            f.seek(bpoint)

        if epoint > bpoint:
            if epoint <= f_size:
                rsize = epoint - bpoint + 1  # range endpoints are inclusive
            else:
                rsize = f_size - bpoint
        else:
            rsize = f_size

        while writ < rsize:
            # read at most 64k at a time, and never past the requested range
            buf = f.read(min(65536, rsize - writ))
            if not buf:
                break
            writ += len(buf)
            wfile.write(buf)
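
The bpoint/epoint arguments map naturally onto HTTP Range requests. Below is a minimal, hypothetical helper (not part of the class above) that converts a 'Range: bytes=M-N' header value into arguments for sendData(); multi-range and suffix-range forms are deliberately ignored:

import re

def parse_range_header(range_value, f_size):
    """Turn 'bytes=M-N' into an inclusive (bpoint, epoint) pair."""
    m = re.match(r'bytes=(\d+)-(\d*)$', range_value)
    if m is None:
        return 0, 0  # no usable range - send the whole file
    bpoint = int(m.group(1))
    epoint = int(m.group(2)) if m.group(2) else f_size - 1
    return bpoint, epoint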
Example #3
    def loadInput(self, filename, key='input'):
        """
        Load input data from a file and inject into namespace
        """
        from PYME.IO import unifiedIO
        import os

        extension = os.path.splitext(filename)[1]
        if extension in ['.h5r', '.hdf']:
            import tables
            from PYME.IO import h5rFile
            try:
                with unifiedIO.local_or_temp_filename(
                        filename) as fn, h5rFile.openH5R(
                            fn, mode='r')._h5file as h5f:
                    self._inject_tables_from_hdf5(key, h5f, fn, extension)
            except tables.exceptions.HDF5ExtError:  # access issue likely due to multiple processes
                if unifiedIO.is_cluster_uri(filename):
                    # try again, this time forcing access through the dataserver
                    # NOTE: it is unclear why this should work when local_or_temp_filename() doesn't
                    # as this still opens / copies the file independently, albeit in the same process as is doing the writing.
                    # The fact that this works likely relies on a quirk of the GIL, a quirk of HDF5
                    # locking, or on copying the file to a stream being much faster than opening it with
                    # pytables. The copy-then-open scenario would match what has been observed with
                    # old-style spooling analysis, where copying a file prior to opening it in VisGUI was
                    # more reliable than opening it directly. This retains, however, an inherent race
                    # condition, so we risk replacing a predictable failure with a less frequent one.
                    # TODO - consider whether h5r_part might be a better choice.
                    # FIXME: (DB) I'm not comfortable with having this kind of special case retry logic here, and would
                    # much prefer if we could find an alternative workaround, refactor into something like h5rFile.open_robust(),
                    # or just let this fail). Leaving it for the meantime to get chained recipes working, but we should revisit.
                    from PYME.IO import clusterIO
                    relative_filename, server_filter = unifiedIO.split_cluster_url(
                        filename)
                    file_as_bytes = clusterIO.get_file(
                        relative_filename,
                        serverfilter=server_filter,
                        local_short_circuit=False)
                    with tables.open_file('in-memory.h5',
                                          driver='H5FD_CORE',
                                          driver_core_image=file_as_bytes,
                                          driver_core_backing_store=0) as h5f:
                        self._inject_tables_from_hdf5(key, h5f, filename,
                                                      extension)
                else:
                    #not a cluster file, doesn't make sense to retry with cluster. Propagate exception to user.
                    raise

        elif extension == '.csv':
            logger.error('loading .csv not supported yet')
            raise NotImplementedError('loading .csv not supported yet')
        elif extension in ['.xls', '.xlsx']:
            logger.error('loading .xls not supported yet')
            raise NotImplementedError('loading .xls not supported yet')
        else:
            self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
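
The FIXME above suggests refactoring into something like h5rFile.open_robust(). Below is a minimal, hypothetical sketch of the in-memory fallback as a standalone helper; the name open_h5_in_memory is an assumption, and the body only reuses calls already present in the example:

def open_h5_in_memory(filename):
    """Fetch the raw bytes through the dataserver and open them with the
    HDF5 core driver, so no lock is taken on any on-disk copy.
    Sketch only - not part of PYME."""
    import tables
    from PYME.IO import unifiedIO, clusterIO

    name, serverfilter = unifiedIO.split_cluster_url(filename)
    data = clusterIO.get_file(name, serverfilter=serverfilter,
                              local_short_circuit=False)
    return tables.open_file('in-memory.h5', driver='H5FD_CORE',
                            driver_core_image=data,
                            driver_core_backing_store=0)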
Example #4
    def getEvents(self):
        from PYME.IO import events
        import json
        try:
            ev = json.loads(
                clusterIO.get_file(self.eventFileName,
                                   self.clusterfilter,
                                   timeout=10))
            return events.EventLogger.list_to_array(ev)
        except (IOError, ValueError):
            # our series might not have any events
            return []
Example #5
def test_single_put():
    testdata = b'foo bar\n'  # put_file() expects bytes (see Example #11)
    t = time.time()
    clusterIO.put_file('_testing/test.txt', testdata, 'TEST')

    print('putting a small file took %3.5f s' % (time.time() - t))

    t = time.time()
    clusterIO.put_file('_testing/test1.txt', testdata, 'TEST')

    print('putting a second small file took %3.5f s' % (time.time() - t))

    t = time.time()
    retrieved = clusterIO.get_file('_testing/test.txt', 'TEST')

    print('retrieving a small file took %3.5f s' % (time.time() - t))
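
Since retrieved is otherwise unused, the timing test could end with a round-trip check in the spirit of test_put (Example #11 below); a one-line sketch:

    assert retrieved == testdata  # round trip through the cluster is lossless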
Example #6
from contextlib import contextmanager


@contextmanager
def local_or_named_temp_filename(url):
    """ riff on PYME.IO.unifiedIO.local_or_temp_filename, but one which yields
    a filename with the same stem as the original rather than a random
    temporary filename

    Parameters
    ----------
    url : str
        local path or pyme-cluster url

    Yields
    -------
    str
        path to a (temporary) file so `url` can be loaded using modules which
        expect a local filename
    """
    import os
    import tempfile
    from PYME.IO.FileUtils import nameUtils
    from PYME.IO import unifiedIO

    filename = nameUtils.getFullExistingFilename(url)

    if os.path.exists(filename):
        yield filename
    elif unifiedIO.is_cluster_uri(url):
        from PYME.IO import clusterIO

        # split the original url, consistent with the is_cluster_uri() check above
        name, clusterfilter = unifiedIO.split_cluster_url(url)

        localpath = clusterIO.get_local_path(name, clusterfilter)
        if localpath:
            yield localpath
        else:
            with tempfile.TemporaryDirectory() as temp_dir:
                # write the remote file into a temporary directory, keeping its
                # original basename so downstream loaders see the expected stem
                with open(os.path.join(temp_dir,
                                       os.path.split(name)[-1]), 'wb') as f:
                    s = clusterIO.get_file(name, clusterfilter)
                    f.write(s)
                    f.flush()
                    yield f.name

    else:
        raise IOError('Path "%s" could not be found' % url)
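
A hypothetical usage sketch, assuming the @contextmanager decoration above; tifffile stands in for any loader that insists on a real local path, and the URL is illustrative:

import tifffile

with local_or_named_temp_filename('pyme-cluster://TEST/some/series.tif') as fn:
    data = tifffile.imread(fn)  # sees a file named series.tif, as expected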
Example #7
    def getEvents(self):
        import pandas as pd #defer pandas import for as long as possible
        try:
            ev = pd.read_json(clusterIO.get_file(self.eventFileName, self.clusterfilter))
            if len(ev) == 0:
                return []
            
            ev.columns = ['EventName', 'EventDescr', 'Time']

            evts = np.empty(len(ev), dtype=[('EventName', 'S32'), ('Time', 'f8'), ('EventDescr', 'S256')])
            evts['EventName'] = ev['EventName']
            evts['EventDescr'] = ev['EventDescr']
            evts['Time'] = ev['Time']
            return evts
        except (IOError, ValueError):
            #our series might not have any events
            return []
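
A hypothetical consumer of the structured array returned above; datasource stands for an instance of the class, and ProtocolFocus is an illustrative event name:

evts = datasource.getEvents()
if len(evts):  # getEvents() returns [] when the series has no events
    focus = evts[evts['EventName'] == b'ProtocolFocus']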
Example #8
    def __init__(self, url, queue=None):
        self.seriesName = url
        # a pyme-cluster URL looks like 'pyme-cluster://FILTER/path/to/series'
        self.clusterfilter = url.split('://')[1].split('/')[0]
        self.sequenceName = url.split('://%s/' % self.clusterfilter)[1]
        self.lastShapeTime = 0

        mdfn = '/'.join([self.sequenceName, 'metadata.json'])

        self.mdh = MetaDataHandler.NestedClassMDHandler()
        self.mdh.update(json.loads(clusterIO.get_file(mdfn, self.clusterfilter)))

        self.fshape = None  #(self.mdh['Camera.ROIWidth'],self.mdh['Camera.ROIHeight'])

        self._getNumFrames()
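
For illustration only: the manual string splitting above is equivalent to the following urlparse-based version (not how PYME itself does it):

from urllib.parse import urlparse

parts = urlparse('pyme-cluster://FILTER/path/to/series')
clusterfilter = parts.netloc           # 'FILTER'
sequenceName = parts.path.lstrip('/')  # 'path/to/series'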
Example #9
def file(request, filename):
    filetype = request.GET.get('type', 'raw')
    if filetype == 'raw':
        return HttpResponse(clusterIO.get_file(filename, use_file_cache=False),
                            content_type='')
    elif filetype in ['tiff', 'h5']:
        from PYME.IO import image
        import tempfile
        img = image.ImageStack(
            filename='pyme-cluster://%s/%s' %
            (clusterIO.local_serverfilter, filename.rstrip('/')),
            haveGUI=False)

        if filetype == 'tiff':
            ext = '.tif'
        else:
            ext = '.' + filetype

        fn = os.path.splitext(os.path.split(filename.rstrip('/'))[-1])[0] + ext

        #note we are being a bit tricky here to ensure our temporary file gets deleted when we are done
        # 1) We create the temporary file using the tempfile module. This gets automagically deleted when we close the
        #    file (at the end of the with block)
        # 2) We pass the filename of the temporary file to img.Save. This will mean that a second file object / file handle
        #    gets created, the contents get written, and the file gets closed
        #with tempfile.NamedTemporaryFile(mode='w+b', suffix=ext) as outf:

        # don't use a context manager as this closes our file prematurely - rely on a cascading close through HTTPResponse
        # and FileWrapper instead
        outf = tempfile.NamedTemporaryFile(mode='w+b', suffix=ext)
        img.Save(outf.name)

        #seek to update temporary file (so that it knows the new length)
        outf.seek(0)

        wrapper = FileWrapper(outf)
        response = StreamingHttpResponse(wrapper,
                                         content_type='image/%s' %
                                         ext.lstrip('.'))
        response['Content-Disposition'] = 'attachment; filename=%s' % fn
        response['Content-Length'] = os.path.getsize(outf.name)
        return response
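
A hypothetical urls.py entry wiring this view up; the pattern and the use of django.urls.re_path are assumptions:

from django.urls import re_path

urlpatterns = [
    re_path(r'^file/(?P<filename>.*)$', file, name='file'),
]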
Example #10
    def __init__(self, url, queue=None):
        self.seriesName = url
        # a pyme-cluster URL looks like 'pyme-cluster://FILTER/path/to/series'
        self.clusterfilter = url.split('://')[1].split('/')[0]
        self.sequenceName = url.split('://%s/' % self.clusterfilter)[1]
        self.lastShapeTime = 0

        mdfn = '/'.join([self.sequenceName, 'metadata.json'])

        self.mdh = MetaDataHandler.NestedClassMDHandler()
        self.mdh.update(
            json.loads(clusterIO.get_file(mdfn, self.clusterfilter)))

        self.fshape = None  #(self.mdh['Camera.ROIWidth'],self.mdh['Camera.ROIHeight'])

        self._getNumFrames()

        # if the series is complete when we start, we don't need to update the number of slices
        self._complete = clusterIO.exists(self.eventFileName,
                                          self.clusterfilter)
Example #11
def test_put():
    testdata = b'foo bar\n'
    clusterIO.put_file('_testing/test.txt', testdata, 'TES1')
    retrieved = clusterIO.get_file('_testing/test.txt', 'TES1')
    
    assert testdata == retrieved
Example #12
    def _load(self, filename):
        """Fetch a PZF-compressed file from the cluster and return the
        decompressed frame as a 2D array."""
        from PYME.IO import clusterIO, PZFFormat

        s = clusterIO.get_file(filename)
        return PZFFormat.loads(s)[0].squeeze()
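
Hypothetical usage, reusing the frame-naming convention from the getSlice example at the top; datasource stands for an instance of the class above and the path is illustrative:

frame0 = datasource._load('path/to/series/frame00000.pzf')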