Example #1
0
    def fromArraysAsImages(self, arrays):
        """Create a Series object from a sequence of numpy ndarrays resident in memory on the driver.

        The arrays will be interpreted as though each represents a single time point - effectively the same
        as if converting Images to a Series, with each array representing a volume image at a particular
        point in time. Thus in the resulting Series, the value of the record with key (0,0,0) will be
        array([arrays[0][0,0,0], arrays[1][0,0,0],... arrays[n][0,0,0]]).

        The dimensions of the resulting Series will be *opposite* that of the passed numpy array. Their dtype will not
        be changed.

        Parameters
        ----------
        arrays: ndarray or sequence of ndarrays
            Arrays to combine; all must share the same shape and dtype.

        Raises
        ------
        ValueError
            If `arrays` is empty, or if the passed arrays differ in shape or dtype.
        """
        # if passed a single array, cast it to a sequence of length 1
        if isinstance(arrays, ndarray):
            arrays = [arrays]

        # guard empty input explicitly; arrays[0] below would otherwise raise an opaque IndexError
        if not len(arrays):
            raise ValueError("fromArraysAsImages expects at least one array, got an empty sequence")

        # check that shapes and dtypes of passed arrays are consistent
        shape = arrays[0].shape
        dtype = arrays[0].dtype
        for ary in arrays:
            if ary.shape != shape:
                raise ValueError("Inconsistent array shapes: first array had shape %s, but other array has shape %s" %
                                 (str(shape), str(ary.shape)))
            if ary.dtype != dtype:
                raise ValueError("Inconsistent array dtypes: first array had dtype %s, but other array has dtype %s" %
                                 (str(dtype), str(ary.dtype)))

        # itertools.product varies the last axis fastest; reversing each index tuple
        # yields keys whose first coordinate changes fastest, matching Series key convention
        shapeiters = (xrange(n) for n in shape)
        keys = [idx[::-1] for idx in itertools.product(*shapeiters)]

        # one row per spatial position, one column per time point
        values = vstack([ary.ravel() for ary in arrays]).T

        dims = Dimensions.fromTuple(shape[::-1])

        return Series(self.sc.parallelize(zip(keys, values), self.minPartitions), dims=dims, dtype=str(dtype))
Example #2
0
    def fromArrays(self, arrays):
        """Create a Series object from a sequence of numpy ndarrays resident in memory on the driver.

        The arrays will be interpreted as though each represents a single time point - effectively the same
        as if converting Images to a Series, with each array representing a volume image at a particular
        point in time. Thus in the resulting Series, the value of the record with key (0,0,0) will be
        array([arrays[0][0,0,0], arrays[1][0,0,0],... arrays[n][0,0,0]]).

        The dimensions of the resulting Series will be *opposite* that of the passed numpy array. Their dtype will not
        be changed.

        Parameters
        ----------
        arrays: ndarray or sequence of ndarrays
            Arrays to combine; all must share the same shape and dtype.

        Raises
        ------
        ValueError
            If `arrays` is empty, or if the passed arrays differ in shape or dtype.
        """
        # if passed a single array, cast it to a sequence of length 1
        if isinstance(arrays, ndarray):
            arrays = [arrays]

        # guard empty input explicitly; arrays[0] below would otherwise raise an opaque IndexError
        if not len(arrays):
            raise ValueError("fromArrays expects at least one array, got an empty sequence")

        # check that shapes and dtypes of passed arrays are consistent
        shape = arrays[0].shape
        dtype = arrays[0].dtype
        for ary in arrays:
            if ary.shape != shape:
                raise ValueError("Inconsistent array shapes: first array had shape %s, but other array has shape %s" %
                                 (str(shape), str(ary.shape)))
            if ary.dtype != dtype:
                raise ValueError("Inconsistent array dtypes: first array had dtype %s, but other array has dtype %s" %
                                 (str(dtype), str(ary.dtype)))

        # itertools.product varies the last axis fastest; reversing each index tuple
        # yields keys whose first coordinate changes fastest, matching Series key convention
        shapeiters = (xrange(n) for n in shape)
        keys = [idx[::-1] for idx in itertools.product(*shapeiters)]

        # one row per spatial position, one column per time point
        values = vstack([ary.ravel() for ary in arrays]).T

        dims = Dimensions.fromTuple(shape[::-1])

        return Series(self.sc.parallelize(zip(keys, values), self.minPartitions), dims=dims, dtype=str(dtype))
Example #3
0
    def fromMultipageTif(self, datapath, ext="tif", blockSize="150M",
                         newdtype='smallfloat', casting='safe',
                         startidx=None, stopidx=None):
        """Load a Series object from multipage tiff files.

        Parameters
        ----------

        datapath: string
            Path to data files or directory, specified as either a local filesystem path or in a URI-like format,
            including scheme. A datapath argument may include a single '*' wildcard character in the filename.

        ext: string, optional, default "tif"
            Extension required on data files to be loaded.

        blockSize: string formatted as e.g. "64M", "512k", "2G", or positive int. optional, default "150M"
            Requested size of Series partitions in bytes (or kilobytes, megabytes, gigabytes).

        newdtype: dtype or dtype specifier or string 'smallfloat' or None, optional, default 'smallfloat'
            Numpy dtype of output series data. Most methods expect Series data to be floating-point. Input data will be
            cast to the requested `newdtype` if not None - see Data `astype()` method.

        casting: 'no'|'equiv'|'safe'|'same_kind'|'unsafe', optional, default 'safe'
            Casting method to pass on to numpy's `astype()` method; see numpy documentation for details.

        startidx, stopidx: nonnegative int. optional.
            Indices of the first and last-plus-one data file to load, relative to the sorted filenames matching
            `datapath` and `ext`. Interpreted according to python slice indexing conventions.

        Returns
        -------
        Series
            A new Series whose dimensions are the reverse of the loaded image dimensions, with an
            integer index running over the loaded time points.
        """
        seriesblocks, metadata = self._getSeriesBlocksFromMultiTif(datapath, ext=ext, blockSize=blockSize,
                                                                   newdtype=newdtype, casting=casting,
                                                                   startidx=startidx, stopidx=stopidx)
        # metadata unpacks as: (image dimensions, number of time points, dtype string)
        dims, npointsinseries, datatype = metadata
        return Series(seriesblocks, dims=Dimensions.fromTuple(dims[::-1]), dtype=datatype,
                      index=arange(npointsinseries))
Example #4
0
 def __init__(self, rdd, index=None, dims=None):
     """Initialize a Series from an RDD, optionally recording its index and dimensions."""
     super(Series, self).__init__(rdd)
     self._index = index
     if isinstance(dims, (tuple, list)):
         # plain tuples/lists are promoted to a Dimensions object before caching
         from thunder.rdds.keys import Dimensions
         dims = Dimensions.fromNumpyDimsTuple(dims)
     self._dims = dims
Example #5
0
 def __init__(self, rdd, dims=None, nrecords=None, dtype=None):
     """Initialize an Images object from an RDD of image records.

     Parameters
     ----------
     rdd: RDD of (key, value) image records
     dims: Dimensions or Dimensions-castable value (e.g. tuple), optional
         Spatial dimensions of each image; left as given (possibly None) if already Dimensions.
     nrecords: int, optional
         Number of records, forwarded to the superclass.
     dtype: numpy dtype or dtype string, optional
         Forwarded to the superclass.

     Raises
     ------
     TypeError
         If `dims` cannot be cast to a Dimensions object.
     """
     super(Images, self).__init__(rdd, nrecords=nrecords, dtype=dtype)
     if dims and not isinstance(dims, Dimensions):
         try:
             dims = Dimensions.fromTuple(dims)
         except Exception:
             # narrowed from a bare `except:`, which would also swallow
             # KeyboardInterrupt/SystemExit
             raise TypeError("Images dims parameter must be castable to Dimensions object, got: %s" % str(dims))
     self._dims = dims
Example #6
0
 def __init__(self, rdd, dims=None, nimages=None, dtype=None):
     """Initialize an Images object from an RDD of image records.

     Parameters
     ----------
     rdd: RDD of (key, value) image records
     dims: Dimensions or Dimensions-castable value (e.g. tuple), optional
         Spatial dimensions of each image; left as given (possibly None) if already Dimensions.
     nimages: int, optional
         Number of images; cached on the instance.
     dtype: numpy dtype or dtype string, optional
         Forwarded to the superclass.

     Raises
     ------
     TypeError
         If `dims` cannot be cast to a Dimensions object.
     """
     super(Images, self).__init__(rdd, dtype=dtype)
     if dims and not isinstance(dims, Dimensions):
         try:
             dims = Dimensions.fromTuple(dims)
         except Exception:
             # narrowed from a bare `except:`, which would also swallow
             # KeyboardInterrupt/SystemExit
             raise TypeError("Images dims parameter must be castable to Dimensions object, got: %s" % str(dims))
     self._dims = dims
     self._nimages = nimages
Example #7
0
 def dims(self):
     """Return the dimensions of this data set, computing and caching them on first access.

     Each RDD partition folds its keys into a Dimensions object, and partial
     results are merged pairwise with `mergeDims` in a reduce.
     NOTE(review): assumes each RDD key is a coordinate tuple whose length equals
     the size of the first record's key - confirm against the loaders' key format.
     """
     from thunder.rdds.keys import Dimensions
     if self._dims is None:
         # inspect the first record to learn how many coordinates each key carries
         entry = self.populateParamsFromFirstRecord()[0]
         n = size(entry)
         d = self.rdd.keys().mapPartitions(lambda i: [Dimensions(
             i, n)]).reduce(lambda x, y: x.mergeDims(y))
         self._dims = d
     return self._dims
Example #8
0
    def fromTif(self,
                dataPath,
                ext="tif",
                blockSize="150M",
                newDtype='smallfloat',
                casting='safe',
                startIdx=None,
                stopIdx=None,
                recursive=False):
        """Load a Series object from multipage tiff files.

        Parameters
        ----------

        dataPath: string
            Local filesystem path or URI-like string (including scheme) of data files or a directory;
            may contain a single '*' wildcard character in the filename.

        ext: string, optional, default "tif"
            Extension required on data files to be loaded.

        blockSize: string formatted as e.g. "64M", "512k", "2G", or positive int. optional, default "150M"
            Requested size of Series partitions in bytes (or kilobytes, megabytes, gigabytes).

        newDtype: dtype or dtype specifier or string 'smallfloat' or None, optional, default 'smallfloat'
            Numpy dtype of output series data; input data is cast via the Data `astype()` method
            unless this is None.

        casting: 'no'|'equiv'|'safe'|'same_kind'|'unsafe', optional, default 'safe'
            Casting method forwarded to numpy's `astype()`; see numpy documentation for details.

        startIdx, stopIdx: nonnegative int. optional.
            Slice-convention indices of the first and last-plus-one data file to load, relative to
            the sorted filenames matching `dataPath` and `ext`.

        recursive: boolean, default False
            If true, recursively descend directories rooted at dataPath, loading every file in the
            tree whose extension matches 'ext'. Currently implemented only for local filesystems
            (not s3).
        """
        blocks, meta = self._getSeriesBlocksFromMultiTif(dataPath, ext=ext, blockSize=blockSize,
                                                         newDtype=newDtype, casting=casting,
                                                         startIdx=startIdx, stopIdx=stopIdx,
                                                         recursive=recursive)
        # meta unpacks as: (image dimensions, number of time points, dtype string)
        dims, nPointsInSeries, seriesDtype = meta
        return Series(blocks,
                      dims=Dimensions.fromTuple(dims[::-1]),
                      dtype=seriesDtype,
                      index=arange(nPointsInSeries))
Example #9
0
 def __init__(self, rdd, nrecords=None, dtype=None, index=None, dims=None):
     """Initialize a Series object from an RDD of series records.

     Parameters
     ----------
     rdd: RDD of (key, value) series records
     nrecords: int, optional
         Number of records, forwarded to the superclass.
     dtype: numpy dtype or dtype string, optional
         Forwarded to the superclass.
     index: array-like, optional
         Index labelling the positions within each series record.
     dims: Dimensions or Dimensions-castable value (e.g. tuple), optional
         Spatial dimensions of the underlying data.

     Raises
     ------
     TypeError
         If `dims` cannot be cast to a Dimensions object.
     """
     super(Series, self).__init__(rdd, nrecords=nrecords, dtype=dtype)
     # single assignment replaces the original `= None` followed by a conditional
     # overwrite; result is identical for both None and non-None index
     self._index = index
     if dims and not isinstance(dims, Dimensions):
         try:
             dims = Dimensions.fromTuple(dims)
         except Exception:
             # narrowed from a bare `except:`, which would also swallow
             # KeyboardInterrupt/SystemExit
             raise TypeError(
                 "Series dims parameter must be castable to Dimensions object, got: %s"
                 % str(dims))
     self._dims = dims
Example #10
0
 def populateParamsFromFirstRecord(self):
     """Fetch the first record via the superclass, caching block dimensions from the key's origShape."""
     rec = super(SimpleBlocks, self).populateParamsFromFirstRecord()
     self._dims = Dimensions.fromTuple(rec[0].origShape)
     return rec
Example #11
0
 def populateParamsFromFirstRecord(self):
     """Fetch the first record via the superclass, caching image dimensions from the value's shape."""
     firstRecord = super(Images, self).populateParamsFromFirstRecord()
     self._dims = Dimensions.fromTuple(firstRecord[1].shape)
     return firstRecord
Example #12
0
 def populateParamsFromFirstRecord(self):
     """Read the first record (via the superclass), cache its image shape as Dimensions, and return it."""
     rec = super(Images, self).populateParamsFromFirstRecord()
     _, img = rec
     self._dims = Dimensions.fromTuple(img.shape)
     return rec
Example #13
0
 def populateParamsFromFirstRecord(self):
     """Read the first record (via the superclass), cache its key's origShape as Dimensions, and return it."""
     rec = super(SimpleBlocks, self).populateParamsFromFirstRecord()
     key, _ = rec
     self._dims = Dimensions.fromTuple(key.origShape)
     return rec