def __init__(self, obj, dshape=None, index=None, metadata=None):
    """
    Set up the table's space, child nodes, datashape and metadata.

    Parameters
    ----------
    obj : Space or iterable
        A ``Space`` instance is adopted directly. Anything else is
        passed through ``injest_iterable`` and the result is unpacked
        into a new ``Space``.
    dshape : str or datashape, optional
        Datashape to overlay on the data. A ``str`` is first parsed
        with ``_dshape``. When falsy, the datashape is obtained from
        ``CArraySource.infer_datashape(obj)``.
    index : optional
        Accepted but not referenced by this constructor.
    metadata : list, optional
        Extra entries appended to ``NDTable._metaheader``.

    Raises
    ------
    ValueError
        If ``CArraySource.check_datashape`` returns a falsy value for
        the supplied datashape.
    """
    self._datashape = dshape
    self._metadata = NDTable._metaheader + (metadata or [])

    if isinstance(dshape, str):
        # Textual datashapes go through the parser first.
        dshape = _dshape(dshape)

    # Values
    # ------
    if isinstance(obj, Space):
        self.space = obj
    else:
        self.space = Space(*injest_iterable(obj))
    # The children are the subspaces, whichever way the space was built.
    self.children = set(self.space.subspaces)

    # Shape
    # -----
    if dshape:
        # A user-supplied datashape must be consistent with the source.
        if not CArraySource.check_datashape(obj, given_dshape=dshape):
            raise ValueError("Datashape is inconsistent with source")
        self._datashape = dshape
    else:
        # Only a raw data source was given: infer the datashape from it.
        self._datashape = CArraySource.infer_datashape(obj)

    self._layout = None
def open(uri=None, mode='a'):
    """Open a Blaze object via an `uri` (Uniform Resource Identifier).

    Parameters
    ----------
    uri : str, optional
        Specifies the URI for the Blaze object.  It can be a regular
        file too.  The scheme selects the storage source:

        * 'carray': ``CArraySource``
        * 'ctable': ``CTableSource``
        * 'sqlite': ``SqliteSource`` (an empty path is passed on as
          ``storage=None``)

        Any other scheme (or none) treats the URI path as the storage
        of a ``CArraySource``.  When `uri` is None, a ``CArraySource``
        built with no arguments is used.

    mode : the open mode (string)
        Specifies the mode in which the object is opened.  The supported
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

        NOTE: this function does not currently consult `mode`.

    Returns
    -------
    out : an Array for array storage, an NDTable for table storage.

    """
    ARRAY = 1
    TABLE = 2

    if uri is None:
        source = CArraySource()
        # Without this, the dispatch below fails on an unbound name.
        structure = ARRAY
    else:
        parsed = urlparse(uri)
        scheme = parsed.scheme

        # One exclusive chain: a 'carray' URI must not fall through to
        # the default branch and have its source rebuilt from the bare
        # path.
        if scheme == 'carray':
            path = os.path.join(parsed.netloc, parsed.path[1:])
            source = CArraySource(params=params(storage=path))
            structure = ARRAY
        elif scheme == 'ctable':
            path = os.path.join(parsed.netloc, parsed.path[1:])
            source = CTableSource(params=params(storage=path))
            structure = TABLE
        elif scheme == 'sqlite':
            path = os.path.join(parsed.netloc, parsed.path[1:])
            source = SqliteSource(params=params(storage=path or None))
            structure = TABLE
        else:
            # Default is to treat the URI as a regular path
            source = CArraySource(params=params(storage=parsed.path))
            structure = ARRAY

    # Don't want a deferred array (yet)
    # return NDArray(source)
    if structure == ARRAY:
        return Array(source)
    elif structure == TABLE:
        return NDTable(source)
def __init__(self, obj, dshape=None, metadata=None, layout=None, params=None):
    """
    Set up datashape, data source, space, layout and metadata.

    Parameters
    ----------
    obj : ByteProvider or object
        A ``ByteProvider`` is stored as ``self.data`` unchanged; any
        other object is wrapped as ``CArraySource(obj, params=params)``.
    dshape : str or datashape, optional
        A string is parsed with ``_dshape``.  When falsy, the datashape
        comes from ``CArraySource.infer_datashape(obj)``; otherwise
        ``CArraySource.check_datashape`` is called on it (its return
        value is not inspected here).
    metadata : list, optional
        Extra entries appended to ``NDArray._metaheader``.
    layout : optional
        Layout to use; when falsy, ``self.data.default_layout()`` is
        used instead.
    params : optional
        Forwarded to ``CArraySource`` and kept as ``self.params``.
    """
    # Datashape
    # ---------
    if isinstance(dshape, basestring):
        dshape = _dshape(dshape)

    if dshape:
        # User-supplied datashape overlaid on the data: run the check.
        CArraySource.check_datashape(obj, given_dshape=dshape)
    else:
        # Raw data source only: infer how it should be laid out.
        dshape = CArraySource.infer_datashape(obj)
    self._datashape = dshape

    # Values
    # ------
    # As in NumPy, the first argument can be of several kinds and the
    # kind decides how the values are obtained.
    if not isinstance(obj, ByteProvider):
        self.data = CArraySource(obj, params=params)
    else:
        self.data = obj

    # children graph nodes
    self.children = []

    self.space = Space(self.data)

    # Layout
    # ------
    self._layout = layout if layout else self.data.default_layout()

    # Metadata
    # --------
    extra_metadata = metadata or []
    self._metadata = NDArray._metaheader + extra_metadata

    # Parameters
    # ----------
    self.params = params
def __init__(self, obj, dshape=None, metadata=None, layout=None, params=None):
    """
    Set up datashape, data source, space, layout and metadata.

    Parameters
    ----------
    obj : object
        The data; always wrapped as ``CArraySource(obj, params)``.
    dshape : datashape, optional
        When falsy, the datashape comes from
        ``CArraySource.infer_datashape(obj)``; otherwise
        ``CArraySource.check_datashape`` is called on it (its return
        value is not inspected here).
    metadata : list, optional
        Extra entries appended to ``NDArray._metaheader``.
    layout : optional
        Layout to use; when falsy, ``ChunkedL(self.data, cdimension=0)``
        is used instead.
    params : optional
        Passed positionally to ``CArraySource`` and kept as
        ``self.params``.
    """
    # Datashape
    # ---------
    if not dshape:
        # The user just passed in a raw data source, try
        # and infer how it should be laid out or fail
        # back on dynamic types.
        self._datashape = dshape = CArraySource.infer_datashape(obj)
    else:
        # The user overlaid their custom dshape on this
        # data, check if it makes sense
        CArraySource.check_datashape(obj, given_dshape=dshape)
        self._datashape = dshape

    # Values
    # ------
    # children graph nodes
    self.children = []

    self.data = CArraySource(obj, params)
    self.space = Space(self.data)

    # Layout
    # ------
    if layout:
        self._layout = layout
    else:
        self._layout = ChunkedL(self.data, cdimension=0)

    # Metadata
    # --------
    self._metadata = NDArray._metaheader + (metadata or [])

    # Parameters
    # ----------
    self.params = params
def zeros(dshape, params=None):
    """ Build an Array whose elements are all zero.

    Parameters
    ----------
    dshape : str, blaze.dshape instance
        Datashape of the resulting object.  A string is parsed with
        ``_dshape`` before use.
    params : blaze.params object
        Any parameter supported by the backend library.  When omitted,
        ``_params()`` is used to derive the carray parameters.

    Returns
    -------
    out : an Array object.  When ``to_cparams`` reports a root
        directory, the zeros are created there and the result of
        ``open(rootdir)`` is returned.

    """
    if isinstance(dshape, basestring):
        dshape = _dshape(dshape)

    shape, dtype = to_numpy(dshape)
    cparams, rootdir, _flavor = to_cparams(params or _params())

    if rootdir is None:
        # No root directory: keep the carray in memory and wrap it.
        zeroed = carray.zeros(shape, dtype, cparams=cparams)
        return Array(CArraySource(zeroed, params=params))

    # Root directory given: create the carray there, then reopen it.
    carray.zeros(shape, dtype, rootdir=rootdir, cparams=cparams)
    return open(rootdir)
def fromiter(iterable, dshape, params=None):
    """ Create an Array and fill it with values from `iterable`.

    Parameters
    ----------
    iterable : iterable object
        An iterable object providing data for the carray.
    dshape : str, blaze.dshape instance
        Specifies the datashape of the outcome object.  Only 1d shapes
        are supported right now.  When the `iterable` should return an
        unknown number of items, a ``TypeVar`` can be used.
    params : blaze.params object
        Any parameter supported by the backend library.

    Returns
    -------
    out : an Array object.

    Raises
    ------
    ValueError
        If the datashape does not have exactly one dimension.

    """
    if isinstance(dshape, basestring):
        dshape = _dshape(dshape)
    shape, dtype = dshape.parameters[:-1], dshape.parameters[-1]

    # Check the shape part.  Exactly one dimension is required; testing
    # only for "more than one" would let a 0-d datashape reach
    # ``shape[0]`` below and fail with an IndexError.
    if len(shape) != 1:
        raise ValueError("shape can be only 1-dimensional")
    length = shape[0]

    # A Fixed dimension gives the exact item count.  Anything else
    # (e.g. a TypeVar) passes -1 to carray.fromiter.
    # NOTE(review): -1 presumably means "read until the iterable is
    # exhausted" -- confirm against the carray documentation.
    count = length.val if isinstance(length, Fixed) else -1

    dtype = dtype.to_dtype()

    # Now, create the Array itself (using the carray backend)
    cparams, rootdir, format_flavor = to_cparams(params or _params())
    if rootdir is not None:
        # Root directory given: create the carray there, then reopen it.
        carray.fromiter(iterable, dtype, count=count,
                        rootdir=rootdir, cparams=cparams)
        return open(rootdir)

    ica = carray.fromiter(iterable, dtype, count=count, cparams=cparams)
    source = CArraySource(ica, params=params)
    return Array(source)
class Array(Indexable):
    """
    Manifest array, does not create a graph. Forces evaluation on every
    call.

    Parameters
    ----------

        obj : A list of byte providers, other NDTables or a Python object.

    Optional
    --------

        dshape : str or dshape
            Manual datashape specification for the array, if None
            then the shape will be inferred if possible. A string is
            parsed into a datashape.

        metadata : list
            Extra metadata entries appended to the class metaheader.

        layout :
            Layout to use; if not given, the data source's default
            layout is used.

        params :
            Parameters forwarded to the data source when ``obj`` is not
            already a byte provider.

    Usage
    -----

        >>> Array([1,2,3])
        >>> Array([1,2,3], dshape='3, int32')
        >>> Array([1,2,3], dshape('3, int32'))
        >>> Array([1,2,3], params=params(clevel=3, storage='file'))

    """

    eclass = MANIFEST
    _metaheader = [
        md.manifest,
        md.arraylike,
    ]

    def __init__(self, obj, dshape=None, metadata=None, layout=None,
                 params=None):

        # Datashape
        # ---------

        if isinstance(dshape, basestring):
            dshape = _dshape(dshape)

        if not dshape:
            # The user just passed in a raw data source, try
            # and infer how it should be laid out or fail
            # back on dynamic types.
            self._datashape = dshape = CArraySource.infer_datashape(obj)
        else:
            # The user overlaid their custom dshape on this
            # data, check if it makes sense.  The result of the check
            # is not inspected here.
            CArraySource.check_datashape(obj, given_dshape=dshape)
            self._datashape = dshape

        # Values
        # ------
        # Mimic NumPy behavior in that we have a variety of
        # possible arguments to the first argument which result
        # in different behavior for the values.

        if isinstance(obj, ByteProvider):
            self.data = obj
        else:
            self.data = CArraySource(obj, params=params)

        # children graph nodes
        self.children = []

        self.space = Space(self.data)

        # Layout
        # ------

        if layout:
            self._layout = layout
        else:
            self._layout = self.data.default_layout()

        # Metadata
        # --------

        # Use this class's own metaheader (manifest, arraylike), not
        # NDArray's.
        self._metadata = self._metaheader + (metadata or [])

        # Parameters
        # ----------
        self.params = params

    #------------------------------------------------------------------------
    # Properties
    #------------------------------------------------------------------------

    @property
    def datashape(self):
        """
        Type deconstructor
        """
        return self._datashape

    @property
    def size(self):
        """
        Size of the Array: the product of its dimensions.
        """
        # TODO: need to generalize, not every Array will look
        # like Numpy
        total = 1
        for dim in self._datashape.parameters[:-1]:
            total *= dim.val
        return total

    @property
    def backends(self):
        """
        The storage backends that make up the space behind the Array.
        """
        return iter(self.space)

    #------------------------------------------------------------------------
    # Basic Slicing
    #------------------------------------------------------------------------

    # Immediate slicing
    def __getitem__(self, indexer):
        cc = self._layout.change_coordinates
        return retrieve(cc, indexer)

    # Immediate slicing ( Side-effectful )
    def __setitem__(self, indexer, value):
        cc = self._layout.change_coordinates
        write(cc, indexer, value)

    def __iter__(self):
        raise NotImplementedError

    def __eq__(self, other):
        # Accept the right-hand operand so that ``a == b`` reaches this
        # NotImplementedError instead of failing with a TypeError.
        raise NotImplementedError

    def __str__(self):
        return generic_str(self, deferred=False)

    def __repr__(self):
        return generic_repr('Array', self, deferred=False)
def open(uri, mode='a', eclass=_eclass.manifest):
    """Open a Blaze object via an `uri` (Uniform Resource Identifier).

    Parameters
    ----------
    uri : str
        Specifies the URI for the Blaze object.  It can be a regular file too.
        The URL scheme indicates the storage type:

          * carray: Chunked array
          * ctable: Chunked table
          * sqlite: SQLite table (the URI 'sqlite://' creates in-memory table)

        If no URI scheme is given, carray is assumed.

    mode : the open mode (string)
        Specifies the mode in which the object is opened.  The supported
        values are:

          * 'r' for read-only
          * 'w' for emptying the previous underlying data
          * 'a' for allowing read/write on top of existing data

        NOTE: this function does not currently consult `mode`.

    eclass : _eclass.manifest or _eclass.delayed
        Selects the returned class: manifest gives Array / Table,
        delayed gives NDArray / NDTable.

    Returns
    -------
    out : an Array or Table object (NDArray or NDTable when delayed).

    Raises
    ------
    ValueError
        If `eclass` is neither ``_eclass.manifest`` nor
        ``_eclass.delayed``.

    """
    # Reject an unknown evaluation class before any storage is touched;
    # otherwise the function would fall off the end and return None.
    if eclass is not _eclass.manifest and eclass is not _eclass.delayed:
        raise ValueError("Unsupported eclass: %r" % (eclass,))

    ARRAY = 1
    TABLE = 2

    parsed = urlparse(uri)
    scheme = parsed.scheme
    path = parsed.netloc + parsed.path

    if scheme == 'ctable':
        source = CTableSource(params=params(storage=path))
        structure = TABLE
    elif scheme == 'sqlite':
        # Empty path means memory storage
        source = SqliteSource(params=params(storage=path or None))
        structure = TABLE
    else:
        # 'carray', and by default any other URI, is treated as a
        # chunked array stored at the given path.
        source = CArraySource(params=params(storage=path))
        structure = ARRAY

    if structure == ARRAY:
        if eclass is _eclass.manifest:
            return Array(source)
        return NDArray(source)

    if eclass is _eclass.manifest:
        return Table(source)
    return NDTable(source)