def _where_helper(agg, cond, otherwise):
    """Pick values from `agg` where `cond` holds, else from `otherwise`.

    `cond` must be a boolean aggregate aligned with `agg`; its own missing
    mask is discarded. `otherwise` may be an aligned Aggregate, a plain
    number (int, float or NumPy scalar), or None (meaning "missing").
    Returns a dynd array, viewed as an option type when either `agg` or
    `otherwise` can hold missing values.

    Raises TypeError when `cond` is not boolean or `otherwise` has an
    unsupported type.
    """
    if not isboolean(cond.dshape):
        raise TypeError("cond must be a boolean aggregate")
    _validate_aligned(agg, cond)

    selector, _ = dynd_to_np_mask(cond._data)
    values, values_mask = dynd_to_np_mask(agg._data)
    values_optional = is_option(agg._data.dtype)
    # Append trailing unit axes so the condition broadcasts against values.
    while selector.ndim < values.ndim:
        selector = np.expand_dims(selector, -1)

    if isinstance(otherwise, Aggregate):
        _validate_aligned(agg, otherwise)
        fallback, fallback_mask = dynd_to_np_mask(otherwise._data)
        while fallback.ndim < values.ndim:
            fallback = np.expand_dims(fallback, -1)
            fallback_mask = np.expand_dims(fallback_mask, -1)
        fallback_optional = is_option(otherwise._data.dtype)
    elif isinstance(otherwise, (int, float, np.generic)):
        fallback = otherwise
        fallback_mask = False
        fallback_optional = False
    elif otherwise is None:
        # None stands for "missing": use the sentinel for the values' dtype.
        fallback = dynd_missing_types[values.dtype]
        fallback_mask = False
        fallback_optional = True
    else:
        raise TypeError("`otherwise` got unknown"
                        " type: {0}".format(type(otherwise)))

    picked = np.where(selector, values, fallback)
    if not (values_optional or fallback_optional):
        return nd.asarray(picked)

    # Re-mark every position that was missing in either input.
    picked[values_mask | fallback_mask] = dynd_missing_types[picked.dtype]
    result = nd.asarray(picked)
    return result.view_scalars('?' + str(result.dtype))
def test_numpy_struct_scalar(self):
    # Indexing into a structured array yields a NumPy struct scalar object
    struct_scalar = np.array([(10, 11, 12)], dtype='i4,i8,f8')[0]
    expected_tp = ndt.type('c{f0: int32, f1: int64, f2: float64}')
    expected_val = {'f0': 10, 'f1': 11, 'f2': 12}
    # Each constructor is exercised with the default access and with 'rw'
    access_cases = (
        ({}, 'immutable'),
        ({'access': 'rw'}, 'readwrite'),
    )
    for construct in (nd.array, nd.asarray):
        for kwargs, expected_flags in access_cases:
            b = construct(struct_scalar, **kwargs)
            self.assertEqual(nd.type_of(b), expected_tp)
            self.assertEqual(nd.as_py(b), expected_val)
            self.assertEqual(b.access_flags, expected_flags)
    # nd.view should fail
    self.assertRaises(RuntimeError, nd.view, struct_scalar)
def test_access_asarray(self):
    # (keyword arguments, access flags the resulting array must report)
    cases = (
        ({}, 'readwrite'),
        ({'access': 'rw'}, 'readwrite'),
        ({'access': 'r'}, 'immutable'),
    )
    for kwargs, expected in cases:
        a = nd.asarray(1, **kwargs)
        self.assertEqual(a.access_flags, expected)
def f(self):
    """Apply `op` to this aggregate's data and wrap it in a ScalarAggregate.

    When the underlying dynd dtype is an option type, positions that were
    missing in the input are overwritten with the missing sentinel of the
    result dtype and the result is viewed as an option type again.
    """
    values, missing = dynd_to_np_mask(self._data)
    result = op(values)
    if not is_option(self._data.dtype):
        wrapped = nd.asarray(result)
    else:
        result[missing] = dynd_missing_types[result.dtype]
        wrapped = nd.asarray(result).view_scalars('?' + str(result.dtype))
    return ScalarAggregate(wrapped, self.x_axis, self.y_axis)
def test_grouped_slices(self):
    # Each case: (rows, expected groups, expected grouped values).
    # Column 0 of every row is the group key, the remaining columns the data.
    cases = (
        ([[1, 2, 3], [1, 4, 5]],
         [1],
         [[[2, 3], [4, 5]]]),
        ([[1, 2, 3], [3, 1, 7], [1, 4, 5], [2, 6, 7], [3, 2, 5]],
         [1, 2, 3],
         [[[2, 3], [4, 5]], [[6, 7]], [[1, 7], [2, 5]]]),
    )
    for rows, expected_groups, expected_values in cases:
        a = nd.asarray(rows)
        gb = nd.groupby(a[:, 1:], a[:, 0])
        self.assertEqual(nd.as_py(gb.groups), expected_groups)
        self.assertEqual(nd.as_py(gb), expected_values)
def test_simple(self): a = nd.asarray([1, 2, 3]) self.assertEqual(nd.type_of(a), ndt.type('3 * int32')) # Modifying 'a' should affect 'b', because it's a view b = nd.asarray(a) self.assertEqual(nd.as_py(b), [1, 2, 3]) a[1] = 10 self.assertEqual(nd.as_py(b), [1, 10, 3]) # asarray no longer supports changing the access flags. # Once a different api is available in the Python bindings # for changing the access flags, these tests should be rewritten. """# Can take a readonly view, but still modify the original
def __getitem__(self, key):
    """Read `key` from the node at `self.datapath` and wrap it in a DyND_DDesc.

    The file at `self.path` is opened read-only for the duration of the read.
    """
    with tb.open_file(self.path, mode='r') as f:
        node = f.get_node(self.datapath)
        selection = node[key]
        # The returned arrays are temporary buffers,
        # so must be flagged as readonly.
        dyndarr = nd.asarray(selection, access='readonly')
    return DyND_DDesc(dyndarr)
def __getitem__(self, key):
    """Read `key` from the HDF5 node and wrap it in a DyNDDataDescriptor.

    The file at `self.filename` is opened read-only and the node is looked
    up from the file root using `self.datapath`.
    """
    with tb.open_file(self.filename, mode='r') as f:
        node = f.get_node(f.root, self.datapath)
        selection = node[key]
        # The returned arrays are temporary buffers,
        # so must be flagged as readonly.
        dyndarr = nd.asarray(selection, access='readonly')
    return DyNDDataDescriptor(dyndarr)
def getattr(self, name):
    """Return the column `name` of a compound dataset as a readonly DyND_DDesc.

    Raises IndexError when the node at `self.datapath` has no `cols`
    attribute.
    """
    with tb.open_file(self.path, mode=self.mode) as f:
        dset = f.get_node(self.datapath)
        if not hasattr(dset, 'cols'):
            raise IndexError("not an HDF5 compound dataset")
        column = getattr(dset.cols, name)
        return DyND_DDesc(nd.asarray(column[:], access='readonly'))
def getattr(self, name):
    """Return the column `name` of a compound dataset as a readonly DyND_DDesc.

    Raises IndexError when the node at `self.datapath` has no `cols`
    attribute.
    """
    with netCDF4.Dataset(self.path, mode=self.mode) as f:
        dset = get_node(f, self.datapath)
        if not hasattr(dset, 'cols'):
            raise IndexError("not an netCDF4 compound dataset")
        column = getattr(dset.cols, name)
        return DyND_DDesc(nd.asarray(column[:], access='readonly'))
def test_dynd_scalar_asarray(self):
    np_scalar = np.array(3, dtype='int64')
    viewed = nd.asarray(np_scalar)
    checks = (
        (nd.type_of(viewed), ndt.int64),
        (nd.as_py(viewed), 3),
        (viewed.access_flags, 'readwrite'),
    )
    for actual, expected in checks:
        self.assertEqual(actual, expected)
    # Ensure it's a view: a write through dynd must be visible from NumPy
    viewed[...] = 4
    self.assertEqual(np_scalar[()], 4)
def op_ckernel(self, op):
    """Replace the ckernel description in ``op.args[0]`` with a concrete one.

    ``op.args`` is unpacked as ``(ckernel, args)``. ``args[0]`` supplies the
    output type and ``args[1:]`` the input operands. When ``ckernel`` is a
    dict, its ``'tag'`` entry selects how the kernel is built:

    * ``'elwise'``    -- lifted with ``_lowlevel.lift_ckernel_deferred``
    * ``'reduction'`` -- lifted with
      ``_lowlevel.lift_reduction_ckernel_deferred``
    * ``'rolling'``   -- built with
      ``_lowlevel.make_rolling_ckernel_deferred``
    * ``'ckfactory'`` -- produced by calling the supplied factory

    A non-dict ``ckernel`` is stored back unchanged.

    Raises
    ------
    ValueError
        For a rolling kernel whose ``minp`` is not 0.
    RuntimeError
        For an unknown tag.
    """
    op_ndim = len(op.type.shape)
    result_ndim = self.env.get('result-ndim', 0)
    ckernel, args = op.args
    in_types = [self.get_arg_type(arg) for arg in args[1:]]
    out_type = ndt.type(str(args[0].type))

    if not isinstance(ckernel, dict):
        op.args[0] = ckernel
        return

    tag = ckernel['tag']
    if tag == 'elwise':
        ck = ckernel['ckernel']
        if (op.metadata['rank'] < op_ndim and
                self.env.get('stream-outer', False) and
                result_ndim == op_ndim):
            # Replace the leading dimension type with 'strided' in each
            # operand if we're streaming it for processing BLZ
            # TODO: Add dynd tp.subarray(N) function like datashape has
            for i, tp in enumerate(in_types):
                if tp.ndim == result_ndim:
                    in_types[i] = ndt.make_strided_dim(tp.element_type)
            out_type = ndt.make_strided_dim(out_type.element_type)

        op.args[0] = _lowlevel.lift_ckernel_deferred(ck,
                                                     [out_type] + in_types)
    elif tag == 'reduction':
        ck = ckernel['ckernel']
        assoc = ckernel['assoc']
        comm = ckernel['comm']
        ident = ckernel['ident']
        # The identity is optional; when given it is passed as a dynd array.
        ident = None if ident is None else nd.asarray(ident)
        axis = ckernel['axis']
        keepdims = ckernel['keepdims']
        op.args[0] = _lowlevel.lift_reduction_ckernel_deferred(
            ck, in_types[0],
            axis=axis, keepdims=keepdims,
            associative=assoc, commutative=comm,
            reduction_identity=ident)
    elif tag == 'rolling':
        ck = ckernel['ckernel']
        window = ckernel['window']
        minp = ckernel['minp']
        if minp != 0:
            raise ValueError('rolling window with minp != 0 not supported yet')
        op.args[0] = _lowlevel.make_rolling_ckernel_deferred(
            out_type, in_types[0], ck, window)
    elif tag == 'ckfactory':
        ckfactory = ckernel['ckernel_factory']
        op.args[0] = ckfactory(out_type, *in_types)
    else:
        # Fixed spelling of the message (was 'unnrecognized').
        raise RuntimeError('unrecognized ckernel tag %s' % tag)
def test_simple(self):
    def store(target, index, value):
        target[index] = value

    source = nd.asarray([1, 2, 3], access='rw')
    self.assertEqual(nd.type_of(source), ndt.type('3 * int32'))

    # Modifying 'source' should affect 'view', because it's a view
    view = nd.asarray(source)
    self.assertEqual(nd.as_py(view), [1, 2, 3])
    source[1] = 10
    self.assertEqual(nd.as_py(view), [1, 10, 3])

    # Can take a readonly view, but still modify the original
    readonly_view = nd.asarray(source, access='r')
    self.assertEqual(nd.as_py(readonly_view), [1, 10, 3])
    source[1] = 20
    self.assertEqual(nd.as_py(readonly_view), [1, 20, 3])
    # The readonly view we took can't be written to
    self.assertRaises(RuntimeError, store, readonly_view, 1, 30)

    # Asking for immutable makes a copy instead of a view
    frozen = nd.asarray(source, access='immutable')
    self.assertEqual(nd.as_py(frozen), [1, 20, 3])
    source[1] = 40
    self.assertEqual(nd.as_py(frozen), [1, 20, 3])

    # Asking for immutable from a non-immutable
    # readonly array makes a copy
    readonly_alias = nd.asarray(source, access='r')
    frozen = nd.asarray(readonly_alias, access='immutable')
    self.assertEqual(nd.as_py(readonly_alias), [1, 40, 3])
    self.assertEqual(nd.as_py(frozen), [1, 40, 3])
    source[1] = 50
    self.assertEqual(nd.as_py(readonly_alias), [1, 50, 3])
    self.assertEqual(nd.as_py(frozen), [1, 40, 3])
def _get_dynd(self, key):
    """Read `key` from the HDF5 dataset and return it as a readonly dynd array.

    When `key` is a tuple longer than the number of dimensions and the
    measure is a Record, its last element is resolved against the record's
    field names via `get` and moved to the front of the key.
    """
    if (isinstance(key, tuple) and
            len(key) > len(self.dshape.shape) and
            isinstance(self.dshape[-1], datashape.Record)):
        rec_key = get(key[-1], self.dshape[-1].names)
        if not isinstance(rec_key, tuple):
            rec_key = (rec_key,)
        key = rec_key + key[:-1]

    with h5py.File(self.path, mode='r') as f:
        dataset = f[self.datapath]
        result = np.asarray(dataset.__getitem__(key))
    return nd.asarray(result, access='readonly')
def dynd_op(op, left, right):
    """Apply the binary `op` to two operands, carrying missing values through.

    dynd arrays are converted to a NumPy array plus a missing mask; any
    other operand is passed to `op` as-is. When either operand has an option
    dtype, positions missing in either input are set to the missing sentinel
    of the result dtype and the result is viewed as an option type.

    Raises ValueError when the result dtype has no known missing sentinel.
    """
    def unpack(operand):
        # -> (value handed to `op`, missing mask, whether dtype is an option)
        if isinstance(operand, nd.array):
            values, missing = dynd_to_np_mask(operand)
            return values, missing, is_option(operand.dtype)
        return operand, False, False

    left_np, left_missing, left_option = unpack(left)
    right_np, right_missing, right_option = unpack(right)

    out = op(left_np, right_np)
    if not (left_option or right_option):
        return nd.asarray(out)
    if out.dtype not in dynd_missing_types:
        raise ValueError("Missing type unknown")

    out[left_missing | right_missing] = dynd_missing_types[out.dtype]
    out = nd.asarray(out)
    return nd.asarray(out).view_scalars('?' + str(out.dtype))
def test_categorical_agg():
    """Exercise shape, dshape and category lookup of a CategoricalAggregate."""
    data = np.array([[(0, 12, 0), (3, 0, 3)],
                     [(12, 12, 12), (24, 0, 0)]], dtype='i4')
    cats = ['a', 'b', 'c']
    agg = CategoricalAggregate(nd.asarray(data), cats, x_axis, y_axis)

    assert agg.shape == (2, 2)
    assert agg.dshape == dshape('3 * int32')
    # Every category is reachable as an attribute
    assert all(hasattr(agg, name) for name in cats)

    # A single category yields a scalar aggregate, a list a sub-selection
    assert isinstance(agg['a'], ScalarAggregate)
    assert_dynd_eq(agg['a']._data, np.array([[0, 3], [12, 24]]), False)
    assert_dynd_eq(agg[['a', 'c']]._data, data[:, :, [0, 2]], False)

    # Unknown categories are rejected for both item forms...
    for unknown in ('d', ['a', 'd']):
        with pytest.raises(KeyError):
            agg[unknown]
    # ...and for attribute access
    with pytest.raises(AttributeError):
        agg.d
def test_access_from_pyobject(self):
    # Without an explicit access request the array reports "immutable"
    a = nd.asarray([1, 2, 3])
    self.assertEqual(a.access_flags, "immutable")
    # (requested access, access flags the array must report)
    cases = (
        ("immutable", "immutable"),
        ("readonly", "immutable"),
        ("r", "immutable"),
        ("readwrite", "readwrite"),
        ("rw", "readwrite"),
    )
    for requested, reported in cases:
        a = nd.asarray([1, 2, 3], access=requested)
        self.assertEqual(a.access_flags, reported)
def test_access_from_pyobject(self):
    # Without an explicit access request the array reports 'readwrite'
    a = nd.asarray([1, 2, 3])
    self.assertEqual(a.access_flags, 'readwrite')
    # (requested access, access flags the array must report)
    cases = (
        ('immutable', 'immutable'),
        ('readonly', 'immutable'),
        ('r', 'immutable'),
        ('readwrite', 'readwrite'),
        ('rw', 'readwrite'),
    )
    for requested, reported in cases:
        a = nd.asarray([1, 2, 3], access=requested)
        self.assertEqual(a.access_flags, reported)
def __getitem__(self, key):
    """Select one category (-> ScalarAggregate) or a list of categories
    (-> CategoricalAggregate) along the last axis of the data.

    Raises KeyError when a ValueError occurs during the lookup, which
    includes a category that is not in `self._cats`.
    """
    try:
        if not isinstance(key, list):
            # Single category
            i = self._cats.index(key)
            return ScalarAggregate(self._data[:, :, i],
                                   self.x_axis, self.y_axis)

        # List of categories
        inds = [self._cats.index(k) for k in key]
        dtype = self._data.dtype
        if is_option(dtype):
            # View through the option's value type before converting to NumPy
            source = self._data.view_scalars(dtype.value_type)
        else:
            source = self._data
        out = nd.as_numpy(source)
        out = nd.asarray(out[:, :, inds]).view_scalars(dtype)
        return CategoricalAggregate(out, key, self.x_axis, self.y_axis)
    except ValueError:
        raise KeyError("'{0}'".format(key))
def test_access_from_readwrite_array(self):
    # `a` is a readwrite array
    a = nd.array([1, 2, 3], access='rw')
    self.assertEqual(nd.asarray(a).access_flags, 'readwrite')
    # (requested access, access flags the result must report)
    cases = (
        ('immutable', 'immutable'),
        ('readonly', 'readonly'),
        ('r', 'readonly'),
        ('readwrite', 'readwrite'),
        ('rw', 'readwrite'),
    )
    for requested, reported in cases:
        b = nd.asarray(a, access=requested)
        self.assertEqual(b.access_flags, reported)
def test_access_from_readwrite_array(self):
    # `a` is a readwrite array
    a = nd.array([1, 2, 3], access="rw")
    self.assertEqual(nd.asarray(a).access_flags, "readwrite")
    # (requested access, access flags the result must report)
    cases = (
        ("immutable", "immutable"),
        ("readonly", "readonly"),
        ("r", "readonly"),
        ("readwrite", "readwrite"),
        ("rw", "readwrite"),
    )
    for requested, reported in cases:
        b = nd.asarray(a, access=requested)
        self.assertEqual(b.access_flags, reported)
def finalize_std(bases, **kwargs):
    """Build a ScalarAggregate holding ``sqrt(m2s / counts)``.

    `bases` unpacks into three values (sums, counts, m2s); the sums are not
    used. Division and invalid-value warnings are suppressed during the
    computation. The result is viewed as '?float64'.
    """
    _, counts, m2s = bases
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = as_float64(m2s) / counts
        x = np.sqrt(ratio)
    optional = nd.asarray(x).view_scalars('?float64')
    return ScalarAggregate(optional, **kwargs)
def __getitem__(self, key):
    """Read `key` from the HDF5 dataset and return a readonly dynd array."""
    with h5py.File(self.path, mode='r') as f:
        dataset = f[self.datapath]
        selection = dataset[key]
        result = np.asarray(selection)
    return nd.asarray(result, access='readonly')
def getattr(self, name):
    """Return column `name` of a btable as a readonly DyND_DDesc.

    Raises IndexError when `self.blzarr` is not a `blz.btable`.
    """
    if not isinstance(self.blzarr, blz.btable):
        raise IndexError("not a btable BLZ dataset")
    column = self.blzarr[name]
    return DyND_DDesc(nd.asarray(column, access='readonly'))
def __getitem__(self, key):
    """Index the BLZ array with `key` and wrap it in a DyNDDataDescriptor."""
    selection = self.blzarr[key]
    # The returned arrays are temporary buffers,
    # so must be flagged as readonly.
    readonly = nd.asarray(selection, access='readonly')
    return DyNDDataDescriptor(readonly)
def test_access_asarray(self):
    # (keyword arguments, access flags the resulting array must report)
    cases = (
        ({}, "immutable"),
        ({"access": "rw"}, "readwrite"),
    )
    for kwargs, expected in cases:
        a = nd.asarray(1, **kwargs)
        self.assertEqual(a.access_flags, expected)
def array(obj, dshape=None, caps=None, storage=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        When omitted (None), ``{'efficient-write': True}`` is used.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no data descriptor can be built for an object of this type.

    Bugs
    ----
    The explicit dshape is not applied when `obj` is already an Array or
    a data descriptor, nor in the final ndarray / nd.array / barray
    fallbacks. When the data cannot be coerced to an explicit dshape an
    exception should be raised.

    """
    # A fresh dict per call replaces the former mutable default argument.
    if caps is None:
        caps = {'efficient-write': True}

    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it in your own.
        dd = obj
    elif storage is not None:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # Generators must go through blz.fromiter, as the compress
            # branch below does; `count` is a fromiter argument and the
            # remaining keywords are forwarded to the barray constructor.
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1,
                                                rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif 'efficient-write' in caps and caps['efficient-write'] is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif 'compress' in caps and caps['compress'] is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))

    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                         'object of type %r') % type(obj))
    return Array(dd)
import numpy as np
import PIL
import pytest

from datashader.aggregates import (ScalarAggregate, CategoricalAggregate,
                                   RecordAggregate)
from datashader.core import LinearAxis
import datashader.transfer_functions as tf

# NOTE(review): `nd` is used below but no import for it is visible in this
# chunk -- presumably `from dynd import nd`; confirm it exists in the file.

# Axes shared by every aggregate fixture in this module.
x_axis = LinearAxis((0, 10))
y_axis = LinearAxis((1, 5))

# Fixture `a`: 3x3 int32 values 10..18 with the diagonal set to 0.
a = np.arange(10, 19, dtype='i4').reshape((3, 3))
a[[0, 1, 2], [0, 1, 2]] = 0
s_a = ScalarAggregate(nd.asarray(a), x_axis=x_axis, y_axis=y_axis)

# Fixture `b`: 3x3 float64 values with NaN on the diagonal, built as an
# optional ('?float64') dynd array.
b = np.arange(10, 19, dtype='f8').reshape((3, 3))
b[[0, 1, 2], [0, 1, 2]] = np.nan
s_b = ScalarAggregate(nd.array(b, '3 * 3 * ?float64'), x_axis=x_axis, y_axis=y_axis)

# Fixture `c`: 3x3 int64 values with the smallest int64 on the diagonal,
# viewed as '?int64' (presumably that value is the missing marker -- confirm).
c = np.arange(10, 19, dtype='i8').reshape((3, 3))
c[[0, 1, 2], [0, 1, 2]] = np.iinfo('i8').min
s_c = ScalarAggregate(nd.asarray(c).view_scalars('?int64'), x_axis=x_axis, y_axis=y_axis)

# Record aggregate exposing the three fixtures under the names a, b and c.
agg = RecordAggregate(dict(a=s_a, b=s_b, c=s_c), x_axis, y_axis)


@pytest.mark.parametrize(['attr'], ['a', 'b', 'c'])
def test_interpolate(attr):
    # Run a log interpolation between 'pink' and 'red' on each fixture.
    # NOTE(review): no assertion on `img` is visible here; the test may
    # continue beyond this chunk.
    x = getattr(agg, attr)
    img = tf.interpolate(x, 'pink', 'red', how='log').img
def finalize_mean(bases, **kwargs):
    """Build a ScalarAggregate holding ``sums / counts``.

    `bases` unpacks into (sums, counts). Division and invalid-value
    warnings are suppressed during the computation. The result is viewed
    as '?float64'.
    """
    totals, counts = bases
    with np.errstate(divide='ignore', invalid='ignore'):
        x = as_float64(totals) / counts
    optional = nd.asarray(x).view_scalars('?float64')
    return ScalarAggregate(optional, **kwargs)
def array(obj, dshape=None, ddesc=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If neither `obj` nor `ddesc` is given, if `obj` is a DDesc and
        `ddesc` is given too, if `obj` is combined with a DyND `ddesc`,
        or if a dynd array cannot be built from `obj`.

    """
    dshape = _normalize_dshape(dshape)

    # With an explicit dshape, coerce non-generator data up front.
    if ((obj is not None) and
            (not inspect.isgenerator(obj)) and
            (dshape is not None)):
        dt = ndt.type(str(dshape))
        if dt.ndim > 0:
            obj = nd.array(obj, type=dt, access='rw')
        else:
            obj = nd.array(obj, dtype=dt, access='rw')

    if obj is None and ddesc is None:
        raise ValueError('you need to specify at least `obj` or `ddesc`')

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, DDesc):
        if ddesc is None:
            ddesc = obj
            return Array(ddesc)
        else:
            raise ValueError(('you cannot specify `ddesc` when `obj` '
                              'is already a DDesc instance'))

    if ddesc is None:
        # Use a dynd ddesc by default
        try:
            dynd_array = nd.asarray(obj, access='rw')
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not converted into a ValueError. `obj` is
            # wrapped in a 1-tuple so a tuple `obj` formats correctly.
            raise ValueError(('failed to construct a dynd array from '
                              'object %r') % (obj,))
        ddesc = DyND_DDesc(dynd_array)
        return Array(ddesc)

    # The DDesc has been specified
    if isinstance(ddesc, DyND_DDesc):
        if obj is not None:
            raise ValueError(('you cannot specify simultaneously '
                              '`obj` and a DyND `ddesc`'))
        return Array(ddesc)
    elif isinstance(ddesc, BLZ_DDesc):
        if inspect.isgenerator(obj):
            dt = None if dshape is None else to_numpy_dtype(dshape)
            # TODO: Generator logic could go inside barray
            ddesc.blzarr = blz.fromiter(obj, dtype=dt, count=-1,
                                        rootdir=ddesc.path, mode=ddesc.mode,
                                        **ddesc.kwargs)
        else:
            if isinstance(obj, nd.array):
                obj = nd.as_numpy(obj)
            if dshape and isinstance(dshape.measure, datashape.Record):
                ddesc.blzarr = blz.btable(
                    obj, rootdir=ddesc.path, mode=ddesc.mode, **ddesc.kwargs)
            else:
                ddesc.blzarr = blz.barray(
                    obj, rootdir=ddesc.path, mode=ddesc.mode, **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        if isinstance(obj, nd.array):
            obj = nd.as_numpy(obj)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            if dshape and isinstance(dshape.measure, datashape.Record):
                # Convert the structured array to unaligned dtype
                # We need that because PyTables only accepts unaligned types,
                # which are the default in NumPy
                obj = np.array(obj, datashape.to_numpy_dtype(dshape.measure))
                f.create_table(where, name, filters=ddesc.filters, obj=obj)
            else:
                f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations

    return Array(ddesc)
def array(obj, dshape=None, ddesc=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If neither `obj` nor `ddesc` is given, if `obj` is a DDesc and
        `ddesc` is given too, if `obj` is combined with a DyND `ddesc`,
        or if a dynd array cannot be built from `obj`.

    """
    dshape = _normalize_dshape(dshape)

    # With an explicit dshape, coerce non-generator data up front.
    if ((obj is not None) and
            (not inspect.isgenerator(obj)) and
            (dshape is not None)):
        dt = ndt.type(str(dshape))
        if dt.ndim > 0:
            obj = nd.array(obj, type=dt, access='rw')
        else:
            obj = nd.array(obj, dtype=dt, access='rw')

    if obj is None and ddesc is None:
        raise ValueError('you need to specify at least `obj` or `ddesc`')

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, DDesc):
        if ddesc is None:
            ddesc = obj
            return Array(ddesc)
        else:
            raise ValueError(('you cannot specify `ddesc` when `obj` '
                              'is already a DDesc instance'))

    if ddesc is None:
        # Use a dynd ddesc by default
        try:
            dynd_array = nd.asarray(obj, access='rw')
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not converted into a ValueError. `obj` is
            # wrapped in a 1-tuple so a tuple `obj` formats correctly.
            raise ValueError(('failed to construct a dynd array from '
                              'object %r') % (obj,))
        ddesc = DyND_DDesc(dynd_array)
        return Array(ddesc)

    # The DDesc has been specified
    if isinstance(ddesc, DyND_DDesc):
        if obj is not None:
            raise ValueError(('you cannot specify simultaneously '
                              '`obj` and a DyND `ddesc`'))
        return Array(ddesc)
    elif isinstance(ddesc, BLZ_DDesc):
        if inspect.isgenerator(obj):
            dt = None if dshape is None else to_numpy_dtype(dshape)
            # TODO: Generator logic could go inside barray
            ddesc.blzarr = blz.fromiter(obj, dtype=dt, count=-1,
                                        rootdir=ddesc.path, mode=ddesc.mode,
                                        **ddesc.kwargs)
        else:
            if isinstance(obj, nd.array):
                obj = nd.as_numpy(obj)
            if dshape and isinstance(dshape.measure, datashape.Record):
                ddesc.blzarr = blz.btable(obj,
                                          rootdir=ddesc.path,
                                          mode=ddesc.mode,
                                          **ddesc.kwargs)
            else:
                ddesc.blzarr = blz.barray(obj,
                                          rootdir=ddesc.path,
                                          mode=ddesc.mode,
                                          **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        if isinstance(obj, nd.array):
            obj = nd.as_numpy(obj)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            if dshape and isinstance(dshape.measure, datashape.Record):
                # Convert the structured array to unaligned dtype
                # We need that because PyTables only accepts unaligned types,
                # which are the default in NumPy
                obj = np.array(obj, datashape.to_numpy_dtype(dshape.measure))
                f.create_table(where, name, filters=ddesc.filters, obj=obj)
            else:
                f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations

    return Array(ddesc)
def array(obj, dshape=None, caps=None, storage=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        When omitted (None), ``{'efficient-write': True}`` is used.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no data descriptor can be built for an object of this type.

    Bugs
    ----
    The explicit dshape is not applied when `obj` is already an Array or
    a data descriptor, nor in the final ndarray / nd.array / barray
    fallbacks. When the data cannot be coerced to an explicit dshape an
    exception should be raised.

    """
    # A fresh dict per call replaces the former mutable default argument.
    if caps is None:
        caps = {'efficient-write': True}

    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it in your own.
        dd = obj
    elif storage is not None:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # Generators must go through blz.fromiter, as the compress
            # branch below does; `count` is a fromiter argument and the
            # remaining keywords are forwarded to the barray constructor.
            dd = BLZDataDescriptor(
                blz.fromiter(obj, dtype=dt, count=-1, rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif 'efficient-write' in caps and caps['efficient-write'] is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif 'compress' in caps and caps['compress'] is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))

    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                         'object of type %r') % type(obj))
    return Array(dd)