def test01b(self):
    """Check a vtable built from two btables of different lengths"""
    N = int(1e1)
    dt = 'i4,f8'

    def rows(start, stop):
        # Rows are (i, 2*i) pairs for i in [start, stop)
        return ((i, i*2.) for i in xrange(start, stop))

    # First table holds N+1 rows, second one the remaining N-1 rows
    first = blz.fromiter(rows(0, N+1), dtype=dt, count=N+1,
                         rootdir=self.rootdir)
    second = blz.fromiter(rows(N+1, N*2), dtype=dt, count=N-1,
                          rootdir=self.rootdir)
    vt = blz.vtable((first, second), rootdir=self.rootdir)
    expected = np.fromiter(rows(0, N*2), dtype=dt)
    assert_array_equal(vt[:], expected, "vtable values are not correct")
def test00(self):
    """Read a single row out of a vtable built from a tuple of btables"""
    N = int(1e1)
    dt = 'i4,f8'
    # The two tables use different multipliers (2 and 3) so that the
    # origin of a row can be told from its second field.
    doubled = ((i, i*2.) for i in xrange(N))
    tripled = ((i, i*3.) for i in xrange(N*2))
    first = blz.fromiter(doubled, dtype=dt, count=N, rootdir=self.rootdir)
    second = blz.fromiter(tripled, dtype=dt, count=N*2,
                          rootdir=self.rootdir)
    vt = blz.vtable((first, second), rootdir=self.rootdir)
    # Row N+1 of the vtable is row 1 of the second table: (1, 3.)
    expected = np.array([(1, 3.)], dtype=dt)[0]
    assert_array_equal(vt[N+1], expected, "vtable values are not correct")
def test02a(self):
    """Slice a three-table vtable with only a start index"""
    N = int(1e1)
    dt = 'i4,f8'

    def rows(start, stop):
        # Rows are (i, 2*i) pairs for i in [start, stop)
        return ((i, i*2.) for i in xrange(start, stop))

    # Three consecutive pieces of N+1, N-1 and N rows respectively
    head = blz.fromiter(rows(0, N+1), dtype=dt, count=N+1,
                        rootdir=self.rootdir)
    middle = blz.fromiter(rows(N+1, N*2), dtype=dt, count=N-1,
                          rootdir=self.rootdir)
    tail = blz.fromiter(rows(N*2, N*3), dtype=dt, count=N,
                        rootdir=self.rootdir)
    vt = blz.vtable((head, middle, tail), rootdir=self.rootdir)
    expected = np.fromiter(rows(0, N*3), dtype=dt)
    assert_array_equal(vt[2:], expected[2:], "vtable values are not correct")
def test01b(self):
    """Testing fromiter (long iter, length is a multiple of chunklen)"""
    N = 1e4
    nitems = int(N)
    source = (i for i in xrange(nitems))
    result = blz.fromiter(source, dtype='f8', chunklen=1000, count=nitems)
    # np.arange with a float stop yields a float64 reference array
    expected = np.arange(N)
    assert_array_equal(result[:], expected,
                       "fromiter does not work correctly")
def test04(self):
    """Testing `iter()` method with large zero arrays"""
    # NumPy requires an integer shape; a float such as 1e4 is rejected by
    # current releases, so convert explicitly.
    a = np.zeros(int(1e4), dtype='f8')
    b = blz.barray(a, chunklen=100, rootdir=self.rootdir)
    # Round-trip the values through plain iteration over the barray
    c = blz.fromiter((v for v in b), dtype='f8', count=len(a))
    assert_array_equal(a, c[:], "iterator fails on zeros")
def test01a(self):
    """Build a barray from a long generator with a known length"""
    N = 1e4
    nitems = int(N)
    source = (i for i in xrange(nitems))
    result = blz.fromiter(source, dtype='f8', count=nitems)
    # np.arange with a float stop yields a float64 reference array
    expected = np.arange(N)
    assert_array_equal(result[:], expected,
                       "fromiter does not work correctly")
def test07(self):
    """Testing `iter()` method with `limit` and `skip`"""
    a = np.arange(1e4, dtype='f8')
    b = blz.barray(a, chunklen=100, rootdir=self.rootdir)
    # Skip the first 1010 items, then take the following 1010
    c = blz.fromiter((v for v in b.iter(limit=1010, skip=1010)),
                     dtype='f8', count=1010)
    # Compare plain ndarrays, as the sibling tests do, and report a
    # message that matches what is actually being tested.
    assert_array_equal(a[1010:2020], c[:],
                       "iterator fails with `limit` and `skip`")
def getobject(self):
    """Return a small container of the kind named by `self.flavor`.

    'barray' gives a 10-element zeroed barray and 'btable' a 10-row
    btable; both are created under `self.rootdir` and their type is
    asserted before returning.
    """
    kind = self.flavor
    if kind == 'barray':
        container = blz.zeros(10, dtype="i1", rootdir=self.rootdir)
        expected_type = blz.barray
    elif kind == 'btable':
        rows = ((i, i*2) for i in range(10))
        container = blz.fromiter(rows, dtype='i2,f4', count=10,
                                 rootdir=self.rootdir)
        expected_type = blz.btable
    self.assertEqual(type(container), expected_type)
    return container
def setUp(self):
    """Create the reference NumPy table and the data descriptor to test.

    The descriptor is stored in `self.ddesc`.  Depending on `self.disk`
    it is backed by an on-disk BLZ directory, an HDF5 file (only when
    `tables_is_here` is true) or an in-memory BLZ table.  When
    `self.open` is true the storage is populated first and opened
    read-only; otherwise it is opened for writing and filled through
    `blaze.array`.
    """
    self.dtype = 'i4,f8'
    nrows = self.N
    record_dshape = 'var * {f0: int32, f1: float64}'

    def records():
        # A fresh generator of (i, 2*i) rows for every consumer
        return ((i, i * 2.) for i in range(nrows))

    self.npt = np.fromiter(records(), dtype=self.dtype, count=nrows)

    if self.disk == 'BLZ':
        path = tempfile.mkdtemp(suffix='.blz',
                                prefix='blaze-' + self.__class__.__name__)
        # Only the unique name is kept; the directory itself is removed
        os.rmdir(path)
        if self.open:
            table = blz.fromiter(records(), dtype=self.dtype, count=nrows,
                                 rootdir=path)
            self.ddesc = blaze.BLZ_DDesc(table, mode='r')
        else:
            self.ddesc = blaze.BLZ_DDesc(path, mode='w')
            blaze.array(list(records()), record_dshape, ddesc=self.ddesc)
    elif self.disk == 'HDF5' and tables_is_here:
        dpath = "/table"
        handle, path = tempfile.mkstemp(
            suffix='.hdf5', prefix='hdf5-' + self.__class__.__name__)
        os.close(handle)  # only the path is needed, not the descriptor
        if self.open:
            with tables.open_file(path, "w") as h5f:
                ra = np.fromiter(records(), dtype=self.dtype, count=nrows)
                h5f.create_table('/', dpath[1:], ra)
            self.ddesc = blaze.HDF5_DDesc(path, dpath, mode='r')
        else:
            self.ddesc = blaze.HDF5_DDesc(path, dpath, mode='w')
            blaze.array(list(records()), record_dshape, ddesc=self.ddesc)
    else:
        # No (usable) disk backend requested: use an in-memory BLZ table
        table = blz.fromiter(records(), dtype=self.dtype, count=nrows)
        self.ddesc = blaze.BLZ_DDesc(table, mode='r')
def test03(self):
    """Testing `iterblocks` function with all parameters set"""
    N, blen = int(1e4), 100
    start, stop = blen - 1, 3*blen + 2
    a = blz.fromiter(xrange(N), dtype=np.float64, count=N)
    l, s = 0, 0
    for block in blz.iterblocks(a, blen, start, stop):
        l += len(block)
        s += block.sum()
    # assertEqual replaces the deprecated `assert_` alias and reports both
    # values on failure.
    self.assertEqual(l, stop - start)  # i.e. 2*blen + 3 items
    self.assertEqual(s, np.arange(start, stop).sum())
def test02(self):
    """Testing `iterblocks` function with no stop"""
    N, blen = int(1e4), 100
    start = blen - 1
    a = blz.fromiter(xrange(N), dtype=np.float64, count=N)
    l, s = 0, 0
    for block in blz.iterblocks(a, blen, start):
        l += len(block)
        s += block.sum()
    # assertEqual replaces the deprecated `assert_` alias and reports both
    # values on failure.
    self.assertEqual(l, N - start)
    self.assertEqual(s, np.arange(start, N).sum())
def test01(self):
    """Testing `iterblocks` function with no start, no stop"""
    N, blen = int(1e4), 100
    a = blz.fromiter(xrange(N), dtype=np.float64, count=N)
    l, s = 0, 0
    for block in blz.iterblocks(a, blen):
        # N is a multiple of blen, so every block must be full
        self.assertEqual(len(block), blen)
        l += len(block)
        s += block.sum()
    self.assertEqual(l, N)
    # The sum was accumulated but never verified; check it like the
    # sibling tests do.
    self.assertEqual(s, np.arange(N).sum())
def test00(self):
    """Testing `iterblocks` function with no blen, no start, no stop"""
    N = int(1e4)
    a = blz.fromiter(xrange(N), dtype=np.float64, count=N)
    l, s = 0, 0
    for block in blz.iterblocks(a):
        l += len(block)
        s += block.sum()
    self.assertEqual(l, N)
    # Gauss summation of 0..N-1.  Multiplying before the floor division
    # keeps the result exact for odd N and under true division as well.
    self.assertEqual(s, N * (N - 1) // 2)
def setUp(self):
    """Prepare `self.npt` (reference data) and `self.ddesc` (descriptor).

    `self.disk` selects the backend: 'BLZ' for an on-disk BLZ directory,
    'HDF5' for a PyTables file (requires `tables_is_here`), anything else
    for an in-memory BLZ table.  `self.open` selects between opening
    already populated storage read-only and creating it through
    `blaze.array` in write mode.
    """
    self.dtype = 'i4,f8'
    count = self.N
    table_dshape = 'var * {f0: int32, f1: float64}'

    def gen_rows():
        # Each call returns a new one-shot generator of (i, 2*i) rows
        return ((i, i*2.) for i in range(count))

    self.npt = np.fromiter(gen_rows(), dtype=self.dtype, count=count)

    if self.disk == 'BLZ':
        prefix = 'blaze-' + self.__class__.__name__
        path = tempfile.mkdtemp(suffix='.blz', prefix=prefix)
        # Keep just the unique path name; the empty directory is dropped
        os.rmdir(path)
        if not self.open:
            self.ddesc = blaze.BLZ_DDesc(path, mode='w')
            blaze.array(list(gen_rows()), table_dshape, ddesc=self.ddesc)
        else:
            table = blz.fromiter(gen_rows(), dtype=self.dtype, count=count,
                                 rootdir=path)
            self.ddesc = blaze.BLZ_DDesc(table, mode='r')
    elif self.disk == 'HDF5' and tables_is_here:
        prefix = 'hdf5-' + self.__class__.__name__
        dpath = "/table"
        fd, path = tempfile.mkstemp(suffix='.hdf5', prefix=prefix)
        os.close(fd)  # the open file descriptor is not needed
        if not self.open:
            self.ddesc = blaze.HDF5_DDesc(path, dpath, mode='w')
            blaze.array(list(gen_rows()), table_dshape, ddesc=self.ddesc)
        else:
            with tables.open_file(path, "w") as h5f:
                ra = np.fromiter(gen_rows(), dtype=self.dtype, count=count)
                h5f.create_table('/', dpath[1:], ra)
            self.ddesc = blaze.HDF5_DDesc(path, dpath, mode='r')
    else:
        # Fallback: in-memory BLZ table
        table = blz.fromiter(gen_rows(), dtype=self.dtype, count=count)
        self.ddesc = blaze.BLZ_DDesc(table, mode='r')
def getobject(self):
    """Create and return the test object selected by `self.flavor`.

    A 'barray' flavor yields `blz.zeros(10)` and a 'btable' flavor a
    10-row table of (i, 2*i) records.  The concrete type of the new
    object is asserted before it is handed back.
    """
    flavor = self.flavor
    root = self.rootdir
    if flavor == 'barray':
        result = blz.zeros(10, dtype="i1", rootdir=root)
        self.assertEqual(type(result), blz.barray)
    elif flavor == 'btable':
        pairs = ((i, i * 2) for i in range(10))
        result = blz.fromiter(pairs, dtype='i2,f4', count=10, rootdir=root)
        self.assertEqual(type(result), blz.btable)
    return result
def test_btable(clevel):
    """Benchmark creating and querying a btable compressed with lz4.

    Builds a table of `NR` random rows at compression level `clevel`,
    runs the `squery` selection on it and returns fields f1 and f3 of
    the matching rows as a structured ndarray.  `enter`, `after_create`
    and `after_query` are hooks defined elsewhere in the file
    (presumably measurement checkpoints -- TODO confirm).
    """
    enter()
    nrows = int(NR)
    rows = (mv + np.random.rand(NC) - mv for i in xrange(nrows))
    params = blz.bparams(clevel, cname='lz4')
    tc = blz.fromiter(rows, dtype=dt, bparams=params, count=nrows)
    after_create()

    selected = (row for row in tc.where(squery, 'f1,f3'))
    out = np.fromiter(selected, dtype="f8,f8")
    after_query()
    return out
def test_btable(clevel):
    """Benchmark creating and querying a btable at a compression level.

    Builds a table of `NR` random rows using `blz.bparams(clevel)`, runs
    the `squery` selection on it and returns fields f1 and f3 of the
    matching rows as a structured ndarray.  `enter`, `after_create` and
    `after_query` are hooks defined elsewhere in the file (presumably
    measurement checkpoints -- TODO confirm).
    """
    enter()
    nrows = int(NR)
    rows = (mv+np.random.rand(NC)-mv for i in xrange(nrows))
    params = blz.bparams(clevel)
    tc = blz.fromiter(rows, dtype=dt, bparams=params, count=nrows)
    after_create()

    selected = (row for row in tc.where(squery, 'f1,f3'))
    out = np.fromiter(selected, dtype="f8,f8")
    after_query()
    return out
# Benchmark fragment (Python 2): time the construction of containers from
# the same iterable `x`.  `x`, `y`, `N` and `clevel` are defined outside
# this excerpt.  `x` is consumed several times below, so it must be
# re-iterable rather than a one-shot generator (presumably an xrange --
# TODO confirm against the definitions above).
z = xrange(2, N + 2)

print "Starting benchmark now for creating arrays..."

# Create a ndarray
#x = (i for i in xrange(N))    # alternative: a true (one-shot) iterable
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print "Time for ndarray--> %.3f" % (time() - t0, )
print "out-->", len(out)

#blz.set_num_threads(blz.ncores//2)

# Create a barray (number of items known in advance)
#x = (i for i in xrange(N))    # alternative: a true (one-shot) iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=N, bparams=blz.bparams(clevel))
print "Time for barray--> %.3f" % (time() - t0, )
print "cout-->", len(cout)
#assert_array_equal(out, cout, "Arrays are not equal")

# Create a barray (with unknown size, i.e. count=-1)
#x = (i for i in xrange(N))    # alternative: a true (one-shot) iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=-1, bparams=blz.bparams(clevel))
print "Time for barray (count=-1)--> %.3f" % (time() - t0, )
print "cout-->", len(cout)
#assert_array_equal(out, cout, "Arrays are not equal")

# Retrieve from a structured ndarray: `gen` yields (x, y, z) triples.
# The timer started here is read by code beyond this excerpt.
gen = ((i, j, k) for i, j, k in it.izip(x, y, z))
t0 = time()
def array(obj, dshape=None, caps={'efficient-write': True}, storage=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, it is handed to the underlying container so that the
        input data is coerced into it.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        It is only read here, never modified.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no data descriptor can be built for `obj` with the requested
        capabilities.

    Notes
    -----
    Coercion failures are not translated: whatever exception the
    underlying library raises is propagated as is.

    """
    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it on your own).
        dd = obj
    elif storage is not None:
        # Persistent BLZ container rooted at the storage path
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # Generators must go through fromiter, exactly as in the
            # 'compress' branch below; count=-1 means unknown length.
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1,
                                                rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif 'efficient-write' in caps and caps['efficient-write'] is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif 'compress' in caps and caps['compress'] is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))
    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                         'object of type %r') % type(obj))
    return Array(dd)
def array(obj, dshape=None, caps={'efficient-write': True}, storage=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, it is handed to the underlying container so that the
        input data is coerced into it.

    caps : capabilities dictionary
        A dictionary containing the desired capabilities of the array.
        It is only read here, never modified.

    storage : Storage instance
        A Storage object with the necessary info for storing the data.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    TypeError
        If no data descriptor can be built for `obj` with the requested
        capabilities.

    Notes
    -----
    Coercion failures are not translated: whatever exception the
    underlying library raises is propagated as is.

    """
    dshape = _normalize_dshape(dshape)

    storage = _storage_convert(storage)

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, IDataDescriptor):
        # TODO: Validate the 'caps', convert to another kind
        #       of data descriptor if necessary
        # Note by Francesc: but if it is already an IDataDescriptor I wonder
        # if `caps` should be ignored.  Hmm, probably not...
        #
        # Note by Oscar: Maybe we shouldn't accept a datadescriptor at
        #   all at this level. If you've got a DataDescriptor you are
        #   playing with internal datastructures anyways, go to the
        #   Array constructor directly. If you want to transform to
        #   another datadescriptor... convert it yourself (you are
        #   playing with internal datastructures, remember? you should
        #   be able to do it on your own).
        dd = obj
    elif storage is not None:
        # Persistent BLZ container rooted at the storage path
        dt = None if dshape is None else to_numpy_dtype(dshape)
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            # Generators must go through fromiter, exactly as in the
            # 'compress' branch below; count=-1 means unknown length.
            dd = BLZDataDescriptor(
                blz.fromiter(obj, dtype=dt, count=-1, rootdir=storage.path))
        else:
            dd = BLZDataDescriptor(
                blz.barray(obj, dtype=dt, rootdir=storage.path))
    elif 'efficient-write' in caps and caps['efficient-write'] is True:
        # In-Memory array
        if dshape is None:
            dd = DyNDDataDescriptor(nd.asarray(obj, access='rw'))
        else:
            # Use the uniform/full dtype specification in dynd depending
            # on whether the datashape has a uniform dim
            dt = ndt.type(str(dshape))
            if dt.ndim > 0:
                dd = DyNDDataDescriptor(nd.array(obj, type=dt, access='rw'))
            else:
                dd = DyNDDataDescriptor(nd.array(obj, dtype=dt, access='rw'))
    elif 'compress' in caps and caps['compress'] is True:
        dt = None if dshape is None else to_numpy_dtype(dshape)
        # BLZ provides compression
        if inspect.isgenerator(obj):
            # TODO: Generator logic can go inside barray
            dd = BLZDataDescriptor(blz.fromiter(obj, dtype=dt, count=-1))
        else:
            dd = BLZDataDescriptor(blz.barray(obj, dtype=dt))
    elif isinstance(obj, np.ndarray):
        dd = DyNDDataDescriptor(nd.view(obj))
    elif isinstance(obj, nd.array):
        dd = DyNDDataDescriptor(obj)
    elif isinstance(obj, blz.barray):
        dd = BLZDataDescriptor(obj)
    else:
        raise TypeError(('Failed to construct blaze array from '
                         'object of type %r') % type(obj))
    return Array(dd)
# Benchmark fragment (Python 2): time the construction of containers from
# the same iterable `x`.  `x`, `y`, `N` and `clevel` are defined outside
# this excerpt.  `x` is consumed several times below, so it must be
# re-iterable rather than a one-shot generator (presumably an xrange --
# TODO confirm against the definitions above).
z = xrange(2,N+2)

print "Starting benchmark now for creating arrays..."

# Create a ndarray
#x = (i for i in xrange(N))    # alternative: a true (one-shot) iterable
t0 = time()
out = np.fromiter(x, dtype='f8', count=N)
print "Time for ndarray--> %.3f" % (time()-t0,)
print "out-->", len(out)

#blz.set_num_threads(blz.ncores//2)

# Create a barray (number of items known in advance)
#x = (i for i in xrange(N))    # alternative: a true (one-shot) iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=N, bparams=blz.bparams(clevel))
print "Time for barray--> %.3f" % (time()-t0,)
print "cout-->", len(cout)
#assert_array_equal(out, cout, "Arrays are not equal")

# Create a barray (with unknown size, i.e. count=-1)
#x = (i for i in xrange(N))    # alternative: a true (one-shot) iterable
t0 = time()
cout = blz.fromiter(x, dtype='f8', count=-1, bparams=blz.bparams(clevel))
print "Time for barray (count=-1)--> %.3f" % (time()-t0,)
print "cout-->", len(cout)
#assert_array_equal(out, cout, "Arrays are not equal")

# Retrieve from a structured ndarray: `gen` yields (x, y, z) triples.
# The timer started here is read by code beyond this excerpt.
gen = ((i,j,k) for i,j,k in it.izip(x,y,z))
t0 = time()
def test02(self):
    """Build a barray from an empty iterator of unknown length"""
    empty = np.array([], dtype="f8")
    # count=-1 asks fromiter to consume the iterator until exhausted
    result = blz.fromiter(iter(empty), dtype='f8', count=-1)
    assert_array_equal(result[:], empty, "fromiter does not work correctly")
def array(obj, dshape=None, ddesc=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If neither `obj` nor `ddesc` is given, if `ddesc` is given
        together with an `obj` that already is a DDesc, if `obj` is
        given together with a DyND `ddesc`, or if `obj` cannot be
        converted into a dynd array.

    """
    dshape = _normalize_dshape(dshape)

    # Coerce concrete (non-generator) data to the requested dshape up front
    if ((obj is not None) and
            (not inspect.isgenerator(obj)) and
            (dshape is not None)):
        dt = ndt.type(str(dshape))
        if dt.ndim > 0:
            obj = nd.array(obj, type=dt, access='rw')
        else:
            obj = nd.array(obj, dtype=dt, access='rw')

    if obj is None and ddesc is None:
        raise ValueError('you need to specify at least `obj` or `ddesc`')

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, DDesc):
        if ddesc is None:
            ddesc = obj
            return Array(ddesc)
        else:
            raise ValueError(('you cannot specify `ddesc` when `obj` '
                              'is already a DDesc instance'))

    if ddesc is None:
        # Use a dynd ddesc by default
        try:
            dynd_arr = nd.asarray(obj, access='rw')
        except Exception:
            # `except Exception` lets KeyboardInterrupt/SystemExit through.
            # The one-element tuple keeps the message formatting working
            # when `obj` is itself a tuple.
            raise ValueError(('failed to construct a dynd array from '
                              'object %r') % (obj,))
        ddesc = DyND_DDesc(dynd_arr)
        return Array(ddesc)

    # The DDesc has been specified
    if isinstance(ddesc, DyND_DDesc):
        if obj is not None:
            raise ValueError(('you cannot specify simultaneously '
                              '`obj` and a DyND `ddesc`'))
        return Array(ddesc)
    elif isinstance(ddesc, BLZ_DDesc):
        if inspect.isgenerator(obj):
            dt = None if dshape is None else to_numpy_dtype(dshape)
            # TODO: Generator logic could go inside barray
            ddesc.blzarr = blz.fromiter(obj, dtype=dt, count=-1,
                                        rootdir=ddesc.path, mode=ddesc.mode,
                                        **ddesc.kwargs)
        else:
            if isinstance(obj, nd.array):
                obj = nd.as_numpy(obj)
            # Record measures are stored as tables, anything else as arrays
            if dshape and isinstance(dshape.measure, datashape.Record):
                ddesc.blzarr = blz.btable(
                    obj, rootdir=ddesc.path, mode=ddesc.mode,
                    **ddesc.kwargs)
            else:
                ddesc.blzarr = blz.barray(
                    obj, rootdir=ddesc.path, mode=ddesc.mode,
                    **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        if isinstance(obj, nd.array):
            obj = nd.as_numpy(obj)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            if dshape and isinstance(dshape.measure, datashape.Record):
                # Convert the structured array to unaligned dtype
                # We need that because PyTables only accepts unaligned types,
                # which are the default in NumPy
                obj = np.array(obj, datashape.to_numpy_dtype(dshape.measure))
                f.create_table(where, name, filters=ddesc.filters, obj=obj)
            else:
                f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations

    return Array(ddesc)
def test03(self):
    """Check that fromiter converts f8 input to the requested f4 dtype"""
    source = np.arange(101, dtype="f8")
    # The values 0..100 are exactly representable as f4
    result = blz.fromiter(iter(source), dtype='f4', count=len(source))
    assert_array_equal(result[:], source, "fromiter does not work correctly")
def test04b(self):
    """Build a barray from a large generator when `count` is given"""
    N = 10*1000

    def evens():
        # New generator of the first N even numbers on every call
        return (i*2 for i in xrange(N))

    expected = np.fromiter(evens(), dtype='f8', count=N)
    result = blz.fromiter(evens(), dtype='f8', count=N)
    assert_array_equal(result[:], expected, "iterator with a hint fails")
def test00(self):
    """Build a barray from a short iterator of known length"""
    source = np.arange(1, 111)
    result = blz.fromiter(iter(source), dtype='i4', count=len(source))
    assert_array_equal(result[:], source, "fromiter does not work correctly")
def array(obj, dshape=None, ddesc=None):
    """Create a Blaze array.

    Parameters
    ----------
    obj : array_like
        Initial contents for the array.

    dshape : datashape
        The datashape for the resulting array. By default the
        datashape will be inferred from data. If an explicit dshape is
        provided, the input data will be coerced into the provided
        dshape.

    ddesc : data descriptor instance
        This comes with the necessary info for storing the data.  If
        None, a DyND_DDesc will be used.

    Returns
    -------
    out : a concrete blaze array.

    Raises
    ------
    ValueError
        If neither `obj` nor `ddesc` is given, if `ddesc` is given
        together with an `obj` that already is a DDesc, if `obj` is
        given together with a DyND `ddesc`, or if `obj` cannot be
        converted into a dynd array.

    """
    dshape = _normalize_dshape(dshape)

    # Coerce concrete (non-generator) data to the requested dshape up front
    if ((obj is not None) and
            (not inspect.isgenerator(obj)) and
            (dshape is not None)):
        dt = ndt.type(str(dshape))
        if dt.ndim > 0:
            obj = nd.array(obj, type=dt, access='rw')
        else:
            obj = nd.array(obj, dtype=dt, access='rw')

    if obj is None and ddesc is None:
        raise ValueError('you need to specify at least `obj` or `ddesc`')

    if isinstance(obj, Array):
        return obj
    elif isinstance(obj, DDesc):
        if ddesc is None:
            ddesc = obj
            return Array(ddesc)
        else:
            raise ValueError(('you cannot specify `ddesc` when `obj` '
                              'is already a DDesc instance'))

    if ddesc is None:
        # Use a dynd ddesc by default
        try:
            dynd_arr = nd.asarray(obj, access='rw')
        except Exception:
            # `except Exception` lets KeyboardInterrupt/SystemExit through.
            # The one-element tuple keeps the message formatting working
            # when `obj` is itself a tuple.
            raise ValueError(('failed to construct a dynd array from '
                              'object %r') % (obj,))
        ddesc = DyND_DDesc(dynd_arr)
        return Array(ddesc)

    # The DDesc has been specified
    if isinstance(ddesc, DyND_DDesc):
        if obj is not None:
            raise ValueError(('you cannot specify simultaneously '
                              '`obj` and a DyND `ddesc`'))
        return Array(ddesc)
    elif isinstance(ddesc, BLZ_DDesc):
        if inspect.isgenerator(obj):
            dt = None if dshape is None else to_numpy_dtype(dshape)
            # TODO: Generator logic could go inside barray
            ddesc.blzarr = blz.fromiter(obj, dtype=dt, count=-1,
                                        rootdir=ddesc.path, mode=ddesc.mode,
                                        **ddesc.kwargs)
        else:
            if isinstance(obj, nd.array):
                obj = nd.as_numpy(obj)
            # Record measures are stored as tables, anything else as arrays
            if dshape and isinstance(dshape.measure, datashape.Record):
                ddesc.blzarr = blz.btable(obj, rootdir=ddesc.path,
                                          mode=ddesc.mode, **ddesc.kwargs)
            else:
                ddesc.blzarr = blz.barray(obj, rootdir=ddesc.path,
                                          mode=ddesc.mode, **ddesc.kwargs)
    elif isinstance(ddesc, HDF5_DDesc):
        if isinstance(obj, nd.array):
            obj = nd.as_numpy(obj)
        with tb.open_file(ddesc.path, mode=ddesc.mode) as f:
            where, name = split_path(ddesc.datapath)
            if dshape and isinstance(dshape.measure, datashape.Record):
                # Convert the structured array to unaligned dtype
                # We need that because PyTables only accepts unaligned types,
                # which are the default in NumPy
                obj = np.array(obj, datashape.to_numpy_dtype(dshape.measure))
                f.create_table(where, name, filters=ddesc.filters, obj=obj)
            else:
                f.create_earray(where, name, filters=ddesc.filters, obj=obj)
        ddesc.mode = 'a'  # change into 'a'ppend mode for further operations

    return Array(ddesc)