def tohdf5(self, filepath, nodepath='/ctable', mode='w',
           cparams=None, cname=None):
    """Write this object into an HDF5 file.

    Parameters
    ----------
    filepath : string
        The path of the HDF5 file.
    nodepath : string
        The path of the node inside the HDF5 file.
    mode : string
        The mode to open the PyTables file.  Default is 'w'rite mode.
    cparams : cparams object
        The compression parameters.  The defaults are the same as for
        the current bcolz environment.
    cname : string
        Any of the compressors supported by PyTables (e.g. 'zlib').  The
        default is to use 'blosc' as meta-compressor in combination with
        one of its compressors (see `cparams` parameter above).

    See Also
    --------
    ctable.fromhdf5

    """
    if bcolz.tables_here:
        import tables as tb
    else:
        raise ValueError("you need PyTables to use this functionality")

    if os.path.exists(filepath):
        raise IOError("path '%s' already exists" % filepath)

    f = tb.open_file(filepath, mode=mode)
    cparams = cparams if cparams is not None else bcolz.defaults.cparams
    cname = cname if cname is not None else "blosc:" + cparams['cname']
    filters = tb.Filters(complevel=cparams['clevel'],
                         shuffle=cparams['shuffle'],
                         complib=cname)
    t = f.create_table(f.root, nodepath[1:], self.dtype, filters=filters,
                       expectedrows=len(self))
    # Set the attributes
    for key, val in self.attrs:
        t.attrs[key] = val
    # Copy the data, one block at a time
    for block in bcolz.iterblocks(self):
        t.append(block)
    f.close()
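# A minimal round-trip sketch for tohdf5/fromhdf5.  The file name
# 'toy.h5' and the toy data are invented for illustration, not taken
# from the source above.
import os

import numpy as np
import bcolz

ra = np.array([(i, i * 2.) for i in range(1000)], dtype='i4,f8')
ct = bcolz.ctable(ra)
if os.path.exists('toy.h5'):
    os.remove('toy.h5')          # tohdf5 refuses to overwrite existing paths
ct.tohdf5('toy.h5', nodepath='/ctable')
ct2 = bcolz.ctable.fromhdf5('toy.h5', nodepath='/ctable')
assert len(ct2) == len(ct)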
def _create_id_index_map(ctable):
    ''' create a dictionary mapping ids to indices (source) '''
    i = 0
    id_index_map = {}
    for block in bcolz.iterblocks(ctable['id']):
        for item in block:
            id_index_map[str(item)] = i
            i += 1
    return id_index_map
def _create_index_id_map(ctable):
    ''' create a dictionary taking an index to an id (target) '''
    i = 0
    index_id_map = {}
    for block in bcolz.iterblocks(ctable['id']):
        for item in block:
            index_id_map[i] = str(item)
            i += 1
    return index_id_map
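# A hypothetical usage sketch for the two helpers above.  The column
# names and values are made up; the only assumption carried over from
# the source is that the ctable has an 'id' column.
import numpy as np
import bcolz

ct = bcolz.ctable([np.array([10, 20, 30]), np.array([1., 2., 3.])],
                  names=['id', 'value'])
id_index = _create_id_index_map(ct)    # {'10': 0, '20': 1, '30': 2}
index_id = _create_index_id_map(ct)    # {0: '10', 1: '20', 2: '30'}
assert index_id[id_index['20']] == '20'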
def test01(self):
    """Testing `iterblocks()` (w/ start, stop)"""
    a = np.ones((2, 3), dtype="i4")
    b = bcolz.ones((1000, 3), dtype="i4")
    # print "b->", `b`
    l, s = 0, 0
    for block in bcolz.iterblocks(b, blen=2, start=10, stop=100):
        assert_array_equal(a, block, "Arrays are not equal")
        l += len(block)
        s += block.sum()
    self.assertEqual(l, 90)      # stop - start = 100 - 10 = 90 rows
    self.assertEqual(s, 90 * 3)  # each row of ones sums to 3
def test00(self):
    """Testing `iterblocks()` (no start, stop, step)"""
    N = 1000
    a = np.ones((2, 3), dtype="i4")
    b = bcolz.ones((N, 3), dtype="i4")
    # print "b->", `b`
    l, s = 0, 0
    for block in bcolz.iterblocks(b, blen=2):
        assert_array_equal(a, block, "Arrays are not equal")
        l += len(block)
        s += block.sum()
    self.assertEqual(l, N)      # all N rows visited
    self.assertEqual(s, N * 3)  # each row of ones sums to 3
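# For a case where a Gauss-summation check actually applies, iterate
# over an arange-backed carray instead of ones.  A minimal sketch; N
# and blen are arbitrary choices.
import bcolz

N = 1000
a = bcolz.arange(N, dtype='i8')
s = sum(int(block.sum()) for block in bcolz.iterblocks(a, blen=100))
assert s == (N - 1) * N // 2   # Gauss: 0 + 1 + ... + (N-1)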
# Row-by-row using an iterator
# t0 = time()
# f = tb.open_file(filepath, 'w')
# t = f.create_table(f.root, nodepath[1:], ct.dtype)
# for row in ct:
#     t.append([row])
# f.close()
# tt = time() - t0
# print("time with iterator: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using blocked write
t0 = time()
f = tb.open_file(filepath, 'w')
t = f.create_table(f.root, nodepath[1:], ct.dtype)
for block in bcolz.iterblocks(ct):
    t.append(block)
f.close()
tt = time() - t0
print("time with blocked write: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using the generic implementation
os.remove(filepath)
t0 = time()
# ct.tohdf5(filepath, nodepath)
ct.tohdf5(filepath, nodepath, cname="blosc:blosclz")
tt = time() - t0
print("time with tohdf5: %.2f (%.2f GB/s)" % (tt, dsize / tt))
# print(repr(ct))
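# The benchmark fragment above presupposes some setup.  A minimal
# sketch of what it needs; the row count, dtype, and paths are
# invented for illustration.  dsize is the uncompressed size in GB,
# matching the GB/s figures in the prints.
import os
from time import time

import tables as tb
import bcolz

N = int(1e7)                         # assumed row count
filepath, nodepath = 'bench.h5', '/ctable'
ct = bcolz.fromiter(((i, i * 2.) for i in range(N)),
                    dtype='i4,f8', count=N)
dsize = ct.nbytes / 2. ** 30         # uncompressed size in GB
if os.path.exists(filepath):
    os.remove(filepath)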
def iterblocks2(arr):
    # Sum `arr` one chunk-sized block at a time.
    total = 0.
    for b in bcolz.iterblocks(arr, blen=arr.chunklen):
        total += b.sum()
    return total
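# A quick consistency check for the helper above; the array size and
# dtype are arbitrary.
import bcolz

arr = bcolz.arange(100000, dtype='f8')
assert iterblocks2(arr) == arr.sum()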