def test00c(self):
    """Testing btable opening in "a" mode

    Creates a two-column btable under ``self.rootdir``, obtains it again
    through ``blz.open(..., mode='a')`` and compares its contents with the
    source columns.  Two rows are then appended, the last one is
    overwritten, and the contents are compared once more.
    """
    # Use an integer row count (a float literal such as 1e1 is not a count)
    N = 10
    a = blz.barray(np.arange(N, dtype='i4'))
    b = blz.barray(np.arange(N, dtype='f8') + 1)
    t = blz.btable((a, b), ('f0', 'f1'), rootdir=self.rootdir)
    # Open t
    t = blz.open(rootdir=self.rootdir, mode='a')

    # Check values
    ra = np.rec.fromarrays([a[:], b[:]]).view(np.ndarray)
    assert_array_equal(t[:], ra, "btable values are not correct")

    # Now check some accesses: after these three statements the table is
    # expected to end with the rows (10, 11.0) and (11, 12.0)
    t.append((10, 11.0))
    t.append((10, 11.0))
    t[-1] = (11, 12.0)

    # Check values against freshly built 12-row columns
    N = 12
    a = blz.barray(np.arange(N, dtype='i4'))
    b = blz.barray(np.arange(N, dtype='f8') + 1)
    ra = np.rec.fromarrays([a[:], b[:]]).view(np.ndarray)
    assert_array_equal(t[:], ra, "btable values are not correct")
def test02(self):
    """Compare `blz.ones` with `np.ones` for a (2,2) shape of '(4,)i4'."""
    shape = (2, 2)
    item_dtype = '(4,)i4'
    expected = np.ones(shape, dtype=item_dtype)
    actual = blz.ones(shape, dtype=item_dtype, rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test01b(self):
    """Compare `blz.zeros` with `np.zeros` for length 2 of '(2,4)i4'."""
    length = 2
    item_dtype = '(2,4)i4'
    expected = np.zeros(length, dtype=item_dtype)
    actual = blz.zeros(length, dtype=item_dtype, rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test03b(self):
    """Compare `blz.fill` (list default value) with a NumPy array of 3s."""
    shape = (2, 2)
    item_dtype = '(4,)i4'
    fill_value = [3, 3, 3, 3]
    expected = np.ones(shape, dtype=item_dtype) * 3
    actual = blz.fill(shape, fill_value, dtype=item_dtype,
                      rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test00b(self):
    """Compare `reshape` of a 16000-element `blz.arange` with NumPy."""
    nelems = 16000
    new_shape = (20, 20, 40)
    expected = np.arange(nelems).reshape(new_shape)
    flat = blz.arange(nelems, rootdir=self.rootdir)
    actual = flat.reshape(new_shape)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test00a(self):
    """Compare `reshape` of a 16-element `blz.arange` with NumPy."""
    nelems = 16
    new_shape = (2, 2, 4)
    expected = np.arange(nelems).reshape(new_shape)
    flat = blz.arange(nelems, rootdir=self.rootdir)
    actual = flat.reshape(new_shape)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test00b(self):
    """Testing `__getitem__()` method with a one-argument slice

    Indexes a (27, 2700) array filled with 3 using ``slice(1)`` (which sets
    only the stop) and checks that the barray result has the same shape and
    values as the NumPy result.
    """
    a = np.ones((27, 2700), dtype="i4") * 3
    b = blz.fill((27, 2700), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        b = blz.open(rootdir=self.rootdir)
    sl = slice(1)
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
    assert_array_equal(a[sl], b[sl], "Arrays are not equal")
def testImplicitDtype(self):
    """Build a barray from a 2-d NumPy array without passing a dtype."""
    source = np.eye(6)  # 2d
    wrapped = blz.barray(source, rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        wrapped = blz.open(rootdir=self.rootdir)
    # array equality implies having the same shape
    assert_array_equal(source, wrapped, "Arrays are not equal")
def testExplicitDtype(self):
    """Build a barray from a 2-d NumPy array, passing float64 explicitly."""
    dtype = np.dtype(np.float64)
    source = np.eye(6, dtype=dtype)
    wrapped = blz.barray(source, dtype=dtype, rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        wrapped = blz.open(rootdir=self.rootdir)
    # array equality implies having the same shape
    assert_array_equal(source, wrapped, "Arrays are not equal")
def test04(self):
    """Fill a (2,200) barray, optionally reopen it, then append one row.

    The result is compared with a (3,200) NumPy array of 3s; both use the
    '(4,)i4' dtype.
    """
    item_dtype = '(4,)i4'
    expected = np.ones((3, 200), dtype=item_dtype) * 3
    actual = blz.fill((2, 200), [3, 3, 3, 3], dtype=item_dtype,
                      rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    extra_row = np.ones((1, 200), dtype=item_dtype) * 3
    actual.append(extra_row)
    assert_array_equal(expected, actual, "Arrays are not equal")
def test04c(self):
    """Testing `__getitem__()` method with shape reduction (III)

    Indexes a (50, 40, 3) array with ``(1, slice(1, 4, 2), 2)`` and checks
    that the barray result has the same shape and values as the NumPy
    result.
    """
    a = np.arange(6000).reshape((50, 40, 3))
    b = blz.barray(a, rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        b = blz.open(rootdir=self.rootdir)
    sl = (1, slice(1, 4, 2), 2)
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
    assert_array_equal(a[sl], b[sl], "Arrays are not equal")
def test03c(self):
    """Testing `__getitem__()` method with several slices (III)

    Indexes a (5000, 4, 3, 2) array with three stepped slices (the last
    axis is left unindexed) and checks that the barray result has the same
    shape and values as the NumPy result.
    """
    a = np.arange(120 * 1000).reshape((5 * 1000, 4, 3, 2))
    b = blz.barray(a, rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        b = blz.open(rootdir=self.rootdir)
    sl = (slice(None, None, 3), slice(1, 3, 2), slice(1, 4, 2))
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
    assert_array_equal(a[sl], b[sl], "Arrays are not equal")
def test02(self):
    """Testing `__getitem__()` method with a start, stop, step

    Indexes a (10, 2) array filled with 3 using ``slice(1, 9, 2)`` and
    checks that the barray result has the same shape and values as the
    NumPy result.
    """
    a = np.ones((10, 2), dtype="i4") * 3
    b = blz.fill((10, 2), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        b = blz.open(rootdir=self.rootdir)
    sl = slice(1, 9, 2)
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
    assert_array_equal(a[sl], b[sl], "Arrays are not equal")
def test00a(self):
    """Testing `__getitem__()` method with a scalar index

    Indexes a (2, 3) array filled with 3 using the integer ``1`` and checks
    that the barray result has the same shape and values as the NumPy
    result.
    """
    a = np.ones((2, 3), dtype="i4") * 3
    b = blz.fill((2, 3), 3, dtype="i4", rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        b = blz.open(rootdir=self.rootdir)
    sl = 1
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(a[sl].shape, b[sl].shape, "Shape is not equal")
    assert_array_equal(a[sl], b[sl], "Arrays are not equal")
def test05(self):
    """Fill a (2,2000) barray, optionally reopen it, then append one row.

    The result is compared with a (3,2000) NumPy array of 3s; both use the
    '(4,)i4' dtype.
    """
    item_dtype = '(4,)i4'
    expected = np.ones((3, 2000), dtype=item_dtype) * 3
    actual = blz.fill((2, 2000), [3, 3, 3, 3], dtype=item_dtype,
                      rootdir=self.rootdir)
    if self.open:
        # Use the object handed back by blz.open for the same rootdir
        actual = blz.open(rootdir=self.rootdir)
    extra_row = np.ones((1, 2000), dtype=item_dtype) * 3
    actual.append(extra_row)
    # We need to use actual[:] here to overcome a problem with the
    # assert_array_equal() function
    materialized = actual[:]
    assert_array_equal(expected, materialized, "Arrays are not equal")
def test00b(self):
    """Assign a 300-element vector through ``slice(1)`` and compare.

    The same assignment is applied to a (200,300) NumPy array and to a
    barray of the same shape; the sliced regions must then match.
    """
    shape = (200, 300)
    expected = np.ones(shape, dtype="i4") * 3
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
    sl = slice(1)
    expected[sl, :] = range(300)
    actual[sl] = range(300)
    if self.open:
        # Flush first, then use the object handed back by blz.open
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def test02b(self):
    """Assign a 2-element vector through ``slice(1, 8, 3)`` and compare.

    The same assignment is applied to a (10,2) NumPy array and to a barray
    of the same shape; the sliced regions must then match.
    """
    shape = (10, 2)
    expected = np.ones(shape, dtype="i4") * 3
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir)
    sl = slice(1, 8, 3)
    expected[sl, :] = range(2)
    actual[sl] = range(2)
    if self.open:
        # Flush first, then use the object handed back by blz.open
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def test04c(self):
    """Assign the scalar 2 through ``(1, 2, <full slice>)`` and compare.

    The same assignment is applied to a (4,3,2) NumPy array and to a barray
    built from it; the indexed regions must then match.
    """
    expected = np.arange(24).reshape((4, 3, 2))
    actual = blz.barray(expected, rootdir=self.rootdir)
    sl = (1, 2, slice(None))
    new_value = 2
    expected[sl] = new_value
    actual[sl] = new_value
    if self.open:
        # Flush first, then use the object handed back by blz.open
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def test01a(self):
    """Assign the scalar 0 through ``slice(100, 400)`` and compare.

    The same assignment is applied to a (500,200) NumPy array and to a
    barray of the same shape created with default ``blz.bparams()``; the
    sliced regions must then match.
    """
    shape = (500, 200)
    expected = np.ones(shape, dtype="i4") * 3
    params = blz.bparams()
    actual = blz.fill(shape, 3, dtype="i4", rootdir=self.rootdir,
                      bparams=params)
    sl = slice(100, 400)
    expected[sl, :] = 0
    actual[sl] = 0
    if self.open:
        # Flush first, then use the object handed back by blz.open
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[sl], actual[sl], "Arrays are not equal")
def test03d(self):
    """Assign the scalar 2 through four slices and compare whole arrays.

    The same assignment is applied to a (5,4,3,2) NumPy array and to a
    barray built from it; the complete contents must then match.
    """
    expected = np.arange(120).reshape((5, 4, 3, 2))
    actual = blz.barray(expected, rootdir=self.rootdir)
    sl = (
        slice(1, 3),
        slice(1, 3, 1),
        slice(1, None, 2),
        slice(1),
    )
    new_value = 2
    expected[sl] = new_value
    actual[sl] = new_value
    if self.open:
        # Flush first, then use the object handed back by blz.open
        actual.flush()
        actual = blz.open(rootdir=self.rootdir)
    assert_array_equal(expected[:], actual[:], "Arrays are not equal")
def test00b(self):
    """Accessing attributes in an opened barray.

    Sets three attributes on the object from ``self.getobject()``, obtains
    the object again through ``blz.open`` when ``self.rootdir`` is set, and
    checks that all three values and the attribute count are as expected.
    """
    cn = self.getobject()
    # Some attrs
    cn.attrs['attr1'] = 'val1'
    cn.attrs['attr2'] = 'val2'
    cn.attrs['attr3'] = 'val3'
    # Re-open the barray
    if self.rootdir:
        cn = blz.open(rootdir=self.rootdir)
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(cn.attrs['attr1'], 'val1')
    self.assertEqual(cn.attrs['attr2'], 'val2')
    self.assertEqual(cn.attrs['attr3'], 'val3')
    self.assertEqual(len(cn.attrs), 3)
def test00a(self):
    """Testing btable opening in "r" mode

    Creates a two-column btable under ``self.rootdir``, obtains it again
    through ``blz.open(..., mode='r')`` and compares its contents with the
    source columns.  Item assignment and ``append`` on the opened object
    are then expected to raise ``RuntimeError``.
    """
    # Use an integer row count (a float literal such as 1e1 is not a count)
    N = 10
    a = blz.barray(np.arange(N, dtype='i4'))
    b = blz.barray(np.arange(N, dtype='f8') + 1)
    t = blz.btable((a, b), ('f0', 'f1'), rootdir=self.rootdir)
    # Open t
    t = blz.open(rootdir=self.rootdir, mode='r')

    # Check values
    ra = np.rec.fromarrays([a[:], b[:]]).view(np.ndarray)
    assert_array_equal(t[:], ra, "btable values are not correct")

    # Now check some accesses
    self.assertRaises(RuntimeError, t.__setitem__, 1, (0, 0.0))
    self.assertRaises(RuntimeError, t.append, (0, 0.0))
def test01c(self):
    """Appending attributes in an opened barray.

    Sets one attribute on the object from ``self.getobject()``, obtains the
    object again through ``blz.open`` when ``self.rootdir`` is set, adds two
    more attributes and checks all three values and the attribute count.
    """
    cn = self.getobject()
    # Some attrs
    cn.attrs['attr1'] = 'val1'
    # Reopen
    if self.rootdir:
        cn = blz.open(rootdir=self.rootdir)
    # Append attrs
    cn.attrs['attr2'] = 'val2'
    cn.attrs['attr3'] = 'val3'
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(cn.attrs['attr1'], 'val1')
    self.assertEqual(cn.attrs['attr2'], 'val2')
    self.assertEqual(cn.attrs['attr3'], 'val3')
    self.assertEqual(len(cn.attrs), 3)
def test01b(self):
    """Removing attributes in an opened barray.

    Sets three attributes on the object from ``self.getobject()``, obtains
    the object again through ``blz.open`` when ``self.rootdir`` is set,
    deletes ``attr2`` and checks that the other two remain, that looking up
    ``attr2`` raises ``KeyError`` and that two attributes are left.
    """
    cn = self.getobject()
    # Some attrs
    cn.attrs['attr1'] = 'val1'
    cn.attrs['attr2'] = 'val2'
    cn.attrs['attr3'] = 'val3'
    # Reopen
    if self.rootdir:
        cn = blz.open(rootdir=self.rootdir)
    # Remove one of them
    del cn.attrs['attr2']
    # assertEqual replaces the deprecated assert_ alias
    self.assertEqual(cn.attrs['attr1'], 'val1')
    self.assertEqual(cn.attrs['attr3'], 'val3')
    self.assertRaises(KeyError, cn.attrs.__getitem__, 'attr2')
    self.assertEqual(len(cn.attrs), 2)
def test00b(self):
    """Testing btable opening in "w" mode

    Creates a two-column, ten-row btable under ``self.rootdir`` and obtains
    it again through ``blz.open(..., mode='w')``.  The opened object is
    expected to compare equal to an empty two-column record array.  Two
    rows are then appended, the second is overwritten, and the contents
    are compared with the rows (0, 0.0) and (1, 2.0).
    """
    # Use an integer row count (a float literal such as 1e1 is not a count)
    N = 10
    a = blz.barray(np.arange(N, dtype='i4'))
    b = blz.barray(np.arange(N, dtype='f8') + 1)
    t = blz.btable((a, b), ('f0', 'f1'), rootdir=self.rootdir)
    # Open t
    t = blz.open(rootdir=self.rootdir, mode='w')

    # Build the zero-length reference the opened table is compared with
    N = 0
    a = blz.barray(np.arange(N, dtype='i4'))
    b = blz.barray(np.arange(N, dtype='f8') + 1)
    ra = np.rec.fromarrays([a[:], b[:]]).view(np.ndarray)
    assert_array_equal(t[:], ra, "btable values are not correct")

    # Now check some accesses
    t.append((0, 0.0))
    t.append((0, 0.0))
    t[1] = (1, 2.0)
    ra = np.rec.fromarrays([(0, 1), (0.0, 2.0)], 'i4,f8').view(np.ndarray)
    assert_array_equal(t[:], ra, "btable values are not correct")
# Which dataset do we want to group?  Taken from the first command-line
# argument; defaults to "toy" when none is given.
which = sys.argv[1] if len(sys.argv) > 1 else "toy"

if which == "toy":
    # The iterator for reading the toy CSV file line by line
    sreader, dt, path = toy_stream()
    # Do the actual groupby
    ssby = groupby(sreader, 'key', 'val1', dtype=dt, path=path,
                   lines_per_chunk=2)
elif which == "randhie":
    # The iterator and dtype for datasets included in statsmodel
    sreader, dt, path = statsmodel_stream(which)
    # Do the actual groupby
    ssby = groupby(sreader, 'mdvis', 'lncoins', dtype=dt, path=path)
else:
    # Note the trailing space: the two literals are concatenated
    raise ValueError(
        "parsing for `%s` dataset not implemented "
        "(try either 'toy' or 'randhie')" % which)

# Reopen the BLZ object on-disk for retrieving the grouped data
ssby = blz.open(path)

# Finally, print the ssby table (do not try to dump it in the
# traditional way because the length of the columns is not the same).
# The names are sorted so the keys come out in sorted order too.
for key in sorted(ssby.names):
    # Single pre-formatted argument: same output under the Python 2 print
    # statement and the Python 3 print function
    print("key: %s %s" % (key, ssby[key]))