def test_load_2_dif_clusters_same_instance(self):
    # Persist a 50x50 array, reopen it by name and read two distant regions
    # through the same reloaded instance. Only the second read is compared
    # against the source array; the first one just has to succeed.
    table = 'load_2_clustrs_same_inst'
    side = 50
    expected = np.arange(side * side).reshape((side, side))

    written = StorageNumpy(input_array=expected, name=table)
    written.sync()  # Flush values to cassandra

    reloaded = StorageNumpy(name=table)
    reloaded[0:1, 0:1]  # first access: top-left corner, result discarded

    far_corner = (slice(40, 50), slice(40, 50))
    self.assertTrue(np.array_equal(reloaded[far_corner], expected[far_corner]))
def test_storagenumpy_from_storagenumpy(self):
    # Create a StorageNumpy from another StorageNumpy.
    # The new object is expected to be volatile (no storage_id, no name), to
    # hold the same values, and not to share memory with its source.
    n = np.arange(12).reshape(3, 4)
    s1 = StorageNumpy(n, "test_sn_from_sn")
    try:
        s2 = StorageNumpy(s1)  # Create a StorageNumpy from another StorageNumpy
        self.assertIsNone(s2.storage_id)
        self.assertIsNone(s2._get_name())
        self.assertTrue(np.array_equal(s2, n))

        # StorageNumpy s1 and s2 should not share memory
        s1[0][0] = 42
        self.assertNotEqual(s2[0, 0], s1[0, 0])
        s2[2][2] = 666
        self.assertNotEqual(s2[2, 2], s1[2, 2])

        # Create a third StorageNumpy, this time from the volatile copy
        s3 = StorageNumpy(s2)
        self.assertIsNone(s3.storage_id)
        self.assertIsNone(s3._get_name())
        self.assertTrue(np.array_equal(s3, s2))
    finally:
        # Clean up even if an assertion above failed, so the persistent
        # object is not left behind.
        s1.delete_persistent()
def test_load_StorageNumpy(self):
    # Persist a matrix, open it again by name only, and check that the second
    # object is persistent and has the same storage_id as the first.
    table = "test_load_StorageNumpy"
    matrix = np.arange(2 * 128).reshape(2, 128)  # A matrix with "some" columns

    stored = StorageNumpy(matrix, table)
    stored.sync()  # Flush values to cassandra

    loaded = StorageNumpy(None, table)
    self.assertTrue(loaded._is_persistent)
    self.assertEqual(stored.storage_id, loaded.storage_id)
def test_np_dot(self):
    # np.dot over two persistent StorageNumpy operands: the result is made
    # persistent and must equal the product of the plain numpy operands.
    left = np.arange(8 * 8).reshape(8, 8)
    right = np.arange(8 * 8).reshape(8, 8)
    sn_left = StorageNumpy(left, "test_np_dot1")
    sn_right = StorageNumpy(right, "test_np_dot2")

    product = np.dot(sn_left, sn_right)
    product.make_persistent("test_np_dots1xs2")

    expected = np.dot(left, right)
    self.assertTrue(np.array_equal(product, expected))
def test_slice_ops(self):
    # Reduce (sum) and repr() a 3D slice of a persistent StorageNumpy.
    obj = np.arange(8 * 8 * 8).reshape((8, 8, 8))
    hecu = StorageNumpy(input_array=obj, name='test_slice_ops')
    try:
        hecu_sub = hecu[:2, 3:, 4:]

        # Named `total` so the builtin sum() is not shadowed.
        total = hecu_sub.sum()
        self.assertGreater(total, 0)

        description = repr(hecu_sub)
        self.assertIsInstance(description, str)
    finally:
        # Remove the persistent object even if an assertion failed.
        hecu.delete_persistent()
def test_slice_from_numpy_array(self):
    # Index a persistent StorageNumpy with a numpy array of indexes and check
    # that the selection sums to the same value as on the plain numpy array.
    obj = np.arange(8 * 8 * 8).reshape((8, 8, 8))
    hecu = StorageNumpy(input_array=obj, name='test_slice_numpy')
    try:
        indexes = np.array((0, 1))
        hecu_sub = hecu[indexes]  # Access using an array of indexes
        # FIXME add more testing, currently if it does not segfault, then it works

        # Named `total` so the builtin sum() is not shadowed.
        total = hecu_sub.sum()
        self.assertEqual(total, obj[indexes].sum())
    finally:
        # Remove the persistent object even if an assertion failed.
        hecu.delete_persistent()
def test_types_in_memory(self):
    # A volatile StorageNumpy built from every signed and unsigned integer
    # typecode numpy knows must compare equal to the array it was built from.
    base_array = np.arange(256)
    for family in ('Integer', 'UnsignedInteger'):
        for code in np.typecodes[family]:
            wrapped = StorageNumpy(base_array.astype(code))
            reference = base_array.astype(code)
            self.assertTrue(np.array_equal(wrapped, reference))
def test_split_content(self):
    # Persist an 88x66 matrix, reload it by name and split it by rows, by
    # columns and by blocks. Checks the number of pieces, their shapes, and
    # the contents of the row and column pieces.
    table = "test_split_content"
    n = np.arange(88 * 66).reshape(88, 66)
    s = StorageNumpy(n, table)
    s.sync()  # Flush values to cassandra
    del s
    s = StorageNumpy(None, table)

    rows = list(s.split(cols=False))
    self.assertTrue(len(rows) == 4)
    columns = list(s.split(cols=True))
    self.assertTrue(len(columns) == 3)
    blocks = list(s.split())
    self.assertTrue(len(blocks) == 12)

    # Shapes first (rows, then columns, then blocks) ...
    expected_shapes = ((rows, (22, 66)), (columns, (88, 22)), (blocks, (22, 22)))
    for pieces, shape in expected_shapes:
        for piece in pieces:
            self.assertTrue(piece.shape == shape)

    # ... then contents: each piece covers 22 consecutive rows / columns.
    for idx, start in enumerate((0, 22, 44, 66)):
        self.assertTrue(np.array_equal(rows[idx], n[start:start + 22, :]))
    for idx, start in enumerate((0, 22, 44)):
        self.assertTrue(np.array_equal(columns[idx], n[:, start:start + 22]))
def test_get_subarray(self):
    # Persist a 3D array, reopen it by name and request the same sub-array
    # twice; the sum of the first and the mean of the second must be positive.
    table = 'test_get_subarray'
    source = np.arange(8 * 8 * 4).reshape((8, 8, 4))
    writer = StorageNumpy(input_array=source, name=table)
    writer.sync()  # Flush values to cassandra

    reader = StorageNumpy(name=table)

    window = reader[:3, :2]
    total = window.sum()
    window = reader[:3, :2]  # second access to the very same region
    average = window.mean()

    self.assertGreater(total, 0)
    self.assertGreater(average, 0)
def test_init_empty(self):
    # Build volatile StorageNumpy objects (no name, no storage_id) and check
    # that they hold the same values as the array they were built from.
    tablename = None
    base_array = np.arange(4096).reshape((64, 64))
    storage_id = None

    basic_init = StorageNumpy(base_array)
    self.assertTrue(np.array_equal(basic_init, base_array))

    # Positional order as used by the other tests of this suite is
    # (input_array, name, storage_id); both extra arguments are None here.
    complete_init = StorageNumpy(base_array, tablename, storage_id)
    self.assertTrue(np.array_equal(complete_init, base_array))
def test_split(self):
    # Split a 2x128 persistent matrix and check the shape and the first
    # element of every piece, plus the total number of pieces.
    matrix = np.arange(2 * 128).reshape(2, 128)  # A matrix with "some" columns
    sn = StorageNumpy(matrix, "test_split")

    seen = 0
    for piece in sn.split():
        # Assuming a BLOCK_SIZE of 4096!! FIXME use an environment variable!
        # The first five pieces are 22 columns wide, the last one holds the
        # remaining 18.
        expected_shape = (2, 22) if seen <= 4 else (2, 18)
        self.assertEqual(piece.shape, expected_shape)
        self.assertTrue(piece[0, 0] == seen * 22)
        seen += 1

    self.assertTrue(seen == 6)
def test_split_access(self):
    # Split a 2x128 persistent matrix and compare the contents of every piece
    # with the matching column range of the source array.
    n = np.arange(2 * 128).reshape(2, 128)  # A matrix with "some" columns
    s = StorageNumpy(n, "test_split_access")

    if s._build_args.metas.partition_type != 0:
        # This test is only valid for ZORDER. Report it as skipped instead of
        # returning silently, which would count as a pass.
        self.skipTest("test_split_access is only valid for ZORDER partitioning")

    splits = 0
    for i in s.split():
        # Assuming a BLOCK_SIZE of 4096!! FIXME use an environment variable!
        # The first five pieces are 22 columns wide, the last one 18.
        first_col = 22 * splits
        width = 22 if splits <= 4 else 18
        expected = n[0:22, first_col:first_col + width]
        self.assertTrue(np.array_equal(i[:], expected))
        splits += 1
def test_split_already_persistent(self):
    # np_split() on a persistent StorageNumpy, before and after the object is
    # dropped from memory and reopened by name. Every chunk is synced,
    # released, fetched again through getByID() and compared with the
    # matching row of blocks cut from the plain numpy array.
    table = "test_split_already_persistent"
    bn, bm = (2, 1)
    x = np.arange(100).reshape(10, -1)
    total_rows, total_cols = x.shape

    blocks = []
    for idx in range(0, total_rows, bn):
        blocks.append([x[idx: idx + bn, col: col + bm]
                       for col in range(0, total_cols, bm)])

    data = StorageNumpy(input_array=x, name=table)
    data.sync()  # Flush values to cassandra

    for idx, chunk in enumerate(data.np_split(block_size=(bn, bm))):
        storage_id = chunk.storage_id
        chunk.sync()  # Flush data
        del chunk  # release this reference before fetching the chunk by id
        chunk = getByID(storage_id)
        self.assertTrue(np.array_equal(list(chunk), blocks[idx]))

    # Drop the in-memory object and reopen it by name only.
    del data
    gc.collect()
    data = StorageNumpy(name=table)
    self.assertTrue(np.array_equal(list(data), x))

    for idx, chunk in enumerate(data.np_split(block_size=(bn, bm))):
        storage_id = chunk.storage_id
        chunk.sync()  # Flush data
        del chunk  # release this reference before fetching the chunk by id
        chunk = getByID(storage_id)
        self.assertTrue(np.array_equal(list(chunk), blocks[idx]))

    # idx is the index of the last chunk yielded by the loop above.
    self.assertEqual(idx + 1, len(blocks))
def test_twin_volatile_from_storagenumpy(self):
    # A StorageNumpy built from a volatile StorageNumpy must carry a twin
    # reference that is itself volatile (no id, no name) and equals the
    # transpose of the source array.
    n = np.arange(3 * 4).reshape(3, 4)
    s = StorageNumpy(n)
    s2 = StorageNumpy(s)

    # Identity assertions: they test `is None` rather than `== None` and
    # print the offending value on failure.
    self.assertIsNotNone(s2._twin_ref)
    self.assertIsNone(s2._twin_id)
    self.assertIsNone(s2._twin_name)
    self.assertIsNone(s2._twin_ref._name)
    self.assertIsNone(s2._twin_ref.storage_id)

    self.assertEqual(n.T.shape, s2._twin_ref.shape)
    self.assertTrue(np.array_equal(s2._twin_ref, n.T))
def test_twin_persistent_from_storagenumpy(self):
    # A StorageNumpy built from a *persistent* StorageNumpy is volatile, and
    # so is its twin: no id, no name, contents equal to the transpose of the
    # source array.
    n = np.arange(3 * 4).reshape(3, 4)
    s = StorageNumpy(n, 'pers_from_sn')
    s2 = StorageNumpy(s)  # Create a volatile SN

    # Identity assertions: they test `is None` rather than `== None` and
    # print the offending value on failure.
    self.assertIsNotNone(s2._twin_ref)
    self.assertIsNone(s2._twin_id)
    self.assertIsNone(s2._twin_name)
    self.assertIsNone(s2._twin_ref._name)
    self.assertIsNone(s2._twin_ref.storage_id)

    self.assertEqual(n.T.shape, s2._twin_ref.shape)
    self.assertTrue(np.allclose(s2._twin_ref, n.T))
def test_iter_numpy(self):
    # Iterating a persistent StorageNumpy must yield more items than
    # iterating a smaller slice of it.
    obj = np.arange(8 * 8 * 8).reshape((8, 8, 8))
    hecu = StorageNumpy(input_array=obj, name='test_iter_numpy')
    try:
        acc = sum(1 for _ in hecu)

        hecu_sub = hecu[:2, 3:, 4:]
        acc2 = sum(1 for _ in hecu_sub)

        self.assertGreater(acc, acc2)
    finally:
        # Remove the persistent object even if an assertion failed.
        hecu.delete_persistent()
def test_out_of_bounds(self):
    # Slice a 10x10x10 persistent array with ranges that reach far past its
    # bounds; arithmetic on the selection (plain and in-place) must not raise.
    cube = np.arange(1000).reshape(10, 10, 10)
    beyond = slice(50, 150, None)
    coordinates = (beyond, beyond, slice(5, 150, None))

    sn = StorageNumpy(cube, "KK")
    selection = sn[coordinates]

    selection - 1   # Should not fail
    selection -= 1  # Should not fail
def test_out_of_bounds_in_numpy(self):
    # Integer indexes past the end must raise IndexError, both on a
    # StorageNumpy reloaded by name and on a slice taken from it.
    table = "test_bounds_in_numpy"
    source = np.arange(88 * 66).reshape(88, 66)
    sn = StorageNumpy(source, table)
    del sn
    sn = StorageNumpy(None, table)

    everything = slice(None)

    # Whole array is 88x66: column 100 and row 100 do not exist.
    for bad_index in ((everything, 100), (100, everything)):
        with self.assertRaises(IndexError):
            sn[bad_index]

    # The view is 9x28: row 11 and column 55 do not exist in it.
    view = sn[1:10, 22:50]
    for bad_index in ((11, everything), (everything, 55)):
        with self.assertRaises(IndexError):
            view[bad_index]
def test_copy_storageNumpyVolatile(self):
    # Test that a copy of a StorageNumpy does not share memory
    # (Volatile version)
    n = np.arange(12).reshape(3, 4)
    s = StorageNumpy(n)
    c = s.copy()

    # Neither the source nor the copy is persistent.
    self.assertIsNone(s.storage_id)
    self.assertIsNone(c.storage_id)

    # Same value before the write, different value after it.
    self.assertEqual(c[0, 0], s[0, 0])
    c[0, 0] = 42
    self.assertNotEqual(c[0, 0], s[0, 0])
def test_transpose(self):
    # Test the transpose: element [0,1] of the transpose must match element
    # [1,0] of the source, both before and after writing through the
    # transpose.
    n = np.arange(12).reshape(3, 4)
    s = StorageNumpy(n, "testTranspose")
    try:
        t = s.transpose()
        self.assertEqual(t[0, 1], s[1, 0])

        t[0, 1] = 42
        self.assertEqual(t[0, 1], s[1, 0])
    finally:
        # Clean up even if an assertion above failed.
        s.delete_persistent()
def test_twin_persistent_manual(self):
    # Make a volatile StorageNumpy persistent by hand and check the twin
    # bookkeeping: id, names, shape, build args, the twin_id stored in the
    # hecuba.istorage table, and the twin contents.
    n = np.arange(3 * 4).reshape(3, 4)
    s = StorageNumpy(n)
    s.make_persistent('manual_pers')

    self.assertIsNotNone(s._twin_id)

    expected_twin_name = self.ksp + '_arrow.manual_pers_arrow'
    self.assertEqual(s._twin_name, expected_twin_name)
    self.assertEqual(s._twin_ref._name, expected_twin_name)
    self.assertEqual(s._twin_ref.storage_id, s._twin_id)
    self.assertEqual(n.T.shape, s._twin_ref.shape)
    self.assertEqual(s._build_args.twin_id, s._twin_id)

    # stored data in cassandra
    res = config.session.execute(
        "SELECT twin_id FROM hecuba.istorage WHERE storage_id = %s",
        [s.storage_id]
    )
    self.assertEqual(res.one().twin_id, s._twin_id)

    self.assertTrue(np.allclose(s._twin_ref, n.T))
def test_storagenumpy_copy_memory(self):
    # Check that the memory from a StorageNumpy does not share original array
    n = np.arange(12).reshape(3, 4)
    s1 = StorageNumpy(n, "test_storagenumpy_copy_memory")
    try:
        # StorageNumpy s1 and n should NOT share memory:
        # a write to s1 must not show up in n ...
        s1[0][0] = 42
        self.assertFalse(np.array_equal(s1, n))
        s1[0][0] = n[0][0]  # Undo

        # ... and a write to n must not show up in s1.
        n[2][2] = 666
        self.assertFalse(np.array_equal(s1, n))
    finally:
        # Clean up even if an assertion above failed.
        s1.delete_persistent()
def test_explicit_construct(self):
    # Calling the constructor with no arguments at all must not raise.
    #
    # Background from the original note (numpy subclassing):
    # from an explicit constructor - e.g. InfoArray() - obj is None, because
    # we are in the middle of the InfoArray.__new__ constructor and self.info
    # will only be set when we return to InfoArray.__new__.
    empty_instance = StorageNumpy()
def test_copy_storageNumpyPersist(self):
    # Test that a copy of a StorageNumpy does not share memory
    # (Persistent version)
    n = np.arange(12).reshape(3, 4)
    s = StorageNumpy(n, "test_copy_storageNumpyPersist")
    try:
        c = s.copy()

        # The copy of a persistent object is expected to be volatile.
        self.assertIsNone(c.storage_id)
        self.assertIsNone(c._get_name())

        # Same value before the write, different value after it.
        self.assertEqual(c[0, 0], s[0, 0])
        c[0, 0] = 42
        self.assertNotEqual(c[0, 0], s[0, 0])
    finally:
        # Clean up even if an assertion above failed.
        s.delete_persistent()
def test_load_persistent_twin_by_name_and_id(self):
    # Reopen a persistent StorageNumpy passing both its name and its
    # storage_id. For now only the construction itself is exercised: the twin
    # assertions below are disabled.
    table = 'by_name_and_id'
    n = np.arange(3 * 4).reshape(3, 4)

    original = StorageNumpy(n, table)
    sid = original.storage_id
    del original

    s2 = StorageNumpy(None, table, sid)

    #FIXME
    #self.assertTrue(s2._twin_id is not None)
    #self.assertEqual(s2._twin_name, self.ksp+'.harrow_kk')
    #self.assertEqual(s2._twin_ref._name, self.ksp+'.harrow_kk')
    #self.assertEqual(s2._twin_ref.storage_id, s2._twin_id)
    #self.assertEqual(sid, s2.storage_id)
    #self.assertTrue(np.array_equal(s2._twin_ref, n.T))
def test_load_persistent_twin_by_name(self):
    # Reopen a persistent StorageNumpy by name only and check that its twin
    # comes back too: id, names, storage_id, shape and contents.
    n = np.arange(3 * 4).reshape(3, 4)
    s = StorageNumpy(n, 'load_by_name')
    sid = s.storage_id
    del s

    s2 = StorageNumpy(None, 'load_by_name')

    self.assertIsNotNone(s2._twin_id)

    expected_twin_name = self.ksp + '_arrow.load_by_name_arrow'
    self.assertEqual(s2._twin_name, expected_twin_name)
    self.assertEqual(s2._twin_ref._name, expected_twin_name)
    self.assertEqual(s2._twin_ref.storage_id, s2._twin_id)
    self.assertEqual(sid, s2.storage_id)
    self.assertEqual(n.T.shape, s2._twin_ref.shape)

    self.assertTrue(np.allclose(s2._twin_ref, n.T))
    self.assertTrue(np.allclose(s2, n))
def test_pv_negative_indexes(self):
    # Index -1 on a StorageNumpy reloaded by name must return the same data
    # as index -1 on the numpy array it was created from, for both a 2D and
    # a 1D array.
    last = -1

    # 2D case
    table_2d = "test_pv_negative_indexes"
    matrix = np.arange(66 * 66).reshape(66, 66)
    sn = StorageNumpy(matrix, table_2d)
    sn.sync()
    del sn
    sn = StorageNumpy(None, table_2d)
    self.assertTrue(np.array_equal(sn[last], matrix[last]))

    # 1D case
    table_1d = "test_pv_negative_indexes_small"
    vector = np.arange(66 * 66)
    snn = StorageNumpy(vector, table_1d)
    del snn
    snn = StorageNumpy(None, table_1d)
    self.assertTrue(np.array_equal(snn[last], vector[last]))
def test_read_all(self):
    # Persist a 128x128x128 array, reopen it by name and read everything back
    # with a full slice; the result must match a freshly built numpy array.
    nelem = 2 ** 21
    elem_dim = 2 ** 7

    base_array = np.arange(nelem).reshape((elem_dim, elem_dim, elem_dim))
    casted = StorageNumpy(input_array=base_array, name="test_read_all")
    try:
        casted.sync()  # Flush values to cassandra

        test_numpy = np.arange(nelem).reshape((elem_dim, elem_dim, elem_dim))
        casted = StorageNumpy(name="test_read_all")
        chunk = casted[slice(None, None, None)]
        self.assertTrue(np.allclose(chunk.view(np.ndarray), test_numpy))
    finally:
        # Remove the persistent object even if something above failed.
        casted.delete_persistent()
def test_slicing_3d(self):
    # The same partial slice of a 3D array must match numpy both on the
    # freshly created StorageNumpy and on one reopened by name after a sync.
    base = np.arange(8 * 8 * 4).reshape((8, 8, 4))
    hecu = StorageNumpy(input_array=base, name='test_slicing_3d')
    try:
        res_hecu = hecu[6:7, 4:]
        res = base[6:7, 4:]
        self.assertTrue(np.array_equal(res, res_hecu))

        hecu.sync()  # Flush values to cassandra
        hecu = StorageNumpy(name="test_slicing_3d")
        res_hecu = hecu[6:7, 4:]
        self.assertTrue(np.array_equal(res, res_hecu))
    finally:
        # Remove the persistent object even if an assertion failed.
        hecu.delete_persistent()
def test_split_by_columns(self):
    """
    Tests iterating through the columns of the Hecuba array
    """
    # Blocks are 10 rows by 1 column, so the 10x10 matrix gives a single row
    # of ten column blocks to compare each chunk against.
    table = "test_split_by_columns"
    bn, bm = (10, 1)
    x = np.arange(100).reshape(10, -1)
    total_rows, total_cols = x.shape

    blocks = []
    for idx in range(0, total_rows, bn):
        blocks.append([x[idx: idx + bn, col: col + bm]
                       for col in range(0, total_cols, bm)])

    data = StorageNumpy(input_array=x, name=table)
    data.sync()  # Flush values to cassandra

    for idx, chunk in enumerate(data.np_split(block_size=(bn, bm))):
        storage_id = chunk.storage_id
        chunk.sync()  # Flush data
        del chunk  # release this reference before fetching the chunk by id
        chunk = getByID(storage_id)
        self.assertTrue(np.array_equal(list(chunk), blocks[idx]))

    # idx is the index of the last chunk yielded by the loop above.
    self.assertEqual(idx + 1, len(blocks))