def test_small_brute(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over a small amount of data using an iterkeys and validates that no column name can be a key and value at the same time Analyzes: - HCache (enforce column can't be key and value at the same time) - Iterkeys ''' '''''' table = "particle" nelems = 10001 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, nelems): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nblocks = 100 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (nelems / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] hcache_config = {'cache_size': '10', 'writer_buffer': 20} keys = ["partid", "time"] values = ["time", "x"] cache = None # this should fail since a key can not be a column name at the same time (key=time, column=time) try: cache = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", tokens, keys, values, hcache_config) except RuntimeError, e: self.assertTrue(True, e)
def test_put_row_text(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' Simple test to store text and retrieve it Analyzes: - HCache - Put_row (write text) - Iteritems (read text) ''' '''''' table = "bulk" self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(partid int PRIMARY KEY, data text);" % (self.keyspace, table)) num_items = int(pow(10, 3)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nblocks = 10 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (num_items / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid"] values = ["data"] hcache_config = {'cache_size': '10', 'writer_buffer': 20} cache = Hcache(self.keyspace, table, "", tokens, keys, values, hcache_config) for i in xrange(0, num_items): cache.put_row([i], ['someRandomText']) # it doesnt make sense to count the read elements # because the data is still being written async hiter = cache.iteritems(10) while True: try: data = hiter.get_next() self.assertEqual(len(data), len(keys) + len(values)) self.assertEqual(data[1], 'someRandomText') except StopIteration: break
def test_simpletest(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' Analyzes: ''' '''''' table = 'particle' nelems = 500 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, nelems): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort keys = ["partid", "time"] values = ["x", "y", "z"] token_ranges = [] # empty configuration parameter (the last dictionary) means to use the default config table = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", token_ranges, keys, values, {}) def get_data(cache, keys): data = None try: data = cache.get_row(keys) self.assertEqual(len(data), len(values)) except KeyError: print 'not found' return data q1 = get_data(table, [433, 4330]) # float(0.003) lost = get_data(table, [133, 1330]) lost = get_data(table, [433, 4330]) q2 = get_data(table, [433, 4330]) self.assertEqual(q1, q2)
def _setup_hcache(self):
    """Assemble the Hcache constructor arguments from this object's schema
    and instantiate the cache."""
    key_names = [k["name"] for k in self._primary_keys]
    persistent_values = [{"name": c["name"]} for c in self._columns]
    if self._tokens is None:
        raise RuntimeError("Tokens for object {} are null".format(self._get_name()))
    cache_config = {'cache_size': config.max_cache_size,
                    'writer_par': config.write_callbacks_number,
                    'writer_buffer': config.write_buffer_size,
                    'timestamped_writes': config.timestamped_writes}
    self._hcache_params = (self._ksp, self._table, self.storage_id, self._tokens,
                           key_names, persistent_values, cache_config)
    log.debug("HCACHE params %s", self._hcache_params)
    self._hcache = Hcache(*self._hcache_params)
def test_write_nulls_simple(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' Simple test to store text and retrieve it Analyzes: - HCache - Put_row (write data mixed with nulls) ''' '''''' table = "nulls" self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(partid int PRIMARY KEY, time float, data text);" % (self.keyspace, table)) num_items = int(pow(10, 3)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nblocks = 10 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (num_items / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid"] values = ["time", "data"] hcache_config = {'cache_size': '10', 'writer_buffer': 20} cache = Hcache(self.keyspace, table, "", tokens, keys, values, hcache_config) for i in xrange(0, num_items): cache.put_row( [i], [12, None] ) # random.sample({i,None},1)+random.sample({'SomeRandomText',None},1)) time.sleep(10)
def make_persistent(self, name):
    """
    Make this StorageNumpy persistent under *name* (keyspace.table):
    creates the backing keyspace/table, builds an Hcache over it and, when
    the array is non-empty, stores the whole ndarray as one row.
    Raises AlreadyPersistentError if the object is already persistent.
    """
    if self._is_persistent:
        raise AlreadyPersistentError(
            "This StorageNumpy is already persistent [Before:{}.{}][After:{}]",
            self._ksp, self._table, name)
    self._is_persistent = True

    (self._ksp, self._table) = self._extract_ks_tab(name)
    if self._storage_id is None:
        # Deterministic UUID derived from the fully qualified table name.
        self._storage_id = uuid.uuid3(
            uuid.NAMESPACE_DNS, self._ksp + '.' + self._table + '_numpies')
    self._build_args = self.args(self._storage_id, self._class_name, name)
    log.info("PERSISTING DATA INTO %s %s", self._ksp, self._table)

    query_keyspace = "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = %s" % (
        self._ksp, config.replication)
    config.session.execute(query_keyspace)

    # Blocks are keyed by (storage_id, cluster_id) partition plus block_id.
    config.session.execute(
        'CREATE TABLE IF NOT EXISTS ' + self._ksp + '.' + self._table + '_numpies'
        '(storage_id uuid , '
        'cluster_id int, '
        'block_id int, '
        'payload blob, '
        'PRIMARY KEY((storage_id,cluster_id),block_id))')

    # NOTE(review): this dict uses 'write_buffer' while _setup_hcache elsewhere
    # in this file uses 'writer_buffer' -- confirm which key Hcache expects.
    self._hcache_params = (self._ksp, self._table + '_numpies',
                           self._storage_id, [],
                           ['storage_id', 'cluster_id', 'block_id'],
                           [{'name': "payload", 'type': 'numpy'}],
                           {'cache_size': config.max_cache_size,
                            'writer_par': config.write_callbacks_number,
                            'write_buffer': config.write_buffer_size})
    self._hcache = Hcache(*self._hcache_params)

    if len(self.shape) != 0:
        # Store the whole ndarray as a single row keyed by (-1, -1).
        self._hcache.put_row([self._storage_id, -1, -1], [self])
    self._store_meta(self._build_args)
def load_array(storage_id, name):
    """Fetch the whole numpy array stored under (storage_id, -1, -1) in the
    '<table>_numpies' table behind *name*; raises KeyError when not found."""
    ksp, table = IStorage._extract_ks_tab(name)
    hcache_params = (ksp, table + '_numpies', storage_id, [],
                     ['storage_id', 'cluster_id', 'block_id'],
                     [{'name': "payload", 'type': 'numpy'}],
                     {'cache_size': config.max_cache_size,
                      'writer_par': config.write_callbacks_number,
                      'write_buffer': config.write_buffer_size})
    hcache = Hcache(*hcache_params)
    result = hcache.get_row([storage_id, -1, -1])
    if len(result) != 1:
        raise KeyError
    return result[0]
def test_coherency(self): from hfetch import connectCassandra from hfetch import Hcache from hfetch import HWriter '''''' ''' Analyzes: - HCache ''' '''''' table = "particle" nparts = 10000 # Num particles in range self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort tkns = [] keys = ["partid", "time"] values = ["x", "y", "z"] cache = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", tkns, keys, values, { 'cache_size': '1', 'writer_buffer': 20 }) for i in xrange(0, nparts): cache.put_row([i, i / .1], [i / .2, i / .3, i / .4]) for i in reversed(xrange(0, nparts)): #xrange(nparts, -1, -1): try: cache.get_row([i, i / .1]) except KeyError: str_k = str([i, i / .1]) self.fail(str_k + " not found")
def write_test(self): from hfetch import connectCassandra from hfetch import Hcache from hfetch import HWriter '''''' ''' While the iterator retrieves the data from a table, the writer stores it into another table Analyzes: - HCache - HWriter - Iteritems (updating the cache) ''' '''''' table = "particle" table_write = "particle_write" nparts = 6000 # Num particles in range self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table_write)) for i in xrange(0, nparts): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort p = 1000 # Num partitions t_f = -7764607523034234880 # Token begin range # t_t = 5764607523034234880 # Token end range t_t = 7764607523034234880 # Token blocks tkn_size = (t_t - t_f) / (nparts / p) tkns = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid", "time"] values = ["x", "y", "z"] a = Hcache(self.keyspace, table, "WHERE token(partid)>=? 
AND token(partid)<?;", tkns, keys, values, { self.keyspace: '100', 'writer_buffer': 20 }) writer = HWriter(self.keyspace, table_write, keys, values, {'writer_buffer': 20}) def readAll(iter, wr): count = 1 while True: try: i = iter.get_next() except StopIteration: print 'End of data, items read: ', count, ' with value ', i break wr.write(i[0:2], i[2:5]) count += 1 if count % 100000 == 0: print count print "iter has %d elements" % count start = time.time() readAll(a.iteritems({ "prefetch_size": 100, "update_cache": "yes" }), writer) print "finshed into %d" % (time.time() - start)
def uuid_test(self):
    """
    Checks the correct handling of UUID keys.

    Analyzes:
    - Hcache
    - Put_row
    - Iteritems
    """
    from hfetch import connectCassandra
    from hfetch import Hcache
    import uuid

    table = "uuid"
    self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table))
    self.session.execute(
        "CREATE TABLE IF NOT EXISTS %s.%s(partid uuid, data int, PRIMARY KEY(partid));"
        % (self.keyspace, table))

    nelem = 1000
    nblocks = 10

    t_f = pow(-2, 63)  # Token begin range
    t_t = pow(2, 63) - 1
    # Token blocks
    tkn_size = (t_t - t_f) / (nelem / nblocks)
    tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)]

    try:
        connectCassandra(self.contact_names, self.nodePort)
    except Exception:
        print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

    keys = ["partid"]
    values = ["data"]

    cache = Hcache(self.keyspace, table,
                   "WHERE token(partid)>=? AND token(partid)<?;",
                   tokens, keys, values,
                   {'cache_size': '10', 'writer_buffer': 20})

    # Write data, remembering the uuid written halfway through the run.
    someid = None
    i = 0
    while i < nelem:
        u = uuid.uuid4()  # ('81da81e8-1914-11e7-908d-ecf4bb4c66c4')
        cache.put_row([u], [i])
        if i == nelem / 2:
            someid = u
        i += 1

    # by recreating the cache we wait until all the data is written
    cache = Hcache(self.keyspace, table,
                   "WHERE token(partid)>=? AND token(partid)<?;",
                   tokens, keys, values,
                   {'cache_size': '10', 'writer_buffer': 20})

    # Read data back, counting rows and looking for the remembered uuid.
    itera = cache.iteritems(10)
    found = False
    counter = 0
    while True:
        try:
            L = uuid.UUID(itera.get_next()[0])
            if L == someid:
                found = True
        except StopIteration:
            break
        counter = counter + 1

    self.assertEqual(counter, nelem)
    self.assertTrue(found)
def test_get_row_key_error(self):
    """
    Checks that the hcache raises KeyError when the requested key does not
    exist.

    Analyzes:
    - Hcache
    - Get_row (returning KeyError)
    """
    from hfetch import connectCassandra
    from hfetch import Hcache

    table = 'particle'
    num_keys = 10001

    self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table))
    self.session.execute(
        "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
        "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table))
    for i in xrange(0, num_keys):
        vals = ','.join(
            str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
        self.session.execute(
            "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
            % (self.keyspace, table, vals))

    token_ranges = [(8070430489100699999, 8070450532247928832)]

    non_existent_keys = 10
    # Large enough to hold every key, existing and missing alike.
    cache_size = num_keys + non_existent_keys

    try:
        connectCassandra(self.contact_names, self.nodePort)
    except Exception:
        print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

    keys = ["partid", "time"]
    values = ["ciao", "x", "y", "z"]

    cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                   {'cache_size': cache_size})

    # First pass: the cache is empty and queries cassandra to retrieve the
    # data; keys past num_keys must raise KeyError.
    t1 = time.time()
    error_counter = 0
    for pk in xrange(0, num_keys + non_existent_keys):
        ck = pk * 10
        try:
            result = cache.get_row([pk, ck])
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            error_counter = error_counter + 1

    print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format(
        unicode(str(num_keys), 'utf-8'),
        unicode(str(time.time() - t1), 'utf-8'),
        unicode(str(error_counter), 'utf-8'),
        unicode(str(non_existent_keys), 'utf-8'))

    self.assertEqual(error_counter, non_existent_keys)

    # Second pass: the cache already holds all the data and will ask
    # cassandra only for the keys that are not present.
    t1 = time.time()
    error_counter = 0
    for pk in xrange(0, num_keys + non_existent_keys):
        ck = pk * 10
        try:
            result = cache.get_row([pk, ck])
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            error_counter = error_counter + 1

    print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format(
        unicode(str(num_keys), 'utf-8'),
        unicode(str(time.time() - t1), 'utf-8'),
        unicode(str(error_counter), 'utf-8'),
        unicode(str(non_existent_keys), 'utf-8'))

    self.assertEqual(error_counter, non_existent_keys)
def test_get_row(self):
    """
    Iterates over a set of particles, performing get_row operations and
    timing cold reads, warm reads and a plain python-dict baseline.

    Analyzes:
    - HCache (multiple reads of the same key)
    - Get_row
    """
    from hfetch import connectCassandra
    from hfetch import Hcache

    table = 'particle'
    num_keys = 10001

    self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table))
    self.session.execute(
        "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
        "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table))
    for i in xrange(0, num_keys):
        vals = ','.join(
            str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
        self.session.execute(
            "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
            % (self.keyspace, table, vals))

    try:
        connectCassandra(self.contact_names, self.nodePort)
    except Exception:
        print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

    token_ranges = []
    cache_size = 10001

    keys = ["partid", "time"]
    values = ["ciao", "x", "y", "z"]
    cache_config = {'cache_size': cache_size}

    cache = Hcache(self.keyspace, table, "", token_ranges, keys, values, cache_config)

    # First pass: every key (partition key + clustering key) misses the
    # cache and is loaded from cassandra.
    t1 = time.time()
    for pk in xrange(0, num_keys):
        ck = pk * 10
        try:
            result = cache.get_row([pk, ck])
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            print "Error when retrieving value from cache:", e, [pk, ck]
    print 'time - load C++ cache with cassandra data: ', time.time() - t1

    # Second pass: same keys, now served from the C++ cache.
    t1 = time.time()
    for pk in xrange(0, num_keys):
        ck = pk * 10
        try:
            result = cache.get_row([pk, ck])
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            print "Error when retrieving value from cache:", e, [pk, ck]
    print 'time - read data from C++ cache: ', time.time() - t1

    # Baseline: copy everything into a plain python dict and time both the
    # load and the reads for comparison.
    py_dict = {}
    cache = Hcache(self.keyspace, table, "",
                   [(8070430489100699999, 8070450532247928832)],
                   ["partid", "time"], ["ciao", "x", "y", "z"],
                   {'cache_size': num_keys})

    t1 = time.time()
    for pk in xrange(0, num_keys):
        ck = pk * 10
        try:
            result = cache.get_row([pk, ck])
            py_dict[(pk, ck)] = result
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            print "Error when retrieving value from cache:", e, [pk, ck]
    print 'time - load data into python dict: ', time.time() - t1

    t1 = time.time()
    for pk in xrange(0, num_keys):
        ck = pk * 10
        try:
            result = py_dict[(pk, ck)]
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            print "Error when retrieving value from cache:", e, [pk, ck]
    print 'time - read data from the python dict: ', time.time() - t1
t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (nparts / p) tkns = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid", "time"] values = ["x"] hcache_config = {'cache_size': '100', 'writer_buffer': 20} token_query = "WHERE token(partid)>=? AND token(partid)<?;" cache = Hcache(self.keyspace, table, token_query, tkns, keys, values, hcache_config) hiter_config = {"prefetch_size": 100, "update_cache": "yes"} hiter = cache.iteritems(hiter_config) count = 0 start = time.time() while True: try: i = hiter.get_next() self.assertEqual(len(i), len(keys) + len(values)) except StopIteration: break count += 1
class Hfetch_Tests(unittest.TestCase):
    """Integration tests for hfetch numpy storage against a local Cassandra."""

    # Shared fixture: one cluster/session for the whole test class.
    keyspace = "hnumpy_test"
    contact_names = ['127.0.0.1']
    nodePort = 9042
    cluster = Cluster(contact_names, port=nodePort)
    session = cluster.connect()

    @classmethod
    def setUpClass(cls):
        # Create the keyspace and the UDT describing numpy payload metadata.
        cls.session.execute(
            "CREATE KEYSPACE IF NOT EXISTS %s WITH replication "
            "= {'class': 'SimpleStrategy', 'replication_factor': 1};" % cls.keyspace)
        cls.session.execute(
            "CREATE TYPE IF NOT EXISTS %s.numpy_meta(dims frozen<list<int>>,type int,type_size int);"
            % cls.keyspace)

    @classmethod
    def tearDownClass(cls):
        # Keyspace is deliberately left in place (drop is commented out).
        #self.session.execute("DROP KEYSPACE IF EXISTS %s;" % cls.keyspace)
        pass

    def test_simple_memory(self):
        """
        Stores a 2-D numpy array through the Hcache and reads it back,
        checking the retrieved array equals the original.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        import numpy as np

        dims = 2
        elem_dim = 4096

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        table = "arrays_numpies"
        self.session.execute("DROP TABLE if exists %s.%s;" % (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(storage_id uuid, cluster_id int, block_id int, payload blob,PRIMARY KEY((storage_id,cluster_id),block_id));"
            % (self.keyspace, table))

        # Deterministic UUID derived from the fully qualified table name.
        storage_id = uuid.uuid3(uuid.NAMESPACE_DNS, self.keyspace + '.' + table)
        time.sleep(5)

        a = Hcache(self.keyspace, table, storage_id, [],
                   ['storage_id', 'cluster_id', 'block_id'],
                   [{'name': "payload", 'type': 'numpy'}], {})

        # prepare data
        bigarr = np.arange(pow(elem_dim, dims)).reshape(elem_dim, elem_dim)
        print 'To be written '
        keys = [storage_id, -1, -1]
        values = [bigarr.astype('i')]
        print values

        # insert
        a.put_row(keys, values)

        # delete is a blocking op which waits the data to be flushed
        del a

        a = Hcache(self.keyspace, table, storage_id, [],
                   ["storage_id", 'cluster_id', 'block_id'],
                   [{"name": "payload", "type": "numpy"}], {})

        # retrieve
        result = a.get_row(keys)
        print 'Retrieved from cassandra'
        print result
        if np.array_equal(bigarr, result[0]):
            print 'Created and retrieved are equal'
        else:
            self.fail('Created and retrieved ndarrays differ')

        self.session.execute("DROP TABLE %s.%s;" % (self.keyspace, table))
# (fragment) Tail of a 3-D numpy round-trip test: the enclosing `def` and
# the connect `try:` are outside this view, so the first two prints belong
# to an except handler -- indentation here is a best-effort reconstruction.
print e
print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

table = "arrays_numpies"
self.session.execute("DROP TABLE if exists %s.%s;" % (self.keyspace, table))
self.session.execute(
    "CREATE TABLE %s.%s(storage_id uuid, cluster_id int, block_id int, payload blob,PRIMARY KEY((storage_id,cluster_id),block_id));"
    % (self.keyspace, table))

# Deterministic UUID derived from the fully qualified table name.
storage_id = uuid.uuid3(uuid.NAMESPACE_DNS, self.keyspace + '.' + table)
time.sleep(5)

a = Hcache(self.keyspace, table, storage_id, [],
           ['storage_id', 'cluster_id', 'block_id'],
           [{'name': "payload", 'type': 'numpy'}], {})

# prepare data: a 3-D cube this time (dims/elem_dim defined out of view)
bigarr = np.arange(pow(elem_dim, dims)).reshape(elem_dim, elem_dim, elem_dim)

keys = [storage_id, -1, -1]
values = [bigarr.astype('i')]

# insert
a.put_row(keys, values)

# othw we ask for the row before it has been processed
time.sleep(2)
def make_persistent(self, name):
    """
    Method to transform a StorageDict into a persistent object.
    This will make it use a persistent DB as the main location of its data.
    Args:
        name: fully qualified name (keyspace.table) to persist under
    """
    if self._is_persistent:
        raise AlreadyPersistentError(
            "This StorageDict is already persistent [Before:{}.{}][After:{}]",
            self._ksp, self._table, name)
    self._is_persistent = True

    (self._ksp, self._table) = self._extract_ks_tab(name)
    if self._storage_id is None:
        # Deterministic UUID derived from the fully qualified table name.
        self._storage_id = uuid.uuid3(uuid.NAMESPACE_DNS, self._ksp + '.' + self._table)
    self._build_args = self._build_args._replace(
        storage_id=self._storage_id, name=self._ksp + "." + self._table)
    self._store_meta(self._build_args)

    if config.id_create_schema == -1:
        query_keyspace = "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = %s" % (
            self._ksp, config.replication)
        try:
            log.debug('MAKE PERSISTENCE: %s', query_keyspace)
            config.session.execute(query_keyspace)
        except Exception as ex:
            log.warn("Error creating the StorageDict keyspace %s, %s", (query_keyspace), ex)
            raise ex

    # Recursively persist any IStorage values currently held in memory.
    for key, value in dict.iteritems(self):
        if issubclass(value.__class__, IStorage):
            # new name as ksp+table+obj_class_name
            # NOTE(review): no separator between table and class name here,
            # unlike the '.' used elsewhere -- confirm this is intended.
            val_name = self._ksp + '.' + self._table + type(value).__name__.lower()
            value.make_persistent(val_name)

    # Columns whose type is not basic are stored as uuid references.
    columns = self._primary_keys + self._columns
    for ind, entry in enumerate(columns):
        n = StorageDict._other_case.match(entry[1])
        if n is not None:
            iter_type, intra_type = n.groups()
        else:
            iter_type = entry[1]
        if iter_type not in IStorage._basic_types:
            columns[ind] = entry[0], 'uuid'

    pks = map(lambda a: a[0], self._primary_keys)
    query_table = "CREATE TABLE IF NOT EXISTS %s.%s (%s, PRIMARY KEY (%s));" \
                  % (self._ksp,
                     self._table,
                     ",".join("%s %s" % tup for tup in columns),
                     str.join(',', pks))
    try:
        log.debug('MAKE PERSISTENCE: %s', query_table)
        config.session.execute(query_table)
    except Exception as ex:
        log.warn("Error creating the StorageDict table: %s %s", query_table, ex)
        raise ex

    key_names = map(lambda a: a[0].encode('UTF8'), self._primary_keys)
    column_names = self._columns
    # NOTE(review): this config uses 'write_buffer' while _setup_hcache
    # elsewhere uses 'writer_buffer' -- confirm which key Hcache reads.
    self._hcache_params = (self._ksp, self._table,
                           self._storage_id,
                           self._tokens, key_names,
                           map(lambda x: {"name": x[0], "type": x[1]}, column_names),
                           {'cache_size': config.max_cache_size,
                            'writer_par': config.write_callbacks_number,
                            'write_buffer': config.write_buffer_size})
    log.debug("HCACHE params %s", self._hcache_params)
    self._hcache = Hcache(*self._hcache_params)

    # Storing all in-memory values to cassandra
    for key, value in dict.iteritems(self):
        self._hcache.put_row(self._make_key(key), self._make_value(value))

    if hasattr(self, '_indexed_args') and self._indexed_args is not None:
        index_query = 'CREATE CUSTOM INDEX IF NOT EXISTS ' + self._table + '_idx ON '
        index_query += self._ksp + '.' + self._table + ' (' + str.join(
            ',', self._indexed_args) + ') '
        index_query += "using 'es.bsc.qbeast.index.QbeastIndex';"
        try:
            config.session.execute(index_query)
        except Exception as ex:
            log.error("Error creating the Qbeast custom index: %s %s", index_query, ex)
            raise ex
def test_delete_row(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over a set of particles, performing get_row operations Analyzes: - HCache - Get_row (setting TypeError properly) ''' '''''' table = 'particle' num_keys = 100 # num keys must be multiple of expected_errors expected_errors = 10 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, num_keys): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort token_ranges = [] cache_size = 1 keys = ["partid", "time"] values = ["ciao", "x", "y", "z"] cache_config = {'cache_size': cache_size} cache = Hcache(self.keyspace, table, "", token_ranges, keys, values, cache_config) pk = 0 ck = pk * 10 try: result = cache.get_row([pk, ck]) self.assertEqual(len(result), len(values)) except KeyError as e: self.fail("Error when retrieving value from cache: " + str(e) + " -- " + str([pk, ck])) try: result = cache.delete_row([pk, ck]) except KeyError as e: self.fail("Error when deleteing entry from cache: " + str(e) + " -- " + str([pk, ck])) try: result = cache.get_row([pk, ck]) self.fail( "Error when retrieving value from cache, the entry shouldnt exist" ) except KeyError as e: pass
def test_iterators(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over some text and check coherency between hcache and hiter Analyzes: - HCache - Get_row (read text) - Iteritems (read text) ''' '''''' table = "words" num_keys = 20 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(position int PRIMARY KEY, wordinfo text);" % (self.keyspace, table)) for i in xrange(0, num_keys): vals = ','.join( str(e) for e in [ i, "'someRandomTextForTesting purposes - " + str(i * 60) + "'" ]) self.session.execute( "INSERT INTO %s.%s(position , wordinfo ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort tkns = [(pow(-2, 63) + 1, pow(2, 63) - 1)] keys = ["position"] values = ["wordinfo"] hcache_config = {'cache_size': 100, 'writer_buffer': 20} cache = Hcache(self.keyspace, table, "WHERE token(position)>=? AND token(position)<?;", tkns, keys, values, hcache_config) iter_config = {"prefetch_size": 100, "update_cache": "yes"} myIter = cache.iteritems(iter_config) data = [] for i in xrange(0, 10): data.append(myIter.get_next()) assert (len(data) > 0) first_data = data[0] assert (len(first_data) == 2) first_key = [first_data[0]] assert (type(first_key[0]) == int) somedata = cache.get_row(first_key) # self.assertEqual((first_key + somedata), first_data) assert ((first_key + somedata) == first_data) count = len(data) while True: try: i = myIter.get_next() except StopIteration: print 'End of data, items read: ', count, ' with value ', i break count = count + 1 print 'data was: \n', data