def test_part(self): from hfetch import connectCassandra from hfetch import Hcache import numpy as np '''''' ''' Analyzes: ''' '''''' dims = 2 elem_dim = 128 table = "arrays_numpies" self.session.execute("DROP TABLE if exists %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(storage_id uuid, cluster_id int, block_id int, payload blob,PRIMARY KEY((storage_id,cluster_id),block_id));" % (self.keyspace, table)) storage_id = uuid.uuid3(uuid.NAMESPACE_DNS, self.keyspace + '.' + table) try: connectCassandra(self.contact_names, self.nodePort) except RuntimeError, e: print e print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
def test_iterate_brute(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over a huge amount of data and checks no data is lost Analyzes: - HCache - Iteritems from HCache - Updates the HCache with the prefetched data (iteritems) ''' '''''' table = "particle" nparts = 10000 # Num particles in range self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, nparts): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except RuntimeError, e: print e print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
def test_small_brute(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over a small amount of data using an iterkeys and validates that no column name can be a key and value at the same time Analyzes: - HCache (enforce column can't be key and value at the same time) - Iterkeys ''' '''''' table = "particle" nelems = 10001 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, nelems): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nblocks = 100 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (nelems / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] hcache_config = {'cache_size': '10', 'writer_buffer': 20} keys = ["partid", "time"] values = ["time", "x"] cache = None # this should fail since a key can not be a column name at the same time (key=time, column=time) try: cache = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", tokens, keys, values, hcache_config) except RuntimeError, e: self.assertTrue(True, e)
def test_put_row_text(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' Simple test to store text and retrieve it Analyzes: - HCache - Put_row (write text) - Iteritems (read text) ''' '''''' table = "bulk" self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(partid int PRIMARY KEY, data text);" % (self.keyspace, table)) num_items = int(pow(10, 3)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nblocks = 10 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (num_items / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid"] values = ["data"] hcache_config = {'cache_size': '10', 'writer_buffer': 20} cache = Hcache(self.keyspace, table, "", tokens, keys, values, hcache_config) for i in xrange(0, num_items): cache.put_row([i], ['someRandomText']) # it doesnt make sense to count the read elements # because the data is still being written async hiter = cache.iteritems(10) while True: try: data = hiter.get_next() self.assertEqual(len(data), len(keys) + len(values)) self.assertEqual(data[1], 'someRandomText') except StopIteration: break
def test_simpletest(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' Analyzes: ''' '''''' table = 'particle' nelems = 500 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, nelems): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort keys = ["partid", "time"] values = ["x", "y", "z"] token_ranges = [] # empty configuration parameter (the last dictionary) means to use the default config table = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", token_ranges, keys, values, {}) def get_data(cache, keys): data = None try: data = cache.get_row(keys) self.assertEqual(len(data), len(values)) except KeyError: print 'not found' return data q1 = get_data(table, [433, 4330]) # float(0.003) lost = get_data(table, [133, 1330]) lost = get_data(table, [433, 4330]) q2 = get_data(table, [433, 4330]) self.assertEqual(q1, q2)
def test_connection(self): from hfetch import connectCassandra # Test behaviour when NodePort is None (should return TypeError) test_contact_names = [] test_node_port = None fails = False try: connectCassandra(test_contact_names, test_node_port) except TypeError: fails = True except Exception, e: self.fail(e.message)
def test_multidim(self): from hfetch import connectCassandra from hfetch import Hcache import numpy as np '''''' ''' Analyzes: ''' '''''' dims = 3 elem_dim = 5 try: connectCassandra(self.contact_names, self.nodePort) except RuntimeError, e: print e print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
def test_write_nulls_simple(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' Simple test to store text and retrieve it Analyzes: - HCache - Put_row (write data mixed with nulls) ''' '''''' table = "nulls" self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(partid int PRIMARY KEY, time float, data text);" % (self.keyspace, table)) num_items = int(pow(10, 3)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nblocks = 10 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (num_items / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid"] values = ["time", "data"] hcache_config = {'cache_size': '10', 'writer_buffer': 20} cache = Hcache(self.keyspace, table, "", tokens, keys, values, hcache_config) for i in xrange(0, num_items): cache.put_row( [i], [12, None] ) # random.sample({i,None},1)+random.sample({'SomeRandomText',None},1)) time.sleep(10)
def test_nopart(self): from hfetch import connectCassandra from hfetch import Hcache import numpy as np '''''' ''' Analyzes: ''' '''''' elem_dim = 128 dims = 2 table = "arrays_numpies" try: connectCassandra(self.contact_names, self.nodePort) except RuntimeError, e: print e print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
def test_coherency(self): from hfetch import connectCassandra from hfetch import Hcache from hfetch import HWriter '''''' ''' Analyzes: - HCache ''' '''''' table = "particle" nparts = 10000 # Num particles in range self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort tkns = [] keys = ["partid", "time"] values = ["x", "y", "z"] cache = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", tkns, keys, values, { 'cache_size': '1', 'writer_buffer': 20 }) for i in xrange(0, nparts): cache.put_row([i, i / .1], [i / .2, i / .3, i / .4]) for i in reversed(xrange(0, nparts)): #xrange(nparts, -1, -1): try: cache.get_row([i, i / .1]) except KeyError: str_k = str([i, i / .1]) self.fail(str_k + " not found")
def test_arr_put_get(self): from hfetch import connectCassandra from hfetch import Hcache import numpy as np '''''' ''' Running arr_put_get test Analyzes: ''' '''''' dims = 2 elem_dim = 128 table = "arrays_numpies" print 'Dimensions: ', dims, ' Element in each dim: ', elem_dim try: connectCassandra(self.contact_names, self.nodePort) except RuntimeError, e: print e print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
class Hfetch_Tests(unittest.TestCase):
    # Test fixture for the hfetch C++ bindings against a local Cassandra node.
    # NOTE(review): the Cluster/session are created at class-definition time,
    # so merely importing this module requires a reachable Cassandra instance.
    keyspace = "hfetch_test"          # keyspace all tests operate in
    contact_names = ['127.0.0.1']     # Cassandra contact points
    nodePort = 9042                   # Cassandra native protocol port
    cluster = Cluster(contact_names, port=nodePort)
    session = cluster.connect()

    @classmethod
    def setUpClass(cls):
        # Create the test keyspace once for the whole test class.
        cls.session.execute(
            "CREATE KEYSPACE IF NOT EXISTS %s WITH replication "
            "= {'class': 'SimpleStrategy', 'replication_factor': 1};" % cls.keyspace)

    @classmethod
    def tearDownClass(cls):
        # Keyspace intentionally left behind (drop kept commented out).
        # self.session.execute("DROP KEYSPACE IF EXISTS %s;" % cls.keyspace)
        pass

    def test_connection(self):
        from hfetch import connectCassandra

        # Test behaviour when NodePort is None (should return TypeError)
        test_contact_names = []
        test_node_port = None
        fails = False
        try:
            connectCassandra(test_contact_names, test_node_port)
        except TypeError:
            fails = True
        except Exception, e:
            self.fail(e.message)
        self.assertTrue(fails)
        fails = False
        # Test behaviour when contact_names is an empty text (should return ValueError)
        test_node_port = self.nodePort
        test_contact_names = [123456789]
        try:
            connectCassandra(test_contact_names, test_node_port)
        except TypeError:
            fails = True
        except Exception, e:
            self.fail(e.message)
        # NOTE(review): this method appears truncated here — the final
        # assertion on `fails` seems to continue in a later chunk of the file.
def test_iterators(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over some text and check coherency between hcache and hiter Analyzes: - HCache - Get_row (read text) - Iteritems (read text) ''' '''''' table = "words" num_keys = 20 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE %s.%s(position int PRIMARY KEY, wordinfo text);" % (self.keyspace, table)) for i in xrange(0, num_keys): vals = ','.join( str(e) for e in [ i, "'someRandomTextForTesting purposes - " + str(i * 60) + "'" ]) self.session.execute( "INSERT INTO %s.%s(position , wordinfo ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort tkns = [(pow(-2, 63) + 1, pow(2, 63) - 1)] keys = ["position"] values = ["wordinfo"] hcache_config = {'cache_size': 100, 'writer_buffer': 20} cache = Hcache(self.keyspace, table, "WHERE token(position)>=? AND token(position)<?;", tkns, keys, values, hcache_config) iter_config = {"prefetch_size": 100, "update_cache": "yes"} myIter = cache.iteritems(iter_config) data = [] for i in xrange(0, 10): data.append(myIter.get_next()) assert (len(data) > 0) first_data = data[0] assert (len(first_data) == 2) first_key = [first_data[0]] assert (type(first_key[0]) == int) somedata = cache.get_row(first_key) # self.assertEqual((first_key + somedata), first_data) assert ((first_key + somedata) == first_data) count = len(data) while True: try: i = myIter.get_next() except StopIteration: print 'End of data, items read: ', count, ' with value ', i break count = count + 1 print 'data was: \n', data
def write_test(self): from hfetch import connectCassandra from hfetch import Hcache from hfetch import HWriter '''''' ''' While the iterator retrieves the data from a table, the writer stores it into another table Analyzes: - HCache - HWriter - Iteritems (updating the cache) ''' '''''' table = "particle" table_write = "particle_write" nparts = 6000 # Num particles in range self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table_write)) for i in xrange(0, nparts): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort p = 1000 # Num partitions t_f = -7764607523034234880 # Token begin range # t_t = 5764607523034234880 # Token end range t_t = 7764607523034234880 # Token blocks tkn_size = (t_t - t_f) / (nparts / p) tkns = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["partid", "time"] values = ["x", "y", "z"] a = Hcache(self.keyspace, table, "WHERE token(partid)>=? 
AND token(partid)<?;", tkns, keys, values, { self.keyspace: '100', 'writer_buffer': 20 }) writer = HWriter(self.keyspace, table_write, keys, values, {'writer_buffer': 20}) def readAll(iter, wr): count = 1 while True: try: i = iter.get_next() except StopIteration: print 'End of data, items read: ', count, ' with value ', i break wr.write(i[0:2], i[2:5]) count += 1 if count % 100000 == 0: print count print "iter has %d elements" % count start = time.time() readAll(a.iteritems({ "prefetch_size": 100, "update_cache": "yes" }), writer) print "finshed into %d" % (time.time() - start)
def words_test_hiter(self): from hfetch import connectCassandra from hfetch import HIterator import random import string ''' This test iterates over huge lines of text and verifies the correct behaviour of HIterator By default it acts as an iteritems Analyzes: - HIterator - Iteritems ''' table = "words" nelems = 2000 length_row = 100 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(position int, wordinfo text, PRIMARY KEY(position));" % (self.keyspace, table)) for i in xrange(0, nelems): vals = ','.join([ str(i), "'" + ''.join( random.choice(string.ascii_uppercase + string.ascii_lowercase + " " + string.digits) for _ in range(length_row)) + "'" ]) self.session.execute( "INSERT INTO %s.%s(position,wordinfo) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort nelem = 10 nblocks = 2 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (nelem / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] keys = ["position"] values = ["wordinfo"] hiter_config = {'prefetch_size': '100', 'writer_buffer': 20} itera = HIterator(self.keyspace, table, tokens, keys, values, hiter_config) while True: try: data = itera.get_next() except StopIteration: break
def uuid_test(self): from hfetch import connectCassandra from hfetch import Hcache import uuid '''''' ''' This test check the correct handling of UUIDs Analyzes: - Hcache - Put_row - Iteritems ''' '''''' table = "uuid" self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid uuid, data int, PRIMARY KEY(partid));" % (self.keyspace, table)) nelem = 1000 nblocks = 10 t_f = pow(-2, 63) # Token begin range t_t = pow(2, 63) - 1 # Token blocks tkn_size = (t_t - t_f) / (nelem / nblocks) tokens = [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)] try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort keys = ["partid"] values = ["data"] # CREATE TABLE test.bulk(partid int PRIMARY KEY, data text); cache = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", tokens, keys, values, { 'cache_size': '10', 'writer_buffer': 20 }) # Write data someid = None i = 0 while i < nelem: u = uuid.uuid4() # ('81da81e8-1914-11e7-908d-ecf4bb4c66c4') cache.put_row([u], [i]) if i == nelem / 2: someid = u i += 1 # by recreating the cache we wait until all the data is written cache = Hcache(self.keyspace, table, "WHERE token(partid)>=? AND token(partid)<?;", tokens, keys, values, { 'cache_size': '10', 'writer_buffer': 20 }) # Read data itera = cache.iteritems(10) found = False counter = 0 while True: try: L = uuid.UUID(itera.get_next()[0]) if L == someid: found = True except StopIteration: break counter = counter + 1 self.assertEqual(counter, nelem) self.assertTrue(found)
def test_get_row_key_error(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test check the hcache sets a key error when the key we asked doesnt exist Analyzes: - Hcache - Get_row (returning KeyError) ''' '''''' table = 'particle' num_keys = 10001 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, num_keys): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) token_ranges = [(8070430489100699999, 8070450532247928832)] non_existent_keys = 10 cache_size = num_keys + non_existent_keys try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort keys = ["partid", "time"] values = ["ciao", "x", "y", "z"] cache = Hcache(self.keyspace, table, "", token_ranges, keys, values, {'cache_size': cache_size}) # Access the cache, which is empty and queries cassandra to retrieve the data t1 = time.time() error_counter = 0 for pk in xrange(0, num_keys + non_existent_keys): ck = pk * 10 try: result = cache.get_row([pk, ck]) self.assertEqual(len(result), len(values)) except KeyError as e: error_counter = error_counter + 1 print 'Retrieved {0} keys in {1} seconds. 
{2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format( unicode(str(num_keys), 'utf-8'), unicode(str(time.time() - t1), 'utf-8'), unicode(str(error_counter), 'utf-8'), unicode(str(non_existent_keys), 'utf-8')) self.assertEqual(error_counter, non_existent_keys) # Access the cache, which has already all the data and will ask cassandra only if # the keys asked are not present t1 = time.time() error_counter = 0 for pk in xrange(0, num_keys + non_existent_keys): ck = pk * 10 try: result = cache.get_row([pk, ck]) self.assertEqual(len(result), len(values)) except KeyError as e: error_counter = error_counter + 1 print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format( unicode(str(num_keys), 'utf-8'), unicode(str(time.time() - t1), 'utf-8'), unicode(str(error_counter), 'utf-8'), unicode(str(non_existent_keys), 'utf-8')) self.assertEqual(error_counter, non_existent_keys)
def __init__(self):
    # One-time configuration of the Hecuba global singleton from environment
    # variables, followed by creation of the global Cassandra session and the
    # metadata keyspaces/tables.
    # NOTE(review): this body was reconstructed from a whitespace-mangled
    # source; statement order is preserved, but block nesting in the
    # TOKEN_RANGE_SIZE section was inferred — confirm against the original.
    singleton = Config.instance
    if singleton.configured:
        log.info('setting down')
        return
    singleton.configured = True

    # HECUBA_ARROW: anything but 'no'/'false' enables Arrow access.
    if 'HECUBA_ARROW' in os.environ:
        env_var = os.environ['HECUBA_ARROW'].lower()
        singleton.arrow_enabled = False if env_var == 'no' or env_var == 'false' else True
        log.info('HECUBA_ARROW: {}'.format(singleton.arrow_enabled))
    else:
        singleton.arrow_enabled = False
        log.warn('Arrow access is DISABLED [HECUBA_ARROW=%s]', singleton.arrow_enabled)

    # CONCURRENT_CREATION: only the exact string 'True' enables it.
    if 'CONCURRENT_CREATION' in os.environ:
        if os.environ['CONCURRENT_CREATION'] == 'True':
            singleton.concurrent_creation = True
        else:
            singleton.concurrent_creation = False
        log.info('CONCURRENT_CREATION: %s', str(singleton.concurrent_creation))
    else:
        singleton.concurrent_creation = False
        log.warn('Concurrent creation is DISABLED [CONCURRENT_CREATION=False]')

    # LOAD_ON_DEMAND: enabled unless the exact string 'False' is set.
    if 'LOAD_ON_DEMAND' in os.environ:
        if os.environ['LOAD_ON_DEMAND'] == 'False':
            singleton.load_on_demand = False
        else:
            singleton.load_on_demand = True
        log.info('LOAD_ON_DEMAND: %s', str(singleton.load_on_demand))
    else:
        singleton.load_on_demand = True
        log.warn('Load data on demand is ENABLED [LOAD_ON_DEMAND=True]')

    # CREATE_SCHEMA: anything but 'no'/'false' creates keyspaces/tables.
    if 'CREATE_SCHEMA' in os.environ:
        env_var = os.environ['CREATE_SCHEMA'].lower()
        singleton.id_create_schema = False if env_var == 'no' or env_var == 'false' else True
        log.info('CREATE_SCHEMA: %d', singleton.id_create_schema)
    else:
        singleton.id_create_schema = True
        log.warn('Creating keyspaces and tables by default [CREATE_SCHEMA=True]')

    try:
        singleton.nodePort = int(os.environ['NODE_PORT'])
        log.info('NODE_PORT: %d', singleton.nodePort)
    except KeyError:
        log.warn('using default NODE_PORT 9042')
        singleton.nodePort = 9042

    try:
        singleton.contact_names = os.environ['CONTACT_NAMES'].split(",")
        log.info('CONTACT_NAMES: %s', str.join(" ", singleton.contact_names))
        # Convert node names to ips if needed
        import socket
        contact_names_ips = []
        show_translation = False
        for h_name in singleton.contact_names:
            IP_addres = socket.gethostbyname(h_name)
            if (IP_addres != h_name):
                show_translation = True
            contact_names_ips.append(IP_addres)
        singleton.contact_names = contact_names_ips
        if show_translation:
            log.info('CONTACT_NAMES: %s', str.join(" ", singleton.contact_names))
    except KeyError:
        log.warn('using default contact point localhost')
        singleton.contact_names = ['127.0.0.1']

    # Re-configuration: drop any previously created session/cluster.
    if hasattr(singleton, 'session'):
        log.warn('Shutting down pre-existent sessions and cluster')
        try:
            singleton.session.shutdown()
            singleton.cluster.shutdown()
        except Exception:
            log.warn('error shutting down')

    try:
        singleton.replication_factor = int(os.environ['REPLICA_FACTOR'])
        log.info('REPLICA_FACTOR: %d', singleton.replication_factor)
    except KeyError:
        singleton.replication_factor = 1
        log.warn('using default REPLICA_FACTOR: %d', singleton.replication_factor)

    try:
        user_defined_execution_name = os.environ['EXECUTION_NAME']
        # 'hecuba' is reserved for the metadata keyspace.
        if user_defined_execution_name == 'hecuba':
            raise RuntimeError('Error: the application keyspace cannot be \'hecuba\'. '
                               'This keyspace is reserved for storing metadata.')
        singleton.execution_name = user_defined_execution_name
        log.info('EXECUTION_NAME: %s', singleton.execution_name)
    except KeyError:
        singleton.execution_name = 'my_app'
        log.warn('using default EXECUTION_NAME: %s', singleton.execution_name)

    try:
        singleton.splits_per_node = int(os.environ['SPLITS_PER_NODE'])
        log.info('SPLITS_PER_NODE: %d', singleton.splits_per_node)
    except KeyError:
        singleton.splits_per_node = 32
        log.warn('using default SPLITS_PER_NODE: %d', singleton.splits_per_node)

    # TOKEN_RANGE_SIZE overrides TARGET_TOKEN_RANGE_SIZE; only when it is
    # absent is TARGET_TOKEN_RANGE_SIZE consulted (default 64KB).
    try:
        singleton.token_range_size = int(os.environ['TOKEN_RANGE_SIZE'])
        log.info('TOKEN_RANGE_SIZE: %d', singleton.token_range_size)
        singleton.target_token_range_size = None
    except KeyError:
        singleton.token_range_size = None
        try:
            singleton.target_token_range_size = int(os.environ['TARGET_TOKEN_RANGE_SIZE'])
            log.info('TARGET_TOKEN_RANGE_SIZE: %d', singleton.target_token_range_size)
        except KeyError:
            singleton.target_token_range_size = 64 * 1024
            log.warn('using default TARGET_TOKEN_RANGE_SIZE: %d',
                     singleton.target_token_range_size)

    try:
        singleton.max_cache_size = int(os.environ['MAX_CACHE_SIZE'])
        log.info('MAX_CACHE_SIZE: %d', singleton.max_cache_size)
    except KeyError:
        singleton.max_cache_size = 1000
        log.warn('using default MAX_CACHE_SIZE: %d', singleton.max_cache_size)

    try:
        singleton.replication_strategy = os.environ['REPLICATION_STRATEGY']
        log.info('REPLICATION_STRATEGY: %s', singleton.replication_strategy)
    except KeyError:
        singleton.replication_strategy = "SimpleStrategy"
        log.warn('using default REPLICATION_STRATEGY: %s', singleton.replication_strategy)

    try:
        singleton.replication_strategy_options = os.environ['REPLICATION_STRATEGY_OPTIONS']
        log.info('REPLICATION_STRATEGY_OPTIONS: %s', singleton.replication_strategy_options)
    except KeyError:
        singleton.replication_strategy_options = ""
        log.warn('using default REPLICATION_STRATEGY_OPTIONS: %s',
                 singleton.replication_strategy_options)

    # Build the CQL replication map used by every CREATE KEYSPACE below.
    if singleton.replication_strategy == "SimpleStrategy":
        singleton.replication = "{'class' : 'SimpleStrategy', 'replication_factor': %d}" % \
                                singleton.replication_factor
    else:
        singleton.replication = "{'class' : '%s', %s}" % (
            singleton.replication_strategy, singleton.replication_strategy_options)

    try:
        singleton.hecuba_print_limit = int(os.environ['HECUBA_PRINT_LIMIT'])
        log.info('HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)
    except KeyError:
        singleton.hecuba_print_limit = 1000
        log.warn('using default HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)

    try:
        singleton.prefetch_size = int(os.environ['PREFETCH_SIZE'])
        log.info('PREFETCH_SIZE: %s', singleton.prefetch_size)
    except KeyError:
        singleton.prefetch_size = 10000
        log.warn('using default PREFETCH_SIZE: %s', singleton.prefetch_size)

    try:
        singleton.write_buffer_size = int(os.environ['WRITE_BUFFER_SIZE'])
        log.info('WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)
    except KeyError:
        singleton.write_buffer_size = 1000
        log.warn('using default WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)

    try:
        singleton.write_callbacks_number = int(os.environ['WRITE_CALLBACKS_NUMBER'])
        log.info('WRITE_CALLBACKS_NUMBER: %s', singleton.write_callbacks_number)
    except KeyError:
        singleton.write_callbacks_number = 16
        log.warn('using default WRITE_CALLBACKS_NUMBER: %s',
                 singleton.write_callbacks_number)

    try:
        env_var = os.environ['TIMESTAMPED_WRITES'].lower()
        singleton.timestamped_writes = False if env_var == 'no' or env_var == 'false' else True
        log.info('TIMESTAMPED WRITES ENABLED? {}'.format(singleton.timestamped_writes))
    except KeyError:
        singleton.timestamped_writes = False
        log.warn('using default TIMESTAMPED_WRITES: %s', singleton.timestamped_writes)

    if singleton.max_cache_size < singleton.write_buffer_size:
        import warnings
        message = "Defining a MAX_CACHE_SIZE smaller than WRITE_BUFFER_SIZE can result " \
                  "in reading outdated results from the persistent storage"
        warnings.warn(message)

    log.info('Initializing global session')
    singleton.cluster = Cluster(contact_points=singleton.contact_names,
                                load_balancing_policy=TokenAwarePolicy(RoundRobinPolicy()),
                                port=singleton.nodePort,
                                default_retry_policy=_NRetry(5))
    singleton.session = singleton.cluster.connect()
    # Encode Python tuples as CQL tuples.
    singleton.session.encoder.mapping[tuple] = singleton.session.encoder.cql_encode_tuple

    if singleton.concurrent_creation:
        # Set up the lock table used to serialize concurrent schema creation.
        configure_lock = [
            """CREATE KEYSPACE IF NOT EXISTS hecuba_locks WITH replication= {'class': 'SimpleStrategy', 'replication_factor': 1}; """,
            """CREATE TABLE IF NOT EXISTS hecuba_locks.table_lock (table_name text, PRIMARY KEY (table_name)); """,
            "TRUNCATE table hecuba_locks.table_lock;"
        ]
        for query in configure_lock:
            try:
                # NOTE(review): accesses the session through `self.instance`
                # rather than the local `singleton` — same object, but
                # inconsistent with the rest of this method; confirm intended.
                self.instance.session.execute(query)
            except Exception as e:
                log.error("Error executing query %s" % query)
                raise e
        # LWT insert used as a distributed lock.
        singleton._query_to_lock = singleton.session.prepare(
            "INSERT into hecuba_locks.table_lock (table_name) values (?) if not exists;")

    if singleton.id_create_schema:
        # Create the hecuba metadata keyspace, UDTs, istorage table and the
        # application keyspace.
        queries = [
            "CREATE KEYSPACE IF NOT EXISTS hecuba WITH replication = %s" % singleton.replication,
            """CREATE TYPE IF NOT EXISTS hecuba.q_meta(
            mem_filter text,
            from_point frozen<list<double>>,
            to_point frozen<list<double>>,
            precision float);
            """,
            """CREATE TYPE IF NOT EXISTS hecuba.np_meta (flags int, elem_size int,
            partition_type tinyint,
            dims list<int>, strides list<int>, typekind text, byteorder text)""",
            """CREATE TABLE IF NOT EXISTS hecuba
            .istorage (storage_id uuid,
            class_name text,name text,
            istorage_props map<text,text>,
            tokens list<frozen<tuple<bigint,bigint>>>,
            indexed_on list<text>,
            qbeast_random text,
            qbeast_meta frozen<q_meta>,
            numpy_meta frozen<np_meta>,
            block_id int,
            base_numpy uuid,
            view_serialization blob,
            primary_keys list<frozen<tuple<text,text>>>,
            columns list<frozen<tuple<text,text>>>,
            PRIMARY KEY(storage_id));
            """,
            "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = %s"
            % (singleton.execution_name, singleton.replication)]
        for query in queries:
            try:
                self.executelocked(query)
            except Exception as e:
                log.error("Error executing query %s" % query)
                raise e

    from hfetch import connectCassandra, HArrayMetadata
    # connecting c++ bindings
    connectCassandra(singleton.contact_names, singleton.nodePort)
    if singleton.id_create_schema:
        # presumably waits for schema agreement across nodes — TODO confirm
        time.sleep(10)
    singleton.cluster.register_user_type('hecuba', 'np_meta', HArrayMetadata)
def test_get_row(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over a set of particles, performing get_row operations Analyzes: - HCache (multiple reads of the same key) - Get_row ''' '''''' table = 'particle' num_keys = 10001 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, num_keys): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort token_ranges = [] cache_size = 10001 keys = ["partid", "time"] values = ["ciao", "x", "y", "z"] cache_config = {'cache_size': cache_size} cache = Hcache(self.keyspace, table, "", token_ranges, keys, values, cache_config) # clustering key t1 = time.time() for pk in xrange(0, num_keys): ck = pk * 10 try: result = cache.get_row([pk, ck]) self.assertEqual(len(result), len(values)) except KeyError as e: print "Error when retrieving value from cache:", e, [pk, ck] print 'time - load C++ cache with cassandra data: ', time.time() - t1 t1 = time.time() for pk in xrange(0, num_keys): ck = pk * 10 try: result = cache.get_row([pk, ck]) self.assertEqual(len(result), len(values)) except KeyError as e: print "Error when retrieving value from cache:", e, [pk, ck] # print 'items in res: ',len(result) print 'time - read data from C++ cache: ', time.time() - t1 py_dict = {} cache = Hcache(self.keyspace, table, "", [(8070430489100699999, 8070450532247928832)], ["partid", "time"], ["ciao", "x", "y", "z"], {'cache_size': num_keys}) t1 = time.time() for pk in 
xrange(0, num_keys): ck = pk * 10 try: result = cache.get_row([pk, ck]) py_dict[(pk, ck)] = result self.assertEqual(len(result), len(values)) except KeyError as e: print "Error when retrieving value from cache:", e, [pk, ck] print 'time - load data into python dict: ', time.time() - t1 # print 'size ', len(py_dict) # print 'items in res: ',len(py_dict[1]) t1 = time.time() for pk in xrange(0, num_keys): ck = pk * 10 try: result = py_dict[(pk, ck)] self.assertEqual(len(result), len(values)) except KeyError as e: print "Error when retrieving value from cache:", e, [pk, ck] print 'time - read data from the python dict: ', time.time() - t1
test_contact_names = [123456789] try: connectCassandra(test_contact_names, test_node_port) except TypeError: fails = True except Exception, e: self.fail(e.message) self.assertTrue(fails) fails = False # Test behaviour when contact_names is an empty text (should return ValueError) test_node_port = self.nodePort test_contact_names = [''] try: connectCassandra(test_contact_names, test_node_port) except ValueError: fails = True except Exception, e: self.fail(e.message) self.assertTrue(fails) fails = False # if no contact point specified, connects to 127.0.0.1 try: self.contact_names.index( '127.0.0.1') # raises value error if not present test_contact_names = [] connectCassandra(test_contact_names, test_node_port) except ValueError:
def test_delete_row(self): from hfetch import connectCassandra from hfetch import Hcache '''''' ''' This test iterates over a set of particles, performing get_row operations Analyzes: - HCache - Get_row (setting TypeError properly) ''' '''''' table = 'particle' num_keys = 100 # num keys must be multiple of expected_errors expected_errors = 10 self.session.execute("DROP TABLE IF EXISTS %s.%s;" % (self.keyspace, table)) self.session.execute( "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text," "x float, y float, z float, PRIMARY KEY(partid,time));" % (self.keyspace, table)) for i in xrange(0, num_keys): vals = ','.join( str(e) for e in [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"]) self.session.execute( "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)" % (self.keyspace, table, vals)) try: connectCassandra(self.contact_names, self.nodePort) except Exception: print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort token_ranges = [] cache_size = 1 keys = ["partid", "time"] values = ["ciao", "x", "y", "z"] cache_config = {'cache_size': cache_size} cache = Hcache(self.keyspace, table, "", token_ranges, keys, values, cache_config) pk = 0 ck = pk * 10 try: result = cache.get_row([pk, ck]) self.assertEqual(len(result), len(values)) except KeyError as e: self.fail("Error when retrieving value from cache: " + str(e) + " -- " + str([pk, ck])) try: result = cache.delete_row([pk, ck]) except KeyError as e: self.fail("Error when deleteing entry from cache: " + str(e) + " -- " + str([pk, ck])) try: result = cache.get_row([pk, ck]) self.fail( "Error when retrieving value from cache, the entry shouldnt exist" ) except KeyError as e: pass
def reset(mock_cassandra=False):
    '''Configure (or reconfigure) the Config singleton from the environment.

    Reads connection, replication, cache and qbeast settings from
    environment variables (falling back to logged defaults), then either
    installs mock cluster/session objects (mock_cassandra=True) or opens
    the global Cassandra session, connects the C++ bindings and creates
    the hecuba metadata schema.
    '''
    singleton = Config.instance
    # Already configured in the requested mode: nothing to do.
    if singleton.configured and singleton.mock_cassandra == mock_cassandra:
        log.info('setting down')
        return
    singleton.mock_cassandra = mock_cassandra
    log.info('setting up configuration with mock_cassandra = %s', mock_cassandra)
    singleton.configured = True

    if 'CREATE_SCHEMA' in os.environ:
        singleton.id_create_schema = int(os.environ['CREATE_SCHEMA'])
    else:
        singleton.id_create_schema = -1

    if mock_cassandra:
        log.info('configuring mock environment')
    else:
        log.info('configuring production environment')

    try:
        singleton.nodePort = int(os.environ['NODE_PORT'])
        log.info('NODE_PORT: %d', singleton.nodePort)
    except KeyError:
        log.warn('using default NODE_PORT 9042')
        singleton.nodePort = 9042

    try:
        singleton.contact_names = os.environ['CONTACT_NAMES'].split(",")
        log.info('CONTACT_NAMES: %s', str.join(" ", singleton.contact_names))
    except KeyError:
        log.warn('using default contact point localhost')
        singleton.contact_names = ['127.0.0.1']

    if hasattr(singleton, 'session'):
        log.warn('Shutting down pre-existent sessions and cluster')
        try:
            singleton.session.shutdown()
            singleton.cluster.shutdown()
        # BUG FIX: was "except _:" -- `_` is undefined here, so any shutdown
        # failure raised NameError instead of being logged.
        except Exception:
            log.warn('error shutting down')

    try:
        singleton.replication_factor = int(os.environ['REPLICA_FACTOR'])
        log.info('REPLICA_FACTOR: %d', singleton.replication_factor)
    except KeyError:
        singleton.replication_factor = 1
        log.warn('using default REPLICA_FACTOR: %d', singleton.replication_factor)

    try:
        user_defined_execution_name = os.environ['EXECUTION_NAME']
        # 'hecuba' is reserved for the framework's own metadata keyspace.
        if user_defined_execution_name == 'hecuba':
            raise RuntimeError('Error: the application keyspace cannot be \'hecuba\'. '
                               'This keyspace is reserved for storing metadata.')
        singleton.execution_name = user_defined_execution_name
        log.info('EXECUTION_NAME: %s', singleton.execution_name)
    except KeyError:
        singleton.execution_name = 'my_app'
        log.warn('using default EXECUTION_NAME: %s', singleton.execution_name)

    try:
        singleton.number_of_partitions = int(os.environ['NUMBER_OF_BLOCKS'])
        log.info('NUMBER_OF_BLOCKS: %d', singleton.number_of_partitions)
    except KeyError:
        singleton.number_of_partitions = 32
        log.warn('using default NUMBER_OF_BLOCKS: %d', singleton.number_of_partitions)

    try:
        singleton.min_number_of_tokens = int(os.environ['MIN_NUMBER_OF_TOKENS'])
        log.info('MIN_NUMBER_OF_TOKENS: %d', singleton.min_number_of_tokens)
    except KeyError:
        singleton.min_number_of_tokens = 1024
        log.warn('using default MIN_NUMBER_OF_TOKENS: %d', singleton.min_number_of_tokens)

    try:
        singleton.max_cache_size = int(os.environ['MAX_CACHE_SIZE'])
        log.info('MAX_CACHE_SIZE: %d', singleton.max_cache_size)
    except KeyError:
        singleton.max_cache_size = 100
        log.warn('using default MAX_CACHE_SIZE: %d', singleton.max_cache_size)

    try:
        singleton.replication_strategy = os.environ['REPLICATION_STRATEGY']
        log.info('REPLICATION_STRATEGY: %s', singleton.replication_strategy)
    except KeyError:
        singleton.replication_strategy = "SimpleStrategy"
        log.warn('using default REPLICATION_STRATEGY: %s', singleton.replication_strategy)

    try:
        singleton.replication_strategy_options = os.environ['REPLICATION_STRATEGY_OPTIONS']
        log.info('REPLICATION_STRATEGY_OPTIONS: %s', singleton.replication_strategy_options)
    except KeyError:
        singleton.replication_strategy_options = ""
        log.warn('using default REPLICATION_STRATEGY_OPTIONS: %s',
                 singleton.replication_strategy_options)

    # BUG FIX: was `is "SimpleStrategy"` -- identity comparison on a string
    # is interning-dependent; use equality.
    if singleton.replication_strategy == "SimpleStrategy":
        singleton.replication = "{'class' : 'SimpleStrategy', 'replication_factor': %d}" % \
            singleton.replication_factor
    else:
        singleton.replication = "{'class' : '%s', %s}" % (
            singleton.replication_strategy, singleton.replication_strategy_options)

    try:
        singleton.hecuba_print_limit = int(os.environ['HECUBA_PRINT_LIMIT'])
        log.info('HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)
    except KeyError:
        singleton.hecuba_print_limit = 1000
        log.warn('using default HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)

    # NOTE: this block appeared twice verbatim in the original; the
    # duplicate has been removed.
    try:
        singleton.hecuba_type_checking = os.environ['HECUBA_TYPE_CHECKING'].lower() == 'true'
        log.info('HECUBA_TYPE_CHECKING: %s', singleton.hecuba_type_checking)
    except KeyError:
        singleton.hecuba_type_checking = False
        log.warn('using default HECUBA_TYPE_CHECKING: %s', singleton.hecuba_type_checking)

    try:
        singleton.prefetch_size = int(os.environ['PREFETCH_SIZE'])
        log.info('PREFETCH_SIZE: %s', singleton.prefetch_size)
    except KeyError:
        singleton.prefetch_size = 10000
        log.warn('using default PREFETCH_SIZE: %s', singleton.prefetch_size)

    try:
        singleton.write_buffer_size = int(os.environ['WRITE_BUFFER_SIZE'])
        log.info('WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)
    except KeyError:
        singleton.write_buffer_size = 1000
        log.warn('using default WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)

    try:
        singleton.write_callbacks_number = int(os.environ['WRITE_CALLBACKS_NUMBER'])
        log.info('WRITE_CALLBACKS_NUMBER: %s', singleton.write_callbacks_number)
    except KeyError:
        singleton.write_callbacks_number = 16
        log.warn('using default WRITE_CALLBACKS_NUMBER: %s', singleton.write_callbacks_number)

    try:
        singleton.qbeast_master_port = int(os.environ['QBEAST_MASTER_PORT'])
        log.info('QBEAST_MASTER_PORT: %d', singleton.qbeast_master_port)
    except KeyError:
        log.warn('using default qbeast master port 2600')
        singleton.qbeast_master_port = 2600

    try:
        singleton.qbeast_worker_port = int(os.environ['QBEAST_WORKER_PORT'])
        log.info('QBEAST_WORKER_PORT: %d', singleton.qbeast_worker_port)
    except KeyError:
        log.warn('using default qbeast worker port 2688')
        singleton.qbeast_worker_port = 2688

    try:
        singleton.qbeast_entry_node = os.environ['QBEAST_ENTRY_NODE'].split(",")
        log.info('QBEAST_ENTRY_NODE: %s', singleton.qbeast_entry_node)
    except KeyError:
        log.warn('using default qbeast entry node localhost')
        import socket
        singleton.qbeast_entry_node = [socket.gethostname()]

    # BUG FIX (next three blocks): was int(os.environ[...].split(",")) --
    # int() of a list always raises TypeError, which is not caught (only
    # KeyError is), so setting any of these env vars crashed startup.
    try:
        singleton.qbeast_max_results = int(os.environ['QBEAST_MAX_RESULTS'])
        log.info('QBEAST_MAX_RESULTS: %d', singleton.qbeast_max_results)
    except KeyError:
        log.warn('using default qbeast max results 10000000')
        singleton.qbeast_max_results = 10000000

    try:
        singleton.qbeast_return_at_least = int(os.environ['RETURN_AT_LEAST'])
        log.info('QBEAST_RETURN_AT_LEAST: %d', singleton.qbeast_return_at_least)
    except KeyError:
        log.warn('using default qbeast return at least 100')
        singleton.qbeast_return_at_least = 100

    try:
        singleton.qbeast_read_max = int(os.environ['READ_MAX'])
        log.info('QBEAST_READ_MAX: %d', singleton.qbeast_read_max)
    except KeyError:
        log.warn('using default qbeast read max 10000')
        singleton.qbeast_read_max = 10000

    if mock_cassandra:
        # Lightweight stand-ins so the rest of the framework can run
        # without a live Cassandra cluster.
        class clusterMock:
            def __init__(self):
                from cassandra.metadata import Metadata
                self.metadata = Metadata()
                self.metadata.rebuild_token_map("Murmur3Partitioner", {})

        class sessionMock:
            def execute(self, *args, **kwargs):
                log.info('called mock.session')
                return []

            def prepare(self, *args, **kwargs):
                return self

            def bind(self, *args, **kwargs):
                return self

        singleton.cluster = clusterMock()
        singleton.session = sessionMock()
    else:
        log.info('Initializing global session')
        try:
            singleton.cluster = Cluster(contact_points=singleton.contact_names,
                                        port=singleton.nodePort,
                                        default_retry_policy=_NRetry(5))
            singleton.session = singleton.cluster.connect()
            singleton.session.encoder.mapping[tuple] = \
                singleton.session.encoder.cql_encode_tuple
            from hfetch import connectCassandra
            # connecting c++ bindings
            connectCassandra(singleton.contact_names, singleton.nodePort)
            if singleton.id_create_schema == -1:
                # Create the application keyspace plus the hecuba metadata
                # keyspace, UDT and istorage table.
                singleton.session.execute(
                    "CREATE KEYSPACE IF NOT EXISTS %s WITH REPLICATION = %s"
                    % (singleton.execution_name, singleton.replication))
                singleton.session.execute(
                    ('CREATE KEYSPACE IF NOT EXISTS hecuba'
                     + " WITH replication = %s" % singleton.replication))
                singleton.session.execute(
                    'CREATE TYPE IF NOT EXISTS hecuba.q_meta('
                    'mem_filter text, '
                    'from_point frozen < list < float >>,'
                    'to_point frozen < list < float >>,'
                    'precision float)')
                singleton.session.execute(
                    'CREATE TABLE IF NOT EXISTS hecuba' +
                    '.istorage (storage_id uuid, '
                    'class_name text,name text, '
                    'istorage_props map<text,text>, '
                    'tokens list<frozen<tuple<bigint,bigint>>>,'
                    'indexed_on list<text>,'
                    'entry_point text,'
                    'qbeast_id uuid,'
                    'qbeast_meta q_meta,'
                    'primary_keys list<frozen<tuple<text,text>>>,'
                    'columns list<frozen<tuple<text,text>>>,'
                    'PRIMARY KEY(storage_id))')
        except Exception as e:
            log.error('Exception creating cluster session. Are you in a testing env? %s', e)