Exemplo n.º 1
0
    def test_part(self):
        """Set up the 'arrays_numpies' blob table and connect to Cassandra.

        NOTE(review): the body ends right after the connection attempt, so
        this snippet looks truncated -- no numpy data is actually written
        or read here.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        import numpy as np
        '''''' '''
        
        Analyzes:
        
        ''' ''''''

        # Array geometry: 2 dimensions, 128 elements per dimension.
        dims = 2
        elem_dim = 128
        table = "arrays_numpies"

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE if exists %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(storage_id uuid, cluster_id int, block_id int, payload blob,PRIMARY KEY((storage_id,cluster_id),block_id));"
            % (self.keyspace, table))

        # Deterministic UUID derived from "<keyspace>.<table>" (uuid3 = MD5).
        # NOTE(review): relies on a module-level `import uuid` not shown here.
        storage_id = uuid.uuid3(uuid.NAMESPACE_DNS,
                                self.keyspace + '.' + table)

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            # Connection problems are only reported, not failed, here.
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
Exemplo n.º 2
0
    def test_iterate_brute(self):
        """Populate a large 'particle' table and connect to Cassandra.

        NOTE(review): the body ends right after the connection attempt; the
        iteration/no-data-lost verification described below appears to be
        truncated from this snippet.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        This test iterates over a huge amount of data and checks no data is lost
        
        Analyzes:
        - HCache
        - Iteritems from HCache
        - Updates the HCache with the prefetched data (iteritems)
        ''' ''''''

        table = "particle"
        nparts = 10000  # Num particles in range

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        # Insert nparts rows; float columns are derived from the partition
        # id, and the text column is single-quoted inline for CQL.
        for i in xrange(0, nparts):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            # Connection problems are only reported, not failed, here.
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
Exemplo n.º 3
0
    def test_small_brute(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        This test iterates over a small amount of data using an iterkeys and validates that
        no column name can be a key and value at the same time
        
        Analyzes:
        - HCache (enforce column can't be key and value at the same time)
        - Iterkeys
        ''' ''''''

        table = "particle"
        nelems = 10001

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        for i in xrange(0, nelems):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nblocks = 100

        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Token blocks
        tkn_size = (t_t - t_f) / (nelems / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        hcache_config = {'cache_size': '10', 'writer_buffer': 20}
        keys = ["partid", "time"]
        values = ["time", "x"]

        cache = None
        # this should fail since a key can not be a column name at the same time (key=time, column=time)
        try:
            cache = Hcache(self.keyspace, table,
                           "WHERE token(partid)>=? AND token(partid)<?;",
                           tokens, keys, values, hcache_config)
        except RuntimeError, e:
            self.assertTrue(True, e)
Exemplo n.º 4
0
    def test_put_row_text(self):
        """Write text rows with put_row and read them back with iteritems."""
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        Simple test to store text and retrieve it
        
        Analyzes:
        - HCache
        - Put_row (write text)
        - Iteritems (read text)
        ''' ''''''

        table = "bulk"

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(partid int PRIMARY KEY, data text);" %
            (self.keyspace, table))

        num_items = int(pow(10, 3))  # 1000 rows

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nblocks = 10
        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Partition the full token space into equally sized blocks.
        tkn_size = (t_t - t_f) / (num_items / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        keys = ["partid"]
        values = ["data"]

        hcache_config = {'cache_size': '10', 'writer_buffer': 20}

        cache = Hcache(self.keyspace, table, "", tokens, keys, values,
                       hcache_config)
        # Write every row through the cache's buffered (async) writer.
        for i in xrange(0, num_items):
            cache.put_row([i], ['someRandomText'])

        # it doesnt make sense to count the read elements
        # because the data is still being written async
        hiter = cache.iteritems(10)
        # Each returned item is keys + values; only the text payload is
        # checked, not the number of rows read.
        while True:
            try:
                data = hiter.get_next()
                self.assertEqual(len(data), len(keys) + len(values))
                self.assertEqual(data[1], 'someRandomText')
            except StopIteration:
                break
Exemplo n.º 5
0
    def test_simpletest(self):
        """Read rows through get_row and check repeated reads agree."""
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        
        Analyzes:
        ''' ''''''

        table = 'particle'
        nelems = 500

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        # Insert nelems rows with values derived from the partition id.
        for i in xrange(0, nelems):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        keys = ["partid", "time"]
        values = ["x", "y", "z"]
        token_ranges = []
        # empty configuration parameter (the last dictionary) means to use the default config
        # NOTE: `table` is rebound here from the table-name string to the
        # Hcache instance itself.
        table = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;",
                       token_ranges, keys, values, {})

        def get_data(cache, keys):
            # Fetch one row by key; returns None when the key is missing.
            data = None
            try:
                data = cache.get_row(keys)
                self.assertEqual(len(data), len(values))
            except KeyError:
                print 'not found'
            return data

        # Re-reading the same key must return identical data (cache hit).
        q1 = get_data(table, [433, 4330])  # float(0.003)
        lost = get_data(table, [133, 1330])
        lost = get_data(table, [433, 4330])
        q2 = get_data(table, [433, 4330])
        self.assertEqual(q1, q2)
Exemplo n.º 6
0
    def test_connection(self):
        from hfetch import connectCassandra

        # Test behaviour when NodePort is None (should return TypeError)
        test_contact_names = []
        test_node_port = None
        fails = False
        try:
            connectCassandra(test_contact_names, test_node_port)
        except TypeError:
            fails = True
        except Exception, e:
            self.fail(e.message)
Exemplo n.º 7
0
    def test_multidim(self):
        """Connect to Cassandra for a multidimensional-array scenario.

        NOTE(review): the body ends right after the connection attempt, so
        no multidimensional data is actually exercised in this snippet.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        import numpy as np
        '''''' '''
        
        Analyzes:
        
        ''' ''''''
        # Array geometry: 3 dimensions, 5 elements per dimension.
        dims = 3
        elem_dim = 5

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            # Connection problems are only reported, not failed, here.
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
Exemplo n.º 8
0
    def test_write_nulls_simple(self):
        """Write rows whose value list mixes real data with None."""
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        Simple test to store text and retrieve it

        Analyzes:
        - HCache
        - Put_row (write data mixed with nulls)
        ''' ''''''

        table = "nulls"

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(partid int PRIMARY KEY, time float, data text);"
            % (self.keyspace, table))

        num_items = int(pow(10, 3))  # 1000 rows

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nblocks = 10
        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Partition the full token space into equally sized blocks.
        tkn_size = (t_t - t_f) / (num_items / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        keys = ["partid"]
        values = ["time", "data"]

        hcache_config = {'cache_size': '10', 'writer_buffer': 20}

        cache = Hcache(self.keyspace, table, "", tokens, keys, values,
                       hcache_config)
        # Every row writes time=12 and a NULL text column.
        for i in xrange(0, num_items):
            cache.put_row(
                [i], [12, None]
            )  # random.sample({i,None},1)+random.sample({'SomeRandomText',None},1))
        # Give the async writer time to flush before the test ends.
        # NOTE(review): relies on a module-level `import time` not shown here.
        time.sleep(10)
Exemplo n.º 9
0
    def test_nopart(self):
        """Connect to Cassandra for the non-partitioned numpy scenario.

        NOTE(review): the body ends right after the connection attempt, so
        the 'arrays_numpies' table is named but never touched here.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        import numpy as np
        '''''' '''
        
        Analyzes:
        
        ''' ''''''

        # Array geometry: 2 dimensions, 128 elements per dimension.
        elem_dim = 128
        dims = 2
        table = "arrays_numpies"

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            # Connection problems are only reported, not failed, here.
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
Exemplo n.º 10
0
    def test_coherency(self):
        """Every row written through the cache must be readable afterwards,
        even with a cache sized for a single entry (forces evictions)."""
        from hfetch import connectCassandra
        from hfetch import Hcache
        from hfetch import HWriter
        '''''' '''
         Analyzes:
         - HCache
         ''' ''''''

        table = "particle"
        nparts = 10000  # Num particles in range

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        tkns = []
        keys = ["partid", "time"]
        values = ["x", "y", "z"]
        # cache_size=1 so almost every get_row misses the cache and must hit
        # Cassandra, exercising write/read coherency.
        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;", tkns,
                       keys, values, {
                           'cache_size': '1',
                           'writer_buffer': 20
                       })
        for i in xrange(0, nparts):
            cache.put_row([i, i / .1], [i / .2, i / .3, i / .4])

        # Read back in reverse order; a KeyError means a write was lost.
        for i in reversed(xrange(0, nparts)):  #xrange(nparts, -1, -1):
            try:
                cache.get_row([i, i / .1])
            except KeyError:
                str_k = str([i, i / .1])
                self.fail(str_k + " not found")
Exemplo n.º 11
0
    def test_arr_put_get(self):
        """Connect to Cassandra for the numpy put/get scenario.

        NOTE(review): the body ends right after the connection attempt, so
        no array is actually stored or retrieved in this snippet.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        import numpy as np
        '''''' '''
        Running arr_put_get test
        
        Analyzes:
        
        ''' ''''''
        # Array geometry: 2 dimensions, 128 elements per dimension.
        dims = 2
        elem_dim = 128
        table = "arrays_numpies"

        print 'Dimensions: ', dims, ' Element in each dim: ', elem_dim
        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            # Connection problems are only reported, not failed, here.
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
Exemplo n.º 12
0
class Hfetch_Tests(unittest.TestCase):
    keyspace = "hfetch_test"
    contact_names = ['127.0.0.1']
    nodePort = 9042
    cluster = Cluster(contact_names, port=nodePort)
    session = cluster.connect()

    @classmethod
    def setUpClass(cls):
        cls.session.execute(
            "CREATE KEYSPACE IF NOT EXISTS %s WITH replication "
            "= {'class': 'SimpleStrategy', 'replication_factor': 1};" %
            cls.keyspace)

    @classmethod
    def tearDownClass(cls):
        # self.session.execute("DROP KEYSPACE IF EXISTS %s;" % cls.keyspace)
        pass

    def test_connection(self):
        from hfetch import connectCassandra

        # Test behaviour when NodePort is None (should return TypeError)
        test_contact_names = []
        test_node_port = None
        fails = False
        try:
            connectCassandra(test_contact_names, test_node_port)
        except TypeError:
            fails = True
        except Exception, e:
            self.fail(e.message)

        self.assertTrue(fails)
        fails = False

        # Test behaviour when contact_names is an empty text (should return ValueError)
        test_node_port = self.nodePort
        test_contact_names = [123456789]
        try:
            connectCassandra(test_contact_names, test_node_port)
        except TypeError:
            fails = True
        except Exception, e:
            self.fail(e.message)
Exemplo n.º 13
0
    def test_iterators(self):
        """Check hcache.get_row and hcache.iteritems return coherent data."""
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        This test iterates over some text and check coherency between hcache and hiter
        
        Analyzes:
        - HCache
        - Get_row (read text)
        - Iteritems (read text)
        ''' ''''''

        table = "words"
        num_keys = 20

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(position int PRIMARY KEY, wordinfo text);" %
            (self.keyspace, table))

        # Insert num_keys rows of deterministic text.
        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in [
                    i, "'someRandomTextForTesting purposes - " + str(i * 60) +
                    "'"
                ])
            self.session.execute(
                "INSERT INTO %s.%s(position , wordinfo ) VALUES (%s)" %
                (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        # A single token range covering (almost) the whole ring.
        tkns = [(pow(-2, 63) + 1, pow(2, 63) - 1)]
        keys = ["position"]
        values = ["wordinfo"]
        hcache_config = {'cache_size': 100, 'writer_buffer': 20}

        cache = Hcache(self.keyspace, table,
                       "WHERE token(position)>=? AND token(position)<?;", tkns,
                       keys, values, hcache_config)

        iter_config = {"prefetch_size": 100, "update_cache": "yes"}
        myIter = cache.iteritems(iter_config)

        # Take the first 10 items through the iterator.
        data = []
        for i in xrange(0, 10):
            data.append(myIter.get_next())

        assert (len(data) > 0)
        first_data = data[0]

        # Each item is [key, value] for single-column keys/values.
        assert (len(first_data) == 2)
        first_key = [first_data[0]]

        assert (type(first_key[0]) == int)
        # get_row must agree with what the iterator produced for that key.
        somedata = cache.get_row(first_key)
        # self.assertEqual((first_key + somedata), first_data)
        assert ((first_key + somedata) == first_data)

        count = len(data)

        # Drain the remaining items; `i` keeps the last loop index from the
        # xrange above if the very first get_next raises StopIteration.
        while True:
            try:
                i = myIter.get_next()
            except StopIteration:
                print 'End of data, items read: ', count, ' with value ', i
                break
            count = count + 1

        print 'data was: \n', data
Exemplo n.º 14
0
    def write_test(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        from hfetch import HWriter
        '''''' '''
        While the iterator retrieves the data from a table, the writer stores it into another table
        
        Analyzes:
        - HCache
        - HWriter
        - Iteritems (updating the cache)
        ''' ''''''

        table = "particle"
        table_write = "particle_write"
        nparts = 6000  # Num particles in range

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table_write))

        for i in xrange(0, nparts):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        p = 1000  # Num partitions

        t_f = -7764607523034234880  # Token begin range
        # t_t = 5764607523034234880  # Token end range
        t_t = 7764607523034234880
        # Token blocks
        tkn_size = (t_t - t_f) / (nparts / p)
        tkns = [(a, a + tkn_size)
                for a in xrange(t_f, t_t - tkn_size, tkn_size)]
        keys = ["partid", "time"]
        values = ["x", "y", "z"]
        a = Hcache(self.keyspace, table,
                   "WHERE token(partid)>=? AND token(partid)<?;", tkns, keys,
                   values, {
                       self.keyspace: '100',
                       'writer_buffer': 20
                   })

        writer = HWriter(self.keyspace, table_write, keys, values,
                         {'writer_buffer': 20})

        def readAll(iter, wr):
            count = 1
            while True:
                try:
                    i = iter.get_next()
                except StopIteration:
                    print 'End of data, items read: ', count, ' with value ', i
                    break
                wr.write(i[0:2], i[2:5])
                count += 1
                if count % 100000 == 0:
                    print count
            print "iter has %d elements" % count

        start = time.time()
        readAll(a.iteritems({
            "prefetch_size": 100,
            "update_cache": "yes"
        }), writer)
        print "finshed into %d" % (time.time() - start)
Exemplo n.º 15
0
    def words_test_hiter(self):
        """Drain an HIterator over randomly generated text rows.

        NOTE(review): the name does not start with 'test_', so unittest
        discovery skips this method.
        """
        from hfetch import connectCassandra
        from hfetch import HIterator
        import random
        import string
        '''
        This test iterates over huge lines of text and verifies the correct behaviour of HIterator
        By default it acts as an iteritems
        
        Analyzes:
        - HIterator
        - Iteritems
        '''

        table = "words"
        nelems = 2000
        length_row = 100  # characters of random text per row

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(position int, wordinfo text, PRIMARY KEY(position));"
            % (self.keyspace, table))

        # Insert nelems rows of random alphanumeric text.
        for i in xrange(0, nelems):
            vals = ','.join([
                str(i), "'" + ''.join(
                    random.choice(string.ascii_uppercase +
                                  string.ascii_lowercase + " " + string.digits)
                    for _ in range(length_row)) + "'"
            ])
            self.session.execute(
                "INSERT INTO %s.%s(position,wordinfo) VALUES (%s)" %
                (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nelem = 10
        nblocks = 2

        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Partition the full token space into equally sized blocks.
        tkn_size = (t_t - t_f) / (nelem / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        keys = ["position"]
        values = ["wordinfo"]
        hiter_config = {'prefetch_size': '100', 'writer_buffer': 20}

        itera = HIterator(self.keyspace, table, tokens, keys, values,
                          hiter_config)

        # Drain the iterator; only checks it terminates without errors.
        while True:
            try:
                data = itera.get_next()
            except StopIteration:
                break
Exemplo n.º 16
0
    def uuid_test(self):
        """Round-trip UUID keys through put_row and iteritems.

        NOTE(review): the name does not start with 'test_', so unittest
        discovery skips this method.
        """
        from hfetch import connectCassandra
        from hfetch import Hcache
        import uuid
        '''''' '''
        This test check the correct handling of UUIDs
        
        Analyzes:
        - Hcache
        - Put_row
        - Iteritems
        ''' ''''''

        table = "uuid"

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid uuid, data int, PRIMARY KEY(partid));"
            % (self.keyspace, table))

        nelem = 1000
        nblocks = 10

        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Partition the full token space into equally sized blocks.
        tkn_size = (t_t - t_f) / (nelem / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        keys = ["partid"]
        values = ["data"]

        # CREATE TABLE test.bulk(partid int PRIMARY KEY, data text);
        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;", tokens,
                       keys, values, {
                           'cache_size': '10',
                           'writer_buffer': 20
                       })

        # Write data: a fresh random UUID per row; remember the middle one
        # so the read phase can look for it.
        someid = None
        i = 0
        while i < nelem:
            u = uuid.uuid4()  # ('81da81e8-1914-11e7-908d-ecf4bb4c66c4')
            cache.put_row([u], [i])
            if i == nelem / 2:
                someid = u
            i += 1

        # by recreating the cache we wait until all the data is written

        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;", tokens,
                       keys, values, {
                           'cache_size': '10',
                           'writer_buffer': 20
                       })
        # Read data: count all rows and check the remembered UUID shows up.
        itera = cache.iteritems(10)
        found = False
        counter = 0
        while True:
            try:
                # The first element of each item is parsed back into a UUID;
                # presumably the iterator yields it in string form.
                L = uuid.UUID(itera.get_next()[0])
                if L == someid:
                    found = True
            except StopIteration:
                break
            counter = counter + 1

        self.assertEqual(counter, nelem)
        self.assertTrue(found)
Exemplo n.º 17
0
    def test_get_row_key_error(self):
        """get_row must raise KeyError exactly for the keys that don't exist,
        both on a cold cache and on a warm one."""
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''''' '''
        This test check the hcache sets a key error when the key we asked doesnt exist
        Analyzes:
        - Hcache
        - Get_row (returning KeyError)
        ''' ''''''

        table = 'particle'
        num_keys = 10001

        # Recreate the table so the test starts from a clean state.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        # Insert num_keys rows; clustering key is partid/0.1 == partid*10.
        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        # A single narrow token range; get_row queries by key regardless.
        token_ranges = [(8070430489100699999, 8070450532247928832)]

        # Keys past num_keys were never inserted and must raise KeyError.
        non_existent_keys = 10

        # Cache large enough to hold every key, existing or not.
        cache_size = num_keys + non_existent_keys

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
        keys = ["partid", "time"]
        values = ["ciao", "x", "y", "z"]
        cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                       {'cache_size': cache_size})

        # Access the cache, which is empty and queries cassandra to retrieve the data
        t1 = time.time()
        error_counter = 0
        for pk in xrange(0, num_keys + non_existent_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                error_counter = error_counter + 1

        print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format(
            unicode(str(num_keys), 'utf-8'),
            unicode(str(time.time() - t1), 'utf-8'),
            unicode(str(error_counter), 'utf-8'),
            unicode(str(non_existent_keys), 'utf-8'))

        # Exactly the 10 extra keys must have been reported missing.
        self.assertEqual(error_counter, non_existent_keys)

        # Access the cache, which has already all the data and will ask cassandra only if
        # the keys asked are not present
        t1 = time.time()
        error_counter = 0
        for pk in xrange(0, num_keys + non_existent_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                error_counter = error_counter + 1

        print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format(
            unicode(str(num_keys), 'utf-8'),
            unicode(str(time.time() - t1), 'utf-8'),
            unicode(str(error_counter), 'utf-8'),
            unicode(str(non_existent_keys), 'utf-8'))

        # Warm-cache pass must report the same set of missing keys.
        self.assertEqual(error_counter, non_existent_keys)
Exemplo n.º 18
0
    def __init__(self):
        """Populate the Config singleton from environment variables.

        Runs once: a second construction returns immediately thanks to the
        `configured` flag on the shared `Config.instance` singleton. Each
        setting falls back to a logged default when its variable is unset.
        """
        singleton = Config.instance
        # Idempotence guard: only the first construction does any work.
        if singleton.configured:
            log.info('setting down')
            return

        singleton.configured = True


        # HECUBA_ARROW: anything except 'no'/'false' enables Arrow access.
        if 'HECUBA_ARROW' in os.environ:
            env_var = os.environ['HECUBA_ARROW'].lower()
            singleton.arrow_enabled = False if env_var == 'no' or env_var == 'false' else True
            log.info('HECUBA_ARROW: {}'.format(singleton.arrow_enabled))
        else:
            singleton.arrow_enabled = False
            log.warn('Arrow access is DISABLED [HECUBA_ARROW=%s]', singleton.arrow_enabled)

        # CONCURRENT_CREATION: only the exact string 'True' enables it.
        if 'CONCURRENT_CREATION' in os.environ:
            if os.environ['CONCURRENT_CREATION']=='True':
                singleton.concurrent_creation = True
            else:
                singleton.concurrent_creation = False
            log.info('CONCURRENT_CREATION: %s', str(singleton.concurrent_creation))
        else:
            singleton.concurrent_creation = False
            log.warn('Concurrent creation is DISABLED [CONCURRENT_CREATION=False]')

        # LOAD_ON_DEMAND: enabled unless the exact string 'False' is given.
        if 'LOAD_ON_DEMAND' in os.environ:
            if os.environ['LOAD_ON_DEMAND']=='False':
                singleton.load_on_demand = False
            else:
                singleton.load_on_demand = True
            log.info('LOAD_ON_DEMAND: %s', str(singleton.load_on_demand))
        else:
            singleton.load_on_demand = True
            log.warn('Load data on demand is ENABLED [LOAD_ON_DEMAND=True]')

        # CREATE_SCHEMA: anything except 'no'/'false' enables schema creation.
        if 'CREATE_SCHEMA' in os.environ:
            env_var = os.environ['CREATE_SCHEMA'].lower()
            singleton.id_create_schema = False if env_var == 'no' or env_var == 'false' else True
            log.info('CREATE_SCHEMA: %d', singleton.id_create_schema)
        else:
            singleton.id_create_schema = True
            log.warn('Creating keyspaces and tables by default [CREATE_SCHEMA=True]')
        # NODE_PORT: Cassandra native port, default 9042.
        # NOTE: a non-numeric value raises ValueError (only KeyError is caught).
        try:
            singleton.nodePort = int(os.environ['NODE_PORT'])
            log.info('NODE_PORT: %d', singleton.nodePort)
        except KeyError:
            log.warn('using default NODE_PORT 9042')
            singleton.nodePort = 9042

        # CONTACT_NAMES: comma-separated hosts, resolved to IPs when needed.
        try:
            singleton.contact_names = os.environ['CONTACT_NAMES'].split(",")
            log.info('CONTACT_NAMES: %s', str.join(" ", singleton.contact_names))
            # Convert node names to ips if needed
            import socket
            contact_names_ips = []
            show_translation = False
            for h_name in singleton.contact_names:
                IP_addres = socket.gethostbyname(h_name)
                if (IP_addres != h_name):
                    show_translation=True
                contact_names_ips.append(IP_addres)
            singleton.contact_names = contact_names_ips
            # Log the translated list only when a hostname actually changed.
            if show_translation:
                log.info('CONTACT_NAMES: %s', str.join(" ", singleton.contact_names))

        except KeyError:
            log.warn('using default contact point localhost')
            singleton.contact_names = ['127.0.0.1']

        # Tear down any session left over from a previous configuration.
        if hasattr(singleton, 'session'):
            log.warn('Shutting down pre-existent sessions and cluster')
            try:
                singleton.session.shutdown()
                singleton.cluster.shutdown()
            except Exception:
                log.warn('error shutting down')
        # REPLICA_FACTOR: keyspace replication factor, default 1.
        try:
            singleton.replication_factor = int(os.environ['REPLICA_FACTOR'])
            log.info('REPLICA_FACTOR: %d', singleton.replication_factor)
        except KeyError:
            singleton.replication_factor = 1
            log.warn('using default REPLICA_FACTOR: %d', singleton.replication_factor)

        # EXECUTION_NAME: application keyspace; 'hecuba' is reserved.
        try:
            user_defined_execution_name = os.environ['EXECUTION_NAME']
            if user_defined_execution_name == 'hecuba':
                raise RuntimeError('Error: the application keyspace cannot be \'hecuba\'. '
                                   'This keyspace is reserved for storing metadata.')
            singleton.execution_name = user_defined_execution_name
            log.info('EXECUTION_NAME: %s', singleton.execution_name)
        except KeyError:
            singleton.execution_name = 'my_app'
            log.warn('using default EXECUTION_NAME: %s', singleton.execution_name)
        # SPLITS_PER_NODE: partitioning granularity, default 32.
        try:
            singleton.splits_per_node = int(os.environ['SPLITS_PER_NODE'])
            log.info('SPLITS_PER_NODE: %d', singleton.splits_per_node)
        except KeyError:
            singleton.splits_per_node = 32
            log.warn('using default SPLITS_PER_NODE: %d', singleton.splits_per_node)

        # TOKEN_RANGE_SIZE overrides TARGET_TOKEN_RANGE_SIZE: when the former
        # is set the latter is forced to None; otherwise the latter is read
        # with a 64 KiB default.
        try:
            singleton.token_range_size = int(os.environ['TOKEN_RANGE_SIZE'])
            log.info('TOKEN_RANGE_SIZE: %d', singleton.token_range_size)
            singleton.target_token_range_size = None
        except KeyError:
            singleton.token_range_size = None

            try:
                singleton.target_token_range_size = int(os.environ['TARGET_TOKEN_RANGE_SIZE'])
                log.info('TARGET_TOKEN_RANGE_SIZE: %d', singleton.target_token_range_size)
            except KeyError:
                singleton.target_token_range_size = 64 * 1024
                log.warn('using default TARGET_TOKEN_RANGE_SIZE: %d', singleton.target_token_range_size)

        # MAX_CACHE_SIZE: Hcache entry limit, default 1000.
        try:
            singleton.max_cache_size = int(os.environ['MAX_CACHE_SIZE'])
            log.info('MAX_CACHE_SIZE: %d', singleton.max_cache_size)
        except KeyError:
            singleton.max_cache_size = 1000
            log.warn('using default MAX_CACHE_SIZE: %d', singleton.max_cache_size)

        try:
            singleton.replication_strategy = os.environ['REPLICATION_STRATEGY']
            log.info('REPLICATION_STRATEGY: %s', singleton.replication_strategy)
        except KeyError:
            singleton.replication_strategy = "SimpleStrategy"
            log.warn('using default REPLICATION_STRATEGY: %s', singleton.replication_strategy)

        try:
            singleton.replication_strategy_options = os.environ['REPLICATION_STRATEGY_OPTIONS']
            log.info('REPLICATION_STRATEGY_OPTIONS: %s', singleton.replication_strategy_options)
        except KeyError:
            singleton.replication_strategy_options = ""
            log.warn('using default REPLICATION_STRATEGY_OPTIONS: %s', singleton.replication_strategy_options)

        if singleton.replication_strategy == "SimpleStrategy":
            singleton.replication = "{'class' : 'SimpleStrategy', 'replication_factor': %d}" % \
                                    singleton.replication_factor
        else:
            singleton.replication = "{'class' : '%s', %s}" % (
                singleton.replication_strategy, singleton.replication_strategy_options)
        try:
            singleton.hecuba_print_limit = int(os.environ['HECUBA_PRINT_LIMIT'])
            log.info('HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)
        except KeyError:
            singleton.hecuba_print_limit = 1000
            log.warn('using default HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)

        try:
            singleton.prefetch_size = int(os.environ['PREFETCH_SIZE'])
            log.info('PREFETCH_SIZE: %s', singleton.prefetch_size)
        except KeyError:
            singleton.prefetch_size = 10000
            log.warn('using default PREFETCH_SIZE: %s', singleton.prefetch_size)

        try:
            singleton.write_buffer_size = int(os.environ['WRITE_BUFFER_SIZE'])
            log.info('WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)
        except KeyError:
            singleton.write_buffer_size = 1000
            log.warn('using default WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)

        try:
            singleton.write_callbacks_number = int(os.environ['WRITE_CALLBACKS_NUMBER'])
            log.info('WRITE_CALLBACKS_NUMBER: %s', singleton.write_callbacks_number)
        except KeyError:
            singleton.write_callbacks_number = 16
            log.warn('using default WRITE_CALLBACKS_NUMBER: %s', singleton.write_callbacks_number)

        try:
            env_var = os.environ['TIMESTAMPED_WRITES'].lower()
            singleton.timestamped_writes = False if env_var == 'no' or env_var == 'false' else True
            log.info('TIMESTAMPED WRITES ENABLED? {}'.format(singleton.timestamped_writes))
        except KeyError:
            singleton.timestamped_writes = False
            log.warn('using default TIMESTAMPED_WRITES: %s', singleton.timestamped_writes)

        if singleton.max_cache_size < singleton.write_buffer_size:
            import warnings
            message = "Defining a MAX_CACHE_SIZE smaller than WRITE_BUFFER_SIZE can result " \
                      "in reading outdated results from the persistent storage"
            warnings.warn(message)

        log.info('Initializing global session')

        singleton.cluster = Cluster(contact_points=singleton.contact_names,
                                    load_balancing_policy=TokenAwarePolicy(RoundRobinPolicy()),
                                    port=singleton.nodePort,
                                    default_retry_policy=_NRetry(5))
        singleton.session = singleton.cluster.connect()
        singleton.session.encoder.mapping[tuple] = singleton.session.encoder.cql_encode_tuple
        if singleton.concurrent_creation:
            configure_lock=[
                """CREATE KEYSPACE IF NOT EXISTS hecuba_locks
                        WITH replication=  {'class': 'SimpleStrategy', 'replication_factor': 1};
                """,
                """CREATE TABLE IF NOT EXISTS hecuba_locks.table_lock
                        (table_name text, PRIMARY KEY (table_name));
                """,
                "TRUNCATE table hecuba_locks.table_lock;"
            ]
            for query in configure_lock:
                try:
                    self.instance.session.execute(query)
                except Exception as e:
                    log.error("Error executing query %s" % query)
                    raise e
            singleton._query_to_lock=singleton.session.prepare("INSERT into hecuba_locks.table_lock (table_name) values (?) if not exists;")

        if singleton.id_create_schema:
            queries = [
                "CREATE KEYSPACE IF NOT EXISTS hecuba  WITH replication = %s" % singleton.replication,
                """CREATE TYPE IF NOT EXISTS hecuba.q_meta(
                mem_filter text, 
                from_point frozen<list<double>>,
                to_point frozen<list<double>>,
                precision float);
                """,
                """CREATE TYPE IF NOT EXISTS hecuba.np_meta (flags int, elem_size int, partition_type tinyint,
                dims list<int>, strides list<int>, typekind text, byteorder text)""",
                """CREATE TABLE IF NOT EXISTS hecuba
                .istorage (storage_id uuid, 
                class_name text,name text, 
                istorage_props map<text,text>, 
                tokens list<frozen<tuple<bigint,bigint>>>,
                indexed_on list<text>,
                qbeast_random text,
                qbeast_meta frozen<q_meta>,
                numpy_meta frozen<np_meta>,
                block_id int,
                base_numpy uuid,
                view_serialization blob,
                primary_keys list<frozen<tuple<text,text>>>,
                columns list<frozen<tuple<text,text>>>,
                PRIMARY KEY(storage_id));
                """,
                "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = %s" % (singleton.execution_name, singleton.replication)]
            for query in queries:
                try:
                    self.executelocked(query)
                except Exception as e:
                    log.error("Error executing query %s" % query)
                    raise e

        from hfetch import connectCassandra, HArrayMetadata
        # connecting c++ bindings
        connectCassandra(singleton.contact_names, singleton.nodePort)

        if singleton.id_create_schema:
            time.sleep(10)
        singleton.cluster.register_user_type('hecuba', 'np_meta', HArrayMetadata)
Exemplo n.º 19
0
    def test_get_row(self):
        # hfetch provides the C++ Cassandra bindings under test.
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test iterates over a set of particles, performing get_row operations,
        then compares HCache access times against a plain Python dict baseline.

        Analyzes:
        - HCache (multiple reads of the same key)
        - Get_row
        '''

        table = 'particle'
        num_keys = 10001

        # Recreate the test table from scratch so earlier runs cannot interfere.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        # Populate the table: for row i the clustering key is time = i / .1,
        # i.e. i * 10, which is how the lookups below compute ck.
        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        # Connect the C++ bindings to the same cluster the driver session uses.
        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        token_ranges = []

        # Cache is sized to hold every key, so the second pass should be
        # served entirely from the cache.
        cache_size = 10001

        keys = ["partid", "time"]
        values = ["ciao", "x", "y", "z"]

        cache_config = {'cache_size': cache_size}

        cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                       cache_config)

        # First pass: cold reads — every get_row must fetch from Cassandra.
        # Key is [partition key, clustering key] = [partid, time].
        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]

        print 'time - load C++ cache with cassandra data: ', time.time() - t1

        # Second pass: warm reads — same keys, now resident in the HCache.
        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]
        # print 'items in res: ',len(result)
        print 'time - read data from C++ cache: ', time.time() - t1

        # Baseline: copy the same rows into a plain Python dict through a
        # fresh cache (note the explicit token range passed here).
        py_dict = {}
        cache = Hcache(self.keyspace, table, "",
                       [(8070430489100699999, 8070450532247928832)],
                       ["partid", "time"], ["ciao", "x", "y", "z"],
                       {'cache_size': num_keys})

        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                py_dict[(pk, ck)] = result
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]
        print 'time - load data into python dict: ', time.time() - t1
        # print 'size ', len(py_dict)
        # print 'items in res: ',len(py_dict[1])

        # Read the same keys back from the dict for the timing comparison.
        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = py_dict[(pk, ck)]
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]
        print 'time - read data from the python dict: ', time.time() - t1
Exemplo n.º 20
0
        test_contact_names = [123456789]
        try:
            connectCassandra(test_contact_names, test_node_port)
        except TypeError:
            fails = True
        except Exception, e:
            self.fail(e.message)

        self.assertTrue(fails)
        fails = False

        # Test behaviour when contact_names is an empty text (should return ValueError)
        test_node_port = self.nodePort
        test_contact_names = ['']
        try:
            connectCassandra(test_contact_names, test_node_port)
        except ValueError:
            fails = True
        except Exception, e:
            self.fail(e.message)

        self.assertTrue(fails)
        fails = False

        # if no contact point specified, connects to 127.0.0.1
        try:
            self.contact_names.index(
                '127.0.0.1')  # raises value error if not present
            test_contact_names = []
            connectCassandra(test_contact_names, test_node_port)
        except ValueError:
Exemplo n.º 21
0
    def test_delete_row(self):
        # hfetch provides the C++ Cassandra bindings under test.
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test reads one row, deletes it through the cache, and verifies
        that a subsequent get_row for the same key raises KeyError.

        Analyzes:
        - HCache
        - Get_row (setting TypeError properly)
        '''

        table = 'particle'
        num_keys = 100  # num keys must be multiple of expected_errors
        # NOTE(review): expected_errors is never used below — presumably a
        # leftover from a variant of this test; confirm before removing.
        expected_errors = 10

        # Recreate the test table from scratch so earlier runs cannot interfere.
        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        # Populate the table: for row i the clustering key is time = i / .1,
        # i.e. i * 10, which is how ck is derived from pk below.
        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        # Connect the C++ bindings to the same cluster the driver session uses.
        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        token_ranges = []

        # Minimal cache: a single entry is enough for this scenario.
        cache_size = 1

        keys = ["partid", "time"]
        values = ["ciao", "x", "y", "z"]

        cache_config = {'cache_size': cache_size}

        cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                       cache_config)
        pk = 0
        ck = pk * 10

        # The row must exist before the deletion.
        try:
            result = cache.get_row([pk, ck])
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            self.fail("Error when retrieving value from cache: " + str(e) +
                      " -- " + str([pk, ck]))

        # Delete through the cache; this must not raise.
        try:
            result = cache.delete_row([pk, ck])
        except KeyError as e:
            self.fail("Error when deleteing entry from cache: " + str(e) +
                      " -- " + str([pk, ck]))

        # After deletion the key must be gone: get_row should raise KeyError.
        try:
            result = cache.get_row([pk, ck])
            self.fail(
                "Error when retrieving value from cache, the entry shouldnt exist"
            )
        except KeyError as e:
            pass
Exemplo n.º 22
0
    def reset(mock_cassandra=False):
        # (Re)initialize the shared Config singleton from environment
        # variables and open the Cassandra session — or install in-process
        # mocks when mock_cassandra is True.
        # NOTE(review): the function takes no 'self'; presumably declared as a
        # static method — confirm the decorator outside this view.
        singleton = Config.instance
        # Short-circuit if already configured for the same mode.
        if singleton.configured and singleton.mock_cassandra == mock_cassandra:
            log.info('setting down')
            return

        singleton.mock_cassandra = mock_cassandra
        log.info('setting up configuration with mock_cassandra = %s',
                 mock_cassandra)

        singleton.configured = True

        # CREATE_SCHEMA: integer flag; -1 (unset) means "create by default"
        # further down (see the id_create_schema == -1 branch).
        if 'CREATE_SCHEMA' in os.environ:
            singleton.id_create_schema = int(os.environ['CREATE_SCHEMA'])
        else:
            singleton.id_create_schema = -1

        if mock_cassandra:
            log.info('configuring mock environment')
        else:
            log.info('configuring production environment')
        # Each setting below follows the same EAFP pattern: read the env var,
        # fall back to a logged default on KeyError.
        try:
            singleton.nodePort = int(os.environ['NODE_PORT'])
            log.info('NODE_PORT: %d', singleton.nodePort)
        except KeyError:
            log.warn('using default NODE_PORT 9042')
            singleton.nodePort = 9042

        try:
            singleton.contact_names = os.environ['CONTACT_NAMES'].split(",")
            log.info('CONTACT_NAMES: %s', str.join(" ",
                                                   singleton.contact_names))
        except KeyError:
            log.warn('using default contact point localhost')
            singleton.contact_names = ['127.0.0.1']

        # Tear down any session left over from a previous configuration.
        if hasattr(singleton, 'session'):
            log.warn('Shutting down pre-existent sessions and cluster')
            try:
                singleton.session.shutdown()
                singleton.cluster.shutdown()
            # NOTE(review): 'except _:' is a bug — '_' is not defined, so any
            # shutdown failure raises NameError instead of being logged.
            # Should be 'except Exception:'.
            except _:
                log.warn('error shutting down')
        try:
            singleton.replication_factor = int(os.environ['REPLICA_FACTOR'])
            log.info('REPLICA_FACTOR: %d', singleton.replication_factor)
        except KeyError:
            singleton.replication_factor = 1
            log.warn('using default REPLICA_FACTOR: %d',
                     singleton.replication_factor)

        # The application keyspace; 'hecuba' is reserved for metadata.
        try:
            user_defined_execution_name = os.environ['EXECUTION_NAME']
            if user_defined_execution_name == 'hecuba':
                raise RuntimeError(
                    'Error: the application keyspace cannot be \'hecuba\'. '
                    'This keyspace is reserved for storing metadata.')
            singleton.execution_name = user_defined_execution_name
            log.info('EXECUTION_NAME: %s', singleton.execution_name)
        except KeyError:
            singleton.execution_name = 'my_app'
            log.warn('using default EXECUTION_NAME: %s',
                     singleton.execution_name)

        try:
            singleton.number_of_partitions = int(
                os.environ['NUMBER_OF_BLOCKS'])
            log.info('NUMBER_OF_BLOCKS: %d', singleton.number_of_partitions)
        except KeyError:
            singleton.number_of_partitions = 32
            log.warn('using default NUMBER_OF_BLOCKS: %d',
                     singleton.number_of_partitions)

        try:
            singleton.min_number_of_tokens = int(
                os.environ['MIN_NUMBER_OF_TOKENS'])
            log.info('MIN_NUMBER_OF_TOKENS: %d',
                     singleton.min_number_of_tokens)
        except KeyError:
            singleton.min_number_of_tokens = 1024
            log.warn('using default MIN_NUMBER_OF_TOKENS: %d',
                     singleton.min_number_of_tokens)

        try:
            singleton.max_cache_size = int(os.environ['MAX_CACHE_SIZE'])
            log.info('MAX_CACHE_SIZE: %d', singleton.max_cache_size)
        except KeyError:
            singleton.max_cache_size = 100
            log.warn('using default MAX_CACHE_SIZE: %d',
                     singleton.max_cache_size)

        try:
            singleton.replication_strategy = os.environ['REPLICATION_STRATEGY']
            log.info('REPLICATION_STRATEGY: %s',
                     singleton.replication_strategy)
        except KeyError:
            singleton.replication_strategy = "SimpleStrategy"
            log.warn('using default REPLICATION_STRATEGY: %s',
                     singleton.replication_strategy)

        try:
            singleton.replication_strategy_options = os.environ[
                'REPLICATION_STRATEGY_OPTIONS']
            log.info('REPLICATION_STRATEGY_OPTIONS: %s',
                     singleton.replication_strategy_options)
        except KeyError:
            singleton.replication_strategy_options = ""
            log.warn('using default REPLICATION_STRATEGY_OPTIONS: %s',
                     singleton.replication_strategy_options)

        # Build the CQL replication map used when creating keyspaces.
        # NOTE(review): 'is' compares identity, not equality; it only works
        # here by CPython string interning. Should be '=='.
        if singleton.replication_strategy is "SimpleStrategy":
            singleton.replication = "{'class' : 'SimpleStrategy', 'replication_factor': %d}" % \
                                    singleton.replication_factor
        else:
            singleton.replication = "{'class' : '%s', %s}" % (
                singleton.replication_strategy,
                singleton.replication_strategy_options)
        try:
            singleton.hecuba_print_limit = int(
                os.environ['HECUBA_PRINT_LIMIT'])
            log.info('HECUBA_PRINT_LIMIT: %s', singleton.hecuba_print_limit)
        except KeyError:
            singleton.hecuba_print_limit = 1000
            log.warn('using default HECUBA_PRINT_LIMIT: %s',
                     singleton.hecuba_print_limit)

        try:
            singleton.hecuba_type_checking = os.environ[
                'HECUBA_TYPE_CHECKING'].lower() == 'true'
            log.info('HECUBA_TYPE_CHECKING: %s',
                     singleton.hecuba_type_checking)
        except KeyError:
            singleton.hecuba_type_checking = False
            log.warn('using default HECUBA_TYPE_CHECKING: %s',
                     singleton.hecuba_type_checking)

        # NOTE(review): this block is an exact duplicate of the
        # HECUBA_TYPE_CHECKING block above — safe to delete.
        try:
            singleton.hecuba_type_checking = os.environ[
                'HECUBA_TYPE_CHECKING'].lower() == 'true'
            log.info('HECUBA_TYPE_CHECKING: %s',
                     singleton.hecuba_type_checking)
        except KeyError:
            singleton.hecuba_type_checking = False
            log.warn('using default HECUBA_TYPE_CHECKING: %s',
                     singleton.hecuba_type_checking)

        try:
            singleton.prefetch_size = int(os.environ['PREFETCH_SIZE'])
            log.info('PREFETCH_SIZE: %s', singleton.prefetch_size)
        except KeyError:
            singleton.prefetch_size = 10000
            log.warn('using default PREFETCH_SIZE: %s',
                     singleton.prefetch_size)

        try:
            singleton.write_buffer_size = int(os.environ['WRITE_BUFFER_SIZE'])
            log.info('WRITE_BUFFER_SIZE: %s', singleton.write_buffer_size)
        except KeyError:
            singleton.write_buffer_size = 1000
            log.warn('using default WRITE_BUFFER_SIZE: %s',
                     singleton.write_buffer_size)

        try:
            singleton.write_callbacks_number = int(
                os.environ['WRITE_CALLBACKS_NUMBER'])
            log.info('WRITE_CALLBACKS_NUMBER: %s',
                     singleton.write_callbacks_number)
        except KeyError:
            singleton.write_callbacks_number = 16
            log.warn('using default WRITE_CALLBACKS_NUMBER: %s',
                     singleton.write_callbacks_number)

        # Qbeast-related settings.
        try:
            singleton.qbeast_master_port = int(
                os.environ['QBEAST_MASTER_PORT'])
            log.info('QBEAST_MASTER_PORT: %d', singleton.qbeast_master_port)
        except KeyError:
            log.warn('using default qbeast master port 2600')
            singleton.qbeast_master_port = 2600

        try:
            singleton.qbeast_worker_port = int(
                os.environ['QBEAST_WORKER_PORT'])
            log.info('QBEAST_WORKER_PORT: %d', singleton.qbeast_worker_port)
        except KeyError:
            log.warn('using default qbeast worker port 2688')
            singleton.qbeast_worker_port = 2688

        try:
            singleton.qbeast_entry_node = os.environ[
                'QBEAST_ENTRY_NODE'].split(",")
            log.info('QBEAST_ENTRY_NODE: %s', singleton.qbeast_entry_node)
        except KeyError:
            log.warn('using default qbeast entry node localhost')
            import socket
            singleton.qbeast_entry_node = [socket.gethostname()]

        # NOTE(review): int(...split(",")) calls int() on a LIST and raises
        # TypeError whenever QBEAST_MAX_RESULTS is set; the .split(",") looks
        # copy-pasted from the list-valued vars above. Same bug in the next
        # two settings (RETURN_AT_LEAST, READ_MAX).
        try:
            singleton.qbeast_max_results = int(
                os.environ['QBEAST_MAX_RESULTS'].split(","))
            log.info('QBEAST_MAX_RESULTS: %d', singleton.qbeast_max_results)
        except KeyError:
            log.warn('using default qbeast max results 10000000')
            singleton.qbeast_max_results = 10000000

        try:
            singleton.qbeast_return_at_least = int(
                os.environ['RETURN_AT_LEAST'].split(","))
            log.info('QBEAST_RETURN_AT_LEAST: %d',
                     singleton.qbeast_return_at_least)
        except KeyError:
            log.warn('using default qbeast return at least 100')
            singleton.qbeast_return_at_least = 100

        try:
            singleton.qbeast_read_max = int(os.environ['READ_MAX'].split(","))
            log.info('QBEAST_READ_MAX: %d', singleton.qbeast_read_max)
        except KeyError:
            log.warn('using default qbeast read max 10000')
            singleton.qbeast_read_max = 10000

        if mock_cassandra:

            # Minimal stand-ins so code that touches cluster/session metadata
            # can run without a live Cassandra.
            class clusterMock:
                def __init__(self):
                    from cassandra.metadata import Metadata
                    self.metadata = Metadata()
                    self.metadata.rebuild_token_map("Murmur3Partitioner", {})

            # session mock: execute returns no rows; prepare/bind are fluent
            # no-ops returning the mock itself.
            class sessionMock:
                def execute(self, *args, **kwargs):
                    log.info('called mock.session')
                    return []

                def prepare(self, *args, **kwargs):
                    return self

                def bind(self, *args, **kwargs):
                    return self

            singleton.cluster = clusterMock()
            singleton.session = sessionMock()
        else:
            log.info('Initializing global session')
            try:
                singleton.cluster = Cluster(
                    contact_points=singleton.contact_names,
                    port=singleton.nodePort,
                    default_retry_policy=_NRetry(5))
                singleton.session = singleton.cluster.connect()
                singleton.session.encoder.mapping[
                    tuple] = singleton.session.encoder.cql_encode_tuple
                from hfetch import connectCassandra
                # connecting c++ bindings
                connectCassandra(singleton.contact_names, singleton.nodePort)
                # Create the application keyspace plus the 'hecuba' metadata
                # keyspace/type/table, unless CREATE_SCHEMA was set explicitly.
                if singleton.id_create_schema == -1:
                    singleton.session.execute(
                        "CREATE KEYSPACE IF NOT EXISTS %s WITH REPLICATION = %s"
                        % (singleton.execution_name, singleton.replication))
                    singleton.session.execute(
                        ('CREATE KEYSPACE IF NOT EXISTS hecuba' +
                         " WITH replication = %s" % singleton.replication))

                    singleton.session.execute(
                        'CREATE TYPE IF NOT EXISTS hecuba.q_meta('
                        'mem_filter text, '
                        'from_point frozen < list < float >>,'
                        'to_point frozen < list < float >>,'
                        'precision float)')

                    singleton.session.execute(
                        'CREATE TABLE IF NOT EXISTS hecuba' +
                        '.istorage (storage_id uuid, '
                        'class_name text,name text, '
                        'istorage_props map<text,text>, '
                        'tokens list<frozen<tuple<bigint,bigint>>>,'
                        'indexed_on list<text>,'
                        'entry_point text,'
                        'qbeast_id uuid,'
                        'qbeast_meta q_meta,'
                        'primary_keys list<frozen<tuple<text,text>>>,'
                        'columns list<frozen<tuple<text,text>>>,'
                        'PRIMARY KEY(storage_id))')

            # Broad catch: schema/session setup failures are logged, not
            # re-raised, so test environments without Cassandra can proceed.
            except Exception as e:
                log.error(
                    'Exception creating cluster session. Are you in a testing env? %s',
                    e)