Example #1
    def test_small_brute(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test iterates over a small amount of data using iterkeys and validates that
        no column name can be both a key and a value at the same time.

        Analyzes:
        - HCache (enforces that a column can't be a key and a value at the same time)
        - Iterkeys
        '''

        table = "particle"
        nelems = 10001

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        for i in xrange(0, nelems):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nblocks = 100

        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Token blocks
        tkn_size = (t_t - t_f) / (nelems / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        hcache_config = {'cache_size': '10', 'writer_buffer': 20}
        keys = ["partid", "time"]
        values = ["time", "x"]

        # this should fail since a column can not be a key and a value at the same time (key=time, value=time)
        with self.assertRaises(RuntimeError):
            Hcache(self.keyspace, table,
                   "WHERE token(partid)>=? AND token(partid)<?;",
                   tokens, keys, values, hcache_config)
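
The token-range arithmetic above (splitting the full Murmur3 token space into equal blocks) recurs in most of the examples below. A minimal standalone sketch of that computation, with the helper name chosen here purely for illustration, could look like this:

def split_token_space(nblocks):
    # Full Murmur3 token range used throughout these examples: [-2^63, 2^63 - 1]
    t_f = pow(-2, 63)
    t_t = pow(2, 63) - 1
    # width of each block; '//' keeps the bounds integral
    tkn_size = (t_t - t_f) // nblocks
    # (begin, end) pairs in the same shape the Hcache constructor receives
    return [(a, a + tkn_size) for a in xrange(t_f, t_t - tkn_size, tkn_size)]

# e.g. tokens = split_token_space(100) produces 100 (begin, end) token intervals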
Example #2
    def test_put_row_text(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        Simple test to store text and retrieve it.

        Analyzes:
        - HCache
        - Put_row (write text)
        - Iteritems (read text)
        '''

        table = "bulk"

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(partid int PRIMARY KEY, data text);" %
            (self.keyspace, table))

        num_items = int(pow(10, 3))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nblocks = 10
        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Token blocks
        tkn_size = (t_t - t_f) / (num_items / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        keys = ["partid"]
        values = ["data"]

        hcache_config = {'cache_size': '10', 'writer_buffer': 20}

        cache = Hcache(self.keyspace, table, "", tokens, keys, values,
                       hcache_config)
        for i in xrange(0, num_items):
            cache.put_row([i], ['someRandomText'])

        # it doesn't make sense to count the read elements
        # because the data is still being written asynchronously
        hiter = cache.iteritems(10)
        while True:
            try:
                data = hiter.get_next()
                self.assertEqual(len(data), len(keys) + len(values))
                self.assertEqual(data[1], 'someRandomText')
            except StopIteration:
                break
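
The explicit get_next()/StopIteration protocol used above appears in every iterator-based example in this file. Assuming that is the whole protocol, a thin generator wrapper (not part of hfetch; the name is illustrative) lets the read loops use ordinary Python iteration:

def iterate(hiter):
    # adapt hfetch's get_next()/StopIteration protocol to a Python generator
    while True:
        try:
            yield hiter.get_next()
        except StopIteration:
            return

# usage sketch:
#     for row in iterate(cache.iteritems(10)):
#         ...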
Example #3
    def test_simpletest(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        Reads the same row several times through the cache and checks the results stay consistent.

        Analyzes:
        - HCache
        - Get_row
        '''

        table = 'particle'
        nelems = 500

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        for i in xrange(0, nelems):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        keys = ["partid", "time"]
        values = ["x", "y", "z"]
        token_ranges = []
        # empty configuration parameter (the last dictionary) means to use the default config
        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;",
                       token_ranges, keys, values, {})

        def get_data(cache, keys):
            data = None
            try:
                data = cache.get_row(keys)
                self.assertEqual(len(data), len(values))
            except KeyError:
                print 'not found'
            return data

        q1 = get_data(cache, [433, 4330])  # float(0.003)
        lost = get_data(cache, [133, 1330])
        lost = get_data(cache, [433, 4330])
        q2 = get_data(cache, [433, 4330])
        self.assertEqual(q1, q2)
Example #4
File: qbeast.py  Project: bsc-dd/hecuba
    def _setup_hcache(self):
        key_names = [key["name"] for key in self._primary_keys]
        persistent_values = [{"name": col["name"]} for col in self._columns]

        if self._tokens is None:
            raise RuntimeError("Tokens for object {} are null".format(self._get_name()))

        self._hcache_params = (self._ksp, self._table,
                               self.storage_id,
                               self._tokens, key_names, persistent_values,
                               {'cache_size': config.max_cache_size,
                                'writer_par': config.write_callbacks_number,
                                'writer_buffer': config.write_buffer_size,
                                'timestamped_writes': config.timestamped_writes})
        log.debug("HCACHE params %s", self._hcache_params)
        self._hcache = Hcache(*self._hcache_params)
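
For reference, the positional layout packed into _hcache_params here and unpacked by Hcache(*self._hcache_params) appears to be the following; the parameter names are descriptive guesses inferred from these examples, not hfetch's documented signature:

# Hcache(keyspace, table, storage_id_or_token_query, token_ranges,
#        key_names, value_descriptions, config_dict)
#
# The third argument is a storage_id uuid here, while the unit tests pass a
# token-predicate string such as "WHERE token(partid)>=? AND token(partid)<?;".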
Example #5
    def test_write_nulls_simple(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        Simple test to store rows whose values are mixed with nulls.

        Analyzes:
        - HCache
        - Put_row (write data mixed with nulls)
        '''

        table = "nulls"

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(partid int PRIMARY KEY, time float, data text);"
            % (self.keyspace, table))

        num_items = int(pow(10, 3))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        nblocks = 10
        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Token blocks
        tkn_size = (t_t - t_f) / (num_items / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        keys = ["partid"]
        values = ["time", "data"]

        hcache_config = {'cache_size': '10', 'writer_buffer': 20}

        cache = Hcache(self.keyspace, table, "", tokens, keys, values,
                       hcache_config)
        for i in xrange(0, num_items):
            # the values could also be randomized, e.g.
            # random.sample({i, None}, 1) + random.sample({'SomeRandomText', None}, 1)
            cache.put_row([i], [12, None])
        time.sleep(10)
Example #6
File: hnumpy.py  Project: him-28/hecuba
    def make_persistent(self, name):
        if self._is_persistent:
            raise AlreadyPersistentError(
                "This StorageNumpy is already persistent [Before:{}.{}][After:{}]",
                self._ksp, self._table, name)
        self._is_persistent = True

        (self._ksp, self._table) = self._extract_ks_tab(name)
        if self._storage_id is None:
            self._storage_id = uuid.uuid3(
                uuid.NAMESPACE_DNS, self._ksp + '.' + self._table + '_numpies')
        self._build_args = self.args(self._storage_id, self._class_name, name)
        log.info("PERSISTING DATA INTO %s %s", self._ksp, self._table)

        query_keyspace = "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = %s" % (
            self._ksp, config.replication)
        config.session.execute(query_keyspace)

        config.session.execute(
            'CREATE TABLE IF NOT EXISTS ' + self._ksp + '.' + self._table +
            '_numpies'
            '(storage_id uuid , '
            'cluster_id int, '
            'block_id int, '
            'payload blob, '
            'PRIMARY KEY((storage_id,cluster_id),block_id))')

        self._hcache_params = (self._ksp, self._table + '_numpies',
                               self._storage_id, [],
                               ['storage_id', 'cluster_id', 'block_id'],
                               [{'name': "payload", 'type': 'numpy'}],
                               {'cache_size': config.max_cache_size,
                                'writer_par': config.write_callbacks_number,
                                'write_buffer': config.write_buffer_size})

        self._hcache = Hcache(*self._hcache_params)
        if len(self.shape) != 0:
            self._hcache.put_row([self._storage_id, -1, -1], [self])
        self._store_meta(self._build_args)
Example #7
File: hnumpy.py  Project: him-28/hecuba
    def load_array(storage_id, name):
        (ksp, table) = IStorage._extract_ks_tab(name)
        _hcache_params = (ksp, table + '_numpies', storage_id, [],
                          ['storage_id', 'cluster_id', 'block_id'],
                          [{'name': "payload", 'type': 'numpy'}],
                          {'cache_size': config.max_cache_size,
                           'writer_par': config.write_callbacks_number,
                           'write_buffer': config.write_buffer_size})
        _hcache = Hcache(*_hcache_params)
        result = _hcache.get_row([storage_id, -1, -1])
        if len(result) == 1:
            return result[0]
        else:
            raise KeyError
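
A hedged call-site sketch for the helper above; it assumes load_array is importable in the same scope and simply converts the KeyError into a None result:

def try_load_array(storage_id, name):
    # thin wrapper: return None instead of raising KeyError when no numpy
    # payload is stored under (storage_id, -1, -1)
    try:
        return load_array(storage_id, name)
    except KeyError:
        return None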
Example #8
    def test_coherency(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        from hfetch import HWriter
        '''
        Writes rows through the cache and immediately reads them all back, checking none are lost.

        Analyzes:
        - HCache
        - Put_row / Get_row
        '''

        table = "particle"
        nparts = 10000  # Num particles in range

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        tkns = []
        keys = ["partid", "time"]
        values = ["x", "y", "z"]
        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;", tkns,
                       keys, values, {
                           'cache_size': '1',
                           'writer_buffer': 20
                       })
        for i in xrange(0, nparts):
            cache.put_row([i, i / .1], [i / .2, i / .3, i / .4])

        for i in reversed(xrange(0, nparts)):  #xrange(nparts, -1, -1):
            try:
                cache.get_row([i, i / .1])
            except KeyError:
                str_k = str([i, i / .1])
                self.fail(str_k + " not found")
Example #9
    def write_test(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        from hfetch import HWriter
        '''
        While the iterator retrieves the data from a table, the writer stores it into another table.

        Analyzes:
        - HCache
        - HWriter
        - Iteritems (updating the cache)
        '''

        table = "particle"
        table_write = "particle_write"
        nparts = 6000  # Num particles in range

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table_write))

        for i in xrange(0, nparts):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        p = 1000  # Num partitions

        t_f = -7764607523034234880  # Token begin range
        # t_t = 5764607523034234880  # Token end range
        t_t = 7764607523034234880
        # Token blocks
        tkn_size = (t_t - t_f) / (nparts / p)
        tkns = [(a, a + tkn_size)
                for a in xrange(t_f, t_t - tkn_size, tkn_size)]
        keys = ["partid", "time"]
        values = ["x", "y", "z"]
        a = Hcache(self.keyspace, table,
                   "WHERE token(partid)>=? AND token(partid)<?;", tkns, keys,
                   values, {
                       'cache_size': '100',
                       'writer_buffer': 20
                   })

        writer = HWriter(self.keyspace, table_write, keys, values,
                         {'writer_buffer': 20})

        def readAll(iter, wr):
            count = 0
            i = None
            while True:
                try:
                    i = iter.get_next()
                except StopIteration:
                    print 'End of data, items read: ', count, ' last value read: ', i
                    break
                wr.write(i[0:2], i[2:5])
                count += 1
                if count % 100000 == 0:
                    print count
            print "iter has %d elements" % count

        start = time.time()
        readAll(a.iteritems({
            "prefetch_size": 100,
            "update_cache": "yes"
        }), writer)
        print "finshed into %d" % (time.time() - start)
Example #10
    def uuid_test(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        import uuid
        '''
        This test checks the correct handling of UUIDs.

        Analyzes:
        - Hcache
        - Put_row
        - Iteritems
        '''

        table = "uuid"

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid uuid, data int, PRIMARY KEY(partid));"
            % (self.keyspace, table))

        nelem = 1000
        nblocks = 10

        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Token blocks
        tkn_size = (t_t - t_f) / (nelem / nblocks)
        tokens = [(a, a + tkn_size)
                  for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        keys = ["partid"]
        values = ["data"]

        # CREATE TABLE test.bulk(partid int PRIMARY KEY, data text);
        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;", tokens,
                       keys, values, {
                           'cache_size': '10',
                           'writer_buffer': 20
                       })

        # Write data
        someid = None
        i = 0
        while i < nelem:
            u = uuid.uuid4()  # ('81da81e8-1914-11e7-908d-ecf4bb4c66c4')
            cache.put_row([u], [i])
            if i == nelem / 2:
                someid = u
            i += 1

        # by recreating the cache we wait until all the data is written

        cache = Hcache(self.keyspace, table,
                       "WHERE token(partid)>=? AND token(partid)<?;", tokens,
                       keys, values, {
                           'cache_size': '10',
                           'writer_buffer': 20
                       })
        # Read data
        itera = cache.iteritems(10)
        found = False
        counter = 0
        while True:
            try:
                L = uuid.UUID(itera.get_next()[0])
                if L == someid:
                    found = True
            except StopIteration:
                break
            counter = counter + 1

        self.assertEqual(counter, nelem)
        self.assertTrue(found)
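
The test relies on the behaviour stated in its own comment: recreating the cache acts as a barrier that waits for the buffered writes. A tiny helper capturing that idiom, under that assumption only, could be:

def reopen_after_writes(keyspace, table, query, tokens, keys, values, cfg):
    # constructing a fresh Hcache is used above as a flush barrier for the
    # asynchronous writer (assumption taken from the test's comment, not from
    # hfetch documentation)
    return Hcache(keyspace, table, query, tokens, keys, values, cfg)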
Example #11
    def test_get_row_key_error(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test checks that the hcache raises a KeyError when the requested key doesn't exist.

        Analyzes:
        - Hcache
        - Get_row (raising KeyError)
        '''

        table = 'particle'
        num_keys = 10001

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        token_ranges = [(8070430489100699999, 8070450532247928832)]

        non_existent_keys = 10

        cache_size = num_keys + non_existent_keys

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort
        keys = ["partid", "time"]
        values = ["ciao", "x", "y", "z"]
        cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                       {'cache_size': cache_size})

        # Access the cache, which is empty and queries cassandra to retrieve the data
        t1 = time.time()
        error_counter = 0
        for pk in xrange(0, num_keys + non_existent_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                error_counter = error_counter + 1

        print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format(
            num_keys, time.time() - t1, error_counter, non_existent_keys)

        self.assertEqual(error_counter, non_existent_keys)

        # Access the cache, which has already all the data and will ask cassandra only if
        # the keys asked are not present
        t1 = time.time()
        error_counter = 0
        for pk in xrange(0, num_keys + non_existent_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                error_counter = error_counter + 1

        print 'Retrieved {0} keys in {1} seconds. {2} keys weren\'t found, {3} keys weren\'t supposed to be found'.format(
            num_keys, time.time() - t1, error_counter, non_existent_keys)

        self.assertEqual(error_counter, non_existent_keys)
Example #12
    def test_get_row(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test iterates over a set of particles, performing get_row operations.

        Analyzes:
        - HCache (multiple reads of the same key)
        - Get_row
        '''

        table = 'particle'
        num_keys = 10001

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        token_ranges = []

        cache_size = 10001

        keys = ["partid", "time"]
        values = ["ciao", "x", "y", "z"]

        cache_config = {'cache_size': cache_size}

        cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                       cache_config)

        # clustering key
        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]

        print 'time - load C++ cache with cassandra data: ', time.time() - t1

        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]
        # print 'items in res: ',len(result)
        print 'time - read data from C++ cache: ', time.time() - t1

        py_dict = {}
        cache = Hcache(self.keyspace, table, "",
                       [(8070430489100699999, 8070450532247928832)],
                       ["partid", "time"], ["ciao", "x", "y", "z"],
                       {'cache_size': num_keys})

        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = cache.get_row([pk, ck])
                py_dict[(pk, ck)] = result
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]
        print 'time - load data into python dict: ', time.time() - t1
        # print 'size ', len(py_dict)
        # print 'items in res: ',len(py_dict[1])

        t1 = time.time()
        for pk in xrange(0, num_keys):
            ck = pk * 10
            try:
                result = py_dict[(pk, ck)]
                self.assertEqual(len(result), len(values))
            except KeyError as e:
                print "Error when retrieving value from cache:", e, [pk, ck]
        print 'time - read data from the python dict: ', time.time() - t1
Example #13
        t_f = pow(-2, 63)  # Token begin range
        t_t = pow(2, 63) - 1
        # Token blocks
        tkn_size = (t_t - t_f) / (nparts / p)
        tkns = [(a, a + tkn_size)
                for a in xrange(t_f, t_t - tkn_size, tkn_size)]

        keys = ["partid", "time"]
        values = ["x"]

        hcache_config = {'cache_size': '100', 'writer_buffer': 20}

        token_query = "WHERE token(partid)>=? AND token(partid)<?;"

        cache = Hcache(self.keyspace, table, token_query, tkns, keys, values,
                       hcache_config)

        hiter_config = {"prefetch_size": 100, "update_cache": "yes"}

        hiter = cache.iteritems(hiter_config)

        count = 0
        start = time.time()
        while True:
            try:
                i = hiter.get_next()
                self.assertEqual(len(i), len(keys) + len(values))
            except StopIteration:
                break
            count += 1
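
The excerpt above starts a timer and counts rows but ends before reporting them; a plausible continuation (an assumption, since the original is truncated here) would be:

        # hypothetical continuation, not part of the original excerpt
        print 'read %d rows in %f seconds' % (count, time.time() - start)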
Example #14
class Hfetch_Tests(unittest.TestCase):
    keyspace = "hnumpy_test"
    contact_names = ['127.0.0.1']
    nodePort = 9042
    cluster = Cluster(contact_names, port=nodePort)
    session = cluster.connect()

    @classmethod
    def setUpClass(cls):
        cls.session.execute(
            "CREATE KEYSPACE IF NOT EXISTS %s WITH replication "
            "= {'class': 'SimpleStrategy', 'replication_factor': 1};" %
            cls.keyspace)
        cls.session.execute(
            "CREATE TYPE IF NOT EXISTS %s.numpy_meta(dims frozen<list<int>>,type int,type_size int);"
            % cls.keyspace)

    @classmethod
    def tearDownClass(cls):
        #self.session.execute("DROP KEYSPACE IF EXISTS %s;" % cls.keyspace)
        pass

    def test_simple_memory(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        import numpy as np
        '''
        Writes a 2D numpy array through the cache, reads it back and checks both are equal.

        Analyzes:
        - Hcache (numpy payloads)
        - Put_row / Get_row
        '''
        dims = 2
        elem_dim = 4096

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except RuntimeError, e:
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        table = "arrays_numpies"

        self.session.execute("DROP TABLE if exists %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(storage_id uuid, cluster_id int, block_id int, payload blob,PRIMARY KEY((storage_id,cluster_id),block_id));"
            % (self.keyspace, table))

        storage_id = uuid.uuid3(uuid.NAMESPACE_DNS,
                                self.keyspace + '.' + table)
        time.sleep(5)
        a = Hcache(self.keyspace, table, storage_id, [],
                   ['storage_id', 'cluster_id', 'block_id'], [{
                       'name': "payload",
                       'type': 'numpy'
                   }], {})

        #prepare data

        bigarr = np.arange(pow(elem_dim, dims)).reshape(elem_dim, elem_dim)

        print 'To be written '
        keys = [storage_id, -1, -1]
        values = [bigarr.astype('i')]
        print values
        #insert
        a.put_row(keys, values)

        #delete is a blocking op which waits the data to be flushed
        del a

        a = Hcache(self.keyspace, table, storage_id, [],
                   ["storage_id", 'cluster_id', 'block_id'], [{
                       "name": "payload",
                       "type": "numpy"
                   }], {})
        #retrieve
        result = a.get_row(keys)
        print 'Retrieved from cassandra'
        print result
        if np.array_equal(bigarr, result[0]):
            print 'Created and retrieved are equal'
        else:
            self.fail('Created and retrieved ndarrays differ')
        self.session.execute("DROP TABLE %s.%s;" % (self.keyspace, table))
Example #15
            print e
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        table = "arrays_numpies"

        self.session.execute("DROP TABLE if exists %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(storage_id uuid, cluster_id int, block_id int, payload blob,PRIMARY KEY((storage_id,cluster_id),block_id));"
            % (self.keyspace, table))
        storage_id = uuid.uuid3(uuid.NAMESPACE_DNS,
                                self.keyspace + '.' + table)
        time.sleep(5)
        a = Hcache(self.keyspace, table, storage_id, [],
                   ['storage_id', 'cluster_id', 'block_id'], [{
                       'name': "payload",
                       'type': 'numpy'
                   }], {})

        #prepare data
        bigarr = np.arange(pow(elem_dim,
                               dims)).reshape(elem_dim, elem_dim, elem_dim)

        keys = [storage_id, -1, -1]
        values = [bigarr.astype('i')]

        #insert
        a.put_row(keys, values)

        # otherwise we would ask for the row before it has been processed
        time.sleep(2)
Example #16
File: hdict.py  Project: him-28/hecuba
    def make_persistent(self, name):
        """
        Method to transform a StorageDict into a persistent object.
        This will make it use a persistent DB as the main location
        of its data.
        Args:
            name:
        """
        if self._is_persistent:
            raise AlreadyPersistentError(
                "This StorageDict is already persistent [Before:{}.{}][After:{}]",
                self._ksp, self._table, name)
        self._is_persistent = True
        (self._ksp, self._table) = self._extract_ks_tab(name)

        if self._storage_id is None:
            self._storage_id = uuid.uuid3(uuid.NAMESPACE_DNS,
                                          self._ksp + '.' + self._table)
        self._build_args = self._build_args._replace(
            storage_id=self._storage_id, name=self._ksp + "." + self._table)
        self._store_meta(self._build_args)
        if config.id_create_schema == -1:
            query_keyspace = "CREATE KEYSPACE IF NOT EXISTS %s WITH replication = %s" % (
                self._ksp, config.replication)
            try:
                log.debug('MAKE PERSISTENCE: %s', query_keyspace)
                config.session.execute(query_keyspace)
            except Exception as ex:
                log.warn("Error creating the StorageDict keyspace %s, %s",
                         (query_keyspace), ex)
                raise ex

        for key, value in dict.iteritems(self):
            if issubclass(value.__class__, IStorage):
                # new name as ksp+table+obj_class_name
                val_name = self._ksp + '.' + self._table + type(
                    value).__name__.lower()
                value.make_persistent(val_name)

        columns = self._primary_keys + self._columns
        for ind, entry in enumerate(columns):
            n = StorageDict._other_case.match(entry[1])
            if n is not None:
                iter_type, intra_type = n.groups()
            else:
                iter_type = entry[1]
            if iter_type not in IStorage._basic_types:
                columns[ind] = entry[0], 'uuid'

        pks = map(lambda a: a[0], self._primary_keys)
        query_table = "CREATE TABLE IF NOT EXISTS %s.%s (%s, PRIMARY KEY (%s));" \
                      % (self._ksp,
                         self._table,
                         ",".join("%s %s" % tup for tup in columns),
                         str.join(',', pks))
        try:
            log.debug('MAKE PERSISTENCE: %s', query_table)
            config.session.execute(query_table)
        except Exception as ex:
            log.warn("Error creating the StorageDict table: %s %s",
                     query_table, ex)
            raise ex
        key_names = map(lambda a: a[0].encode('UTF8'), self._primary_keys)
        column_names = self._columns

        self._hcache_params = (self._ksp, self._table, self._storage_id,
                               self._tokens, key_names,
                               map(lambda x: {
                                   "name": x[0],
                                   "type": x[1]
                               }, column_names), {
                                   'cache_size': config.max_cache_size,
                                   'writer_par': config.write_callbacks_number,
                                   'write_buffer': config.write_buffer_size
                               })
        log.debug("HCACHE params %s", self._hcache_params)
        self._hcache = Hcache(*self._hcache_params)
        # Storing all in-memory values to cassandra
        for key, value in dict.iteritems(self):
            self._hcache.put_row(self._make_key(key), self._make_value(value))
        if hasattr(self, '_indexed_args') and self._indexed_args is not None:
            index_query = 'CREATE CUSTOM INDEX IF NOT EXISTS ' + self._table + '_idx ON '
            index_query += self._ksp + '.' + self._table + ' (' + str.join(
                ',', self._indexed_args) + ') '
            index_query += "using 'es.bsc.qbeast.index.QbeastIndex';"
            try:
                config.session.execute(index_query)
            except Exception as ex:
                log.error("Error creating the Qbeast custom index: %s %s",
                          index_query, ex)
                raise ex
Example #17
    def test_delete_row(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test deletes a row and checks that it can no longer be retrieved.

        Analyzes:
        - HCache
        - Delete_row
        - Get_row (raising KeyError after the delete)
        '''

        table = 'particle'
        num_keys = 100  # num keys must be multiple of expected_errors
        expected_errors = 10

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE IF NOT EXISTS %s.%s(partid int, time float, ciao text,"
            "x float, y float, z float, PRIMARY KEY(partid,time));" %
            (self.keyspace, table))

        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in
                [i, i / .1, i / .2, i / .3, i / .4, "'" + str(i * 60) + "'"])
            self.session.execute(
                "INSERT INTO %s.%s(partid , time , x, y , z,ciao ) VALUES (%s)"
                % (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        token_ranges = []

        cache_size = 1

        keys = ["partid", "time"]
        values = ["ciao", "x", "y", "z"]

        cache_config = {'cache_size': cache_size}

        cache = Hcache(self.keyspace, table, "", token_ranges, keys, values,
                       cache_config)
        pk = 0
        ck = pk * 10

        try:
            result = cache.get_row([pk, ck])
            self.assertEqual(len(result), len(values))
        except KeyError as e:
            self.fail("Error when retrieving value from cache: " + str(e) +
                      " -- " + str([pk, ck]))

        try:
            result = cache.delete_row([pk, ck])
        except KeyError as e:
            self.fail("Error when deleteing entry from cache: " + str(e) +
                      " -- " + str([pk, ck]))

        try:
            result = cache.get_row([pk, ck])
            self.fail(
                "Error when retrieving value from cache, the entry shouldn't exist"
            )
        except KeyError:
            pass
Example #18
    def test_iterators(self):
        from hfetch import connectCassandra
        from hfetch import Hcache
        '''
        This test iterates over some text and checks coherency between hcache and hiter.

        Analyzes:
        - HCache
        - Get_row (read text)
        - Iteritems (read text)
        '''

        table = "words"
        num_keys = 20

        self.session.execute("DROP TABLE IF EXISTS %s.%s;" %
                             (self.keyspace, table))
        self.session.execute(
            "CREATE TABLE %s.%s(position int PRIMARY KEY, wordinfo text);" %
            (self.keyspace, table))

        for i in xrange(0, num_keys):
            vals = ','.join(
                str(e) for e in [
                    i, "'someRandomTextForTesting purposes - " + str(i * 60) +
                    "'"
                ])
            self.session.execute(
                "INSERT INTO %s.%s(position , wordinfo ) VALUES (%s)" %
                (self.keyspace, table, vals))

        try:
            connectCassandra(self.contact_names, self.nodePort)
        except Exception:
            print 'can\'t connect, verify the contact points and port', self.contact_names, self.nodePort

        tkns = [(pow(-2, 63) + 1, pow(2, 63) - 1)]
        keys = ["position"]
        values = ["wordinfo"]
        hcache_config = {'cache_size': 100, 'writer_buffer': 20}

        cache = Hcache(self.keyspace, table,
                       "WHERE token(position)>=? AND token(position)<?;", tkns,
                       keys, values, hcache_config)

        iter_config = {"prefetch_size": 100, "update_cache": "yes"}
        myIter = cache.iteritems(iter_config)

        data = []
        for i in xrange(0, 10):
            data.append(myIter.get_next())

        assert (len(data) > 0)
        first_data = data[0]

        assert (len(first_data) == 2)
        first_key = [first_data[0]]

        assert (type(first_key[0]) == int)
        somedata = cache.get_row(first_key)
        # self.assertEqual((first_key + somedata), first_data)
        assert ((first_key + somedata) == first_data)

        count = len(data)

        i = None
        while True:
            try:
                i = myIter.get_next()
            except StopIteration:
                print 'End of data, items read: ', count, ' last value read: ', i
                break
            count = count + 1

        print 'data was: \n', data