Пример #1
0
 def test_set(self):
     """Check what ``BitSet.set`` returns for new and for repeated bits."""
     bits = BitSet()
     # Each of these positions is set for the first time: expect a truthy result.
     for position in (0, 1, 2, 3):
         self.assertTrue(bits.set(position))
     # Position 1 was already set above: expect a falsy result.
     self.assertFalse(bits.set(1))
Пример #2
0
 def test_generator(self):
     """Iterating a BitSet must yield the positions that were set on it."""
     expected = [1, 4, 10, 100000, 12323131]
     bits = BitSet()
     for position in expected:
         bits.set(position)
     self.assertEqual(list(bits), expected)
Пример #3
0
 def get_ix(self, column_name, key):
     """Return the index bitmap stored under ``key`` for ``column_name``.

     The second element of ``self.dbs[column_name]`` is used as the index
     database.  Its value for ``key`` is read inside ``self.txn`` and
     loaded into a fresh ``BitSet``.

     This is a best-effort lookup: if reading or loading raises, the error
     is suppressed and the ``BitSet`` is returned as it stands.
     """
     _, idb, _, _ = self.dbs[column_name]
     bitset = BitSet()
     try:
         data = idb.get(self.txn, key)
         bitset.loads(data)
     except Exception:
         # Still best-effort, but unlike a bare ``except:`` this lets
         # KeyboardInterrupt and SystemExit propagate.
         pass
     return bitset
Пример #4
0
 def get_ix(self, column_name, key):
     """Return the index bitmap stored under ``key`` for ``column_name``.

     The second element of ``self.dbs[column_name]`` is used as the index
     database.  Its value for ``key`` is read inside ``self.txn`` and
     loaded into a fresh ``BitSet``.

     This is a best-effort lookup: if reading or loading raises, the error
     is suppressed and the ``BitSet`` is returned as it stands.
     """
     _, idb, _, _ = self.dbs[column_name]
     bitset = BitSet()
     try:
         data = idb.get(self.txn, key)
         bitset.loads(data)
     except Exception:
         # Still best-effort, but unlike a bare ``except:`` this lets
         # KeyboardInterrupt and SystemExit propagate.
         pass
     return bitset
Пример #5
0
 def bit_eq(self, ix, key):
     """Return the bitmap stored in index ``ix`` for ``key``.

     ``key`` is first translated with ``self._vid_for_value``.  If that
     translation or the index lookup yields ``None``, an empty ``BitSet``
     is returned.
     """
     _, index_db, _, column = self.dbs[ix]
     matches = BitSet()
     vid = self._vid_for_value(column, key)
     if vid is None:
         return matches
     payload = index_db.get(self.txn, vid)
     if payload is None:
         return matches
     matches.loads(payload)
     return matches
Пример #6
0
 def bit_eq(self, ix, key):
     """Return the bitmap stored in index ``ix`` for ``key``.

     ``key`` is first translated with ``self._vid_for_value``.  If that
     translation or the index lookup yields ``None``, an empty ``BitSet``
     is returned.
     """
     _, index_db, _, column = self.dbs[ix]
     matches = BitSet()
     vid = self._vid_for_value(column, key)
     if vid is None:
         return matches
     payload = index_db.get(self.txn, vid)
     if payload is None:
         return matches
     matches.loads(payload)
     return matches
Пример #7
0
 def test_contains(self):
     """Every position set on a BitSet must pass the ``in`` membership test."""
     members = [1, 4, 10, 100000, 12323131]
     bits = BitSet()
     for position in members:
         bits.set(position)
     for position in members:
         self.assertTrue(position in bits)
Пример #8
0
 def test_marble_insert(self):
     """Check metadata and index bitmaps of every file in ``self.files``."""
     #  test general information
     self.assertEqual(self.n_inserted, len(_ALBUMS))
     self.assertEqual(_NPARTITIONS, len(self.files))
     part_id = {}
     #  test that each sub db is fine
     for date, path in self.files.iteritems():
         env, txn, dbs, meta = self.marble._open(path)
         #  check meta db: the trie entries must exist ...
         for meta_key in ("_vid_nodes", "_vid_kids",
                          "_vid16_nodes", "_vid16_kids"):
             self.assertTrue(meta.contains(txn, meta_key))
         #  ... and the descriptive entries must hold the JSON-encoded values
         for meta_key, plain in (("name", "Collections"),
                                 ("partition", "date"),
                                 ("fields", _FIELDS),
                                 ("_pdata", date)):
             self.assertEqual(meta.get(txn, meta_key), ujson.dumps(plain))
         vid_nodes, _ = meta.get_raw(txn, '_vid_nodes')
         vid_kids, _ = meta.get_raw(txn, '_vid_kids')
         vid16_nodes, _ = meta.get_raw(txn, '_vid16_nodes', (None, 0))
         vid16_kids, _ = meta.get_raw(txn, '_vid16_kids', (None, 0))
         #  check subdb, subinddb
         part_id[date] = 1
         for name, (_db, ind_db, _, column, _) in dbs.iteritems():
             if name == "_count":
                 continue
             bitmaps = {}
             # row ids restart at 1 for every column of this partition
             part_id[date] = 1
             for album in self.albums:
                 if date != album[_PARTITIONS]:
                     # album belongs to another partition
                     continue
                 value = album[name]
                 i = part_id[album[_PARTITIONS]]
                 part_id[album[_PARTITIONS]] += 1
                 if column.is_trie:
                     # pick the trie matching the column's indicator
                     if column.rtrie_indicator == mdb.MDB_UINT_16:
                         nodes, kids = vid16_nodes, vid16_kids
                     else:
                         nodes, kids = vid_nodes, vid_kids
                     val = rtrie.vid_for_value(nodes, kids, value)
                 elif column.is_lz4:
                     val = clz4.compress(value)
                 else:
                     val = value
                 # self.assertEqual(db.get(txn, i), val)
                 if ind_db is None:
                     continue
                 #  row_id should be in bitmap too; load each bitmap once
                 if val not in bitmaps:
                     loaded = BitSet()
                     loaded.loads(ind_db.get(txn, val))
                     bitmaps[val] = loaded
                 self.assertTrue(i in bitmaps[val])
         txn.commit()
         env.close()
Пример #9
0
 def _bit_op(self, val, op):
     """OR together the bitmaps produced by ``op(self.txn, val)``.

     ``op`` must return an iterable of pairs.  The second item of each pair
     is loaded into a ``BitSet`` and merged into the result; pairs whose
     second item is ``None`` are skipped.
     """
     union = BitSet()
     for _, payload in op(self.txn, val):
         if payload is not None:
             chunk = BitSet()
             chunk.loads(payload)
             union |= chunk
     return union
Пример #10
0
 def _bit_op(self, val, op):
     """OR together the bitmaps produced by ``op(self.txn, val)``.

     ``op`` must return an iterable of pairs.  The second item of each pair
     is loaded into a ``BitSet`` and merged into the result; pairs whose
     second item is ``None`` are skipped.
     """
     union = BitSet()
     for _, payload in op(self.txn, val):
         if payload is not None:
             chunk = BitSet()
             chunk.loads(payload)
             union |= chunk
     return union
Пример #11
0
 def test_dumps_loads(self):
     """A BitSet must compare equal to a copy rebuilt from its ``dumps()``."""
     original = BitSet()
     for position in (0, 1, 4, 8, 16):
         self.assertTrue(original.set(position))
     restored = BitSet()
     restored.loads(original.dumps())
     self.assertEqual(original, restored)
Пример #12
0
 def test_logical_not(self):
     """``~`` must yield only the unset positions below the highest set bit."""
     bits = BitSet()
     for position in (0, 1, 8, 9):
         bits.set(position)
     inverted = ~bits
     # The complement must not contain any number greater than 9, the
     # highest bit set above.
     self.assertEqual(list(inverted), [2, 3, 4, 5, 6, 7])
Пример #13
0
 def test_marble_insert(self):
     """Check metadata and index bitmaps of every file in ``self.files``."""
     #  test general information
     self.assertEqual(self.n_inserted, len(_ALBUMS))
     self.assertEqual(_NPARTITIONS, len(self.files))
     part_id = {}
     #  test that each sub db is fine
     for date, path in self.files.iteritems():
         env, txn, dbs, meta = self.marble._open(path)
         #  check meta db: the trie entries must exist ...
         for meta_key in ("_vid_nodes", "_vid_kids",
                          "_vid16_nodes", "_vid16_kids"):
             self.assertTrue(meta.contains(txn, meta_key))
         #  ... and the descriptive entries must hold the JSON-encoded values
         for meta_key, plain in (("name", "Collections"),
                                 ("partition", "date"),
                                 ("fields", _FIELDS),
                                 ("_pdata", date)):
             self.assertEqual(meta.get(txn, meta_key), ujson.dumps(plain))
         vid_nodes, _ = meta.get_raw(txn, '_vid_nodes')
         vid_kids, _ = meta.get_raw(txn, '_vid_kids')
         vid16_nodes, _ = meta.get_raw(txn, '_vid16_nodes', (None, 0))
         vid16_kids, _ = meta.get_raw(txn, '_vid16_kids', (None, 0))
         #  check subdb, subinddb
         part_id[date] = 1
         for name, (_db, ind_db, _, column, _) in dbs.iteritems():
             if name == "_count":
                 continue
             bitmaps = {}
             # row ids restart at 1 for every column of this partition
             part_id[date] = 1
             for album in self.albums:
                 if date != album[_PARTITIONS]:
                     # album belongs to another partition
                     continue
                 value = album[name]
                 i = part_id[album[_PARTITIONS]]
                 part_id[album[_PARTITIONS]] += 1
                 if column.is_trie:
                     # pick the trie matching the column's indicator
                     if column.rtrie_indicator == mdb.MDB_UINT_16:
                         nodes, kids = vid16_nodes, vid16_kids
                     else:
                         nodes, kids = vid_nodes, vid_kids
                     val = rtrie.vid_for_value(nodes, kids, value)
                 elif column.is_lz4:
                     val = clz4.compress(value)
                 else:
                     val = value
                 # self.assertEqual(db.get(txn, i), val)
                 if ind_db is None:
                     continue
                 #  row_id should be in bitmap too; load each bitmap once
                 if val not in bitmaps:
                     loaded = BitSet()
                     loaded.loads(ind_db.get(txn, val))
                     bitmaps[val] = loaded
                 self.assertTrue(i in bitmaps[val])
         txn.commit()
         env.close()
Пример #14
0
def mdb_fetch(key, txn=None, ixdb=None):
    """Load the bitmap stored under ``key`` in ``ixdb``.

    Args:
        key: key to look up.
        txn: transaction handle passed through to ``ixdb.get``.
        ixdb: database object exposing ``get(txn, key)``.

    Returns:
        A ``BitSet`` loaded from the stored value, or ``None`` when the
        lookup raises or yields ``None``.
    """
    from pyebset import BitSet
    try:
        bitmaps = ixdb.get(txn, key)
    except Exception:
        # Any lookup failure is treated as "not stored".  Unlike a bare
        # ``except:`` this lets KeyboardInterrupt and SystemExit propagate.
        bitmaps = None

    if bitmaps is not None:
        bitset = BitSet()
        bitset.loads(bitmaps)
        return bitset
    return None
Пример #15
0
def mdb_fetch(key, txn=None, ixdb=None):
    """Load the bitmap stored under ``key`` in ``ixdb``.

    Args:
        key: key to look up.
        txn: transaction handle passed through to ``ixdb.get``.
        ixdb: database object exposing ``get(txn, key)``.

    Returns:
        A ``BitSet`` loaded from the stored value, or ``None`` when the
        lookup raises or yields ``None``.
    """
    from pyebset import BitSet
    try:
        bitmaps = ixdb.get(txn, key)
    except Exception:
        # Any lookup failure is treated as "not stored".  Unlike a bare
        # ``except:`` this lets KeyboardInterrupt and SystemExit propagate.
        bitmaps = None

    if bitmaps is not None:
        bitset = BitSet()
        bitset.loads(bitmaps)
        return bitset
    return None
Пример #16
0
 def test_contains(self):
     """Every position set on a BitSet must pass the ``in`` membership test."""
     members = [1, 4, 10, 100000, 12323131]
     bits = BitSet()
     for position in members:
         bits.set(position)
     for position in members:
         self.assertTrue(position in bits)
Пример #17
0
class Tablet(object):
    """Small table stand-in backed by a single ``BitSet`` of integers.

    Every ``bit_*`` method ignores ``col`` and compares the stored integers
    themselves against ``other``.
    """

    def __init__(self, l=()):
        self.l = BitSet()
        for member in l:
            self.l.set(member)

    def iter_all(self):
        """Return an iterator over all stored integers."""
        return iter(self.l)

    def _matching(self, keep):
        """Return a new BitSet holding the stored integers accepted by ``keep``."""
        picked = BitSet()
        for member in self.l:
            if keep(member):
                picked.set(member)
        return picked

    def bit_eq(self, col, other):
        """Stored integers equal to ``other``."""
        return self._matching(lambda member: member == other)

    def bit_ne(self, col, other):
        """Stored integers different from ``other``."""
        return self._matching(lambda member: member != other)

    def bit_lt(self, col, other):
        """Stored integers strictly below ``other``."""
        return self._matching(lambda member: member < other)

    def bit_gt(self, col, other):
        """Stored integers strictly above ``other``."""
        return self._matching(lambda member: member > other)

    def bit_ge(self, col, other):
        """Stored integers greater than or equal to ``other``."""
        return self._matching(lambda member: member >= other)

    def bit_le(self, col, other):
        """Stored integers less than or equal to ``other``."""
        return self._matching(lambda member: member <= other)
Пример #18
0
class Tablet(object):
    """Small table stand-in backed by a single ``BitSet`` of integers.

    Every ``bit_*`` method ignores ``col`` and compares the stored integers
    themselves against ``other``.
    """

    def __init__(self, l=()):
        self.l = BitSet()
        for member in l:
            self.l.set(member)

    def iter_all(self):
        """Return an iterator over all stored integers."""
        return iter(self.l)

    def _matching(self, keep):
        """Return a new BitSet holding the stored integers accepted by ``keep``."""
        picked = BitSet()
        for member in self.l:
            if keep(member):
                picked.set(member)
        return picked

    def bit_eq(self, col, other):
        """Stored integers equal to ``other``."""
        return self._matching(lambda member: member == other)

    def bit_ne(self, col, other):
        """Stored integers different from ``other``."""
        return self._matching(lambda member: member != other)

    def bit_lt(self, col, other):
        """Stored integers strictly below ``other``."""
        return self._matching(lambda member: member < other)

    def bit_gt(self, col, other):
        """Stored integers strictly above ``other``."""
        return self._matching(lambda member: member > other)

    def bit_ge(self, col, other):
        """Stored integers greater than or equal to ``other``."""
        return self._matching(lambda member: member >= other)

    def bit_le(self, col, other):
        """Stored integers less than or equal to ``other``."""
        return self._matching(lambda member: member <= other)
Пример #19
0
 def bit_eq_ex(self, ix, keys):
     """Return the union of the bitmaps stored in index ``ix`` for ``keys``.

     Each key is translated with ``self._vid_for_value``.  Keys with no
     translation or no stored bitmap contribute nothing.
     """
     from collections import Iterable
     _, index_db, _, column = self.dbs[ix]
     union = BitSet()
     for key in keys:
         if isinstance(key, Iterable) and not isinstance(key, (basestring, unicode)):
             # in case the key is a composite object, just grab the first one
             key = key[0]
         vid = self._vid_for_value(column, key)
         if vid is None:
             continue
         payload = index_db.get(self.txn, vid)
         if payload is None:
             continue
         chunk = BitSet()
         chunk.loads(payload)
         union |= chunk
     return union
Пример #20
0
 def bit_ne(self, ix, key):
     """Return the inverted bitmap of ``key`` in index ``ix``.

     The stored bitmap is loaded, bit 0 (``ZERO_BS``) and bit
     ``self.number_rows`` are added, and the result is inverted in place.
     If ``key`` has no translation via ``self._vid_for_value`` or no stored
     bitmap, an empty ``BitSet`` is returned.
     """
     _, index_db, _, column = self.dbs[ix]
     complement = BitSet()
     vid = self._vid_for_value(column, key)
     if vid is None:
         return complement
     payload = index_db.get(self.txn, vid)
     if payload is None:
         return complement
     complement.loads(payload)
     complement |= ZERO_BS
     complement.set(self.number_rows)
     complement.lnot_inplace()
     return complement
Пример #21
0
 def bit_ne(self, ix, key):
     """Return the inverted bitmap of ``key`` in index ``ix``.

     The stored bitmap is loaded, bit 0 (``ZERO_BS``) and bit
     ``self.number_rows`` are added, and the result is inverted in place.
     If ``key`` has no translation via ``self._vid_for_value`` or no stored
     bitmap, an empty ``BitSet`` is returned.
     """
     _, index_db, _, column = self.dbs[ix]
     complement = BitSet()
     vid = self._vid_for_value(column, key)
     if vid is None:
         return complement
     payload = index_db.get(self.txn, vid)
     if payload is None:
         return complement
     complement.loads(payload)
     complement |= ZERO_BS
     complement.set(self.number_rows)
     complement.lnot_inplace()
     return complement
Пример #22
0
 def test_logical_not(self):
     """``~`` must yield only the unset positions below the highest set bit."""
     bits = BitSet()
     for position in (0, 1, 8, 9):
         bits.set(position)
     inverted = ~bits
     # The complement must not contain any number greater than 9, the
     # highest bit set above.
     self.assertEqual(list(inverted), [2, 3, 4, 5, 6, 7])
Пример #23
0
 def test_logical_not_1(self):
     """Invert a BitSet twice and check both intermediate results."""
     bits = BitSet()
     for position in (0, 1, 7, 8):
         bits.set(position)
     inverted = ~bits
     # The complement must not contain any number beyond the highest bit
     # set above (8 in this case).
     self.assertEqual(list(inverted), [2, 3, 4, 5, 6])
     restored = ~inverted
     self.assertListEqual(list(restored), [0, 1, 7, 8])
Пример #24
0
    def test_lru(self):
        """Exercise eviction, re-fetch and eviction order of a 5-slot LRU dict.

        The LRU is backed by an mdb database through ``mdb_fetch`` and
        ``mdb_evict``, so evicted entries must be readable from ``ixdb``.
        """
        def get(db, txn, key):
            # Probe the database; any lookup failure counts as "absent".
            # ``except Exception`` (rather than a bare ``except:``) keeps
            # KeyboardInterrupt/SystemExit from being swallowed.
            try:
                return db.get(txn, key)
            except Exception:
                return None

        env = mdb.Env('/tmp/lru_test',
                      flags=mdb.MDB_WRITEMAP | mdb.MDB_NOSYNC
                      | mdb.MDB_NOSUBDIR)
        txn = env.begin_txn()
        ixdb = env.open_db(txn, 'ix', flags=mdb.MDB_CREATE)

        lru = LRUDict.getDict(5, partial(mdb_fetch, txn=txn, ixdb=ixdb),
                              partial(mdb_evict, txn=txn, ixdb=ixdb))

        # fill the cache to its capacity of 5
        lru.set('hello', BitSet())
        lru.set('goodbye', BitSet())
        lru.set('amine', BitSet())
        lru.set('solution', BitSet())
        lru.set('lsd', BitSet())
        self.assertEqual(len(lru._getContents()), 5)

        # a sixth entry must push 'hello' out of the cache and into the db
        lru.set('proxy', BitSet())
        store = lru._getContents()
        self.assertNotIn('hello', store)
        self.assertIsNotNone(get(ixdb, txn, 'hello'))
        self.assertEqual(len(store), 5)

        # reading 'hello' must bring it back while keeping the size at 5
        bitmap = lru['hello']
        store = lru._getContents()
        self.assertIn('hello', store)
        self.assertEqual(len(store), 5)
        self.assertIsInstance(bitmap, BitSet)
        self.assertIsNone(lru.get('skibiddles'))

        # test eviction order
        self.assertIsNotNone(
            lru.get('goodbye')
        )  # this now should 'reset' goodbye so that it won't be evicted
        lru.set('whammy bar', BitSet())  # amine should be evicted
        store = lru._getContents()
        self.assertNotIn('amine', store)
        self.assertIn('goodbye', store)

        txn.commit()
        env.close()
Пример #25
0
    def test_marble_stream_bit_ops(self):
        """Check the bitmaps returned by the ``bit_*`` operations.

        Every check is a containment check: each row id in the returned
        bitmap must be one of the allowed row ids.
        """
        def expect_subset(found, allowed_rows):
            # Every row id in ``found`` must be one of ``allowed_rows``.
            allowed = BitSet()
            for row in allowed_rows:
                allowed.set(row)
            for row in found:
                self.assertTrue(row in allowed)

        def check_name_index(marble_stream, partition):
            # The n-th album of the partition may only map to row id n.
            rowid = 1
            for album in self.albums:
                if album[_PARTITIONS] != partition:
                    continue
                expect_subset(marble_stream.bit_eq("name", album["name"]),
                              [rowid])
                rowid += 1

        stream = MarbleStream(self.files["1992-10-03"])
        # test "name" index
        check_name_index(stream, "1992-10-03")
        # test "genre" index
        expect_subset(stream.bit_eq("genre", "R&R"), range(1, 5))
        stream.close()

        stream = MarbleStream(self.files["1986-01-03"])
        # test "name" index
        check_name_index(stream, "1986-01-03")
        # test "genre" index
        expect_subset(stream.bit_eq("genre", "SoundTrack"), range(1, 7))

        # test "rating" index: (operation, operand, allowed row ids)
        rating_cases = [
            # eq and not-eq
            ("bit_eq", 4, [4]),
            ("bit_eq", 3, [6]),
            ("bit_eq", 5, [1, 2, 3, 5]),
            ("bit_ne", 5, [4, 6]),
            ("bit_ne", 3, [1, 2, 3, 4, 5]),
            ("bit_ne", 4, [1, 2, 3, 5, 6]),
            # ordering comparisons
            ("bit_ge", 3, [1, 2, 3, 4, 5, 6]),
            ("bit_gt", 3, [1, 2, 3, 4, 5]),
            ("bit_le", 3, [6]),
            ("bit_lt", 3, []),
            ("bit_lt", 5, [4, 6]),
            ("bit_le", 5, [1, 2, 3, 4, 5, 6]),
            ("bit_gt", 5, []),
            ("bit_ge", 5, [1, 2, 3, 5]),
            ("bit_le", 4, [4, 6]),
            ("bit_lt", 4, [6]),
            ("bit_ge", 4, [1, 2, 3, 4, 5]),
            ("bit_gt", 4, [1, 2, 3, 5]),
        ]
        for op_name, operand, allowed_rows in rating_cases:
            expect_subset(getattr(stream, op_name)("rating", operand),
                          allowed_rows)

        stream.close()
Пример #26
0
    def test_logical_ops(self):
        """Exercise ``&``, ``|``, ``~`` and ``len`` on small BitSets."""
        def bitset_of(*positions):
            # Build a BitSet with the given positions set, in order.
            built = BitSet()
            for position in positions:
                built.set(position)
            return built

        left = bitset_of(0, 1, 4, 8, 16)
        right = bitset_of(0, 1, 4, 9)
        expected_union = bitset_of(0, 1, 4, 8, 9, 16)
        low_bits = bitset_of(0, 1, 4)
        gaps = bitset_of(2, 3)

        intersection = left & right
        union = left | right
        inverted = ~low_bits
        double_inverted = ~inverted

        self.assertEqual(union, expected_union)
        # NOTE(review): the intersection and ``low_bits`` hold the same
        # positions (0, 1, 4) yet are asserted unequal; presumably BitSet
        # equality considers more than the set positions -- confirm.
        self.assertNotEqual(intersection, low_bits)
        self.assertEqual(list(inverted), list(gaps))

        for bits, size in ((left, 5), (right, 4), (expected_union, 6),
                           (low_bits, 3), (gaps, 2), (intersection, 3),
                           (union, 6), (inverted, 2), (double_inverted, 3)):
            self.assertEqual(len(bits), size)
Пример #27
0
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index, key_names, limit):
    """Generate ``(row, ())`` pairs from the marble file referenced by ``url``.

    Args:
        fd, size: not used by this function.
        url: split with ``disco.util.urlsplit``; its last component is
            localized to a file path.
        params: provides ``_task.disco_data`` and ``_task.ddfs_data``.
        wheres: where clauses; only those whose ``_name`` equals the
            marble's name are processed.
        gen_where_index: when truthy, each row is prefixed with the index
            of its where clause.
        key_names: indexed by where-clause index; each entry is an
            iterable of ``(column, column_fn)`` pairs.
        limit: maximum number of rows per where clause; ``sys.maxint``
            means no limit.

    Raises:
        disco.util.DataError: if ``url`` cannot be split.
    """
    from disco import util
    from hustle.core.marble import Expr, MarbleStream
    from itertools import izip, repeat, islice, imap
    from sys import maxint
    from pyebset import BitSet

    empty = ()

    def first_rows(row_ids):
        # Copy at most ``limit`` row ids into a fresh BitSet.
        capped = BitSet()
        for row_id in islice(row_ids, 0, limit):
            capped.set(row_id)
        return capped, len(capped)

    try:
        scheme, netloc, rest = util.urlsplit(url)
    except Exception as e:
        msg = "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise util.DataError(msg, url)

    fle = util.localize(rest, disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)

    otab = None
    try:
        otab = MarbleStream(fle)
        bitmaps = {}
        limited = limit != maxint

        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name != otab.marble._name:
                continue
            if type(where) is Expr and not where.is_partition:
                bm = where(otab)
                bitmaps[index] = first_rows(bm) if limited else (bm, len(bm))
            elif limited:
                # it is either the table itself, or a partition expression.
                # Either way, returns the entire table
                bitmaps[index] = first_rows(otab.iter_all())
            else:
                bitmaps[index] = (otab.iter_all(), otab.number_rows)

        for index, (bitmap, blen) in bitmaps.iteritems():
            # one iterator per output column, optionally led by the where index
            row_iter = [repeat(index, blen)] if gen_where_index else []
            for col, column_fn in key_names[index]:
                if col is None:
                    row_iter.append(repeat(None, blen))
                elif column_fn is None:
                    row_iter.append(otab.mget(col, bitmap))
                else:
                    row_iter.append(imap(column_fn, otab.mget(col, bitmap)))

            for row in izip(*row_iter):
                yield row, empty
    finally:
        if otab:
            otab.close()
Пример #28
0
 def test_eq_ne(self):
     """BitSets compare with ``==`` and ``!=`` according to their contents."""
     def bitset_of(*positions):
         # Build a BitSet with the given positions set, in order.
         built = BitSet()
         for position in positions:
             built.set(position)
         return built

     first = bitset_of(1, 2)
     twin = bitset_of(1, 2)
     different = bitset_of(2, 3)
     self.assertTrue(first == twin)
     self.assertTrue(twin != different)
Пример #29
0
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index,
                        key_names, limit):
    """Generate ``(row, ())`` pairs from the marble file referenced by ``url``.

    Args:
        fd, size: not used by this function.
        url: split with ``disco.util.urlsplit``; its last component is
            localized to a file path.
        params: provides ``_task.disco_data`` and ``_task.ddfs_data``.
        wheres: where clauses; only those whose ``_name`` equals the
            marble's name are processed.
        gen_where_index: when truthy, each row is prefixed with the index
            of its where clause.
        key_names: indexed by where-clause index; each entry is an
            iterable of ``(column, column_fn)`` pairs.
        limit: maximum number of rows per where clause; ``sys.maxint``
            means no limit.

    Raises:
        disco.util.DataError: if ``url`` cannot be split.
    """
    from disco import util
    from hustle.core.marble import Expr, MarbleStream
    from itertools import izip, repeat, islice, imap
    from sys import maxint
    from pyebset import BitSet

    empty = ()

    def first_rows(row_ids):
        # Copy at most ``limit`` row ids into a fresh BitSet.
        capped = BitSet()
        for row_id in islice(row_ids, 0, limit):
            capped.set(row_id)
        return capped, len(capped)

    try:
        scheme, netloc, rest = util.urlsplit(url)
    except Exception as e:
        msg = "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise util.DataError(msg, url)

    fle = util.localize(rest,
                        disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)

    otab = None
    try:
        otab = MarbleStream(fle)
        bitmaps = {}
        limited = limit != maxint

        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name != otab.marble._name:
                continue
            if type(where) is Expr and not where.is_partition:
                bm = where(otab)
                bitmaps[index] = first_rows(bm) if limited else (bm, len(bm))
            elif limited:
                # it is either the table itself, or a partition expression.
                # Either way, returns the entire table
                bitmaps[index] = first_rows(otab.iter_all())
            else:
                bitmaps[index] = (otab.iter_all(), otab.number_rows)

        for index, (bitmap, blen) in bitmaps.iteritems():
            # one iterator per output column, optionally led by the where index
            row_iter = [repeat(index, blen)] if gen_where_index else []
            for col, column_fn in key_names[index]:
                if col is None:
                    row_iter.append(repeat(None, blen))
                elif column_fn is None:
                    row_iter.append(otab.mget(col, bitmap))
                else:
                    row_iter.append(
                        imap(column_fn, otab.mget(col, bitmap)))

            for row in izip(*row_iter):
                yield row, empty
    finally:
        if otab:
            otab.close()
Пример #30
0
def in_not(obj, invert, expr):
    """Evaluate ``expr`` on ``obj`` with the ``invert`` flag negated.

    Returns ``expr(obj, not invert)``, or an empty ``BitSet`` when ``expr``
    is ``None``.
    """
    if expr is not None:
        return expr(obj, not invert)
    return BitSet()
Пример #31
0
    def test_marble_stream_bit_ops(self):
        """Check the bitmaps returned by the ``bit_*`` operations.

        Every check is a containment check: each row id in the returned
        bitmap must be one of the allowed row ids.
        """
        def expect_subset(found, allowed_rows):
            # Every row id in ``found`` must be one of ``allowed_rows``.
            allowed = BitSet()
            for row in allowed_rows:
                allowed.set(row)
            for row in found:
                self.assertTrue(row in allowed)

        def check_name_index(marble_stream, partition):
            # The n-th album of the partition may only map to row id n.
            rowid = 1
            for album in self.albums:
                if album[_PARTITIONS] != partition:
                    continue
                expect_subset(marble_stream.bit_eq("name", album["name"]),
                              [rowid])
                rowid += 1

        stream = MarbleStream(self.files["1992-10-03"])
        # test "name" index
        check_name_index(stream, "1992-10-03")
        # test "genre" index
        expect_subset(stream.bit_eq("genre", "R&R"), range(1, 5))
        stream.close()

        stream = MarbleStream(self.files["1986-01-03"])
        # test "name" index
        check_name_index(stream, "1986-01-03")
        # test "genre" index
        expect_subset(stream.bit_eq("genre", "SoundTrack"), range(1, 7))

        # test "rating" index: (operation, operand, allowed row ids)
        rating_cases = [
            # eq and not-eq
            ("bit_eq", 4, [4]),
            ("bit_eq", 3, [6]),
            ("bit_eq", 5, [1, 2, 3, 5]),
            ("bit_ne", 5, [4, 6]),
            ("bit_ne", 3, [1, 2, 3, 4, 5]),
            ("bit_ne", 4, [1, 2, 3, 5, 6]),
            # eq_ex and not_eq_ex (operand is a list of values)
            ("bit_eq_ex", [3, 4], [4, 6]),
            ("bit_eq_ex", [5], [1, 2, 3, 5]),
            ("bit_ne_ex", [5], [4, 6]),
            ("bit_ne_ex", [3, 4], [1, 2, 3, 5]),
            # ordering comparisons
            ("bit_ge", 3, [1, 2, 3, 4, 5, 6]),
            ("bit_gt", 3, [1, 2, 3, 4, 5]),
            ("bit_le", 3, [6]),
            ("bit_lt", 3, []),
            ("bit_lt", 5, [4, 6]),
            ("bit_le", 5, [1, 2, 3, 4, 5, 6]),
            ("bit_gt", 5, []),
            ("bit_ge", 5, [1, 2, 3, 5]),
            ("bit_le", 4, [4, 6]),
            ("bit_lt", 4, [6]),
            ("bit_ge", 4, [1, 2, 3, 4, 5]),
            ("bit_gt", 4, [1, 2, 3, 5]),
        ]
        for op_name, operand, allowed_rows in rating_cases:
            expect_subset(getattr(stream, op_name)("rating", operand),
                          allowed_rows)

        stream.close()
Пример #32
0
 def __init__(self, l=()):
     """Store the integers from ``l`` in a new BitSet kept as ``self.l``."""
     self.l = members = BitSet()
     for value in l:
         members.set(value)
Пример #33
0
 def bit_gt(self, col, other):
     """Return a BitSet of the members of ``self.l`` greater than ``other``.

     ``col`` is accepted but not used.
     """
     selected = BitSet()
     for member in (m for m in self.l if m > other):
         selected.set(member)
     return selected
Пример #34
0
    def test_logical_ops(self):
        """Exercise ``&``, ``|`` and ``~`` on small BitSets."""
        def bitset_of(*positions):
            # Build a BitSet with the given positions set, in order.
            built = BitSet()
            for position in positions:
                built.set(position)
            return built

        left = bitset_of(0, 1, 4, 8, 16)
        right = bitset_of(0, 1, 4, 9)
        expected_union = bitset_of(0, 1, 4, 8, 9, 16)
        low_bits = bitset_of(0, 1, 4)
        gaps = bitset_of(2, 3)

        intersection = left & right
        union = left | right
        inverted = ~low_bits

        self.assertEqual(union, expected_union)
        # NOTE(review): the intersection and ``low_bits`` hold the same
        # positions (0, 1, 4) yet are asserted unequal; presumably BitSet
        # equality considers more than the set positions -- confirm.
        self.assertNotEqual(intersection, low_bits)
        self.assertEqual(list(inverted), list(gaps))
Пример #35
0
def mdb_evict(key, bitset, txn=None, ixdb=None):
    """Write the serialized form of ``bitset`` to ``ixdb`` under ``key``."""
    serialized = bitset.dumps()
    ixdb.put(txn, key, serialized)


class DUMMY(object):
    """Placeholder whose subscript access always yields ``tobj``."""

    # Object handed back for every subscript; ``None`` until assigned.
    tobj = None

    def __getitem__(self, item):
        # ``item`` is ignored: every key maps to the same object.
        return self.tobj

# Module-level placeholders: subscripting ``_dummy`` with any key returns
# ``_dummy_bitmap`` (through ``DUMMY.__getitem__`` and the ``tobj`` assignment).
_dummy = DUMMY()
_dummy_bitmap = DUMMY()
_dummy.tobj = _dummy_bitmap

# BitSet containing only bit 0.
ZERO_BS = BitSet()
ZERO_BS.set(0)

# Instance-level callables: ``_dummy_bitmap.set(k)`` returns ``k`` unchanged and
# ``_dummy.get(k)`` returns ``_dummy_bitmap`` for any ``k``.
setattr(_dummy_bitmap, 'set', (lambda k: k))
setattr(_dummy, 'get', lambda k: _dummy_bitmap)


def kv_decoder(line, kvs=()):
    """Turn a ``(key, value)`` record into a dict keyed by ``kvs``.

    ``key`` and ``value`` are concatenated and paired positionally with the
    names in ``kvs``; pairing stops at the shorter of the two.
    """
    key_part, value_part = line
    fields = key_part + value_part
    return {name: field for name, field in zip(kvs, fields)}


def json_decoder(line):
    """Decode ``line`` with ``ujson.loads`` and return the result."""
    return ujson.loads(line)

Пример #36
0
 def bit_ne_ex(self, col, keys):
     """Return a BitSet of the members of ``self.l`` that are not in ``keys``.

     ``col`` is accepted but not used.
     """
     remaining = BitSet()
     for member in (m for m in self.l if m not in keys):
         remaining.set(member)
     return remaining
Пример #37
0
 def bit_le(self, col, other):
     """Return a BitSet of the members of ``self.l`` that are ``<= other``.

     ``col`` is accepted but not used.
     """
     selected = BitSet()
     for member in (m for m in self.l if m <= other):
         selected.set(member)
     return selected
Пример #38
0
import unittest
from hustle.core.marble import Column
from pyebset import BitSet


# Module-level BitSet containing only bit 0.
ZERO_BS = BitSet()
ZERO_BS.set(0)


class Tablet(object):
    """Small table stand-in backed by a single ``BitSet`` of integers.

    The ``bit_*`` methods ignore ``col`` and compare the stored integers
    themselves against ``other``.
    """

    def __init__(self, l=()):
        self.l = BitSet()
        for member in l:
            self.l.set(member)

    def iter_all(self):
        """Return an iterator over all stored integers."""
        return iter(self.l)

    def _matching(self, keep):
        """Return a new BitSet holding the stored integers accepted by ``keep``."""
        picked = BitSet()
        for member in self.l:
            if keep(member):
                picked.set(member)
        return picked

    def bit_eq(self, col, other):
        """Stored integers equal to ``other``."""
        return self._matching(lambda member: member == other)

    def bit_ne(self, col, other):
        """Stored integers different from ``other``."""
        return self._matching(lambda member: member != other)
Пример #39
0
    def test_logical_ops(self):
        """Exercise ``&``, ``|`` and ``~`` on small BitSets."""
        def bitset_of(*positions):
            # Build a BitSet with the given positions set, in order.
            built = BitSet()
            for position in positions:
                built.set(position)
            return built

        left = bitset_of(0, 1, 4, 8, 16)
        right = bitset_of(0, 1, 4, 9)
        expected_union = bitset_of(0, 1, 4, 8, 9, 16)
        low_bits = bitset_of(0, 1, 4)
        gaps = bitset_of(2, 3)

        intersection = left & right
        union = left | right
        inverted = ~low_bits

        self.assertEqual(union, expected_union)
        # NOTE(review): the intersection and ``low_bits`` hold the same
        # positions (0, 1, 4) yet are asserted unequal; presumably BitSet
        # equality considers more than the set positions -- confirm.
        self.assertNotEqual(intersection, low_bits)
        self.assertEqual(list(inverted), list(gaps))