def bit_eq(self, ix, key):
    """Return the bitset stored in index ``ix`` for the value ``key``.

    ``key`` is first translated with ``self._vid_for_value`` using the
    column descriptor found in ``self.dbs[ix]``.  When the translation
    yields ``None``, or the index db holds no entry for the translated
    key, an empty ``BitSet`` is returned.
    """
    _db, index_db, _unused, column = self.dbs[ix]
    result = BitSet()
    vid = self._vid_for_value(column, key)
    if vid is None:
        # The value has no translated id, so there is nothing to look up.
        return result
    payload = index_db.get(self.txn, vid)
    if payload is not None:
        result.loads(payload)
    return result
def get_ix(self, column_name, key):
    """Return the bitset stored under ``key`` in the index of ``column_name``.

    This is a best-effort lookup: if the index db read or the decoding
    of the stored payload fails, an empty ``BitSet`` is returned instead
    of raising.  ``key`` is passed to the index db as-is.
    """
    _db, index_db, _unused, _column = self.dbs[column_name]
    bitset = BitSet()
    try:
        data = index_db.get(self.txn, key)
        # Sibling lookups treat a None result as "no entry"; skip decoding
        # explicitly rather than relying on loads(None) to raise.
        if data is not None:
            bitset.loads(data)
    except Exception:
        # Deliberately swallow ordinary failures (best-effort lookup), but
        # unlike a bare ``except`` let KeyboardInterrupt / SystemExit
        # propagate.
        pass
    return bitset
def test_marble_insert(self):
    """Check the meta db and the per-column indexes of every partition file."""
    # General information about the insert.
    self.assertEqual(self.n_inserted, len(_ALBUMS))
    self.assertEqual(_NPARTITIONS, len(self.files))
    part_id = {}

    # Every partition file is opened and verified on its own.
    for date, path in self.files.iteritems():
        env, txn, dbs, meta = self.marble._open(path)

        # Meta db: the trie tables must exist and the descriptive
        # entries must hold the JSON-encoded expected values.
        self.assertTrue(meta.contains(txn, "_vid_nodes"))
        self.assertTrue(meta.contains(txn, "_vid_kids"))
        self.assertTrue(meta.contains(txn, "_vid16_nodes"))
        self.assertTrue(meta.contains(txn, "_vid16_kids"))
        self.assertEqual(meta.get(txn, "name"), ujson.dumps("Collections"))
        self.assertEqual(meta.get(txn, "partition"), ujson.dumps("date"))
        self.assertEqual(meta.get(txn, "fields"), ujson.dumps(_FIELDS))
        self.assertEqual(meta.get(txn, "_pdata"), ujson.dumps(date))
        vid_nodes, _ = meta.get_raw(txn, '_vid_nodes')
        vid_kids, _ = meta.get_raw(txn, '_vid_kids')
        vid16_nodes, _ = meta.get_raw(txn, '_vid16_nodes', (None, 0))
        vid16_kids, _ = meta.get_raw(txn, '_vid16_kids', (None, 0))

        # Column dbs and their index dbs.
        part_id[date] = 1
        for name, (_row_db, index_db, _, column, _) in dbs.iteritems():
            if name == "_count":
                continue
            bitmaps = {}
            # Row ids are counted from 1 again for every column.
            part_id[date] = 1
            for album in self.albums:
                partition = album[_PARTITIONS]
                if date != partition:
                    # This album belongs to another partition file.
                    continue
                value = album[name]
                row_id = part_id[partition]
                part_id[partition] += 1

                # Derive the key under which the value is indexed.
                if column.is_trie:
                    if column.rtrie_indicator == mdb.MDB_UINT_16:
                        nodes, kids = vid16_nodes, vid16_kids
                    else:
                        nodes, kids = vid_nodes, vid_kids
                    stored = rtrie.vid_for_value(nodes, kids, value)
                elif column.is_lz4:
                    stored = clz4.compress(value)
                else:
                    stored = value
                # NOTE: the direct row check
                # (db.get(txn, row_id) == stored) is currently disabled.

                if index_db is not None:
                    # The row id must be present in the value's bitmap;
                    # decoded bitmaps are cached per indexed key.
                    if stored not in bitmaps:
                        decoded = BitSet()
                        decoded.loads(index_db.get(txn, stored))
                        bitmaps[stored] = decoded
                    bitmap = bitmaps[stored]
                    self.assertTrue(row_id in bitmap)
        txn.commit()
        env.close()
def test_dumps_loads(self):
    """A BitSet rebuilt from its own dump must compare equal to the source."""
    original = BitSet()
    for bit in (0, 1, 4, 8, 16):
        self.assertTrue(original.set(bit))

    restored = BitSet()
    restored.loads(original.dumps())
    self.assertEqual(original, restored)
def _bit_op(self, val, op):
    """OR together every bitset yielded by ``op(self.txn, val)``.

    ``op`` must return an iterable of pairs; the second element of each
    pair is a serialized bitset or ``None``.  ``None`` entries are
    skipped.  The union of all decoded bitsets is returned (empty when
    nothing was yielded).
    """
    combined = BitSet()
    for _key, serialized in op(self.txn, val):
        if serialized is not None:
            piece = BitSet()
            piece.loads(serialized)
            combined |= piece
    return combined
def test_marble_insert(self):
    """Verify meta entries and index bitmaps for each partition file."""
    # Overall counts produced by the insert.
    self.assertEqual(self.n_inserted, len(_ALBUMS))
    self.assertEqual(_NPARTITIONS, len(self.files))
    part_id = {}

    # Each partition file is opened and inspected in turn.
    for date, path in self.files.iteritems():
        env, txn, dbs, meta = self.marble._open(path)

        # Meta db checks: required keys first, then their JSON values.
        self.assertTrue(meta.contains(txn, "_vid_nodes"))
        self.assertTrue(meta.contains(txn, "_vid_kids"))
        self.assertTrue(meta.contains(txn, "_vid16_nodes"))
        self.assertTrue(meta.contains(txn, "_vid16_kids"))
        self.assertEqual(meta.get(txn, "name"), ujson.dumps("Collections"))
        self.assertEqual(meta.get(txn, "partition"), ujson.dumps("date"))
        self.assertEqual(meta.get(txn, "fields"), ujson.dumps(_FIELDS))
        self.assertEqual(meta.get(txn, "_pdata"), ujson.dumps(date))
        vid_nodes, _ = meta.get_raw(txn, '_vid_nodes')
        vid_kids, _ = meta.get_raw(txn, '_vid_kids')
        vid16_nodes, _ = meta.get_raw(txn, '_vid16_nodes', (None, 0))
        vid16_kids, _ = meta.get_raw(txn, '_vid16_kids', (None, 0))

        # Per-column db and index db checks.
        part_id[date] = 1
        for name, (_row_db, index_db, _, column, _) in dbs.iteritems():
            if name == "_count":
                continue
            bitmaps = {}
            # The expected row id restarts at 1 for every column.
            part_id[date] = 1
            for album in self.albums:
                partition = album[_PARTITIONS]
                if date != partition:
                    # Only albums of the current partition are relevant.
                    continue
                value = album[name]
                row_id = part_id[partition]
                part_id[partition] += 1

                # Work out the key the index db uses for this value.
                if column.is_trie:
                    if column.rtrie_indicator == mdb.MDB_UINT_16:
                        nodes, kids = vid16_nodes, vid16_kids
                    else:
                        nodes, kids = vid_nodes, vid_kids
                    stored = rtrie.vid_for_value(nodes, kids, value)
                elif column.is_lz4:
                    stored = clz4.compress(value)
                else:
                    stored = value
                # NOTE: comparing db.get(txn, row_id) against the stored
                # value is currently disabled.

                if index_db is not None:
                    # The row id has to appear in the bitmap of its value;
                    # each bitmap is decoded once and then reused.
                    if stored not in bitmaps:
                        decoded = BitSet()
                        decoded.loads(index_db.get(txn, stored))
                        bitmaps[stored] = decoded
                    bitmap = bitmaps[stored]
                    self.assertTrue(row_id in bitmap)
        txn.commit()
        env.close()
def bit_ne(self, ix, key):
    """Return the inverted bitset for the value ``key`` in index ``ix``.

    The bitset stored for ``key`` (empty when the key cannot be
    translated by ``self._vid_for_value`` or has no index entry) is
    OR-ed with ``ZERO_BS``, has bit ``self.number_rows`` set, and is
    then inverted in place.
    """
    _db, index_db, _unused, column = self.dbs[ix]
    result = BitSet()
    vid = self._vid_for_value(column, key)
    payload = None if vid is None else index_db.get(self.txn, vid)
    if payload is not None:
        result.loads(payload)
    # The order of the next three steps matters: the bits added here are
    # part of what gets inverted.
    # NOTE(review): presumably ZERO_BS masks row id 0 and the extra bit
    # fixes the bitset's extent before negation -- confirm against BitSet.
    result |= ZERO_BS
    result.set(self.number_rows)
    result.lnot_inplace()
    return result
def mdb_fetch(key, txn=None, ixdb=None):
    """Fetch and decode the bitmap stored under ``key`` in ``ixdb``.

    Args:
        key: key to look up in the index db.
        txn: transaction handle passed through to ``ixdb.get``.
        ixdb: index db object exposing ``get(txn, key)``.

    Returns:
        A ``pyebset.BitSet`` loaded from the stored payload, or ``None``
        when the lookup fails or yields ``None``.
    """
    try:
        bitmaps = ixdb.get(txn, key)
    except Exception:
        # Best-effort lookup: an ordinary failure (including a missing
        # ``ixdb``) counts as a miss.  Unlike a bare ``except``, this lets
        # KeyboardInterrupt / SystemExit propagate.
        bitmaps = None
    if bitmaps is None:
        return None

    # Imported locally, and only once there is a payload to decode, so a
    # miss does not require pyebset at all.
    from pyebset import BitSet
    bitset = BitSet()
    bitset.loads(bitmaps)
    return bitset
def bit_eq_ex(self, ix, keys):
    """Return the union of the bitsets stored in index ``ix`` for ``keys``.

    Each key is translated with ``self._vid_for_value`` before the index
    db lookup.  Keys that cannot be translated, or that have no index
    entry, contribute nothing.  For a non-string iterable key only its
    first element is used.
    """
    from collections import Iterable

    _db, index_db, _unused, column = self.dbs[ix]
    union = BitSet()
    for candidate in keys:
        is_composite = (isinstance(candidate, Iterable)
                        and not isinstance(candidate, (basestring, unicode)))
        if is_composite:
            # A composite key is represented by its first element.
            candidate = candidate[0]
        vid = self._vid_for_value(column, candidate)
        if vid is None:
            continue
        payload = index_db.get(self.txn, vid)
        if payload is None:
            continue
        piece = BitSet()
        piece.loads(payload)
        union |= piece
    return union