示例#1
0
 def test_select2(self):
     """Check ``select(k)`` on evenly spaced bitmaps for strides 1, 2, ..., 1024."""
     for exponent in range(11):
         # 1 << exponent walks the power-of-two strides from 1 up to 1024.
         stride = 1 << exponent
         bitmap = RoaringBitmap(range(0, 100000, stride))
         for position in range(100000 // stride):
             assert bitmap.select(position) == position * stride
示例#2
0
 def test_pickle(self, single):
     """Round-trip each fixture bitmap through pickle and compare it with the original."""
     for name, data in single:
         original = RoaringBitmap(data)
         # protocol=-1 selects the highest pickle protocol available.
         restored = pickle.loads(pickle.dumps(original, protocol=-1))
         original._checkconsistency()
         assert restored == original, name
示例#3
0
	def test_select2(self):
		"""Check ``select(k)`` on evenly spaced bitmaps for strides 1, 2, ..., 1024."""
		for exponent in range(11):
			# 1 << exponent walks the power-of-two strides from 1 up to 1024.
			stride = 1 << exponent
			bitmap = RoaringBitmap(range(0, 100000, stride))
			for position in range(100000 // stride):
				assert bitmap.select(position) == position * stride
示例#4
0
	def test_initrange(self):
		"""Build bitmaps from ``range(23, n)`` and compare each with the equivalent set."""
		# creates a positive, dense, and inverted block, respectively
		for upper in (400, 6000, 61241):
			values = range(23, upper)
			expected = set(values)
			bitmap = RoaringBitmap(values)
			bitmap._checkconsistency()
			assert expected == bitmap, ('range(23, %d)' % upper)
示例#5
0
	def test_aggregateor(self, multi):
		"""Check that a multi-argument ``update`` matches ``set.update``.

		``multi`` is indexed as a sequence of iterables: the first one seeds
		both containers and the remaining ones are merged in with one call.
		"""
		ref = set(multi[0])
		ref.update(*[set(a) for a in multi[1:]])
		rb = RoaringBitmap(multi[0])
		rb.update(*[RoaringBitmap(a) for a in multi[1:]])
		rb._checkconsistency()
		# No assertion message: this function has no ``name`` in scope, so
		# referencing one would raise NameError on failure instead of
		# reporting the actual mismatch.
		assert rb == ref
示例#6
0
	def test_pickle(self, single):
		"""Round-trip each fixture bitmap through pickle and compare it with the original."""
		for name, data in single:
			original = RoaringBitmap(data)
			# protocol=-1 selects the highest pickle protocol available.
			restored = pickle.loads(pickle.dumps(original, protocol=-1))
			original._checkconsistency()
			assert restored == original, name
示例#7
0
 def test_initrange(self):
     """Build bitmaps from ``range(23, n)`` and compare each with the equivalent set."""
     # creates a positive, dense, and inverted block, respectively
     for upper in (400, 6000, 61241):
         values = range(23, upper)
         expected = set(values)
         bitmap = RoaringBitmap(values)
         bitmap._checkconsistency()
         assert expected == bitmap, ('range(23, %d)' % upper)
示例#8
0
	def test_rank2(self):
		"""Check ``rank`` against closed-form counts for two consecutive strided ranges."""
		bitmap = RoaringBitmap(range(0, 100000, 7))
		bitmap.update(range(100000, 200000, 1000))
		# Below 100000 only the multiples of 7 are present.
		for value in range(100000):
			assert bitmap.rank(value) == 1 + value // 7
		# From 100000 on, all multiples of 7 below 100000 are counted, plus
		# the multiples of 1000 reached so far.
		multiples_of_seven = 1 + 100000 // 7
		for value in range(100000, 200000):
			reached = 1 + (value - 100000) // 1000
			assert bitmap.rank(value) == multiples_of_seven + reached
示例#9
0
 def test_rank(self, single):
     """Check ``rank`` of randomly chosen members against a sorted reference list."""
     for name, data in single:
         ordered = sorted(set(data))
         bitmap = RoaringBitmap(data)
         for _ in range(10):
             member = random.choice(ordered)
             assert member in bitmap, name
             # The expected rank is one more than the member's list index.
             assert bitmap.rank(member) == 1 + ordered.index(member), name
示例#10
0
 def test_ixor(self, pair):
     """Check in-place ``^=`` against the symmetric difference of plain sets."""
     for data1, data2 in pair:
         expected = set(data1) ^ set(data2)
         bitmap = RoaringBitmap(data1)
         # The in-place operator is the behaviour under test.
         bitmap ^= RoaringBitmap(data2)
         assert len(expected) == len(bitmap)
         assert expected == set(bitmap)
示例#11
0
 def test_ior(self, pair):
     """Check in-place ``|=`` against the union of plain sets."""
     for data1, data2 in pair:
         expected = set(data1) | set(data2)
         bitmap = RoaringBitmap(data1)
         # The in-place operator is the behaviour under test.
         bitmap |= RoaringBitmap(data2)
         assert expected == set(bitmap)
         assert bitmap == expected
示例#12
0
 def test_none(self, multi):
     """Check a MultiRoaringBitmap that contains an empty bitmap.

     An empty bitmap is inserted at index 4; the final assertion expects
     the intersection of members 4 and 5 to be ``None``.
     """
     bitmaps = [RoaringBitmap(values) for values in multi]
     bitmaps.insert(4, RoaringBitmap())
     collection = MultiRoaringBitmap(bitmaps)
     assert len(bitmaps) == len(collection)
     for expected, actual in zip(bitmaps, collection):
         assert expected == actual
     assert collection.intersection([4, 5]) is None
示例#13
0
 def test_issue19(self):
     """Check that removing both operands from their union leaves nothing."""
     large = RoaringBitmap(range(4095))
     small = RoaringBitmap(range(2))
     # Build the union in place, starting from an empty bitmap.
     merged = RoaringBitmap()
     merged |= large
     merged |= small
     assert len(merged - large - small) == 0
     assert len((large | small) - large - small) == 0
示例#14
0
	def test_ior(self, pair):
		"""Check in-place ``|=`` against the union of plain sets."""
		for name, data1, data2 in pair:
			expected = set(data1) | set(data2)
			bitmap = RoaringBitmap(data1)
			# The in-place operator is the behaviour under test.
			bitmap |= RoaringBitmap(data2)
			bitmap._checkconsistency()
			assert bitmap == expected, name
示例#15
0
 def test_ior(self, pair):
     """Check in-place ``|=`` against the union of plain sets."""
     for name, data1, data2 in pair:
         expected = set(data1) | set(data2)
         bitmap = RoaringBitmap(data1)
         # The in-place operator is the behaviour under test.
         bitmap |= RoaringBitmap(data2)
         bitmap._checkconsistency()
         assert bitmap == expected, name
示例#16
0
	def test_rank(self, single):
		"""Check ``rank`` of randomly chosen members against a sorted reference list."""
		for name, data in single:
			ordered = sorted(set(data))
			bitmap = RoaringBitmap(data)
			for _ in range(10):
				member = random.choice(ordered)
				assert member in bitmap, name
				# The expected rank is one more than the member's list index.
				assert bitmap.rank(member) == 1 + ordered.index(member), name
示例#17
0
 def test_aggregateor(self):
     """Check multi-argument ``update`` against ``set.update`` on random data."""
     # Ten lists of 2000 random integers drawn from 0..1000.
     groups = []
     for _ in range(10):
         groups.append([random.randint(0, 1000) for _ in range(2000)])
     first, rest = groups[0], groups[1:]
     expected = set(first)
     expected.update(*map(set, rest))
     bitmap = RoaringBitmap(first)
     bitmap.update(*map(RoaringBitmap, rest))
     assert expected == set(bitmap)
     assert bitmap == expected
示例#18
0
	def test_rank(self, single):
		"""Check ``rank`` of randomly chosen members against a sorted reference list."""
		for data in single:
			ordered = sorted(set(data))
			bitmap = RoaringBitmap(data)
			print(len(bitmap))
			for _ in range(10):
				member = random.choice(ordered)
				assert member in bitmap
				# The expected rank is one more than the member's list index.
				assert bitmap.rank(member) == 1 + ordered.index(member)
示例#19
0
 def test_aggregateor(self):
     """Check multi-argument ``update`` against ``set.update`` on random data."""
     # Ten lists of 2000 random integers drawn from 0..1000.
     groups = []
     for _ in range(10):
         groups.append([random.randint(0, 1000) for _ in range(2000)])
     first, rest = groups[0], groups[1:]
     expected = set(first)
     expected.update(*map(set, rest))
     bitmap = RoaringBitmap(first)
     bitmap.update(*map(RoaringBitmap, rest))
     assert expected == set(bitmap)
     assert bitmap == expected
示例#20
0
	def test_clamp(self, single):
		"""Check ``clamp(a, b)`` against intersecting with ``range(a, b)``."""
		for name, data in single:
			# Two sampled values, in ascending order, delimit the window.
			low, high = sorted(random.sample(data, 2))
			window = range(low, high)
			expected = set(data).intersection(window)
			intersected = RoaringBitmap(data).intersection(window)
			clamped = RoaringBitmap(data).clamp(low, high)
			assert low <= clamped.min() and clamped.max() < high, name
			context = (name, low, high)
			assert expected == clamped, context
			assert intersected == clamped, context
示例#21
0
 def test_rank(self, single):
     """Check ``rank`` of randomly chosen members against a sorted reference list."""
     for data in single:
         ordered = sorted(set(data))
         bitmap = RoaringBitmap(data)
         print(len(bitmap))
         for _ in range(10):
             member = random.choice(ordered)
             assert member in bitmap
             # The expected rank is one more than the member's list index.
             assert bitmap.rank(member) == 1 + ordered.index(member)
示例#22
0
	def test_ixor(self, pair):
		"""Check in-place ``^=`` against the symmetric difference of plain sets."""
		for name, data1, data2 in pair:
			expected = set(data1) ^ set(data2)
			bitmap = RoaringBitmap(data1)
			# The in-place operator is the behaviour under test.
			bitmap ^= RoaringBitmap(data2)
			bitmap._checkconsistency()
			assert len(expected) == len(bitmap), name
			assert expected == bitmap, name
示例#23
0
	def test_pop(self):
		"""Pop every element from a small bitmap and check that it ends up empty."""
		values = [
			60748, 28806, 54664, 28597, 58922, 75684, 56364,
			67421, 52608, 55686, 10427, 48506, 64363, 14506, 73077, 59035,
			70246, 19875, 73145, 40225, 58664, 6597, 65554, 73102, 26636,
			74227, 59566, 19023,
		]
		bitmap = RoaringBitmap(values)
		# Keep popping for as long as the bitmap is truthy.
		while bitmap:
			bitmap.pop()
		bitmap._checkconsistency()
		assert len(bitmap) == 0
示例#24
0
 def test_ixor(self, pair):
     """Check in-place ``^=`` against the symmetric difference of plain sets."""
     for name, data1, data2 in pair:
         expected = set(data1) ^ set(data2)
         bitmap = RoaringBitmap(data1)
         # The in-place operator is the behaviour under test.
         bitmap ^= RoaringBitmap(data2)
         bitmap._checkconsistency()
         assert len(expected) == len(bitmap), name
         assert expected == bitmap, name
示例#25
0
def test_fixtures(single):
    """Check that each named fixture produces the kind of bitmap its name suggests."""
    for name, data in single:
        bitmap = RoaringBitmap(data)
        if name == 'empty':
            assert len(bitmap) == 0
            continue
        if name == 'many keys':
            assert len(bitmap._keys()) > 100
            continue
        # For every other fixture the upper-cased first letter of its name
        # must occur in the debuginfo() output.
        initial = name[0].upper()
        assert initial in bitmap.debuginfo()
示例#26
0
 def test_eq(self, single):
     """Check that ``==`` on bitmaps agrees with ``==`` on sets of the same data."""
     for data in single:
         set_a, set_b = set(data), set(data)
         bitmap_a, bitmap_b = RoaringBitmap(data), RoaringBitmap(data)
         assert set_a == set_b
         assert bitmap_a == bitmap_b
         # Both comparisons must also produce equal result values.
         assert (set_a == set_b) == (bitmap_a == bitmap_b)
示例#27
0
 def test_neq(self, pair):
     """Check that ``!=`` on bitmaps agrees with ``!=`` on sets of the same data."""
     for data1, data2 in pair:
         set_a, set_b = set(data1), set(data2)
         bitmap_a, bitmap_b = RoaringBitmap(data1), RoaringBitmap(data2)
         assert set_a != set_b
         assert bitmap_a != bitmap_b
         # Both comparisons must also produce equal result values.
         assert (set_a != set_b) == (bitmap_a != bitmap_b)
示例#28
0
 def test_clamp(self, single):
     """Check ``clamp(a, b)`` against intersecting with ``range(a, b)``."""
     for name, data in single:
         # Two sampled values, in ascending order, delimit the window.
         low, high = sorted(random.sample(data, 2))
         window = range(low, high)
         expected = set(data).intersection(window)
         intersected = RoaringBitmap(data).intersection(window)
         clamped = RoaringBitmap(data).clamp(low, high)
         assert low <= clamped.min() and clamped.max() < high, name
         context = (name, low, high)
         assert expected == clamped, context
         assert intersected == clamped, context
示例#29
0
    def test_initrb(self):
        """Check that a RoaringBitmap built from an immutable bitmap equals it."""
        # Immutable bitmap created from a mutable one.
        frozen = ImmutableRoaringBitmap(RoaringBitmap(range(5)))
        thawed = RoaringBitmap(frozen)
        assert thawed == frozen

        # Immutable bitmap created directly from a range.
        frozen = ImmutableRoaringBitmap(range(5))
        thawed = RoaringBitmap(frozen)
        assert thawed == frozen
示例#30
0
	def test_jaccard_dist(self, pair):
		"""Compare bitmap Jaccard similarity and distance with set-based values."""
		tolerance = 0.001
		for name, data1, data2 in pair:
			ref, ref2 = set(data1), set(data2)
			rb, rb2 = RoaringBitmap(data1), RoaringBitmap(data2)
			# Reference similarity: |intersection| / |union| on plain sets.
			expected = len(ref & ref2) / float(len(ref | ref2))
			actual = rb.intersection_len(rb2) / float(rb.union_len(rb2))
			assert abs(expected - actual) < tolerance, name
			# The distance is compared against one minus the similarity.
			assert abs((1 - expected) - rb.jaccard_dist(rb2)) < tolerance, name
示例#31
0
	def test_disjoint(self, pair):
		"""Check ``isdisjoint`` for overlapping data and for a filtered, disjoint copy."""
		for data1, data2 in pair:
			set_a, set_b = set(data1), set(data2)
			bitmap_a, bitmap_b = RoaringBitmap(data1), RoaringBitmap(data2)
			assert not set_a.isdisjoint(set_b)
			assert not bitmap_a.isdisjoint(bitmap_b)
			# Drop everything that also occurs in data1 to force disjointness.
			remainder = [value for value in data2 if value not in set_a]
			set_c, bitmap_c = set(remainder), RoaringBitmap(remainder)
			assert set_a.isdisjoint(set_c)
			assert bitmap_a.isdisjoint(bitmap_c)
示例#32
0
	def test_disjoint(self, pair):
		"""Check that ``isdisjoint`` on bitmaps agrees with ``set.isdisjoint``."""
		for name, data1, data2 in pair:
			set_a, set_b = set(data1), set(data2)
			bitmap_a, bitmap_b = RoaringBitmap(data1), RoaringBitmap(data2)
			expected = set_a.isdisjoint(set_b)
			assert bitmap_a.isdisjoint(bitmap_b) == expected, name
			# Second round: data2 without the values that occur in data1.
			remainder = [value for value in data2 if value not in set_a]
			set_c, bitmap_c = set(remainder), RoaringBitmap(remainder)
			expected_filtered = set_a.isdisjoint(set_c)
			assert bitmap_a.isdisjoint(bitmap_c) == expected_filtered, name
示例#33
0
 def test_clamp2(self):
     """Check ``clamp`` boundaries on a handful of widely spaced values."""
     low = RoaringBitmap([0x00010001])
     mid = RoaringBitmap([0x00030003, 0x00050005])
     high = RoaringBitmap([0x00070007])
     everything = low | mid | high
     # (start, stop, expected result of everything.clamp(start, stop))
     cases = [
         (0, 0x000FFFFF, everything),
         (0x000200FF, 0x000FFFFF, mid | high),
         (0x00030003, 0x000FFFFF, mid | high),
         (0, 0x00060006, low | mid),
         (0, 0x00050006, low | mid),
         (0, 0x00050005, low | RoaringBitmap([0x00030003])),
     ]
     for start, stop, expected in cases:
         assert everything.clamp(start, stop) == expected
示例#34
0
	def test_contains(self, single):
		"""Check membership of present and absent values against a plain set."""
		for name, data in single:
			reference = set(data)
			bitmap = RoaringBitmap(data)
			for present in data:
				assert present in reference, name
				assert present in bitmap, name
			# Every value below 20000 that does not occur in the data.
			missing = set(range(20000)) - set(data)
			for absent in missing:
				assert absent not in reference, name
				assert absent not in bitmap, name
			bitmap._checkconsistency()
示例#35
0
 def test_contains(self, single):
     """Check membership of present and absent values against a plain set."""
     for name, data in single:
         reference = set(data)
         bitmap = RoaringBitmap(data)
         for present in data:
             assert present in reference, name
             assert present in bitmap, name
         # Every value below 20000 that does not occur in the data.
         missing = set(range(20000)) - set(data)
         for absent in missing:
             assert absent not in reference, name
             assert absent not in bitmap, name
         bitmap._checkconsistency()
示例#36
0
 def test_subset(self, pair):
     """Check ``<=`` on bitmaps for a non-subset and for a prefix of the data."""
     for data1, data2 in pair:
         other_set, full_set = set(data1), set(data2)
         other_bitmap, full_bitmap = RoaringBitmap(data1), RoaringBitmap(data2)
         assert not other_set.issubset(full_set)
         assert not set(other_bitmap).issubset(full_set)
         assert not other_bitmap <= full_bitmap
         # The first half of data2 is necessarily a subset of data2.
         prefix = data2[:len(data2) // 2]
         part_set, part_bitmap = set(prefix), RoaringBitmap(prefix)
         assert part_set.issubset(full_set)
         assert set(part_bitmap).issubset(full_set)
         assert part_bitmap <= full_bitmap
示例#37
0
 def test_subset(self, pair):
     """Check that ``<=`` on bitmaps agrees with ``<=`` on plain sets."""
     for name, data1, data2 in pair:
         other_set, full_set = set(data1), set(data2)
         other_bitmap, full_bitmap = RoaringBitmap(data1), RoaringBitmap(data2)
         expected = other_set.issubset(full_set)
         assert set(other_bitmap).issubset(full_set) == expected, name
         assert (other_bitmap <= full_bitmap) == expected, name
         # Repeat with the first half of data2 as the candidate subset.
         prefix = data2[:len(data2) // 2]
         part_set, part_bitmap = set(prefix), RoaringBitmap(prefix)
         expected = part_set.issubset(full_set)
         assert set(part_bitmap).issubset(full_set) == expected, name
         assert (part_bitmap <= full_bitmap) == expected, name
示例#38
0
def test_phase_2_no_temp_table_from_phase_1():
    """Run ``_phase_2`` on a fresh CloStream with a single-entry temp table.

    Checks the contents of ``closed_df`` before and after the call, and the
    resulting ``cid_list_map``.
    """
    stream = CloStream(filter_fn=NO_FILTER_FN)
    itemset = frozenset('CD')

    # Before the call, row 0 holds the empty itemset.
    assert stream.closed_df.loc[0, 'itemset'] == frozenset()
    stream._phase_2({itemset: 0})

    expected_rows = [[frozenset(), 0], [frozenset('CD'), 1]]
    assert stream.closed_df.values.tolist() == expected_rows
    expected_map = {'C': RoaringBitmap([1]), 'D': RoaringBitmap([1])}
    assert stream.cid_list_map == expected_map
示例#39
0
	def test_discard(self, single):
		"""Add all values, discard them again, and check that both containers are empty."""
		for data in single:
			ordered = sorted(data)
			reference = set()
			bitmap = RoaringBitmap()
			for value in ordered:
				reference.add(value)
				bitmap.add(value)
			# Remove the values in the same ascending order.
			for value in ordered:
				reference.discard(value)
				bitmap.discard(value)
			assert len(reference) == 0
			assert len(bitmap) == 0
			assert reference == set(bitmap)
			assert bitmap == reference
示例#40
0
 def test_select(self, single):
     """Check ``select`` and its relationship to ``rank`` at random positions.

     For each data set in ``single``, ten random positions are compared
     against a sorted reference list of the distinct values.
     """
     for data in single:
         ref = sorted(set(data))
         if not ref:
             # No valid position exists for an empty data set.
             continue
         rb = RoaringBitmap(data)
         lrb = list(rb)
         # randrange excludes its upper bound, so every index is valid for
         # ref. randint(0, len(ref)) is inclusive and could return len(ref),
         # which made the test fail with an IndexError.
         idx = [random.randrange(len(ref)) for _ in range(10)]
         for i in idx:
             assert lrb[i] == ref[i]
             selected = rb.select(i)
             assert selected in rb
             assert selected == ref[i]
             assert rb.rank(selected) - 1 == i
             # If the successor is a member its rank is one higher;
             # otherwise the rank stays the same.
             if selected + 1 in rb:
                 assert rb.rank(selected + 1) - 1 == i + 1
             else:
                 assert rb.rank(selected + 1) - 1 == i
示例#41
0
 def test_clamp(self, multi):
     """Check ``intersection`` with ``start``/``stop`` bounds on a MultiRoaringBitmap."""
     # Two values sampled from the first data set, ascending, delimit the window.
     low, high = sorted(sample(multi[0], 2))
     common = set.intersection(*[set(values) for values in multi])
     expected = common & set(range(low, high))
     collection = MultiRoaringBitmap([RoaringBitmap(values) for values in multi])
     every_index = list(range(len(collection)))
     result = collection.intersection(every_index, start=low, stop=high)
     assert low <= result.min() and result.max() < high
     assert expected == result
示例#42
0
 def test_minmax(self):
     """Check ``min``, ``max`` and indexing for ranges of 61440 and 61441 values."""
     shorter = RoaringBitmap(range(61440))
     assert shorter.min() == 0
     assert shorter.max() == 61439
     longer = RoaringBitmap(range(61441))
     assert longer.min() == 0
     assert longer.max() == 61440
     # The last element must be reachable by index and by iteration.
     assert longer[61440] == 61440
     assert list(longer)[61440] == 61440
示例#43
0
 def add(self, transaction):
     """Record one transaction in the per-item id bitmaps.

     The current value of ``self.n_transactions`` is used as the id of the
     transaction and is added to the entry of every distinct item in
     ``self.item_to_tids``; unseen items get a new ``RoaringBitmap``.
     The counter is incremented afterwards.
     """
     tid = self.n_transactions
     index = self.item_to_tids
     # frozenset removes duplicate items within the transaction.
     for item in frozenset(transaction):
         if item not in index:
             index[item] = RoaringBitmap([tid])
         else:
             index[item].add(tid)
     self.n_transactions = tid + 1
示例#44
0
 def test_aggregateor(self, multi):
     """Check that a multi-argument ``update`` matches ``set.update``."""
     first, rest = multi[0], multi[1:]
     expected = set(first)
     expected.update(*map(set, rest))
     bitmap = RoaringBitmap(first)
     bitmap.update(*map(RoaringBitmap, rest))
     bitmap._checkconsistency()
     assert bitmap == expected
示例#45
0
 def test_aggregateand(self, multi):
     """Check that a multi-argument ``intersection_update`` matches the set version.

     ``multi`` is indexed as a sequence of iterables: the first one seeds
     both containers and the remaining ones are intersected in one call.
     """
     ref = set(multi[0])
     ref.intersection_update(*[set(a) for a in multi[1:]])
     rb = RoaringBitmap(multi[0])
     rb.intersection_update(*[RoaringBitmap(a) for a in multi[1:]])
     rb._checkconsistency()
     # No assertion message: this function has no ``name`` in scope, so
     # referencing one would raise NameError on failure instead of
     # reporting the actual mismatch.
     assert rb == ref
示例#46
0
    def test_issue22(self):
        """Check that self-xor, self-difference and inversion give empty results.

        Uses bitmaps built from ``range(0, 61440)`` and ``range(0, 61441)``.
        """
        smaller = RoaringBitmap(range(61440))
        larger = RoaringBitmap(range(61441))
        for bitmap in (smaller, larger):
            assert len(bitmap ^ bitmap) == 0
            assert len(bitmap - bitmap) == 0
        for bitmap in (smaller, larger):
            assert len(~bitmap) == 0

        # In-place xor of a bitmap with itself.
        larger = RoaringBitmap(range(61441))
        assert len(smaller ^ smaller) == 0
        larger ^= larger
        assert len(larger) == 0

        # In-place difference of a bitmap with itself.
        larger = RoaringBitmap(range(61441))
        larger -= larger
        assert len(larger) == 0
示例#47
0
 def test_eq(self, multi):
     """Check ``==`` and ``!=`` of a MultiRoaringBitmap against lists and peers."""
     bitmaps = [RoaringBitmap(values) for values in multi]
     tail = bitmaps[1:]
     collection = MultiRoaringBitmap(bitmaps)
     same = MultiRoaringBitmap(bitmaps)
     shorter = MultiRoaringBitmap(tail)
     assert collection == bitmaps
     assert collection == same
     # Dropping the first member must make the comparison unequal.
     assert collection != tail
     assert collection != shorter
示例#48
0
	def test_select(self, single):
		"""Check ``select`` and its relationship to ``rank`` at random positions.

		For each data set in ``single``, ten random positions are compared
		against a sorted reference list of the distinct values.
		"""
		for data in single:
			ref = sorted(set(data))
			if not ref:
				# No valid position exists for an empty data set.
				continue
			rb = RoaringBitmap(data)
			lrb = list(rb)
			# randrange excludes its upper bound, so every index is valid for
			# ref. randint(0, len(ref)) is inclusive and could return len(ref),
			# which made the test fail with an IndexError.
			idx = [random.randrange(len(ref)) for _ in range(10)]
			for i in idx:
				assert lrb[i] == ref[i]
				selected = rb.select(i)
				assert selected in rb
				assert selected == ref[i]
				assert rb.rank(selected) - 1 == i
				# If the successor is a member its rank is one higher;
				# otherwise the rank stays the same.
				if selected + 1 in rb:
					assert rb.rank(selected + 1) - 1 == i + 1
				else:
					assert rb.rank(selected + 1) - 1 == i
示例#49
0
	def test_discard(self, single):
		"""Add all values, discard them again, and check that both containers are empty."""
		for name, data in single:
			ordered = sorted(data)
			reference = set()
			bitmap = RoaringBitmap()
			for value in ordered:
				reference.add(value)
				bitmap.add(value)
			# Remove the values in the same ascending order.
			for value in ordered:
				reference.discard(value)
				bitmap.discard(value)
			bitmap._checkconsistency()
			assert len(reference) == 0, name
			assert len(bitmap) == 0, name
			assert bitmap == reference, name
示例#50
0
def _indexfile(filename):
	"""Index the byte offsets at which the non-blank lines of a file start.

	The file is read in binary mode. The bitmap collects the starting offset
	of every line that is not purely whitespace, plus the total number of
	bytes read as a final entry. The value returned is ``freeze()`` of that
	bitmap.
	"""
	offsets = RoaringBitmap()
	position = 0
	with open(filename, 'rb') as handle:
		for raw in handle:
			blank = raw.isspace()
			if not blank:
				offsets.add(position)
			position += len(raw)
	# The end offset is always recorded, whether or not any line was added.
	offsets.add(position)
	return offsets.freeze()
示例#51
0
	def test_inititerator(self, single):
		"""Check that a bitmap can be built from a generator expression."""
		for name, data in single:
			expected = {value for value in data}
			# A generator is passed on purpose: it has no length and no indexing.
			bitmap = RoaringBitmap(value for value in data)
			bitmap._checkconsistency()
			assert expected == bitmap, name
示例#52
0
	def test_andlen(self, pair):
		"""Check ``intersection_len`` against the size of the set intersection."""
		for data1, data2 in pair:
			expected = len(set(data1) & set(data2))
			left, right = RoaringBitmap(data1), RoaringBitmap(data2)
			assert expected == left.intersection_len(right)
示例#53
0
	def test_orlen(self, pair):
		"""Check ``union_len`` against the set union and against ``len(rb | rb2)``."""
		for name, data1, data2 in pair:
			expected = len(set(data1) | set(data2))
			left, right = RoaringBitmap(data1), RoaringBitmap(data2)
			assert expected == left.union_len(right), name
			# The length-only method must agree with the materialised union.
			assert len(left | right) == left.union_len(right), name
示例#54
0
	def test_add(self, single):
		"""Check ``add`` against a plain set, including values at the range limits.

		After filling the bitmap, -1 and ``1 << 32`` must each raise
		OverflowError, while 0 and ``(1 << 32) - 1`` must be accepted.
		"""
		for name, data in single:
			ref = set()
			rb = RoaringBitmap()
			for n in sorted(data):
				ref.add(n)
				rb.add(n)
			assert rb == ref, name
			# Each out-of-range value gets its own context manager: inside a
			# single ``pytest.raises`` block the first exception ends the
			# block, so a second call would never be executed.
			with pytest.raises(OverflowError):
				rb.add(-1)
			with pytest.raises(OverflowError):
				rb.add(1 << 32)
			rb.add(0)
			rb.add((1 << 32) - 1)
			rb._checkconsistency()
示例#55
0
	def test_inittrivial(self):
		"""Check that a bitmap built from the list 0..4 equals the matching set."""
		values = [0, 1, 2, 3, 4]
		expected = set(values)
		bitmap = RoaringBitmap(values)
		bitmap._checkconsistency()
		assert expected == bitmap
示例#56
0
	def test_add(self, single):
		"""Check ``add`` against a plain set, including values at the range limits.

		After filling the bitmap, -1 and ``1 << 32`` must each raise
		OverflowError, while 0 and ``(1 << 32) - 1`` must be accepted.
		"""
		for data in single:
			ref = set()
			rb = RoaringBitmap()
			for n in sorted(data):
				ref.add(n)
				rb.add(n)
			assert set(ref) == set(rb)
			assert rb == ref
			# Each out-of-range value gets its own context manager: inside a
			# single ``pytest.raises`` block the first exception ends the
			# block, so a second call would never be executed.
			with pytest.raises(OverflowError):
				rb.add(-1)
			with pytest.raises(OverflowError):
				rb.add(1 << 32)
			rb.add(0)
			rb.add((1 << 32) - 1)
示例#57
0
	def test_initunsorted(self, single):
		"""Check that a bitmap built from fixture data as-is equals the matching set."""
		for name, data in single:
			bitmap = RoaringBitmap(data)
			bitmap._checkconsistency()
			expected = set(data)
			assert expected == bitmap, name