def test_merge_sparse_with_sparse_to_dense(self):
    """Check merging two sparse sketches around the sparse/dense boundary.

    Two sketches of length 16 each receive 48 values from non-overlapping
    ranges. The merge of those 96 values is expected to stay in sparse mode
    and report the cardinality exactly. After one more value is added to the
    first sketch, the merge is expected to leave sparse mode and estimate 97
    within a 5% tolerance.
    """
    left = HyperLogLogPlusPlus(length=16, random_seed=234)
    right = HyperLogLogPlusPlus(length=16, random_seed=234)

    # NOTE(review): 16 * 6 presumably mirrors the sketch's sparse-mode
    # capacity for length=16 -- confirm against HyperLogLogPlusPlus.
    values_per_sketch = (16 * 6) // 2
    for value in range(values_per_sketch):
        left.add(value)
        right.add(value + 100)

    combined = left.merge(right)
    self.assertTrue(
        combined.sparse_mode, 'Merged sketch should be in sparse mode.'
    )
    self.assertEqual(
        combined.estimate_cardinality(),
        96,
        'Estimated cardinality not correct under sparse mode.',
    )

    # One additional distinct value brings the union to 97 elements.
    left.add(1000)
    combined = left.merge(right)
    self.assertFalse(
        combined.sparse_mode, 'Merged sketch should not be in sparse mode.'
    )
    expected_dense = 97
    self.assertAlmostEqual(
        combined.estimate_cardinality(),
        expected_dense,
        delta=expected_dense * 0.05,
        msg='Estimated cardinality not correct under dense mode.',
    )
def test_merge_sparse_with_sparse_to_sparse(self):
    """Check that merging two single-element sparse sketches stays sparse.

    Both sketches hold the same single value (1). The merged sketch is
    expected to be in sparse mode, to have buckets equal to the first
    sketch's buckets, to carry a temp set of exactly {1}, and to estimate a
    cardinality of 1.
    """
    first = HyperLogLogPlusPlus(length=16, random_seed=234)
    first.add(1)
    second = HyperLogLogPlusPlus(length=16, random_seed=234)
    second.add(1)

    combined = first.merge(second)

    self.assertTrue(
        combined.sparse_mode, 'Merged sketch is not in sparse mode.'
    )
    # Element-wise comparison of the bucket arrays; every position must match.
    buckets_match = all(first.buckets == combined.buckets)
    self.assertTrue(buckets_match, 'Merged sketch is not correct.')
    self.assertSameElements(
        combined.temp_set, {1}, 'Temp set is not correct.'
    )
    self.assertEqual(
        combined.estimate_cardinality(),
        1,
        'Estimated cardinality is not correct.',
    )
def test_merge_dense_with_dense(self):
    """Check merging two sketches that each received 97 values.

    The two sketches are filled from non-overlapping ranges (0..96 and
    100..196), so the union holds 194 distinct values. The merged sketch is
    expected not to be in sparse mode, to share at least one bucket value
    with the second sketch, to have an empty temp set, and to estimate 194
    within a 10% tolerance.
    """
    left = HyperLogLogPlusPlus(length=16, random_seed=234)
    right = HyperLogLogPlusPlus(length=16, random_seed=234)

    # NOTE(review): 16 * 6 + 1 is presumably one past the sparse-mode
    # capacity for length=16 -- confirm against HyperLogLogPlusPlus.
    values_per_sketch = 16 * 6 + 1
    for value in range(values_per_sketch):
        left.add(value)
        right.add(value + 100)

    combined = left.merge(right)

    self.assertFalse(
        combined.sparse_mode, 'Merged sketch should not be in sparse mode.'
    )
    # Count the bucket positions where the merge kept the second sketch's value.
    matching_buckets = sum(right.buckets == combined.buckets)
    self.assertGreater(matching_buckets, 0, 'Merged sketch is not correct.')
    self.assertSameElements(
        combined.temp_set, set(), 'Temp set is not correct.'
    )
    expected_cardinality = 194
    self.assertAlmostEqual(
        combined.estimate_cardinality(),
        expected_cardinality,
        delta=expected_cardinality * 0.1,
    )
def test_merge_sparse_with_dense(self):
    """Check merging a single-element sketch into one that received 97 values.

    The first sketch holds only the value 100; the second holds 0..96. The
    merged sketch is expected not to be in sparse mode, to differ from the
    second sketch in exactly one bucket, to have an empty temp set, and to
    estimate a larger cardinality than the second sketch alone.
    """
    small = HyperLogLogPlusPlus(length=16, random_seed=234)
    small.add(100)

    large = HyperLogLogPlusPlus(length=16, random_seed=234)
    # NOTE(review): 16 * 6 + 1 is presumably one past the sparse-mode
    # capacity for length=16 -- confirm against HyperLogLogPlusPlus.
    for value in range(16 * 6 + 1):
        large.add(value)

    combined = small.merge(large)

    self.assertFalse(
        combined.sparse_mode, 'Merged sketch should not be in sparse mode.'
    )
    # With this random seed the extra value is expected to change exactly
    # one of the 16 bucket values.
    num_buckets = 16
    unchanged_buckets = sum(large.buckets == combined.buckets)
    self.assertEqual(
        unchanged_buckets, num_buckets - 1, 'Merged sketch is not correct.'
    )
    self.assertSameElements(
        combined.temp_set, set(), 'Temp set is not correct.'
    )
    self.assertGreater(
        combined.estimate_cardinality(), large.estimate_cardinality()
    )