Exemplo n.º 1
0
    def test_sample_matrix_noisy(self):
        """Recover cardinalities from a sampled matrix of noised sketches.

        Two compatible sketches receive ids 0..29999 and 20000..39999, are
        passed through the same Noiser, and the column sums of the matrix
        drawn by the Sampler are converted back into cardinalities.
        """
        base = liquid_legions.LiquidLegions(2.0, 100000, random_seed=42)
        other = base.get_compatible_sketch()

        base.add_ids(list(range(30000)))
        other.add_ids(list(range(20000, 40000)))

        # A single Noiser instance handles both sketches, first then second.
        noise_parameter = 0.25
        noiser = liquid_legions.Noiser(noise_parameter)
        first, second = [noiser(sketch) for sketch in (base, other)]

        venn = liquid_legions.VennEstimator([first, second])
        logging.info('Venn: %s', venn.estimate_from_all())
        sampler = liquid_legions.Sampler([first, second])
        row_sums = sampler.get_all_posteriors().sum(axis=1)
        logging.info('Row sums: %s', row_sums)

        matrix = sampler.sample_matrix()
        logging.info('Sample shape: %s', matrix.shape)
        column_sums = matrix.sum(axis=0)
        logging.info('Bit count expectations: %s', column_sums)

        # Column 0 is read as the first sketch, column 1 as the second.
        to_cardinality = sampler.sketch.get_cardinality_for_legionaries_count
        first_cardinality = to_cardinality(column_sums[0])
        second_cardinality = to_cardinality(column_sums[1])
        logging.info('s1 cardinality: %s', first_cardinality)
        logging.info('s2 cardinality: %s', second_cardinality)

        self.assertAlmostEqual(first_cardinality, 30000, delta=1500)
        self.assertAlmostEqual(second_cardinality, 20000, delta=1000)
Exemplo n.º 2
0
 def test_venn_estimator_pure(self):
     """Check Venn region estimates for an un-noised chain of sketches.

     The chain comes from ``self.make_chain(2, 5, 100000, 50000)``; entries
     1, 2 and 3 of the estimate must each be within 2500 of 50000.
     """
     chain = self.make_chain(2, 5, 100000, 50000)
     estimator = liquid_legions.VennEstimator(chain)
     # Estimate once and log before asserting, so the values are still
     # reported when one of the checks below fails.
     estimate = estimator.estimate_from_all()
     logging.info('Venn: %s', estimate)
     for region in (1, 2, 3):
         self.assertAlmostEqual(estimate[region], 50000, delta=2500)
Exemplo n.º 3
0
    def test_posteriors_noisy(self):
        """Recover cardinalities from posterior column sums of noised sketches.

        The sketches hold ids 0..29999 and 20000..39999 and are noised with
        separate Noiser instances (0.3 and 0.2). Posterior columns are summed
        and combined into per-sketch and union legionary counts.
        """
        base = liquid_legions.LiquidLegions(2.0, 100000, random_seed=42)
        other = base.get_compatible_sketch()

        base.add_ids(list(range(30000)))
        other.add_ids(list(range(20000, 40000)))

        first = liquid_legions.Noiser(0.3)(base)
        second = liquid_legions.Noiser(0.2)(other)

        venn = liquid_legions.VennEstimator([first, second])
        logging.info('Venn: %s', venn.estimate_from_all())
        sampler = liquid_legions.Sampler([first, second])
        posteriors = sampler.get_all_posteriors()
        logging.info('Row sums: %s', posteriors.sum(axis=1))

        column_sums = posteriors.sum(axis=0)
        logging.info('Bit count expectations: %s', column_sums)

        # NOTE(review): columns 1, 2, 3 presumably mean "first only",
        # "second only" and "both" -- confirm against Sampler.
        only_first, only_second, both = (
            column_sums[1], column_sums[2], column_sums[3])
        to_cardinality = sampler.sketch.get_cardinality_for_legionaries_count

        first_cardinality = to_cardinality(only_first + both)
        logging.info('s1 cardinality: %.3f', first_cardinality)
        second_cardinality = to_cardinality(only_second + both)
        logging.info('s2 cardinality: %.3f', second_cardinality)
        union_cardinality = to_cardinality(only_first + only_second + both)
        logging.info('s1 | s2 cardinality: %.3f', union_cardinality)

        self.assertAlmostEqual(first_cardinality, 30000, delta=1500)
        self.assertAlmostEqual(second_cardinality, 20000, delta=1000)
        self.assertAlmostEqual(union_cardinality, 40000, delta=2000)
Exemplo n.º 4
0
 def test_venn_estimator_noised(self):
     """Check Venn region estimates for a noised chain of sketches.

     Every sketch from ``self.make_chain(2, 5, 100000, 50000)`` is passed
     through one ``Noiser(0.25)``; entries 1, 2 and 3 of the estimate must
     each be within 6000 of 50000.
     """
     chain = self.make_chain(2, 5, 100000, 50000)
     dp_p = 0.25
     noiser = liquid_legions.Noiser(dp_p)
     noised_chain = [noiser(sketch) for sketch in chain]
     estimator = liquid_legions.VennEstimator(noised_chain)
     # Estimate once and log before asserting, so the values are still
     # reported when one of the checks below fails.
     estimate = estimator.estimate_from_all()
     logging.info('Venn: %s', estimate)
     for region in (1, 2, 3):
         self.assertAlmostEqual(estimate[region], 50000, delta=6000)
Exemplo n.º 5
0
    def test_venn_priors_two_tiny(self):
        """Spot-check Venn priors for two tiny sketches of unequal sets.

        The sketches are built with ``LiquidLegions(20.0, 10, ...)``; one
        gets ids 0..9 and the other ids 0..9999. Three entries of the prior
        matrix are compared against expected values.
        """
        small = liquid_legions.LiquidLegions(20.0, 10, random_seed=42)
        large = small.get_compatible_sketch()

        small.add_ids(list(range(10)))
        large.add_ids(list(range(10000)))

        venn = liquid_legions.VennEstimator([small, large])
        logging.info('Venn: %s', venn.estimate_from_all())
        sampler = liquid_legions.Sampler([small, large])
        prior_matrix = sampler.get_all_venn_priors()
        logging.info('Priors: %s', prior_matrix)
        logging.info('Row sums: %s', prior_matrix.sum(axis=1))

        first_row = prior_matrix[0]
        middle_row = prior_matrix[4]
        last_row = prior_matrix[9]
        self.assertAlmostEqual(first_row[3], 1.0)
        self.assertAlmostEqual(middle_row[2], 0.95, delta=0.1)
        self.assertAlmostEqual(last_row[0], 0.95, delta=0.1)
Exemplo n.º 6
0
    def test_posteriors_tiny_pure(self):
        """Check posteriors for two tiny un-noised sketches.

        One sketch gets ids 0..9 and the other ids 0..9999. Two posterior
        entries are expected to equal 1.0, and every row of the posterior
        matrix is expected to sum to 1.0.
        """
        small = liquid_legions.LiquidLegions(20.0, 10, random_seed=42)
        large = small.get_compatible_sketch()

        small.add_ids(list(range(10)))
        large.add_ids(list(range(10000)))

        venn = liquid_legions.VennEstimator([small, large])
        logging.info('Venn: %s', venn.estimate_from_all())
        sampler = liquid_legions.Sampler([small, large])
        posterior_matrix = sampler.get_all_posteriors()
        logging.info('Posteriors: %s', posterior_matrix)
        row_sums = posterior_matrix.sum(axis=1)
        logging.info('Row sums: %s', row_sums)

        for position in ((0, 3), (3, 2)):
            self.assertAlmostEqual(posterior_matrix[position], 1.0)
        for row_total in row_sums:
            self.assertAlmostEqual(row_total, 1.0)
Exemplo n.º 7
0
    def test_venn_priors_two(self):
        """Recover cardinalities of two sketches from Venn prior column sums.

        One sketch gets ids 0..999 and the other ids 0..19999. Prior columns
        are summed and combined into per-sketch legionary counts, which are
        then converted to cardinalities.
        """
        small = liquid_legions.LiquidLegions(20.0, 10000, random_seed=42)
        large = small.get_compatible_sketch()

        small.add_ids(list(range(1000)))
        large.add_ids(list(range(20000)))

        venn = liquid_legions.VennEstimator([small, large])
        logging.info('Venn: %s', venn.estimate_from_all())
        sampler = liquid_legions.Sampler([small, large])
        prior_matrix = sampler.get_all_venn_priors()
        logging.info('Priors: %s', prior_matrix)
        column_sums = prior_matrix.sum(axis=0)
        logging.info('Counts: %s', column_sums)

        # Column 3 contributes to both sketches' legionary counts.
        shared = column_sums[3]
        to_cardinality = sampler.sketch.get_cardinality_for_legionaries_count
        small_cardinality = to_cardinality(column_sums[1] + shared)
        large_cardinality = to_cardinality(column_sums[2] + shared)
        logging.info('Cardinalities: %.3f, %.3f', small_cardinality,
                     large_cardinality)

        self.assertAlmostEqual(small_cardinality, 1000, delta=100)
        self.assertAlmostEqual(large_cardinality, 20000, delta=2000)
Exemplo n.º 8
0
    def test_manual_sequential_merge_large_overlap_pure(self):
        """Merge ten overlapping sketches one by one via sampling.

        Step ``i`` adds ids ``i * 1000 .. i * 1000 + 9999`` to a fresh
        compatible sketch, passes it through ``Noiser(0.0)``, samples it
        jointly with the accumulator and merges the sampled result in. The
        final cardinality must be within 10% of the true union size.
        """
        noiser = liquid_legions.Noiser(0.0)  # No noise.

        accumulator = liquid_legions.LiquidLegions(
            10.0, 50000, random_seed=42)

        seen_ids = set()
        for step in range(10):
            incoming = accumulator.get_compatible_sketch()
            start = step * 1000
            step_ids = range(start, start + 10000)
            incoming.add_ids(step_ids)
            seen_ids = seen_ids.union(step_ids)
            noised_incoming = noiser(incoming)

            # Sample before building the estimator, as in the original order.
            pair_sampler = liquid_legions.Sampler(
                [accumulator, noised_incoming])
            sampled = pair_sampler.sample()
            _, sampled_incoming = sampled
            estimator = liquid_legions.VennEstimator(
                [accumulator, noised_incoming])
            logging.info('Venn: %s', estimator())

            accumulator.merge_in(sampled_incoming)
            logging.info('Step %d, cardinality %.3f, true cardinality: %d',
                         step, accumulator.get_cardinality(), len(seen_ids))

        true_size = len(seen_ids)
        self.assertAlmostEqual(true_size,
                               accumulator.get_cardinality(),
                               delta=true_size * 0.1)
Exemplo n.º 9
0
 def test_venn_estimator_pure_single(self):
     """Check the Venn estimate for a single un-noised sketch.

     The sketch receives ids 0..1999; entry 1 of the estimate must be
     within 50 of 2000.
     """
     sketch = liquid_legions.LiquidLegions(5, 100000, random_seed=42)
     sketch.add_ids(range(2000))
     estimator = liquid_legions.VennEstimator([sketch])
     # Estimate once and log before asserting, so the value is still
     # reported when the check below fails.
     estimate = estimator.estimate_from_all()
     logging.info('Venn: %s', estimate)
     self.assertAlmostEqual(estimate[1], 2000, delta=50)