Exemplo n.º 1
0
    def test_add(self):
        """Add ten items and pin the populated LPFM slots.

        Every non-empty entry of ``LPFM`` is read as a sequence of
        ``(ts, R)`` pairs; the test records the slot index together with
        the largest ``R`` found in that slot and compares against a fixed
        expectation.
        """
        sketch = SlidingHyperLogLog(0.05, 100)
        for n in range(10):
            sketch.add(n, str(n))

        observed = []
        for slot, pairs in enumerate(sketch.LPFM):
            if not pairs:
                # Untouched slots are skipped entirely.
                continue
            observed.append((slot, max(r for _, r in pairs)))

        expected = [
            (1, 1), (41, 1), (44, 1), (76, 3), (103, 4),
            (182, 1), (442, 2), (464, 5), (497, 1), (506, 1),
        ]
        self.assertEqual(observed, expected)
Exemplo n.º 2
0
    def test_add(self):
        """Check which LPFM slots are filled after adding ten items.

        For each non-empty slot the maximum ``R`` over its ``(ts, R)``
        pairs is collected and the resulting ``(slot, max_R)`` list is
        compared with the known-good values.
        """
        hll = SlidingHyperLogLog(0.05, 100)
        for value in range(10):
            hll.add(value, str(value))

        peaks = []
        for position, entries in enumerate(hll.LPFM):
            if entries:
                highest = max(rank for _, rank in entries)
                peaks.append((position, highest))

        self.assertEqual(
            peaks,
            [
                (1, 1), (41, 1), (44, 1), (76, 3), (103, 4),
                (182, 1), (442, 2), (464, 5), (497, 1), (506, 1),
            ],
        )
Exemplo n.º 3
0
    def test_pickle(self):
        """Round-trip a populated sketch through pickle and compare fields.

        The restored object must match the original on ``window``,
        ``alpha``, ``p``, ``m`` and ``LPFM``.
        """
        original = SlidingHyperLogLog(0.05, 100)
        for n in xrange(10000):
            original.add(n, str('k1-%d' % n))

        restored = pickle.loads(pickle.dumps(original))

        # Compare field by field, in the same order as the individual asserts.
        for field in ('window', 'alpha', 'p', 'm', 'LPFM'):
            self.assertEqual(getattr(original, field), getattr(restored, field))
Exemplo n.º 4
0
 def test_calc_cardinality_sliding1(self):
     """Check truncated cardinality at several query timestamps.

     Items are added at timestamps 1, 2 and 3 to a sketch built with a
     window argument of 100; the expected counts fall by one as the query
     timestamp passes 101, 102 and 103 respectively.
     """
     sketch = SlidingHyperLogLog(0.05, 100)

     def expect_counts(pairs):
         # Each pair is (query timestamp, expected int(card)).
         for timestamp, wanted in pairs:
             self.assertEqual(int(sketch.card(timestamp)), wanted)

     sketch.add(1, 'k1')
     expect_counts([(1, 1), (101, 1), (102, 0)])

     sketch.add(2, 'k2')
     sketch.add(3, 'k3')
     expect_counts([(3, 3), (101, 3), (102, 2), (103, 1), (104, 0)])
Exemplo n.º 5
0
 def test_init(self):
     """Verify the attributes set by ``SlidingHyperLogLog(0.05, 100)``.

     Checks ``window``, ``p``, ``alpha`` and ``m`` against fixed values
     and that ``LPFM`` has 512 entries.
     """
     sketch = SlidingHyperLogLog(0.05, 100)

     expected_attrs = (
         ('window', 100),
         ('p', 9),
         ('alpha', 0.7197831133217303),
         ('m', 512),
     )
     for attr, wanted in expected_attrs:
         self.assertEqual(getattr(sketch, attr), wanted)

     self.assertEqual(len(sketch.LPFM), 512)
Exemplo n.º 6
0
    def test_calc_cardinality(self):
        """Statistically check the cardinality estimate for several sizes.

        For each target cardinality, 30 fresh sketches are each filled with
        that many random 20-byte keys stamped with the current time. The
        mean estimate is turned into a z-score using ``rel_err`` as the
        relative standard deviation, and the score must lie strictly
        between -1.96 and 1.96.
        """
        rel_err = 0.05
        n = 30

        for card in [1, 5, 10, 30, 60, 200, 1000, 10000, 60000]:
            total = 0.0
            for _trial in xrange(n):
                sketch = SlidingHyperLogLog(rel_err, 100)
                for _item in xrange(card):
                    sketch.add(int(time.time()), os.urandom(20))
                total += sketch.card(int(time.time()))

            mean_estimate = float(total) / n
            std_error = rel_err * card / math.sqrt(n)
            z = (mean_estimate - card) / std_error

            self.assertLess(-1.96, z)
            self.assertGreater(1.96, z)
Exemplo n.º 7
0
    def test_calc_cardinality(self):
        """Check that mean estimates stay within a z-score of +/-1.96.

        Runs 30 trials per target cardinality. Each trial fills a new
        sketch with random 20-byte keys timestamped "now" and then queries
        the cardinality at the current time. The averaged estimate is
        normalised with ``rel_err * card / sqrt(n)``.
        """
        targets = [1, 5, 10, 30, 60, 200, 1000, 10000, 60000]
        trials = 30
        rel_err = 0.05

        for card in targets:
            accumulated = 0.0
            for _run in xrange(trials):
                hll = SlidingHyperLogLog(rel_err, 100)
                for _key in xrange(card):
                    hll.add(int(time.time()), os.urandom(20))
                accumulated += hll.card(int(time.time()))

            deviation = float(accumulated) / trials - card
            scale = rel_err * card / math.sqrt(trials)
            z = deviation / scale

            self.assertLess(-1.96, z)
            self.assertGreater(1.96, z)
Exemplo n.º 8
0
    def test_calc_cardinality_sliding3(self):
        """Check that ``card_wlist`` agrees with one ``card`` call per window.

        For each size in ``clist`` a sketch is filled with that many random
        20-byte keys at timestamps ``0 .. card - 1``. Cardinalities are then
        queried at timestamp ``1.5 * card`` for a list of window widths, once
        through individual ``card`` calls and once through ``card_wlist``;
        both result lists must be equal.
        """
        clist = [30, 60, 200, 1000, 10000, 60000]
        rel_err = 0.05

        for card in clist:
            a = SlidingHyperLogLog(rel_err, card)

            for i in xrange(card):
                a.add(i, os.urandom(20))

            query_ts = 1.5 * card
            # Floor division keeps the range() step an integer even when
            # true division is in effect (Python 3 or __future__.division).
            windows = [w / 10.0 for w in range(1, card + 1, card // 10)]

            one_by_one = [a.card(query_ts, w) for w in windows]
            batched = a.card_wlist(query_ts, windows)
            self.assertEqual(one_by_one, batched)
Exemplo n.º 9
0
    def test_from_list(self):
        """Rebuild a sketch with ``from_list`` and compare it to the source.

        The rebuilt sketch must compare equal to the original, report the
        same cardinality at timestamp 9, and its per-window ``card`` results
        must match the original's ``card_wlist`` output for windows
        100, 3 and 5.
        """
        source = SlidingHyperLogLog(0.05, 100)
        for n in range(10):
            source.add(n, str(n))

        rebuilt = SlidingHyperLogLog.from_list(source.LPFM, 100)

        self.assertEqual(source, rebuilt)
        self.assertEqual(source.card(9), rebuilt.card(9))

        windows = (100, 3, 5)
        # Query the batched form first, then the individual calls in order.
        batched = source.card_wlist(9, list(windows))
        individually = [rebuilt.card(9, width) for width in windows]
        self.assertEqual(batched, individually)
Exemplo n.º 10
0
    def test_from_list(self):
        """Check that ``from_list`` reproduces an equivalent sketch.

        A sketch populated with ten items is cloned from its ``LPFM`` data.
        The clone must be equal to the original and yield identical
        cardinalities, both for the default query and for windows 100, 3
        and 5.
        """
        first = SlidingHyperLogLog(0.05, 100)
        for value in range(10):
            first.add(value, str(value))

        second = SlidingHyperLogLog.from_list(first.LPFM, 100)
        self.assertEqual(first, second)
        self.assertEqual(first.card(9), second.card(9))

        from_wlist = first.card_wlist(9, [100, 3, 5])
        from_single_calls = []
        for width in (100, 3, 5):
            from_single_calls.append(second.card(9, width))
        self.assertEqual(from_wlist, from_single_calls)
Exemplo n.º 11
0
    def test_calc_cardinality_sliding3(self):
        """Verify ``card_wlist`` against repeated single-window ``card`` calls.

        Each sketch is built with ``card`` as its window argument and filled
        with ``card`` random 20-byte keys at timestamps ``0 .. card - 1``.
        At query timestamp ``1.5 * card`` the cardinality for every window
        width in a generated list is computed both ways and the two result
        lists are required to be equal.
        """
        clist = [30, 60, 200, 1000, 10000, 60000]
        rel_err = 0.05

        for card in clist:
            a = SlidingHyperLogLog(rel_err, card)

            for i in xrange(card):
                a.add(i, os.urandom(20))

            timestamp = 1.5 * card
            # range() needs an integer step; "//" stays integral whether or
            # not true division is active, unlike the "/" it replaces.
            step = card // 10
            widths = [w / 10.0 for w in range(1, card + 1, step)]

            per_call = [a.card(timestamp, width) for width in widths]
            per_list = a.card_wlist(timestamp, widths)
            self.assertEqual(per_call, per_list)
Exemplo n.º 12
0
 def test_calc_cardinality_sliding1(self):
     """Check integer-truncated ``card`` results as the query time advances.

     One item is added at timestamp 1 and checked, then two more are added
     at timestamps 2 and 3. The expected counts decrease step by step for
     query timestamps 101 through 104.
     """
     hll = SlidingHyperLogLog(0.05, 100)

     hll.add(1, 'k1')
     for when, count in ((1, 1), (101, 1), (102, 0)):
         self.assertEqual(int(hll.card(when)), count)

     hll.add(2, 'k2')
     hll.add(3, 'k3')
     for when, count in ((3, 3), (101, 3), (102, 2), (103, 1), (104, 0)):
         self.assertEqual(int(hll.card(when)), count)
Exemplo n.º 13
0
    def test_update_err(self):
        """``update`` must raise ``ValueError`` for sketches built with
        different error rates (0.05 vs 0.01)."""
        coarse = SlidingHyperLogLog(0.05, 100)
        fine = SlidingHyperLogLog(0.01, 100)

        with self.assertRaises(ValueError):
            coarse.update(fine)
Exemplo n.º 14
0
    def test_update(self):
        """Merging two sketches must equal a sketch fed both key sets.

        ``a`` receives the ``k1-*`` keys, ``b`` the ``k2-*`` keys, and ``c``
        receives both (all ``k1-*`` first, then all ``k2-*``). After
        ``a.update(b)``, ``a`` must equal ``c`` while differing from ``b``,
        and ``b`` must differ from ``c``.
        """
        a = SlidingHyperLogLog(0.05, 100)
        b = SlidingHyperLogLog(0.05, 100)
        c = SlidingHyperLogLog(0.05, 100)

        # Feed each partial sketch and the combined one in lock-step,
        # one key family at a time.
        for partial, template in ((a, 'k1-%d'), (b, 'k2-%d')):
            for n in xrange(10000):
                partial.add(n, str(template % n))
                c.add(n, str(template % n))

        a.update(b)

        self.assertNotEqual(a, b)
        self.assertNotEqual(b, c)
        self.assertEqual(a, c)
Exemplo n.º 15
0
    def test_update(self):
        """Check that ``update`` folds another sketch's contents in.

        Two sketches are filled with disjoint key families (``k1-*`` and
        ``k2-*``) while a third sees every key. Updating the first with the
        second must make it equal to the third; the other pairings must
        remain unequal.
        """
        left = SlidingHyperLogLog(0.05, 100)
        right = SlidingHyperLogLog(0.05, 100)
        combined = SlidingHyperLogLog(0.05, 100)

        def feed(target, template):
            # Add 10000 keys to both the target and the combined sketch.
            for n in xrange(10000):
                target.add(n, str(template % n))
                combined.add(n, str(template % n))

        feed(left, 'k1-%d')
        feed(right, 'k2-%d')

        left.update(right)

        self.assertNotEqual(left, right)
        self.assertNotEqual(right, combined)
        self.assertEqual(left, combined)