示例#1
0
 def testMaintainsOrder(self):
     """Items() yields values in strictly increasing order.

     Feeds 0..9999 in ascending order into a bucket constructed with 100 and
     checks that every element of Items() is greater than its predecessor.
     """
     bucket = reservoir._ReservoirBucket(100)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3.
     for value in range(10000):
         bucket.AddItem(value)
     previous = -1  # Smaller than any added value, so the first check passes.
     for current in bucket.Items():
         # assertGreater prints both operands on failure, unlike assertTrue.
         self.assertGreater(current, previous)
         previous = current
示例#2
0
 def testMaintainsOrder(self):
   """Items() yields values in strictly increasing order.

   Feeds 0..9999 in ascending order into a bucket constructed with 100 and
   checks that every element of Items() is greater than its predecessor.
   """
   bucket = reservoir._ReservoirBucket(100)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3.
   for value in range(10000):
     bucket.AddItem(value)
   previous = -1  # Smaller than any added value, so the first check passes.
   for current in bucket.Items():
     # assertGreater prints both operands on failure, unlike assertTrue.
     self.assertGreater(current, previous)
     previous = current
示例#3
0
 def testRemovesItems(self):
     """FilterItems drops rejected items and returns how many it removed.

     Ten items (0..9) are added to a bucket constructed with 100, then
     filtered with the predicate ``x <= 7``.
     """
     bucket = reservoir._ReservoirBucket(100)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3.
     for value in range(10):
         bucket.AddItem(value)
     self.assertEqual(len(bucket.Items()), 10)
     self.assertEqual(bucket._num_items_seen, 10)

     # Only 8 and 9 fail the predicate, so two removals are expected.
     num_removed = bucket.FilterItems(lambda x: x <= 7)
     self.assertEqual(num_removed, 2)
     self.assertEqual(len(bucket.Items()), 8)
     self.assertEqual(bucket._num_items_seen, 8)
示例#4
0
 def testRemovesItems(self):
   """FilterItems drops rejected items and returns how many it removed.

   Ten items (0..9) are added to a bucket constructed with 100, then
   filtered with the predicate ``x <= 7``.
   """
   bucket = reservoir._ReservoirBucket(100)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3.
   for value in range(10):
     bucket.AddItem(value)
   self.assertEqual(len(bucket.Items()), 10)
   self.assertEqual(bucket._num_items_seen, 10)

   # Only 8 and 9 fail the predicate, so two removals are expected.
   num_removed = bucket.FilterItems(lambda x: x <= 7)
   self.assertEqual(num_removed, 2)
   self.assertEqual(len(bucket.Items()), 8)
   self.assertEqual(bucket._num_items_seen, 8)
示例#5
0
  def testRemovesItemsWhenItemsAreReplaced(self):
    """FilterItems adjusts _num_items_seen after many more adds than 100.

    10000 items are added to a bucket constructed with 100, then filtered
    with ``x <= 7``. The test expects more than 92 removals, no surviving
    item above 7, and a seen-count scaled by the removed fraction.
    """
    bucket = reservoir._ReservoirBucket(100)
    # range (not the Python 2-only xrange) keeps this runnable on Python 3.
    for value in range(10000):
      bucket.AddItem(value)
    self.assertEqual(bucket._num_items_seen, 10000)

    # Remove items
    num_removed = bucket.FilterItems(lambda x: x <= 7)
    self.assertGreater(num_removed, 92)
    survivors_above_limit = [item for item in bucket.Items() if item > 7]
    self.assertEqual([], survivors_above_limit)

    # float() keeps the division exact on interpreters with integer "/".
    removed_fraction = float(num_removed) / 100
    expected_seen = int(round(10000 * (1 - removed_fraction)))
    self.assertEqual(bucket._num_items_seen, expected_seen)
示例#6
0
    def testRemovesItemsWhenItemsAreReplaced(self):
        """FilterItems adjusts _num_items_seen after many more adds than 100.

        10000 items are added to a bucket constructed with 100, then filtered
        with ``x <= 7``. The test expects more than 92 removals, no surviving
        item above 7, and a seen-count scaled by the removed fraction.
        """
        bucket = reservoir._ReservoirBucket(100)
        # range (not the Python 2-only xrange) keeps this runnable on Python 3.
        for value in range(10000):
            bucket.AddItem(value)
        self.assertEqual(bucket._num_items_seen, 10000)

        # Remove items
        num_removed = bucket.FilterItems(lambda x: x <= 7)
        self.assertGreater(num_removed, 92)
        survivors_above_limit = [
            item for item in bucket.Items() if item > 7
        ]
        self.assertEqual([], survivors_above_limit)

        # float() keeps the division exact on interpreters with integer "/".
        removed_fraction = float(num_removed) / 100
        expected_seen = int(round(10000 * (1 - removed_fraction)))
        self.assertEqual(bucket._num_items_seen, expected_seen)
示例#7
0
    def testLazyFunctionEvaluationAndAlwaysKeepLast(self):
        """Counts AddItem callback invocations with always_keep_last off/on."""

        class FakeRandom(object):
            """Random-source stand-in whose randint always returns 999."""

            def randint(self, a, b):  # pylint:disable=unused-argument
                return 999

        class Incrementer(object):
            """Doubles its argument and records how many times it was called."""

            def __init__(self):
                self.n = 0

            def increment_and_double(self, x):
                self.n += 1
                return x * 2

        # With the random generator mocked, no item added after the reservoir
        # is full gets durable inclusion. always_keep_last is off here, so the
        # callback is expected to run only for the 100 items that fill the
        # reservoir. This laziness is an essential performance optimization.
        lazy_counter = Incrementer()
        lazy_bucket = reservoir._ReservoirBucket(
            100, FakeRandom(), always_keep_last=False)
        for value in range(1000):
            lazy_bucket.AddItem(value, lazy_counter.increment_and_double)
        self.assertEqual(lazy_counter.n, 100)
        first_hundred_doubled = [2 * n for n in range(100)]
        self.assertEqual(lazy_bucket.Items(), first_hundred_doubled)

        # With always_keep_last on, the most recent item is always kept, so
        # the callback is expected to run once for every item added.
        eager_counter = Incrementer()
        eager_bucket = reservoir._ReservoirBucket(
            100, FakeRandom(), always_keep_last=True)
        for value in range(1000):
            eager_bucket.AddItem(value, eager_counter.increment_and_double)
        self.assertEqual(eager_counter.n, 1000)
        expected_items = [2 * n for n in range(99)] + [999 * 2]
        self.assertEqual(eager_bucket.Items(), expected_items)
示例#8
0
  def testLazyFunctionEvaluationAndAlwaysKeepLast(self):
    """Counts AddItem callback invocations with always_keep_last off/on."""

    class FakeRandom(object):
      """Random-source stand-in whose randint always returns 999."""

      def randint(self, a, b):  # pylint:disable=unused-argument
        return 999

    class Incrementer(object):
      """Doubles its argument and records how many times it was called."""

      def __init__(self):
        self.n = 0

      def increment_and_double(self, x):
        self.n += 1
        return x * 2

    # We've mocked the randomness generator, so that once it is full, the last
    # item will never get durable reservoir inclusion. Since always_keep_last is
    # false, the function should only get invoked 100 times while filling up
    # the reservoir. This laziness property is an essential performance
    # optimization.
    bucket = reservoir._ReservoirBucket(
        100, FakeRandom(), always_keep_last=False)
    incrementer = Incrementer()
    # range (not the Python 2-only xrange) keeps this runnable on Python 3.
    for value in range(1000):
      bucket.AddItem(value, incrementer.increment_and_double)
    self.assertEqual(incrementer.n, 100)
    self.assertEqual(bucket.Items(), [x * 2 for x in range(100)])

    # This time, we will always keep the last item, meaning that the function
    # should get invoked once for every item we add.
    bucket = reservoir._ReservoirBucket(
        100, FakeRandom(), always_keep_last=True)
    incrementer = Incrementer()

    for value in range(1000):
      bucket.AddItem(value, incrementer.increment_and_double)
    self.assertEqual(incrementer.n, 1000)
    self.assertEqual(bucket.Items(), [x * 2 for x in range(99)] + [999 * 2])
示例#9
0
    def testBucketReservoirSamplingViaStatisticalProperties(self):
        """Bins the sampled items two ways and checks every bin count.

        Reads self.samples, self.total, self.n_buckets and
        self.total_per_bucket, and calls self.AssertBinomialQuantity; all of
        these are defined outside this method.
        """
        # NOTE: self.n_buckets is presumably not a 'ReservoirBucket' count but
        # the number of bins the samples are sorted into for testing the shape
        # of the distribution.
        bucket = reservoir._ReservoirBucket(_max_size=self.samples)
        # Add one extra item because we always keep the most recent item, which
        # would skew the distribution; we can just slice it off the end instead.
        # range (not the Python 2-only xrange) keeps this runnable on Python 3.
        for value in range(self.total + 1):
            bucket.AddItem(value)

        divbins = [0] * self.n_buckets
        modbins = [0] * self.n_buckets
        # Slice off the last item when we iterate.
        for item in bucket.Items()[:-1]:
            divbins[item // self.total_per_bucket] += 1
            modbins[item % self.n_buckets] += 1

        # Both lists hold n_buckets entries, so zip visits each bin index once.
        for divbin, modbin in zip(divbins, modbins):
            self.AssertBinomialQuantity(divbin)
            self.AssertBinomialQuantity(modbin)
示例#10
0
  def testBucketReservoirSamplingViaStatisticalProperties(self):
    """Bins the sampled items two ways and checks every bin count.

    Reads self.samples, self.total, self.n_buckets and
    self.total_per_bucket, and calls self.AssertBinomialQuantity; all of
    these are defined outside this method.
    """
    # NOTE: self.n_buckets is presumably not a 'ReservoirBucket' count but
    # the number of bins the samples are sorted into for testing the shape
    # of the distribution.
    bucket = reservoir._ReservoirBucket(_max_size=self.samples)
    # Add one extra item because we always keep the most recent item, which
    # would skew the distribution; we can just slice it off the end instead.
    # range (not the Python 2-only xrange) keeps this runnable on Python 3.
    for value in range(self.total + 1):
      bucket.AddItem(value)

    divbins = [0] * self.n_buckets
    modbins = [0] * self.n_buckets
    # Slice off the last item when we iterate.
    for item in bucket.Items()[:-1]:
      divbins[item // self.total_per_bucket] += 1
      modbins[item % self.n_buckets] += 1

    # Both lists hold n_buckets entries, so zip visits each bin index once.
    for divbin, modbin in zip(divbins, modbins):
      self.AssertBinomialQuantity(divbin)
      self.AssertBinomialQuantity(modbin)
示例#11
0
 def testEmptyBucket(self):
   """A bucket that has had nothing added reports falsy Items()."""
   untouched_bucket = reservoir._ReservoirBucket(1)
   contents = untouched_bucket.Items()
   self.assertFalse(contents)
示例#12
0
 def testDoesntOverfill(self):
   """A bucket constructed with 10 holds 10 items after 1000 adds.

   Also checks that _num_items_seen still counts all 1000 additions.
   """
   bucket = reservoir._ReservoirBucket(10)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3.
   for value in range(1000):
     bucket.AddItem(value)
   self.assertEqual(len(bucket.Items()), 10)
   self.assertEqual(bucket._num_items_seen, 1000)
示例#13
0
 def testSizeRequirement(self):
     """Constructing a bucket with -1 or with 10.3 raises ValueError."""
     # Callable form of assertRaises: the constructor is invoked with the
     # trailing argument and must raise ValueError each time.
     self.assertRaises(ValueError, reservoir._ReservoirBucket, -1)
     self.assertRaises(ValueError, reservoir._ReservoirBucket, 10.3)
示例#14
0
 def testSizeZeroBucket(self):
     """A bucket constructed with 0 is expected to retain every item added.

     After each of 20 additions, Items() must equal all values added so far.
     """
     bucket = reservoir._ReservoirBucket(0)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3
     # and matches the range() already used for the expected list below.
     for value in range(20):
         bucket.AddItem(value)
         self.assertEqual(bucket.Items(), list(range(value + 1)))
     self.assertEqual(bucket._num_items_seen, 20)
示例#15
0
 def testSizeOneBucket(self):
     """A bucket constructed with 1 holds only the most recently added item.

     After each of 20 additions, Items() must be a one-element list with the
     value just added; _num_items_seen must end at 20.
     """
     bucket = reservoir._ReservoirBucket(1)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3.
     for value in range(20):
         bucket.AddItem(value)
         self.assertEqual(bucket.Items(), [value])
     self.assertEqual(bucket._num_items_seen, 20)
示例#16
0
 def testKeepsLatestItem(self):
     """The final element of Items() is always the item added last.

     Checked after each of 100 additions to a bucket constructed with 5.
     """
     bucket = reservoir._ReservoirBucket(5)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3.
     for value in range(100):
         bucket.AddItem(value)
         newest = bucket.Items()[-1]
         self.assertEqual(newest, value)
示例#17
0
 def testKeepsLatestItem(self):
   """The final element of Items() is always the item added last.

   Checked after each of 100 additions to a bucket constructed with 5.
   """
   bucket = reservoir._ReservoirBucket(5)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3.
   for value in range(100):
     bucket.AddItem(value)
     newest = bucket.Items()[-1]
     self.assertEqual(newest, value)
示例#18
0
 def testDoesntOverfill(self):
     """A bucket constructed with 10 holds 10 items after 1000 adds.

     Also checks that _num_items_seen still counts all 1000 additions.
     """
     bucket = reservoir._ReservoirBucket(10)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3.
     for value in range(1000):
         bucket.AddItem(value)
     self.assertEqual(len(bucket.Items()), 10)
     self.assertEqual(bucket._num_items_seen, 1000)
示例#19
0
 def testFillToSize(self):
     """Adding exactly 100 items to a bucket built with 100 keeps them all.

     Items() must equal 0..99 in insertion order and _num_items_seen must
     be 100.
     """
     bucket = reservoir._ReservoirBucket(100)
     # range (not the Python 2-only xrange) keeps this runnable on Python 3.
     for value in range(100):
         bucket.AddItem(value)
     self.assertEqual(bucket.Items(), list(range(100)))
     self.assertEqual(bucket._num_items_seen, 100)
示例#20
0
 def testEmptyBucket(self):
     """A bucket that has had nothing added reports falsy Items()."""
     untouched_bucket = reservoir._ReservoirBucket(1)
     contents = untouched_bucket.Items()
     self.assertFalse(contents)
示例#21
0
 def testSizeOneBucket(self):
   """A bucket constructed with 1 holds only the most recently added item.

   After each of 20 additions, Items() must be a one-element list with the
   value just added; _num_items_seen must end at 20.
   """
   bucket = reservoir._ReservoirBucket(1)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3.
   for value in range(20):
     bucket.AddItem(value)
     self.assertEqual(bucket.Items(), [value])
   self.assertEqual(bucket._num_items_seen, 20)
示例#22
0
 def testFillToSize(self):
   """Adding exactly 100 items to a bucket built with 100 keeps them all.

   Items() must equal 0..99 in insertion order and _num_items_seen must
   be 100.
   """
   bucket = reservoir._ReservoirBucket(100)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3.
   for value in range(100):
     bucket.AddItem(value)
   self.assertEqual(bucket.Items(), list(range(100)))
   self.assertEqual(bucket._num_items_seen, 100)
示例#23
0
 def testSizeZeroBucket(self):
   """A bucket constructed with 0 is expected to retain every item added.

   After each of 20 additions, Items() must equal all values added so far.
   """
   bucket = reservoir._ReservoirBucket(0)
   # range (not the Python 2-only xrange) keeps this runnable on Python 3
   # and matches the range() already used for the expected list below.
   for value in range(20):
     bucket.AddItem(value)
     self.assertEqual(bucket.Items(), list(range(value + 1)))
   self.assertEqual(bucket._num_items_seen, 20)
示例#24
0
 def testSizeRequirement(self):
   """Constructing a bucket with -1 or with 10.3 raises ValueError."""
   # Callable form of assertRaises: the constructor is invoked with the
   # trailing argument and must raise ValueError each time.
   self.assertRaises(ValueError, reservoir._ReservoirBucket, -1)
   self.assertRaises(ValueError, reservoir._ReservoirBucket, 10.3)