def test_insert_same(self):
    """Re-adding an already-inserted value must not change the estimate."""
    sketch = HyperLogLogPlusPlus(random_seed=42)

    sketch.add(1)
    estimate_after_first = sketch.estimate_cardinality()

    # Insert the identical value a second time before re-estimating.
    sketch.add(1)
    estimate_after_second = sketch.estimate_cardinality()

    self.assertEqual(estimate_after_first, estimate_after_second)
  def insertion_test_helper(self, number_to_insert, acceptable_error=.05):
    """Insert `number_to_insert` distinct integers and check the estimate.

    Args:
      number_to_insert: how many values (0 .. number_to_insert - 1) are
        added to a freshly built sketch.
      acceptable_error: largest allowed absolute deviation from 1.0 of the
        ratio between the estimated and the true cardinality.
    """
    sketch = HyperLogLogPlusPlus(random_seed=137)
    for value in range(number_to_insert):
      sketch.add(value)

    estimate = sketch.estimate_cardinality()
    self.assertAlmostEqual(estimate / number_to_insert, 1.0,
                           delta=acceptable_error)
    def test_simple_estimate_smaller(self):
        """Check alpha and the estimate when every bucket holds the value 1."""
        num_buckets = self.vector_length
        sketch = HyperLogLogPlusPlus(length=num_buckets,
                                     random_seed=42,
                                     num_integer_bits=self.num_integer_bits)

        # Overwrite the bucket state directly instead of inserting values.
        sketch.buckets = np.ones(num_buckets)

        expected_alpha = 0.673
        # Presumably the raw HyperLogLog estimate alpha * m**2 / sum(2**-bucket)
        # with all buckets equal to 1 -- TODO confirm against the sketch code.
        # The operation order is kept unchanged so the float result is
        # identical for the exact-equality assertion below.
        expected_estimate = expected_alpha * num_buckets**2 * 2 / num_buckets

        self.assertEqual(expected_alpha, sketch.alpha)
        self.assertEqual(sketch.estimate_cardinality(), expected_estimate)
    def test_simple_estimate_larger(self):
        """Check alpha and the estimate for 2**14 buckets all holding 30."""
        num_buckets = 2**14
        sketch = HyperLogLogPlusPlus(length=num_buckets,
                                     random_seed=42,
                                     num_integer_bits=self.num_integer_bits)

        # Overwrite the bucket state directly instead of inserting values.
        sketch.buckets = 30 * np.ones(num_buckets)

        expected_alpha = 0.7213 / (1 + 1.079 / num_buckets)
        # Presumably the raw HyperLogLog estimate alpha * m**2 / sum(2**-bucket)
        # with all buckets equal to 30 -- TODO confirm against the sketch code.
        # The operation order is kept unchanged so the float result is
        # identical for the exact-equality assertion below.
        expected_estimate = expected_alpha * num_buckets**2 * 2**30 / num_buckets

        self.assertEqual(expected_alpha, sketch.alpha)
        self.assertEqual(sketch.estimate_cardinality(), expected_estimate)
  def test_merge_sparse_with_dense(self):
    """Merge a one-element sketch into a heavily filled one and check it.

    The merged sketch must not be in sparse mode, must differ from the
    filled sketch in exactly one bucket, must have an empty temp set, and
    must report a larger cardinality than the filled sketch alone.
    """
    num_buckets = 16

    small_sketch = HyperLogLogPlusPlus(length=num_buckets, random_seed=234)
    small_sketch.add(100)

    # Presumably enough insertions to push this sketch out of sparse mode;
    # verify against the sketch's sparse threshold.
    filled_sketch = HyperLogLogPlusPlus(length=num_buckets, random_seed=234)
    for value in range(num_buckets * 6 + 1):
      filled_sketch.add(value)

    merged = small_sketch.merge(filled_sketch)

    self.assertFalse(merged.sparse_mode,
                     'Merged sketch should not be in sparse mode.')

    # Should change one bucket value given this random seed.
    unchanged_buckets = sum(filled_sketch.buckets == merged.buckets)
    self.assertEqual(unchanged_buckets, num_buckets - 1,
                     'Merged sketch is not correct.')

    self.assertSameElements(merged.temp_set, set(),
                            'Temp set is not correct.')

    merged_estimate = merged.estimate_cardinality()
    self.assertGreater(merged_estimate, filled_sketch.estimate_cardinality())