def _hll_merge(v: pd.DataFrame) -> bytes:
    """Fold every register dump yielded by ``v`` into a single sketch.

    Each element produced by iterating ``v`` is converted to a
    ``bytearray``, loaded into a scratch ``HyperLogLog`` and merged into an
    accumulator. The accumulator's registers are returned.

    NOTE(review): ``k`` and ``HyperLogLog`` are resolved from the enclosing
    scope; elements of ``v`` are presumably serialized registers of sketches
    built with the same ``k`` -- confirm against the caller.
    """
    merged = HyperLogLog(k)
    scratch = HyperLogLog(k)  # one scratch sketch, overwritten per element
    for raw_registers in map(bytearray, v):
        scratch.set_registers(raw_registers)
        merged.merge(scratch)
    return merged.registers()
def _hll_merge(v):
    """Combine the register arrays found in ``v`` into one register array.

    Every item yielded by ``v`` is copied into a ``bytearray``, installed in
    a temporary sketch, and merged into the result sketch whose registers
    are finally returned.

    NOTE(review): depends on ``k`` and ``HyperLogLog`` from the surrounding
    scope; items are assumed to be bytes-like register dumps -- verify.
    """
    result = HyperLogLog(k)
    temp = HyperLogLog(k)  # overwritten on each iteration
    for item in v:
        registers = bytearray(item)
        temp.set_registers(registers)
        result.merge(temp)
    return result.registers()
class TestRegisterFunctions(unittest.TestCase):
    """Tests for reading and writing individual HyperLogLog registers."""

    def setUp(self):
        self.k = 5
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        # A sketch built with k has 2**k registers, so the last valid index
        # is 2**k - 1 (not k - 1).
        last = pow(2, self.k) - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # Index 2**k is one past the last register.
        with self.assertRaises(IndexError):
            self.hll.set_register(32, 1)

    def test_set_register_with_negative_index_fails(self):
        # NOTE(review): despite the name, this passes a negative *value*
        # (index 0, value -1), duplicating the negative-value test above.
        # The exception raised for a negative index is not visible here, so
        # the call is left unchanged -- confirm and switch to
        # set_register(-1, 1) with the right exception type.
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_bytesarray_has_correct_values(self):
        # Cover every register, including the last one.
        size = pow(2, self.k)
        expected = bytearray(randint(0, 16) for _ in range(size))
        for index, value in enumerate(expected):
            self.hll.set_register(index, value)

        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        self.assertIs(type(self.hll.registers()), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), pow(2, self.k))
def _hll_init(v):
    """Map each element of ``v`` to the registers of a one-element sketch.

    A single sketch is reused for all elements: before each element it is
    reset to the registers captured right after construction, then the
    element's string form is added unless the element is ``None``. ``None``
    elements therefore yield the freshly-constructed registers.

    NOTE(review): relies on ``k`` and ``HyperLogLog`` from the enclosing
    scope, and assumes ``registers()`` returns a snapshot that later
    ``add`` calls do not mutate -- confirm.
    """
    sketch = HyperLogLog(k)
    empty_registers = sketch.registers()

    def _registers_for(value):
        # Reset the shared sketch so elements do not accumulate.
        sketch.set_registers(empty_registers)
        if value is not None:
            sketch.add(str(value))
        return sketch.registers()

    return v.apply(_registers_for)
def _hll_init_agg(v: pd.DataFrame) -> bytes:
    """Aggregate a mix of raw values and register dumps into one sketch.

    For each element yielded by iterating ``v``:

    * ``None`` is skipped;
    * ``bytes`` / ``bytearray`` elements are loaded into a scratch sketch
      and merged into the accumulator;
    * anything else is added to the accumulator via its ``str()`` form.

    Returns the accumulator's registers.

    NOTE(review): ``k`` and ``HyperLogLog`` come from the enclosing scope.
    """
    accumulator = HyperLogLog(k)
    scratch = HyperLogLog(k)  # overwritten for each register dump
    for element in v:
        if element is None:
            continue
        if isinstance(element, (bytes, bytearray)):
            scratch.set_registers(bytearray(element))
            accumulator.merge(scratch)
        else:
            accumulator.add(str(element))
    return accumulator.registers()
def test_merge(self):
    """Merging keeps the registers that were set in either sketch."""
    left = HyperLogLog(2)
    right = HyperLogLog(2)
    left.set_register(0, 1)
    right.set_register(3, 1)

    left.merge(right)

    # Registers 0 and 3 are set; 1 and 2 stay at zero.
    self.assertEqual(left.registers(), bytearray([1, 0, 0, 1]))
class TestRegisterFunctions(unittest.TestCase):
    """Tests for reading and writing HyperLogLog registers."""

    def setUp(self):
        self.k = 5
        self.hll = HyperLogLog(self.k)

    def _random_registers(self):
        """Return a bytearray with one random value per register."""
        return bytearray(randint(0, 16) for _ in range(pow(2, self.k)))

    def test_set_last_register(self):
        # A sketch built with k has 2**k registers, so the last valid index
        # is 2**k - 1 (not k - 1).
        last = pow(2, self.k) - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # Index 2**k is one past the last register.
        with self.assertRaises(IndexError):
            self.hll.set_register(32, 1)

    def test_set_register_with_negative_index_fails(self):
        # NOTE(review): despite the name, this passes a negative *value*
        # (index 0, value -1), duplicating the negative-value test above.
        # The exception raised for a negative index is not visible here, so
        # the call is left unchanged -- confirm and switch to
        # set_register(-1, 1) with the right exception type.
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_bytesarray_has_correct_values(self):
        expected = self._random_registers()
        for index, value in enumerate(expected):
            self.hll.set_register(index, value)

        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        self.assertIs(type(self.hll.registers()), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), pow(2, self.k))

    def test_set_registers(self):
        expected = self._random_registers()
        self.hll.set_registers(expected)

        self.assertEqual(expected, self.hll.registers())
def test_k_param_determines_the_number_of_registers(self):
    """A sketch built with k=5 reports 32 registers."""
    expected_count = 32
    sketch = HyperLogLog(5)

    # Both the register array length and size() must agree on the count.
    self.assertEqual(len(sketch.registers()), expected_count)
    self.assertEqual(sketch.size(), expected_count)
def test_all_registers_initialized_to_zero(self):
    """Every register of a freshly constructed sketch equals 0."""
    fresh = HyperLogLog(5)
    for value in fresh.registers():
        self.assertEqual(value, 0)