def _hll_merge(v: pd.DataFrame) -> bytes:
    """Fold every serialized register array in ``v`` into one sketch.

    Each element obtained by iterating ``v`` is copied into a ``bytearray``,
    loaded into a reusable scratch ``HyperLogLog`` and merged into the
    accumulator. The precision ``k`` is read from the enclosing scope.

    NOTE(review): ``v`` is annotated as ``pd.DataFrame``, but iterating a
    DataFrame yields its column labels; the loop presumably receives a
    Series of register blobs -- confirm against the caller.

    Returns:
        The value of ``registers()`` on the merged sketch.
    """
    merged, scratch = HyperLogLog(k), HyperLogLog(k)
    for blob in v:
        # Overwrite the scratch sketch's registers, then fold it in.
        scratch.set_registers(bytearray(blob))
        merged.merge(scratch)
    return merged.registers()
def _hll_merge(v):
    """Combine the register arrays found in ``v`` into a single sketch.

    Every item produced by iterating ``v`` is converted to a ``bytearray``
    and installed into a temporary ``HyperLogLog`` via ``set_registers``;
    that temporary sketch is then merged into the running result. Both
    sketches are created with ``k`` taken from the enclosing scope.

    Returns:
        ``registers()`` of the combined sketch.
    """
    combined = HyperLogLog(k)
    staging = HyperLogLog(k)
    for raw_registers in v:
        register_copy = bytearray(raw_registers)
        staging.set_registers(register_copy)
        combined.merge(staging)
    return combined.registers()
def _hll_init_agg(v: pd.DataFrame) -> bytes:
    """Aggregate a mix of raw values and serialized sketches into one sketch.

    For each item obtained by iterating ``v``:

    * ``None`` is skipped;
    * ``bytes`` / ``bytearray`` items are loaded as registers into a scratch
      ``HyperLogLog`` and merged into the accumulator;
    * anything else is added to the accumulator as ``str(item)``.

    The precision ``k`` is read from the enclosing scope.

    NOTE(review): ``v`` is annotated as ``pd.DataFrame``, but iterating a
    DataFrame yields its column labels; presumably a Series is passed in --
    confirm against the caller.

    Returns:
        The value of ``registers()`` on the accumulated sketch.
    """
    accumulator = HyperLogLog(k)
    scratch = HyperLogLog(k)
    for item in v:
        if item is None:
            continue
        if not isinstance(item, (bytes, bytearray)):
            # Plain value: count it by its string form.
            accumulator.add(str(item))
            continue
        # Bytes-like value: treat it as a serialized register array.
        scratch.set_registers(bytearray(item))
        accumulator.merge(scratch)
    return accumulator.registers()
def _stats_from_json(json: Dict[str, Any]) -> Dict[str, Any]: stats = {} if 'messages_sent' in json: stats['messages_sent'] = json['messages_sent'] if 'messages_received' in json: stats['messages_received'] = json['messages_received'] if 'users_active' in json: hll = HyperLogLog(12) hll.set_registers(bytearray(json['users_active'])) stats['users_active'] = hll return stats
class TestRegisterFunctions(unittest.TestCase):
    """Tests for the HyperLogLog register accessors.

    Covers ``set_register``, ``set_registers`` and ``registers`` on a sketch
    built with ``k = 5``, which these tests expect to expose ``2 ** k``
    registers.
    """

    def setUp(self):
        self.k = 5
        # Derive the register count once so no test repeats the literal 32.
        self.size = 2 ** self.k
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        # The last register is at index size - 1, not k - 1.
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # First index past the end of the register array.
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        # Pass a negative *index*; this test previously duplicated the
        # negative-value case and never exercised the index check.
        # NOTE(review): the exact exception type for a negative index is not
        # visible from this file, so either rejection is accepted -- tighten
        # once confirmed against the library.
        with self.assertRaises((IndexError, ValueError)):
            self.hll.set_register(-1, 1)

    def test_bytesarray_has_correct_values(self):
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)
        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        # Exact type check (not isinstance), as in the original assertion.
        self.assertIs(type(self.hll.registers()), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), self.size)

    def test_set_registers(self):
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        self.hll.set_registers(expected)
        self.assertEqual(expected, self.hll.registers())
class TestRegisterFunctions(unittest.TestCase):
    """Tests for the HyperLogLog register accessors.

    Covers ``set_register``, ``set_registers`` and ``registers`` on a sketch
    built with ``k = 5``, which these tests expect to expose ``2 ** k``
    registers.
    """

    def setUp(self):
        self.k = 5
        # Derive the register count once so no test repeats the literal 32.
        self.size = 2 ** self.k
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        # The last register is at index size - 1, not k - 1.
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # First index past the end of the register array.
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        # Pass a negative *index*; this test previously duplicated the
        # negative-value case and never exercised the index check.
        # NOTE(review): the exact exception type for a negative index is not
        # visible from this file, so either rejection is accepted -- tighten
        # once confirmed against the library.
        with self.assertRaises((IndexError, ValueError)):
            self.hll.set_register(-1, 1)

    def test_bytesarray_has_correct_values(self):
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)
        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        # Exact type check (not isinstance), as in the original assertion.
        self.assertIs(type(self.hll.registers()), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), self.size)

    def test_set_registers(self):
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        self.hll.set_registers(expected)
        self.assertEqual(expected, self.hll.registers())