def test_init():
    """Verify constructor sizing and rejection of a zero length.

    A counter built with length 8000 must report ``sizeof() == 1000``
    (bytes, per the assertion message), and a length of 0 must raise
    ``ValueError`` carrying the exact message checked below.
    """
    counter = LinearCounter(8000)
    assert counter.sizeof() == 1000, "Unexpected size in bytes"

    expected_message = 'Counter length can\'t be 0 or negative'
    with pytest.raises(ValueError) as excinfo:
        LinearCounter(0)
    # Inspect the captured exception only after the context manager exits.
    assert str(excinfo.value) == expected_message
def test_count_error_rate():
    """Check the average relative error of ``count()`` over 100 insertions.

    After each of 100 distinct elements is added, the relative error between
    the true cardinality and ``lc.count()`` is recorded; the mean of those
    errors must lie in ``[0, 0.1]``.

    Named distinctly from ``test_count``: this module also defines a later
    ``test_count``, and a second ``def`` with the same name rebinds it, so
    under the shared name only the last definition would be collected.
    """
    lc = LinearCounter(100000)

    errors = []
    # ``cardinality`` is the number of distinct elements added so far; the
    # element label keeps the original zero-based numbering.
    for cardinality in range(1, 101):
        lc.add("element_{}".format(cardinality - 1))
        errors.append(abs(cardinality - lc.count()) / float(cardinality))

    avg_error = sum(errors) / float(len(errors))
    assert avg_error >= 0
    assert avg_error <= 0.1
def test_len():
    """Check ``len()`` for counters requested with lengths 8000 and 8001.

    The expected results are 8000 and 8008 respectively.
    NOTE(review): presumably the length is rounded up to a multiple of 8 --
    confirm against the LinearCounter implementation.
    """
    expected_by_requested = ((8000, 8000), (8001, 8008))
    for requested, expected in expected_by_requested:
        assert len(LinearCounter(requested)) == expected
def test_count():
    """Check ``count()`` on an empty counter, with duplicates, and on a text.

    First a fresh counter must report 0, then repeated and new elements must
    move the count as listed in the table below. Finally, every
    whitespace-separated word of ``LOREM_TEXT["text"]`` is added to a new
    counter and the count must equal ``LOREM_TEXT["num_of_unique_words"]``.
    """
    counter = LinearCounter(100000)
    assert counter.count() == 0

    # (element to add, expected count afterwards)
    steps = (("test", 1), ("test", 1), ("test2", 2))
    for element, expected in steps:
        counter.add(element)
        assert counter.count() == expected

    # Drop the first counter before building the one used for the text.
    del counter
    counter = LinearCounter(100000)
    words = LOREM_TEXT["text"].split()
    for word in words:
        counter.add(word)

    assert counter.count() == LOREM_TEXT["num_of_unique_words"]
def test_add():
    """Smoke-test ``add()`` with a str, an int and a dict.

    There are no assertions: the test passes as long as no call raises.
    """
    counter = LinearCounter(8000)
    samples = ["test", 1, {"hello": "world"}]
    for sample in samples:
        counter.add(sample)
def test_repr():
    """Check the ``repr()`` text of a counter created with length 8000."""
    counter = LinearCounter(8000)
    expected = "<LinearCounter (length: 8000)>"
    assert repr(counter) == expected
def test_count_small():
    """Check ``count()`` for a handful of elements, including a duplicate.

    A fresh counter must report 0; adding "test" twice must leave the count
    at 1, and adding "test2" afterwards must raise it to 2.
    """
    counter = LinearCounter(100000)
    assert counter.count() == 0

    # (element to add, expected count afterwards)
    steps = (("test", 1), ("test", 1), ("test2", 2))
    for element, expected in steps:
        counter.add(element)
        assert counter.count() == expected
if __name__ == "__main__": consumer = KafkaConsumer( 'sunday', bootstrap_servers=['localhost:9092'], auto_offset_reset='earliest', value_deserializer=lambda x: json.loads(x.decode('utf-8'))) total_bytes = 0 consumer_start = time.time() msg_consumed_max = 1000000 msg_consumed_count = 0 # one minute window previous_window = 0 users_bitmap = LinearCounter(60000) for message in consumer: json_msg = message.value total_bytes = total_bytes + utf8len(json_msg) ts = json_msg['ts'] uid = json_msg['uid'] # convert ts(seconds) to minutes - using it as the 'minute window' current_window = int(ts / 60) if previous_window != current_window: # current minute window changed # print for the previous window the unique users count if previous_window > 0: print_minute_stats(previous_window, users_bitmap.count())