def test_quantile():
    """Check the 0.5 quantile of a 3-bin histogram fed three weighted values.

    The values 16, 23 and 28 are inserted with counts 4, 3 and 5; the
    median is expected to be approximately 23.625.
    """
    weighted_samples = ((16, 4), (23, 3), (28, 5))

    hist = distogram.Distogram(bin_count=3)
    for value, weight in weighted_samples:
        hist = distogram.update(hist, value, count=weight)

    median = distogram.quantile(hist, 0.5)
    assert median == approx(23.625)
def test_count_at_not_enough_elements():
    """Check count_at on a default histogram holding only three points.

    After inserting 1, 2 and 3, the count at 2.5 must be exactly 2.
    """
    hist = distogram.Distogram()
    for value in (1, 2, 3):
        hist = distogram.update(hist, value)

    count_below = distogram.count_at(hist, 2.5)
    assert count_below == 2
def test_count():
    """Check that count() tracks the total weight after each update.

    An empty histogram reports 0; the total then grows to 4, 7 and 12 as
    weighted values are inserted.
    """
    # (value, weight, expected total count after the insert)
    steps = (
        (16, 4, 4),
        (23, 3, 7),
        (28, 5, 12),
    )

    hist = distogram.Distogram(bin_count=3)
    assert distogram.count(hist) == 0

    for value, weight, expected_total in steps:
        hist = distogram.update(hist, value, count=weight)
        assert distogram.count(hist) == expected_total
def test_count_at():
    """Check count_at on a 3-bin histogram filled with weighted values.

    The values 16, 23 and 28 are inserted with counts 4, 3 and 5; the
    count at 25 is expected to be approximately 6.86.
    """
    h = distogram.Distogram(bin_count=3)

    # fill histogram
    h = distogram.update(h, 16, count=4)
    h = distogram.update(h, 23, count=3)
    h = distogram.update(h, 28, count=5)

    actual_result = distogram.count_at(h, 25)
    assert actual_result == approx(6.859999999)
def test_count_at_right():
    """Check count_at for a query near the upper end of the inserted data.

    Eight samples (largest 6.7) go into a 6-bin histogram; the count at
    6.5 is expected to be approximately 7.3077.
    """
    samples = (1, 2, 3, 4, 5, 6, 6.7, 6.1)

    hist = distogram.Distogram(bin_count=6)
    for sample in samples:
        hist = distogram.update(hist, sample)

    estimated = distogram.count_at(hist, 6.5)
    assert estimated == approx(7.307692307692308)
def test_quantile_not_enough_elemnts():
    """Check the median when fewer samples (6) than bins (10) were inserted.

    The 0.5 quantile is expected to be approximately 13.14.
    """
    samples = (12.3, 5.4, 8.2, 100.53, 23.5, 13.98)

    hist = distogram.Distogram(bin_count=10)
    for sample in samples:
        hist = distogram.update(hist, sample)

    median = distogram.quantile(hist, 0.5)
    assert median == approx(13.14)
def test_count_at_left():
    """Check count_at for a query near the lower end of the inserted data.

    Eight samples (smallest 0.7) go into a 6-bin histogram; the count at
    0.77 is expected to be approximately 0.14.
    """
    samples = (1, 2, 3, 4, 5, 6, 0.7, 1.1)

    hist = distogram.Distogram(bin_count=6)
    for sample in samples:
        hist = distogram.update(hist, sample)

    estimated = distogram.count_at(hist, 0.77)
    assert estimated == approx(0.14)
def test_quantile_out_of_bouns():
    """Check that quantile() returns None for the arguments -0.2 and 10."""
    samples = (1, 2, 3, 4, 5, 6, 6.7, 6.1)

    hist = distogram.Distogram()
    for sample in samples:
        hist = distogram.update(hist, sample)

    for invalid_quantile in (-0.2, 10):
        assert distogram.quantile(hist, invalid_quantile) is None
def test_count_at_out_of_bouns():
    """Check that count_at() returns None outside the range of inserted data.

    The samples span 1 to 6.7; the queries 0.2 and 10 fall on either side.
    """
    samples = (1, 2, 3, 4, 5, 6, 6.7, 6.1)

    hist = distogram.Distogram()
    for sample in samples:
        hist = distogram.update(hist, sample)

    for outside_value in (0.2, 10):
        assert distogram.count_at(hist, outside_value) is None
def test_update():
    """Check the bins of a 3-bin histogram after each single-value update.

    Covers three phases: filling the bins, incrementing the count of values
    that are already present, and inserting a fourth distinct value (26).
    """
    hist = distogram.Distogram(bin_count=3)

    # Each step inserts one value and lists the exact bins expected afterwards.
    exact_steps = (
        # fill histogram
        (23, [(23, 1)]),
        (28, [(23, 1), (28, 1)]),
        (16, [(16, 1), (23, 1), (28, 1)]),
        # update count on existing value
        (23, [(16, 1), (23, 2), (28, 1)]),
        (28, [(16, 1), (23, 2), (28, 2)]),
        (16, [(16, 2), (23, 2), (28, 2)]),
    )
    for value, expected_bins in exact_steps:
        hist = distogram.update(hist, value)
        assert hist.bins == expected_bins

    # merge values: the first two bins stay as they were, while the last one
    # is expected to move to ~27.33333 with a count of 3.
    hist = distogram.update(hist, 26)
    bins = hist.bins
    assert bins[0] == (16, 2)
    assert bins[1] == (23, 2)
    merged_bin = bins[2]
    assert merged_bin[0] == approx(27.33333)
    assert merged_bin[1] == 3
def test_count_at_normal():
    """Check count_at(0) on 10000 standard-normal samples.

    The count at 0 is expected to be half of the sample count, within a
    5% relative tolerance.
    """
    points = 10000
    samples = [random.normalvariate(0.0, 1.0) for _ in range(points)]

    hist = distogram.Distogram()
    for sample in samples:
        hist = distogram.update(hist, sample)

    expected_half = points / 2
    assert distogram.count_at(hist, 0) == approx(expected_half, rel=0.05)
def test_bounds():
    """Check that bounds() reports the exact min and max of the inserted data."""
    samples = [random.normalvariate(0.0, 1.0) for _ in range(10000)]

    hist = distogram.Distogram()
    for sample in samples:
        hist = distogram.update(hist, sample)

    lower, upper = distogram.bounds(hist)
    assert lower == min(samples)
    assert upper == max(samples)
def test_stats():
    """Compare mean, variance and stddev against NumPy on the same samples.

    Each histogram statistic must match its NumPy counterpart within an
    absolute tolerance of 0.1.
    """
    samples = [random.normalvariate(0.0, 1.0) for _ in range(10000)]

    hist = distogram.Distogram()
    for sample in samples:
        hist = distogram.update(hist, sample)

    # (histogram statistic, NumPy reference), checked in this order.
    checks = (
        (distogram.mean, np.mean),
        (distogram.variance, np.var),
        (distogram.stddev, np.std),
    )
    for estimate, reference in checks:
        assert estimate(hist) == approx(reference(samples), abs=0.1)
def test_quantile_on_right():
    """Compare high quantiles (0.99, 0.85) against NumPy on the same data.

    Ten samples go into a 6-bin histogram; each quantile must match
    np.quantile within a 1% relative tolerance.
    """
    data = [12.3, 8.2, 100.53, 23.5, 13.98, 200, 200.2, 200.8, 200.4, 200.1]

    hist = distogram.Distogram(bin_count=6)
    for sample in data:
        hist = distogram.update(hist, sample)

    for q in (0.99, 0.85):
        reference = np.quantile(data, q)
        assert distogram.quantile(hist, q) == approx(reference, rel=0.01)
def test_normal():
    """Compare the 0.5 and 0.95 quantiles against NumPy on normal samples.

    10000 standard-normal samples go into a 64-bin histogram; each quantile
    must match np.quantile within an absolute tolerance of 0.2.
    """
    samples = [random.normalvariate(0.0, 1.0) for _ in range(10000)]

    hist = distogram.Distogram(bin_count=64)
    for sample in samples:
        hist = distogram.update(hist, sample)

    for q in (0.5, 0.95):
        reference = np.quantile(samples, q)
        assert distogram.quantile(hist, q) == approx(reference, abs=0.2)
def test_quantile_on_left():
    """Compare low quantiles (0.01, 0.05, 0.25) against NumPy on the same data.

    Ten samples go into a 6-bin histogram; each quantile must match
    np.quantile within the relative tolerance listed next to it.
    """
    data = [12.3, 5.2, 5.4, 4.9, 5.5, 5.6, 8.2, 30.53, 23.5, 13.98]

    hist = distogram.Distogram(bin_count=6)
    for sample in data:
        hist = distogram.update(hist, sample)

    # (quantile, relative tolerance)
    checks = (
        (0.01, 0.01),
        (0.05, 0.05),
        (0.25, 0.05),
    )
    for q, tolerance in checks:
        reference = np.quantile(data, q)
        assert distogram.quantile(hist, q) == approx(reference, rel=tolerance)
# Benchmark script: measures the mean time needed to feed a large normal
# distribution into a Distogram, one value at a time.
#
# Usage: pass --enable-np as the first argument to feed NumPy scalars;
# otherwise the samples are converted to a plain Python list first.
import sys
import time

import numpy as np
import distogram

# Number of timed passes over the whole distribution; the mean is reported.
RUN_COUNT = 5

print("generating distribution...")
utterance_count = 10000000
distribution = np.random.normal(size=utterance_count)
# To benchmark a uniform distribution instead, generate it with
# np.random.uniform(size=utterance_count).
if len(sys.argv) >= 2 and sys.argv[1] == '--enable-np':
    print('using numpy types')
else:
    print('using python types')
    distribution = distribution.tolist()
print("distribution generated")

print("running update bench ({} times)...".format(RUN_COUNT))
total_time = 0
for _ in range(RUN_COUNT):
    # perf_counter is the clock intended for measuring short durations.
    start_time = time.perf_counter()
    h = distogram.Distogram()
    for value in distribution:
        h = distogram.update(h, value)
    end_time = time.perf_counter()
    total_time += end_time - start_time

# Report the mean duration of one full pass and the derived update rate.
total_time /= RUN_COUNT
print("ran update bench in {} seconds (mean of {} runs)".format(
    total_time, RUN_COUNT))
print("req/s: {}".format(utterance_count // total_time))