示例#1
0
def test_quantile():
    """Pin quantile(h, 0.5) for three weighted points in a 3-bin histogram."""
    # (value, weight) pairs fed to the histogram, in insertion order.
    weighted_points = ((16, 4), (23, 3), (28, 5))

    hist = distogram.Distogram(bin_count=3)
    for value, weight in weighted_points:
        hist = distogram.update(hist, value, count=weight)

    median = distogram.quantile(hist, 0.5)
    assert median == approx(23.625)
示例#2
0
def test_count_at_not_enough_elements():
    """Pin count_at(h, 2.5) == 2 on a default histogram fed 1, 2 and 3."""
    hist = distogram.Distogram()
    for sample in (1, 2, 3):
        hist = distogram.update(hist, sample)

    counted = distogram.count_at(hist, 2.5)
    assert counted == 2
示例#3
0
def test_count():
    """Check that count() equals the running sum of the inserted weights."""
    hist = distogram.Distogram(bin_count=3)
    assert distogram.count(hist) == 0

    # (value, weight, total expected once this update has been applied)
    steps = (
        (16, 4, 4),
        (23, 3, 7),
        (28, 5, 12),
    )
    for value, weight, expected_total in steps:
        hist = distogram.update(hist, value, count=weight)
        assert distogram.count(hist) == expected_total
示例#4
0
def test_count_at():
    """Pin count_at(h, 25) for three weighted points in a 3-bin histogram."""
    hist = distogram.Distogram(bin_count=3)
    print(hist)  # state before any update

    # Populate the histogram with (value, weight) pairs.
    for value, weight in ((16, 4), (23, 3), (28, 5)):
        hist = distogram.update(hist, value, count=weight)
    print(hist)  # state after the three updates

    expected = approx(6.859999999)
    actual_result = distogram.count_at(hist, 25)
    assert actual_result == expected
示例#5
0
def test_count_at_right():
    """Pin count_at(h, 6.5) for eight samples in a 6-bin histogram."""
    # More distinct samples (8) than bins (6); the two extra values, 6.7
    # and 6.1, sit at the upper end of the data.
    samples = [1, 2, 3, 4, 5, 6, 6.7, 6.1]

    hist = distogram.Distogram(bin_count=6)
    for sample in samples:
        hist = distogram.update(hist, sample)

    estimate = distogram.count_at(hist, 6.5)
    assert estimate == approx(7.307692307692308)
示例#6
0
def test_quantile_not_enough_elemnts():
    """Pin the median when fewer samples (6) than bins (10) were inserted."""
    samples = [12.3, 5.4, 8.2, 100.53, 23.5, 13.98]

    hist = distogram.Distogram(bin_count=10)
    for sample in samples:
        hist = distogram.update(hist, sample)

    median = distogram.quantile(hist, 0.5)
    assert median == approx(13.14)
示例#7
0
def test_count_at_left():
    """Pin count_at(h, 0.77) for eight samples in a 6-bin histogram."""
    # More distinct samples (8) than bins (6); the two extra values, 0.7
    # and 1.1, sit at the lower end of the data.
    samples = [1, 2, 3, 4, 5, 6, 0.7, 1.1]

    hist = distogram.Distogram(bin_count=6)
    for sample in samples:
        hist = distogram.update(hist, sample)

    estimate = distogram.count_at(hist, 0.77)
    assert estimate == approx(0.14)
示例#8
0
def test_quantile_out_of_bouns():
    """Expect quantile() to return None for the arguments -0.2 and 10."""
    hist = distogram.Distogram()
    for sample in (1, 2, 3, 4, 5, 6, 6.7, 6.1):
        hist = distogram.update(hist, sample)

    # One argument below 0 and one above 1.
    for rejected_quantile in (-0.2, 10):
        assert distogram.quantile(hist, rejected_quantile) is None
示例#9
0
def test_count_at_out_of_bouns():
    """Expect count_at() to return None outside the inserted value range."""
    hist = distogram.Distogram()
    for sample in (1, 2, 3, 4, 5, 6, 6.7, 6.1):
        hist = distogram.update(hist, sample)

    # 0.2 is below the smallest sample (1), 10 above the largest (6.7).
    for outside_value in (0.2, 10):
        assert distogram.count_at(hist, outside_value) is None
示例#10
0
def test_update():
    """Walk a 3-bin histogram through insert, re-insert and merge steps.

    The expected ``bins`` content is checked after every single update.
    """
    hist = distogram.Distogram(bin_count=3)

    # (inserted value, full bin list expected right after the insertion)
    exact_steps = [
        # three distinct values: one (value, count) bin each, listed in
        # ascending value order
        (23, [(23, 1)]),
        (28, [(23, 1), (28, 1)]),
        (16, [(16, 1), (23, 1), (28, 1)]),
        # values that already have a bin only raise that bin's count
        (23, [(16, 1), (23, 2), (28, 1)]),
        (28, [(16, 1), (23, 2), (28, 2)]),
        (16, [(16, 2), (23, 2), (28, 2)]),
    ]
    for value, expected_bins in exact_steps:
        hist = distogram.update(hist, value)
        assert hist.bins == expected_bins

    # A fourth distinct value does not fit into bin_count=3: the first two
    # bins are expected unchanged and the third one at (2 * 28 + 26) / 3
    # with a count of 3.
    hist = distogram.update(hist, 26)
    assert hist.bins[0] == (16, 2)
    assert hist.bins[1] == (23, 2)
    merged_bin = hist.bins[2]
    assert merged_bin[0] == approx(27.33333)
    assert merged_bin[1] == 3
示例#11
0
def test_count_at_normal():
    """Check that count_at(h, 0) is about half of a standard-normal sample.

    The sample is drawn from a locally seeded generator, so a failing run
    can be reproduced exactly and the global ``random`` state is left
    untouched.
    """
    points = 10000
    rng = random.Random(42)  # fixed seed: identical sample on every run
    normal = [rng.normalvariate(0.0, 1.0) for _ in range(points)]
    h = distogram.Distogram()

    for value in normal:
        h = distogram.update(h, value)

    # N(0, 1) is symmetric around 0, hence the points / 2 target; the
    # assertion tolerates a 5 % relative deviation from it.
    assert distogram.count_at(h, 0) == approx(points / 2, rel=0.05)
示例#12
0
def test_bounds():
    """Check that bounds() returns the exact min and max of the samples."""
    samples = [random.normalvariate(0.0, 1.0) for _ in range(10000)]

    hist = distogram.Distogram()
    for sample in samples:
        hist = distogram.update(hist, sample)

    lowest, highest = distogram.bounds(hist)
    assert lowest == min(samples)
    assert highest == max(samples)
示例#13
0
def test_stats():
    """Compare mean, variance and stddev with numpy on the same sample.

    The sample is drawn from a locally seeded generator, so a failing run
    can be reproduced exactly and the global ``random`` state is left
    untouched.
    """
    rng = random.Random(42)  # fixed seed: identical sample on every run
    normal = [rng.normalvariate(0.0, 1.0) for _ in range(10000)]
    h = distogram.Distogram()

    for value in normal:
        h = distogram.update(h, value)

    # Each estimate may deviate from numpy's exact result by at most 0.1.
    assert distogram.mean(h) == approx(np.mean(normal), abs=0.1)
    assert distogram.variance(h) == approx(np.var(normal), abs=0.1)
    assert distogram.stddev(h) == approx(np.std(normal), abs=0.1)
示例#14
0
def test_quantile_on_right():
    """Compare high quantiles with numpy when data clusters near the top.

    Five of the ten samples lie between 200 and 200.8, and the histogram
    has fewer bins (6) than there are samples.
    """
    data = [12.3, 8.2, 100.53, 23.5, 13.98, 200, 200.2, 200.8, 200.4, 200.1]

    hist = distogram.Distogram(bin_count=6)
    for sample in data:
        hist = distogram.update(hist, sample)

    # (quantile, relative tolerance against numpy's exact value)
    for q, tolerance in ((0.99, 0.01), (0.85, 0.01)):
        exact = np.quantile(data, q)
        assert distogram.quantile(hist, q) == approx(exact, rel=tolerance)
示例#15
0
def test_normal():
    """Compare the 0.5 and 0.95 quantiles with numpy on a normal sample.

    The sample is drawn from a locally seeded generator, so a failing run
    can be reproduced exactly and the global ``random`` state is left
    untouched.
    """
    rng = random.Random(42)  # fixed seed: identical sample on every run
    normal = [rng.normalvariate(0.0, 1.0) for _ in range(10000)]
    h = distogram.Distogram(bin_count=64)

    for value in normal:
        h = distogram.update(h, value)

    # Each estimate may deviate from numpy's exact quantile by at most 0.2.
    assert distogram.quantile(h, 0.5) == approx(np.quantile(normal, 0.5),
                                                abs=0.2)
    assert distogram.quantile(h, 0.95) == approx(np.quantile(normal, 0.95),
                                                 abs=0.2)
示例#16
0
def test_quantile_on_left():
    """Compare low quantiles with numpy when data clusters near the bottom.

    Five of the ten samples lie between 4.9 and 5.6, and the histogram has
    fewer bins (6) than there are samples.
    """
    data = [12.3, 5.2, 5.4, 4.9, 5.5, 5.6, 8.2, 30.53, 23.5, 13.98]

    hist = distogram.Distogram(bin_count=6)
    for sample in data:
        hist = distogram.update(hist, sample)

    # (quantile, relative tolerance against numpy's exact value)
    for q, tolerance in ((0.01, 0.01), (0.05, 0.05), (0.25, 0.05)):
        exact = np.quantile(data, q)
        assert distogram.quantile(hist, q) == approx(exact, rel=tolerance)
示例#17
0
import sys
import time

import distogram
import numpy as np

# Benchmark script: measures how long it takes to feed a large normally
# distributed sample into a Distogram one value at a time, averaged over
# several runs. Pass --enable-np as the first argument to iterate over the
# numpy array directly instead of a list of plain Python floats.
run_count = 5

print("generating distribution...")
utterance_count = 10000000
distribution = np.random.normal(size=utterance_count)
# Alternative input: np.random.uniform(size=utterance_count)
if len(sys.argv) >= 2 and sys.argv[1] == '--enable-np':
    print('using numpy types')
else:
    print('using python types')
    # tolist() turns the array into a list of native Python floats.
    distribution = distribution.tolist()

print("distribution generated")
print("running update bench ({} times)...".format(run_count))
total_time = 0

for _ in range(run_count):
    # perf_counter() is the clock intended for measuring short durations;
    # unlike time.time() it is not affected by system clock adjustments.
    start_time = time.perf_counter()

    # Every run starts from an empty histogram.
    h = distogram.Distogram()
    for value in distribution:
        h = distogram.update(h, value)

    end_time = time.perf_counter()
    total_time += end_time - start_time

# Mean duration of one full pass over the distribution.
total_time /= run_count
print("ran update bench in {} seconds (mean of {} runs)".format(
    total_time, run_count))
print("req/s: {}".format(utterance_count // total_time))