Пример #1
0
def test_merge_equivalent_to_single_run():
    """Merged partial sketches must match a single sketch over all data.

    One sketch is built per input list and a reference sketch is built over
    the concatenation of both lists. The two partial sketches are merged in
    both directions, and the estimate plus the lower/upper bounds (requested
    with argument ``1``) are required to be exactly equal across the two
    merge results and the reference sketch.
    """
    first_values = [1, 2, 3, 1, 2, 3]
    second_values = [1, 2, 3, 4, 5]

    def build(values):
        # Fresh sketch updated with every value, in order.
        sketch = hllsketch.HllSketch()
        for value in values:
            sketch.update(value)
        return sketch

    first = build(first_values)
    second = build(second_values)
    reference = build(first_values + second_values)

    second_into_first = first.merge(second)
    first_into_second = second.merge(first)

    # Merge direction must not matter, and must match the single-run sketch.
    assert first_into_second.get_estimate() == second_into_first.get_estimate()
    assert first_into_second.get_estimate() == reference.get_estimate()

    assert (
        first_into_second.get_lower_bound(1)
        == second_into_first.get_lower_bound(1)
    )
    assert first_into_second.get_lower_bound(1) == reference.get_lower_bound(1)

    assert (
        first_into_second.get_upper_bound(1)
        == second_into_first.get_upper_bound(1)
    )
    assert first_into_second.get_upper_bound(1) == reference.get_upper_bound(1)
Пример #2
0
def test_floats_ints_different():
    """Integers and their float counterparts count as distinct items."""
    sketch = hllsketch.HllSketch()
    # Each value is fed twice: three ints followed by the three equal floats.
    for value in [1, 2, 3] * 2 + [1.0, 2.0, 3.0] * 2:
        sketch.update(value)

    # Six distinct items are expected, i.e. 1 and 1.0 are not collapsed.
    expected = pytest.approx(6, 1e-5)
    assert sketch.get_estimate() == expected
Пример #3
0
def test_update():
    """Repeated updates with three distinct ints give an estimate of ~3."""
    sketch = hllsketch.HllSketch()
    for value in [1, 2, 3] * 2:
        sketch.update(value)

    # The point estimate is checked with a tighter relative tolerance than
    # the default upper and lower bounds.
    assert sketch.get_estimate() == pytest.approx(3, 1e-5)
    loose_three = pytest.approx(3, 1e-3)
    assert sketch.get_upper_bound() == loose_three
    assert sketch.get_lower_bound() == loose_three
Пример #4
0
def test_summary():
    """The summary of a mixed-type sketch reports the distinct item count."""
    ints = [1, 2, 3] * 3
    floats = [4.0, 6.0, 9.0, 9.0]
    strings = ["a", "a", "dkfjadlf", "c"]

    sketch = hllsketch.HllSketch()
    for item in ints + floats + strings:
        sketch.update(item)

    # Three distinct values in each group -> nine distinct items overall.
    expected = len(set(ints)) + len(set(floats)) + len(set(strings))
    result = sketch.to_summary()
    assert result.estimate == pytest.approx(expected, 1e-5)
    assert result.upper == pytest.approx(expected, 1e-4)
    assert result.lower == pytest.approx(expected, 1e-4)
Пример #5
0
def test_protobuf_roundtrip():
    """A sketch rebuilt from its protobuf message reports the same numbers."""
    original = hllsketch.HllSketch()
    # Mixed ints, strings and floats, with duplicates.
    for item in [1, 2, 3, 1, 2, 3, "a", "a", "b", 2.0, 8.0, 9.0]:
        original.update(item)

    message = original.to_protobuf()
    restored = hllsketch.HllSketch.from_protobuf(message)

    # Estimate and both default bounds must survive the round trip exactly.
    assert original.get_estimate() == restored.get_estimate()
    assert original.get_upper_bound() == restored.get_upper_bound()
    assert original.get_lower_bound() == restored.get_lower_bound()
Пример #6
0
def test_enum_tracking():
    """Enum members and their integer values yield four distinct items."""

    class X(Enum):
        A = 1
        B = 2
        C = 3
        D = 4

    # The four members in definition order, followed by their raw values.
    items = list(X) + [member.value for member in X]

    sketch = hllsketch.HllSketch()
    for item in items:
        sketch.update(item)

    # Eight updates, but only four distinct items are expected.
    assert sketch.get_estimate() == pytest.approx(4, 1e-5)
Пример #7
0
def test_datetime_tracking():
    """Datetime values can be tracked; four distinct ones are expected."""
    naive_midnight = datetime(2020, 2, 3)
    aware_midnight = datetime(2020, 2, 3, tzinfo=pytz.UTC)
    same_day_evening = datetime(2020, 2, 3, 21, 28, 31)

    sketch = hllsketch.HllSketch()
    # The naive midnight value is fed twice; the current UTC time is the
    # fourth distinct value.
    for moment in (
        naive_midnight,
        naive_midnight,
        aware_midnight,
        same_day_evening,
        datetime.utcnow(),
    ):
        sketch.update(moment)

    assert sketch.get_estimate() == pytest.approx(4, 1e-5)
Пример #8
0
def test_serialize_then_merge_does_not_fail():
    """
    Regression test: merging a deserialized sketch must not raise.

    Serializing a datasketches HLL sketch with sketch.serialize_updatable()
    was observed to cause an occasional RuntimeError once the sketch had been
    deserialized and was used to update a union.

    The failure is intermittent, so the scenario is repeated many times.

    Using serialize_compact() instead appears to avoid the problem.

    The problem also only seemed to show up with numpy datatypes (e.g.
    np.float), not with plain python floats, hence the numpy array input.
    """
    import numpy as np

    samples = np.array([
        21.0, 15.0, 14.0, 23.0, 24.0, 2.0, 26.0,
        31.0, 26.0, 30.0, 17.0, 36.0, 18.0, 28.0,
        58.0, 32.0, 10.0, 19.0, 15.0, 8.0, 31.0,
    ])

    repetitions = 50
    for _ in range(repetitions):
        # A larger lg_k seems to increase the odds of failure
        source = hllsketch.HllSketch(20)
        for sample in samples:
            source.update(sample)

        # Go through the wire format: message -> bytes -> message -> sketch.
        wire_bytes = source.to_protobuf().SerializeToString()
        decoded = hllsketch.HllSketchMessage.FromString(wire_bytes)
        restored = hllsketch.HllSketch.from_protobuf(decoded)

        # The test passes as long as this self-merge does not raise.
        restored.merge(restored)
Пример #9
0
def test_protobuf_then_merge():
    """A sketch with no updates survives a protobuf round trip and a merge.

    The sketch is never updated before being converted to protobuf and back;
    the restored sketch is then merged with itself. There are no assertions:
    the test passes as long as none of these calls raises.
    """
    hll = hllsketch.HllSketch()
    hll2 = hllsketch.HllSketch.from_protobuf(hll.to_protobuf())
    hll2.merge(hll2)
Пример #10
0
def test_empty_sketch_summary_returns_none():
    """A sketch that has received no updates produces no summary."""
    summary = hllsketch.HllSketch().to_summary()
    assert summary is None