def test_queries_from_shrivastava_example():
    """Run quantile, rank and interval queries on a small fixed data set.

    Fifteen values from 0..7 are added to the digest, the digest is
    compressed, and each query result is compared with a pinned value.

    NOTE: percentiles and ranks reported by a q-digest are approximations,
    so the expected numbers below are the digest's approximate answers,
    not the exact statistics of the inserted values.
    """
    digest = QuantileDigest(3, 5)

    # (value, number of occurrences), listed in insertion order.
    occurrences = (
        (0, 1),
        (2, 4),
        (3, 6),
        (4, 1),
        (5, 1),
        (6, 1),
        (7, 1),
    )
    for value, times in occurrences:
        for _ in range(times):
            digest.add(value)

    digest.compress()

    assert digest.quantile_query(0.5) == 3, "Incorrect approx. median"
    assert digest.inverse_quantile_query(3) == 4, "Incorrect approx. rank"
    assert digest.quantile_query(0.85) == 7, (
        "Incorrect approx. 85th percentile")
    assert digest.inverse_quantile_query(5) == 10, "Incorrect approx. rank"
    assert digest.interval_query(3, 5) == 6, (
        "Incorrect approx. number of values in interval")
"""Example of how to use QuantileDigest."""

import random

from pdsa.rank.qdigest import QuantileDigest


if __name__ == "__main__":
    digest = QuantileDigest(4, 5)

    # Seed the generator so every run feeds the same 100 values from 0..15.
    random.seed(42)
    for _ in range(100):
        digest.add(random.randrange(16))

    digest.compress()

    print(digest)

    size_in_bytes = digest.sizeof()
    print("Size in bytes of the q-digest:", size_in_bytes)

    total_elements = digest.count()
    print("Total elements in the q-digest:", total_elements)

    median = digest.quantile_query(0.5)
    print("50th percentile (median):", median)

    percentile_95 = digest.quantile_query(0.95)
    print("95th percentile:", percentile_95)

    rank_of_10 = digest.inverse_quantile_query(10)
    print("Rank of the element <10>:", rank_of_10)

    in_interval = digest.interval_query(4, 9)
    print("Number of elements in [4, 9]:", in_interval)