示例#1
0
def experiment4(n, m, k):
    """
    Experiment 4: insert every key a randomly drawn number of times, with all insertions interleaved in random order.

    The per-key repetition count is drawn with ``randint(0, 20)``, so a key may be drawn with
    count 0 and never inserted at all.
    NOTE(review): whether 20 itself can be drawn depends on which ``randint`` is imported
    (``random.randint`` includes the upper bound, ``numpy.random.randint`` excludes it) -- confirm.

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :return: the result of calc_false_positive_rate_random for the populated filter
    """
    keys = generate_keys(n)
    bloom = RevisedExtendedBloomFilter(m, k)
    counts = {}
    insertions = []

    # Draw one repetition count per key, remember it, and queue that many copies of the key.
    for key in keys:
        times = randint(0, 20)
        counts[key] = times
        insertions.extend(key for _ in range(times))

    # Mix the queued insertions so repeats of one key are no longer adjacent.
    fisher_yates_shuffle(insertions)

    for key in insertions:
        bloom.insert(key)

    # The recorded per-key counts are handed to the false-positive calculation.
    return calc_false_positive_rate_random(bloom, keys, counts)
示例#2
0
def experiment7(n, m, k):
    """
    Experiment 7: insert every key a Poisson-distributed (lambda=20) number of times, with all insertions shuffled.

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :return: the result of calc_false_positive_rate_random for the populated filter
    """
    keys = generate_keys(n)
    bloom = RevisedExtendedBloomFilter(m, k)
    counts = {}
    insertions = []

    # One Poisson(20) draw per key decides how many copies of that key get queued.
    for key in keys:
        times = np.random.poisson(20)
        counts[key] = times
        insertions.extend(key for _ in range(times))

    # Randomise the overall insertion order before touching the filter.
    fisher_yates_shuffle(insertions)

    for key in insertions:
        bloom.insert(key)

    # The recorded per-key counts are handed to the false-positive calculation.
    return calc_false_positive_rate_random(bloom, keys, counts)
示例#3
0
def experiment2(n, m, k):
    """
    Experiment 2: insert each key 20 consecutive times before moving on to the next key.

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :return: the result of calc_false_positive_rate for the populated filter
    """
    repeats = 20
    keys = generate_keys(n)
    bloom = RevisedExtendedBloomFilter(m, k)

    # All repeats of a key are issued back-to-back (no interleaving between keys).
    for key in keys:
        for _ in range(repeats):
            bloom.insert(key)

    # The same fixed repeat count is passed to the false-positive calculation.
    return calc_false_positive_rate(bloom, keys, repeats)
示例#4
0
def experiment3(n, m, k):
    """
    Experiment 3: insert each key 20 times, with the full set of insertions in random order.

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :return: the result of calc_false_positive_rate for the populated filter
    """
    keys = generate_keys(n)
    bloom = RevisedExtendedBloomFilter(m, k)

    # assumes generate_keys returns a list, so `* 20` yields 20 copies of every key -- TODO confirm
    insertions = keys * 20
    fisher_yates_shuffle(insertions)

    for key in insertions:
        bloom.insert(key)

    # The fixed count of 20 is passed to the false-positive calculation.
    return calc_false_positive_rate(bloom, keys, 20)