def experiment4(n, m, k, max_insertions=20):
    """
    Run experiment 4, where each key is inserted a random number of times
    (uniform on 0..max_insertions, both ends inclusive), and all insertions
    are in a random order

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :param max_insertions: inclusive upper bound on the number of times a
        single key is inserted (defaults to 20)
    :return: whatever calc_false_positive_rate_random reports for the
        populated filter, the keys and their per-key insertion counts
    """
    distinct_keys = generate_keys(n)
    bf = RevisedExtendedBloomFilter(m, k)

    # Draw one insertion count per key, in key order. A count of 0 is
    # possible, in which case the key is recorded in c_list but never inserted.
    all_keys = []
    c_list = {}
    for e in distinct_keys:
        c = randint(0, max_insertions)
        c_list[e] = c
        all_keys.extend(e for _ in range(c))

    # Randomise the insertion order before populating the filter
    fisher_yates_shuffle(all_keys)
    for e in all_keys:
        bf.insert(e)

    # Get the false-positive rate
    return calc_false_positive_rate_random(bf, distinct_keys, c_list)
def experiment7(n, m, k, lam=20):
    """
    Run experiment 7, where each key is inserted a poisson random variable
    (rate lam) number of times, and all insertions are shuffled

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :param lam: the rate (lambda) of the poisson distribution the per-key
        insertion counts are drawn from (defaults to 20)
    :return: whatever calc_false_positive_rate_random reports for the
        populated filter, the keys and their per-key insertion counts
    """
    distinct_keys = generate_keys(n)
    bf = RevisedExtendedBloomFilter(m, k)

    # Draw one insertion count per key, in key order. A draw of 0 is
    # possible, in which case the key is recorded in c_list but never inserted.
    all_keys = []
    c_list = {}
    for e in distinct_keys:
        c = np.random.poisson(lam)
        c_list[e] = c
        # c is a NumPy integer, so repeat via range() rather than `[e] * c`
        for _ in range(c):
            all_keys.append(e)

    # Randomise the insertion order before populating the filter
    fisher_yates_shuffle(all_keys)
    for e in all_keys:
        bf.insert(e)

    # Get the false-positive rate
    return calc_false_positive_rate_random(bf, distinct_keys, c_list)
def experiment2(n, m, k, insertions=20):
    """
    Run experiment 2, where each key is inserted a fixed number of times
    in a row

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :param insertions: how many consecutive times each key is inserted
        (defaults to 20); the same value is passed on to
        calc_false_positive_rate so the two can never drift apart
    :return: whatever calc_false_positive_rate reports for the populated
        filter, the keys and the per-key insertion count
    """
    distinct_keys = generate_keys(n)
    bf = RevisedExtendedBloomFilter(m, k)

    # All insertions of a key happen back to back before moving to the next key
    for e in distinct_keys:
        for _ in range(insertions):
            bf.insert(e)

    # Get the false-positive rate
    return calc_false_positive_rate(bf, distinct_keys, insertions)
def experiment3(n, m, k, insertions=20):
    """
    Run experiment 3, where each key is inserted a fixed number of times,
    but all insertions are in a random order

    :param n: number of distinct keys
    :param m: the size of the bloom filter
    :param k: the number of hashes used by the bloom filter
    :param insertions: how many times each key is inserted in total
        (defaults to 20); the same value is passed on to
        calc_false_positive_rate so the two can never drift apart
    :return: whatever calc_false_positive_rate reports for the populated
        filter, the keys and the per-key insertion count
    """
    distinct_keys = generate_keys(n)
    bf = RevisedExtendedBloomFilter(m, k)

    # Repeat the key sequence, then shuffle the repeated copy so that
    # distinct_keys itself keeps its original order
    # NOTE(review): assumes generate_keys returns a list, so `*` repeats - confirm
    all_keys = distinct_keys * insertions
    fisher_yates_shuffle(all_keys)
    for e in all_keys:
        bf.insert(e)

    # Get the false-positive rate
    return calc_false_positive_rate(bf, distinct_keys, insertions)