def test_decay(self):
    """Check that a key is reported right after insertion and gone after the decay window.

    A filter with ``decay_time=4`` is started on the test's IO loop, a key is
    added, and the test then waits four seconds before checking again.
    """
    tbf = TimingBloomFilter(500, decay_time=4, ioloop=self.io_loop).start()
    tbf += "hello"
    assert tbf.contains("hello")

    # ``self.wait`` is used only to let the IO loop run for the decay window.
    # Whatever it raises (presumably the timeout itself -- confirm against the
    # test base class) is deliberately ignored, but KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        self.wait(timeout=4)
    except Exception:
        pass

    assert not tbf.contains("hello")
def test_save(self):
    """Round-trip a filter through ``tofile``/``fromfile`` and compare the result.

    The reloaded filter must still report the inserted key and must have the
    same ``num_non_zero`` as the filter that was written out.
    """
    tbf = TimingBloomFilter(5, decay_time=30, ioloop=self.io_loop).start()
    tbf += "hello"
    assert "hello" in tbf
    prev_num_nonzero = tbf.num_non_zero

    # Close the output file explicitly so it is flushed before being read back
    # and the handle is not leaked.
    with open("test.tbf", "w+") as out:
        tbf.tofile(out)

    with TestFile("test.tbf") as fd:
        tbf2 = TimingBloomFilter.fromfile(fd)
        # Check the *reloaded* filter; the original was already checked above.
        assert "hello" in tbf2
        assert prev_num_nonzero == tbf2.num_non_zero
def get_bloom(**overrides):
    '''
    Build a TimingBloomFilter for tests.

    Starts from a copy of BLOOM_DEFAULTS and applies any keyword overrides on
    top before passing the result to the constructor.
    '''
    settings = copy(BLOOM_DEFAULTS)
    for name, value in overrides.items():
        settings[name] = value
    return TimingBloomFilter(**settings)
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """Save a bloom, reload it, add keys, save again, and reload once more.

    The second save must advance the mtime of both ``bloom.npy`` and
    ``meta.json``, and the final reload must contain both the original and the
    newly added keys.
    """
    # Work inside a dedicated temporary directory
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)
    bloom_file = testing_dir.join('bloom.npy')
    meta_file = testing_dir.join('meta.json')

    # First generation: build the test bloom and persist it
    first_gen_bloom = get_bloom(temp_path)
    first_gen_bloom.save()

    # Remember when each file was first written
    bloom_mtime_before = bloom_file.mtime()
    meta_mtime_before = meta_file.mtime()

    # Sleep for 1 second to ensure the mtimes will advance
    time.sleep(1)

    # Second generation: reload, add a few more keys, and save again
    second_gen_bloom = TimingBloomFilter.load(temp_path)
    for new_key in ('101', '102', '103'):
        second_gen_bloom.add(new_key)
    second_gen_bloom.save()

    # Both files must have been rewritten
    assert bloom_mtime_before < bloom_file.mtime()
    assert meta_mtime_before < meta_file.mtime()

    # Third generation: reload and verify the contents
    third_gen_bloom = TimingBloomFilter.load(temp_path)
    assert third_gen_bloom.contains('50')
    assert third_gen_bloom.contains('103')
    assert not third_gen_bloom.contains('105')
def test_init_with_bloom_data(exists_mock, load_mock):
    """Constructing a bloom whose data file "exists" must load that data.

    ``exists_mock`` and ``load_mock`` are injected mocks; what they patch is
    not visible here (presumably a path-exists check and an array loader --
    confirm against the decorators on this test).
    """
    # Pretend the bloom file exists and loading it yields a sentinel
    exists_mock.return_value = True
    load_mock.return_value = sentinel.data

    capacity = 1000
    error = 0.002
    decay_time = 86400
    data_path = '/does/not/exist'

    bloom = TimingBloomFilter(
        capacity=capacity,
        error=error,
        decay_time=decay_time,
        data_path=data_path,
    )

    # Attributes the constructor is expected to have set
    expected_values = {
        'capacity': capacity,
        'error': error,
        'data_path': data_path,
        'id': None,
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
        'data': sentinel.data,
    }
    assert_bloom_values(bloom, expected_values)

    # Paths derived from data_path
    data_dir, meta_path, bloom_path = bloom._get_paths(None)
    assert data_dir == '/does/not/exist'
    assert meta_path == '/does/not/exist/meta.json'
    assert bloom_path == '/does/not/exist/bloom.npy'

    # The bloom file must have been checked for and loaded exactly once
    exists_mock.assert_called_once_with(bloom_path)
    load_mock.assert_called_once_with(bloom_path)
def test_init_with_bloom_data(exists_mock, load_mock):
    """Check constructor state when the mocked bloom file is reported present.

    The two mock arguments are supplied from outside this function; their
    patch targets are not visible here (NOTE(review): confirm what they patch).
    """
    # Mocks: the file "exists" and loading returns a sentinel object
    exists_mock.return_value = True
    load_mock.return_value = sentinel.data

    # Constructor arguments under test
    init_kwargs = {
        'capacity': 1000,
        'error': 0.002,
        'decay_time': 86400,
        'data_path': '/does/not/exist',
    }
    bloom = TimingBloomFilter(**init_kwargs)

    # The bloom should echo its arguments and expose the derived sizing values
    assert_bloom_values(bloom, {
        'capacity': init_kwargs['capacity'],
        'error': init_kwargs['error'],
        'data_path': init_kwargs['data_path'],
        'id': None,
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
        'data': sentinel.data,
    })

    # Resolved storage locations
    resolved_dir, resolved_meta, resolved_bloom = bloom._get_paths(None)
    assert resolved_dir == '/does/not/exist'
    assert resolved_meta == '/does/not/exist/meta.json'
    assert resolved_bloom == '/does/not/exist/bloom.npy'

    # Each mock must have been hit once, with the bloom file path
    exists_mock.assert_called_once_with(resolved_bloom)
    load_mock.assert_called_once_with(resolved_bloom)
def test_holistic(self):
    """End-to-end timing and accuracy run for ``TimingBloomFilter``.

    Inserts ``N`` keys into a filter of capacity ``n`` with a decay time of
    ``T`` seconds, then checks in order: every inserted key is found, the
    false-positive rate on unseen keys is within ``tbf.error``, and after the
    decay window the inserted keys are gone and no non-zero entries remain.
    Each phase is wrapped in a ``TimingBlock``.
    """
    n = int(2e4)
    N = int(1e4)
    T = 3
    print("TimingBloom with capacity %e and expiration time %ds" % (n, T))

    with TimingBlock("Initialization"):
        tbf = TimingBloomFilter(n, decay_time=T, ioloop=self.io_loop)

    # Wrap the filter's decay so that every invocation is timed as well.
    orig_decay = tbf.decay

    def new_decay(*args, **kwargs):
        with TimingBlock("Decaying"):
            val = orig_decay(*args, **kwargs)
        return val

    tbf.decay = new_decay
    # Re-run the decay setup after swapping in the wrapper (presumably so the
    # scheduled callback picks it up -- confirm against the filter class).
    tbf._setup_decay()
    tbf.start()

    print("num_hashes = %d, num_bytes = %d" % (tbf.num_hashes, tbf.num_bytes))
    print("sizeof(TimingBloom) = %d bytes" % (tbf.num_bytes))

    with TimingBlock("Adding %d values" % N, N):
        for i in xrange(N):
            tbf.add(str(i))
    last_insert = time.time()

    with TimingBlock("Testing %d positive values" % N, N):
        for i in xrange(N):
            assert str(i) in tbf

    with TimingBlock("Testing %d negative values" % N, N):
        # Keys N..2N-1 were never inserted, so every hit is a false positive.
        err = sum(1 for i in xrange(N, 2 * N) if str(i) in tbf)
        tot_err = err / float(N)
        assert tot_err <= tbf.error, "Error is too high: %f > %f" % (tot_err, tbf.error)

    # Let the IO loop run until one second past the decay window of the last
    # insert. ``self.wait`` is used only as a loop-driving sleep; whatever it
    # raises (presumably the timeout -- confirm against the test base class)
    # is ignored, but KeyboardInterrupt/SystemExit are no longer swallowed.
    remaining = T - (time.time() - last_insert) + 1
    if remaining > 0:
        try:
            self.wait(timeout=remaining)
        except Exception:
            pass

    with TimingBlock("Testing %d expired values" % N, N):
        # The inserted keys should have decayed; any hit counts as an error.
        err = sum(1 for i in xrange(N) if str(i) in tbf)
        tot_err = err / float(N)
        assert tot_err <= tbf.error, "Error is too high: %f > %f" % (tot_err, tbf.error)

    assert tbf.num_non_zero == 0, "All entries in the bloom should be zero: %d non-zero entries" % tbf.num_non_zero
def test_init_no_bloom_data():
    """Constructing a bloom with no stored data must yield an empty bloom.

    Verifies the attributes set by the constructor, the paths derived from
    ``data_path``, and that the resulting bloom is empty.
    """
    # Constructor inputs
    capacity = 1000
    error = 0.002
    decay_time = 86400
    data_path = '/does/not/exist'
    bloom_id = 5

    bloom = TimingBloomFilter(
        capacity=capacity,
        error=error,
        decay_time=decay_time,
        data_path=data_path,
        id=bloom_id,
    )

    # Attributes the constructor is expected to have set
    expected_values = {
        'capacity': capacity,
        'error': error,
        'data_path': data_path,
        'id': bloom_id,
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
    }
    assert_bloom_values(bloom, expected_values)

    # Paths derived from data_path
    data_dir, meta_path, bloom_path = bloom._get_paths(None)
    assert data_dir == '/does/not/exist'
    assert meta_path == '/does/not/exist/meta.json'
    assert bloom_path == '/does/not/exist/bloom.npy'

    assert_empty_bloom(bloom)
def test_init_no_bloom_data():
    """Check constructor state and emptiness when no bloom data is loaded."""
    # Constructor arguments under test
    init_kwargs = {
        'capacity': 1000,
        'error': 0.002,
        'decay_time': 86400,
        'data_path': '/does/not/exist',
        'id': 5,
    }
    bloom = TimingBloomFilter(**init_kwargs)

    # The bloom should echo its arguments and expose the derived sizing values
    assert_bloom_values(bloom, {
        'capacity': init_kwargs['capacity'],
        'error': init_kwargs['error'],
        'data_path': init_kwargs['data_path'],
        'id': init_kwargs['id'],
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
    })

    # Resolved storage locations
    resolved_dir, resolved_meta, resolved_bloom = bloom._get_paths(None)
    assert resolved_dir == '/does/not/exist'
    assert resolved_meta == '/does/not/exist/meta.json'
    assert resolved_bloom == '/does/not/exist/bloom.npy'

    # Nothing was loaded, so the bloom must be empty
    assert_empty_bloom(bloom)
def get_bloom(temp_path, disable_optimizations=False):
    """Return a TimingBloomFilter stored at *temp_path*, pre-filled with keys '0'..'99'.

    The bloom is sanity-checked before being returned: '1' and '50' must be
    present and '101' must be absent.
    """
    bloom = TimingBloomFilter(
        capacity=1000,
        decay_time=86400,
        data_path=temp_path,
        disable_optimizations=disable_optimizations,
    )

    # Populate with the string form of 0..99
    for key in map(str, range(100)):
        bloom.add(key)

    # Sanity-check membership before handing the bloom to the caller
    for present_key in ('1', '50'):
        assert bloom.contains(present_key)
    assert not bloom.contains('101')

    return bloom
def test_bloom_initial_save_and_load_without_optimization(tmpdir):
    """Save a bloom built with ``disable_optimizations=True`` and reload it.

    The save must create ``bloom.npy`` and ``meta.json``, and the reloaded
    bloom must report the same membership as the one that was saved.
    """
    # Work inside a dedicated temporary directory
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Build the test bloom with optimizations disabled and persist it
    original = get_bloom(temp_path, disable_optimizations=True)
    original.save()

    # Both output files must now exist
    for filename in ('bloom.npy', 'meta.json'):
        assert testing_dir.join(filename).check()

    # Load it back and verify membership
    restored = TimingBloomFilter.load(temp_path)
    for present_key in ('1', '50'):
        assert restored.contains(present_key)
    assert not restored.contains('101')