def test_decay(self):
    """An added item should expire out of the filter after decay_time."""
    stbf = ScalingTimingBloomFilter(500, decay_time=4, ioloop=self.io_loop).start()
    stbf += "hello"
    # Idiom fix: assert truthiness directly instead of comparing to True/False.
    assert stbf.contains("hello")
    try:
        # self.wait() raises on timeout; we only care that the time elapsed.
        self.wait(timeout=4)
    except Exception:
        pass
    assert not stbf.contains("hello")
def test_save(self):
    """Round-trip the filter through tofile()/fromfile() and compare state."""
    stbf = ScalingTimingBloomFilter(5, decay_time=30, ioloop=self.io_loop).start()
    stbf += "hello"
    assert "hello" in stbf
    prev_num_nonzero = stbf.blooms[0]['bloom'].num_non_zero

    # Bug fix: close the write handle (via `with`) so the data is fully
    # flushed to disk before it is read back.
    with open("test.stbf", "w+") as out:
        stbf.tofile(out)
    with TestFile("test.stbf") as fd:
        stbf2 = ScalingTimingBloomFilter.fromfile(fd)

    # Bug fix: verify the *reloaded* filter, not the original instance.
    assert "hello" in stbf2
    assert prev_num_nonzero == stbf2.blooms[0]['bloom'].num_non_zero
def test_size_stability(self):
    """Filter should scale up under load and scale back down after decay."""
    stbf = ScalingTimingBloomFilter(10, decay_time=5, min_fill_factor=0.2,
                                    growth_factor=2, ioloop=self.io_loop).start()
    for i in xrange(100):
        stbf.add("FOO%d" % i)
    # Bug fix: `> 0` was vacuous (there is always at least one bloom).
    # 100 inserts into capacity 10 must have created additional blooms.
    assert len(stbf.blooms) > 1, "Did not scale up"
    for i in xrange(100, 130):
        stbf.add("FOO%d" % i)
    try:
        # self.wait() raises on timeout; we only care that time elapsed.
        self.wait(timeout=.5)
    except Exception:
        pass
    if len(stbf.blooms) == 1 and stbf.blooms[0]['id'] == 1:
        return
    # Bug fix: `assert "Did not scale down"` asserted a truthy string and
    # could never fail; fail explicitly with the message instead.
    assert False, "Did not scale down"
def test_full_init():
    """Every constructor keyword should be stored verbatim on the instance."""
    # Arrange: mocked collaborators plus one explicit value per argument.
    ticker = MagicMock(NoOpTicker)
    sub_bloom = MagicMock(TimingBloomFilter)
    sub_bloom.seconds_per_tick = 100
    init_kwargs = dict(
        error=0.0002,
        capacity=1000,
        decay_time=86400,
        ticker=ticker,
        data_path='/foo/bar/baz',
        error_tightening_ratio=0.4,
        growth_factor=3,
        min_fill_factor=0.1,
        max_fill_factor=0.9,
        insert_tail=False,
        blooms=[sub_bloom],
        disable_optimizations=True,
    )

    # Act
    bloom = ScalingTimingBloomFilter(**init_kwargs)

    # Assert: stored state mirrors the inputs, plus the derived values.
    expected = dict(init_kwargs)
    del expected['ticker']
    expected['error_initial'] = 0.00012
    expected['seconds_per_tick'] = sub_bloom.seconds_per_tick
    assert_bloom_values(bloom, expected)

    _, meta_filename, blooms_path = _get_paths(bloom.data_path, None)
    assert meta_filename == '/foo/bar/baz/meta.json'
    assert blooms_path == '/foo/bar/baz/blooms'

    # The ticker must be wired to drive decay and then started.
    ticker.setup.assert_called_once_with(bloom.decay, sub_bloom.seconds_per_tick)
    ticker.start.assert_called_once_with()
def test_save_and_load_with_scaling(tmpdir):
    """Save, scale after a reload, save again: all blooms should persist."""
    work_dir = tmpdir.mkdir('bloom_test')
    work_path = str(work_dir)

    def check_layout(expected_bloom_count):
        # One directory per sub-bloom plus a metadata file.
        blooms_dir = work_dir.join('blooms')
        meta = work_dir.join('meta.json')
        assert blooms_dir.check()
        assert expected_bloom_count == len(blooms_dir.listdir())
        assert meta.check()

    original = get_bloom(data_path=work_path, disable_optimizations=True,
                         capacity=200)
    original.save()
    check_layout(1)

    reloaded = ScalingTimingBloomFilter.load(work_path)
    # Push in enough new keys to force a second sub-bloom.
    for key in range(101, 201):
        reloaded.add(str(key))
    reloaded.save()
    check_layout(2)

    # A fresh load must see the keys added by the second generation.
    final = ScalingTimingBloomFilter.load(work_path)
    assert final.contains('101')
    assert final.contains('150')
    assert not final.contains('201')
def test_holistic(self):
    """End-to-end exercise: timed insert, positive/negative queries,
    false-positive rate check, then decay of all entries.

    NOTE(review): timing-sensitive — statement order and the wait
    interval matter, so only documentation is added here.
    """
    n = int(1e4)   # filter capacity
    N = int(2e4)   # number of values inserted/queried
    T = 3          # decay time in seconds
    print "ScalingTimingBloom with capacity %e and expiration time %ds" % (n, T)
    with TimingBlock("Initialization"):
        stbf = ScalingTimingBloomFilter(n, decay_time=T, ioloop=self.io_loop)
    # Wrap decay() so that each decay pass is timed as well; the decay
    # callback must be re-registered after the swap.
    orig_decay = stbf.decay
    def new_decay(*args, **kwargs):
        with TimingBlock("Decaying"):
            val = orig_decay(*args, **kwargs)
        return val
    setattr(stbf, "decay", new_decay)
    stbf._setup_decay()
    stbf.start()
    print "State of blooms: %d blooms with expected error %.2f%%" % (len(stbf.blooms), stbf.expected_error()*100.)
    with TimingBlock("Adding %d values" % N, N):
        for i in xrange(N):
            stbf.add(str(i))
    # Remember when inserts finished so we can wait exactly one decay period.
    last_insert = time.time()
    print "State of blooms: %d blooms with expected error %.2f%%" % (len(stbf.blooms), stbf.expected_error()*100.)
    with TimingBlock("Testing %d positive values" % N, N):
        for i in xrange(N):
            assert str(i) in stbf
    with TimingBlock("Testing %d negative values" % N, N):
        err = 0
        for i in xrange(N, 2*N):
            if str(i) in stbf:
                err += 1
        # Observed false-positive rate must stay within the configured bound.
        tot_err = err / float(N)
        assert tot_err <= stbf.error, "Error is too high: %f > %f" % (tot_err, stbf.error)
    try:
        # Wait until a full decay period (plus slack) has elapsed since the
        # last insert; self.wait() raising on timeout is the expected path.
        t = T - (time.time() - last_insert) + 1
        if t > 0:
            self.wait(timeout = t)
    except:
        pass
    print "State of blooms: %d blooms with expected error %.2f%%" % (len(stbf.blooms), stbf.expected_error()*100.)
    with TimingBlock("Testing %d expired values" % N, N):
        err = 0
        for i in xrange(N):
            if str(i) in stbf:
                err += 1
        # Expired entries that still match count against the error bound.
        tot_err = err / float(N)
        assert tot_err <= stbf.error, "Error is too high: %f > %f" % (tot_err, stbf.error)
    assert len(stbf.blooms) == 1, "Decay should have pruned all but one bloom filters: %d blooms left" % len(stbf.blooms)
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """A reloaded bloom that is saved again should rewrite its files on disk."""
    work_dir = tmpdir.mkdir('bloom_test')
    work_path = str(work_dir)

    bloom = get_bloom(data_path=work_path)
    bloom.save()

    # Capture the on-disk timestamps of the first save.
    bloom_file = work_dir.join('blooms/0/bloom.npy')
    meta_file = work_dir.join('meta.json')
    initial_bloom_mtime = bloom_file.mtime()
    initial_meta_mtime = meta_file.mtime()

    # mtime resolution can be a full second; make sure it can advance.
    time.sleep(1)

    second_gen = ScalingTimingBloomFilter.load(work_path)
    for key in ('101', '102', '103'):
        second_gen.add(key)
    second_gen.save()

    # Both files must have been rewritten by the second save.
    assert initial_bloom_mtime < bloom_file.mtime()
    assert initial_meta_mtime < meta_file.mtime()

    # A final load should see both the old and the newly added keys.
    third_gen = ScalingTimingBloomFilter.load(work_path)
    assert third_gen.contains('50')
    assert third_gen.contains('103')
    assert not third_gen.contains('105')
def test_discover_blooms(listdir_mock, isdir_mock):
    """Only directory entries should be reported as bloom paths."""
    entries = ['/path/1', '/path/2', '/path/3']
    listdir_mock.return_value = entries
    # The second entry is a plain file and must be filtered out.
    isdir_mock.side_effect = [True, False, True]

    found = ScalingTimingBloomFilter.discover_blooms('/path')

    assert ['/path/1', '/path/3'] == found

    # The filesystem should have been consulted exactly as expected.
    listdir_mock.assert_called_once_with('/path')
    for entry in entries:
        isdir_mock.assert_any_call(entry)
def test_init_without_optimizations(timing_bloom_mock):
    """Defaults should be filled in when only the required args are given."""
    bloom = ScalingTimingBloomFilter(
        error=0.0002,
        capacity=1000,
        decay_time=86400,
        disable_optimizations=True,
    )

    # Supplied values are stored as-is; everything else takes its default.
    assert_bloom_values(bloom, {
        'error': 0.0002,
        'capacity': 1000,
        'decay_time': 86400,
        'error_tightening_ratio': 0.5,
        'error_initial': 0.0001,
        'growth_factor': 2,
        'max_fill_factor': 0.8,
        'min_fill_factor': 0.2,
        'insert_tail': True,
        'data_path': None,
        'seconds_per_tick': bloom.blooms[0].seconds_per_tick,
        'disable_optimizations': True,
    })

    # Exactly one sub-bloom is created, with the tightened initial error.
    assert 1 == len(bloom.blooms)
    timing_bloom_mock.assert_called_once_with(
        capacity=693,
        decay_time=86400,
        error=0.0001,
        id=0,
        disable_optimizations=True,
    )

    # Without an explicit ticker, a no-op ticker drives decay.
    assert isinstance(bloom.ticker, NoOpTicker)
def get_bloom(n=100, **updates):
    """Build a ScalingTimingBloomFilter pre-loaded with `n` string keys.

    `updates` override entries in BLOOM_DEFAULTS before construction.
    The returned bloom is sanity-checked before being handed back.
    """
    kwargs = copy(BLOOM_DEFAULTS)
    kwargs.update(updates)

    bloom = ScalingTimingBloomFilter(**kwargs)

    # Bug fix: honour `n` instead of the hard-coded 100 so the sanity
    # checks below (already written in terms of `n`) hold for any `n`.
    for i in range(n):
        bloom.add(str(i))

    # Check that the bloom is working as expected
    assert bloom.contains('1')
    assert bloom.contains(str(n // 2))
    assert not bloom.contains(str(n + 1))

    return bloom
def get_bloom(n=100, **updates):
    """Build a ScalingTimingBloomFilter pre-loaded with `n` string keys.

    `updates` override entries in BLOOM_DEFAULTS before construction.
    The returned bloom is sanity-checked before being handed back.
    """
    kwargs = copy(BLOOM_DEFAULTS)
    kwargs.update(updates)

    bloom = ScalingTimingBloomFilter(**kwargs)

    # Bug fix: honour `n` instead of the hard-coded 100 so the sanity
    # checks below (already written in terms of `n`) hold for any `n`.
    for i in range(n):
        bloom.add(str(i))

    # Check that the bloom is working as expected
    assert bloom.contains('1')
    assert bloom.contains(str(n // 2))
    assert not bloom.contains(str(n + 1))

    return bloom
def test_load__with_blooms(timing_bloom_mock):
    """load() should read the metadata file and load every discovered bloom."""
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']

    # Bug fix: patch discover_blooms inside a context manager so the real
    # classmethod is restored afterwards instead of leaking the mock into
    # every other test in the run.
    with patch.object(ScalingTimingBloomFilter, 'discover_blooms',
                      return_value=bloom_paths):
        with patch('__builtin__.open', open_mock, create=True):
            loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Check that the loaded bloom looks as expected
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    assert 2 == len(loaded.blooms)

    # Check that the sub blooms were loaded as expected
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    assert len(bloom_paths) == timing_bloom_mock.load.call_count
def test_load__with_blooms(timing_bloom_mock):
    """load() should read the metadata file and load every discovered bloom."""
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']

    # Bug fix: patch discover_blooms inside a context manager so the real
    # classmethod is restored afterwards instead of leaking the mock into
    # every other test in the run.
    with patch.object(ScalingTimingBloomFilter, 'discover_blooms',
                      return_value=bloom_paths):
        with patch('__builtin__.open', open_mock, create=True):
            loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Check that the loaded bloom looks as expected
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    assert 2 == len(loaded.blooms)

    # Check that the sub blooms were loaded as expected
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    assert len(bloom_paths) == timing_bloom_mock.load.call_count
def test_bloom_initial_save_and_load_with_optimization(tmpdir):
    """A freshly saved bloom should round-trip through load() intact."""
    work_dir = tmpdir.mkdir('bloom_test')
    work_path = str(work_dir)

    bloom = get_bloom(data_path=work_path)
    bloom.save()

    # save() should lay down one sub-bloom directory plus a metadata file.
    blooms_dir = work_dir.join('blooms')
    meta_file = work_dir.join('meta.json')
    assert blooms_dir.check()
    assert 1 == len(blooms_dir.listdir())
    assert meta_file.check()

    # The reloaded bloom must answer membership queries like the original.
    reloaded = ScalingTimingBloomFilter.load(work_path)
    assert reloaded.contains('1')
    assert reloaded.contains('50')
    assert not reloaded.contains('101')
def get_bloom(bloom_mocks=None, **overrides):
    '''
    Helper function to easily get a bloom for testing.

    When `bloom_mocks` is given, each entry describes one mocked
    TimingBloomFilter: its 'attrs' are set as plain attributes and its
    'return_values' configure the corresponding mock methods.
    '''
    kwargs = copy(BLOOM_DEFAULTS)
    kwargs.update(overrides)

    if bloom_mocks:
        mocked_blooms = []
        for spec in bloom_mocks:
            fake = MagicMock(TimingBloomFilter)
            # Defaults first, then any per-mock attribute overrides.
            attrs = copy(TIMING_BLOOM_DEFAULTS)
            attrs.update(spec.get('attrs', {}))
            for name, value in attrs.iteritems():
                setattr(fake, name, value)
            # Wire up canned return values for the named methods.
            for name, value in spec.get('return_values', {}).iteritems():
                getattr(fake, name).return_value = value
            mocked_blooms.append(fake)
        kwargs['blooms'] = mocked_blooms

    return ScalingTimingBloomFilter(**kwargs)