def test_save_and_load_with_scaling(tmpdir):
    """Saving, reloading, scaling past capacity, and saving again should
    persist a second sub-bloom and round-trip the newly added keys."""
    bloom_dir = tmpdir.mkdir('bloom_test')
    data_path = str(bloom_dir)

    # Build a small bloom (capacity 200, optimizations disabled) rooted in
    # the temporary directory and persist it.
    bloom = get_bloom(
        data_path=data_path,
        disable_optimizations=True,
        capacity=200)
    bloom.save()

    # The initial save should produce one sub-bloom directory plus metadata.
    blooms_dir = bloom_dir.join('blooms')
    meta_path = bloom_dir.join('meta.json')
    assert blooms_dir.check()
    assert len(blooms_dir.listdir()) == 1
    assert meta_path.check()

    # Round-trip through load, then push in enough keys to force a scale-up.
    second_gen = ScalingTimingBloomFilter.load(data_path)
    for key in range(101, 201):
        second_gen.add(str(key))
    second_gen.save()

    # After scaling, a second sub-bloom directory should exist on disk.
    assert blooms_dir.check()
    assert len(blooms_dir.listdir()) == 2
    assert meta_path.check()

    # A final load must see the newly added keys (and not an absent one).
    third_gen = ScalingTimingBloomFilter.load(data_path)
    assert third_gen.contains('101')
    assert third_gen.contains('150')
    assert not third_gen.contains('201')
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """A second save after a reload must rewrite both the bloom data file
    and the metadata file, and the result must round-trip correctly."""
    bloom_dir = tmpdir.mkdir('bloom_test')
    data_path = str(bloom_dir)

    # Build a default (optimized) bloom in the temp directory and persist it.
    bloom = get_bloom(data_path=data_path)
    bloom.save()

    # Record the modification times the first save produced.
    bloom_file = bloom_dir.join('blooms/0/bloom.npy')
    meta_path = bloom_dir.join('meta.json')
    initial_bloom_mtime = bloom_file.mtime()
    initial_meta_mtime = meta_path.mtime()

    # Filesystem mtime granularity can be a full second, so wait it out.
    time.sleep(1)

    # Load, add a few more keys, and persist again.
    second_gen = ScalingTimingBloomFilter.load(data_path)
    for key in ('101', '102', '103'):
        second_gen.add(key)
    second_gen.save()

    # Both files should have been rewritten by the second save.
    assert initial_bloom_mtime < bloom_file.mtime()
    assert initial_meta_mtime < meta_path.mtime()

    # One more load: both the original and the new keys must be present.
    third_gen = ScalingTimingBloomFilter.load(data_path)
    assert third_gen.contains('50')
    assert third_gen.contains('103')
    assert not third_gen.contains('105')
def test_save_and_load_with_scaling(tmpdir):
    """Scaling past capacity between a load and a save should persist a
    second sub-bloom and round-trip the newly added keys.

    NOTE(review): this redefines a test with the same name earlier in the
    file; pytest only collects this later definition.
    """
    workdir = tmpdir.mkdir('bloom_test')
    path = str(workdir)

    # Persist a freshly built bloom (no optimizations, capacity 200).
    fresh = get_bloom(data_path=path, disable_optimizations=True, capacity=200)
    fresh.save()

    # First save: exactly one sub-bloom plus the metadata file.
    blooms = workdir.join('blooms')
    meta = workdir.join('meta.json')
    assert blooms.check()
    assert 1 == len(blooms.listdir())
    assert meta.check()

    # Reload, then insert keys 101..200 to push past capacity and scale.
    gen2 = ScalingTimingBloomFilter.load(path)
    for n in range(101, 201):
        gen2.add(str(n))
    gen2.save()

    # Scaling should have produced a second sub-bloom directory.
    assert blooms.check()
    assert 2 == len(blooms.listdir())
    assert meta.check()

    # A final load must contain the new keys but not an absent one.
    gen3 = ScalingTimingBloomFilter.load(path)
    assert gen3.contains('101')
    assert gen3.contains('150')
    assert not gen3.contains('201')
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """Repeated save after a reload must advance the mtimes of the bloom
    data file and the metadata file, and the state must round-trip.

    NOTE(review): this redefines a test with the same name earlier in the
    file; pytest only collects this later definition.
    """
    workdir = tmpdir.mkdir('bloom_test')
    path = str(workdir)

    # Build and persist a default (optimized) bloom.
    get_bloom(data_path=path).save()

    # Snapshot the mtimes the first save produced.
    data_file = workdir.join('blooms/0/bloom.npy')
    meta = workdir.join('meta.json')
    mtime_data_before = data_file.mtime()
    mtime_meta_before = meta.mtime()

    # mtime resolution may be as coarse as one second; sleep so a rewrite
    # is guaranteed to be observable.
    time.sleep(1)

    # Reload, mutate, and persist again.
    gen2 = ScalingTimingBloomFilter.load(path)
    gen2.add('101')
    gen2.add('102')
    gen2.add('103')
    gen2.save()

    # The second save must have rewritten both files.
    assert data_file.mtime() > mtime_data_before
    assert meta.mtime() > mtime_meta_before

    # Final load: original and newly added keys present, absent key not.
    gen3 = ScalingTimingBloomFilter.load(path)
    assert gen3.contains('50')
    assert gen3.contains('103')
    assert not gen3.contains('105')
def test_load__with_blooms(timing_bloom_mock):
    """load() should read meta.json, apply every stored attribute to the new
    instance, and load each discovered sub-bloom via the timing-bloom class.

    Fix: the original assigned a MagicMock directly onto
    ScalingTimingBloomFilter.discover_blooms and never restored it, leaking
    the mock onto the class for every subsequent test in the session.
    patch.object as a context manager restores the real method on exit.
    """
    # Metadata that load() is expected to copy onto the instance verbatim.
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']

    # Call load with discover_blooms patched (and later restored) and the
    # builtin open replaced so meta.json is served from the mock.
    with patch.object(ScalingTimingBloomFilter, 'discover_blooms',
                      return_value=bloom_paths):
        with patch('__builtin__.open', open_mock, create=True):
            loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Check that the loaded bloom carries every stored attribute.
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    expected_bloom_count = 2
    assert expected_bloom_count == len(loaded.blooms)

    # Check that each discovered sub-bloom was loaded exactly once.
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    expected_load_calls = len(bloom_paths)
    assert expected_load_calls == timing_bloom_mock.load.call_count
def test_load__with_blooms(timing_bloom_mock):
    """load() should read meta.json, apply every stored attribute to the new
    instance, and load each discovered sub-bloom via the timing-bloom class.

    NOTE(review): this redefines a test with the same name earlier in the
    file; pytest only collects this later definition.

    Fix: the original assigned a MagicMock directly onto
    ScalingTimingBloomFilter.discover_blooms and never restored it, leaking
    the mock onto the class for every subsequent test in the session.
    patch.object as a context manager restores the real method on exit.
    """
    # Metadata that load() is expected to copy onto the instance verbatim.
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']

    # Call load with discover_blooms patched (and later restored) and the
    # builtin open replaced so meta.json is served from the mock.
    with patch.object(ScalingTimingBloomFilter, 'discover_blooms',
                      return_value=bloom_paths):
        with patch('__builtin__.open', open_mock, create=True):
            loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Check that the loaded bloom carries every stored attribute.
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    expected_bloom_count = 2
    assert expected_bloom_count == len(loaded.blooms)

    # Check that each discovered sub-bloom was loaded exactly once.
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    expected_load_calls = len(bloom_paths)
    assert expected_load_calls == timing_bloom_mock.load.call_count
def test_bloom_initial_save_and_load_with_optimization(tmpdir):
    """An initial save should write one sub-bloom plus metadata, and a load
    from that directory should reproduce the bloom's membership."""
    bloom_dir = tmpdir.mkdir('bloom_test')
    data_path = str(bloom_dir)

    # Build a default (optimized) bloom in the temp directory and persist it.
    bloom = get_bloom(data_path=data_path)
    bloom.save()

    # The save should produce one sub-bloom directory plus the metadata file.
    blooms_dir = bloom_dir.join('blooms')
    meta_path = bloom_dir.join('meta.json')
    assert blooms_dir.check()
    assert len(blooms_dir.listdir()) == 1
    assert meta_path.check()

    # Round-trip through load and verify membership is preserved.
    reloaded = ScalingTimingBloomFilter.load(data_path)
    assert reloaded.contains('1')
    assert reloaded.contains('50')
    assert not reloaded.contains('101')
def test_bloom_initial_save_and_load_with_optimization(tmpdir):
    """Initial save writes one sub-bloom plus metadata; loading from that
    directory reproduces the bloom's membership.

    NOTE(review): this redefines a test with the same name earlier in the
    file; pytest only collects this later definition.
    """
    workdir = tmpdir.mkdir('bloom_test')
    path = str(workdir)

    # Build and persist a default (optimized) bloom.
    get_bloom(data_path=path).save()

    # Exactly one sub-bloom and the metadata file should now exist.
    blooms = workdir.join('blooms')
    meta = workdir.join('meta.json')
    assert blooms.check()
    assert 1 == len(blooms.listdir())
    assert meta.check()

    # Reload and confirm membership survived the round trip.
    restored = ScalingTimingBloomFilter.load(path)
    assert restored.contains('1')
    assert restored.contains('50')
    assert not restored.contains('101')