import shutil
from datetime import datetime

import pytest

import provenance as p
import provenance.blobstores as bs
import provenance.repos as r
# assumed home of the PermissionError raised by permission-guarded stores;
# adjust to wherever this suite actually imports `cs` from
import provenance._commonstore as cs


def test_chained_read_through_write():
    read_store = bs.MemoryStore({'foo': 42}, read=True, write=False)
    store_ahead = bs.MemoryStore(read=True, write=True, read_through_write=True)
    read_through_write_store = bs.MemoryStore(read=True, write=True,
                                              read_through_write=True)
    no_read_through_write_store = bs.MemoryStore(read=True, write=True,
                                                 read_through_write=False)
    stores = [no_read_through_write_store, read_through_write_store,
              read_store, store_ahead]
    chained_store = bs.ChainedStore(stores)

    assert 'foo' not in read_through_write_store
    assert 'foo' not in no_read_through_write_store
    assert 'foo' not in store_ahead

    # verify we read from the read-only store and that the value is written
    # through to earlier stores in the chain that allow read-through writes
    assert chained_store['foo'] == 42
    assert 'foo' in read_through_write_store
    # stores behind the one that satisfied the read, and stores with
    # read_through_write disabled, are left untouched
    assert 'foo' not in store_ahead
    assert 'foo' not in no_read_through_write_store


def test_permissions():
    store = bs.MemoryStore(read=True, write=True, delete=True)
    store.put('a', 1)
    assert store.get('a') == 1
    store.delete('a')

    store = bs.MemoryStore(read=False, write=False, delete=False)
    with pytest.raises(cs.PermissionError):
        store.put('a', 1)

    with pytest.raises(cs.PermissionError):
        store.get('a')

    with pytest.raises(cs.PermissionError):
        store.delete('a')
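

# For context: a minimal sketch of the guard these permission flags imply,
# assuming the cs (common-store) module defines the PermissionError used
# above. The helper name and the flag attributes are illustrative
# assumptions, not the library's actual internals.
def _check_permission(store, op):
    # op is one of 'read', 'write', 'delete' (hypothetical flag attributes)
    if not getattr(store, '_' + op, False):
        raise cs.PermissionError('{} not permitted on this store'.format(op))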


def test_chained_with_readonly():
    read_store = bs.MemoryStore({'foo': 42}, read=True, write=False,
                                delete=False)
    write_store = bs.MemoryStore(read=True, write=True, delete=False)
    stores = [read_store, write_store]
    chained_store = bs.ChainedStore(stores)

    # verify we read from the read-only store
    assert chained_store['foo'] == 42

    # but that it is not written to
    chained_store.put('bar', 55)
    assert 'bar' in chained_store
    assert 'bar' in write_store
    assert 'bar' not in read_store


def test_chained_writes_may_be_allowed_on_read_throughs_only():
    read_store = bs.MemoryStore({'foo': 42}, read=True, write=False)
    read_through_write_only_store = bs.MemoryStore(read=True, write=False,
                                                   read_through_write=True)
    write_store = bs.MemoryStore(read=True, write=True,
                                 read_through_write=False)
    stores = [write_store, read_through_write_only_store, read_store]
    chained_store = bs.ChainedStore(stores)

    # verify we read from the read-only store and that the value is written
    # through only to the store that allows read-through writes
    assert chained_store['foo'] == 42
    assert 'foo' in read_through_write_only_store
    assert 'foo' not in write_store

    # a direct put, by contrast, lands only in regular writable stores
    chained_store.put('bar', 55)
    assert 'bar' in chained_store
    assert 'bar' not in read_through_write_only_store
    assert 'bar' in write_store


def test_chained_storage_with_disk_and_s3_sharing_cachedir(s3fs):
    tmp_dir = '/tmp/prov_shared_store'
    shutil.rmtree(tmp_dir, ignore_errors=True)

    mem_store = bs.MemoryStore(read=True, write=True, delete=True)
    disk_store = bs.DiskStore(tmp_dir, read=True, write=True, delete=True)
    s3_store = bs.S3Store(
        tmp_dir,
        s3fs=s3fs,
        basepath='bucket/prov_test',
        read=True,
        write=True,
        delete=True,
        always_check_remote=True,
    )
    stores = [mem_store, disk_store, s3_store]
    chained_store = bs.ChainedStore(stores)

    key = 'foobar'
    data = {'a': 1, 'b': 2}

    for store in stores:
        assert key not in store
    assert key not in chained_store

    chained_store.put(key, data)
    assert key in chained_store

    for store in stores:
        assert key in store
        assert store.get(key) == data
        assert store[key] == data

        store.delete(key)
        assert key not in store

        with pytest.raises(KeyError):
            store.delete(key)

        with pytest.raises(KeyError):
            store.get(key)
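

# The s3fs fixture consumed above normally lives in conftest.py. A minimal
# sketch of one way to provide it, assuming moto < 5 (which exposes mock_s3)
# and an older, synchronous s3fs; the suite's real fixture may differ.
@pytest.fixture(scope='session')
def s3fs():
    import moto
    import s3fs as s3fs_lib

    mock = moto.mock_s3()
    mock.start()
    fs = s3fs_lib.S3FileSystem()
    fs.mkdir('bucket')  # the bucket referenced by basepath='bucket/prov_test'
    yield fs
    mock.stop()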


# presumably a pytest fixture, given how stores are injected into the tests
# in this suite
@pytest.fixture
def memory_store():
    return bs.MemoryStore()


def test_memory_blobstore_raises(key, obj):
    store = bs.MemoryStore(read=True, write=True, delete=True,
                           on_duplicate_key='raise')
    assert_store_basic_ops(store, key, obj)


def test_memory_blobstore(key, obj):
    store = bs.MemoryStore(read=True, write=True, delete=True)
    assert_store_basic_ops(store, key, obj)
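

# assert_store_basic_ops is used above but not defined in this section. A
# minimal sketch of what such a helper presumably checks, inferred from the
# store API exercised elsewhere in this file; the suite's actual helper may
# cover more.
def assert_store_basic_ops(store, key, obj):
    assert key not in store
    store.put(key, obj)
    assert key in store
    assert store.get(key) == obj
    assert store[key] == obj

    store.delete(key)
    assert key not in store
    with pytest.raises(KeyError):
        store.get(key)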


def test_inputs_json(db_session):
    repo = r.DbRepo(db_session, bs.MemoryStore())

    @p.provenance(version=0, name='initial_data', repo=repo)
    def load_data(filename, timestamp):
        return {'data': [1, 2, 3], 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_X(data, process_x_inc, timestamp):
        _data = [i + process_x_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_Y(data, process_y_inc, timestamp):
        _data = [i + process_y_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def combine_processed_data(filename, inc_x, inc_y, timestamp):
        _data = [a + b for a, b in zip(inc_x['data'], inc_y['data'])]
        return {'data': _data, 'timestamp': timestamp}

    def pipeline(filename, timestamp, process_x_inc, process_y_inc):
        data = load_data(filename, timestamp)
        inc_x = process_data_X(data, process_x_inc, timestamp)
        inc_y = process_data_Y(data, process_y_inc, timestamp)
        res = combine_processed_data(filename, inc_x, inc_y, timestamp)
        return {'data': data, 'inc_x': inc_x, 'inc_y': inc_y, 'res': res}

    now = datetime(2016, 9, 27, 7, 51, 11, 613544)

    expected_inputs_json = {
        '__varargs': [],
        'filename': 'foo-bar',
        'timestamp': now,
        'inc_x': {
            'id': 'c74da9d379234901fe7a89e03fa800b0',  # md5
            # 'id': '2c33a362ebd51f830d0b245473ab6c1269674259',  # sha1
            'name': 'test_repos.process_data_X',
            'type': 'ArtifactProxy',
        },
        'inc_y': {
            'id': 'a1bd4d4ae1f33ae6379613618427f127',  # md5
            # 'id': 'f9b1bb7a8aaf435fbf60b92cd88bf6c46604f702',  # sha1
            'name': 'test_repos.process_data_Y',
            'type': 'ArtifactProxy',
        },
    }

    results = pipeline(filename='foo-bar', process_x_inc=5,
                       process_y_inc=10, timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json

    # a second, identical run should hit the cache and yield the same
    # inputs JSON
    results = pipeline(filename='foo-bar', process_x_inc=5,
                       process_y_inc=10, timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json
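

# A conceptual sketch of the transformation the assertions above pin down,
# inferred from the expected JSON rather than from the library's source:
# plain inputs pass through unchanged, while artifact proxies collapse to an
# {id, name, type} descriptor. The duck-typed `artifact` check and the
# attribute names below are illustrative assumptions.
def _inputs_json_sketch(inputs):
    def describe(value):
        artifact = getattr(value, 'artifact', None)
        if artifact is not None:
            return {'id': artifact.id,
                    'name': artifact.name,
                    'type': 'ArtifactProxy'}
        return value

    return {name: describe(value) for name, value in inputs.items()}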