def test_warns_experimental(self, repo_with_20_samples):
    """Building a loader from a read checkout emits the 'experimental' warning."""
    repo = repo_with_20_samples
    checkout = repo.checkout()
    aset_one = checkout.arraysets['writtenaset']
    aset_two = checkout.arraysets['second_aset']
    with pytest.warns(UserWarning, match='Dataloaders are experimental'):
        make_tf_dataset([aset_one, aset_two])
    checkout.close()
def test_dataset_loader_fails_with_write_enabled_checkout(self, repo_with_20_samples):
    """A write-enabled checkout cannot back a tf dataset; TypeError expected."""
    repo = repo_with_20_samples
    write_co = repo.checkout(write=True)
    asets = [write_co.arraysets['writtenaset'], write_co.arraysets['second_aset']]
    with pytest.raises(TypeError):
        make_tf_dataset(asets)
    write_co.close()
def test_wans_arrayset_sample_size_mismatch(self, repo_with_20_samples):
    """Unequal sample counts across arraysets trigger a UserWarning.

    One sample is deleted from ``second_aset`` so the two arraysets no
    longer have matching lengths when the loader is built.
    """
    # NOTE(review): 'wans' in the method name looks like a typo for 'warns';
    # left unchanged since the test id is part of the suite's interface.
    repo = repo_with_20_samples
    write_co = repo.checkout(write=True)
    del write_co.arraysets['second_aset']['10']
    write_co.commit('deleting')
    write_co.close()

    read_co = repo.checkout()
    asets = [read_co.arraysets['writtenaset'], read_co.arraysets['second_aset']]
    with pytest.warns(UserWarning, match='Arraysets do not contain equal number of samples'):
        make_tf_dataset(asets)
    read_co.close()
def test_with_keys(self, repo_with_20_samples):
    """Passing ``keys`` restricts the dataset to exactly those samples."""
    repo = repo_with_20_samples
    checkout = repo.checkout()
    aset = checkout.arraysets['writtenaset']

    wanted = ['2', '4', '5', '6', '7', '9', '15', '18', '19']
    # Samples deliberately left out of ``wanted``; none may appear in a batch.
    excluded = [aset[k] for k in ('0', '1', '3', '8')]

    batched = make_tf_dataset(aset, keys=wanted).batch(3)
    batch_count = 0
    for batch in batched:
        batch_count += 1
        assert batch[0].shape == tf.TensorShape((3, 5, 7))
        for sample in batch[0]:
            for bad in excluded:
                assert not np.allclose(sample, bad)
    assert batch_count == 3
    checkout.close()
def test_lots_of_data_with_multiple_backend(self, repo_with_10000_samples):
    """10k samples (spread over multiple backends) batch cleanly in 1000s."""
    repo = repo_with_10000_samples
    checkout = repo.checkout()
    dataset = make_tf_dataset([checkout.arraysets['aset']]).batch(1000)
    for batch in dataset:
        assert batch[0].shape == (1000, 5, 7)
    checkout.close()
def test_local_without_data_fails_data_unavailable(self, written_two_cmt_server_repo, managed_tmpdir):
    """Requesting keys whose data is not present locally raises FileNotFoundError.

    A fresh clone holds references but no sample data; building a loader
    over keys '1' and '2' must fail rather than silently yield nothing.
    """
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    server, _ = written_two_cmt_server_repo
    repo = Repository(path=new_tmpdir, exists=False)
    repo.clone('name', '[email protected]', server, remove_old=True)
    co = repo.checkout()
    aset = co.arraysets['writtenaset']
    with pytest.raises(FileNotFoundError):
        # Return value intentionally discarded; only the raise matters.
        make_tf_dataset(aset, keys=['1', '2'])
    co.close()
    repo._env._close_environments()
def test_dataset_loader(self, repo_with_20_samples):
    """Two arraysets zip into a dataset yielding one tensor per arrayset."""
    repo = repo_with_20_samples
    checkout = repo.checkout()
    asets = [checkout.arraysets['writtenaset'], checkout.arraysets['second_aset']]
    dataset = make_tf_dataset(asets).batch(6)
    for first, second in dataset.take(2):
        assert first.shape == tf.TensorShape((6, 5, 7))
        assert second.shape == tf.TensorShape((6, 5, 7))
    checkout.close()
def test_local_without_data_fails_no_common_no_local(
        self, written_two_cmt_server_repo, managed_tmpdir):
    """With no local data at all, building a loader raises ValueError.

    Unlike the FileNotFoundError case, no explicit keys are requested:
    the loader has nothing in common between local and remote to serve.
    """
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    server, _ = written_two_cmt_server_repo
    repo = Repository(path=new_tmpdir, exists=False)
    repo.clone('name', '[email protected]', server, remove_old=True)
    co = repo.checkout()
    # NOTE(review): sibling tests use ``co.arraysets`` — presumably
    # ``columns`` is the newer accessor for the same thing; confirm.
    aset = co.columns['writtenaset']
    with pytest.raises(ValueError):
        # Return value intentionally discarded; only the raise matters.
        make_tf_dataset(aset)
    co.close()
    repo._env._close_environments()
def test_variably_shaped(self, variable_shape_written_repo):
    """Variably shaped samples collate via ``padded_batch`` without error.

    Collation depends on how the generator returns data, so variable
    shapes need their own coverage.
    """
    repo = variable_shape_written_repo
    write_co = repo.checkout(write=True)
    writer_aset = write_co.arraysets['writtenaset']
    for width in range(5, 10):
        writer_aset[width] = np.random.random((2, width))
    write_co.commit('added data')
    write_co.close()

    read_co = repo.checkout()
    dataset = make_tf_dataset(read_co.arraysets['writtenaset'])
    padded = dataset.padded_batch(5, padded_shapes=(tf.TensorShape((2, None)),))
    for batch in padded:
        assert batch[0].shape[0] == 5
        assert batch[0].shape[1] == 2
        # Widths range over 5..10, so the padded last dim lands in (4, 11).
        assert 4 < batch[0].shape[2] < 11
    read_co.close()
def test_with_index_range(self, repo_with_20_samples):
    """``index_range`` slices the sample ordering; excluded samples never appear."""
    repo = repo_with_20_samples
    checkout = repo.checkout()
    aset = checkout.arraysets['writtenaset']
    # Samples 0 and 1 fall outside slice(2, 20) and must not show up.
    excluded = [aset['0'], aset['1']]

    batched = make_tf_dataset(aset, index_range=slice(2, 20)).batch(3)
    batch_count = 0
    for batch in batched:
        batch_count += 1
        assert batch[0].shape == tf.TensorShape((3, 5, 7))
        for sample in batch[0]:
            for bad in excluded:
                assert not np.allclose(sample, bad)
    # 18 samples / batches of 3 == 6 batches.
    assert batch_count == 6
    checkout.close()