def test_publish_error_messages():
    """Check the error message raised when publishing selected versions.

    For each of the versions ``1.0.0``, ``4.0.0`` and ``5.0.0``
    ``audb.publish`` is called with ``previous_version=None``
    and is expected to raise a ``RuntimeError``
    whose message matches the text stored for that version.

    """
    # Expected message per version, iterated in insertion order
    expected_messages = {
        '1.0.0': (
            "A version '1.0.0' already exists for database "
            f"'{DB_NAME}'."
        ),
        '4.0.0': (
            "5 files are referenced in tables that cannot be found. "
            "Missing files are: '['audio/002.wav', 'audio/003.wav', "
            "'audio/004.wav', 'audio/005.wav', 'audio/new.wav']'."
        ),
        '5.0.0': (
            "25 files are referenced in tables that cannot be found. "
            "Missing files are: '['audio/002.wav', 'audio/003.wav', "
            "'audio/004.wav', 'audio/005.wav', 'audio/new.wav', "
            "'file0.wav', 'file1.wav', 'file10.wav', 'file11.wav', "
            "'file12.wav', 'file13.wav', 'file14.wav', 'file15.wav', "
            "'file16.wav', 'file17.wav', 'file18.wav', 'file19.wav', "
            "'file2.wav', 'file3.wav', 'file4.wav'], ...'."
        ),
    }

    for version, error_msg in expected_messages.items():
        # The message contains regex meta characters, hence the escaping
        pattern = re.escape(error_msg)
        with pytest.raises(RuntimeError, match=pattern):
            audb.publish(
                DB_ROOT_VERSION[version],
                version,
                pytest.PUBLISH_REPOSITORY,
                previous_version=None,
                num_workers=pytest.NUM_WORKERS,
                verbose=False,
            )
def test_invalid_archives(name):
    """Expect a ``ValueError`` when publishing with archive name ``name``.

    Args:
        name: archive name assigned to the file ``audio/001.wav``

    """
    media_file = os.path.join('audio', '001.wav')
    file_to_archive = dict([(media_file, name)])

    with pytest.raises(ValueError):
        audb.publish(
            DB_ROOT_VERSION['1.0.0'],
            '1.0.1',
            pytest.PUBLISH_REPOSITORY,
            archives=file_to_archive,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )
def fixture_publish_db():
    """Create a database with audio files and publish it as version 1.0.0.

    For every entry in ``DB_FILES``
    an all-zero signal with the configured number of channels,
    sampling rate and bit depth is written below ``DB_ROOT``.
    The statements after the ``yield`` clear the created folders again.

    """
    def _clear_folders():
        clear_root(DB_ROOT)
        clear_root(pytest.FILE_SYSTEM_HOST)

    _clear_folders()

    # create db
    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db['files'] = audformat.Table(audformat.filewise_index(list(DB_FILES)))
    db['files']['original'] = audformat.Column()
    db['files']['original'].set(list(DB_FILES))

    # write one audio file per database entry
    for file, spec in DB_FILES.items():
        sampling_rate = spec['sampling_rate']
        # all-zero signal of shape (channels, sampling_rate)
        signal = np.zeros(
            (spec['channels'], sampling_rate),
            dtype=np.float32,
        )
        path = os.path.join(DB_ROOT, file)
        audeer.mkdir(os.path.dirname(path))
        audiofile.write(
            path,
            signal,
            sampling_rate,
            bit_depth=spec['bit_depth'],
        )

    # three consecutive one second segments of the first file
    first_file = list(DB_FILES)[0]
    segments_index = audformat.segmented_index(
        [first_file] * 3,
        starts=['0s', '1s', '2s'],
        ends=['1s', '2s', '3s'],
    )
    db['segments'] = audformat.Table(segments_index)
    db.save(DB_ROOT)

    # publish db
    audb.publish(
        DB_ROOT,
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    _clear_folders()
def publish_db():
    """Publish versions 1.0.0 and 2.0.0 of a filewise test database.

    Version 1.0.0 contains the files listed in ``DB_FILES['1.0.0']``,
    its first two files are assigned to the archive ``'bundle'``.
    Version 2.0.0 extends the table by the files in ``DB_FILES['2.0.0']``.
    The statements after the ``yield`` clear the created folders again.

    """
    def _clear_folders():
        clear_root(DB_ROOT)
        clear_root(pytest.FILE_SYSTEM_HOST)

    def _save_with_audio(database):
        # Store the database and create its audio files afterwards
        database.save(DB_ROOT)
        audformat.testing.create_audio_files(database)

    _clear_folders()

    # create db
    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    initial_index = audformat.filewise_index(DB_FILES['1.0.0'])
    db['files'] = audformat.Table(initial_index)

    # publish 1.0.0
    _save_with_audio(db)
    archives = {db.files[position]: 'bundle' for position in (0, 1)}
    audb.publish(
        DB_ROOT,
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        verbose=False,
    )

    # publish 2.0.0
    additional_index = audformat.filewise_index(DB_FILES['2.0.0'])
    db['files'].extend_index(additional_index, inplace=True)
    _save_with_audio(db)
    audb.publish(
        DB_ROOT,
        '2.0.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    _clear_folders()
def fixture_publish_db():
    """Store the module level database ``DB`` and publish it.

    The database is saved to ``DB_ROOT``
    and published as ``DB_VERSION``.
    The statements after the ``yield`` clear the created folders again.

    """
    def _clear_folders():
        clear_root(DB_ROOT)
        clear_root(pytest.FILE_SYSTEM_HOST)

    _clear_folders()

    # create db
    DB.save(DB_ROOT)

    # publish db
    publish_options = {'verbose': False}
    audb.publish(
        DB_ROOT,
        DB_VERSION,
        pytest.PUBLISH_REPOSITORY,
        **publish_options,
    )

    yield

    _clear_folders()
def fixture_publish_db():
    """Publish a database with two filewise tables as ``DB_VERSION``.

    ``table1`` is created with ``num_files=[0, 1, 2]``
    and ``table2`` with ``num_files=[1, 2, 3]``.
    The statements after the ``yield`` clear the created folders again.

    """
    def _clear_folders():
        clear_root(DB_ROOT)
        clear_root(pytest.FILE_SYSTEM_HOST)

    _clear_folders()

    # create db
    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db.schemes['scheme'] = audformat.Scheme()
    files_per_table = {
        'table1': [0, 1, 2],
        'table2': [1, 2, 3],
    }
    for table_id, file_ids in files_per_table.items():
        audformat.testing.add_table(
            db,
            table_id,
            'filewise',
            num_files=file_ids,
        )
    db.save(DB_ROOT)
    audformat.testing.create_audio_files(db)

    # publish 1.0.0
    audb.publish(
        DB_ROOT,
        DB_VERSION,
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    _clear_folders()
def fixture_publish_db():
    """Publish versions 1.0.0, 1.1.0, 1.1.1, 2.0.0 and 3.0.0 of a database.

    Every version is saved to its own folder taken from ``DB_ROOT_VERSION``.
    Starting with version 1.1.0
    the ``db.csv`` file of the previous version folder
    is copied into the new version folder before publishing
    (presumably the dependency table audb expects there
    -- verify against ``audb.core.define.DEPENDENCIES_FILE``).

    Changes per version:

    * 1.0.0: table ``emotion`` and table ``files`` with speaker labels,
      the speaker labels are used as archive names
    * 1.1.0: adds table ``train``
    * 1.1.1: sets the first label of table ``train`` to ``None``
    * 2.0.0: alters the first media file and removes the last one
    * 3.0.0: removes table ``train``

    The statements after the ``yield`` clear the created folders again.

    """
    clear_root(DB_ROOT)
    clear_root(pytest.FILE_SYSTEM_HOST)

    # create db

    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db.schemes['scheme'] = audformat.Scheme(
        labels=['positive', 'neutral', 'negative']
    )
    audformat.testing.add_table(
        db,
        'emotion',
        audformat.define.IndexType.SEGMENTED,
        num_files=5,
        columns={'emotion': ('scheme', None)}
    )
    db.schemes['speaker'] = audformat.Scheme(
        labels=['adam', 'eve']
    )
    db['files'] = audformat.Table(db.files)
    db['files']['speaker'] = audformat.Column(scheme_id='speaker')
    # Only the first four files get a speaker assigned
    db['files']['speaker'].set(
        ['adam', 'adam', 'eve', 'eve'],
        index=audformat.filewise_index(db.files[:4]),
    )

    # publish 1.0.0

    db.save(DB_ROOT_VERSION['1.0.0'])
    audformat.testing.create_audio_files(db)
    # Files without a speaker label are dropped from the archive mapping
    archives = db['files']['speaker'].get().dropna().to_dict()
    audb.publish(
        DB_ROOT_VERSION['1.0.0'],
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        verbose=False,
    )

    # publish 1.1.0, add table

    audformat.testing.add_table(
        db, 'train', audformat.define.IndexType.SEGMENTED,
        columns={'label': ('scheme', None)}
    )
    db.save(DB_ROOT_VERSION['1.1.0'])
    audformat.testing.create_audio_files(db)
    shutil.copy(
        os.path.join(DB_ROOT_VERSION['1.0.0'], 'db.csv'),
        os.path.join(DB_ROOT_VERSION['1.1.0'], 'db.csv'),
    )
    audb.publish(
        DB_ROOT_VERSION['1.1.0'],
        '1.1.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    # publish 1.1.1, change label

    # Assign with a single positional indexer on the data frame.
    # Chained indexing (``df['label'][0] = None``)
    # may operate on a temporary copy of the column
    # and is not guaranteed to change the table.
    train_df = db['train'].df
    train_df.iloc[0, train_df.columns.get_loc('label')] = None
    db.save(DB_ROOT_VERSION['1.1.1'])
    audformat.testing.create_audio_files(db)
    shutil.copy(
        os.path.join(DB_ROOT_VERSION['1.1.0'], 'db.csv'),
        os.path.join(DB_ROOT_VERSION['1.1.1'], 'db.csv'),
    )
    audb.publish(
        DB_ROOT_VERSION['1.1.1'],
        '1.1.1',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    # publish 2.0.0, alter and remove media

    db.save(DB_ROOT_VERSION['2.0.0'])
    audformat.testing.create_audio_files(db)
    # Alter the content of the first media file
    file = os.path.join(DB_ROOT_VERSION['2.0.0'], db.files[0])
    y, sr = audiofile.read(file)
    y[0] = 1
    audiofile.write(file, y, sr)
    # Remove the last media file from the database and from disk
    file = db.files[-1]
    db.pick_files(lambda x: x != file)
    os.remove(audeer.safe_path(os.path.join(DB_ROOT_VERSION['2.0.0'], file)))
    db.save(DB_ROOT_VERSION['2.0.0'])
    shutil.copy(
        os.path.join(DB_ROOT_VERSION['1.1.1'], 'db.csv'),
        os.path.join(DB_ROOT_VERSION['2.0.0'], 'db.csv'),
    )
    audb.publish(
        DB_ROOT_VERSION['2.0.0'],
        '2.0.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    # publish 3.0.0, remove table

    db.drop_tables('train')
    db.save(DB_ROOT_VERSION['3.0.0'])
    audformat.testing.create_audio_files(db)
    shutil.copy(
        os.path.join(DB_ROOT_VERSION['2.0.0'], 'db.csv'),
        os.path.join(DB_ROOT_VERSION['3.0.0'], 'db.csv'),
    )
    audb.publish(
        DB_ROOT_VERSION['3.0.0'],
        '3.0.0',
        pytest.PUBLISH_REPOSITORY,
        verbose=False,
    )

    yield

    clear_root(DB_ROOT)
    clear_root(pytest.FILE_SYSTEM_HOST)
def fixture_publish_db():
    """Publish version 1.0.0 of a database with one archive per table.

    The database holds the segmented tables ``test``, ``dev`` and ``train``.
    The files of table ``train`` are moved into nested sub-folders
    and every file is assigned to an archive named after its table.
    The statements after the ``yield`` clear the created folders again.

    """
    def _clear_folders():
        clear_root(DB_ROOT)
        clear_root(pytest.FILE_SYSTEM_HOST)

    _clear_folders()

    # create db
    db = audformat.testing.create_db(minimal=True)
    db.name = DB_NAME
    db.schemes['scheme'] = audformat.Scheme(labels=['some', 'test', 'labels'])
    files_per_table = {
        'test': [0, 1],
        'dev': [10, 11],
        'train': [20, 21],
    }
    for table_id, file_ids in files_per_table.items():
        audformat.testing.add_table(
            db,
            table_id,
            audformat.define.IndexType.SEGMENTED,
            columns={'label': ('scheme', None)},
            num_files=file_ids,
        )

    # Add nested folder structure to ensure not all files in an archive
    # are stored in the same folder
    mapping = {
        'audio/020.wav': 'audio/1/020.wav',
        'audio/021.wav': 'audio/2/021.wav',
    }
    train_index = db['train'].df.index
    nested_files = [
        mapping[file] for file in train_index.get_level_values('file')
    ]
    db['train'].df.index = audformat.segmented_index(
        files=nested_files,
        starts=train_index.get_level_values('start'),
        ends=train_index.get_level_values('end'),
    )
    db.save(DB_ROOT)
    audformat.testing.create_audio_files(db)

    # publish db, a file listed in several tables
    # ends up in the archive of the table visited last
    archives = {
        file: table
        for table in db.tables
        for file in db[table].files
    }
    audb.publish(
        DB_ROOT,
        '1.0.0',
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        verbose=False,
    )

    yield

    _clear_folders()
}, 13: { 'gender': 'female', 'age': 32, 'language': 'deu' }, 14: { 'gender': 'female', 'age': 35, 'language': 'deu' }, 15: { 'gender': 'male', 'age': 25, 'language': 'deu' }, 16: { 'gender': 'female', 'age': 31, 'language': 'deu' }, } db.save(build_dir) audb.publish( build_dir, version, repository, previous_version=previous_version, )
import audb


# Folder containing the database that gets published
DB_ROOT = './build'

# Repository the database is published to
repository = audb.Repository(
    backend='artifactory',
    host='https://audeering.jfrog.io/artifactory',
    name='data-public',
)

# Publish version 1.1.0 using a single worker and verbose output
audb.publish(
    DB_ROOT,
    repository=repository,
    version='1.1.0',
    verbose=True,
    num_workers=1,
)
def test_update_database():
    """Publish version 2.1.0 as an update of version 2.0.0.

    The test first runs three ``audb.publish`` calls
    that are expected to raise a ``RuntimeError``
    (missing dependency file,
    ``previous_version='latest'``,
    ``previous_version=None``).
    Afterwards it publishes with ``previous_version='2.0.0'``
    and compares the loaded result with the published version 3.0.0.

    """
    version = '2.1.0'
    start_version = '2.0.0'
    build_root = DB_ROOT_VERSION[version]
    deps_file_name = audb.core.define.DEPENDENCIES_FILE

    def _load_start_version():
        # Load the start version into the build folder of the new version
        return audb.load_to(
            build_root,
            DB_NAME,
            version=start_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    def _publish(previous_version):
        # Publish the build folder depending on the given previous version
        return audb.publish(
            build_root,
            version,
            pytest.PUBLISH_REPOSITORY,
            previous_version=previous_version,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )

    def _expect_publish_error(previous_version, message):
        # Publishing has to fail with exactly the given message
        with pytest.raises(RuntimeError, match=re.escape(message)):
            _publish(previous_version)

    db = _load_start_version()

    # == Fail with missing dependency file
    os.remove(os.path.join(build_root, deps_file_name))
    print(audeer.list_file_names(build_root))
    error_msg = (
        f"You want to depend on '{start_version}' "
        f"of {DB_NAME}, "
        f"but you don't have a '{deps_file_name}' "
        f"file present "
        f"in {build_root}. "
        f"Did you forgot to call "
        f"'audb.load_to({build_root}, {DB_NAME}, "
        f"version={start_version}?"
    )
    _expect_publish_error(start_version, error_msg)

    # Reload data to restore dependency file
    shutil.rmtree(build_root)
    db = _load_start_version()

    # Remove one file as in version 3.0.0
    remove_file = os.path.join('audio', '001.wav')
    os.remove(os.path.join(build_root, remove_file))
    db.drop_files(remove_file)
    db.save(build_root)

    # == Fail as 2.0.0 is not the latest version
    latest = audb.latest_version(DB_NAME)
    error_msg = (
        f"You want to depend on '{latest}' "
        f"of {DB_NAME}, "
        f"but the MD5 sum of your "
        f"'{deps_file_name}' file "
        f"in {build_root} "
        f"does not match the MD5 sum of the corresponding file "
        f"for the requested version in the repository. "
        f"Did you forgot to call "
        f"'audb.load_to({build_root}, {DB_NAME}, "
        f"version='{latest}') "
        f"or modified the file manually?"
    )
    _expect_publish_error('latest', error_msg)

    # == Fail as we require a previous version
    error_msg = (
        f"You did not set a dependency to a previous version, "
        f"but you have a '{deps_file_name}' file present "
        f"in {build_root}."
    )
    _expect_publish_error(None, error_msg)

    # == Succeed when depending on the start version
    deps = _publish(start_version)

    # Check that dependencies include previous and actual version only
    versions = audeer.sort_versions(
        [deps.version(file) for file in deps.files]
    )
    assert versions[-1] == version
    assert versions[0] == start_version

    # Check that there is no difference in the database
    # if published from scratch or from previous version
    updated, from_scratch = [
        audb.load(
            DB_NAME,
            version=load_version,
            full_path=False,
            num_workers=pytest.NUM_WORKERS,
            verbose=False,
        )
        for load_version in (version, '3.0.0')
    ]
    # Reset the audb entry of the meta data before comparing
    for loaded in (updated, from_scratch):
        loaded.meta['audb'] = {}
    assert updated == from_scratch
def test_publish(version):
    """Publish ``version`` from scratch and check the published result.

    The database stored in ``DB_ROOT_VERSION[version]`` is published
    with ``previous_version=None``,
    using the speaker labels of table ``files`` as archive names.
    Afterwards the test checks the returned dependencies,
    the versions reported by ``audb``
    and the media information stored for every file.

    Args:
        version: version of the database to publish

    """
    db = audformat.Database.load(DB_ROOT_VERSION[version])

    # As long as no version is published,
    # requesting the latest version has to fail
    if not audb.versions(DB_NAME):
        with pytest.raises(RuntimeError):
            audb.latest_version(DB_NAME)

    # Files without a speaker label are dropped from the archive mapping
    archives = db['files']['speaker'].get().dropna().to_dict()
    deps = audb.publish(
        DB_ROOT_VERSION[version],
        version,
        pytest.PUBLISH_REPOSITORY,
        archives=archives,
        previous_version=None,
        num_workers=pytest.NUM_WORKERS,
        verbose=False,
    )
    backend = audb.core.utils.lookup_backend(DB_NAME, version)

    # Files sharing an archive reduce the number of archives
    # by the same amount in the mapping and in the dependencies
    archive_names = set(archives.values())
    number_of_files = len(archives)
    number_of_archives = len(archive_names)
    assert len(deps.files) - len(deps.archives) == (
        number_of_files - number_of_archives
    )
    for archive in archive_names:
        assert archive in deps.archives

    db = audb.load(
        DB_NAME,
        version=version,
        full_path=False,
        num_workers=pytest.NUM_WORKERS,
    )
    assert db.name == DB_NAME

    versions = audb.versions(DB_NAME)
    latest_version = audb.latest_version(DB_NAME)

    assert version in versions
    assert latest_version == versions[-1]

    df = audb.available(only_latest=False)
    assert DB_NAME in df.index
    assert set(df[df.index == DB_NAME]['version']) == set(versions)

    df = audb.available(only_latest=True)
    assert DB_NAME in df.index
    # Select the first row by position,
    # integer keys on a label based index are deprecated in pandas
    assert df[df.index == DB_NAME]['version'].iloc[0] == latest_version

    for file in db.files:
        name = archives.get(file, file)
        file_path = backend.join(db.name, 'media', name)
        # NOTE(review): the result of this call is discarded,
        # presumably an assert was intended here.
        # Confirm the expected backend path before asserting on it.
        backend.exists(file_path, version)
        path = os.path.join(DB_ROOT_VERSION[version], file)
        assert deps.checksum(file) == audbackend.md5(path)
        # Audio properties are only compared for WAV and FLAC files
        if deps.format(file) in [
            audb.core.define.Format.WAV,
            audb.core.define.Format.FLAC,
        ]:
            assert deps.bit_depth(file) == audiofile.bit_depth(path)
            assert deps.channels(file) == audiofile.channels(path)
            assert deps.duration(file) == audiofile.duration(path)
            assert deps.sampling_rate(file) == audiofile.sampling_rate(path)