def create_sample_dataset(temp_dir):
    """Build a small sample corpus: 3 tracks, 3 issuers, 4 transcribed
    utterances and 'train'/'dev' subviews. Returns the corpus."""
    corpus = audiomate.Corpus(str(temp_dir))

    wav_1 = corpus.new_file(resources.sample_wav_file('wav_1.wav'), track_idx='wav_1')
    wav_2 = corpus.new_file(resources.sample_wav_file('wav_2.wav'), track_idx='wav_2')
    flac_path = resources.get_resource_path(['audio_formats', 'flac_1_16k_16b.flac'])
    wav_3 = corpus.new_file(flac_path, track_idx='wav_3')

    spk_a = Speaker('spk-1', gender=Gender.MALE)
    spk_b = Speaker('spk-2', gender=Gender.FEMALE)
    spk_c = Issuer('spk-3')
    corpus.import_issuers([spk_a, spk_b, spk_c])

    # wav_1 duration: 2.5951875 s
    utt_a = corpus.new_utterance('utt-1', wav_1.idx, issuer_idx=spk_a.idx)
    utt_b = corpus.new_utterance('utt-2', wav_2.idx, issuer_idx=spk_b.idx,
                                 start=0, end=1.5)
    utt_c = corpus.new_utterance('utt-3', wav_2.idx, issuer_idx=spk_b.idx,
                                 start=1.5, end=2.5)
    # flac duration: 5.0416875 s
    utt_d = corpus.new_utterance('utt-4', wav_3.idx, issuer_idx=spk_c.idx)

    # One word-transcript label-list per utterance.
    transcripts = (
        (utt_a, 'who am i'),
        (utt_b, 'who are you'),
        (utt_c, 'who is he'),
        (utt_d, 'who are they'),
    )
    for utt, text in transcripts:
        utt.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT, labels=[Label(text)]))

    # Register the two subviews (train first, then dev).
    for name, utt_idxs in (('train', {'utt-1', 'utt-2', 'utt-3'}),
                           ('dev', {'utt-4'})):
        flt = subview.MatchingUtteranceIdxFilter(utterance_idxs=utt_idxs)
        corpus.import_subview(name, subview.Subview(corpus, filter_criteria=[flt]))

    return corpus
def test_validate_passes(self):
    """A corpus whose tracks are all readable passes TrackReadValidator."""
    corpus = audiomate.Corpus()

    for file_name, idx in (('wav_1.wav', 'wav1'), ('wav_2.wav', 'wav2')):
        corpus.new_file(resources.sample_wav_file(file_name), idx)

    result = validation.TrackReadValidator().validate(corpus)

    assert result.passed
def _load(self, path):
    """Create a corpus with one file and one utterance per ``*.wav`` in ``path``.

    Fix: the original passed ``recursive=True`` to ``glob.glob`` without a
    ``**`` wildcard in the pattern, which has no effect; the flag is removed
    so the code no longer suggests that subdirectories are scanned.
    """
    corpus = audiomate.Corpus(path=path)

    for wav_path in glob.glob(os.path.join(path, '*.wav')):
        # Utterance/file idx is the file name without extension.
        basename, _ = os.path.splitext(os.path.basename(wav_path))
        corpus.new_file(wav_path, basename)
        corpus.new_utterance(basename, basename)

    return corpus
def _load(self, path):
    """Load the speech-commands corpus: one folder per label.

    Folders whose name starts with '_' (e.g. background noise) are skipped.
    """
    corpus = audiomate.Corpus(path=path)

    label_dirs = (entry for entry in os.scandir(path)
                  if entry.is_dir() and not entry.name.startswith('_'))

    for folder in label_dirs:
        SpeechCommandsReader._load_folder(folder, corpus)

    SpeechCommandsReader._create_subviews(path, corpus)

    return corpus
def _load(self, path):
    """Load the Fluent Speech Commands corpus (train/valid/test parts)."""
    corpus = audiomate.Corpus(path=path)

    demographics_csv = os.path.join(path, 'data', 'speaker_demographics.csv')
    speakers = FluentSpeechReader.load_speakers(demographics_csv)

    for part in ('train', 'valid', 'test'):
        FluentSpeechReader.load_part(path, part, corpus, speakers)

    return corpus
def test_validate_doesnt_pass(self):
    """Validation fails and reports exactly the one unreadable track."""
    corpus = audiomate.Corpus()
    corpus.new_file(resources.sample_wav_file('wav_1.wav'), 'wav1')
    corpus.new_file(resources.sample_wav_file('invalid_audio.wav'), 'wav2')

    result = validation.TrackReadValidator().validate(corpus)

    assert not result.passed
    # Only the broken track may be reported.
    assert len(result.invalid_items) == 1
    assert 'wav2' in result.invalid_items
def test_load_with_default_reader_when_reader_unspecified(self):
    """Without an explicit reader, Corpus.load uses the default format."""
    sample_path = resources.sample_corpus_path('default')
    corpus = audiomate.Corpus().load(sample_path)

    assert corpus.name == 'default'
    assert corpus.path == sample_path
    assert corpus.num_files == 4

    for idx in ('file-1', 'file-2', 'file-3', 'file-4'):
        assert idx in corpus.files
def _load(self, path):
    """Build a corpus from article folders: readers become speakers, audio
    files become tracks, and each transcript segment becomes one utterance.

    Utterance ids listed in ``self.invalid_utterance_ids`` are skipped.
    """
    corpus = audiomate.Corpus()

    # Sorted so speaker/track running numbers are deterministic across runs.
    article_paths = sorted(self.get_articles(path))

    # Caches: each reader / audio file is imported into the corpus only once.
    reader_map = {}
    file_map = {}

    for article_path in article_paths:
        audio_files = self.get_audio_file_info(article_path)
        reader_name, reader_gender = self.get_reader_info(article_path)
        segments = self.get_segments(article_path)

        if reader_name not in reader_map.keys():
            # Speaker idx is a zero-padded running number, not the reader name.
            speaker = issuers.Speaker('{:0>8}'.format(len(reader_map)),
                                      gender=reader_gender)
            reader_map[reader_name] = speaker
            corpus.import_issuers(speaker)
        else:
            speaker = reader_map[reader_name]

        for start, end, text in segments:
            file_path = self.find_audio_file_for_segment(
                start, end, audio_files)

            # Segments without a matching audio file are silently dropped.
            if file_path is not None:
                if file_path not in file_map.keys():
                    # Track idx is a zero-padded running number as well.
                    track = tracks.FileTrack(
                        '{:0>10}'.format(len(file_map)),
                        file_path)
                    file_map[file_path] = track
                    corpus.import_tracks(track)
                else:
                    track = file_map[file_path]

                # Segment times are shifted by the track's offset so they
                # become relative to the start of the track.
                track_offset = audio_files[file_path]
                utt_start = start - track_offset
                utt_end = end - track_offset

                # Utterance id encodes speaker, track and times in ms.
                utt_idx = '{}_{}_{}_{}'.format(speaker.idx, track.idx,
                                               int(start * 1000), int(end * 1000))

                if utt_idx not in self.invalid_utterance_ids:
                    utt = corpus.new_utterance(utt_idx,
                                               track.idx,
                                               issuer_idx=speaker.idx,
                                               start=utt_start,
                                               end=utt_end)

                    ll = annotations.LabelList.create_single(
                        text, audiomate.corpus.LL_WORD_TRANSCRIPT)
                    utt.set_label_list(ll)

    # NOTE(review): presumably returns a detached copy of the assembled
    # corpus — confirm why from_corpus is required here.
    return audiomate.Corpus.from_corpus(corpus)
def test_from_corpus_only_utterances_and_files(self):
    """Copying a corpus without issuers keeps files and utterances only."""
    original = audiomate.Corpus()
    original.new_file('/random/path', 'file_1')
    original.new_file('/random/path2', 'file_2')
    original.new_utterance('utt_1', 'file_1')
    original.new_utterance('utt_2', 'file_2')

    duplicate = audiomate.Corpus.from_corpus(original)

    assert duplicate.num_files == 2
    assert duplicate.num_utterances == 2
    assert duplicate.num_issuers == 0
def corpus():
    """Fixture: corpus pre-populated with one track, one issuer and one
    utterance, inserted directly into the container dicts."""
    sample = audiomate.Corpus()

    track = tracks.FileTrack('existing_file', '../any/path.wav')
    speaker = issuers.Issuer('existing_issuer')
    utt = tracks.Utterance('existing_utt', track, issuer=speaker)

    sample.tracks['existing_file'] = track
    sample.issuers['existing_issuer'] = speaker
    sample.utterances['existing_utt'] = utt

    return sample
def test_load_with_custom_reader_specified_by_instance(self):
    """Passing a reader instance overrides reader auto-detection."""
    sample_path = resources.sample_corpus_path('musan')
    corpus = audiomate.Corpus().load(sample_path, reader=MusanReader())

    assert corpus.name == 'musan'
    assert corpus.path == sample_path
    assert corpus.num_files == 5

    expected_ids = ('music-fma-0000',
                    'noise-free-sound-0000',
                    'noise-free-sound-0001',
                    'speech-librivox-0000',
                    'speech-librivox-0001')
    for idx in expected_ids:
        assert idx in corpus.files
def setUp(self):
    """Create a temp-dir-backed corpus seeded with one file, issuer and
    utterance, inserted directly into the container dicts."""
    self.tempdir = tempfile.mkdtemp()
    self.corpus = audiomate.Corpus(self.tempdir)

    track = assets.File('existing_file', '../any/path.wav')
    speaker = assets.Issuer('existing_issuer')
    utt = assets.Utterance('existing_utt', track, issuer=speaker)

    # Keep references for assertions in the tests.
    self.ex_file = track
    self.ex_issuer = speaker
    self.ex_utterance = utt

    self.corpus.files['existing_file'] = track
    self.corpus.issuers['existing_issuer'] = speaker
    self.corpus.utterances['existing_utt'] = utt
def test_save_at_path_throws_exception_when_writer_does_not_exist(self):
    """save_at with an unknown writer name raises and writes nothing."""
    corpus = audiomate.Corpus().load(resources.sample_corpus_path('default'))

    assert corpus.name == 'default'
    assert corpus.path == resources.sample_corpus_path('default')
    assert corpus.num_files == 4

    assert len(os.listdir(self.tempdir)) == 0

    with pytest.raises(UnknownWriterException):
        corpus.save_at(self.tempdir, writer='does_not_exist')

    # The target directory must remain untouched after the failed save.
    assert len(os.listdir(self.tempdir)) == 0
def _load(self, path):
    """Read a corpus in Kaldi layout (wav.scp, spk2gender, utt2spk,
    segments, text)."""
    def in_path(name):
        # All kaldi files live directly inside the corpus folder.
        return os.path.join(path, name)

    corpus = audiomate.Corpus(path=path)

    default.DefaultReader.read_files(in_path(WAV_FILE_NAME), corpus)
    KaldiReader.read_genders(in_path(SPK2GENDER_FILE_NAME), corpus)
    utt2spk = default.DefaultReader.read_utt_to_issuer_mapping(
        in_path(UTT2SPK_FILE_NAME), corpus)
    KaldiReader.read_utterances(in_path(SEGMENTS_FILE_NAME), corpus, utt2spk)
    KaldiReader.read_transcriptions(in_path(TRANSCRIPTION_FILE_NAME), corpus)

    return corpus
def _load(self, path):
    """Read a broadcast-format corpus from ``path``."""
    def in_path(name):
        return os.path.join(path, name)

    corpus = audiomate.Corpus(path=path)

    default.DefaultReader.read_files(in_path(FILES_FILE_NAME), corpus)
    default.DefaultReader.read_issuers(in_path(ISSUER_FILE_NAME), corpus)
    utt_id_to_issuer = default.DefaultReader.read_utt_to_issuer_mapping(
        in_path(UTT_ISSUER_FILE_NAME), corpus)
    default.DefaultReader.read_utterances(in_path(UTTERANCE_FILE_NAME),
                                          corpus, utt_id_to_issuer)
    # Labels live in per-label-list files next to the corpus files.
    BroadcastReader.read_labels(path, corpus)
    default.DefaultReader.read_feature_containers(
        in_path(FEAT_CONTAINER_FILE_NAME), corpus)

    return corpus
def _load(self, path):
    """Load the TUDA corpus and register one subview per subset."""
    corpus = audiomate.Corpus(path=path)

    for part in SUBSETS:
        part_path = os.path.join(path, part)
        ids = TudaReader.get_ids_from_folder(part_path, part)

        for idx in ids:
            TudaReader.load_file(part_path, idx, corpus)

        part_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=ids)
        part_view = subview.Subview(corpus, filter_criteria=[part_filter])
        corpus.import_subview(part, part_view)

    return corpus
def test_save_at_corpus_path_throws_exception_when_writer_does_not_exist(
        self, tmpdir):
    """save() with an unknown writer raises and leaves the target empty."""
    corpus = audiomate.Corpus().load(resources.sample_corpus_path('default'))

    assert corpus.name == 'default'
    assert corpus.path == resources.sample_corpus_path('default')
    assert corpus.num_tracks == 6

    assert len(os.listdir(tmpdir.strpath)) == 0

    # Re-point the corpus at the empty temp dir, then try a bad save.
    corpus.path = tmpdir.strpath

    with pytest.raises(UnknownWriterException):
        corpus.save(writer='does_not_exist')

    assert len(os.listdir(tmpdir.strpath)) == 0
def _load(self, path):
    """Load the MUSAN corpus (music/noise/speech), one utterance per wav.

    Issuer creation is dispatched per top-level type directory; per-source
    annotation files (when present) supply the issuer metadata.
    """
    # Dispatch table: top-level directory name -> issuer factory.
    create_or_get_issuer = {
        'music': self._create_or_get_music_issuer,
        'noise': self._create_or_get_noise_issuer,
        'speech': self._create_or_get_speech_issuer,
    }

    corpus = audiomate.Corpus(path=path)

    for type_name, type_directory in self._directories(path).items():
        for _, source_directory in self._directories(
                type_directory).items():
            annotations_path = os.path.join(source_directory, ANN_FILE_NAME_)
            annotations = {}

            # Annotation files are optional; the expected column count
            # depends on the type (music/noise/speech).
            if os.path.exists(annotations_path):
                annotations = textfile.read_separated_lines_with_first_key(
                    annotations_path, separator=' ',
                    max_columns=ANN_NUM_COLUMS_[type_name])

            it = os.scandir(source_directory)

            for entry in it:
                # Only wav files are imported; everything else is ignored.
                if not entry.name.endswith('.wav'):
                    continue

                file_path = os.path.join(source_directory, entry.name)
                file_idx = entry.name[0:-4]  # chop of .wav
                utterance_idx = file_idx  # every file is a separate utterance
                issuer_idx = create_or_get_issuer[type_name](corpus,
                                                             file_idx,
                                                             annotations)

                corpus.new_file(file_path, file_idx=file_idx, copy_file=False)
                utterance = corpus.new_utterance(utterance_idx,
                                                 file_idx,
                                                 issuer_idx)
                # Each utterance is labelled with its domain (type name).
                utterance.set_label_list(
                    assets.LabelList.create_single(
                        type_name, idx=audiomate.corpus.LL_DOMAIN))

    return corpus
def _load(self, path):
    """Load LibriSpeech: speakers from SPEAKERS.TXT, one subview per subset.

    Fix: the local holding the subset filter was named ``filter``, shadowing
    the builtin; it is renamed (likewise ``subview``, which mirrored a
    commonly-imported module name).
    """
    corpus = audiomate.Corpus(path=path)

    speaker_info_path = os.path.join(path, 'SPEAKERS.TXT')
    speakers = LibriSpeechReader.load_speakers(speaker_info_path)

    sf = LibriSpeechReader.available_subfolders

    for subset_idx, subset_path in sf(path, SUBSETS.keys()).items():
        subset_utt_ids = set()

        # Layout: subset / speaker / chapter / {transcript, flac files}.
        for speaker_idx, speaker_path in sf(subset_path).items():
            corpus.import_issuers(speakers[speaker_idx])

            for chapter_idx, chapter_path in sf(speaker_path).items():
                transcript_path = os.path.join(
                    chapter_path,
                    '{}-{}.trans.txt'.format(speaker_idx, chapter_idx)
                )
                transcripts = LibriSpeechReader.load_transcripts(transcript_path)

                for utt_idx, transcript in transcripts.items():
                    file_path = os.path.join(chapter_path,
                                             '{}.flac'.format(utt_idx))
                    corpus.new_file(file_path, utt_idx)

                    utterance = corpus.new_utterance(
                        utt_idx,
                        utt_idx,
                        speaker_idx
                    )
                    utterance.set_label_list(
                        annotations.LabelList.create_single(
                            transcript,
                            idx=audiomate.corpus.LL_WORD_TRANSCRIPT
                        )
                    )

                    subset_utt_ids.add(utt_idx)

        utt_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(subset_utt_ids))
        subset_view = subset.Subview(corpus, filter_criteria=[utt_filter])
        corpus.import_subview(subset_idx, subset_view)

    return corpus
def _load(self, path):
    """Load the AED corpus and register 'test' and 'train' subviews."""
    corpus = audiomate.Corpus(path=path)

    # Load test first, then train — same order as the subview imports.
    for part in ('test', 'train'):
        part_folder = os.path.join(path, part)
        part_utt_ids = AEDReader.load_folder(part_folder, corpus)

        part_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=part_utt_ids)
        part_view = subset.Subview(corpus, filter_criteria=[part_filter])
        corpus.import_subview(part, part_view)

    return corpus
def _load(self, path):
    """Load a Voxforge corpus: one folder per submission, each holding a
    README (speaker info), prompt files (transcripts) and wav files.

    Utterances listed in ``self.invalid_utterance_ids`` are skipped.
    """
    corpus = audiomate.Corpus(path=path)

    # Sorted for a deterministic processing order.
    for dir_path in sorted(VoxforgeReader.data_folders(path)):
        item = os.path.basename(dir_path)
        etc_folder = os.path.join(dir_path, 'etc')
        wav_folder = os.path.join(dir_path, 'wav')
        readme_path = os.path.join(etc_folder, 'README')

        # LOAD ISSUER
        issuer = VoxforgeReader.parse_speaker_info(readme_path)

        # Unnamed/anonymous speakers get the folder name as idx, so each
        # submission remains distinguishable.
        if issuer.idx is None or issuer.idx == 'anonymous':
            issuer.idx = item

        # LOAD TRANSCRIPTIONS
        prompts, prompts_orig = VoxforgeReader.parse_prompts(etc_folder)

        # LOAD FILES/UTTS
        for file_name in os.listdir(wav_folder):
            wav_path = os.path.join(wav_folder, file_name)
            basename, ext = os.path.splitext(file_name)
            # Utterance idx combines folder name and file base name.
            idx = '{}-{}'.format(item, basename)

            is_valid_wav = os.path.isfile(wav_path) and ext == '.wav' \
                and idx not in self.invalid_utterance_ids

            has_transcription = basename in prompts.keys()

            if is_valid_wav and has_transcription:
                # Import the issuer lazily — only once a usable wav exists.
                if issuer.idx not in corpus.issuers.keys():
                    corpus.import_issuers([issuer])

                corpus.new_file(wav_path, idx)
                utt = corpus.new_utterance(idx, idx, issuer.idx)
                utt.set_label_list(annotations.LabelList.create_single(
                    prompts[basename],
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

                # Keep the raw (unprocessed) prompt as a second label-list
                # when available.
                if basename in prompts_orig.keys():
                    raw = annotations.LabelList.create_single(
                        prompts_orig[basename],
                        idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW)
                    utt.set_label_list(raw)

    return corpus
def _load(self, path):
    """Read a corpus stored in the default audiomate on-disk format."""
    def in_path(name):
        return os.path.join(path, name)

    corpus = audiomate.Corpus(path=path)

    DefaultReader.read_files(in_path(FILES_FILE_NAME), corpus)
    DefaultReader.read_tracks_from_audio_containers(
        in_path(AUDIO_CONTAINER_FILE_NAME), corpus)
    DefaultReader.read_issuers(in_path(ISSUER_FILE_NAME), corpus)
    utt_id_to_issuer = DefaultReader.read_utt_to_issuer_mapping(
        in_path(UTT_ISSUER_FILE_NAME), corpus)
    DefaultReader.read_utterances(in_path(UTTERANCE_FILE_NAME), corpus,
                                  utt_id_to_issuer)
    # Labels and subviews are discovered from the corpus folder itself.
    DefaultReader.read_labels(path, corpus)
    DefaultReader.read_feature_containers(in_path(FEAT_CONTAINER_FILE_NAME),
                                          corpus)
    DefaultReader.read_subviews(path, corpus)

    return corpus
def _load(self, path):
    """Load a corpus where each wav file name encodes its sound-class label.

    Fix: ``regex.match`` can return ``None`` for wav files whose base name
    does not match ``LABEL_PATTERN``; previously that raised an
    ``AttributeError`` on ``match.group(1)``. Such files are now skipped.
    """
    corpus = audiomate.Corpus(path=path)
    regex = re.compile(LABEL_PATTERN)

    for file_name in os.listdir(path):
        base_name, ext = os.path.splitext(file_name)

        if ext != '.wav':
            continue

        match = regex.match(base_name)

        # Skip wav files whose name doesn't carry a parsable label.
        if match is None:
            continue

        label = match.group(1)
        file_path = os.path.join(path, file_name)

        corpus.new_file(file_path, base_name)
        utt = corpus.new_utterance(base_name, base_name)
        ll = annotations.LabelList.create_single(
            label, idx=audiomate.corpus.LL_SOUND_CLASS)
        utt.set_label_list(ll)

    return corpus
def prepare():
    """Generate a benchmark corpus (1000 issuers + 1000 tracks) plus a list
    of not-yet-imported utterances. Uses a fixed RNG seed for repeatability."""
    rng = random.Random(x=234)

    target_corpus = audiomate.Corpus()

    speakers = resources.generate_issuers(1000, rand=rng)
    target_corpus.import_issuers(speakers)

    media = resources.generate_tracks(1000, rand=rng)
    target_corpus.import_tracks(media)

    # 10 utterances per (issuer, track) pair, kept outside the corpus.
    utts = [
        utt
        for speaker, medium in zip(speakers, media)
        for utt in resources.generate_utterances(medium, speaker, 10,
                                                 (3, 3), (3, 3), rand=rng)
    ]

    return target_corpus, utts
def _load(self, path):
    """Load TUDA with per-part subviews plus per-wav-type subviews.

    Fix: the utterance-id collections handed to the subview filter were a
    plain list (and, for the final call, a live dict keys view); they are
    now passed as sets, consistent with the other readers and giving O(1)
    membership tests inside the filters.
    """
    corpus = audiomate.Corpus(path=path)

    for part in SUBSETS:
        sub_path = os.path.join(path, part)
        ids = TudaReader.get_ids_from_folder(sub_path, part)
        utt_ids = []

        for idx in ids:
            add_ids = TudaReader.load_file(sub_path, idx, corpus)
            utt_ids.extend(add_ids)

        subview_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs=set(utt_ids))
        subview_corpus = subview.Subview(corpus,
                                         filter_criteria=[subview_filter])
        corpus.import_subview(part, subview_corpus)

        # Per-part wav-type subviews (prefixed with the part name).
        TudaReader.create_wav_type_subviews(corpus, utt_ids,
                                            prefix='{}_'.format(part))

    # Global wav-type subviews over every utterance loaded so far.
    TudaReader.create_wav_type_subviews(corpus, set(corpus.utterances.keys()))

    return corpus
def test_save_at_path_with_default_writer_when_writer_unspecified(self):
    """save_at without a writer emits the default on-disk format."""
    corpus = audiomate.Corpus().load(resources.sample_corpus_path('default'))

    assert corpus.name == 'default'
    assert corpus.path == resources.sample_corpus_path('default')
    assert corpus.num_files == 4

    assert len(os.listdir(self.tempdir)) == 0

    corpus.save_at(self.tempdir)

    contents = os.listdir(self.tempdir)
    assert len(contents) == 9

    expected = ('files.txt', 'issuers.json', 'labels_raw_text.txt',
                'labels_text.txt', 'utt_issuers.txt', 'utterances.txt')
    for name in expected:
        assert name in contents
def _load(self, path):
    """Load ESC-50: per-fold subviews plus the 'esc-10' subset subview."""
    corpus = audiomate.Corpus(path=path)

    meta_data = ESC50Reader.load_meta_data(path)

    folds = collections.defaultdict(list)
    esc10_ids = []

    for record in meta_data:
        file_name, fold = record[0], record[1]
        category, in_esc10 = record[3], record[4]

        file_id = os.path.splitext(file_name)[0]
        file_path = os.path.abspath(os.path.join(path, 'audio', file_name))

        corpus.new_file(file_path, file_id)
        utt = corpus.new_utterance(file_id, file_id)
        utt.set_label_list(
            assets.LabelList.create_single(
                category, idx=audiomate.corpus.LL_SOUND_CLASS))

        folds['fold-{}'.format(fold)].append(file_id)

        # The esc10 column is a string flag, not a bool.
        if in_esc10 == 'True':
            esc10_ids.append(file_id)

    for fold_id, fold_utt_ids in folds.items():
        fold_filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=set(fold_utt_ids))
        corpus.import_subview(
            fold_id, subset.Subview(corpus, filter_criteria=[fold_filter]))

    esc10_filter = subset.MatchingUtteranceIdxFilter(
        utterance_idxs=set(esc10_ids))
    corpus.import_subview(
        'esc-10', subset.Subview(corpus, filter_criteria=[esc10_filter]))

    return corpus
def _load(self, path):
    """Load the free-spoken-digit corpus from ``recordings/``; file names
    encode the spoken digit and the speaker."""
    corpus = audiomate.Corpus(path=path)

    for file_path in glob.glob(os.path.join(path, 'recordings', '*.wav')):
        file_idx = os.path.splitext(os.path.basename(file_path))[0]
        corpus.new_file(file_path, file_idx)

        # First token is the digit; the speaker id is everything between
        # the first and last underscore.
        parts = file_idx.split('_')
        digit = parts[0]
        speaker_idx = '_'.join(parts[1:-1])

        if speaker_idx not in corpus.issuers.keys():
            corpus.import_issuers(issuers.Speaker(speaker_idx))

        utt = corpus.new_utterance(file_idx, file_idx, speaker_idx)
        utt.set_label_list(
            annotations.LabelList.create_single(
                str(digit), idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

    return corpus
def test_save_at_path_with_writer_specified_by_instance(self, tmpdir):
    """save_at with a KaldiWriter instance writes the kaldi files and
    re-homes the corpus at the target path."""
    corpus = audiomate.Corpus().load(resources.sample_corpus_path('kaldi'),
                                     reader='kaldi')

    assert corpus.name == 'kaldi'
    assert corpus.path == resources.sample_corpus_path('kaldi')
    assert corpus.path != tmpdir.strpath
    assert corpus.num_tracks == 4

    assert len(os.listdir(tmpdir.strpath)) == 0

    corpus.save_at(tmpdir.strpath, writer=KaldiWriter())

    # After saving, the corpus points at the new location.
    assert corpus.path == tmpdir.strpath

    contents = os.listdir(tmpdir.strpath)
    assert len(contents) == 4
    for name in ('segments', 'text', 'utt2spk', 'wav.scp'):
        assert name in contents
def _load(self, path):
    """Load AudioMNIST: one folder per speaker under ``data/``; gender and
    age-group come from the speaker meta file."""
    corpus = audiomate.Corpus(path=path)

    data_path = os.path.join(path, 'data')
    meta_data = AudioMNISTReader.load_speaker_meta(path)

    for speaker_idx in os.listdir(data_path):
        speaker_path = os.path.join(data_path, speaker_idx)

        # Skip stray non-directory entries inside data/.
        if os.path.isdir(speaker_path):
            for file_path in glob.glob(os.path.join(speaker_path, '*.wav')):
                file_idx = os.path.splitext(os.path.basename(file_path))[0]
                corpus.new_file(file_path, file_idx)

                # First underscore-separated token of the file name is the
                # spoken digit.
                idx_parts = file_idx.split('_')
                digit = idx_parts[0]

                # Create each speaker lazily, on its first wav file.
                if speaker_idx not in corpus.issuers.keys():
                    issuer = issuers.Speaker(
                        speaker_idx,
                        gender=AudioMNISTReader.get_gender(
                            meta_data, speaker_idx),
                        age_group=AudioMNISTReader.get_age_group(
                            meta_data, speaker_idx))
                    corpus.import_issuers(issuer)

                utterance = corpus.new_utterance(file_idx, file_idx,
                                                 speaker_idx)
                utterance.set_label_list(
                    annotations.LabelList.create_single(
                        str(digit), idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

    return corpus