def _create_subviews(path, corpus):
    """ Load the subviews based on testing_list.txt and validation_list.txt """
    test_list_path = os.path.join(path, 'testing_list.txt')
    dev_list_path = os.path.join(path, 'validation_list.txt')

    test_list = textfile.read_separated_lines(test_list_path, separator='/', max_columns=2)
    dev_list = textfile.read_separated_lines(dev_list_path, separator='/', max_columns=2)

    # List entries are (label-folder, file-name); utterance-ids are "<basename>_<label-folder>".
    test_set = {'{}_{}'.format(os.path.splitext(entry[1])[0], entry[0]) for entry in test_list}
    dev_set = {'{}_{}'.format(os.path.splitext(entry[1])[0], entry[0]) for entry in dev_list}

    # The train subview holds everything that is neither in dev nor in test.
    train_filter = subview.MatchingUtteranceIdxFilter(
        utterance_idxs=test_set.union(dev_set), inverse=True)
    corpus.import_subview('train', subview.Subview(corpus, filter_criteria=train_filter))

    dev_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=dev_set, inverse=False)
    corpus.import_subview('dev', subview.Subview(corpus, filter_criteria=dev_filter))

    test_filter = subview.MatchingUtteranceIdxFilter(utterance_idxs=test_set, inverse=False)
    corpus.import_subview('test', subview.Subview(corpus, filter_criteria=test_filter))
def test_write_utt2spk(self, writer, tmpdir):
    """ utt2spk maps utterance-idx to speaker-idx; an utterance without an
    issuer is mapped to its own idx. """
    ds = resources.create_dataset()

    # Utterance without issuer, so it ends up as "utt-idx utt-idx" in utt2spk.
    ds.new_file('/random/path', 'wav-33')
    ds.new_utterance('utt-23', 'wav-33')

    path = tmpdir.strpath
    writer.save(ds, path)

    content = textfile.read_separated_lines(
        os.path.join(path, 'utt2spk'), separator=' ', max_columns=2)

    expected = [
        ('spk-1-utt-1', 'spk-1'),
        ('spk-1-utt-2', 'spk-1'),
        ('spk-2-utt-3', 'spk-2'),
        ('spk-2-utt-4', 'spk-2'),
        ('spk-3-utt-5', 'spk-3'),
        ('utt-23', 'utt-23'),
    ]

    for index, (utt_idx, spk_idx) in enumerate(expected):
        assert content[index][0] == utt_idx
        assert content[index][1] == spk_idx
def _load(self, path):
    """ Load the UrbanSound8K corpus: one utterance per audio file with a
    sound-class label, plus one subview per fold. """
    corpus = audiomate.Corpus(path=path)

    meta_file_path = os.path.join(path, 'metadata', 'UrbanSound8K.csv')
    records = textfile.read_separated_lines(meta_file_path, separator=',', max_columns=8)

    folds = collections.defaultdict(set)

    # Skip the CSV header row.
    for record in records[1:]:
        file_name, fold, label = record[0], record[5], record[7]
        file_path = os.path.join(path, 'audio', 'fold{}'.format(fold), file_name)

        # Only import files that actually exist on disk.
        if os.path.isfile(file_path):
            basename = os.path.splitext(file_name)[0]
            corpus.new_file(file_path, basename)
            utt = corpus.new_utterance(basename, basename)
            utt.set_label_list(annotations.LabelList.create_single(
                label, idx=audiomate.corpus.LL_SOUND_CLASS))
            folds['fold{}'.format(fold)].add(basename)

    # One subview ("fold1" ... "fold10") per fold.
    for fold_idx, fold_utterance_ids in folds.items():
        utt_filter = subset.MatchingUtteranceIdxFilter(utterance_idxs=fold_utterance_ids)
        fold_view = subset.Subview(corpus, filter_criteria=[utt_filter])
        corpus.import_subview(fold_idx, fold_view)

    return corpus
def read_labels(path, corpus):
    """ Attach label-lists to utterances, as referenced by the label file.

    Each record has the form "<utt-idx> <label-file-path> [<label-list-idx>]".
    """
    label_reference_file = os.path.join(path, LABEL_FILE)
    label_references = textfile.read_separated_lines(
        label_reference_file, separator=' ', max_columns=3)

    for record in label_references:
        utt_idx = record[0]
        label_path = os.path.join(path, record[1])
        # The third column (label-list idx) is optional.
        label_idx = record[2] if len(record) > 2 else None

        ll = annotations.LabelList(idx=label_idx)

        for label in audacity.read_label_file(label_path):
            start, end, value = label[0], label[1], label[2]
            # Negative end time means "until the end of the file".
            ll.addl(value, start, float('inf') if end < 0 else end)

        ll.apply(extract_meta_from_label_value)
        corpus.utterances[utt_idx].set_label_list(ll)
def test_write_segments(self, writer, tmpdir):
    """ segments lists utterance-idx, wav-idx, start and end (-1 = full file). """
    ds = resources.create_dataset()
    path = tmpdir.strpath

    writer.save(ds, path)

    content = textfile.read_separated_lines(
        os.path.join(path, 'segments'), separator=' ', max_columns=4)

    expected = [
        ('utt-1', 'wav-1', 0.0, -1.0),
        ('utt-2', 'wav_2', 0.0, -1.0),
        ('utt-3', 'wav_3', 0.0, 1.5),
        ('utt-4', 'wav_3', 1.5, 2.5),
        ('utt-5', 'wav_4', 0.0, -1.0),
    ]

    for index, (utt_idx, wav_idx, start, end) in enumerate(expected):
        assert content[index][0] == utt_idx
        assert content[index][1] == wav_idx
        assert float(content[index][2]) == pytest.approx(start)
        assert float(content[index][3]) == pytest.approx(end)
def test_save_subset_dev(self, writer, tmpdir):
    """ dev.csv contains a header row plus one row per dev utterance. """
    ds = resources.create_dataset()
    writer.save(ds, tmpdir.strpath)

    csv_path = os.path.join(tmpdir.strpath, 'dev.csv')
    assert os.path.isfile(csv_path)

    records = textfile.read_separated_lines(csv_path, separator=',')
    assert len(records) == 3

    # Header row
    header = records[0]
    assert len(header) == 3
    assert header[1] == 'wav_filesize'
    assert header[2] == 'transcript'

    # Data rows, keyed by the audio path in the first column.
    utts = {row[0]: (row[1], row[2]) for row in records[1:]}

    expected = [
        (os.path.join(tmpdir.strpath, 'audio', 'utt-4.wav'), '32044', 'utt-4'),
        (ds.utterances['utt-5'].track.path, '83090', 'utt-5'),
    ]

    for audio_path, file_size, utt_idx in expected:
        assert len(utts[audio_path]) == 2
        assert utts[audio_path][0] == file_size
        transcript = ds.utterances[utt_idx].label_lists[
            corpus.LL_WORD_TRANSCRIPT].labels[0].value
        assert utts[audio_path][1] == transcript
def test_write_segments_no_speaker_prefix(self, tmpdir):
    """ With prefix_utterances_with_speaker=False the segment ids are the
    plain utterance ids, without the speaker prefix. """
    writer = io.KaldiWriter(prefix_utterances_with_speaker=False)
    ds = resources.create_dataset()
    path = tmpdir.strpath

    writer.save(ds, path)

    content = textfile.read_separated_lines(
        os.path.join(path, 'segments'), separator=' ', max_columns=4)

    expected = [
        ('utt-1', 'wav-1', 0.0, -1.0),
        ('utt-2', 'wav_2', 0.0, -1.0),
        ('utt-3', 'wav_3', 0.0, 1.5),
        ('utt-4', 'wav_3', 1.5, 2.5),
        ('utt-5', 'wav_4', 0.0, -1.0),
    ]

    for index, (utt_idx, wav_idx, start, end) in enumerate(expected):
        assert content[index][0] == utt_idx
        assert content[index][1] == wav_idx
        assert float(content[index][2]) == pytest.approx(start)
        assert float(content[index][3]) == pytest.approx(end)
def test_write_segments_absolute_times(self, writer, tmpdir):
    """ With use_absolute_times=True, full-file utterances get the track
    duration as end time instead of -1. """
    writer = io.KaldiWriter(use_absolute_times=True)
    ds = resources.create_dataset()
    path = tmpdir.strpath

    writer.save(ds, path)

    content = textfile.read_separated_lines(
        os.path.join(path, 'segments'), separator=' ', max_columns=4)

    expected = [
        ('spk-1-utt-1', 'wav-1', 0.0, 2.5951875),
        ('spk-1-utt-2', 'wav_2', 0.0, 2.5951875),
        ('spk-2-utt-3', 'wav_3', 0.0, 1.5),
        ('spk-2-utt-4', 'wav_3', 1.5, 2.5),
        ('spk-3-utt-5', 'wav_4', 0.0, 2.5951875),
    ]

    for index, (utt_idx, wav_idx, start, end) in enumerate(expected):
        assert content[index][0] == utt_idx
        assert content[index][1] == wav_idx
        assert float(content[index][2]) == pytest.approx(start)
        assert float(content[index][3]) == pytest.approx(end)
def test_read_separated_lines(self):
    """ A tab-separated file is parsed into one record (list of columns) per line. """
    file_path = os.path.join(os.path.dirname(__file__), 'multi_column_file.txt')

    records = textfile.read_separated_lines(file_path, separator='\t')

    self.assertListEqual(
        [['a', '1', 'x'], ['b', '2', 'y'], ['c', '3', 'z']],
        records)
def test_read_separated_keeping_empty_columns():
    """ With keep_empty=True, empty columns are preserved as '' entries. """
    file_path = os.path.join(os.path.dirname(__file__), 'empty_column_file.txt')

    records = textfile.read_separated_lines(file_path, separator='\t', keep_empty=True)

    assert records == [
        ['a', '1', 'x', '', ''],
        ['b', '2', '', 'y'],
        ['c', '', '3', 'z'],
    ]
def load_books_of_speaker(self, corpus, path, speaker):
    """ Load all utterances for the speaker at the given path.

    Returns the list of imported utterance-ids.
    """
    utt_ids = []

    for book_path in MailabsReader.get_folders(path):
        meta_path = os.path.join(book_path, 'metadata.csv')
        wavs_path = os.path.join(book_path, 'wavs')

        for entry in textfile.read_separated_lines(meta_path, separator='|', max_columns=3):
            file_basename = entry[0]
            transcription_raw = entry[1]
            transcription_clean = entry[2]

            if speaker is None:
                # No book-level speaker: every utterance becomes its own issuer.
                idx = file_basename
                speaker_idx = idx
                corpus.import_issuers(issuers.Speaker(idx))
            else:
                idx = '{}-{}'.format(speaker.idx, file_basename)
                speaker_idx = speaker.idx

            wav_path = os.path.join(wavs_path, '{}.wav'.format(file_basename))

            # Only import utterances with an existing wav file that are not blacklisted.
            if os.path.isfile(wav_path) and idx not in self.invalid_utterance_ids:
                corpus.new_file(wav_path, idx)
                utterance = corpus.new_utterance(idx, idx, speaker_idx)
                utterance.set_label_list(annotations.LabelList.create_single(
                    transcription_raw,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))
                utterance.set_label_list(annotations.LabelList.create_single(
                    transcription_clean,
                    idx=audiomate.corpus.LL_WORD_TRANSCRIPT))
                utt_ids.append(utterance.idx)

    return utt_ids
def read_labels(path, corpus):
    """ Attach Audacity label-lists to utterances, as referenced by the label file.

    Each record has the form "<utt-idx> <label-file-path> [<label-list-idx>]".
    """
    label_reference_file = os.path.join(path, LABEL_FILE)
    records = textfile.read_separated_lines(
        label_reference_file, separator=' ', max_columns=3)

    for record in records:
        utt_idx = record[0]
        label_path = os.path.join(path, record[1])

        ll = audacity.read_label_list(label_path)
        # The third column (label-list idx) is optional.
        ll.idx = record[2] if len(record) > 2 else None
        ll.apply(extract_meta_from_label_value)

        corpus.utterances[utt_idx].set_label_list(ll)
def test_write_wav_scp(self, writer, tmpdir):
    """ wav.scp maps wav-idx to the absolute path of the audio file. """
    ds = resources.create_dataset()
    path = tmpdir.strpath

    writer.save(ds, path)

    content = textfile.read_separated_lines(
        os.path.join(path, 'wav.scp'), separator=' ', max_columns=2)

    wav_base = os.path.abspath(resources.get_resource_path(['wav_files']))

    expected = [
        ('wav-1', 'wav_1.wav'),
        ('wav_2', 'wav_2.wav'),
        ('wav_3', 'wav_3.wav'),
        ('wav_4', 'wav_4.wav'),
    ]

    for index, (wav_idx, file_name) in enumerate(expected):
        assert content[index][0] == wav_idx
        assert content[index][1] == os.path.join(wav_base, file_name)
def test_save_spk2gender(self, writer, tmpdir):
    """ With create_spk2gender=True a spk2gender file mapping speaker to
    gender is written. """
    writer = io.KaldiWriter(create_spk2gender=True)
    ds = resources.create_dataset()
    path = tmpdir.strpath

    writer.save(ds, path)

    assert 'spk2gender' in os.listdir(path)

    content = textfile.read_separated_lines(
        os.path.join(path, 'spk2gender'), separator=' ', max_columns=2)

    expected = [('spk-1', 'm'), ('spk-2', 'f'), ('spk-3', 'm')]

    for index, (spk_idx, gender) in enumerate(expected):
        assert content[index][0] == spk_idx
        assert content[index][1] == gender
def read_tracks_from_audio_containers(audio_path, corpus):
    """ Import container tracks listed in the given file into the corpus.

    Each line has the form "<track-idx> <container-path> <key>", where
    container-path is relative to the directory of ``audio_path``. Tracks that
    reference the same container path share a single AudioContainer instance.
    """
    if os.path.isfile(audio_path):
        base_path = os.path.dirname(audio_path)
        audio_tracks = textfile.read_separated_lines(
            audio_path, separator=' ', max_columns=3)

        # Cache of AudioContainer instances, keyed by relative container path.
        audio_containers = {}

        for entry in audio_tracks:
            track_idx = entry[0]
            container_path = entry[1]
            key = entry[2]

            if container_path in audio_containers:
                container = audio_containers[container_path]
            else:
                abs_path = os.path.abspath(os.path.join(base_path, container_path))
                container = containers.AudioContainer(abs_path)
                # BUGFIX: previously the container was never stored in the
                # cache (and the hit branch indexed with ``key`` instead of
                # the container path), so every track built a fresh
                # AudioContainer instead of sharing one per path.
                audio_containers[container_path] = container

            track = tracks.ContainerTrack(track_idx, container, key)
            corpus.import_tracks(track)
def _load(self, path):
    """ Load the TIMIT corpus from the given path.

    Walks TEST/TRAIN -> region -> speaker folders, imports every *.WAV file
    as one utterance with raw-text, word and phone label-lists, and creates
    one subview per part ('TEST', 'TRAIN').
    """
    corpus = audiomate.Corpus(path=path)

    for part in ['TEST', 'TRAIN']:
        part_path = os.path.join(path, part)
        # Collect utterance-ids of this part for the part subview below.
        part_utt_ids = set()

        for region in os.listdir(part_path):
            region_path = os.path.join(part_path, region)

            if os.path.isdir(region_path):
                for speaker_abbr in os.listdir(region_path):
                    speaker_path = os.path.join(region_path, speaker_abbr)
                    # Folder name is "<gender-letter><speaker-idx>", e.g. "MABC0".
                    speaker_idx = speaker_abbr[1:]

                    # A speaker can appear in several regions/parts; import once.
                    if speaker_idx not in corpus.issuers.keys():
                        issuer = assets.Speaker(speaker_idx)

                        if speaker_abbr[:1] == 'M':
                            issuer.gender = assets.Gender.MALE
                        elif speaker_abbr[:1] == 'F':
                            issuer.gender = assets.Gender.FEMALE

                        corpus.import_issuers(issuer)

                    for wav_path in glob.glob(
                            os.path.join(speaker_path, '*.WAV')):
                        sentence_idx = os.path.splitext(
                            os.path.basename(wav_path))[0]
                        utt_idx = '{}-{}-{}'.format(
                            region, speaker_abbr, sentence_idx).lower()
                        part_utt_ids.add(utt_idx)

                        # .TXT: "<start> <end> <sentence text>" on one line;
                        # column 3 onwards holds the full transcription.
                        raw_text_path = os.path.join(
                            speaker_path, '{}.TXT'.format(sentence_idx))
                        raw_text = textfile.read_separated_lines(
                            raw_text_path, separator=' ', max_columns=3)[0][2]

                        # .WRD: one "<start-sample> <end-sample> <word>" per line.
                        words_path = os.path.join(
                            speaker_path, '{}.WRD'.format(sentence_idx))
                        words = textfile.read_separated_lines(
                            words_path, separator=' ', max_columns=3)

                        # .PHN: one "<start-sample> <end-sample> <phone>" per line.
                        phones_path = os.path.join(
                            speaker_path, '{}.PHN'.format(sentence_idx))
                        phones = textfile.read_separated_lines(
                            phones_path, separator=' ', max_columns=3)

                        corpus.new_file(wav_path, utt_idx)
                        utt = corpus.new_utterance(utt_idx, utt_idx, speaker_idx)

                        raw_ll = assets.LabelList.create_single(
                            raw_text,
                            idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW)
                        utt.set_label_list(raw_ll)

                        word_ll = assets.LabelList(
                            idx=audiomate.corpus.LL_WORD_TRANSCRIPT)

                        for record in words:
                            # Sample counts divided by 16000 — presumably a
                            # fixed 16 kHz sample rate; TODO confirm for all files.
                            start = int(record[0]) / 16000
                            end = int(record[1]) / 16000
                            word_ll.append(
                                assets.Label(record[2], start=start, end=end))

                        utt.set_label_list(word_ll)

                        phone_ll = assets.LabelList(
                            idx=audiomate.corpus.LL_PHONE_TRANSCRIPT)

                        for record in phones:
                            start = int(record[0]) / 16000
                            end = int(record[1]) / 16000
                            phone_ll.append(
                                assets.Label(record[2],
                                             start=start, end=end))

                        utt.set_label_list(phone_ll)

        # NOTE(review): "filter" and "subview" shadow a builtin / likely module
        # name; kept as-is since this is a documentation-only pass.
        filter = subset.MatchingUtteranceIdxFilter(
            utterance_idxs=part_utt_ids)
        subview = subset.Subview(corpus, filter_criteria=[filter])
        corpus.import_subview(part, subview)

    return corpus
def load_meta_data(path):
    """ Read the meta-data file and return all records except the header line. """
    file_path = os.path.join(path, META_FILE_PATH)
    records = textfile.read_separated_lines(file_path, separator=',')
    # The first line is the CSV header.
    return records[1:]