Пример #1
0
    def _create_subviews(path, corpus):
        """
        Register the subviews 'train', 'dev' and 'test' on the corpus.

        The utterance ids of the test and dev subviews are built from the
        entries of ``testing_list.txt`` and ``validation_list.txt`` found in
        ``path``. The train subview uses the inverse filter, so it matches
        every utterance that is in neither of the two lists.
        """

        def read_utt_ids(list_name):
            # The list is read with '/' as separator and at most two columns.
            # An id is "<second column without extension>_<first column>".
            entries = textfile.read_separated_lines(
                os.path.join(path, list_name), separator='/', max_columns=2)
            return {
                '{}_{}'.format(os.path.splitext(entry[1])[0], entry[0])
                for entry in entries
            }

        test_ids = read_utt_ids('testing_list.txt')
        dev_ids = read_utt_ids('validation_list.txt')

        # (subview name, utterance ids, inverse matching)
        view_specs = [
            ('train', test_ids | dev_ids, True),
            ('dev', dev_ids, False),
            ('test', test_ids, False),
        ]

        for view_name, utt_ids, inverted in view_specs:
            utt_filter = subview.MatchingUtteranceIdxFilter(
                utterance_idxs=utt_ids, inverse=inverted)
            view = subview.Subview(corpus, filter_criteria=utt_filter)
            corpus.import_subview(view_name, view)
Пример #2
0
    def test_write_utt2spk(self, writer, tmpdir):
        """ Check the utterance/speaker pairs of the written ``utt2spk`` file. """
        ds = resources.create_dataset()

        # An utterance without issuer is expected to show up
        # in utt2spk with its own idx as speaker ("utt-idx utt-idx").
        ds.new_file('/random/path', 'wav-33')
        ds.new_utterance('utt-23', 'wav-33')

        target_dir = tmpdir.strpath
        writer.save(ds, target_dir)

        utt2spk_path = os.path.join(target_dir, 'utt2spk')
        rows = textfile.read_separated_lines(utt2spk_path,
                                             separator=' ',
                                             max_columns=2)

        expected_rows = [
            ('spk-1-utt-1', 'spk-1'),
            ('spk-1-utt-2', 'spk-1'),
            ('spk-2-utt-3', 'spk-2'),
            ('spk-2-utt-4', 'spk-2'),
            ('spk-3-utt-5', 'spk-3'),
            ('utt-23', 'utt-23'),
        ]

        for row_index, (utt_idx, spk_idx) in enumerate(expected_rows):
            assert rows[row_index][0] == utt_idx
            assert rows[row_index][1] == spk_idx
Пример #3
0
    def _load(self, path):
        """
        Build a corpus from the UrbanSound8K folder at ``path``.

        Every row of ``metadata/UrbanSound8K.csv`` (except the first one)
        whose audio file exists below ``audio/fold<N>`` becomes a file and an
        utterance carrying a single sound-class label. One subview per fold
        ('fold<N>') is imported into the corpus.
        """
        corpus = audiomate.Corpus(path=path)

        meta_file_path = os.path.join(path, 'metadata', 'UrbanSound8K.csv')
        rows = textfile.read_separated_lines(meta_file_path,
                                             separator=',',
                                             max_columns=8)

        utt_ids_per_fold = collections.defaultdict(set)

        # The first row is skipped (presumably the CSV header).
        for row in rows[1:]:
            file_name, fold, label = row[0], row[5], row[7]
            fold_name = 'fold{}'.format(fold)
            file_path = os.path.join(path, 'audio', fold_name, file_name)

            # Rows without an existing audio file are ignored.
            if not os.path.isfile(file_path):
                continue

            basename = os.path.splitext(file_name)[0]

            corpus.new_file(file_path, basename)
            utt = corpus.new_utterance(basename, basename)
            sound_class = annotations.LabelList.create_single(
                label, idx=audiomate.corpus.LL_SOUND_CLASS)
            utt.set_label_list(sound_class)
            utt_ids_per_fold[fold_name].add(basename)

        for fold_name, utt_ids in utt_ids_per_fold.items():
            utt_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=utt_ids)
            fold_view = subset.Subview(corpus, filter_criteria=[utt_filter])
            corpus.import_subview(fold_name, fold_view)

        return corpus
Пример #4
0
    def read_labels(path, corpus):
        """
        Read the label files referenced in ``LABEL_FILE`` and attach them.

        Every reference line holds the utterance idx, the path of the label
        file (relative to ``path``) and optionally the idx of the label-list.
        The resulting label-list is set on the corresponding utterance.
        """
        reference_path = os.path.join(path, LABEL_FILE)
        references = textfile.read_separated_lines(reference_path,
                                                   separator=' ',
                                                   max_columns=3)

        for reference in references:
            utt_idx = reference[0]
            label_path = os.path.join(path, reference[1])
            # The third column is optional.
            label_idx = reference[2] if len(reference) > 2 else None

            label_list = annotations.LabelList(idx=label_idx)

            for entry in audacity.read_label_file(label_path):
                start, end, value = entry[0], entry[1], entry[2]
                # A negative end is replaced by infinity.
                label_list.addl(value, start,
                                float('inf') if end < 0 else end)

            label_list.apply(extract_meta_from_label_value)
            corpus.utterances[utt_idx].set_label_list(label_list)
Пример #5
0
    def test_write_segments(self, writer, tmpdir):
        """ Check the rows of the ``segments`` file written by the writer. """
        ds = resources.create_dataset()
        target_dir = tmpdir.strpath
        writer.save(ds, target_dir)

        segments_path = os.path.join(target_dir, 'segments')
        rows = textfile.read_separated_lines(segments_path,
                                             separator=' ',
                                             max_columns=4)

        # (utterance idx, track idx, start, end)
        expected_rows = [
            ('utt-1', 'wav-1', 0, -1),
            ('utt-2', 'wav_2', 0, -1),
            ('utt-3', 'wav_3', pytest.approx(0), pytest.approx(1.5)),
            ('utt-4', 'wav_3', pytest.approx(1.5), pytest.approx(2.5)),
            ('utt-5', 'wav_4', 0, -1),
        ]

        for row_index, expected_row in enumerate(expected_rows):
            utt_idx, track_idx, start, end = expected_row
            row = rows[row_index]

            assert row[0] == utt_idx
            assert row[1] == track_idx
            assert float(row[2]) == start
            assert float(row[3]) == end
Пример #6
0
    def test_save_subset_dev(self, writer, tmpdir):
        """ Check header and data records of the written ``dev.csv``. """
        ds = resources.create_dataset()
        writer.save(ds, tmpdir.strpath)

        dev_csv_path = os.path.join(tmpdir.strpath, 'dev.csv')
        assert os.path.isfile(dev_csv_path)

        records = textfile.read_separated_lines(dev_csv_path, separator=',')
        assert len(records) == 3

        header, data_records = records[0], records[1:]

        # HEADER
        assert len(header) == 3
        assert header[1] == 'wav_filesize'
        assert header[2] == 'transcript'

        # DATA RECORDS (first column -> remaining two columns)
        utts = {r[0]: (r[1], r[2]) for r in data_records}

        def check_record(audio_path, filesize, utt_idx):
            record = utts[audio_path]
            assert len(record) == 2
            assert record[0] == filesize
            assert record[1] == ds.utterances[utt_idx].label_lists[
                corpus.LL_WORD_TRANSCRIPT].labels[0].value

        check_record(os.path.join(tmpdir.strpath, 'audio', 'utt-4.wav'),
                     '32044', 'utt-4')
        check_record(ds.utterances['utt-5'].track.path, '83090', 'utt-5')
Пример #7
0
    def test_write_segments_no_speaker_prefix(self, tmpdir):
        """
        Check the ``segments`` file written by a KaldiWriter created with
        ``prefix_utterances_with_speaker=False``.
        """
        kaldi_writer = io.KaldiWriter(prefix_utterances_with_speaker=False)
        ds = resources.create_dataset()
        target_dir = tmpdir.strpath
        kaldi_writer.save(ds, target_dir)

        segments_path = os.path.join(target_dir, 'segments')
        rows = textfile.read_separated_lines(segments_path,
                                             separator=' ',
                                             max_columns=4)

        # (utterance idx, track idx, start, end)
        expected_rows = [
            ('utt-1', 'wav-1', 0, -1),
            ('utt-2', 'wav_2', 0, -1),
            ('utt-3', 'wav_3', pytest.approx(0), pytest.approx(1.5)),
            ('utt-4', 'wav_3', pytest.approx(1.5), pytest.approx(2.5)),
            ('utt-5', 'wav_4', 0, -1),
        ]

        for row_index, expected_row in enumerate(expected_rows):
            utt_idx, track_idx, start, end = expected_row
            row = rows[row_index]

            assert row[0] == utt_idx
            assert row[1] == track_idx
            assert float(row[2]) == start
            assert float(row[3]) == end
Пример #8
0
    def test_write_segments_absolute_times(self, writer, tmpdir):
        """
        Check the ``segments`` file written by a KaldiWriter created with
        ``use_absolute_times=True``.
        """
        # The ``writer`` argument is replaced by a dedicated writer here.
        writer = io.KaldiWriter(use_absolute_times=True)
        ds = resources.create_dataset()
        target_dir = tmpdir.strpath
        writer.save(ds, target_dir)

        segments_path = os.path.join(target_dir, 'segments')
        rows = textfile.read_separated_lines(segments_path,
                                             separator=' ',
                                             max_columns=4)

        # (utterance idx, track idx, start, end)
        expected_rows = [
            ('spk-1-utt-1', 'wav-1', 0, pytest.approx(2.5951875)),
            ('spk-1-utt-2', 'wav_2', 0, pytest.approx(2.5951875)),
            ('spk-2-utt-3', 'wav_3', pytest.approx(0), pytest.approx(1.5)),
            ('spk-2-utt-4', 'wav_3', pytest.approx(1.5), pytest.approx(2.5)),
            ('spk-3-utt-5', 'wav_4', 0, pytest.approx(2.5951875)),
        ]

        for row_index, expected_row in enumerate(expected_rows):
            utt_idx, track_idx, start, end = expected_row
            row = rows[row_index]

            assert row[0] == utt_idx
            assert row[1] == track_idx
            assert float(row[2]) == start
            assert float(row[3]) == end
Пример #9
0
    def test_read_separated_lines(self):
        """ Read the tab separated fixture file and compare all records. """
        fixture_path = os.path.join(os.path.dirname(__file__),
                                    'multi_column_file.txt')

        records = textfile.read_separated_lines(fixture_path, separator='\t')

        self.assertListEqual(
            [
                ['a', '1', 'x'],
                ['b', '2', 'y'],
                ['c', '3', 'z'],
            ],
            records,
        )
Пример #10
0
def test_read_separated_keeping_empty_columns():
    """ With ``keep_empty=True`` empty columns are expected as '' entries. """
    fixture_dir = os.path.dirname(__file__)
    fixture_path = os.path.join(fixture_dir, 'empty_column_file.txt')

    records = textfile.read_separated_lines(fixture_path,
                                            separator='\t',
                                            keep_empty=True)

    expected_records = [
        ['a', '1', 'x', '', ''],
        ['b', '2', '', 'y'],
        ['c', '', '3', 'z'],
    ]

    assert expected_records == records
Пример #11
0
    def load_books_of_speaker(self, corpus, path, speaker):
        """
        Load the utterances of all book folders found at the given path.

        For every entry of a book's ``metadata.csv`` whose wav file exists
        and whose utterance idx is not in ``self.invalid_utterance_ids``, a
        file and an utterance with the raw and the clean transcription are
        added to the corpus. If ``speaker`` is None, a speaker named after
        the file basename is imported for every metadata entry.

        Returns the list of idxs of the added utterances.
        """
        loaded_utt_ids = []

        for book_path in MailabsReader.get_folders(path):
            wavs_path = os.path.join(book_path, 'wavs')
            records = textfile.read_separated_lines(
                os.path.join(book_path, 'metadata.csv'),
                separator='|',
                max_columns=3)

            for record in records:
                file_basename = record[0]
                raw_text = record[1]
                clean_text = record[2]

                if speaker is not None:
                    speaker_idx = speaker.idx
                    idx = '{}-{}'.format(speaker_idx, file_basename)
                else:
                    # No speaker given: the entry gets its own speaker,
                    # imported even if the wav file turns out to be missing.
                    idx = file_basename
                    speaker_idx = idx
                    corpus.import_issuers(issuers.Speaker(idx))

                wav_path = os.path.join(wavs_path,
                                        '{}.wav'.format(file_basename))

                # Skip entries without audio or with an invalid idx.
                if (not os.path.isfile(wav_path)
                        or idx in self.invalid_utterance_ids):
                    continue

                corpus.new_file(wav_path, idx)

                raw_label_list = annotations.LabelList.create_single(
                    raw_text, idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW)
                clean_label_list = annotations.LabelList.create_single(
                    clean_text, idx=audiomate.corpus.LL_WORD_TRANSCRIPT)

                utterance = corpus.new_utterance(idx, idx, speaker_idx)
                utterance.set_label_list(raw_label_list)
                utterance.set_label_list(clean_label_list)

                loaded_utt_ids.append(utterance.idx)

        return loaded_utt_ids
Пример #12
0
    def read_labels(path, corpus):
        """
        Read the label-lists referenced in ``LABEL_FILE`` and attach them.

        Every reference line holds the utterance idx, the path of the label
        file (relative to ``path``) and optionally the idx of the label-list.
        """
        references = textfile.read_separated_lines(
            os.path.join(path, LABEL_FILE), separator=' ', max_columns=3)

        for reference in references:
            utt_idx = reference[0]
            label_path = os.path.join(path, reference[1])
            # The third column is optional.
            label_idx = reference[2] if len(reference) > 2 else None

            label_list = audacity.read_label_list(label_path)
            label_list.idx = label_idx
            label_list.apply(extract_meta_from_label_value)

            corpus.utterances[utt_idx].set_label_list(label_list)
Пример #13
0
    def test_write_wav_scp(self, writer, tmpdir):
        """ Check the track idx / wav path pairs of the written ``wav.scp``. """
        ds = resources.create_dataset()
        target_dir = tmpdir.strpath
        writer.save(ds, target_dir)

        scp_path = os.path.join(target_dir, 'wav.scp')
        rows = textfile.read_separated_lines(scp_path,
                                             separator=' ',
                                             max_columns=2)

        wav_base = os.path.abspath(
            resources.get_resource_path(['wav_files']))

        # (track idx, file name below the wav resource folder)
        expected_rows = [
            ('wav-1', 'wav_1.wav'),
            ('wav_2', 'wav_2.wav'),
            ('wav_3', 'wav_3.wav'),
            ('wav_4', 'wav_4.wav'),
        ]

        for row_index, (track_idx, file_name) in enumerate(expected_rows):
            assert rows[row_index][0] == track_idx
            assert rows[row_index][1] == os.path.join(wav_base, file_name)
Пример #14
0
    def test_save_spk2gender(self, writer, tmpdir):
        """
        Check that a KaldiWriter created with ``create_spk2gender=True``
        writes a ``spk2gender`` file with the expected speaker/gender pairs.
        """
        # The ``writer`` argument is replaced by a dedicated writer here.
        writer = io.KaldiWriter(create_spk2gender=True)
        ds = resources.create_dataset()
        target_dir = tmpdir.strpath
        writer.save(ds, target_dir)

        assert 'spk2gender' in os.listdir(target_dir)

        spk2gender_path = os.path.join(target_dir, 'spk2gender')
        rows = textfile.read_separated_lines(spk2gender_path,
                                             separator=' ',
                                             max_columns=2)

        expected_rows = [
            ('spk-1', 'm'),
            ('spk-2', 'f'),
            ('spk-3', 'm'),
        ]

        for row_index, (spk_idx, gender) in enumerate(expected_rows):
            assert rows[row_index][0] == spk_idx
            assert rows[row_index][1] == gender
Пример #15
0
    def read_tracks_from_audio_containers(audio_path, corpus):
        """
        Import the container tracks listed in the file at ``audio_path``.

        Every line of the file is read as three space separated columns:
        track idx, container path and key within the container. Container
        paths are resolved relative to the folder of ``audio_path``.
        Tracks referencing the same container path share one
        ``AudioContainer`` object.

        Nothing is done if ``audio_path`` is not an existing file.
        """
        if not os.path.isfile(audio_path):
            return

        base_path = os.path.dirname(audio_path)
        audio_tracks = textfile.read_separated_lines(audio_path,
                                                     separator=' ',
                                                     max_columns=3)

        # Cache of already created containers, indexed by the container
        # path as written in the file.
        audio_containers = {}

        for entry in audio_tracks:
            track_idx = entry[0]
            container_path = entry[1]
            key = entry[2]

            if container_path not in audio_containers:
                abs_path = os.path.abspath(
                    os.path.join(base_path, container_path))
                audio_containers[container_path] = containers.AudioContainer(
                    abs_path)

            # Look up by container path (not by the track key), so that
            # every track of one container file uses the same object.
            container = audio_containers[container_path]

            track = tracks.ContainerTrack(track_idx, container, key)
            corpus.import_tracks(track)
Пример #16
0
    def _load(self, path):
        """
        Build a corpus from the folder at ``path`` containing the parts
        'TEST' and 'TRAIN'.

        Each part is walked as ``<part>/<region>/<speaker>/*.WAV``. For every
        wav file the ``.TXT``, ``.WRD`` and ``.PHN`` files with the same
        basename are read and stored as raw transcript, word transcript and
        phone transcript of a new utterance. A speaker is imported once per
        speaker idx (folder name without its first character); the first
        character 'M'/'F' sets the gender. One subview per part is imported.
        """
        corpus = audiomate.Corpus(path=path)

        def read_columns(file_path):
            return textfile.read_separated_lines(file_path,
                                                 separator=' ',
                                                 max_columns=3)

        def timed_label_list(idx, records):
            # Columns 0 and 1 are divided by 16000 to get start and end
            # (presumably sample offsets at 16 kHz -> seconds; TODO confirm).
            label_list = assets.LabelList(idx=idx)

            for record in records:
                start = int(record[0]) / 16000
                end = int(record[1]) / 16000
                label_list.append(
                    assets.Label(record[2], start=start, end=end))

            return label_list

        for part in ['TEST', 'TRAIN']:
            part_path = os.path.join(path, part)
            part_utt_ids = set()

            for region in os.listdir(part_path):
                region_path = os.path.join(part_path, region)

                if not os.path.isdir(region_path):
                    continue

                for speaker_abbr in os.listdir(region_path):
                    speaker_path = os.path.join(region_path, speaker_abbr)
                    gender_code = speaker_abbr[:1]
                    speaker_idx = speaker_abbr[1:]

                    if speaker_idx not in corpus.issuers.keys():
                        issuer = assets.Speaker(speaker_idx)

                        if gender_code == 'M':
                            issuer.gender = assets.Gender.MALE
                        elif gender_code == 'F':
                            issuer.gender = assets.Gender.FEMALE

                        corpus.import_issuers(issuer)

                    wav_pattern = os.path.join(speaker_path, '*.WAV')

                    for wav_path in glob.glob(wav_pattern):
                        wav_name = os.path.basename(wav_path)
                        sentence_idx = os.path.splitext(wav_name)[0]
                        utt_idx = '{}-{}-{}'.format(
                            region, speaker_abbr, sentence_idx).lower()
                        part_utt_ids.add(utt_idx)

                        # All annotation files are read before the corpus
                        # is touched for this utterance.
                        raw_text = read_columns(
                            os.path.join(speaker_path,
                                         '{}.TXT'.format(sentence_idx)))[0][2]
                        words = read_columns(
                            os.path.join(speaker_path,
                                         '{}.WRD'.format(sentence_idx)))
                        phones = read_columns(
                            os.path.join(speaker_path,
                                         '{}.PHN'.format(sentence_idx)))

                        corpus.new_file(wav_path, utt_idx)
                        utt = corpus.new_utterance(utt_idx, utt_idx,
                                                   speaker_idx)

                        utt.set_label_list(
                            assets.LabelList.create_single(
                                raw_text,
                                idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))
                        utt.set_label_list(
                            timed_label_list(
                                audiomate.corpus.LL_WORD_TRANSCRIPT, words))
                        utt.set_label_list(
                            timed_label_list(
                                audiomate.corpus.LL_PHONE_TRANSCRIPT, phones))

            utt_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=part_utt_ids)
            part_view = subset.Subview(corpus, filter_criteria=[utt_filter])
            corpus.import_subview(part, part_view)

        return corpus
Пример #17
0
 def load_meta_data(path):
     """
     Return the records of the meta file, without the first record.

     The file at ``META_FILE_PATH`` (relative to ``path``) is read as comma
     separated lines. The first record is dropped (presumably the header).
     """
     meta_path = os.path.join(path, META_FILE_PATH)
     return textfile.read_separated_lines(meta_path, separator=',')[1:]