Exemplo n.º 1
0
def create_sample_dataset(temp_dir):
    """Build a small sample corpus located at ``temp_dir``.

    The corpus receives three tracks (two wav files and one flac file),
    three issuers, four utterances that each carry a single
    word-transcript label, and the subviews ``train`` (utt-1 to utt-3)
    and ``dev`` (utt-4).

    Args:
        temp_dir: Location of the corpus; it is converted with ``str()``
            before being passed to ``audiomate.Corpus``.

    Returns:
        The populated ``audiomate.Corpus``.
    """
    corpus = audiomate.Corpus(str(temp_dir))

    wav_1_path = resources.sample_wav_file('wav_1.wav')
    wav_2_path = resources.sample_wav_file('wav_2.wav')
    flac_path = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'])

    track_1 = corpus.new_file(wav_1_path, track_idx='wav_1')
    track_2 = corpus.new_file(wav_2_path, track_idx='wav_2')
    track_3 = corpus.new_file(flac_path, track_idx='wav_3')

    all_issuers = [
        Speaker('spk-1', gender=Gender.MALE),
        Speaker('spk-2', gender=Gender.FEMALE),
        Issuer('spk-3'),
    ]
    male, female, plain = all_issuers
    corpus.import_issuers(all_issuers)

    # 2.5951875 (presumably the duration of wav_1 in seconds -- TODO confirm)
    first = corpus.new_utterance('utt-1', track_1.idx, issuer_idx=male.idx)
    # Two utterances share the second track, split at 1.5.
    second = corpus.new_utterance(
        'utt-2', track_2.idx, issuer_idx=female.idx, start=0, end=1.5)
    third = corpus.new_utterance(
        'utt-3', track_2.idx, issuer_idx=female.idx, start=1.5, end=2.5)
    # 5.0416875 (presumably the duration of the flac track -- TODO confirm)
    fourth = corpus.new_utterance('utt-4', track_3.idx, issuer_idx=plain.idx)

    transcripts = (
        (first, 'who am i'),
        (second, 'who are you'),
        (third, 'who is he'),
        (fourth, 'who are they'),
    )
    for utterance, text in transcripts:
        utterance.set_label_list(
            LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                      labels=[Label(text)]))

    train_view = subview.Subview(corpus, filter_criteria=[
        subview.MatchingUtteranceIdxFilter(
            utterance_idxs={'utt-1', 'utt-2', 'utt-3'}),
    ])
    dev_view = subview.Subview(corpus, filter_criteria=[
        subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'}),
    ])

    corpus.import_subview('train', train_view)
    corpus.import_subview('dev', dev_view)

    return corpus
Exemplo n.º 2
0
    def test_validate_passes(self):
        """TrackReadValidator must report a pass for two sample wav tracks."""
        ds = audiomate.Corpus()
        sample_tracks = (('wav1', 'wav_1.wav'), ('wav2', 'wav_2.wav'))

        for track_idx, file_name in sample_tracks:
            ds.new_file(resources.sample_wav_file(file_name), track_idx)

        validator = validation.TrackReadValidator()
        result = validator.validate(ds)

        assert result.passed
Exemplo n.º 3
0
    def _load(self, path):
        """Create a corpus with one track and one utterance per wav file.

        Each matched file is registered under its base name (file name
        without extension); the utterance uses the same id as its track.

        Args:
            path: Directory that is searched for ``*.wav`` files and used
                as the corpus path.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)
        pattern = '{}/*.wav'.format(path)

        # NOTE(review): the pattern has no ``**`` component, so only files
        # directly inside ``path`` are matched -- confirm that is intended.
        for wav_path in glob.glob(pattern, recursive=True):
            idx = os.path.splitext(os.path.basename(wav_path))[0]
            corpus.new_file(wav_path, idx)
            corpus.new_utterance(idx, idx)

        return corpus
Exemplo n.º 4
0
    def _load(self, path):
        """Load the corpus stored at ``path``.

        Every sub-directory of ``path`` whose name does not start with an
        underscore is handed to ``_load_folder``. Once all folders are
        processed, ``_create_subviews`` is invoked a single time.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        for folder in os.scandir(path):
            if folder.is_dir() and not folder.name.startswith('_'):
                SpeechCommandsReader._load_folder(folder, corpus)

        # Build the subviews once, after all folders are loaded. Doing this
        # inside the loop repeated the work for every folder and skipped it
        # entirely when no folder qualified.
        SpeechCommandsReader._create_subviews(path, corpus)

        return corpus
Exemplo n.º 5
0
    def _load(self, path):
        """Load the ``train``, ``valid`` and ``test`` parts found at ``path``.

        Speaker information is read from ``data/speaker_demographics.csv``
        below ``path`` and passed to every ``load_part`` call.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        speakers = FluentSpeechReader.load_speakers(
            os.path.join(path, 'data', 'speaker_demographics.csv'))

        for part in ('train', 'valid', 'test'):
            FluentSpeechReader.load_part(path, part, corpus, speakers)

        return corpus
Exemplo n.º 6
0
    def test_validate_doesnt_pass(self):
        """TrackReadValidator must flag only the track backed by invalid audio."""
        ds = audiomate.Corpus()
        sample_tracks = (('wav1', 'wav_1.wav'), ('wav2', 'invalid_audio.wav'))

        for track_idx, file_name in sample_tracks:
            ds.new_file(resources.sample_wav_file(file_name), track_idx)

        validator = validation.TrackReadValidator()
        result = validator.validate(ds)

        assert not result.passed
        assert len(result.invalid_items) == 1
        assert 'wav2' in result.invalid_items
Exemplo n.º 7
0
    def test_load_with_default_reader_when_reader_unspecified(self):
        """Loading without a ``reader`` argument must read the 'default' sample corpus."""
        expected_path = resources.sample_corpus_path('default')

        loaded = audiomate.Corpus().load(expected_path)

        assert loaded.name == 'default'
        assert loaded.path == expected_path
        assert loaded.num_files == 4
        for file_idx in ('file-1', 'file-2', 'file-3', 'file-4'):
            assert file_idx in loaded.files
Exemplo n.º 8
0
    def _load(self, path):
        """Build a corpus from the articles found below ``path``.

        For every article (processed in sorted order) the audio file info,
        the reader (name and gender) and the segments are fetched. One
        speaker is created per distinct reader name and one file track per
        distinct audio file; both get zero-padded running numbers as ids.
        Each segment that can be mapped to an audio file becomes an
        utterance with a word-transcript label, unless its id is listed in
        ``self.invalid_utterance_ids``.

        Args:
            path: Root directory handed to ``self.get_articles``.

        Returns:
            A corpus created with ``audiomate.Corpus.from_corpus`` from the
            collected data.
        """
        corpus = audiomate.Corpus()

        speakers_by_name = {}
        tracks_by_path = {}

        for article_path in sorted(self.get_articles(path)):
            audio_files = self.get_audio_file_info(article_path)
            reader_name, reader_gender = self.get_reader_info(article_path)
            segments = self.get_segments(article_path)

            if reader_name in speakers_by_name:
                speaker = speakers_by_name[reader_name]
            else:
                # The speaker id is the number of speakers seen so far.
                speaker = issuers.Speaker(
                    '{:0>8}'.format(len(speakers_by_name)),
                    gender=reader_gender)
                speakers_by_name[reader_name] = speaker
                corpus.import_issuers(speaker)

            for start, end, text in segments:
                file_path = self.find_audio_file_for_segment(
                    start, end, audio_files)

                if file_path is None:
                    continue

                if file_path in tracks_by_path:
                    track = tracks_by_path[file_path]
                else:
                    # The track id is the number of tracks seen so far.
                    track = tracks.FileTrack(
                        '{:0>10}'.format(len(tracks_by_path)), file_path)
                    tracks_by_path[file_path] = track
                    corpus.import_tracks(track)

                # Segment times are shifted by the value stored for the
                # audio file to obtain positions relative to the track.
                offset = audio_files[file_path]
                relative_start = start - offset
                relative_end = end - offset

                utt_idx = '{}_{}_{}_{}'.format(
                    speaker.idx, track.idx,
                    int(start * 1000), int(end * 1000))

                if utt_idx in self.invalid_utterance_ids:
                    continue

                utterance = corpus.new_utterance(utt_idx,
                                                 track.idx,
                                                 issuer_idx=speaker.idx,
                                                 start=relative_start,
                                                 end=relative_end)

                label_list = annotations.LabelList.create_single(
                    text, audiomate.corpus.LL_WORD_TRANSCRIPT)
                utterance.set_label_list(label_list)

        return audiomate.Corpus.from_corpus(corpus)
Exemplo n.º 9
0
    def test_from_corpus_only_utterances_and_files(self):
        """``Corpus.from_corpus`` must carry over files and utterances when no issuers exist."""
        source = audiomate.Corpus()

        for track_path, track_idx in (('/random/path', 'file_1'),
                                      ('/random/path2', 'file_2')):
            source.new_file(track_path, track_idx)

        for utt_idx, track_idx in (('utt_1', 'file_1'), ('utt_2', 'file_2')):
            source.new_utterance(utt_idx, track_idx)

        duplicate = audiomate.Corpus.from_corpus(source)

        assert duplicate.num_files == 2
        assert duplicate.num_utterances == 2
        assert duplicate.num_issuers == 0
        assert copy.num_issuers == 0
Exemplo n.º 10
0
def corpus():
    """Return a corpus that already holds one track, one issuer and one utterance.

    The assets are stored directly in the ``tracks``, ``issuers`` and
    ``utterances`` mappings of a corpus created without a path.
    """
    track = tracks.FileTrack('existing_file', '../any/path.wav')
    issuer = issuers.Issuer('existing_issuer')
    utterance = tracks.Utterance('existing_utt', track, issuer=issuer)

    ds = audiomate.Corpus()
    ds.tracks['existing_file'] = track
    ds.issuers['existing_issuer'] = issuer
    ds.utterances['existing_utt'] = utterance

    return ds
Exemplo n.º 11
0
    def test_load_with_custom_reader_specified_by_instance(self):
        """Loading with a ``MusanReader`` instance must read the 'musan' sample corpus."""
        expected_path = resources.sample_corpus_path('musan')
        expected_files = (
            'music-fma-0000',
            'noise-free-sound-0000',
            'noise-free-sound-0001',
            'speech-librivox-0000',
            'speech-librivox-0001',
        )

        loaded = audiomate.Corpus().load(expected_path, reader=MusanReader())

        assert loaded.name == 'musan'
        assert loaded.path == expected_path
        assert loaded.num_files == 5
        for file_idx in expected_files:
            assert file_idx in loaded.files
Exemplo n.º 12
0
    def setUp(self):
        """Create a corpus in a fresh temp directory holding one file, issuer and utterance."""
        tempdir = tempfile.mkdtemp()
        existing_file = assets.File('existing_file', '../any/path.wav')
        existing_issuer = assets.Issuer('existing_issuer')
        existing_utt = assets.Utterance('existing_utt',
                                        existing_file,
                                        issuer=existing_issuer)

        self.tempdir = tempdir
        self.corpus = audiomate.Corpus(tempdir)

        self.ex_file = existing_file
        self.ex_issuer = existing_issuer
        self.ex_utterance = existing_utt

        # Register the assets directly in the corpus mappings.
        self.corpus.files['existing_file'] = existing_file
        self.corpus.issuers['existing_issuer'] = existing_issuer
        self.corpus.utterances['existing_utt'] = existing_utt
Exemplo n.º 13
0
    def test_save_at_path_throws_exception_when_writer_does_not_exist(self):
        """``save_at`` with an unknown writer name must raise and write nothing."""
        source_path = resources.sample_corpus_path('default')
        loaded = audiomate.Corpus().load(source_path)

        assert loaded.name == 'default'
        assert loaded.path == source_path
        assert loaded.num_files == 4

        # The target directory starts out empty ...
        assert not os.listdir(self.tempdir)

        with pytest.raises(UnknownWriterException):
            loaded.save_at(self.tempdir, writer='does_not_exist')

        # ... and must still be empty after the failed save.
        assert not os.listdir(self.tempdir)
Exemplo n.º 14
0
    def _load(self, path):
        """Read the corpus stored at ``path``.

        Files, genders, the utterance-to-speaker mapping, utterances and
        transcriptions are read in this order from the files named by the
        ``*_FILE_NAME`` constants below ``path``.

        Args:
            path: Directory containing the corpus files.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        wav_scp, spk2gender, utt2spk_file, segments, text = [
            os.path.join(path, name)
            for name in (WAV_FILE_NAME,
                         SPK2GENDER_FILE_NAME,
                         UTT2SPK_FILE_NAME,
                         SEGMENTS_FILE_NAME,
                         TRANSCRIPTION_FILE_NAME)
        ]

        corpus = audiomate.Corpus(path=path)
        base_reader = default.DefaultReader

        base_reader.read_files(wav_scp, corpus)
        KaldiReader.read_genders(spk2gender, corpus)
        utt_to_speaker = base_reader.read_utt_to_issuer_mapping(
            utt2spk_file, corpus)
        KaldiReader.read_utterances(segments, corpus, utt_to_speaker)
        KaldiReader.read_transcriptions(text, corpus)

        return corpus
Exemplo n.º 15
0
    def _load(self, path):
        """Read the corpus stored at ``path``.

        Files, issuers, the utterance-to-issuer mapping, utterances and
        feature containers are read with ``default.DefaultReader``; labels
        are read with ``BroadcastReader.read_labels``.

        Args:
            path: Directory containing the corpus files.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        files_file, issuers_file, utt_issuers_file, utts_file, feats_file = [
            os.path.join(path, name)
            for name in (FILES_FILE_NAME,
                         ISSUER_FILE_NAME,
                         UTT_ISSUER_FILE_NAME,
                         UTTERANCE_FILE_NAME,
                         FEAT_CONTAINER_FILE_NAME)
        ]

        corpus = audiomate.Corpus(path=path)
        base_reader = default.DefaultReader

        base_reader.read_files(files_file, corpus)
        base_reader.read_issuers(issuers_file, corpus)
        issuer_by_utt = base_reader.read_utt_to_issuer_mapping(
            utt_issuers_file, corpus)
        base_reader.read_utterances(utts_file, corpus, issuer_by_utt)
        BroadcastReader.read_labels(path, corpus)
        base_reader.read_feature_containers(feats_file, corpus)

        return corpus
Exemplo n.º 16
0
    def _load(self, path):
        """Load every part listed in ``SUBSETS`` and add one subview per part.

        For each part the ids found in its folder are loaded with
        ``TudaReader.load_file``; the same ids define the subview that is
        imported under the part's name.

        Args:
            path: Root directory containing one folder per part.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        for part in SUBSETS:
            part_dir = os.path.join(path, part)
            part_ids = TudaReader.get_ids_from_folder(part_dir, part)

            for record_id in part_ids:
                TudaReader.load_file(part_dir, record_id, corpus)

            part_filter = subview.MatchingUtteranceIdxFilter(
                utterance_idxs=part_ids)
            part_view = subview.Subview(corpus,
                                        filter_criteria=[part_filter])
            corpus.import_subview(part, part_view)

        return corpus
Exemplo n.º 17
0
    def test_save_at_corpus_path_throws_exception_when_writer_does_not_exist(
            self, tmpdir):
        """``save`` with an unknown writer name must raise and write nothing."""
        source_path = resources.sample_corpus_path('default')
        loaded = audiomate.Corpus().load(source_path)

        assert loaded.name == 'default'
        assert loaded.path == source_path
        assert loaded.num_tracks == 6

        # The target directory starts out empty.
        assert not os.listdir(tmpdir.strpath)

        # ``save`` writes to the corpus path, so point it at the temp dir.
        loaded.path = tmpdir.strpath
        with pytest.raises(UnknownWriterException):
            loaded.save(writer='does_not_exist')

        assert not os.listdir(tmpdir.strpath)
Exemplo n.º 18
0
    def _load(self, path):
        """Load all wav files found two directory levels below ``path``.

        The first level names the type (``music``, ``noise`` or
        ``speech``), the second level a source directory. If a source
        directory contains the annotation file, it is read and handed to
        the issuer factory of the type. Every wav file becomes a track and
        an utterance sharing the file name (without ``.wav``) as id, and
        the utterance gets a single domain label holding the type name.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        issuer_factories = {
            'music': self._create_or_get_music_issuer,
            'noise': self._create_or_get_noise_issuer,
            'speech': self._create_or_get_speech_issuer,
        }

        corpus = audiomate.Corpus(path=path)

        for type_name, type_dir in self._directories(path).items():
            source_dirs = self._directories(type_dir)

            for _, source_dir in source_dirs.items():
                ann_path = os.path.join(source_dir, ANN_FILE_NAME_)

                if os.path.exists(ann_path):
                    source_annotations = \
                        textfile.read_separated_lines_with_first_key(
                            ann_path,
                            separator=' ',
                            max_columns=ANN_NUM_COLUMS_[type_name])
                else:
                    source_annotations = {}

                for entry in os.scandir(source_dir):
                    name = entry.name

                    if not name.endswith('.wav'):
                        continue

                    wav_path = os.path.join(source_dir, name)
                    # Strip the four characters of '.wav'; the result is
                    # used as id of both the track and the utterance.
                    idx = name[:-4]
                    issuer_idx = issuer_factories[type_name](
                        corpus, idx, source_annotations)

                    corpus.new_file(wav_path, file_idx=idx, copy_file=False)
                    utt = corpus.new_utterance(idx, idx, issuer_idx)
                    utt.set_label_list(
                        assets.LabelList.create_single(
                            type_name, idx=audiomate.corpus.LL_DOMAIN))

        return corpus
Exemplo n.º 19
0
    def _load(self, path):
        """Load the subsets found below ``path``.

        The directory tree is walked as subset / speaker / chapter. Each
        chapter's ``<speaker>-<chapter>.trans.txt`` is read, and every
        transcribed utterance is added as a flac track plus an utterance
        with a word-transcript label. One subview per subset is imported
        under the subset's id.

        Args:
            path: Root directory of the corpus; must contain
                ``SPEAKERS.TXT``.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        speakers = LibriSpeechReader.load_speakers(
            os.path.join(path, 'SPEAKERS.TXT'))

        subfolders = LibriSpeechReader.available_subfolders

        for subset_idx, subset_path in subfolders(path, SUBSETS.keys()).items():
            utt_ids_of_subset = set()

            for speaker_idx, speaker_path in subfolders(subset_path).items():
                corpus.import_issuers(speakers[speaker_idx])

                for chapter_idx, chapter_path in subfolders(speaker_path).items():
                    transcript_name = '{}-{}.trans.txt'.format(speaker_idx,
                                                               chapter_idx)
                    transcripts = LibriSpeechReader.load_transcripts(
                        os.path.join(chapter_path, transcript_name))

                    for utt_idx, transcript in transcripts.items():
                        audio_path = os.path.join(chapter_path,
                                                  '{}.flac'.format(utt_idx))
                        corpus.new_file(audio_path, utt_idx)

                        # Track and utterance share the same id.
                        utt = corpus.new_utterance(utt_idx, utt_idx,
                                                   speaker_idx)
                        utt.set_label_list(
                            annotations.LabelList.create_single(
                                transcript,
                                idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

                        utt_ids_of_subset.add(utt_idx)

            subset_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=set(utt_ids_of_subset))
            subset_view = subset.Subview(corpus,
                                         filter_criteria=[subset_filter])
            corpus.import_subview(subset_idx, subset_view)

        return corpus
Exemplo n.º 20
0
    def _load(self, path):
        """Load the ``test`` and ``train`` folders and add a subview for each.

        Args:
            path: Root directory containing the ``test`` and ``train``
                folders.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)
        parts = ('test', 'train')

        # Load both folders first; the subviews are created afterwards.
        utt_ids_by_part = {}
        for part in parts:
            folder = os.path.join(path, part)
            utt_ids_by_part[part] = AEDReader.load_folder(folder, corpus)

        views = []
        for part in parts:
            part_filter = subset.MatchingUtteranceIdxFilter(
                utterance_idxs=utt_ids_by_part[part])
            views.append(
                (part, subset.Subview(corpus, filter_criteria=[part_filter])))

        for part, view in views:
            corpus.import_subview(part, view)

        return corpus
Exemplo n.º 21
0
    def _load(self, path):
        """Load all data folders found at ``path`` (processed in sorted order).

        Per folder the speaker info is parsed from ``etc/README`` and the
        prompts from the ``etc`` folder. A file in the ``wav`` folder is
        added as track and utterance only if it is a regular ``.wav``
        file, its id is not in ``self.invalid_utterance_ids`` and a
        prompt exists for it. The issuer is imported when its first
        utterance is added.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        for folder_path in sorted(VoxforgeReader.data_folders(path)):
            folder_name = os.path.basename(folder_path)
            etc_dir = os.path.join(folder_path, 'etc')
            wav_dir = os.path.join(folder_path, 'wav')

            # Speaker; fall back to the folder name if there is no usable id.
            issuer = VoxforgeReader.parse_speaker_info(
                os.path.join(etc_dir, 'README'))

            if issuer.idx is None or issuer.idx == 'anonymous':
                issuer.idx = folder_name

            # Transcriptions
            prompts, prompts_orig = VoxforgeReader.parse_prompts(etc_dir)

            # Tracks and utterances
            for file_name in os.listdir(wav_dir):
                wav_path = os.path.join(wav_dir, file_name)
                stem, ext = os.path.splitext(file_name)
                idx = '{}-{}'.format(folder_name, stem)

                if not (os.path.isfile(wav_path) and ext == '.wav'):
                    continue

                if idx in self.invalid_utterance_ids:
                    continue

                if stem not in prompts:
                    continue

                if issuer.idx not in corpus.issuers:
                    corpus.import_issuers([issuer])

                corpus.new_file(wav_path, idx)
                utt = corpus.new_utterance(idx, idx, issuer.idx)
                utt.set_label_list(
                    annotations.LabelList.create_single(
                        prompts[stem],
                        idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

                if stem in prompts_orig:
                    utt.set_label_list(
                        annotations.LabelList.create_single(
                            prompts_orig[stem],
                            idx=audiomate.corpus.LL_WORD_TRANSCRIPT_RAW))

        return corpus
Exemplo n.º 22
0
    def _load(self, path):
        """Read the corpus stored at ``path``.

        Reads, in this order: files, tracks from audio containers,
        issuers, the utterance-to-issuer mapping, utterances, labels,
        feature containers and subviews.

        Args:
            path: Directory containing the corpus files.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        (files_file, audio_file, issuers_file,
         utt_issuers_file, utts_file, feats_file) = [
            os.path.join(path, name)
            for name in (FILES_FILE_NAME,
                         AUDIO_CONTAINER_FILE_NAME,
                         ISSUER_FILE_NAME,
                         UTT_ISSUER_FILE_NAME,
                         UTTERANCE_FILE_NAME,
                         FEAT_CONTAINER_FILE_NAME)
        ]

        corpus = audiomate.Corpus(path=path)

        DefaultReader.read_files(files_file, corpus)
        DefaultReader.read_tracks_from_audio_containers(audio_file, corpus)
        DefaultReader.read_issuers(issuers_file, corpus)
        issuer_by_utt = DefaultReader.read_utt_to_issuer_mapping(
            utt_issuers_file, corpus)
        DefaultReader.read_utterances(utts_file, corpus, issuer_by_utt)
        DefaultReader.read_labels(path, corpus)
        DefaultReader.read_feature_containers(feats_file, corpus)
        DefaultReader.read_subviews(path, corpus)

        return corpus
Exemplo n.º 23
0
    def _load(self, path):
        """Load every labelled ``.wav`` file located directly in ``path``.

        The label is the first group of ``LABEL_PATTERN`` matched against
        the file name without extension. Each such file becomes a track and
        an utterance (both using that name as id) with a single sound-class
        label. Wav files whose name does not match the pattern are skipped,
        because no label can be derived for them.

        Args:
            path: Directory containing the wav files.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)
        regex = re.compile(LABEL_PATTERN)

        for file_name in os.listdir(path):
            base_name, ext = os.path.splitext(file_name)

            if ext != '.wav':
                continue

            match = regex.match(base_name)

            if match is None:
                # ``match`` returns None for non-matching names; calling
                # ``group`` on it would raise an AttributeError.
                continue

            label = match.group(1)
            file_path = os.path.join(path, file_name)

            corpus.new_file(file_path, base_name)
            utt = corpus.new_utterance(base_name, base_name)
            ll = annotations.LabelList.create_single(
                label, idx=audiomate.corpus.LL_SOUND_CLASS)
            utt.set_label_list(ll)

        return corpus
Exemplo n.º 24
0
def prepare():
    """Create a corpus with generated issuers and tracks plus a list of utterances.

    1000 issuers and 1000 tracks are generated with a ``random.Random``
    seeded with 234 and imported into a new corpus. Utterances are then
    generated per (issuer, track) pair; they are returned separately and
    are not added to the corpus by this function.

    Returns:
        Tuple ``(target_corpus, utterances)``.
    """
    rand = random.Random(x=234)

    target_corpus = audiomate.Corpus()

    generated_issuers = resources.generate_issuers(1000, rand=rand)
    target_corpus.import_issuers(generated_issuers)

    generated_tracks = resources.generate_tracks(1000, rand=rand)
    target_corpus.import_tracks(generated_tracks)

    # Pairs are processed in order, so ``rand`` is consumed exactly as in
    # a plain loop.
    utterances = [
        utt
        for issuer, track in zip(generated_issuers, generated_tracks)
        for utt in resources.generate_utterances(track,
                                                 issuer,
                                                 10, (3, 3), (3, 3),
                                                 rand=rand)
    ]

    return target_corpus, utterances
Exemplo n.º 25
0
    def _load(self, path):
        """Load every part listed in ``SUBSETS`` and create the subviews.

        For each part, the utterance ids returned by
        ``TudaReader.load_file`` are collected; they define the subview
        imported under the part's name and are passed, with the prefix
        ``'<part>_'``, to ``create_wav_type_subviews``. Finally
        ``create_wav_type_subviews`` is called once more for all
        utterances of the corpus without a prefix.

        Args:
            path: Root directory containing one folder per part.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        for part in SUBSETS:
            part_dir = os.path.join(path, part)
            record_ids = TudaReader.get_ids_from_folder(part_dir, part)

            part_utt_ids = []
            for record_id in record_ids:
                part_utt_ids.extend(
                    TudaReader.load_file(part_dir, record_id, corpus))

            part_filter = subview.MatchingUtteranceIdxFilter(
                utterance_idxs=part_utt_ids)
            part_view = subview.Subview(corpus, filter_criteria=[part_filter])
            corpus.import_subview(part, part_view)

            TudaReader.create_wav_type_subviews(
                corpus, part_utt_ids, prefix='{}_'.format(part))

        TudaReader.create_wav_type_subviews(corpus, corpus.utterances.keys())

        return corpus
Exemplo n.º 26
0
    def test_save_at_path_with_default_writer_when_writer_unspecified(self):
        """``save_at`` without a writer must produce the expected files in the target dir."""
        source_path = resources.sample_corpus_path('default')
        expected_files = (
            'files.txt',
            'issuers.json',
            'labels_raw_text.txt',
            'labels_text.txt',
            'utt_issuers.txt',
            'utterances.txt',
        )

        loaded = audiomate.Corpus().load(source_path)

        assert loaded.name == 'default'
        assert loaded.path == source_path
        assert loaded.num_files == 4

        # The target directory starts out empty.
        assert not os.listdir(self.tempdir)

        loaded.save_at(self.tempdir)

        written = os.listdir(self.tempdir)
        assert len(written) == 9

        for file_name in expected_files:
            assert file_name in written
Exemplo n.º 27
0
    def _load(self, path):
        """Load all records listed in the meta data found at ``path``.

        Each record yields a track (file inside the ``audio`` folder) and
        an utterance sharing the file name without extension as id, with
        the record's category as sound-class label. A subview
        ``fold-<n>`` is imported per fold, plus the subview ``esc-10``
        for records whose esc10 field equals the string ``'True'``.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)

        records = ESC50Reader.load_meta_data(path)

        utt_ids_by_fold = collections.defaultdict(list)
        esc10_ids = []

        for record in records:
            # Fields used: 0 = file name, 1 = fold, 3 = category, 4 = esc10.
            file_name = record[0]
            utt_id = os.path.splitext(file_name)[0]
            audio_path = os.path.abspath(
                os.path.join(path, 'audio', file_name))
            fold, category, is_esc10 = record[1], record[3], record[4]

            corpus.new_file(audio_path, utt_id)
            utterance = corpus.new_utterance(utt_id, utt_id)
            utterance.set_label_list(
                assets.LabelList.create_single(
                    category, idx=audiomate.corpus.LL_SOUND_CLASS))

            utt_ids_by_fold['fold-{}'.format(fold)].append(utt_id)

            if is_esc10 == 'True':
                esc10_ids.append(utt_id)

        for fold_name, ids_in_fold in utt_ids_by_fold.items():
            fold_view = subset.Subview(corpus, filter_criteria=[
                subset.MatchingUtteranceIdxFilter(
                    utterance_idxs=set(ids_in_fold)),
            ])
            corpus.import_subview(fold_name, fold_view)

        esc10_view = subset.Subview(corpus, filter_criteria=[
            subset.MatchingUtteranceIdxFilter(utterance_idxs=set(esc10_ids)),
        ])
        corpus.import_subview('esc-10', esc10_view)

        return corpus
Exemplo n.º 28
0
    def _load(self, path):
        """Load all wav files from the ``recordings`` folder below ``path``.

        The file name without extension is split at underscores: the
        first part is used as transcript, the parts between the first and
        the last form the speaker id. Each file becomes a track and an
        utterance sharing the file name as id; a speaker is imported the
        first time its id is seen.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)
        wav_pattern = os.path.join(path, 'recordings', '*.wav')

        for wav_path in glob.glob(wav_pattern):
            idx, _ = os.path.splitext(os.path.basename(wav_path))

            corpus.new_file(wav_path, idx)

            parts = idx.split('_')
            digit = parts[0]
            speaker_idx = '_'.join(parts[1:-1])

            if speaker_idx not in corpus.issuers:
                corpus.import_issuers(issuers.Speaker(speaker_idx))

            utt = corpus.new_utterance(idx, idx, speaker_idx)
            label_list = annotations.LabelList.create_single(
                str(digit), idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
            utt.set_label_list(label_list)

        return corpus
Exemplo n.º 29
0
    def test_save_at_path_with_writer_specified_by_instance(self, tmpdir):
        """``save_at`` with a ``KaldiWriter`` instance must write the kaldi files and update the path."""
        source_path = resources.sample_corpus_path('kaldi')
        expected_files = ('segments', 'text', 'utt2spk', 'wav.scp')

        loaded = audiomate.Corpus().load(source_path, reader='kaldi')

        assert loaded.name == 'kaldi'
        assert loaded.path == source_path
        assert loaded.path != tmpdir.strpath
        assert loaded.num_tracks == 4

        # The target directory starts out empty.
        assert not os.listdir(tmpdir.strpath)

        loaded.save_at(tmpdir.strpath, writer=KaldiWriter())

        # Saving at a new location also changes the corpus path.
        assert loaded.path == tmpdir.strpath

        written = os.listdir(tmpdir.strpath)
        assert len(written) == 4

        for file_name in expected_files:
            assert file_name in written
Exemplo n.º 30
0
    def _load(self, path):
        """Load all wav files from the per-speaker folders in ``<path>/data``.

        Every directory inside ``data`` is treated as one speaker, named
        by the directory. Each wav file becomes a track and an utterance
        sharing the file name without extension as id; the part before
        the first underscore is used as transcript. A speaker, with
        gender and age group taken from the speaker meta data, is
        imported the first time one of its files is seen.

        Args:
            path: Root directory of the corpus.

        Returns:
            The populated ``audiomate.Corpus``.
        """
        corpus = audiomate.Corpus(path=path)
        data_dir = os.path.join(path, 'data')
        speaker_meta = AudioMNISTReader.load_speaker_meta(path)

        for speaker_idx in os.listdir(data_dir):
            speaker_dir = os.path.join(data_dir, speaker_idx)

            if not os.path.isdir(speaker_dir):
                continue

            for wav_path in glob.glob(os.path.join(speaker_dir, '*.wav')):
                idx, _ = os.path.splitext(os.path.basename(wav_path))

                corpus.new_file(wav_path, idx)

                digit = idx.split('_')[0]

                if speaker_idx not in corpus.issuers:
                    gender = AudioMNISTReader.get_gender(
                        speaker_meta, speaker_idx)
                    age_group = AudioMNISTReader.get_age_group(
                        speaker_meta, speaker_idx)
                    corpus.import_issuers(
                        issuers.Speaker(speaker_idx,
                                        gender=gender,
                                        age_group=age_group))

                utt = corpus.new_utterance(idx, idx, speaker_idx)
                utt.set_label_list(
                    annotations.LabelList.create_single(
                        str(digit),
                        idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

        return corpus