Пример #1
0
    def feature_scp_generator(path):
        """
        Lazily yield ``(utterance_id, matrix)`` pairs for every entry of a scp file.

        The file at ``path`` is read as space-separated key/value lines. Each
        value is handed to ``KaldiDatasetLoader.read_float_matrix`` and the
        result is yielded together with its key. Nothing is read before the
        first item is requested.
        """
        for key, specifier in textfile.read_key_value_lines(
                path, separator=' ').items():
            matrix = KaldiDatasetLoader.read_float_matrix(specifier)
            yield key, matrix
Пример #2
0
    def _load_wavs(self, loading_dataset):
        """
        Register every entry of the wav listing as a file of the dataset.

        The listing (``WAV_FILE_NAME`` inside the dataset folder) maps a file
        id to a path. Each path is joined onto the dataset folder and made
        absolute before being added under its id.
        """
        listing_path = os.path.join(loading_dataset.path, WAV_FILE_NAME)
        listing = textfile.read_key_value_lines(listing_path)

        for idx, listed_path in listing.items():
            joined = os.path.join(loading_dataset.path, listed_path)
            loading_dataset.add_file(os.path.abspath(joined), file_idx=idx)
Пример #3
0
    def test_read_key_value_lines(self):
        """Reading the fixture with a space separator yields the expected dict."""
        fixture = os.path.join(os.path.dirname(__file__), 'key_value_file.txt')

        parsed = textfile.read_key_value_lines(fixture, separator=" ")

        self.assertDictEqual({'a': '1', 'b': '2', 'c': '3'}, parsed)
Пример #4
0
    def _load_transcriptions(self, loading_dataset):
        """
        Attach transcriptions and raw transcriptions to the dataset.

        Both listings are optional: a file that does not exist is skipped.
        Entries of ``TRANSCRIPTION_FILE_NAME`` are added under the
        ``TEXT_SEGMENTATION`` key, entries of ``TRANSCRIPTION_RAW_FILE_NAME``
        under the ``RAW_TEXT_SEGMENTATION`` key.
        """
        text_path = os.path.join(loading_dataset.path,
                                 TRANSCRIPTION_FILE_NAME)
        raw_text_path = os.path.join(loading_dataset.path,
                                     TRANSCRIPTION_RAW_FILE_NAME)

        if os.path.isfile(text_path):
            texts = textfile.read_key_value_lines(text_path)

            for idx, text in texts.items():
                loading_dataset.add_segmentation(
                    idx,
                    segments=text,
                    key=data.Segmentation.TEXT_SEGMENTATION)

        if os.path.isfile(raw_text_path):
            raw_texts = textfile.read_key_value_lines(raw_text_path)

            for idx, raw_text in raw_texts.items():
                loading_dataset.add_segmentation(
                    idx,
                    segments=raw_text,
                    key=data.Segmentation.RAW_TEXT_SEGMENTATION)
Пример #5
0
    def _load_speakers(self, loading_dataset):
        """
        Load speaker records and the utterance-to-speaker assignment.

        Both files are optional and skipped when missing. Speakers from the
        JSON speaker-info file are created first and populated from their info
        dict; afterwards every utterance listed in the utt2spk file gets its
        ``speaker_idx`` set.
        """
        info_path = os.path.join(loading_dataset.path, SPEAKER_INFO_FILE_NAME)
        mapping_path = os.path.join(loading_dataset.path, UTT2SPK_FILE_NAME)

        if os.path.isfile(info_path):
            speaker_infos = jsonfile.read_json_file(info_path)

            for idx, info in speaker_infos.items():
                speaker = loading_dataset.add_speaker(speaker_idx=idx)
                speaker.load_speaker_info_from_dict(info)

        if os.path.isfile(mapping_path):
            assignments = textfile.read_key_value_lines(mapping_path)

            for utt_idx, speaker_idx in assignments.items():
                utterance = loading_dataset.utterances[utt_idx]
                utterance.speaker_idx = speaker_idx
Пример #6
0
    def create_dummy_reco2file(self, data_folder):
        """
        Write a ``reco2file_and_channel`` file derived from ``wav.scp``.

        For every recording in ``<data_folder>/wav.scp`` one line is written
        consisting of the recording id, the base name of its path without
        extension, and the fixed channel ``'A'``.
        """
        data_folder = os.path.abspath(data_folder)

        recordings = textfile.read_key_value_lines(
            os.path.join(data_folder, 'wav.scp'),
            separator=' ',
        )

        rows = [
            [rec_id, os.path.splitext(os.path.basename(rec_path))[0], 'A']
            for rec_id, rec_path in recordings.items()
        ]

        target = os.path.join(data_folder, 'reco2file_and_channel')
        textfile.write_separated_lines(target, rows, separator=' ')
Пример #7
0
    def _load(self, dataset):
        """
        Populate ``dataset`` from the listing files in ``dataset.path``.

        Steps, in order:

        1. Every entry of the wav listing is added as a file.
        2. Utterances are created. If a segments file exists, one utterance is
           added per line, with optional start/end times; otherwise one
           utterance is added per file, using the file id as utterance id.
           Speakers referenced by the optional utt2spk file are created on
           first use.
        3. Every entry of the transcription file is added as a segmentation.
        4. For speakers that exist in the dataset, the gender is set from the
           spk2gender file (``'m'`` / ``'f'``; other values are ignored).

        Args:
            dataset: The dataset object to fill; its ``path`` attribute is the
                folder the listing files are read from.
        """
        # load wavs
        wav_file_path = os.path.join(dataset.path, WAV_FILE_NAME)
        for file_idx, file_path in textfile.read_key_value_lines(
                wav_file_path, separator=' ').items():
            dataset.add_file(file_path, file_idx=file_idx)

        # load utterance -> speaker mapping (optional file)
        utt2spk_path = os.path.join(dataset.path, UTT2SPK_FILE_NAME)
        utt2spk = {}

        if os.path.isfile(utt2spk_path):
            utt2spk = textfile.read_key_value_lines(utt2spk_path,
                                                    separator=' ')

        def speaker_for(idx):
            # Return the speaker id mapped to ``idx`` (or None), creating the
            # speaker in the dataset the first time it is encountered.
            if idx not in utt2spk:
                return None

            speaker_idx = utt2spk[idx]

            if speaker_idx not in dataset.speakers.keys():
                dataset.add_speaker(speaker_idx=speaker_idx)

            return speaker_idx

        # load utterances
        segments_path = os.path.join(dataset.path, SEGMENTS_FILE_NAME)

        if os.path.isfile(segments_path):
            for utt_id, utt_info in textfile.read_separated_lines_with_first_key(
                    segments_path, separator=' ', max_columns=4).items():
                # utt_info[0] is passed on as the first positional argument of
                # add_utterance (presumably the file id - confirm against
                # add_utterance); the following columns are start and end.
                # They are converted to float so that utterances do not carry
                # raw strings as time stamps.
                start = float(utt_info[1]) if len(utt_info) > 1 else None
                end = float(utt_info[2]) if len(utt_info) > 2 else None

                dataset.add_utterance(utt_info[0],
                                      utterance_idx=utt_id,
                                      speaker_idx=speaker_for(utt_id),
                                      start=start,
                                      end=end)
        else:
            # Without a segments file every file is one utterance.
            for file_idx in dataset.files.keys():
                dataset.add_utterance(file_idx,
                                      utterance_idx=file_idx,
                                      speaker_idx=speaker_for(file_idx))

        # load transcriptions
        text_path = os.path.join(dataset.path, TRANSCRIPTION_FILE_NAME)
        for utt_id, transcription in textfile.read_key_value_lines(
                text_path, separator=' ').items():
            dataset.add_segmentation(utt_id, segments=transcription)

        # load genders
        gender_path = os.path.join(dataset.path, SPK2GENDER_FILE_NAME)
        for spk_id, gender in textfile.read_key_value_lines(
                gender_path, separator=' ').items():
            if spk_id not in dataset.speakers.keys():
                continue

            spk = dataset.speakers[spk_id]

            if gender == 'm':
                spk.gender = data.Gender.MALE
            elif gender == 'f':
                spk.gender = data.Gender.FEMALE
Пример #8
0
    def _load(self, loading_dataset):
        """
        Populate ``loading_dataset`` from the files in its folder.

        Reads, in order: the file listing, the speaker info (JSON), the
        optional utt2spk mapping, the utterance listing, all
        ``segmentation_*.txt`` files, all ``subview_*.txt`` files and the
        optional feature-container listing.

        The utt2spk file is optional. When it is missing, or when it has no
        entry for an utterance, the utterance is still added with
        ``speaker_idx=None``.

        Args:
            loading_dataset: The dataset object to fill; its ``path``
                attribute is the folder all files are read from.
        """
        # Read files
        files_list_path = os.path.join(loading_dataset.path, FILES_FILE_NAME)
        for file_idx, rel_path in textfile.read_key_value_lines(
                files_list_path, separator=' ').items():
            loading_dataset.add_file(os.path.abspath(
                os.path.join(loading_dataset.path, rel_path)),
                                     file_idx=file_idx,
                                     copy_file=False)

        # Read speakers
        speaker_path = os.path.join(loading_dataset.path,
                                    SPEAKER_INFO_FILE_NAME)
        for speaker_idx, speaker_info in jsonfile.read_json_file(
                speaker_path).items():
            speaker = loading_dataset.add_speaker(speaker_idx=speaker_idx)
            speaker.load_speaker_info_from_dict(speaker_info)

        # Read utt2spk. The mapping must exist even if the file does not,
        # otherwise the lookup in the utterance loop raises NameError.
        utt2spk_path = os.path.join(loading_dataset.path, UTT2SPK_FILE_NAME)
        utt2spk = {}
        if os.path.isfile(utt2spk_path):
            utt2spk = textfile.read_key_value_lines(utt2spk_path,
                                                    separator=' ')

        # Read utterances
        utterance_path = os.path.join(loading_dataset.path,
                                      UTTERANCE_FILE_NAME)
        for utterance_idx, utt_info in textfile.read_separated_lines_with_first_key(
                utterance_path, separator=' ', max_columns=4).items():
            # utt_info[0] is passed on as the first positional argument of
            # add_utterance; the following optional columns are start and end.
            start = float(utt_info[1]) if len(utt_info) > 1 else None
            end = float(utt_info[2]) if len(utt_info) > 2 else None

            # An utterance without a speaker mapping is kept, not dropped.
            speaker_idx = None
            if utterance_idx in utt2spk:
                speaker_idx = utt2spk[utterance_idx]

            loading_dataset.add_utterance(utt_info[0],
                                          utterance_idx=utterance_idx,
                                          speaker_idx=speaker_idx,
                                          start=start,
                                          end=end)

        # Read segmentations
        for seg_file in glob.glob(
                os.path.join(loading_dataset.path, 'segmentation_*.txt')):
            file_name = os.path.basename(seg_file)
            # The segmentation key is the part of the file name between the
            # 'segmentation_' prefix and the '.txt' suffix.
            seg_key = file_name[len('segmentation_'):len(file_name) - len('.txt')]

            utterance_segments = collections.defaultdict(list)

            # Each record is: utterance id, two float columns, token value.
            for record in textfile.read_separated_lines_generator(
                    seg_file, separator=' ', max_columns=4):
                utterance_segments[record[0]].append(
                    data.Token(record[3], float(record[1]), float(record[2])))

            for utterance_idx, segments in utterance_segments.items():
                loading_dataset.add_segmentation(utterance_idx,
                                                 segments=segments,
                                                 key=seg_key)

        # Read subviews
        # Maps the first column of a subview file line to the Subview
        # attribute that receives the remaining columns as a set.
        subview_attributes = {
            'filtered_utt_ids': 'filtered_utterance_idxs',
            'filtered_speaker_ids': 'filtered_speaker_idxs',
            'utterance_idx_patterns': 'utterance_idx_patterns',
            'speaker_idx_patterns': 'speaker_idx_patterns',
            'utterance_idx_not_patterns': 'utterance_idx_not_patterns',
            'speaker_idx_not_patterns': 'speaker_idx_not_patterns',
        }

        for subview_file in glob.glob(
                os.path.join(loading_dataset.path, 'subview_*.txt')):
            file_name = os.path.basename(subview_file)
            sv_name = file_name[len('subview_'):len(file_name) - len('.txt')]

            sv = dataset.Subview()

            for field, values in textfile.read_separated_lines_with_first_key(
                    subview_file, separator=' ').items():
                # Lines with an unknown first column are ignored.
                if field in subview_attributes:
                    setattr(sv, subview_attributes[field], set(values))

            loading_dataset.add_subview(sv_name, sv)

        # Read features
        feat_path = os.path.join(loading_dataset.path,
                                 FEAT_CONTAINER_FILE_NAME)

        if os.path.isfile(feat_path):
            for container_name, container_path in textfile.read_key_value_lines(
                    feat_path, separator=' ').items():
                loading_dataset.create_feature_container(
                    container_name, container_path)