示例#1
0
    def write_labels(path, corpus):
        """
        Write the labels of all utterances, one file per label-list index.

        Every label becomes a record ``(utterance-idx, start, end, value)``.
        An end of ``float('inf')`` is stored as ``-1``. If a label carries
        meta data, it is appended to the value as JSON (keys sorted) in
        square brackets.

        Args:
            path (str): Folder the label files are written to.
            corpus: Corpus providing ``utterances`` with their label lists.
        """
        grouped = collections.defaultdict(list)

        for utt in corpus.utterances.values():
            for ll_idx, labels in utt.label_lists.items():
                rows = []

                for label in labels:
                    end = label.end
                    value = label.value

                    if end == float('inf'):
                        end = -1

                    if len(label.meta) > 0:
                        value = '{} [{}]'.format(
                            value, json.dumps(label.meta, sort_keys=True))

                    rows.append((utt.idx, label.start, end, value))

                # Touching the key here keeps label lists without labels
                # in the output as well.
                grouped[ll_idx].extend(rows)

        for ll_idx, rows in grouped.items():
            file_name = '{}_{}.txt'.format(LABEL_FILE_PREFIX, ll_idx)
            textfile.write_separated_lines(os.path.join(path, file_name),
                                           rows,
                                           separator=' ')
示例#2
0
文件: kaldi.py 项目: ynop/audiomate
    def write_tracks(file_path, corpus, path):
        """
        Write the list of tracks (track-idx and audio reference) of a corpus.

        File tracks are referenced with the value returned by
        ``KaldiWriter.extended_filename``. Container tracks are exported as
        16-bit wav files to ``<path>/audio/<track-idx>.wav`` and referenced
        by that exported file.

        Args:
            file_path (str): Path of the list file to write.
            corpus: Corpus providing ``tracks``.
            path (str): Base folder, the ``audio`` export folder is
                        created within it if needed.
        """
        file_records = []

        export_path = os.path.join(path, 'audio')
        max_value = np.iinfo(np.int16).max

        for track in corpus.tracks.values():
            if isinstance(track, tracks.FileTrack):
                file_records.append(
                    [track.idx,
                     KaldiWriter.extended_filename(track)])

            elif isinstance(track, tracks.ContainerTrack):
                # ``exist_ok`` replaces the former isdir-check, which could
                # fail if the folder was created between check and makedirs.
                os.makedirs(export_path, exist_ok=True)

                target_path = os.path.join(export_path,
                                           '{}.wav'.format(track.idx))

                samples = (track.read_samples() * max_value).astype(np.int16)
                scipy.io.wavfile.write(target_path,
                                       track.sampling_rate,
                                       samples)

                file_records.append([track.idx, target_path])

        textfile.write_separated_lines(file_path,
                                       file_records,
                                       separator=' ',
                                       sort_by_column=0)
示例#3
0
    def _save(self, corpus, path):
        """
        Write the corpus as list files (``all.lst`` plus one per subview).

        Every record consists of
        ``[utterance-idx, audio-path, num-samples, transcript]``.
        The audio of an utterance is written as 16 kHz / 16-bit wav to
        ``<path>/audio/<utterance-idx>.wav`` if its start is not 0, its end
        is not ``float('inf')`` or its sampling rate is not 16000.
        Otherwise the path of the utterance's track is referenced.

        Args:
            corpus: Corpus to write.
            path (str): Target folder.
        """
        records = []

        # Sets instead of lists, since membership is tested for every
        # utterance/subview combination below.
        subset_utterance_ids = {
            idx: set(subset.utterances.keys())
            for idx, subset in corpus.subviews.items()
        }
        subset_records = collections.defaultdict(list)

        audio_folder = os.path.join(path, 'audio')
        os.makedirs(audio_folder, exist_ok=True)

        for utterance_idx in sorted(corpus.utterances.keys()):
            utterance = corpus.utterances[utterance_idx]

            # We force sr=16000, since this is expected from wav2letter
            export_audio = (utterance.start != 0
                            or utterance.end != float('inf')
                            or utterance.sampling_rate != 16000)

            if export_audio:
                audio_path = os.path.join(audio_folder,
                                          '{}.wav'.format(utterance.idx))
                data = utterance.read_samples(sr=16000)

                # Clip before casting: a sample of 1.0 scales to 32768,
                # which is outside of the int16 range.
                data = np.clip(data * 32768, -32768, 32767).astype(np.int16)

                num_samples = data.size
                scipy.io.wavfile.write(audio_path, 16000, data)
            else:
                audio_path = utterance.track.path
                num_samples = utterance.num_samples()

            transcript = utterance.label_lists[
                self.transcription_label_list_idx].join()

            # Add to the full list
            record = [utterance_idx, audio_path, num_samples, transcript]
            records.append(record)

            # Check / Add to subview lists
            for subset_idx, utt_ids in subset_utterance_ids.items():
                if utterance_idx in utt_ids:
                    subset_records[subset_idx].append(record)

        # Write full list
        records_path = os.path.join(path, 'all.lst')
        textfile.write_separated_lines(records_path,
                                       records,
                                       separator=' ',
                                       sort_by_column=-1)

        # Write subset lists (own name, so ``records`` is not shadowed)
        for subset_idx, subset_recs in subset_records.items():
            if subset_recs:
                subset_file_path = os.path.join(path,
                                                '{}.lst'.format(subset_idx))
                textfile.write_separated_lines(subset_file_path,
                                               subset_recs,
                                               separator=' ',
                                               sort_by_column=-1)
示例#4
0
    def test_write_separated_lines_sorted(self):
        """
        Records passed as dict and sorted by column 1 are expected to be
        written with all ``hallo-0`` values before the ``hallo-0_1`` values.
        """
        data = {
            'hallo-0_103': 'hallo-0_1',
            'hallo-0_122': 'hallo-0',
            'hallo-0_1031': 'hallo-0_1',
            'hallo-0_1322': 'hallo-0',
            'hallo-0_1224': 'hallo-0'
        }

        f, path = tempfile.mkstemp(text=True)
        os.close(f)

        try:
            textfile.write_separated_lines(path,
                                           data,
                                           separator=' ',
                                           sort_by_column=1)

            with open(path, 'r') as f:
                value = f.read()
        finally:
            # mkstemp leaves the removal of the file to the caller.
            os.remove(path)

        lines = value.strip().split('\n')

        self.assertEqual(5, len(lines))

        self.assertTrue(lines[0].endswith('hallo-0'))
        self.assertTrue(lines[1].endswith('hallo-0'))
        self.assertTrue(lines[2].endswith('hallo-0'))
        self.assertTrue(lines[3].endswith('hallo-0_1'))
        self.assertTrue(lines[4].endswith('hallo-0_1'))
示例#5
0
 def write_files(file_path, corpus, path):
     """
     Write the list of files (file-idx and file-path) of a corpus.

     The file paths are stored relative to ``path``.

     Args:
         file_path (str): Path of the list file to write.
         corpus: Corpus providing ``files``.
         path (str): Folder the stored paths are relative to.
     """
     file_records = []

     for corpus_file in corpus.files.values():
         rel_path = os.path.relpath(corpus_file.path, path)
         file_records.append([corpus_file.idx, rel_path])

     textfile.write_separated_lines(
         file_path, file_records, separator=' ', sort_by_column=0)
示例#6
0
    def _download(self, target_path):
        """
        Download sentence and audio lists, write the meta file and
        download the audio files.

        The archives are downloaded and extracted in ``<target_path>/temp``.
        Only sentences that occur in the audio list as well as in the
        sentence list are used. The temp folder is removed at the end,
        also when one of the steps fails.

        Args:
            target_path (str): Folder to store the data in.
        """
        temp_path = os.path.join(target_path, 'temp')
        os.makedirs(temp_path, exist_ok=True)

        try:
            sentence_ark = os.path.join(temp_path, 'sentences.tar.bz2')
            sentence_list = os.path.join(temp_path, 'sentences.csv')
            audio_ark = os.path.join(temp_path,
                                     'sentences_with_audio.tar.bz2')
            audio_list = os.path.join(temp_path, 'sentences_with_audio.csv')

            download.download_file(SENTENCE_LIST_URL, sentence_ark)
            download.download_file(AUDIO_LIST_URL, audio_ark)

            download.extract_tar(sentence_ark, temp_path)
            download.extract_tar(audio_ark, temp_path)

            audio_entries = self._load_audio_list(audio_list)
            sentences = self._load_sentence_list(sentence_list)

            valid_sentence_ids = set(audio_entries.keys()).intersection(
                set(sentences.keys()))

            # sent-id, username, lang, transcript
            all_records = [(k, audio_entries[k][0], sentences[k][0],
                            sentences[k][1]) for k in valid_sentence_ids]

            meta_path = os.path.join(target_path, META_FILENAME)
            textfile.write_separated_lines(meta_path,
                                           all_records,
                                           separator='\t',
                                           sort_by_column=0)

            self._download_audio_files(all_records, target_path)
        finally:
            # Without the finally, archives and lists stayed on disk
            # whenever a download/extract/parse step raised.
            shutil.rmtree(temp_path, ignore_errors=True)
示例#7
0
    def write_utt_to_issuer_mapping(utt_issuer_path, corpus):
        """
        Write the mapping utterance-idx -> issuer-idx.

        Utterances without an issuer are left out.

        Args:
            utt_issuer_path (str): Path of the mapping file to write.
            corpus: Corpus providing ``utterances``.
        """
        utt_issuer_records = {
            utt.idx: utt.issuer.idx
            for utt in corpus.utterances.values()
            if utt.issuer is not None
        }

        textfile.write_separated_lines(utt_issuer_path,
                                       utt_issuer_records,
                                       separator=' ',
                                       sort_by_column=0)
示例#8
0
 def write_feature_containers(container_path, corpus):
     """
     Write the list of feature containers (idx and container path).

     Args:
         container_path (str): Path of the list file to write.
         corpus: Corpus providing ``feature_containers``.
     """
     feat_records = []

     for container_idx, container in corpus.feature_containers.items():
         feat_records.append((container_idx, container.path))

     textfile.write_separated_lines(
         container_path, feat_records, separator=' ')
示例#9
0
    def _save(self, corpus, path):
        """
        Write the corpus as csv files (``all.csv`` plus one per subview).

        Every file starts with the header
        ``wav_filename,wav_filesize,transcript`` followed by one record
        ``[audio-path, file-size, transcript]`` per utterance. The
        transcript is the value of the first label in the transcription
        label list. If an utterance has start 0 and end -1, the path of its
        file is referenced, otherwise the samples of the utterance are
        written as 16-bit wav to ``<path>/audio/<utterance-idx>.wav``.

        Args:
            corpus: Corpus to write.
            path (str): Target folder.
        """
        records = []

        # Sets instead of lists, since membership is tested for every
        # utterance/subview combination below.
        subset_utterance_ids = {
            idx: set(subset.utterances.keys())
            for idx, subset in corpus.subviews.items()
        }
        subset_records = collections.defaultdict(list)

        audio_folder = os.path.join(path, 'audio')
        os.makedirs(audio_folder, exist_ok=True)

        for utterance_idx in sorted(corpus.utterances.keys()):
            utterance = corpus.utterances[utterance_idx]

            # NOTE(review): an open end is expected as -1 here; confirm this
            # matches the utterance API in use.
            if utterance.start == 0 and utterance.end == -1:
                audio_path = utterance.file.path
            else:
                audio_path = os.path.join(audio_folder,
                                          '{}.wav'.format(utterance.idx))
                sampling_rate = utterance.sampling_rate
                data = utterance.read_samples()

                # Clip before casting: a sample of 1.0 scales to 32768,
                # which is outside of the int16 range.
                data = np.clip(data * 32768, -32768, 32767).astype(np.int16)

                scipy.io.wavfile.write(audio_path, sampling_rate, data)

            size = os.stat(audio_path).st_size
            transcript = utterance.label_lists[
                self.transcription_label_list_idx][0].value

            # Add to the full list
            record = [audio_path, size, transcript]
            records.append(record)

            # Check / Add to subview lists
            for subset_idx, utt_ids in subset_utterance_ids.items():
                if utterance_idx in utt_ids:
                    subset_records[subset_idx].append(record)

        # Write full list
        records.insert(0, ['wav_filename', 'wav_filesize', 'transcript'])
        records_path = os.path.join(path, 'all.csv')
        textfile.write_separated_lines(records_path,
                                       records,
                                       separator=',',
                                       sort_by_column=-1)

        # Write subset lists (own name, so ``records`` is not shadowed)
        for subset_idx, subset_recs in subset_records.items():
            if subset_recs:
                subset_recs.insert(
                    0, ['wav_filename', 'wav_filesize', 'transcript'])
                subset_file_path = os.path.join(path,
                                                '{}.csv'.format(subset_idx))
                textfile.write_separated_lines(subset_file_path,
                                               subset_recs,
                                               separator=',',
                                               sort_by_column=-1)
示例#10
0
 def write_utterances(utterance_path, corpus):
     """
     Write the utterances of a corpus.

     For every utterance-idx the record ``[file-idx, start, end]``
     is stored.

     Args:
         utterance_path (str): Path of the utterance file to write.
         corpus: Corpus providing ``utterances``.
     """
     utterance_records = {}

     for utt in corpus.utterances.values():
         utterance_records[utt.idx] = [utt.file.idx, utt.start, utt.end]

     textfile.write_separated_lines(
         utterance_path, utterance_records, separator=' ', sort_by_column=0)
示例#11
0
    def write_file_tracks(file_path, corpus, path):
        """
        Write the list of file tracks (track-idx and path) of a corpus.

        Only tracks of type ``tracks.FileTrack`` are considered. Their
        paths are stored relative to ``path``.

        Args:
            file_path (str): Path of the list file to write.
            corpus: Corpus providing ``tracks``.
            path (str): Folder the stored paths are relative to.
        """
        file_records = [
            [track.idx, os.path.relpath(track.path, path)]
            for track in corpus.tracks.values()
            if isinstance(track, tracks.FileTrack)
        ]

        textfile.write_separated_lines(file_path,
                                       file_records,
                                       separator=' ',
                                       sort_by_column=0)
示例#12
0
    def write_container_tracks(audio_path, corpus, path):
        """
        Write the list of container tracks of a corpus.

        Only tracks of type ``tracks.ContainerTrack`` are considered. Every
        record consists of ``(track-idx, container-path, key)``, where the
        container path is stored relative to ``path``.

        Args:
            audio_path (str): Path of the list file to write.
            corpus: Corpus providing ``tracks``.
            path (str): Folder the stored paths are relative to.
        """
        container_records = {
            (
                track.idx,
                os.path.relpath(track.container.path, path),
                track.key,
            )
            for track in corpus.tracks.values()
            if isinstance(track, tracks.ContainerTrack)
        }

        textfile.write_separated_lines(
            audio_path, container_records, separator=' ', sort_by_column=0)
示例#13
0
def write_label_list(path, label_list):
    """
    Store the labels of `label_list` in an audacity label file.

    For every label an entry ``[start, end, value]`` is written,
    using a tab as separator.

    Args:
        path (str): Path to write the file to.
        label_list (audiomate.corpus.assets.LabelList): Label list
    """
    entries = [
        [label.start, label.end, label.value]
        for label in label_list
    ]

    textfile.write_separated_lines(path, entries, separator='\t')
示例#14
0
    def _write_transcriptions(self, text_path, corpus):
        """
        Write the transcription of every utterance (utterance-idx and text).

        The transcription is built by joining the values of all labels in
        the label list ``self.main_label_list_idx`` with a space.
        Utterances without that label list are left out.

        Args:
            text_path (str): Path of the transcription file to write.
            corpus: Corpus providing ``utterances``.
        """
        transcriptions = {}

        for utt in corpus.utterances.values():
            if self.main_label_list_idx not in utt.label_lists:
                continue

            labels = utt.label_lists[self.main_label_list_idx]
            transcriptions[utt.idx] = ' '.join(
                label.value for label in labels)

        textfile.write_separated_lines(
            text_path, transcriptions, separator=' ', sort_by_column=0)
示例#15
0
文件: kaldi.py 项目: ynop/audiomate
    def _write_utt_to_issuer_mapping(self, utt_issuer_path, corpus):
        """
        Write the mapping utterance-idx -> issuer-idx.

        The utterance-idx is obtained from ``self._get_utt_idx``. For an
        utterance without issuer the utterance-idx itself is used as
        issuer, if ``self.use_utt_idx_if_no_speaker_available`` is set.
        Otherwise such an utterance is left out.

        Args:
            utt_issuer_path (str): Path of the mapping file to write.
            corpus: Corpus providing ``utterances``.
        """
        utt_issuer_records = {}

        for utt in corpus.utterances.values():
            utt_idx = self._get_utt_idx(utt)
            issuer = utt.issuer

            if issuer is None:
                if self.use_utt_idx_if_no_speaker_available:
                    utt_issuer_records[utt_idx] = utt_idx
            else:
                utt_issuer_records[utt_idx] = issuer.idx

        textfile.write_separated_lines(
            utt_issuer_path,
            utt_issuer_records,
            separator=' ',
            sort_by_column=0)
示例#16
0
    def _save(self, corpus, path):
        """
        Write the corpus as csv files (``all.csv`` plus one per subview).

        Unless ``self.no_audio_check`` is set, the corpus is first passed
        through ``self.converter.convert`` with ``<path>/audio`` as target.
        Every file starts with the header
        ``wav_filename,wav_filesize,transcript`` followed by one record
        ``[audio-path, file-size, transcript]`` per utterance.

        Args:
            corpus: Corpus to write.
            path (str): Target folder.
        """
        header = ['wav_filename', 'wav_filesize', 'transcript']

        audio_dir = os.path.join(path, 'audio')
        os.makedirs(audio_dir, exist_ok=True)

        # Convert all files
        if not self.no_audio_check:
            corpus = self.converter.convert(corpus, audio_dir)

        all_rows = []
        rows_per_subset = collections.defaultdict(list)
        utt_ids_per_subset = {
            name: set(subview.utterances.keys())
            for name, subview in corpus.subviews.items()
        }

        for utt_idx in sorted(corpus.utterances.keys()):
            utt = corpus.utterances[utt_idx]
            label_list = utt.label_lists[self.transcription_label_list_idx]
            transcript = label_list.join()
            wav_path = utt.track.path
            wav_size = os.stat(wav_path).st_size

            row = [wav_path, wav_size, transcript]
            all_rows.append(row)

            # The same row goes to every subview containing the utterance
            for name, utt_ids in utt_ids_per_subset.items():
                if utt_idx in utt_ids:
                    rows_per_subset[name].append(row)

        # Full list
        all_rows.insert(0, list(header))
        textfile.write_separated_lines(os.path.join(path, 'all.csv'),
                                       all_rows,
                                       separator=',',
                                       sort_by_column=-1)

        # One list per subview
        for name, rows in rows_per_subset.items():
            if len(rows) == 0:
                continue

            rows.insert(0, list(header))
            textfile.write_separated_lines(
                os.path.join(path, '{}.csv'.format(name)),
                rows,
                separator=',',
                sort_by_column=-1)
示例#17
0
    def write_utterances(utterance_path, corpus):
        """
        Write the utterances of a corpus.

        For every utterance-idx the record ``[track-idx, start, end]`` is
        stored. An end of ``float('inf')`` is written as ``-1``.

        Args:
            utterance_path (str): Path of the utterance file to write.
            corpus: Corpus providing ``utterances``.
        """
        utterance_records = {}

        for utt in corpus.utterances.values():
            end = utt.end

            utterance_records[utt.idx] = [
                utt.track.idx,
                utt.start,
                -1 if end == float('inf') else end,
            ]

        textfile.write_separated_lines(utterance_path,
                                       utterance_records,
                                       separator=' ',
                                       sort_by_column=0)
示例#18
0
    def _write_genders(self, gender_path, corpus):
        """
        Write the gender (``m`` / ``f``) of every speaker of the corpus.

        Only issuers that are speakers with gender ``MALE`` or ``FEMALE``
        are written. If there is no such issuer, no file is written.

        Args:
            gender_path (str): Path of the gender file to write.
            corpus: Corpus providing ``issuers``.
        """
        genders = {}

        for issuer in corpus.issuers.values():
            # isinstance instead of an exact type comparison, so that
            # subclasses of Speaker are not silently skipped.
            if not isinstance(issuer, issuers.Speaker):
                continue

            if issuer.gender == issuers.Gender.MALE:
                genders[issuer.idx] = 'm'
            elif issuer.gender == issuers.Gender.FEMALE:
                genders[issuer.idx] = 'f'

        if genders:
            textfile.write_separated_lines(gender_path,
                                           genders,
                                           separator=' ',
                                           sort_by_column=0)
示例#19
0
    def _save(self, corpus, path):
        """
        Write the corpus as list files (``all.lst`` plus one per subview).

        The corpus is first passed through ``self.converter.convert`` with
        ``<path>/audio`` as target. Every record consists of
        ``[utterance-idx, audio-path, num-samples, transcript]``, where
        num-samples is ``int(utterance.duration * self.sampling_rate)``.

        Args:
            corpus: Corpus to write.
            path (str): Target folder.
        """
        target_audio_path = os.path.join(path, 'audio')
        os.makedirs(target_audio_path, exist_ok=True)

        # Convert all files
        corpus = self.converter.convert(corpus, target_audio_path)
        records = []

        # Sets instead of lists, since membership is tested for every
        # utterance/subview combination below.
        subset_utterance_ids = {
            idx: set(subset.utterances.keys())
            for idx, subset in corpus.subviews.items()
        }
        subset_records = collections.defaultdict(list)

        for utterance_idx in sorted(corpus.utterances.keys()):
            utterance = corpus.utterances[utterance_idx]
            transcript = utterance.label_lists[
                self.transcription_label_list_idx].join()
            audio_path = utterance.track.path
            num_samples = int(utterance.duration * self.sampling_rate)

            # Add to the full list
            record = [utterance_idx, audio_path, num_samples, transcript]
            records.append(record)

            # Check / Add to subview lists
            for subset_idx, utt_ids in subset_utterance_ids.items():
                if utterance_idx in utt_ids:
                    subset_records[subset_idx].append(record)

        # Write full list
        records_path = os.path.join(path, 'all.lst')
        textfile.write_separated_lines(records_path,
                                       records,
                                       separator=' ',
                                       sort_by_column=-1)

        # Write subset lists (own name, so ``records`` is not shadowed)
        for subset_idx, subset_recs in subset_records.items():
            if subset_recs:
                subset_file_path = os.path.join(path,
                                                '{}.lst'.format(subset_idx))
                textfile.write_separated_lines(subset_file_path,
                                               subset_recs,
                                               separator=' ',
                                               sort_by_column=-1)
示例#20
0
文件: kaldi.py 项目: val260/audiomate
    def write_segments(utterance_path, corpus):
        """
        Write the segments (utterances) of a corpus.

        For every utterance-idx the record ``[track-idx, start, end]`` is
        stored, where end is taken from ``end_abs`` of the utterance.
        An end of ``float('inf')`` is written as ``-1``.

        Args:
            utterance_path (str): Path of the segments file to write.
            corpus: Corpus providing ``utterances``.
        """
        utterance_records = {}

        for utt in corpus.utterances.values():
            end = utt.end_abs

            utterance_records[utt.idx] = [
                utt.track.idx,
                utt.start,
                -1 if end == float('inf') else end,
            ]

        textfile.write_separated_lines(
            utterance_path, utterance_records, separator=' ', sort_by_column=0)
示例#21
0
def write_label_file(path, entries):
    """
    Writes an audacity label file. Start and end times are in seconds.

    The entries are passed unchanged to ``textfile.write_separated_lines``
    with a tab as separator.

    Args:
        path (str): Path to write the file to.
        entries (list): List with entries to write. (entries -> start,
                        end, label; as in the example below)

    Example::

        >>> data = [
        ...     [0.0, 0.2, 'sie'],
        ...     [0.2, 2.2, 'hallo']
        ... ]
        >>>
        >>> write_label_file('/some/path/to/file.txt', data)
    """

    textfile.write_separated_lines(path, entries, separator='\t')
示例#22
0
文件: ctm.py 项目: xjc90s/audiomate
def write_file(path, entries):
    """
    Writes a ctm file.

    The entries are passed unchanged to ``textfile.write_separated_lines``
    with a space as separator.

    Args:
        path (str): Path to write the file to.
        entries (list): List with entries to write. (entries -> wave-file, channel, start (seconds),
                        duration (seconds), label)

    Example::

        >>> data = [
        ...     ["wave-ab", '1', 0.0, 0.82, "duda"],
        ...     ["wave-xy", '1', 0.82, 0.57, "Jacques"],
        ... ]
        >>>
        >>> write_file('/path/to/file.txt', data)
    """

    textfile.write_separated_lines(path, entries, separator=' ')
示例#23
0
文件: kaldi.py 项目: ynop/audiomate
    def _write_segments(self, utterance_path, corpus):
        """
        Write the segments (utterances) of a corpus.

        For every utterance-idx (obtained from ``self._get_utt_idx``) the
        record ``[track-idx, start, end]`` is stored. If the end of an
        utterance is ``float('inf')``, it is replaced by ``end_abs`` of the
        utterance when ``self.use_absolute_times`` is set, else by ``-1``.

        Args:
            utterance_path (str): Path of the segments file to write.
            corpus: Corpus providing ``utterances``.
        """
        utterance_records = {}

        for utt in corpus.utterances.values():
            utt_idx = self._get_utt_idx(utt)
            end = utt.end

            if end == float('inf'):
                end = utt.end_abs if self.use_absolute_times else -1

            utterance_records[utt_idx] = [utt.track.idx, utt.start, end]

        textfile.write_separated_lines(
            utterance_path, utterance_records, separator=' ', sort_by_column=0)