def read_labels(path, corpus):
    """ Read all label files in ``path`` and attach them to ``corpus``.

    Every file matching ``{LABEL_FILE_PREFIX}_*.txt`` is parsed. The part of
    the filename between the prefix and the ``.txt`` extension becomes the
    idx of the created label-list. A label may carry trailing JSON metadata,
    which is split off via ``META_PATTERN``.

    Args:
        path (str): Folder to search for label files.
        corpus (Corpus): Corpus whose utterances receive the label-lists.
    """
    pattern = os.path.join(path, '{}_*.txt'.format(LABEL_FILE_PREFIX))
    prefix_len = len('{}_'.format(LABEL_FILE_PREFIX))
    suffix_len = len('.txt')

    for label_file in glob.glob(pattern):
        file_name = os.path.basename(label_file)
        key = file_name[prefix_len:len(file_name) - suffix_len]

        utterance_labels = collections.defaultdict(list)
        records = textfile.read_separated_lines_generator(
            label_file, separator=' ', max_columns=4)

        for record in records:
            value = record[3]
            start = float(record[1])
            end = float(record[2])
            meta = None

            meta_match = META_PATTERN.match(value)

            if meta_match is not None:
                # Label text carries trailing JSON metadata - split it off.
                meta = json.loads(meta_match.group(2))
                value = meta_match.group(1)

            utterance_labels[record[0]].append(
                assets.Label(value, start, end, meta=meta))

        for utterance_idx, labels in utterance_labels.items():
            label_list = assets.LabelList(idx=key, labels=labels)
            corpus.utterances[utterance_idx].set_label_list(label_list)
def load_subset(corpus, path, subset_idx):
    """ Load subset into corpus.

    Reads ``{subset_idx}.tsv`` from ``path``, creates the referenced assets
    if they don't exist yet, and imports a subview containing exactly the
    utterances listed in the file.

    Args:
        corpus (Corpus): Corpus to load the subset into.
        path (str): Path to the corpus base folder.
        subset_idx (str): Name of the subset (basename of the tsv file),
                          also used as the idx of the imported subview.
    """
    csv_file = os.path.join(path, '{}.tsv'.format(subset_idx))
    subset_utt_ids = []

    entries = textfile.read_separated_lines_generator(
        csv_file,
        separator='\t',
        max_columns=8,
        ignore_lines_starting_with=['client_id'],
        keep_empty=True
    )

    for entry in entries:
        file_idx = CommonVoiceReader.create_assets_if_needed(
            corpus, path, entry
        )
        subset_utt_ids.append(file_idx)

    # Renamed from ``filter`` to avoid shadowing the builtin.
    utt_filter = subset.MatchingUtteranceIdxFilter(
        utterance_idxs=set(subset_utt_ids))
    subview = subset.Subview(corpus, filter_criteria=[utt_filter])
    corpus.import_subview(subset_idx, subview)
def _load(self, path):
    """ Build a corpus from the meta file located in ``path``.

    Each record of the meta file holds idx, speaker-idx, language and
    transcript; the matching audio is expected at
    ``audio/<language>/<idx>.mp3``.

    Args:
        path (str): Path to the corpus base folder.

    Returns:
        Corpus: The loaded corpus.
    """
    corpus = audiomate.Corpus(path=path)
    meta_file = os.path.join(path, META_FILENAME)

    records = textfile.read_separated_lines_generator(
        meta_file, separator='\t', max_columns=4)

    for record in records:
        utt_idx = record[0]
        speaker_idx = record[1]
        language = record[2]
        transcript = record[3]

        audio_path = os.path.join(
            path, 'audio', language, '{}.mp3'.format(utt_idx))
        corpus.new_file(audio_path, utt_idx)

        # Only register each speaker once.
        if speaker_idx not in corpus.issuers:
            corpus.import_issuers(issuers.Speaker(speaker_idx))

        utterance = corpus.new_utterance(utt_idx, utt_idx, speaker_idx)
        transcript_ll = annotations.LabelList.create_single(
            transcript, idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
        utterance.set_label_list(transcript_ll)

    return corpus
def load_speakers(path):
    """ Parse the speaker meta file and return the speakers keyed by idx.

    Lines starting with ``;`` are ignored. Column 0 holds the speaker idx,
    column 1 the gender marker (``M``/``F``); anything else maps to UNKNOWN.

    Args:
        path (str): Path to the speaker meta file.

    Returns:
        dict: Mapping of speaker-idx to Speaker.
    """
    gender_by_marker = {
        'M': issuers.Gender.MALE,
        'F': issuers.Gender.FEMALE,
    }

    entries = textfile.read_separated_lines_generator(
        path, separator='|', max_columns=5, ignore_lines_starting_with=[';'])

    speakers = {}

    for item in entries:
        idx = item[0].strip()
        gender = gender_by_marker.get(item[1].strip(), issuers.Gender.UNKNOWN)
        speakers[idx] = issuers.Speaker(idx, gender=gender)

    return speakers
def _load_audio_list(self, path):
    """ Load and filter the audio list.

    Args:
        path (str): Path to the audio list file.

    Returns:
        dict: Dictionary of filtered sentences
              (id : username, license, attribution-url)
    """
    result = {}

    records = textfile.read_separated_lines_generator(
        path, separator='\t', max_columns=4)

    for entry in records:
        # Normalize SQL-style NULL markers to None.
        entry = [None if field == '\\N' else field for field in entry]

        # Pad short rows so all four columns are addressable
        # (no-op when the row already has four columns).
        entry += [None] * (4 - len(entry))

        licence = entry[2]

        if licence is None and not self.include_empty_licence:
            continue

        if self.include_licenses is not None \
                and licence not in self.include_licenses:
            continue

        result[entry[0]] = entry[1:]

    return result
def read_label_file(path):
    """ Read the labels from an audacity label file.

    Args:
        path (str): Path to the label file.

    Returns:
        list: List of labels (start [sec], end [sec], label)

    Example::

        >>> read_label_file('/path/to/label/file.txt')
        [
            [0.0, 0.2, 'sie'],
            [0.2, 2.2, 'hallo']
        ]
    """
    labels = []

    records = textfile.read_separated_lines_generator(
        path, separator='\t', max_columns=3)

    for record in records:
        start = float(_clean_time(record[0]))
        end = float(_clean_time(record[1]))
        # The label text column is optional - default to an empty string.
        value = str(record[2]) if len(record) > 2 else ''

        labels.append([start, end, value])

    return labels
def load_subset(corpus, path, subset_idx):
    """ Load subset into corpus.

    Reads ``{subset_idx}.csv`` from ``path`` and creates a file, speaker,
    and utterance (with word-transcript label-list) per entry. The loaded
    utterances are imported as a subview named after the subset.

    Args:
        corpus (Corpus): Corpus to load the subset into.
        path (str): Path to the corpus base folder.
        subset_idx (str): Name of the subset (basename of the csv file),
                          also used as the idx of the imported subview.
    """
    csv_file = os.path.join(path, '{}.csv'.format(subset_idx))
    utt_ids = []

    for entry in textfile.read_separated_lines_generator(
            csv_file, separator=',', max_columns=8,
            ignore_lines_starting_with=['filename']):
        rel_file_path = entry[0]
        filename = os.path.split(rel_file_path)[1]
        basename = os.path.splitext(filename)[0]
        transcription = entry[1]
        age = CommonVoiceReader.map_age(entry[4])
        gender = CommonVoiceReader.map_gender(entry[5])

        # Prefix with the subset name so ids are unique across subsets.
        idx = '{}-{}'.format(subset_idx, basename)
        file_path = os.path.join(path, rel_file_path)

        corpus.new_file(file_path, idx)
        issuer = assets.Speaker(idx, gender=gender, age_group=age)
        corpus.import_issuers(issuer)
        utterance = corpus.new_utterance(idx, idx, issuer.idx)
        utterance.set_label_list(
            assets.LabelList.create_single(
                transcription, idx=audiomate.corpus.LL_WORD_TRANSCRIPT))

        utt_ids.append(idx)

    # Renamed from ``filter`` to avoid shadowing the builtin.
    utt_filter = subset.MatchingUtteranceIdxFilter(utterance_idxs=set(utt_ids))
    subview = subset.Subview(corpus, filter_criteria=[utt_filter])
    corpus.import_subview(subset_idx, subview)
def load_transcripts(path):
    """ Read a two-column file and return a mapping of id to transcript.

    Args:
        path (str): Path to the transcript file
                    (space-separated, two columns).

    Returns:
        dict: Mapping of stripped first column to stripped second column.
    """
    entries = textfile.read_separated_lines_generator(
        path,
        separator=' ',
        max_columns=2
    )

    transcripts = {}

    for entry in entries:
        transcripts[entry[0].strip()] = entry[1].strip()

    return transcripts
def read_training_transcripts(path):
    """ Read all transcripts from a csv file.

    The header line (starting with ``wav_filename``) is skipped and the
    transcript is taken from the third column of every record.

    Args:
        path (str): Path to the csv file.

    Returns:
        list: List of transcripts (str).
    """
    entries = textfile.read_separated_lines_generator(
        path, separator=',', max_columns=3,
        ignore_lines_starting_with=['wav_filename'])

    # Comprehension instead of a manual append loop.
    return [entry[2] for entry in entries]
def read_training_transcripts(path):
    """ Read all transcripts from a csv file, showing a progress bar.

    The header line (starting with ``wav_filename``) is skipped and the
    transcript is taken from the third column of every record.

    Args:
        path (str): Path to the csv file.

    Returns:
        list: List of transcripts (str).
    """
    lines = textfile.read_separated_lines_generator(
        path,
        separator=",",
        max_columns=3,
        ignore_lines_starting_with=["wav_filename"])

    # Materialize the generator so tqdm knows the total and can
    # display a percentage.
    return [entry[2] for entry in tqdm.tqdm(list(lines))]
def load_speakers(path):
    """ Load all speakers from the speaker csv file.

    The header line (starting with ``speakerId``) is skipped; each record
    is parsed via ``FluentSpeechReader.parse_speaker_record``.

    Args:
        path (str): Path to the speaker csv file.

    Returns:
        dict: Mapping of speaker-idx to Speaker.
    """
    records = textfile.read_separated_lines_generator(
        path, separator=',', max_columns=6,
        ignore_lines_starting_with=['speakerId'])

    speakers = {}

    for record in records:
        speaker = FluentSpeechReader.parse_speaker_record(record)
        speakers[speaker.idx] = speaker

    return speakers
def load_part(base_path, part_name, corpus, speakers):
    """ Load one part (e.g. train/valid/test) of the dataset into the corpus.

    Reads ``data/{part_name}_data.csv``, creates a track, utterance and
    word-transcript label-list per record, plus optional ``action``,
    ``object`` and ``location`` label-lists (a value of ``'none'`` marks
    an absent slot). The part's utterances are imported as a subview.

    Args:
        base_path (str): Path to the dataset base folder.
        part_name (str): Name of the part, used for the csv filename and
                         the idx of the imported subview.
        corpus (Corpus): Corpus to load the part into.
        speakers (dict): Mapping of speaker-idx to Speaker.
    """
    part_file_path = os.path.join(
        base_path, 'data', '{}_data.csv'.format(part_name))

    entries = textfile.read_separated_lines_generator(
        part_file_path, separator=',', max_columns=7,
        ignore_lines_starting_with=[','])

    part_ids = []

    for entry in entries:
        file_path = entry[1]
        file_base = os.path.basename(file_path)
        idx = os.path.splitext(file_base)[0]
        speaker_idx = entry[2]

        part_ids.append(idx)

        # Only register each speaker once.
        if speaker_idx not in corpus.issuers.keys():
            corpus.import_issuers(speakers[speaker_idx])

        track = corpus.new_file(os.path.join(base_path, file_path), idx)
        utt = corpus.new_utterance(idx, track.idx, speaker_idx)

        transcription = annotations.LabelList.create_single(
            entry[3], idx=audiomate.corpus.LL_WORD_TRANSCRIPT)
        utt.set_label_list(transcription)

        # Optional intent slots - folded into one loop instead of three
        # near-identical if-blocks.
        for value, ll_idx in ((entry[4], 'action'),
                              (entry[5], 'object'),
                              (entry[6], 'location')):
            if value != 'none':
                utt.set_label_list(
                    annotations.LabelList.create_single(value, idx=ll_idx))

    # Renamed from ``filter`` to avoid shadowing the builtin.
    part_filter = subset.MatchingUtteranceIdxFilter(
        utterance_idxs=set(part_ids))
    subview = subset.Subview(corpus, filter_criteria=[part_filter])
    corpus.import_subview(part_name, subview)
def _load_sentence_list(self, path):
    """ Load and filter the sentence list.

    Args:
        path (str): Path to the sentence list.

    Returns:
        dict: Dictionary of sentences (id : language, transcription)
    """
    result = {}

    records = textfile.read_separated_lines_generator(
        path, separator='\t', max_columns=3)

    for entry in records:
        language = entry[1]

        # Keep the sentence when no language filter is set or
        # its language is included.
        if self.include_languages is None \
                or language in self.include_languages:
            result[entry[0]] = entry[1:]

    return result
def read_file(path):
    """ Reads a ctm file.

    Args:
        path (str): Path to the file

    Returns:
        (dict): Dictionary with entries.

    Example::

        >>> read_file('/path/to/file.txt')
        {
            'wave-ab': [
                ['1', 0.00, 0.07, 'HI', 1],
                ['1', 0.09, 0.08, 'AH', 1]
            ],
            'wave-xy': [
                ['1', 0.00, 0.07, 'HI', 1],
                ['1', 0.09, 0.08, 'AH', 1]
            ]
        }
    """
    gen = textfile.read_separated_lines_generator(
        path, max_columns=6, ignore_lines_starting_with=[';;'])

    utterances = collections.defaultdict(list)

    # After stripping the utterance-id, columns 1 (begin), 2 (duration)
    # and 4 (confidence) are numeric.
    float_columns = (1, 2, 4)

    for record in gen:
        values = record[1:]

        # Iterate the target columns directly instead of testing every
        # index; guard against short records as the original range() did.
        for i in float_columns:
            if i < len(values):
                values[i] = float(values[i])

        utterances[record[0]].append(values)

    return utterances
def load_projections(projections_file):
    """ Loads projections defined in the given `projections_file`.

    The file defines one projection per line in the format::

        old_label_1 | new_label_1
        old_label_1 old_label_2 | new_label_2
        old_label_3 |

    A projection maps a combination of one or more old labels (separated by
    a single whitespace) to a new label, separated by a pipe (`|`). A segment
    labeled with `old_label_1` is relabeled to `new_label_1`; a segment
    carrying both `old_label_1` and `old_label_2` concurrently becomes
    `new_label_2`. An empty right-hand side (as for `old_label_3`) drops the
    segment. Label combinations are sorted in natural order automatically.

    Args:
        projections_file (str): Path to the file with projections

    Returns:
        dict: Dictionary where the keys are tuples of labels to project to
              the key's value

    Example:
        >>> load_projections('/path/to/projections.txt')
        {('b',): 'foo', ('a', 'b'): 'a_b', ('a',): 'bar'}
    """
    projections = {}

    for parts in textfile.read_separated_lines_generator(
            projections_file, '|'):
        old_labels = [label.strip() for label in parts[0].split(' ')]
        combination = tuple(sorted(old_labels))
        projections[combination] = parts[1].strip()

    return projections