def make_meta(self, root_dir: str):
    """Collect every mixture/vocal npy pair under *root_dir* into the meta
    frame, then split and save train / val frames.

    :param root_dir: dataset root; files live two directory levels deep
    """
    # Use all audio files found under the two-level directory layout.
    print('Lookup files ...')
    mixture_list = glob.glob(
        os.path.join(root_dir, '*', '*', 'mixture.*.npy'))
    # Only vocals are extracted here. To use another source, override this.
    vocals_list = glob.glob(
        os.path.join(root_dir, '*', '*', 'vocals.*.npy'))

    # Sort so mixture/vocal rows line up deterministically.
    print('Make meta information ...')
    self._meta['mixture_filename'] = sorted(mixture_list)
    self._meta['voice_filename'] = sorted(vocals_list)

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def make_meta(self, mixtures, vocals, backgrounds):
    """Expand chunked npy patterns for each source, then build, split and
    save the meta frames.

    :param mixtures: 'xxx.npy' parent names to expand into 'xxx.*.npy'
    :param vocals: same, for vocal sources
    :param backgrounds: same, for background sources
    """

    def _expand(parents):
        # 'xxx.npy' -> every 'xxx.<n>.npy' chunk actually on disk
        found = []
        for parent in parents:
            found.extend(glob.glob(parent.replace('.npy', '.*.npy')))
        return found

    # setup meta (sorted for deterministic row order)
    self._meta['mixture_filename'] = sorted(_expand(mixtures))
    self._meta['voice_filename'] = sorted(_expand(vocals))
    self._meta['background_filename'] = sorted(_expand(backgrounds))

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def make_meta(self, wav_file_list: List[str], text_file_list: List[str]):
    """Build TTS meta (audio path, text, phoneme, speaker id), filter by
    duration and save train / val splits.

    :param wav_file_list: wave file paths; basenames look like
        '<speaker>_<utt>.wav' (speaker is the first '_'-separated field)
    :param text_file_list: text files whose lines are '<key> <text ...>'
    """
    info = {'audio_filename': {}, 'text': {}, 'phoneme': {}, 'speaker': {}}
    speakers = []

    # parse wav keys
    for wav_path in wav_file_list:
        key = os.path.basename(wav_path).replace('.wav', '')
        info['audio_filename'][key] = wav_path
        speakers.append(key.split('_')[0])

    # FIX: sort the speakers so the speaker-id mapping is deterministic
    # across runs (iterating a bare set() is not reproducible); matches the
    # sibling make_meta that uses sorted(speakers).
    speakers = {spk: idx for idx, spk in enumerate(sorted(set(speakers)))}
    for key in info['audio_filename'].keys():
        info['speaker'][key] = speakers[key.split('_')[0]]

    # parse texts with KoG2P (Korean grapheme-to-phoneme)
    kog2p = KoG2P()
    for text_path in text_file_list:
        with open(text_path, 'r') as r:
            for line in r:
                # FIX: strip the trailing newline that file iteration keeps,
                # so stored text / g2p input do not carry '\n'; skip blanks.
                line = line.rstrip('\n')
                if not line:
                    continue
                key, _, text = line.partition(' ')
                info['text'][key] = text
                info['phoneme'][key] = kog2p.g2p(text)

    # change meta obj
    self._meta = pd.DataFrame(info)
    self._meta['pass'] = [True] * len(self._meta)

    # read duration; rows outside the rate bounds get marked in 'pass'
    print('Check durations on wave files ...')
    dur_list = self._process_duration(self._meta['audio_filename'].values,
                                      settings.MIN_WAV_RATE,
                                      settings.MAX_WAV_RATE)
    self._meta['duration'] = dur_list

    # filter passed rows; dropna removes keys missing wav or text
    self._meta = self._meta[self._meta['pass'].values]
    self._meta = self._meta.dropna()

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.05)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def make_meta(self, root_dir: str, min_wav_rate: int, max_wav_rate: int,
              min_txt_rate: float):
    """Build meta for a <root>/<speaker>/wav/*.wav corpus: map speakers to
    ids, check durations, run the text pre-process and save train / val.

    :param root_dir: corpus root; one sub directory per speaker
    :param min_wav_rate: minimum duration bound for _process_duration
    :param max_wav_rate: maximum duration bound for _process_duration
    :param min_txt_rate: minimum text rate passed to _process_txt
    """

    def _txt_path(wav_path: str) -> str:
        # Map <root>/<spk>/wav/<name>.wav -> <root>/<spk>/txt/<name>.txt.
        # FIX: the previous str.replace('wav', 'txt') rewrote *every*
        # 'wav' occurrence, corrupting paths whose root directory or
        # speaker name happens to contain 'wav'.
        wav_dir, file_name = os.path.split(wav_path)
        speaker_dir = os.path.dirname(wav_dir)
        stem, _ = os.path.splitext(file_name)
        return os.path.join(speaker_dir, 'txt', stem + '.txt')

    # speakers: one directory per speaker
    print('list up speakers')
    speakers = os.listdir(root_dir)

    # look up files
    print('lookup files...')
    wav_file_list = []
    speaker_mult = []  # speaker name repeated once per file, kept aligned
    for speaker in tqdm(speakers):
        file_temp = glob.glob(
            os.path.join(root_dir, speaker, 'wav', '*.wav'))
        wav_file_list.extend(file_temp)
        speaker_mult.extend(repeat(speaker, len(file_temp)))

    print('Update meta infos')
    # sorted -> deterministic speaker ids
    speaker_mappings = {
        spk: idx for idx, spk in enumerate(sorted(speakers))
    }

    # update infos
    self._meta['speaker'] = [speaker_mappings[idx] for idx in speaker_mult]
    self._meta['audio_filename'] = wav_file_list
    self._meta['pass'] = [True] * len(speaker_mult)

    # read duration
    print('Check durations on wave files ...')
    dur_list = self._process_duration(wav_file_list, min_wav_rate,
                                      max_wav_rate)
    self._meta['duration'] = dur_list

    # text process
    print('Text pre-process ... ')
    txt_file_list = [_txt_path(p) for p in wav_file_list]
    self._process_txt(txt_file_list, dur_list, min_txt_rate)

    # filter passed rows
    self._meta = self._meta[self._meta['pass'].values]

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def make_meta(self, wav_file_list: List[str], txt_info: pd.DataFrame):
    """Build meta (audio path + normalized text) keyed by utterance id,
    filter by duration and save train / val splits.

    :param wav_file_list: wave file paths; the basename before the first
        '.' is the utterance id
    :param txt_info: frame with 'id' and 'normalized_text' columns
    """
    # utterance id -> wav path
    wav_mapping = {}
    for wav_file_path in wav_file_list:
        utt_id = os.path.basename(wav_file_path).split('.')[0]
        wav_mapping[utt_id] = wav_file_path

    # join each text record onto its wave file
    infos = {'audio_filename': {}, 'text': {}}
    for _, record in txt_info.iterrows():
        utt_id = record['id']
        infos['audio_filename'][utt_id] = wav_mapping[utt_id]
        infos['text'][utt_id] = record['normalized_text']

    # change meta obj
    self._meta = pd.DataFrame(infos)
    self._meta['pass'] = [True] * len(self._meta)

    # read duration
    print('Check durations on wave files ...')
    dur_list = self._process_duration(self._meta['audio_filename'].values,
                                      settings.MIN_WAV_RATE,
                                      settings.MAX_WAV_RATE)
    self._meta['duration'] = dur_list

    # filter passed rows
    self._meta = self._meta[self._meta['pass'].values]
    self._meta = self._meta.dropna()

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def make_meta(self):
    """Rewrite the meta's relative audio / midi paths as absolute paths
    under self.root_dir, then split and save train / val frames."""
    # convert from relative path to absolute path
    audio_file_names = self._meta['audio_filename']
    midi_file_names = self._meta['midi_filename']

    new_audio_files = [
        os.path.join(self.root_dir, name) for name in audio_file_names
    ]
    new_midi_files = [
        os.path.join(self.root_dir, name) for name in midi_file_names
    ]

    self._meta['audio_filename'] = new_audio_files
    self._meta['midi_filename'] = new_midi_files

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    # FIX: every other save_meta call site passes
    # (frame_file_names, meta_path, meta, train, val); the old call here
    # passed root_dir in place of the file-name list, which does not match
    # that signature. TODO(review): confirm save_meta's signature is shared
    # by this class.
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def make_meta(self, chunk_file_list, speakers, val_rate: float = 0.1):
    """Build meta from pre-chunked audio files with known speakers, save
    train / val splits and the speaker-name -> id map as json.

    :param chunk_file_list: chunked audio file paths
    :param speakers: speaker name per chunk (aligned with chunk_file_list)
    :param val_rate: validation split rate
    """
    # change meta obj
    self._meta = pd.DataFrame({
        'audio_filename': chunk_file_list,
        'speaker': speakers
    })

    # speaker name -> contiguous index (sorted for deterministic ids)
    speaker_mappings = {
        spk: idx
        for idx, spk in enumerate(sorted(set(self._meta['speaker'].values)))
    }

    # update infos
    self._meta['speaker'] = [
        speaker_mappings[spk] for spk in self._meta['speaker'].values
    ]
    self._meta['pass'] = [True] * len(self._meta)

    # read duration
    # NOTE(review): the (0, 0) bounds presumably disable min/max duration
    # filtering — confirm against _process_duration.
    print('Check durations on wave files ...')
    dur_list = self._process_duration(self._meta['audio_filename'].values,
                                      0, 0)
    self._meta['duration'] = dur_list

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta,
                                                 val_rate=val_rate)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)

    # save speaker map as json
    spk_json_path = os.path.join(self.meta_path, 'speaker_map.json')
    with open(spk_json_path, 'w') as w:
        json.dump(speaker_mappings, w)
def make_meta(self, root_dir: str, wav_file_list: List[str] = None,
              txt_file_list: List[str] = None):
    """Build meta by matching wav and txt files on their basename key,
    derive speaker ids, check durations, run text pre-processing and save
    train / val splits.

    Directory layout (three levels):
        <root_dir>/<speaker>/wav/<key>.wav
        <root_dir>/<speaker>/txt/<key>.txt

    :param root_dir: corpus root, used when file lists are not provided
    :param wav_file_list: optional pre-collected wave paths
    :param txt_file_list: optional pre-collected text paths
    """
    # lookup files
    print('Lookup if not provided lists')
    if not wav_file_list:
        wav_file_list = glob.glob(
            os.path.join(root_dir, '*', 'wav', '*.wav'))
    if not txt_file_list:
        txt_file_list = glob.glob(
            os.path.join(root_dir, '*', 'txt', '*.txt'))

    infos = {'speaker': {}, 'audio_filename': {}, 'text': {}}

    # basename key (no extension) -> path, for both modalities
    wav_match_dict = {
        os.path.basename(wav_file_path).replace('.wav', ''): wav_file_path
        for wav_file_path in wav_file_list
    }
    txt_match_dict = {
        os.path.basename(txt_file_path).replace('.txt', ''): txt_file_path
        for txt_file_path in txt_file_list
    }

    # keep only keys that have both a wav and a txt file
    print('Mapping information with their keys')
    for key, wav_file_path in tqdm(wav_match_dict.items()):
        if key in txt_match_dict:
            # speaker is the directory two levels above the file
            # (<root>/<speaker>/wav/<key>.wav)
            # NOTE(review): assumes '/' separators — breaks on Windows
            # paths; confirm the supported platforms.
            speaker = wav_file_path.split('/')[-3]
            infos['speaker'][key] = speaker
            infos['audio_filename'][key] = wav_file_path
            # NOTE: 'text' holds the txt file *path* here; the contents
            # are handled by _process_txt below.
            infos['text'][key] = txt_match_dict[key]
    print('Matching is completed ...')

    # change meta obj
    self._meta = pd.DataFrame(infos)

    # make speaker as indices (sorted -> deterministic ids)
    speaker_mappings = {
        spk: idx
        for idx, spk in enumerate(
            sorted(list(set(self._meta['speaker'].values))))
    }

    # update infos
    self._meta['speaker'] = [
        speaker_mappings[spk] for spk in self._meta['speaker'].values
    ]
    self._meta['pass'] = [True] * len(self._meta)

    # read duration; out-of-bound rows are flagged via the 'pass' column
    print('Check durations on wave files ...')
    dur_list = self._process_duration(self._meta['audio_filename'].values,
                                      settings.MIN_WAV_RATE,
                                      settings.MAX_WAV_RATE)
    self._meta['duration'] = dur_list

    # text process (0.0: no minimum text-rate filtering)
    print('Text pre-process ... ')
    self._process_txt(self._meta['text'].values, dur_list, 0.0)

    # filter passed rows
    self._meta = self._meta[self._meta['pass'].values]

    # split train / val
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta, val_rate=0.1)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
def libri_light2(self, all_meta: str, out_dir: str,
                 min_duration: float = 2, max_duration: float = 15.):
    """Re-chunk a LibriLight meta frame: clips longer than *max_duration*
    are split into chunks, clips shorter than *min_duration* are dropped,
    and new train / val meta frames are saved under *out_dir*.

    :param all_meta: path to a json meta frame with columns
        'audio_filename', 'duration', 'speaker'
    :param out_dir: output root; 'chunks' and 'meta' sub dirs are created
    :param min_duration: minimum clip length in seconds
    :param max_duration: maximum clip length in seconds
    """
    # mkdir
    chunk_dir = os.path.join(out_dir, 'chunks')
    meta_dir = os.path.join(out_dir, 'meta')
    os.makedirs(chunk_dir, exist_ok=True)
    os.makedirs(meta_dir, exist_ok=True)

    # load meta
    df = pd.read_json(all_meta)

    # split df into two frames: short enough to keep as-is / to be chunked
    df_under = df[df['duration'] <= max_duration]
    df_upper = df[df['duration'] > max_duration]

    # chunking targets and their output paths
    upper_input_list = df_upper['audio_filename']
    upper_output_list = [
        os.path.join(chunk_dir, os.path.basename(p))
        for p in upper_input_list
    ]

    print('Start Audio Processing ...')
    # FIX: speakers must come from df_upper, not df — zipping the filtered
    # file lists against the *unfiltered* frame's 'speaker' column
    # mis-assigned speakers to chunks (zip silently truncated / shifted).
    results = Parallel(n_jobs=__class__.num_workers)(
        delayed(split_and_save)(*args,
                                min_duration * settings.SAMPLE_RATE,
                                max_duration * settings.SAMPLE_RATE)
        for args in tqdm(
            list(zip(upper_input_list, upper_output_list,
                     df_upper['speaker']))))

    # each worker returns a tuple of lists (paths, lengths, speakers)
    chunk_paths, chunk_lens, speakers = [], [], []
    for cp, cl, spk in results:
        chunk_paths.append(cp)
        chunk_lens.append(cl)
        speakers.append(spk)

    # flatten the per-file lists
    chunk_paths = [path for subset in chunk_paths for path in subset]
    chunk_lens = [
        chunk_len for subset in chunk_lens for chunk_len in subset
    ]
    speakers = [spk for subset in speakers for spk in subset]
    assert len(chunk_paths) == len(chunk_lens)

    # frame for the freshly written chunks
    upper_chunk_info = {
        'audio_filename': chunk_paths,
        'duration': chunk_lens,
        'speaker': speakers,
        'pass': [True] * len(chunk_paths)
    }
    upper_chunks_df = pd.DataFrame(upper_chunk_info)

    # re-index so the concatenated frame has unique contiguous indices
    df_under.index = pd.Index(list(range(len(df_under))))
    upper_chunks_df.index = pd.Index(
        list(range(len(df_under), len(df_under) + len(upper_chunks_df))))

    # combine kept originals with new chunks
    new_df = pd.concat([df_under, upper_chunks_df])

    # drop anything still shorter than the minimum duration
    new_df = new_df[new_df['duration'] >= min_duration]

    # make train/valid df
    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(new_df, val_rate=0.1)

    # save data frames
    print('Save meta frames on {}'.format(' '.join(
        LibriLightMeta.frame_file_names)))
    LibriLightMeta.save_meta(LibriLightMeta.frame_file_names, meta_dir,
                             new_df, train_meta, val_meta)
def make_meta(self, root_dir: str, meta_dir: str = None,
              filter_non_vocal: bool = True):
    """Build MedleyDB meta: match mixture files against the MedleyDB meta,
    merge multi-track vocals into single files, then split and save
    train / val frames (stratified by vocal track count).

    :param root_dir: root directory holding the converted '.npy' mixtures
    :param meta_dir: MedleyDB meta directory (defaults to
        MEDLEYDB_META_DIR)
    :param filter_non_vocal: drop songs without any vocal track
    """
    if not meta_dir:
        meta_dir = MEDLEYDB_META_DIR

    # 1. load meta
    print('Load MedleyDB meta info')
    meta = load_meta(meta_dir)

    # 2. index meta records by their mixture file name (as .npy)
    meta_match_mixkey = {
        record['mix_filename'].replace('.wav', '.npy'): record
        for record in meta
    }

    # 3. load all mixture files (paths converted from .wav to .npy)
    print('Lookup mix files')
    mix_file_list = [
        p.replace('.wav', '.npy')
        for p in glob.glob(os.path.join(root_dir, '**', '**', '*.wav'))
    ]

    # 4. get mix/vocal pairs
    print('Matching mix / vocal pairs')
    pair_meta = get_mix_vocal_pairs(mix_file_list, meta_match_mixkey,
                                    ext='npy')
    if filter_non_vocal:
        # keep only songs that have at least one vocal track
        pair_meta = {
            key: voices
            for key, voices in pair_meta.items() if voices
        }

    # Each song has one vocal track or several vocal tracks. If a song has
    # several tracks, they are merged into one file so it can be loaded
    # quickly later on.
    # "pair_meta" looks like:
    # {"mixture_filename": ["first_vocal_filename", "second..."]}
    # NOTE(review): voice_paths is unpacked but never used below.
    mix_paths, voice_paths = map(list, zip(*pair_meta.items()))

    # merge vocal tracks in parallel
    print('Merging multi-vocal-tracks ...')
    results = go_multiprocess(load_and_merge_audios,
                              list(pair_meta.items()))
    out_path_list, source_numbers = map(list, zip(*results))

    # drop entries whose merge failed (-1 marks failure)
    filtered_zips = [
        (m, v, s)
        for m, v, s in zip(mix_paths, out_path_list, source_numbers)
        if v != -1
    ]
    mix_results, voice_results, source_numbers = map(
        list, zip(*filtered_zips))

    # vocal track count capped at 2; used as the stratify label below
    voice_track_lengths = []
    for v, s in zip(voice_results, source_numbers):
        if s < 2:
            voice_track_lengths.append(s)
        else:
            voice_track_lengths.append(2)

    # make meta
    self._meta['mixture_filename'] = mix_results
    self._meta['voice_filename'] = voice_results
    self._meta['voice_tracks'] = voice_track_lengths

    print('Make train / val meta')
    train_meta, val_meta = split_train_val_frame(self._meta,
                                                 val_rate=0.1,
                                                 label_key='voice_tracks')

    print('Save meta frames on {}'.format(' '.join(self.frame_file_names)))
    self.save_meta(self.frame_file_names, self.meta_path, self._meta,
                   train_meta, val_meta)
    print('Done')