def feat_extraction_magphase(in_wav_dir, file_id_list, cfg, logger, b_multiproc=False):
    """Run MagPhase acoustic feature extraction over a list of utterances.

    For each token in ``file_id_list`` the file ``<in_wav_dir>/<token>.wav`` is
    passed to ``magphase.analysis_for_acoustic_modelling`` with ``out_dir`` set
    to ``cfg.acous_feats_dir``.

    Args:
        in_wav_dir: Directory that contains the input wav files.
        file_id_list: File name tokens (wav base names without extension).
        cfg: Configuration object. The attributes read here are
            ``magphase_bindir``, ``acous_feats_dir``, ``mag_dim``, ``real_dim``
            and ``magphase_const_rate``.
        logger: Logger that receives one info message per analysed file.
        b_multiproc: If True, the per-file work is dispatched through
            ``libutils.run_multithreaded``; if False, the files are processed
            one after another by a plain loop.

    Returns:
        None.
    """
    # ``libutils`` and ``magphase`` are expected to be importable from
    # cfg.magphase_bindir. Register it only once so that repeated calls do not
    # pile up duplicate sys.path entries.
    if cfg.magphase_bindir not in sys.path:
        sys.path.append(cfg.magphase_bindir)
    import libutils as lu
    import magphase as mp

    def feat_extraction_magphase_one_file(in_wav_dir, file_name_token, acous_feats_dir, cfg, logger):
        """Analyse a single wav file and store its features in ``acous_feats_dir``."""
        # Logging (lazy %-args: the message is only built if the record is emitted):
        logger.info('Analysing waveform: %s.wav', file_name_token)

        # File setup:
        wav_file = os.path.join(in_wav_dir, file_name_token + '.wav')

        # Feat extraction:
        mp.analysis_for_acoustic_modelling(wav_file,
                                           out_dir=acous_feats_dir,
                                           mag_dim=cfg.mag_dim,
                                           phase_dim=cfg.real_dim,
                                           b_const_rate=cfg.magphase_const_rate)

    if b_multiproc:
        # The whole token list is handed over; run_multithreaded is in charge
        # of splitting the work.
        lu.run_multithreaded(feat_extraction_magphase_one_file,
                             in_wav_dir, file_id_list, cfg.acous_feats_dir, cfg, logger)
    else:
        for file_name_token in file_id_list:
            feat_extraction_magphase_one_file(in_wav_dir, file_name_token,
                                              cfg.acous_feats_dir, cfg, logger)
def feat_extraction_magphase(in_wav_dir, file_id_list, cfg, logger, b_multiproc=False):
    """Run MagPhase acoustic feature extraction over a list of utterances.

    For each token in ``file_id_list`` the file ``<in_wav_dir>/<token>.wav`` is
    passed to ``magphase.analysis_for_acoustic_modelling`` with ``out_dir`` set
    to ``cfg.acous_feats_dir``.

    Args:
        in_wav_dir: Directory that contains the input wav files.
        file_id_list: File name tokens (wav base names without extension).
        cfg: Configuration object. The attributes read here are
            ``magphase_bindir``, ``acous_feats_dir``, ``mag_dim``, ``real_dim``
            and ``magphase_const_rate``.
        logger: Logger that receives one info message per analysed file.
        b_multiproc: If True, the per-file work is dispatched through
            ``libutils.run_multithreaded``; if False, the files are processed
            one after another by a plain loop.

    Returns:
        None.
    """
    # ``libutils`` and ``magphase`` are expected to be importable from
    # cfg.magphase_bindir. Register it only once so that repeated calls do not
    # pile up duplicate sys.path entries.
    if cfg.magphase_bindir not in sys.path:
        sys.path.append(cfg.magphase_bindir)
    import libutils as lu
    import magphase as mp

    def feat_extraction_magphase_one_file(in_wav_dir, file_name_token, acous_feats_dir, cfg, logger):
        """Analyse a single wav file and store its features in ``acous_feats_dir``."""
        # Logging (lazy %-args: the message is only built if the record is emitted):
        logger.info('Analysing waveform: %s.wav', file_name_token)

        # File setup:
        wav_file = os.path.join(in_wav_dir, file_name_token + '.wav')

        # Feat extraction:
        mp.analysis_for_acoustic_modelling(wav_file,
                                           out_dir=acous_feats_dir,
                                           mag_dim=cfg.mag_dim,
                                           phase_dim=cfg.real_dim,
                                           b_const_rate=cfg.magphase_const_rate)

    if b_multiproc:
        # The whole token list is handed over; run_multithreaded is in charge
        # of splitting the work.
        lu.run_multithreaded(feat_extraction_magphase_one_file,
                             in_wav_dir, file_id_list, cfg.acous_feats_dir, cfg, logger)
    else:
        for file_name_token in file_id_list:
            feat_extraction_magphase_one_file(in_wav_dir, file_name_token,
                                              cfg.acous_feats_dir, cfg, logger)
lu.write_binfile(v_lf0, out_feats_dir + '/' + file_name_token + '.lf0') # Saving auxiliary feature shift (hop length). It is useful for posterior modifications of labels in Merlin. lu.write_binfile(v_shift, out_feats_dir + '/' + file_name_token + '.shift') return if __name__ == '__main__': # CONSTANTS: So far, the vocoder has been tested only with the following constants:=== fft_len = 4096 fs = 48000 # INPUT:============================================================================== files_scp = '../data/file_id.scp' # List of file names (tokens). Format used by Merlin. in_wav_dir = '../data/wavs_nat' # Directory with the wavfiles to extract the features from. out_feats_dir = '../data/params' # Output directory that will contain the extracted features. mvf = 4500 # Maximum voiced frequency (Hz) # FILES SETUP:======================================================================== lu.mkdir(out_feats_dir) l_file_tokns = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist() # MULTIPROCESSING EXTRACTION:========================================================== lu.run_multithreaded(feat_extraction, in_wav_dir, l_file_tokns, out_feats_dir, fft_len, mvf) print('Done!')
# Read file list: file_id_list = pars_acous_train['Paths']['file_id_list'] l_file_tokns = lu.read_text_file2(file_id_list, dtype='string', comments='#').tolist() acoustic_feats_path = pars_acous_train['Paths']['in_acous_feats_dir'] # Acoustic Feature Extraction:------------------------------------------------------------- if b_feat_extr: # Extract features: lu.mkdir(acoustic_feats_path) if b_feat_ext_multiproc: lu.run_multithreaded( feat_extraction, join(exper_path, 'acoustic_model', 'data', 'wav'), l_file_tokns, acoustic_feats_path, d_mp_opts) else: for file_name_token in l_file_tokns: feat_extraction( join(exper_path, 'acoustic_model', 'data', 'wav'), file_name_token, acoustic_feats_path, d_mp_opts) # Labels Conversion to Variable Frame Rate:------------------------------------------------ if b_conv_labs_rate and not d_mp_opts[ 'b_const_rate']: # NOTE: The script ./script/label_st_align_to_var_rate.py can be also called from comand line directly. label_state_align = join(exper_path, 'acoustic_model', 'data', 'label_state_align') label_state_align_var_rate = pars_acous_train['Labels']['label_align'] fs = int(pars_acous_train['Waveform']['samplerate']) ltvr.convert(file_id_list, label_state_align, acoustic_feats_path, fs,
# File setup: wav_file = os.path.join(in_wav_dir, file_name_token + '.wav') mp.analysis_compressed(wav_file, out_dir=out_feats_dir) return if __name__ == '__main__': # INPUT:============================================================================== files_scp = '../data_48k/file_id.scp' # List of file names (tokens). Format used by Merlin. in_wav_dir = '../data_48k/wavs_nat' # Directory with the wavfiles to extract the features from. out_feats_dir = '../data_48k/params' # Output directory that will contain the extracted features. # FILES SETUP:======================================================================== lu.mkdir(out_feats_dir) l_file_tokns = lu.read_text_file2(files_scp, dtype='string', comments='#').tolist() # MULTIPROCESSING EXTRACTION:========================================================== lu.run_multithreaded(feat_extraction, in_wav_dir, l_file_tokns, out_feats_dir) # For debug (Don't remove): #for file_name_token in l_file_tokns: # feat_extraction(in_wav_dir, file_name_token, out_feats_dir) print('Done!')
return def get_wav_filelist(wav_dir): wav_files = [] for file in os.listdir(wav_dir): whole_filepath = os.path.join(wav_dir, file) if os.path.isfile(whole_filepath) and str(whole_filepath).endswith( ".wav"): wav_files.append(whole_filepath) elif os.path.isdir(whole_filepath): wav_files += get_wav_filelist(whole_filepath) wav_files.sort() return wav_files # FILES SETUP:======================================================================== lu.mkdir(out_dir) l_wavfiles = get_wav_filelist(wav_dir) # MULTIPROCESSING EXTRACTION:========================================================== lu.run_multithreaded(feat_extraction, l_wavfiles, out_dir) # For debugging (don't delete): #for wavfile in l_wavfiles: # feat_extraction(wavfile, out_dir) print('Done!')
# Back up the magphase package directory and this script into the experiment
# directory.
shutil.copytree(os.path.dirname(mp.__file__),
                os.path.join(exper_path, 'backup_magphase_code'))
shutil.copy2(__file__, os.path.join(exper_path, 'conf'))

# Read file list:
l_file_tokns = lu.read_text_file2(
    os.path.join(exper_path, file_id_list), dtype='string', comments='#').tolist()

if b_feat_extr:
    # Extract features:
    acoustic_feats_path = os.path.join(exper_path, acoustic_feats_dir)
    lu.mkdir(acoustic_feats_path)

    if not b_feat_ext_multiproc:
        # One file after another.
        for file_name_token in l_file_tokns:
            feat_extraction(
                in_wav_dir, file_name_token, acoustic_feats_path, d_mp_opts)
    else:
        lu.run_multithreaded(
            feat_extraction, in_wav_dir, l_file_tokns, acoustic_feats_path, d_mp_opts)

if b_config_merlin or b_wavgen:
    # Edit Merlin's config file:
    parser = configparser.ConfigParser()
    parser.optionxform = str  # keep option names case-sensitive
    parser.read([os.path.join(exper_path, 'conf/config_base.conf')])

    parser['DEFAULT']['TOPLEVEL'] = exper_path
    parser['Paths']['file_id_list'] = "%(work)s/" + file_id_list
    parser['Labels']['question_file_name'] = os.path.join(
        exper_path, question_file_name)
raise except: print2('Error while extracting features from', path.join(wavdir, sentence + '.wav'), 'to', outdir) flush2() else: print1(sentence) flush1() if __name__ == '__main__': p = ArgumentParser() p.add_argument('-s', '--senlst', dest='senlst', required=True) p.add_argument('-c', '--config', dest='config', required=True) p.add_argument('-d', '--debug', dest='debug', action='store_true') a = p.parse_args() load_config(a.config) from __init__ import * CONST_RATE = cfg_data.get('const', True) with open(a.senlst) as f: sentences = [l.rstrip() for l in f] if a.debug: for s in sentences: extract(s, WAVDIR, ACO1DIR, CONST_RATE) else: lu.run_multithreaded(extract, sentences, WAVDIR, ACO1DIR, CONST_RATE)
# INPUT -----------------------------------------------------------------------
# List of file names (tokens), in the format used by Merlin.
files_scp = '../demos/data_48k/file_id_predict.scp'
# Input directory that contains the predicted features.
in_feats_dir = '../demos/data_48k/params_predicted'
# Where the synthesised waveform will be stored.
out_syn_dir = '../demos/data_48k/wavs_syn_from_predicted'
# Number of Mel-scaled frequency bins.
mag_dim = 60
# Number of Mel-scaled frequency bins kept for phase features (real and imag).
# It must be <= mag_dim.
phase_dim = 45
# Postfilter selection:
#   "magphase": MagPhase's own postfilter (in development)
#   "merlin":   Merlin's style postfilter.
#   "no":       No postfilter.
pf_type = 'magphase'
# If True, it synthesises using all the available cores in parallel.
# If False, it just uses one core (slower).
b_multiproc = False

# FILES SETUP -----------------------------------------------------------------
lu.mkdir(out_syn_dir)
l_file_tokns = lu.read_text_file2(
    files_scp, dtype='string', comments='#').tolist()

# PROCESSING ------------------------------------------------------------------
if not b_multiproc:
    for file_tokn in l_file_tokns:
        synthesis(in_feats_dir, file_tokn, out_syn_dir,
                  mag_dim, phase_dim, fs, pf_type)
else:
    lu.run_multithreaded(synthesis, in_feats_dir, l_file_tokns, out_syn_dir,
                         mag_dim, phase_dim, fs, pf_type)

print('Done!')
# List of file names (tokens), in the format used by Merlin.
files_scp = '../data_48k/file_id.scp'
# Input directory that contains the predicted features.
in_feats_dir = '../data_48k/params'
# Where the synthesised waveform will be stored.
out_syn_dir = '../data_48k/wavs_syn_merlin'
# Number of Mel-scaled frequency bins.
nbins_mel = 60
# Number of Mel-scaled frequency bins kept for phase features (real and imag).
# It must be <= nbins_mel.
nbins_phase = 45
# If True, the MagPhase vocoder post-filter is applied.
# Note: If you want to use the one included in Merlin, disable this one.
b_postfilter = True
# If True, it synthesises using all the available cores in parallel.
# If False, it just uses one core (slower).
b_parallel = False

# FILES SETUP -----------------------------------------------------------------
lu.mkdir(out_syn_dir)
l_file_tokns = lu.read_text_file2(
    files_scp, dtype='string', comments='#').tolist()

# PROCESSING ------------------------------------------------------------------
if not b_parallel:
    for file_tokn in l_file_tokns:
        synthesis(in_feats_dir, file_tokn, out_syn_dir,
                  nbins_mel, nbins_phase, fs, fft_len, b_postfilter)
else:
    lu.run_multithreaded(synthesis, in_feats_dir, l_file_tokns, out_syn_dir,
                         nbins_mel, nbins_phase, fs, fft_len, b_postfilter)

print('Done!')
fs = 48000

# INPUT -----------------------------------------------------------------------
# List of file names (tokens), in the format used by Merlin.
files_scp = '../data_48k/file_id.scp'
# Input directory that contains the predicted features.
in_feats_dir = '../data_48k/params'
# Where the synthesised waveform will be stored.
out_syn_dir = '../data_48k/wavs_syn_merlin'
# Number of Mel-scaled frequency bins.
nbins_mel = 60
# Number of Mel-scaled frequency bins kept for phase features (real and imag).
# It must be <= nbins_mel.
nbins_phase = 45
# If True, the MagPhase vocoder post-filter is applied.
# Note: If you want to use the one included in Merlin, disable this one.
b_postfilter = True
# If True, it synthesises using all the available cores in parallel.
# If False, it just uses one core (slower).
b_parallel = False

# FILES SETUP -----------------------------------------------------------------
lu.mkdir(out_syn_dir)
l_file_tokns = lu.read_text_file2(
    files_scp, dtype='string', comments='#').tolist()

# PROCESSING ------------------------------------------------------------------
if not b_parallel:
    for file_tokn in l_file_tokns:
        synthesis(in_feats_dir, file_tokn, out_syn_dir,
                  nbins_mel, nbins_phase, fs, b_postfilter)
else:
    lu.run_multithreaded(synthesis, in_feats_dir, l_file_tokns, out_syn_dir,
                         nbins_mel, nbins_phase, fs, b_postfilter)

print('Done!')
save_config(pars_acous_synth, join(acous_model_conf_path, 'acous_synth.conf'))
copy2(join(this_dir, 'conf_base', 'logging_config.conf'),
      join(exper_path, 'acoustic_model', 'conf', 'logging_config.conf'))

# Read file list:
file_id_list = pars_acous_train['Paths']['file_id_list']
l_file_tokns = lu.read_text_file2(
    file_id_list, dtype='string', comments='#').tolist()
acoustic_feats_path = pars_acous_train['Paths']['in_acous_feats_dir']

# Acoustic Feature Extraction:-------------------------------------------------
if b_feat_extr:
    # Extract features:
    lu.mkdir(acoustic_feats_path)

    if not b_feat_ext_multiproc:
        # One file after another.
        for file_name_token in l_file_tokns:
            feat_extraction(join(exper_path, 'acoustic_model', 'data', 'wav'),
                            file_name_token, acoustic_feats_path, d_mp_opts)
    else:
        lu.run_multithreaded(feat_extraction,
                             join(exper_path, 'acoustic_model', 'data', 'wav'),
                             l_file_tokns, acoustic_feats_path, d_mp_opts)

# Labels Conversion to Variable Frame Rate:------------------------------------
# Only done when conversion is requested and the features are not constant rate.
# NOTE: The script ./script/label_st_align_to_var_rate.py can also be called
# directly from the command line.
if b_conv_labs_rate and not d_mp_opts['b_const_rate']:
    label_state_align = join(exper_path, 'acoustic_model', 'data', 'label_state_align')
    label_state_align_var_rate = pars_acous_train['Labels']['label_align']
    fs = int(pars_acous_train['Waveform']['samplerate'])
    ltvr.convert(file_id_list, label_state_align, acoustic_feats_path,
                 fs, label_state_align_var_rate)

# Run duration training:-------------------------------------------------------
if b_dur_train:
    call([submit_path, run_merlin_path, join(dur_model_conf_path, 'dur_train.conf')])
return def get_wav_filelist(wav_dir): wav_files = [] for file in os.listdir(wav_dir): whole_filepath = os.path.join(wav_dir, file) if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".wav"): wav_files.append(whole_filepath) elif os.path.isdir(whole_filepath): wav_files += get_wav_filelist(whole_filepath) wav_files.sort() return wav_files # FILES SETUP:======================================================================== lu.mkdir(out_dir) l_wavfiles = get_wav_filelist(wav_dir) # MULTIPROCESSING EXTRACTION:========================================================== lu.run_multithreaded(feat_extraction, l_wavfiles, out_dir) # For debugging (don't delete): #for wavfile in l_wavfiles: # feat_extraction(wavfile, out_dir) print('Done!')