def sp_to_mcep(m_sp, n_coeffs=60, alpha=0.77, in_type=3, fft_len=0):
    """Convert a spectrum matrix to mel-cepstral coefficients with SPTK ``mcep``.

    The spectrum is written to a temporary file, the external ``mcep`` binary
    (``_sptk_mcep_bin``) is run on it through the shell, and the resulting
    coefficients are read back. Both temporary files are removed afterwards,
    even when one of the steps raises.

    Reference command line this function reproduces:
        $sptk/mcep -a $alpha -m $mcsize -l $nFFT -e 1.0E-8 -j 0 -f 0.0 -q 3 $sp_dir/$sentence.sp > $mgc_dir/$sentence.mgc

    Args:
        m_sp: Spectrum matrix. The size of its second axis is used to derive
            the FFT length when ``fft_len`` is 0.
        n_coeffs: Number of coefficients to read back per frame; ``mcep`` is
            given the order ``n_coeffs - 1`` (``-m``).
        alpha: Value passed to ``mcep -a``.
        in_type: Value passed to ``mcep -q`` (input format selector).
        fft_len: Value passed to ``mcep -l``. 0 means "automatic", i.e.
            ``2 * (np.size(m_sp, 1) - 1)``.

    Returns:
        The content of the generated file as returned by
        ``lu.read_binfile(temp_mgc, n_coeffs)``.
    """
    # Pre:
    temp_sp = lu.ins_pid('temp.sp')
    temp_mgc = lu.ins_pid('temp.mgc')

    try:
        # Writing input data:
        lu.write_binfile(m_sp, temp_sp)

        # `== 0` rather than `is 0`: identity comparison with an int literal
        # only works by accident of CPython's small-int cache and fails for
        # values such as 0.0 or numpy integers.
        if fft_len == 0:  # case fft automatic
            fft_len = 2 * (np.size(m_sp, 1) - 1)

        # MCEP:
        # NOTE(review): the command is run through the shell because of the
        # output redirection; the interpolated paths are not quoted.
        curr_cmd = _sptk_mcep_bin + " -a %1.2f -m %d -l %d -e 1.0E-8 -j 0 -f 0.0 -q %d %s > %s" % (
            alpha, n_coeffs - 1, fft_len, in_type, temp_sp, temp_mgc)
        call(curr_cmd, shell=True)

        # Read MGC File:
        m_mgc = lu.read_binfile(temp_mgc, n_coeffs)
    finally:
        # Deleting temp files (only those that were actually created):
        for temp_file in (temp_sp, temp_mgc):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return m_mgc
def sp_to_mcep(m_sp, n_coeffs=60, alpha=0.77, in_type=3, fft_len=0):
    """Convert a spectrum matrix to mel-cepstral coefficients with SPTK ``mcep``.

    The spectrum is written to a temporary file, the external ``mcep`` binary
    (``_sptk_mcep_bin``) is run on it through the shell, and the resulting
    coefficients are read back. Both temporary files are removed afterwards,
    even when one of the steps raises.

    Reference command line this function reproduces:
        $sptk/mcep -a $alpha -m $mcsize -l $nFFT -e 1.0E-8 -j 0 -f 0.0 -q 3 $sp_dir/$sentence.sp > $mgc_dir/$sentence.mgc

    Args:
        m_sp: Spectrum matrix. The size of its second axis is used to derive
            the FFT length when ``fft_len`` is 0.
        n_coeffs: Number of coefficients to read back per frame; ``mcep`` is
            given the order ``n_coeffs - 1`` (``-m``).
        alpha: Value passed to ``mcep -a``.
        in_type: Value passed to ``mcep -q`` (input format selector).
        fft_len: Value passed to ``mcep -l``. 0 means "automatic", i.e.
            ``2 * (np.size(m_sp, 1) - 1)``.

    Returns:
        The content of the generated file as returned by
        ``lu.read_binfile(temp_mgc, n_coeffs)``.
    """
    # Pre:
    temp_sp = lu.ins_pid('temp.sp')
    temp_mgc = lu.ins_pid('temp.mgc')

    try:
        # Writing input data:
        lu.write_binfile(m_sp, temp_sp)

        # `== 0` rather than `is 0`: identity comparison with an int literal
        # only works by accident of CPython's small-int cache and fails for
        # values such as 0.0 or numpy integers.
        if fft_len == 0:  # case fft automatic
            fft_len = 2 * (np.size(m_sp, 1) - 1)

        # MCEP:
        # NOTE(review): the command is run through the shell because of the
        # output redirection; the interpolated paths are not quoted.
        curr_cmd = _sptk_mcep_bin + " -a %1.2f -m %d -l %d -e 1.0E-8 -j 0 -f 0.0 -q %d %s > %s" % (
            alpha, n_coeffs - 1, fft_len, in_type, temp_sp, temp_mgc)
        call(curr_cmd, shell=True)

        # Read MGC File:
        m_mgc = lu.read_binfile(temp_mgc, n_coeffs)
    finally:
        # Deleting temp files (only those that were actually created):
        for temp_file in (temp_sp, temp_mgc):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return m_mgc
def wavgen_magphase(gen_dir, file_id_list, cfg, logger):
    """Synthesise a waveform with MagPhase for every utterance in ``file_id_list``.

    For each file id, an optional cepstral post-filter is applied to the
    generated ``.mag`` file (only when ``cfg.do_post_filtering`` is set and
    MagPhase's own post-filter is not selected), after which
    ``mp.synthesis_from_acoustic_modelling`` is called.

    Args:
        gen_dir: Directory holding the generated features; also passed as the
            output directory to the synthesis call.
        file_id_list: Sequence of file name tokens (without extension).
        cfg: Configuration object; the attributes read here are
            ``magphase_bindir``, ``do_post_filtering``, ``use_magphase_pf``,
            ``mag_dim``, ``real_dim``, ``sr``, ``pf_coef``, ``fw_alpha``,
            ``co_coef`` and ``fl``.
        logger: Logger used to report progress.
    """
    # Import MagPhase and libraries (the bin dir is appended to sys.path first):
    sys.path.append(cfg.magphase_bindir)
    import libutils as lu
    import libaudio as la
    import magphase as mp

    total = len(file_id_list)
    for position, filename_token in enumerate(file_id_list, 1):
        logger.info('Creating waveform for %4d of %4d: %s' % (position, total, filename_token))

        # Post-filter outside MagPhase:
        if cfg.do_post_filtering and not cfg.use_magphase_pf:
            mcep_file, mcep_file_pf, mag_file = [
                os.path.join(gen_dir, filename_token + suffix)
                for suffix in ('.mcep', '_pf.mcep', '.mag')]

            # Mag to Mcep:
            m_mcep = la.rceps(lu.read_binfile(mag_file, dim=cfg.mag_dim),
                              in_type='log', out_type='compact')
            lu.write_binfile(m_mcep, mcep_file)

            # Apply post-filter:
            post_filter(mcep_file, mcep_file_pf, cfg.mag_dim, cfg.pf_coef,
                        cfg.fw_alpha, cfg.co_coef, cfg.fl, gen_dir, cfg)

            # Mcep to Mag:
            m_mag_mel_log_pf = la.mcep_to_sp_cosmat(
                lu.read_binfile(mcep_file_pf, dim=cfg.mag_dim),
                cfg.mag_dim, alpha=0.0, out_type='log')

            # Protection against possible nans:
            nan_mask = np.isnan(m_mag_mel_log_pf)
            m_mag_mel_log_pf[nan_mask] = la.MAGIC

            # The post-filtered magnitudes overwrite the original .mag file:
            lu.write_binfile(m_mag_mel_log_pf, mag_file)

            # Removing temp files:
            for temp_file in (mcep_file, mcep_file_pf):
                os.remove(temp_file)

        # Synthesis (MagPhase's own post-filter is enabled only when requested):
        use_internal_pf = (cfg.do_post_filtering and cfg.use_magphase_pf)
        mp.synthesis_from_acoustic_modelling(
            gen_dir, filename_token, gen_dir, cfg.mag_dim, cfg.real_dim, cfg.sr,
            b_postfilter=use_internal_pf)

    return
def feat_extraction(wav_file, out_feats_dir, fft_len=4096, mvf=4500, nbins_mel=60, nbins_phase=45):
    """Extract MagPhase acoustic features from one wav file and save them.

    Runs epoch detection (``la.reaper``), then the MagPhase analysis, and
    writes ``.mag``, ``.real``, ``.imag``, ``.lf0`` and ``.shift`` files (plus
    the intermediate ``.est`` file) into ``out_feats_dir``, all named after the
    wav file's base name.

    Args:
        wav_file: Path of the input waveform.
        out_feats_dir: Directory receiving every output file.
        fft_len: FFT length passed to the analysis (default: 4096).
        mvf: Passed to the analysis as its fourth positional argument
            (default: 4500).
        nbins_mel: Passed to the analysis as ``mag_mel_nbins`` (default: 60).
        nbins_phase: Passed to the analysis as ``cmplx_ph_mel_nbins``
            (default: 45).

    Exits the interpreter with status 1 when the sample rate returned by the
    analysis differs from the module-level ``fs_expected``.
    """
    # Parsing path:
    file_name_token = os.path.basename(os.path.splitext(wav_file)[0])

    # Display:
    print("Analysing file: " + file_name_token + '.wav' + '................................')

    # Files setup:
    est_file = os.path.join(out_feats_dir, file_name_token + '.est')

    # Epochs detection:
    la.reaper(wav_file, est_file)

    # Feature extraction:
    m_mag_mel_log, m_real_mel, m_imag_mel, v_shift, v_lf0, fs = mp.analysis_with_del_comp__ph_enc__f0_norm__from_files2(
        wav_file, est_file, fft_len, mvf, f0_type='lf0', mag_mel_nbins=nbins_mel, cmplx_ph_mel_nbins=nbins_phase)

    if fs != fs_expected:
        print("The wavefile's sample rate (%dHz) does not match the expected sample rate (%dHz)." % (fs, fs_expected))
        sys.exit(1)

    # Zeros for unvoiced segments in phase features:
    v_voi = (np.exp(v_lf0) > 5.0).astype(int)  # 5.0: tolerance (just in case)
    m_real_mel_zeros = m_real_mel * v_voi[:, None]
    m_imag_mel_zeros = m_imag_mel * v_voi[:, None]

    # Saving features. Paths are built with os.path.join, like the .est path
    # above, so that an empty or slash-terminated out_feats_dir resolves to
    # the same directory for every output file.
    # The shift (hop length) is an auxiliary feature, useful for posterior
    # modifications of labels in Merlin.
    outputs = (
        (m_mag_mel_log, '.mag'),
        (m_real_mel_zeros, '.real'),
        (m_imag_mel_zeros, '.imag'),
        (v_lf0, '.lf0'),
        (v_shift, '.shift'),
    )
    for feature, extension in outputs:
        lu.write_binfile(feature, os.path.join(out_feats_dir, file_name_token + extension))

    return
def feat_extraction(in_wav_dir, file_name_token, out_feats_dir, fft_len, mvf, nbins_mel=60, nbins_phase=45):
    """Extract MagPhase acoustic features for one utterance and save them.

    The waveform ``<in_wav_dir>/<file_name_token>.wav`` is analysed and the
    resulting ``.mag``, ``.real``, ``.imag``, ``.lf0`` and ``.shift`` files
    (plus the intermediate ``.est`` file) are written to ``out_feats_dir``.

    Args:
        in_wav_dir: Directory containing the input wav file.
        file_name_token: Base name of the utterance (no extension).
        out_feats_dir: Directory receiving every output file.
        fft_len: FFT length passed to the analysis.
        mvf: Passed to the analysis as its fourth positional argument.
        nbins_mel: Passed to the analysis as ``mag_mel_nbins``.
        nbins_phase: Passed to the analysis as ``cmplx_ph_mel_nbins``.
    """
    # Display:
    print("Analysing file: " + file_name_token + '.wav')

    # Files setup (every output shares the same path stem):
    out_stem = out_feats_dir + '/' + file_name_token
    wav_file = in_wav_dir + '/' + file_name_token + '.wav'
    est_file = out_stem + '.est'

    # Epochs detection:
    la.reaper(wav_file, est_file)

    # Feature extraction (the returned sample rate is not used here):
    analysis = mp.analysis_with_del_comp__ph_enc__f0_norm__from_files2(
        wav_file, est_file, fft_len, mvf,
        f0_type='lf0', mag_mel_nbins=nbins_mel, cmplx_ph_mel_nbins=nbins_phase)
    m_mag_mel_log, m_real_mel, m_imag_mel, v_shift, v_lf0, _fs = analysis

    # Zeros for unvoiced segments in phase features:
    v_voi = (np.exp(v_lf0) > 5.0).astype(int)  # 5.0: tolerance (just in case)
    voiced_column = v_voi[:, None]

    # Saving features. The shift (hop length) is an auxiliary feature, useful
    # for posterior modifications of labels in Merlin.
    for feature, extension in (
            (m_mag_mel_log, '.mag'),
            (m_real_mel * voiced_column, '.real'),
            (m_imag_mel * voiced_column, '.imag'),
            (v_lf0, '.lf0'),
            (v_shift, '.shift')):
        lu.write_binfile(feature, out_stem + extension)

    return
load_config(a.config) from __init__ import * with open(a.senlst) as f: sentences = [l.rstrip() for l in f if l] hts2 = [path.join(HTS2DIR, s + '.lab') for s in sentences] lab1 = [path.join(LAB1DIR, s + '.lab') for s in sentences] lab2 = [path.join(LAB2DIR, s + '.lab') for s in sentences] lab3 = [path.join(LAB3DIR, s + '.lab') for s in sentences] binarizer = HTSLabelNormalisation( question_file_name=path.join(RESDIR, '600.hed')) binarizer.perform_normalisation(hts2, lab1) remover = SilenceRemover(n_cmp=binarizer.dimension, silence_pattern=['*-#+*']) remover.remove_silence(lab1, hts2, lab2) normalizer = MinMaxNormalisation(feature_dimension=binarizer.dimension, min_value=0.01, max_value=0.99) normalizer.find_min_max_values(lab2) print1(normalizer.min_vector) print1(normalizer.max_vector) lu.write_binfile(normalizer.min_vector, path.join(LABSDIR, 'min')) lu.write_binfile(normalizer.max_vector, path.join(LABSDIR, 'max')) # normalizer.normalise_data(lab2, lab3)
def magphase_analysis(wav_file, outdir='', fft_len=None, nbins_mel=60, nbins_phase=45, pm_dir='', skip_low=False, cepstra=False):
    '''
    Function to combine Felipe's analysis_lossless and analysis_compressed with
    little redundancy, and storing pitchmark files.

    Args:
        wav_file: Path of the waveform to analyse. The file id used for every
            output is the base name up to its first dot.
        outdir: Root output directory. High-resolution features go under
            ``<outdir>/high/<stream>/``, low-dimensional ones under
            ``<outdir>/low/<stream>/``, shifts under ``<outdir>/shift/``.
        fft_len: FFT length forwarded to the spectral analysis.
        nbins_mel: Dimension of the low-dimensional magnitude stream (also the
            number of magnitude cepstral coefficients).
        nbins_phase: Dimension of the low-dimensional real/imag streams (also
            the number of phase cepstral coefficients).
        pm_dir: Directory with existing ``.pm`` pitchmark files. When empty,
            REAPER is run and its output is stored in ``<outdir>/pm/``.
        skip_low: When true, the low-dimensional features are not computed.
        cepstra: When true, mel-cepstra of the high-resolution streams are
            also written (``mag_cc``, ``real_cc``, ``imag_cc`` under ``low``).

    Raises:
        KeyError: if ``cepstra`` is set and the sample rate is neither 48000
            nor 16000 (no warping factor is defined for it).

    A KeyboardInterrupt raised during the analysis is swallowed, as before.
    '''
    try:
        outdir_hi = os.path.join(outdir, 'high')
        outdir_lo = os.path.join(outdir, 'low')

        file_id = os.path.basename(wav_file).split(".")[0]

        # Read file:
        v_sig, fs = sf.read(wav_file)

        if not pm_dir:
            # Epoch detection:
            est_file = os.path.join(outdir, 'pm', file_id + '.pm')
            la.reaper(wav_file, est_file)
        else:
            est_file = os.path.join(pm_dir, file_id + '.pm')

        v_pm_sec, v_voi = la.read_reaper_est_file(est_file, check_len_smpls=len(v_sig), fs=fs)
        v_pm_smpls = v_pm_sec * fs

        # Spectral analysis:
        m_fft, v_shift = mp.analysis_with_del_comp_from_pm(v_sig, fs, v_pm_smpls, fft_len=fft_len)

        # Getting high-ress magphase feats:
        m_mag, m_real, m_imag, v_f0 = mp.compute_lossless_feats(m_fft, v_shift, v_voi, fs)

        ### write high-dimensional data:
        lu.write_binfile(m_mag, os.path.join(outdir_hi, 'mag', file_id + '.mag'))
        lu.write_binfile(m_real, os.path.join(outdir_hi, 'real', file_id + '.real'))
        lu.write_binfile(m_imag, os.path.join(outdir_hi, 'imag', file_id + '.imag'))
        lu.write_binfile(v_f0, os.path.join(outdir_hi, 'f0', file_id + '.f0'))
        lu.write_binfile(v_shift, os.path.join(outdir, 'shift', file_id + '.shift'))

        if not skip_low:
            # Low dimension (Formatting for Acoustic Modelling):
            m_mag_mel_log, m_real_mel, m_imag_mel, v_lf0_smth = mp.format_for_modelling(
                m_mag, m_real, m_imag, v_f0, fs, mag_dim=nbins_mel, phase_dim=nbins_phase)
            # fft_len = 2*(np.size(m_mag,1) - 1)

            ### write low-dim data:
            lu.write_binfile(m_mag_mel_log, os.path.join(outdir_lo, 'mag', file_id + '.mag'))
            lu.write_binfile(m_real_mel, os.path.join(outdir_lo, 'real', file_id + '.real'))
            lu.write_binfile(m_imag_mel, os.path.join(outdir_lo, 'imag', file_id + '.imag'))
            lu.write_binfile(v_lf0_smth, os.path.join(outdir_lo, 'lf0', file_id + '.lf0'))

        # NOTE(review): the cepstra only depend on the high-resolution
        # features, so this branch is kept independent of `skip_low` --
        # confirm against the intended nesting.
        if cepstra:
            # Frequency-warping factor per sample rate. The 16 kHz entry used
            # to be 58, a missing-decimal typo: the warping factor is a
            # fraction (0.58 is the value conventionally paired with 0.77 at
            # 48 kHz).
            alpha = {48000: 0.77, 16000: 0.58}[fs]
            m_mag_mcep = la.sp_to_mcep(m_mag, n_coeffs=nbins_mel, alpha=alpha, in_type=3)
            m_real_mcep = la.sp_to_mcep(m_real, n_coeffs=nbins_phase, alpha=alpha, in_type=2)
            m_imag_mcep = la.sp_to_mcep(m_imag, n_coeffs=nbins_phase, alpha=alpha, in_type=2)

            lu.write_binfile(m_mag_mcep, os.path.join(outdir_lo, 'mag_cc', file_id + '.mag_cc'))
            lu.write_binfile(m_real_mcep, os.path.join(outdir_lo, 'real_cc', file_id + '.real_cc'))
            lu.write_binfile(m_imag_mcep, os.path.join(outdir_lo, 'imag_cc', file_id + '.imag_cc'))

    except KeyboardInterrupt:
        # Interrupts are deliberately ignored here (original behaviour); the
        # `except X, e` spelling was Python-2-only and the name was unused.
        pass
mag = lu.read_binfile(path.join(ACO2DIR, s + '.mag'), dim=ax.MAG_DIM) real = lu.read_binfile(path.join(ACO2DIR, s + '.real'), dim=ax.REAL_DIM) imag = lu.read_binfile(path.join(ACO2DIR, s + '.imag'), dim=ax.IMAG_DIM) lf0 = lu.read_binfile(path.join(ACO2DIR, s+'.lf0'), dim=ax.LF0_DIM)\ .reshape([-1,ax.LF0_DIM]) x = ax.acoustic(mag=mag, real=real, imag=imag, lf0=lf0) if ds.EXP_F0: x[:, ax.LF0] = np.exp(x[:, ax.LF0]) v = ax.voicing(x, 1.0) else: v = ax.voicing(x, 0.0) x = ax.interpolate_f0(x) if ds.USE_DELTA: dx = ax.velocity(x) ddx = ax.acceleration(x) x = np.concatenate([x, dx, ddx, v], axis=1) else: x = np.concatenate([x, v], axis=1) lu.write_binfile(x, path.join(ACO3DIR, s + '.aco')) except (KeyboardInterrupt, SystemExit): raise except Exception as e: print2(e) pass else: print1(s)
x[:,ax.LF0][v <= mean[-1]] = 0.0 x[:,ax.LF0] = np.log(x[:,ax.LF0]) else: x[:,ax.LF0][v <= mean[-1]] = float('-Inf') # if a.plot_f0: # t = [n * 0.005 for n in range(x.shape[0])] # x2 = lu.read_binfile(path.join(ACO3DIR, sentence+'.aco'), dim=ds.AX_DIM) # x2[:,ax.LF0][x2[:,-1] == 0.0] = 0.0 # pyplot.plot(t, x2[:len(t),1]) # pyplot.plot(t, x[:,1])#np.exp(x[:,ax.LF0])) # # pyplot.plot(t, [y if y > 0 else 0.0 for y in x2[:len(t),0]]) # pyplot.savefig(path.join(outdir, sentence+'_mag.pdf')) # pyplot.close() lu.write_binfile(x[:,ax.MAG], path.join(outdir, sentence+'.mag')) lu.write_binfile(x[:,ax.REAL], path.join(outdir, sentence+'.real')) lu.write_binfile(x[:,ax.IMAG], path.join(outdir, sentence+'.imag')) lu.write_binfile(x[:,ax.LF0], path.join(outdir, sentence+'.lf0')) try: call('{script} -s {sentence} -o {outdir} -m {magdim} -p {phadim} -f merlin {const_rate}'\ .format(script=path.join(SRCDIR, 'wavify.py'), sentence=sentence, #vocdir=outdir, outdir=outdir, magdim=ax.MAG_DIM, phadim=ax.PHASE_DIM, const_rate='' if ds.CONST_RATE else '-v').split()) except Exception as e: print2(e)
from argparse import ArgumentParser

# Computes the per-dimension mean and standard deviation of the acoustic
# feature files (<ACO3DIR>/<sentence>.aco) of every sentence listed in the
# --senlst file, and writes them to <STTDIR>/mean and <STTDIR>/stddev.
if __name__ == '__main__':
    p = ArgumentParser()
    p.add_argument('-s', '--senlst', dest='senlst', required=True)
    p.add_argument('-c', '--config', dest='config', required=True)
    a = p.parse_args()

    # The configuration has to be loaded before the project modules are
    # imported below.
    load_config(a.config)

    from __init__ import *
    import acoustic as ax
    import dataset as ds

    with open(a.senlst) as f:
        # Filter on the stripped text: a raw line always contains at least
        # its newline, so testing the raw line never dropped blank lines.
        sentences = [s for s in (l.rstrip() for l in f) if s]

    # Running sums of x and x*x over all frames of all sentences.
    mean = np.zeros([1, ds.AX_DIM])
    stddev = np.zeros([1, ds.AX_DIM])
    n = 0
    for s in sentences:
        data = np.asarray(
            lu.read_binfile(path.join(ACO3DIR, s + '.aco'), dim=ds.AX_DIM),
            dtype=np.float64)
        # Vectorised over frames instead of one Python iteration per frame.
        mean += data.sum(axis=0)
        stddev += np.square(data).sum(axis=0)
        n += len(data)

    if n == 0:
        # Dividing by zero would silently write NaN statistics.
        raise SystemExit('No frames found for the sentences listed in %s' % a.senlst)

    mean /= n
    # E[x^2] - E[x]^2 can come out marginally negative through rounding;
    # clamp at zero so the square root never yields NaN.
    stddev = np.sqrt(np.maximum(stddev / n - np.multiply(mean, mean), 0.0))

    lu.write_binfile(mean, path.join(STTDIR, 'mean'))
    lu.write_binfile(stddev, path.join(STTDIR, 'stddev'))