def test_file_wav(self, file_name, speaker_index):
    """
    Convert a wav file towards a target singer and write the results to disk.

    Features and a source embedding are taken from ``file_name``. The target
    embedding is read from a randomly chosen hdf5 file in ``config.feats_dir``
    whose second underscore-separated field equals
    ``config.singers[speaker_index]``. The model output is plotted against the
    input, then three wav files are written to ``config.output_dir``: the
    converted audio, the resynthesised target file and the resynthesised input.
    """
    stft, mel = self.read_wav_file(file_name)
    singer_1 = utils.get_embedding_GE2E(file_name)

    # Collect every feature file that belongs to the requested target singer.
    candidates = []
    for entry in os.listdir(config.feats_dir):
        if not entry.endswith('hdf5'):
            continue
        if entry.split('_')[1] == config.singers[speaker_index]:
            candidates.append(entry)
    mel_2, singer_2 = self.read_hdf5_file(random.choice(candidates))

    out_mel = self.process_file(mel, singer_1, singer_2, self.sess)

    envelope = {"gt": mel[:,:-6], "op": out_mel[:,:-4]}
    aperiodic = {"gt": mel[:,-6:-2], "op": out_mel[:,-4:]}
    self.plot_features({"Spec Envelope": envelope, "Aperiodic": aperiodic})

    # Model output followed by the input's last two feature columns
    # (presumably F0 and voicing -- confirm against sig_process).
    in_frames = mel.shape[0]
    out_frames = out_mel.shape[0]
    pieces = (
        out_mel[:in_frames],
        mel[:out_frames, -2:-1],
        mel[:out_frames, -1:],
    )
    out_featss = np.concatenate(pieces, axis=-1)

    audio_out = sig_process.feats_to_audio(out_featss)
    audio_out_2 = sig_process.feats_to_audio(mel_2)

    # Keep only the bare file name up to its first dot.
    file_name = file_name.rsplit('/', 1)[-1].partition('.')[0]
    target = config.singers[speaker_index]

    converted_path = os.path.join(
        config.output_dir, './{}_{}_autovcemb.wav'.format(file_name, target))
    sf.write(converted_path, audio_out, config.fs)

    target_path = os.path.join(
        config.output_dir,
        './{}_{}_target.wav'.format(file_name, config.singers[speaker_index]))
    sf.write(target_path, audio_out_2, config.fs)

    audio = sig_process.feats_to_audio(mel)
    original_path = os.path.join(
        config.output_dir, './{}_ori.wav'.format(file_name))
    sf.write(original_path, audio, config.fs)
def test_file_hdf5(self, file_name, speaker_index_2=None):
    """
    Run the model on an hdf5 feature file and optionally synthesise audio.

    The file is read with ``self.read_hdf5_file`` and its second return value
    is passed to ``self.process_file``. The user is then asked whether to
    synthesise the model output and/or the ground-truth features; the
    resulting wav files are written to ``config.output_dir``.

    Args:
        file_name: name of the hdf5 feature file, as accepted by
            ``self.read_hdf5_file``.
        speaker_index_2: optional index into ``config.singers``. When given,
            the singer name is embedded in the output file name. The original
            code referenced this name without defining it, which raised
            ``NameError`` as soon as synthesis was requested.
    """
    mel, stft = self.read_hdf5_file(file_name)
    # Only the first output is synthesised; F0 and voicing come from the
    # ground-truth features below.
    out_mel, _out_f0, _out_vuv = self.process_file(stft, self.sess)

    # Strip the real extension; a fixed ``[:-4]`` slice leaves the dot of
    # ".hdf5" behind.
    base_name = os.path.splitext(file_name)[0]

    synth = utils.query_yes_no("Synthesize output? ")
    if synth:
        frames = min(mel.shape[0], out_mel.shape[0])
        out_featss = np.concatenate(
            (out_mel[:frames], mel[:frames, -2:-1], mel[:frames, -1:]),
            axis=-1)
        audio_out = sig_process.feats_to_audio(out_featss)
        if speaker_index_2 is None:
            out_name = '{}_SIN.wav'.format(base_name)
        else:
            out_name = '{}_{}_SIN.wav'.format(
                base_name, config.singers[speaker_index_2])
        sf.write(os.path.join(config.output_dir, out_name), audio_out, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(config.output_dir, '{}_ori.wav'.format(base_name)),
            audio, config.fs)
def test_file_hdf5(self, file_name, speaker_index_2):
    """
    Convert an hdf5 feature file to a target singer using note information.

    The source singer is the second underscore-separated field of
    ``file_name``; the target is ``config.singers[speaker_index_2]``. When the
    two singers have different genders in ``config.genders``, the first note
    column is shifted by 12 (down for F->M, up for M->F) before the model is
    run. The result is plotted, and the user is asked whether to synthesise
    the output and/or the ground truth into ``config.output_dir``.
    """
    mel, notes = self.read_hdf5_file(file_name)

    source_name = file_name.split('_')[1]
    source_index = config.singers.index(source_name)
    source_gender = config.genders[source_name]
    print("Original singer is {}, a human {}".format(
        source_name, source_gender))

    target_gender = config.genders[config.singers[speaker_index_2]]
    print("Target singer is {}, a human {}".format(
        config.singers[speaker_index_2], target_gender))

    # Shift the note track when crossing genders (12 is presumably one
    # octave in semitones -- confirm the unit of the note column).
    direction = (source_gender, target_gender)
    if direction == ("F", "M"):
        notes[:, 0] = notes[:, 0] - 12
    elif direction == ("M", "F"):
        notes[:, 0] = notes[:, 0] + 12

    out_mel, out_f0, out_vuv = self.process_file(
        mel, source_index, speaker_index_2, notes, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:,:-6], "op": out_mel[:,:-4]},
        "Aperiodic": {"gt": mel[:,-6:-2], "op": out_mel[:,-4:]},
        "F0": {"gt": mel[:,-2], "op": out_f0, "notes": notes[:,0]},
        "Vuv": {"gt": mel[:,-1], "op": out_vuv},
    }
    self.plot_features(plot_dict)

    stem = file_name[:-5]

    if utils.query_yes_no("Synthesize output? "):
        predicted = np.concatenate((out_mel, out_f0, out_vuv), axis=-1)
        audio_out = sig_process.feats_to_audio(predicted)
        converted_name = './{}_{}_autovc_notes.wav'.format(
            stem, config.singers[speaker_index_2])
        sf.write(
            os.path.join(config.output_dir, converted_name),
            audio_out, config.fs)

    if utils.query_yes_no("Synthesize ground truth with vocoder? "):
        audio = sig_process.feats_to_audio(mel)
        original_name = './{}_{}_ori.wav'.format(
            stem, config.singers[source_index])
        sf.write(
            os.path.join(config.output_dir, original_name),
            audio, config.fs)
def test_file_wav_f0(self, file_name, f0_file):
    """
    Resynthesise a wav file with three different pitch tracks.

    The model is run on the wav file, and three wav files are written to
    ``config.output_dir``: one using the last two feature columns of the
    input, one using the pitch trajectory loaded from ``f0_file``, and one
    using that trajectory with uniform random noise in [0, 1) added per frame.
    """
    mel, stft = self.read_wav_file(file_name)

    # One timestamp per feature frame, spaced by the configured hop time.
    frame_times = np.arange(0, len(mel)*config.hoptime, config.hoptime)
    note_events = midi_process.open_f0_file(f0_file)
    trajectory = vamp_notes.note2traj(note_events, frame_times)
    f1 = sig_process.process_pitch(trajectory[:,0])

    out_mel, out_f0, out_vuv = self.process_file(stft, self.sess)

    file_name = file_name.split('/')[-1]
    stem = file_name[:-4]
    f0_label = f0_file.split('/')[-1]

    # 1) Model output with the input's own last two feature columns.
    with_input_pitch = np.concatenate(
        (out_mel[:mel.shape[0]], mel[:out_mel.shape[0], -2:]), axis = -1)
    audio_out = sig_process.feats_to_audio(with_input_pitch)
    sf.write(
        os.path.join(config.output_dir, '{}_SIN_YAM_SACf0.wav'.format(stem)),
        audio_out, config.fs)

    n_frames = f1.shape[0]
    pitch_column = f1[:,0:1]
    noisy_pitch = pitch_column + np.random.rand(n_frames)[:,np.newaxis]

    # 2) Model output with the pitch trajectory from the f0 file.
    with_file_pitch = np.concatenate(
        (out_mel[:n_frames], pitch_column, out_vuv[:n_frames]), axis = -1)
    audio_out = sig_process.feats_to_audio(with_file_pitch)
    sf.write(
        os.path.join(
            config.output_dir,
            '{}_SIN_YAM_f0_{}.wav'.format(stem, f0_label)),
        audio_out, config.fs)

    # 3) Same as (2) but with the noisy pitch trajectory.
    with_noisy_pitch = np.concatenate(
        (out_mel[:n_frames], noisy_pitch, out_vuv[:n_frames]), axis = -1)
    audio_out = sig_process.feats_to_audio(with_noisy_pitch)
    sf.write(
        os.path.join(
            config.output_dir,
            '{}_SIN_YAM_f0_{}_noise.wav'.format(stem, f0_label)),
        audio_out, config.fs)
def test_file_hdf5(self, file_name, speaker_index_2):
    """
    Convert an hdf5 feature file to a target singer using singer embeddings.

    The source singer is the second underscore-separated field of
    ``file_name``. The target embedding is read from a randomly chosen hdf5
    file in ``config.feats_dir`` that belongs to
    ``config.singers[speaker_index_2]``. The model output is plotted, and the
    user is asked whether to synthesise the conversion (plus the target file)
    and/or the ground truth into ``config.output_dir``.

    Genders are looked up in ``config.genders`` when that mapping exists and
    default to "M" otherwise. The original code hard-coded "M" for both
    singers, which made the cross-gender pitch-shift branches unreachable.

    Args:
        file_name: hdf5 feature file name of the form ``<x>_<singer>_...``.
        speaker_index_2: index of the target singer in ``config.singers``.
    """
    mel, singer_1 = self.read_hdf5_file(file_name)

    speaker_name = file_name.split('_')[1]
    speaker_index = config.singers.index(speaker_name)
    target_name = config.singers[speaker_index_2]

    genders = getattr(config, "genders", {})
    speaker_gender = genders.get(speaker_name, "M")
    print("Original singer is {}, a human {}".format(speaker_name, speaker_gender))

    speaker_file_2 = [
        x for x in os.listdir(config.feats_dir)
        if x.endswith('hdf5') and x.split('_')[1] == target_name
    ]
    mel_2, singer_2 = self.read_hdf5_file(random.choice(speaker_file_2))

    speaker_2_gender = genders.get(target_name, "M")
    print("Target singer is {}, a human {}".format(target_name, speaker_2_gender))

    out_mel = self.process_file(mel, singer_1, singer_2, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:,:-6], "op": out_mel[:,:-4]},
        "Aperiodic": {"gt": mel[:,-6:-2], "op": out_mel[:,-4:]},
    }
    self.plot_features(plot_dict)

    stem = file_name[:-5]

    synth = utils.query_yes_no("Synthesize output? ")
    if synth:
        # Shift the second-to-last column by 12 when crossing genders
        # (presumably one octave in semitones -- confirm the F0 unit).
        if speaker_gender == "F" and speaker_2_gender == "M":
            shift = -12
        elif speaker_gender == "M" and speaker_2_gender == "F":
            shift = 12
        else:
            shift = 0

        frames = min(mel.shape[0], out_mel.shape[0])
        pitch = mel[:frames, -2:-1]
        if shift:
            pitch = pitch + shift
        out_featss = np.concatenate(
            (out_mel[:frames], pitch, mel[:frames, -1:]), axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        audio_out_2 = sig_process.feats_to_audio(mel_2)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_autovcemb.wav'.format(stem, target_name)),
            audio_out, config.fs)
        sf.write(
            os.path.join(config.output_dir, './{}_target.wav'.format(stem)),
            audio_out_2, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_ori.wav'.format(stem, config.singers[speaker_index])),
            audio, config.fs)
def solo_unison_file_wav(self, file_name, std=0.5, num_singers=4, timing: int=5):
    """
    Build two synthetic unison mixes from a solo wav recording.

    The input is resynthesised once and written as ``<name>_ori.wav``. Then,
    for each of ``num_singers`` voices, the features are converted to a stored
    singer embedding (``female_<k>.hdf5`` for soprano/alto parts,
    ``male_<k>.hdf5`` for tenor/bass parts), the pitch column is perturbed and
    the frames are optionally rolled in time. Two mixes are accumulated on top
    of the resynthesised input and written to ``config.output_dir``:

    * ``*_unison.wav``: voices with converted timbre;
    * ``*_unison_notimbre.wav``: voices keeping the input's own features.

    Args:
        file_name: path to the wav file; the second underscore-separated field
            of its base name is the voice part followed by one extra character.
        std: scale of the uniform random offset in ``[0, std)`` added to every
            frame of the second-to-last feature column.
        num_singers: number of voices to add; both sums are divided by this.
        timing: when positive, each voice is rolled along the frame axis by a
            random integer drawn from ``[-timing, timing)``.

    Raises:
        ValueError: if the voice part is not soprano, alto, tenor or bass.
            Previously this surfaced as an unbound ``singer_2`` in the loop.
    """
    base_name = file_name.split('/')[-1]
    # Real stem; a fixed ``[:-5]`` slice removes one character too many from
    # a ".wav" name.
    stem = os.path.splitext(base_name)[0]
    voice = base_name.split('_')[1][:-1]

    if voice in ("soprano", "alto"):
        emb_template = "female_{}.hdf5"
    elif voice in ("tenor", "bass"):
        emb_template = "male_{}.hdf5"
    else:
        raise ValueError(
            "Unsupported voice part {!r} in file name {!r}".format(
                voice, file_name))

    _, mel = self.read_wav_file(file_name)
    singer_1 = utils.get_embedding_GE2E(file_name)

    audio = sig_process.feats_to_audio(mel)
    sf.write(
        os.path.join(config.output_dir, './{}_ori.wav'.format(base_name)),
        audio, config.fs)

    # The original re-reads the features after synthesising them (presumably
    # because feats_to_audio may modify its input -- confirm); kept as is.
    _, mel = self.read_wav_file(file_name)

    # Both mixes start from the resynthesised input. They are rebuilt with
    # out-of-place additions: the original aliased both names to ``audio``
    # and used ``+=`` on slices, so each mix also received the other's voices.
    output = audio
    output_nc = audio

    for count in range(num_singers):
        singer_2 = self.read_hdf5_file_emb(emb_template.format(count + 1))
        out_mel = self.process_file(mel, singer_1, singer_2, self.sess)

        frames = min(mel.shape[0], out_mel.shape[0])
        vuv = mel[:frames, -1:]
        f0 = mel[:frames, -2:-1] + np.random.rand(frames)[:, np.newaxis] * std

        out_featss = np.concatenate((out_mel[:frames], f0, vuv), axis=-1)
        # Drop the input's own last two columns before appending the new
        # ones, so both feature matrices have the same width and length.
        out_featss_nochange = np.concatenate(
            (mel[:frames, :-2], f0, vuv), axis=-1)

        if timing > 0:
            out_featss = np.roll(
                out_featss, np.random.randint(-timing, timing), 0)
            out_featss_nochange = np.roll(
                out_featss_nochange, np.random.randint(-timing, timing), 0)

        audio_out = sig_process.feats_to_audio(out_featss)
        audio_out_nochange = sig_process.feats_to_audio(out_featss_nochange)

        n = min(len(output), len(audio_out))
        output = output[:n] + audio_out[:n]
        n_nc = min(len(output_nc), len(audio_out_nochange))
        output_nc = output_nc[:n_nc] + audio_out_nochange[:n_nc]

    output = output / num_singers
    output_nc = output_nc / num_singers

    sf.write(
        os.path.join(
            config.output_dir,
            './{}_{}_{}_{}_unison.wav'.format(stem, std, num_singers, timing)),
        output, config.fs)
    sf.write(
        os.path.join(
            config.output_dir,
            './{}_{}_{}_{}_unison_notimbre.wav'.format(
                stem, std, num_singers, timing)),
        output_nc, config.fs)
def test_file_wav(self, file_name, speaker_index):
    """
    Run the model on a wav file for a given singer and optionally synthesise.

    The model's envelope, aperiodicity, F0 and voicing outputs are plotted
    against the input features. The user is then asked whether to synthesise
    the output (optionally shifting the predicted F0 by 12 down or up for a
    gender change) and whether to synthesise the ground truth. Files are
    written to ``config.output_dir``.
    """
    mel, stft = self.read_wav_file(file_name)
    out_mel, out_f0, out_vuv = self.process_file(stft, speaker_index, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:,:-6], "op": out_mel[:,:-4]},
        "Aperiodic": {"gt": mel[:,-6:-2], "op": out_mel[:,-4:]},
        "F0": {"gt": mel[:,-2], "op": out_f0},
        "Vuv": {"gt": mel[:,-1], "op": out_vuv},
    }
    self.plot_features(plot_dict)

    synth = utils.query_yes_no("Synthesize output? ")
    file_name = file_name.split('/')[-1]
    stem = file_name[:-4]

    if synth:
        # Decide on the pitch track first, then assemble the features once.
        pitch = out_f0
        if utils.query_yes_no("Change in gender? "):
            if utils.query_yes_no("Female to male?"):
                pitch = out_f0 - 12
            else:
                pitch = out_f0 + 12
        out_featss = np.concatenate((out_mel, pitch, out_vuv), axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        out_name = '{}_{}_SDN.wav'.format(stem, config.singers[speaker_index])
        sf.write(os.path.join(config.output_dir, out_name), audio_out, config.fs)

    if utils.query_yes_no("Synthesize ground truth with vocoder? "):
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(config.output_dir, '{}_ori.wav'.format(stem)),
            audio, config.fs)
def test_file_wav_f0(self, file_name, f0_file, speaker_index):
    """
    Run the model on a wav file and resynthesise it with an external pitch.

    A pitch trajectory is built from ``f0_file`` on the wav file's frame grid
    and plotted, together with the model outputs, against the input features.
    On request the model output is synthesised with that trajectory, and the
    ground truth is synthesised, into ``config.output_dir``.
    """
    mel, stft = self.read_wav_file(file_name)

    note_events = midi_process.open_f0_file(f0_file)
    # One timestamp per feature frame, spaced by the configured hop time.
    frame_times = np.arange(0, len(mel) * config.hoptime, config.hoptime)
    trajectory = vamp_notes.note2traj(note_events, frame_times)
    f1 = sig_process.process_pitch(trajectory[:, 0])

    out_mel, out_f0, out_vuv = self.process_file(stft, speaker_index, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:,:-6], "op": out_mel[:,:-4]},
        "Aperiodic": {"gt": mel[:,-6:-2], "op": out_mel[:,-4:]},
        "F0": {"gt": f1[:,0], "op": out_f0},
        "Vuv": {"gt": mel[:,-1], "op": out_vuv},
    }
    self.plot_features(plot_dict)

    synth = utils.query_yes_no("Synthesize output? ")
    file_name = file_name.split('/')[-1]
    stem = file_name[:-4]

    if synth:
        # Model output truncated to the trajectory length, followed by all
        # columns returned by process_pitch.
        out_featss = np.concatenate((out_mel[:f1.shape[0]], f1), axis=-1)
        audio_out = sig_process.feats_to_audio(out_featss)
        out_name = '{}_{}_SDN_f0_{}.wav'.format(
            stem, config.singers[speaker_index], f0_file.split('/')[-1])
        sf.write(os.path.join(config.output_dir, out_name), audio_out, config.fs)

    if utils.query_yes_no("Synthesize ground truth with vocoder? "):
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(config.output_dir, '{}_ori.wav'.format(stem)),
            audio, config.fs)
def solo_unison_file_hdf5(self, file_name, std=0.5, num_singers=4, timing: int = 5):
    """
    Build a synthetic "no timbre change" unison mix from an hdf5 feature file.

    The input is resynthesised once and written as ``<stem>_ori.wav``. Then,
    for each of ``num_singers`` voices, the pitch column of the input features
    is perturbed, the frames are optionally rolled in time, and the voice is
    synthesised and added on top of the resynthesised input. The sum divided
    by ``num_singers`` is written as ``*_unison_notimbre.wav`` in
    ``config.output_dir``.

    Args:
        file_name: hdf5 feature file name starting with "csd" whose second
            underscore-separated field is a voice part (soprano, alto, tenor
            or bass) followed by one extra character; that field must also be
            an entry of ``config.singers``.
        std: scale of the uniform random offset in ``[0, std)`` added to every
            frame of the second-to-last feature column.
        num_singers: number of voices to add.
        timing: when positive, each voice is rolled along the frame axis by a
            random integer drawn from ``[-timing, timing)``.

    Raises:
        ValueError: if the file name does not match the expected pattern.
            ``ValueError`` subclasses ``Exception``, so existing handlers
            still catch it.
    """
    part = file_name.split('_')[1]
    voice = part[:-1]
    if (voice not in ("soprano", "alto", "tenor", "bass")
            or not file_name.startswith("csd")):
        raise ValueError("Input Error")

    # Candidate target singers: NUS singers of the gender matching the part.
    wanted_gender = "F" if voice in ("soprano", "alto") else "M"
    speaker_indecis = [
        config.singers.index(x)
        for x in config.genders
        if config.genders[x] == wanted_gender and x in config.nus_singers
    ]

    _, mel = self.read_hdf5_file(file_name)
    speaker_index = config.singers.index(part)

    stem = file_name[:-5]
    audio = sig_process.feats_to_audio(mel)
    sf.write(
        os.path.join(config.output_dir, './{}_ori.wav'.format(stem)),
        audio, config.fs)

    # Re-read the features after synthesis (presumably because feats_to_audio
    # may modify its input -- confirm). The original bound the whole return
    # value to ``mel`` here, although the first call unpacks two values.
    _, mel = self.read_hdf5_file(file_name)

    output = audio
    for _ in range(num_singers):
        speaker_index_2 = random.choice(speaker_indecis)
        # The converted features are only used for their frame count; the
        # mix keeps the input's own features.
        out_mel = self.process_file(mel, speaker_index, speaker_index_2,
                                    self.sess)

        frames = min(mel.shape[0], out_mel.shape[0])
        f0 = mel[:frames, -2:-1] + np.random.rand(frames)[:, np.newaxis] * std
        # Drop the input's own last two columns before appending the new
        # ones, so the matrix keeps the input's width and a single length.
        out_featss = np.concatenate(
            (mel[:frames, :-2], f0, mel[:frames, -1:]), axis=-1)

        if timing > 0:
            out_featss = np.roll(
                out_featss, np.random.randint(-timing, timing), 0)

        audio_out = sig_process.feats_to_audio(out_featss)
        # Out-of-place sum so the already-written ``audio`` is not modified.
        n = min(len(output), len(audio_out))
        output = output[:n] + audio_out[:n]

    output = output / num_singers
    sf.write(
        os.path.join(
            config.output_dir,
            './{}_{}_{}_{}_unison_notimbre.wav'.format(
                stem, std, num_singers, timing)),
        output, config.fs)
def test_file_wav(self, file_name, speaker_index_1, speaker_index):
    """
    Convert a wav file from one known singer to another and optionally
    synthesise the result.

    Both singers' genders are read from ``config.genders`` and printed. The
    model output is plotted against the input. On request, the output is
    synthesised with the input's last two feature columns, shifting the
    second-to-last by 12 down (F->M) or up (M->F) when the genders differ, and
    the ground truth is synthesised. Files go to ``config.output_dir``.

    Args:
        file_name: path to the source wav file.
        speaker_index_1: index of the source singer in ``config.singers``.
        speaker_index: index of the target singer in ``config.singers``.
    """
    source_name = config.singers[speaker_index_1]
    speaker_gender = config.genders[source_name]
    print("Original singer is {}, a human {}".format(
        source_name, speaker_gender))

    target_name = config.singers[speaker_index]
    speaker_2_gender = config.genders[target_name]
    print("Target singer is {}, a human {}".format(
        target_name, speaker_2_gender))

    _, mel = self.read_wav_file(file_name)
    out_mel = self.process_file(mel, speaker_index_1, speaker_index, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
        "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
    }
    self.plot_features(plot_dict)

    # Output names use the bare file stem. The original ``file_name[:-5]``
    # removed one character too many from a ".wav" name and kept any
    # directory prefix, unlike the other wav entry points.
    stem = os.path.splitext(os.path.basename(file_name))[0]

    synth = utils.query_yes_no("Synthesize output? ")
    if synth:
        if speaker_gender == "F" and speaker_2_gender == "M":
            shift = -12
        elif speaker_gender == "M" and speaker_2_gender == "F":
            shift = 12
        else:
            shift = 0

        frames = min(mel.shape[0], out_mel.shape[0])
        pitch = mel[:frames, -2:-1]
        if shift:
            pitch = pitch + shift
        out_featss = np.concatenate(
            (out_mel[:frames], pitch, mel[:frames, -1:]), axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_autovc.wav'.format(stem, target_name)),
            audio_out, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_ori.wav'.format(stem, source_name)),
            audio, config.fs)