def walk_directory(singer_name: str, mode: str='sing'):
    """
    Process every recording of one NUS singer and write the results out.

    Every ``.wav`` file (hidden files excluded) found in
    ``config.raw_dirs['nus']/<singer_name>/<mode>`` is loaded, a GE2E
    embedding is computed from the same path, and the audio is passed to
    ``audio_process.process_audio``. The annotation file sharing the
    recording's stem (``<stem>.txt``) is opened and aligned against the
    returned timestamps. The feat/note/stft/phoneme items are then walked in
    lockstep, length-matched with ``utils.match_time`` and handed to
    ``write_data.write_data`` as ``nus_<singer>_<stem>_<index>.hdf5``.

    Arguments:
        singer_name: The name of the singer (a folder below the NUS root).
        mode: Sub-folder to process, either sing or read.
    """
    print("Processing data for NUS singer {}".format(singer_name))
    print("Processing the {} directory".format(mode))

    source_dir = os.path.join(config.raw_dirs['nus'], singer_name, mode)

    # Keep visible .wav files only, in directory-listing order.
    wav_names = []
    for entry in os.listdir(source_dir):
        if entry.startswith('.') or not entry.endswith('.wav'):
            continue
        wav_names.append(entry)

    for index, wav_name in enumerate(wav_names):
        utils.progress(index, len(wav_names), "folder processed")

        wav_path = os.path.join(source_dir, wav_name)
        stem = wav_name[:-4]  # file name without the ".wav" suffix

        audio, _ = audio_process.load_audio(wav_path)
        embedding = get_embedding_GE2E(wav_path)
        _, timestamps, feat, note, stft = audio_process.process_audio(audio)

        lab_entries = midi_process.open_lab_file(
            os.path.join(source_dir, stem + ".txt"))
        phos = np.array(
            midi_process.pho_segment_allign(lab_entries, timestamps))

        for seg_index, segment in enumerate(zip(feat, note, stft, phos)):
            featy, notey, stfty, phosy = utils.match_time(list(segment))
            singer_dict = {
                'embedding': embedding,
                'feats': featy,
                'notes': notey,
                'phons': phosy,
                'stfts': stfty,
            }
            out_name = "nus_{}_{}_{}.hdf5".format(singer_name, stem, seg_index)
            write_data.write_data(singer_dict, out_name)
def walk_directory(wav_dir):
    """
    Process every vocal track of the DAMP intonation dataset.

    Each visible file ending in ``m4a`` under ``<wav_dir>/vocal_tracks`` is
    loaded and passed to ``audio_process.process_audio``. The account that
    recorded the track is looked up in ``<wav_dir>/intonation.csv`` through
    the file's performance id (the file name up to the first dot). The
    feat/note/stft items are walked in lockstep, length-matched with
    ``utils.match_time`` and handed to ``write_data.write_data`` as
    ``damp_<account>_<song>_<index>.hdf5``.

    A failure while processing or writing one track is reported on stdout
    and the loop moves on to the next track.

    Arguments:
        wav_dir: Root directory of the dataset; it must contain the
            ``vocal_tracks`` folder and the ``intonation.csv`` file.
    """
    print("Processing data for the DAMP intonation dataset")

    full_dir = os.path.join(wav_dir, 'vocal_tracks')
    songs = [
        x for x in os.listdir(full_dir)
        if x.endswith('m4a') and not x.startswith('.')
    ]
    df = pd.read_csv(os.path.join(wav_dir, "intonation.csv"))

    # NOTE(review): this list is filled but never read. The original layout
    # does not show whether the membership test below was meant to restrict
    # processing to config.damp_singers; confirm before relying on it.
    singers = []

    for count, lf in enumerate(songs):
        performance_id = lf.split('.')[0]

        # Look up the account of the track being processed. The previous
        # code always queried songs[0], which labelled every output file
        # with the first performance's account.
        # The column is addressed as ' account_id' (with a leading space).
        singer_name = df.query(
            'performance_id == "{}"'.format(performance_id)
        )[' account_id'].values[0].strip()
        if singer_name in config.damp_singers:
            singers.append(singer_name)

        # Underscores separate the fields of the output file name, so they
        # are replaced inside the song name.
        song_name = performance_id.replace('_', '-')

        utils.progress(count, len(songs), "folder processed")
        audio, fs = audio_process.load_audio(os.path.join(full_dir, lf))

        try:
            segments, timestamps, feat, note, stft = \
                audio_process.process_audio(audio)
            for j, (fea, nots, stf) in enumerate(zip(feat, note, stft)):
                # Use locals rather than writing back into the sequences
                # that are being iterated.
                featy, notey, stfty = utils.match_time([fea, nots, stf])
                singer_dict = {
                    'feats': featy,
                    'notes': notey,
                    'stfts': stfty,
                }
                write_data.write_data(
                    singer_dict,
                    "damp_{}_{}_{}.hdf5".format(singer_name, song_name, j))
        except Exception:
            # Best effort: skip the broken track, but let KeyboardInterrupt
            # and SystemExit through (a bare except would swallow them).
            print("Error in file {}".format(song_name))
def walk_directory(song_name: str):
    """
    Process the individual voice recordings of one CSD song.

    Every visible ``.wav`` file in
    ``config.raw_dirs['choralsingingdataset']/<song_name>/IndividualVoices``
    is processed, except files ending in ``-24b.wav``. The singer label is
    built from the second and third underscore-separated fields of the file
    name. The feat/note/stft items returned by
    ``audio_process.process_audio`` are walked in lockstep, length-matched
    with ``utils.match_time`` and handed to ``write_data.write_data`` as
    ``csd_<singer>_<song>_<index>.hdf5``.

    A failure while processing or writing one file prints that file's name
    and the loop moves on to the next file.

    Arguments:
        song_name: The name of the song (a folder below the CSD root).
    """
    print("Processing data for CSD song {}".format(song_name))

    wav_dir = config.raw_dirs['choralsingingdataset']
    full_dir = os.path.join(wav_dir, song_name, 'IndividualVoices')
    sing_wav_files = [
        x for x in os.listdir(full_dir)
        if x.endswith('.wav')
        and not x.startswith('.')
        and not x.endswith('-24b.wav')
    ]

    for count, lf in enumerate(sing_wav_files):
        # Split once; fields 1 and 2 together form the singer label.
        name_fields = lf.split('_')
        singer_name = name_fields[1] + name_fields[2].replace('.wav', '')

        utils.progress(count, len(sing_wav_files), "folder processed")
        audio, fs = audio_process.load_audio(os.path.join(full_dir, lf))

        try:
            segments, timestamps, feat, note, stft = \
                audio_process.process_audio(audio)
            for j, (fea, nots, stf) in enumerate(zip(feat, note, stft)):
                # Use locals rather than writing back into the sequences
                # that are being iterated.
                featy, notey, stfty = utils.match_time([fea, nots, stf])
                singer_dict = {
                    'feats': featy,
                    'notes': notey,
                    'stfts': stfty,
                }
                write_data.write_data(
                    singer_dict,
                    "csd_{}_{}_{}.hdf5".format(singer_name, song_name, j))
        except Exception:
            # Best effort: report the failing file and continue, but let
            # KeyboardInterrupt and SystemExit through.
            print(lf)
def train(self):
    """
    Train the model epoch by epoch, running a validation pass after each.

    The first epoch is derived from the current global step divided by
    ``config.autovc_batches_per_epoch_train``. For every epoch, each batch
    from the training generator goes through ``self.train_model`` and each
    batch from the validation generator through ``self.validate_model``; the
    returned summaries are written to the matching summary writer with the
    epoch as step. The four losses of each pass are averaged over the number
    of batches seen, printed every ``config.print_every`` epochs, and the
    model is saved every ``config.save_every`` epochs and after the final
    epoch.
    """
    train_labels = ("Final Loss", "Recon Loss", "Recon Loss_0 ", "Content Loss")
    val_labels = ("Val Final Loss", "Val Recon Loss", "Val Recon Loss_0 ", "Val Content Loss")

    start_epoch = int(self.sess.run(tf.train.get_global_step()) / (config.autovc_batches_per_epoch_train))
    print("Start from: %d" % start_epoch)

    for epoch in range(start_epoch, config.autovc_num_epochs):
        train_batches = data_pipeline.data_gen_vc()
        val_batches = data_pipeline.data_gen_vc(mode = 'Val')

        # Running sums, in label order: final, recon, recon_0, content.
        train_totals = [0, 0, 0, 0]
        val_totals = [0, 0, 0, 0]
        report = {}

        seen = 0
        tic = time.time()

        with tf.variable_scope('Training'):
            for feats_targs, targets_speakers in train_batches:
                final_loss, recon_loss, recon_loss_0, content_loss, summary_str = self.train_model(
                    feats_targs, targets_speakers, self.sess)
                batch_losses = (final_loss, recon_loss, recon_loss_0, content_loss)
                for slot, value in enumerate(batch_losses):
                    train_totals[slot] += value

                self.train_summary_writer.add_summary(summary_str, epoch)
                self.train_summary_writer.flush()

                utils.progress(seen, config.autovc_batches_per_epoch_train, suffix = 'training done')
                seen += 1

            # Average over the batches actually seen.
            train_means = [total / seen for total in train_totals]
            for label, mean in zip(train_labels, train_means):
                report[label] = mean

        seen = 0

        with tf.variable_scope('Validation'):
            for feats_targs, targets_speakers in val_batches:
                final_loss, recon_loss, recon_loss_0, content_loss, summary_str = self.validate_model(
                    feats_targs, targets_speakers, self.sess)
                batch_losses = (final_loss, recon_loss, recon_loss_0, content_loss)
                for slot, value in enumerate(batch_losses):
                    val_totals[slot] += value

                self.val_summary_writer.add_summary(summary_str, epoch)
                self.val_summary_writer.flush()

                utils.progress(seen, config.autovc_batches_per_epoch_val, suffix = 'validation done')
                seen += 1

            val_means = [total / seen for total in val_totals]
            for label, mean in zip(val_labels, val_means):
                report[label] = mean

        toc = time.time()

        finished = epoch + 1
        if finished % config.print_every == 0:
            self.print_summary(report, epoch, toc-tic)
        if finished % config.save_every == 0 or finished == config.autovc_num_epochs:
            self.save_model(self.sess, finished, config.autovc_emb_log_dir)
def get_stats():
    """
    Get the maximum and minimum feat values for the datasets to use.

    Every ``.hdf5`` file in ``config.feats_dir`` whose name prefix (the text
    before the first underscore, upper-cased) is listed in
    ``config.datasets`` is opened and its ``feats`` array is read. Files
    with ``len(feats) <= config.max_phr_len`` are skipped and reported at
    the end. For the others, zeros in the second-to-last column (called
    ``f0`` in the code) are replaced by the median of that column's positive
    values before the column-wise maxima and minima are folded into the
    running statistics.

    The results are written to ``config.stat_file`` as the datasets
    ``feats_maximus`` and ``feats_minimus``, after which
    ``config.change_variable("stat_prep", "prep", "True")`` is called.
    """
    voc_list = [
        x for x in os.listdir(config.feats_dir)
        if x.endswith('.hdf5') and x.split('_')[0].upper() in config.datasets
    ]

    # NOTE(review): the running extremes start at 0 and 1000 rather than
    # -inf/+inf, so a column that is always negative keeps a maximum of 0
    # and a column that is always above 1000 keeps a minimum of 1000. Kept
    # as is, because existing stat files were produced with these values.
    max_feat = np.zeros(66)
    min_feat = np.ones(66) * 1000

    too_small = []

    for count, voc_to_open in enumerate(voc_list, start=1):
        with h5py.File(os.path.join(config.feats_dir, voc_to_open), "r") as voc_file:
            feats = voc_file["feats"][()]

        if len(feats) <= config.max_phr_len:
            # Record the file and move on; it is reported after the loop.
            too_small.append(voc_to_open)
        else:
            f0 = feats[:, -2]
            med = np.median(f0[f0 > 0])
            f0[f0 == 0] = med
            feats[:, -2] = f0

            file_max = np.array(feats).max(axis=0)
            file_min = np.array(feats).min(axis=0)

            # fmax/fmin ignore NaN operands. That keeps a NaN column (e.g.
            # when no positive f0 value exists and the median is NaN) from
            # overwriting the running statistics, exactly as the previous
            # element-wise ">" / "<" comparisons did.
            width = len(file_max)
            max_feat[:width] = np.fmax(max_feat[:width], file_max)
            min_feat[:width] = np.fmin(min_feat[:width], file_min)

        utils.progress(count, len(voc_list), "Processed")

    if too_small:
        print("Skipped {} file(s) not longer than config.max_phr_len: {}".format(
            len(too_small), ", ".join(too_small)))

    with h5py.File(config.stat_file, mode='w') as hdf5_file:
        hdf5_file.create_dataset("feats_maximus", [66], np.float32)
        hdf5_file.create_dataset("feats_minimus", [66], np.float32)
        hdf5_file["feats_maximus"][:] = max_feat
        hdf5_file["feats_minimus"][:] = min_feat

    config.change_variable("stat_prep", "prep", "True")