def eval(logdir1, logdir2):
    """Evaluate Net2 on one test batch and log the summarized loss.

    Restores Net1 weights from ``logdir1`` (dropping its global step so it
    cannot clobber Net2's) and Net2 weights from ``logdir2``, runs a single
    batch through an offline predictor, and writes the returned loss summary
    event into ``logdir2``.

    :param logdir1: Checkpoint directory of the Net1 (phoneme) model.
    :param logdir2: Checkpoint directory of the Net2 model; also the summary
        output directory.
    """
    # Load graph
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.test2.data_path, hp.test2.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Ignore Net1's global_step so it does not overwrite Net2's.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    # BUG FIX: the datapoint is a tuple; passing it whole gave the predictor
    # one argument where it expects the individual inputs (compare eval() for
    # Net1 below, which unpacks). Unpack and feed the named inputs.
    x_mfccs, y_spec, _ = next(df().get_data())
    summ_loss, = predictor(x_mfccs, y_spec)

    writer = tf.summary.FileWriter(logdir2)
    writer.add_summary(summ_loss)
    writer.close()
def do_convert(args, logdir1, logdir2):
    """Convert a batch of audio with Net2 and write audio/PPG TF summaries.

    Restores Net2 from ``logdir2`` (an explicit ``args.ckpt`` wins over the
    latest checkpoint) and Net1 from ``logdir1``, converts one dataflow batch,
    then writes audio summaries 'A'/'B' and a 'PPG' heatmap image to
    ``logdir2``.
    """
    # Build the model graph and the conversion dataflow.
    net = Net2()
    dataflow = Net2DataFlow(hp.convert.data_path, hp.convert.batch_size)

    # Resolve checkpoints: an explicit -ckpt beats the latest one in logdir2.
    if args.ckpt:
        ckpt2 = '{}/{}'.format(logdir2, args.ckpt)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)
    ckpt1 = tf.train.latest_checkpoint(logdir1)

    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        # Net1's global_step is skipped so Net2's step survives the restore.
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    audio, y_audio, ppgs = convert(predictor, dataflow)

    # Write the result
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    writer = tf.summary.FileWriter(logdir2)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
        writer.add_summary(summ)
        writer.close()
def do_convert(args, logdir):
    """Convert audio with the model, saving wav files and TF summaries.

    Restores the latest (or explicitly named) checkpoint from ``logdir``,
    converts one dataflow batch, writes the source and converted audio to
    ``a.wav`` / ``b.wav``, and logs both as TF audio summaries in ``logdir``.

    :param args: Parsed CLI args; uses ``args.ckpt`` (optional checkpoint name).
    :param logdir: Checkpoint directory; also the summary output directory.
    """
    # Load graph
    model = Net()

    df = NetDataFlow(hp.convert.data_path, hp.convert.batch_size)

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    session_inits = []
    if ckpt:
        session_inits.append(SaverRestore(ckpt))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio = convert(predictor, df)

    # BUG FIX: the sample rate was hard-coded to 16000 while the summaries
    # below use hp.default.sr; use the configured rate so the wav files and
    # the TF summaries agree.
    soundfile.write("a.wav", y_audio[0], hp.default.sr, format="wav", subtype="PCM_16")
    soundfile.write("b.wav", audio[0], hp.default.sr, format="wav", subtype="PCM_16")

    # Write the result
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    writer = tf.summary.FileWriter(logdir)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
        writer.add_summary(summ)
        writer.close()
def set_enviroment(args, logdir1, logdir2):
    """Restore Net1/Net2 checkpoints and build an OfflinePredictor.

    Also selects the visible GPU (or forces CPU with '-1') through
    CUDA_VISIBLE_DEVICES and prints how long the setup took.
    (The public name keeps the original spelling for caller compatibility.)

    :param args: Parsed CLI args; uses ``args.ckpt`` and ``args.gpu``.
    :param logdir1: Checkpoint directory of the Net1 model.
    :param logdir2: Checkpoint directory of the Net2 model.
    :return: An OfflinePredictor ready for conversion.
    """
    # Load graph
    started_at = datetime.datetime.now()

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if args.ckpt:
        ckpt2 = '{}/{}'.format(logdir2, args.ckpt)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)

    model = Net2ForConvert()

    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        # Skip Net1's global_step so it does not override Net2's.
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    # GPU selection: fall back to CPU-only when no GPU was requested.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.gpu else '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)
    )
    print("OfflinePredictor")
    predictor = OfflinePredictor(pred_conf)

    elapsed = datetime.datetime.now() - started_at
    print("Setting Environment time:{}s".format(elapsed.seconds))

    return predictor
def do_convert(logdir1, logdir2, input_path, output_path):
    """Convert a single input file and write the result to ``output_path``.

    Restores Net1 (``logdir1``, global step ignored) and Net2 (``logdir2``),
    runs the conversion and writes the converted audio at ``hp.default.sr``.

    :param logdir1: Checkpoint directory of the Net1 model.
    :param logdir2: Checkpoint directory of the Net2 model.
    :param input_path: Path of the audio file to convert (non-empty).
    :param output_path: Destination wav path.
    """
    # Load graph
    model = Net2()
    model.actual_duration = librosa.core.get_duration(filename=input_path, sr=hp.default.sr)

    # TODO isolate out logdirs, uhh and how to pre-dl from s3?
    assert len(input_path) > 0, "must be non-empty input path"

    df = Net2DataFlow(data_path=input_path, batch_size=1)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    # ROBUSTNESS: fail with a clear message instead of letting
    # SaverRestore(None) raise an obscure error downstream.
    assert ckpt1 is not None, "no checkpoint found in '{}'".format(logdir1)
    assert ckpt2 is not None, "no checkpoint found in '{}'".format(logdir2)
    session_inits = [
        SaverRestore(ckpt2),
        # Net1's global_step must not clobber Net2's.
        SaverRestore(ckpt1, ignore=['global_step']),
    ]
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, df)

    write_wav(audio[0], hp.default.sr, output_path)
def train(args, logdir1, logdir2):
    """Train Net2 with AutoResumeTrainConfig, warm-starting from checkpoints.

    Preprocesses the data, prints the dataset configuration (then pauses so
    the operator can read it), restores Net2 from ``logdir2`` and Net1 from
    ``logdir1`` when available, and launches replicated multi-GPU training.

    :param args: Parsed CLI args; uses ``args.ckpt`` and ``args.gpu``
        (comma-separated GPU ids).
    :param logdir1: Checkpoint directory of the Net1 model.
    :param logdir2: Log/checkpoint directory for Net2.
    """
    # model
    model = Net2()

    # NOTE(review): `data_path` is not defined in this function — presumably a
    # module-level global; confirm.
    preprocessing(data_path, logdir2)

    # dataflow
    df = Net2DataFlow(data_path, hp.train2.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir2)

    dataset_size = len(glob.glob(data_path + '/wav/*.wav'))
    # BUG FIX: "\t\d" is a malformed escape that printed a literal backslash
    # and broke the alignment; use "\t\t" like the sibling prints below.
    print("\t\tdata_path : ", data_path)
    print("\t\tDataset Size : ", dataset_size)
    print("\t\tBatch Size : ", hp.train2.batch_size)
    print("\t\tSteps per epoch : ", (dataset_size // hp.train2.batch_size))
    from time import sleep
    sleep(10)  # give the operator a chance to read the config before training

    session_inits = []
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        # Net1's global_step must not clobber Net2's.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    train_conf = AutoResumeTrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=8)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=dataset_size // hp.train2.batch_size,
        session_init=ChainInit(session_inits))
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))
        gpu_list = list(map(int, args.gpu.split(',')))

    # NOTE(review): gpu_list is undefined when args.gpu is not given, so the
    # line below would raise NameError — confirm args.gpu is always provided.
    trainer = SyncMultiGPUTrainerReplicated(gpu_list)
    launch_train_with_config(train_conf, trainer=trainer)
def do_convert(args, logdir1, logdir2):
    """Convert the single audio file ``args.file`` and save the result.

    Restores Net1/Net2 checkpoints (explicit ``args.net1``/``args.net2``
    names win over the latest checkpoints), writes the converted wav into
    ``args.savepath`` keeping the input's basename, and logs audio and PPG
    summaries there as TF events.
    """
    # Load graph
    model = Net2()

    data = get_mfccs_and_spectrogram(args.file)

    if args.net1:
        ckpt1 = '{}/{}'.format(logdir1, args.net1)
    else:
        ckpt1 = tf.train.latest_checkpoint(logdir1)
    if args.net2:
        ckpt2 = '{}/{}'.format(logdir2, args.net2)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)

    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        # Net1's global_step is skipped so Net2's step survives the restore.
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    predictor = OfflinePredictor(PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    audio, y_audio, ppgs = convert(predictor, data)

    # Output name: keep the input's basename, swap its extension for .wav.
    target_file = args.file.split('/')[-1]
    stem, _ext = os.path.splitext(target_file)
    converted_file = stem + '.wav'
    write_wav(audio[0], hp.Default.sr, args.savepath + converted_file)

    # Write the result
    tf.summary.audio('A', y_audio, hp.Default.sr, max_outputs=hp.Convert.batch_size)
    tf.summary.audio('B', audio, hp.Default.sr, max_outputs=hp.Convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    writer = tf.summary.FileWriter(args.savepath)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
        writer.add_summary(summ)
        writer.close()
def train(args, logdir2):
    """Train Net2, warm-starting from Net1/Net2 checkpoints when present.

    Restores Net2 from ``logdir2`` (explicit ``args.ckpt`` wins over the
    latest checkpoint) and Net1 from ``logdir1`` (global step ignored), then
    launches single-trainer training with a GPU memory cap.
    """
    # model
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.train2.data_path, hp.train2.batch_size)

    # set logger for event and model saver
    logger.set_logger_dir(logdir2)

    # Cap per-process GPU memory; soft placement keeps CPU-only ops working.
    session_conf = tf.ConfigProto(
        # log_device_placement=True,
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(
            # allow_growth=True,
            per_process_gpu_memory_fraction=0.6,
        ),
    )

    restorers = []
    if args.ckpt:
        ckpt2 = '{}/{}'.format(logdir2, args.ckpt)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        # Net1's global_step is skipped so Net2's step survives the restore.
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(restorers),
        session_config=session_conf
    )
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    #trainer = SyncMultiGPUTrainerParameterServer(hp.train2.num_gpu)
    trainer = SimpleTrainer()

    launch_train_with_config(train_conf, trainer=trainer)
def init_predictor(ckpt_dir):
    """
    Build an OfflinePredictor for the 'Net1' phoneme classifier from the
    latest tf-checkpoint found in ``ckpt_dir``.

    :param ckpt_dir: Checkpoint directory.
    :return: OfflinePredictor
    """
    latest = tf.train.latest_checkpoint(ckpt_dir)
    assert latest is not None, "Failed to load checkpoint in '{}'".format(
        ckpt_dir)

    config = PredictConfig(
        model=Net1(),
        input_names=['x_mfccs'],
        output_names=['net1/ppgs'],
        # The checkpoint's global_step is deliberately not restored.
        session_init=ChainInit([SaverRestore(latest, ignore=['global_step'])]))
    return OfflinePredictor(config)
def do_convert(args, logdir1, logdir2, input_dir):
    """Batch-convert every audio file under ``input_dir``.

    Mirrors each source path into the configured conversion output tree
    (swapping the wv1/wv2 extension for .wav), skips files already converted,
    and writes each result as a 16-bit PCM wav at ``hp.default.sr``.
    """
    # Load graph
    model = Net2()

    # input_dir = hp.convert.data_base_dir_original + hp.convert.data_path
    df = Net2DataFlow(input_dir, hp.convert.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if args.ckpt:
        ckpt2 = '{}/{}'.format(logdir2, args.ckpt)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)

    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        # Net1's global_step is skipped so Net2's step survives the restore.
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))
    predictor = OfflinePredictor(PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    # loop over all the audio files
    for wav_file in df.wav_files:
        # Mirror the source path into the conversion output tree.
        out_path = wav_file.replace(hp.convert.data_base_dir_original,
                                    hp.convert.data_base_dir_convert)
        # change file extension from wv1/wv2 to wav
        out_path = out_path[:-2] + 'av'

        if os.path.isfile(out_path):
            # file is already present, move on to the next one.
            print("skipping " + wav_file)
            continue
        print("converting " + wav_file)

        # Convert feature chunks one by one, then stitch the audio together
        # and trim to the true source length.
        audio_len, feats = df.get_features(wav_file)
        pieces = []
        for feat in feats:
            input_arr = ([feat[0]], [feat[1]], [feat[2]])
            audio, ppgs = convert(predictor, input_arr)
            pieces.append(
                (audio[0] * hp.convert.amplitude_multiplier).astype(np.int16))
        scipy.io.wavfile.write(out_path, hp.default.sr,
                               np.concatenate(pieces)[:audio_len])
def train(args, logdir2):
    """Train Net2 from mel/PPG features, optionally resuming from a checkpoint.

    :param args: Parsed CLI args; uses ``args.ckpt`` (checkpoint name) and
        ``args.gpu`` (comma-separated GPU ids).
    :param logdir2: Log/checkpoint directory for Net2.
    """
    # model
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.train2.mel_path, hp.train2.ppgs_path, hp.train2.batch_size)

    session_inits = []
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    '''
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    '''
    train_conf = TrainConfig(
        model=model,
        data=QueueInput(df(n_prefetch=1000, n_thread=4)),
        callbacks=[
            # TODO save on prefix net2
            ModelSaver(checkpoint_dir=logdir2),
            # ConvertCallback(logdir2, hp.train2.test_per_epoch),
        ],
        max_epoch=hp.train2.num_epochs,
        steps_per_epoch=hp.train2.steps_per_epoch,
        session_init=ChainInit(session_inits))
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
        train_conf.nr_tower = len(args.gpu.split(','))

    trainer = SyncMultiGPUTrainerReplicated(hp.train2.num_gpu)

    # BUG FIX: log-message typo "strated" -> "started".
    print("started trainer")

    launch_train_with_config(train_conf, trainer=trainer)
def eval(logdir):
    """Run one evaluation batch and write the resulting loss summary.

    Restores the latest checkpoint from ``logdir`` (if any), feeds a single
    test batch through an offline predictor, and writes the returned summary
    event back into ``logdir``.
    """
    # Build the model graph and the test dataflow.
    net = Net()
    dataflow = NetDataFlow(hp.test.data_path, hp.test.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)
    restorers = [SaverRestore(ckpt)] if ckpt else []
    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    # One batch: (reference mel, target spectrogram, unused third element).
    r_mel, t_spec, _ = next(dataflow().get_data())
    summ_loss, = predictor(r_mel, t_spec)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.close()
data=QueueInput(ProjDataFlow(Ppy)), callbacks=[ PeriodicTrigger(ModelSaver(), every_k_epochs=5), PeriodicTrigger(VolumeSaver(model), every_k_epochs=5), # prevent learning in the first epoch # MemInitHyperParamSetter('learning_rate_mask',(0,1)), # controls learning rate as a function of epoch HyperParamSetterWithFunc('learning_rate', learning_rate_fun), # GraphProfiler() # PeakMemoryTracker() # GPUUtilizationTracker(), ], steps_per_epoch=steps_per_epoch, max_epoch=200000, # first time load model from checkpoint and reset GRU state session_init=ChainInit([TryResumeTraining()]), #,ResetInit(model)]) # session_config=tf.ConfigProto(log_device_placement=True) #config_gpus(1) ) trainer = SimpleTrainer() # with tf.contrib.tfprof.ProfileContext(logger.get_logger_dir()) as pctx: launch_train_with_config(traincfg, trainer) ################# JUNK ############### # config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)) # workaround # with tf.Session(config=config): # pass # Pin = tf.placeholder(tf.float32, shape=P_py.shape)
def ckpt2mel(predictor, ppgs_dir, mel_dir, save_dir):
    """Predict mel/LPC features for every PPG file and save them as .npy.

    For each file in ``ppgs_dir``, loads its (mel, ppgs) pair, runs the
    predictor, reshapes the prediction to 2-D and saves it under
    ``save_dir/lpc20`` or ``save_dir/lpc32`` depending on hp.default.n_mels.

    :param predictor: OfflinePredictor taking (mel, ppgs) and returning mel.
    :param ppgs_dir: Directory of input PPG .npy files.
    :param mel_dir: Directory of matching mel .npy files.
    :param save_dir: Root output directory.
    """
    print("get into ckpt")
    for fi in os.listdir(ppgs_dir):
        print("fi", fi)
        mel, ppgs = queue_input(fi, ppgs_dir, mel_dir)
        pred_mel = predictor(mel, ppgs)
        pred_mel = np.array(pred_mel)
        print("pred_mel", pred_mel.shape)
        # Collapse the leading (output, batch) axes down to (length, width).
        length = pred_mel.shape[2]
        width = pred_mel.shape[3]
        pred_mel = pred_mel.reshape((length, width))
        save_name = fi.split('.npy')[0]
        # BUG FIX: the two n_mels branches had been fused onto single physical
        # lines ("print(...)if ..."), which is a syntax error; they are now
        # separate, properly indented if-blocks.
        if hp.default.n_mels == 20:
            npy_dir = os.path.join(save_dir, 'lpc20')
            if not os.path.exists(npy_dir):
                os.makedirs(npy_dir)
            npy_path = os.path.join(npy_dir, '%s_20.npy' % save_name)
            np.save(npy_path, pred_mel)
            print('saved', npy_dir)
        if hp.default.n_mels == 32:
            npy_dir = os.path.join(save_dir, 'lpc32')
            if not os.path.exists(npy_dir):
                os.makedirs(npy_dir)
            npy_path = os.path.join(npy_dir, '%s_32.npy' % save_name)
            np.save(npy_path, pred_mel)
            print('saved', npy_dir)


def do_convert(args, logdir2):
    """Build a Net2 predictor from ``logdir2`` and convert all PPG files.

    :param args: Parsed CLI args; uses ``args.ckpt`` (optional checkpoint name).
    :param logdir2: Log/checkpoint directory of the trained Net2 model.
    """
    # Load graph
    model = Net2()

    ppgs_dir = hp.convert.ppgs_path
    mel_dir = hp.convert.mel_path

    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    print("ckpt2", ckpt2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)
    print("after predictor")

    ckpt2mel(predictor, ppgs_dir, mel_dir, hp.convert.save_path)
    print("success")


def get_arguments(argv=None):
    """Parse CLI arguments: the train2 case name and an optional checkpoint.

    :param argv: Optional argument list; defaults to sys.argv[1:]
        (backward-compatible — existing callers pass nothing).
    :return: The parsed argparse.Namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('case2', type=str, help='experiment case name of train2')
    parser.add_argument('-ckpt', help='checkpoint to load model.')
    # BUG FIX: `return` and its value had been split across physical lines
    # (returning None); return the parsed namespace explicitly.
    arguments = parser.parse_args(argv)
    return arguments


if __name__ == '__main__':
    args = get_arguments()
    hp.set_hparam_yaml(args.case2)
    logdir_train2 = '{}/{}/train2'.format(hp.logdir_path, args.case2)
    print('case2: {},logdir2: {}'.format(args.case2, logdir_train2))

    s = datetime.datetime.now()
    do_convert(args, logdir2=logdir_train2)
    e = datetime.datetime.now()
    diff = e - s
    print("Done. elapsed time:{}s".format(diff.seconds))
def train_child(model_cls, args, log_dir, child_dir, prev_dir):
    """Train one child model in a model-search sequence.

    Builds the training config (dataflow, callbacks, monitors), restores
    weights — preferring the child's own checkpoint over the previous
    model's — and launches parameter-server multi-GPU training.

    :param model_cls: Model class used by get_training_params to build the model.
    :param args: Experiment arguments (data dirs, schedules, GPU count, ...).
    :param log_dir: Experiment log directory (not used directly here).
    :param child_dir: Checkpoint/output directory of this child model.
    :param prev_dir: Checkpoint directory of the previous model, or None.
    :return: The constructed model object.
    """
    if not os.path.exists(child_dir):
        os.mkdir(child_dir)
    if os.path.basename(child_dir) == "0" and args.use_init_model:
        init_model_dir = os.path.join(args.data_dir, 'init_model', args.ds_name)
        if os.path.exists(init_model_dir):
            # This implies that there exists init_model_dir, and we are in first model
            # so we do not need to train. Copy the model and mark finished
            logger.info("Skip first model as this model is fully trained.")
            cmd = "mkdir -p {cdir} ; cp {pdir}/* {cdir}/ ".format(\
                cdir=child_dir, pdir=args.init_model_dir)
            _ = subprocess.check_output(cmd, shell=True)
            return

    # get training params for train-config
    (model, args, starting_epoch, lr_schedule, ds_train, insrc_train,
     train_cbs) = get_training_params(model_cls, args)

    ## Model callbacks
    # loss weight update
    ls_cbs_func = getattr(model, 'compute_loss_select_callbacks', None)
    if callable(ls_cbs_func):
        train_cbs.extend(ls_cbs_func())

    # extra callback for general logging/ update.
    extra_callbacks = DEFAULT_CALLBACKS()
    if not args.do_remote_child_inf_runner:
        # Drop the progress bar unless running as a remote inference runner.
        extra_callbacks = \
            [ecb for ecb in extra_callbacks if not isinstance(ecb, ProgressBar)]
    logger.info("Extra callbacks are {}".format(
        [ecb.__class__ for ecb in extra_callbacks]))

    # Logging for analysis
    model_str = model.net_info.to_str()
    logger.info('LayerInfoListString is :\n {}'.format(model_str))

    train_callbacks = [
        ModelSaver(checkpoint_dir=child_dir, max_to_keep=1,
                   keep_checkpoint_every_n_hours=100),
    ] + train_cbs
    if lr_schedule:
        train_callbacks.append(
            ScheduledHyperParamSetter('learning_rate', lr_schedule))
    logger.info('The updated params for training is \n{}'.format(args))

    config = TrainConfig(
        data=insrc_train,
        dataflow=ds_train,
        callbacks=train_callbacks,
        extra_callbacks=extra_callbacks,
        model=model,
        monitors=[JSONWriter(), ScalarPrinter()],  #, TFEventWriter()],
        steps_per_epoch=args.steps_per_epoch,
        max_epoch=args.max_epoch,
        starting_epoch=starting_epoch)

    # Checkpoint selection: the first directory (child first, then prev) that
    # holds a checkpoint wins; restoring from self keeps the global step,
    # restoring from the previous model resets it to 0.
    for dn in [child_dir, prev_dir]:
        if dn is None:
            continue
        ckpt = tf.train.latest_checkpoint(dn)
        if ckpt:
            if args.search_cat_based:
                restore_cls = SaverRestoreSizeRelaxed
            else:
                restore_cls = SaverRestore
            # Dynamic weights are re-created each run and must not be restored.
            _ignore = [DYNAMIC_WEIGHTS_NAME]
            _sess_init_load = restore_cls(ckpt, ignore=_ignore)
            if dn == child_dir:
                # loading from self keep global step
                config.session_init = _sess_init_load
            else:
                # loading from others. Set global_step to 0
                config.session_init = ChainInit([
                    _sess_init_load,
                    AssignGlobalStep(0),
                ])
            break

    launch_train_with_config(config,
                             SyncMultiGPUTrainerParameterServer(args.nr_gpu))
    return model
def do_convert(args, logdir1, logdir2):
    """Interactively convert audio files typed in by the user.

    Restores Net1/Net2 checkpoints, builds an OfflinePredictor, then loops
    reading file paths from stdin ('quit' exits). Each existing file is
    converted and written under ./converted/, with PPG debug dumps (PNG and
    .npy) under ./converted/debug/.

    :param args: Parsed CLI args; uses ``args.ckpt`` and ``args.gpu``.
    :param logdir1: Checkpoint directory of the Net1 model.
    :param logdir2: Checkpoint directory of the Net2 model.
    """
    print("do_convert")
    # Load graph
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)

    model = Net2ForConvert()

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # Net1's global_step must not clobber Net2's.
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))

    # GPU selection: fall back to CPU-only when no GPU was requested.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    print("OfflinePredictor")
    set_env_s = datetime.datetime.now()
    predictor = OfflinePredictor(pred_conf)
    set_env_e = datetime.datetime.now()
    set_env_t = set_env_e - set_env_s
    print("Setting Environment time:{}s".format(set_env_t.seconds))

    while True:
        input_name = input("Write your audio file\'s path for converting : ")
        if input_name == 'quit':
            break
        elif len(glob.glob(input_name)) == 0:
            # BUG FIX: this message was a string literal broken across two
            # physical lines (a syntax error); re-joined into one literal.
            print("That audio file doesn't exist! Try something else.")
            continue

        convert_s = datetime.datetime.now()

        mfcc, spec, mel_spec = get_mfccs_and_spectrogram(input_name, trim=False, isConverting=True)
        mfcc = np.expand_dims(mfcc, axis=0)
        spec = np.expand_dims(spec, axis=0)
        mel_spec = np.expand_dims(mel_spec, axis=0)
        output_audio, ppgs = convert(predictor, mfcc, spec, mel_spec)

        input_audio, samplerate = load(input_name, sr=hp.default.sr, dtype=np.float64)

        # Saving voice-converted audio to 32-bit float wav file
        output_audio = output_audio.astype(np.float32)
        # NOTE(review): an input_name containing directory components will
        # break these output paths — confirm inputs are bare file names.
        write_wav(path="./converted/" + input_name, y=output_audio, sr=hp.default.sr)

        # Saving PPGS data to Grayscale Image and raw binary file
        ppgs = np.squeeze(ppgs, axis=0)
        plt.imsave('./converted/debug/' + input_name + '.png', ppgs, cmap='binary')
        np.save('./converted/debug/' + input_name + '.npy', ppgs)

        convert_e = datetime.datetime.now()
        convert_time = convert_e - convert_s
        print("Total Converting Time:{}s".format(convert_time.seconds))