def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets,
                     feeder.linear_targets, feeder.stop_token_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        checkpoint_state = tf.train.get_checkpoint_state(log_dir)
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        if checkpoint_state is not None:
          saver.restore(sess, checkpoint_state.model_checkpoint_path)
          log('Resuming from checkpoint: %s at commit: %s'
              % (checkpoint_state.model_checkpoint_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

          if args.slack_url != '':
            cmd = 'curl -F file=@%s -H "Authorization: Bearer xoxb-447699810339-447680344836-l9MSOu5h1NgPambAgN7tFnjo" -F channels=#tensorflow https://slack.com/api/files.upload' % os.path.join(
              log_dir, 'step-%d-audio.wav' % step)
            p = subprocess.Popen(cmd, shell=True)
            p.wait()
            cmd = 'curl -F file=@%s -H "Authorization: Bearer xoxb-447699810339-447680344836-l9MSOu5h1NgPambAgN7tFnjo" -F channels=#tensorflow https://slack.com/api/files.upload' % os.path.join(
              log_dir, 'step-%d-align.png' % step)
            p = subprocess.Popen(cmd, shell=True)
            p.wait()

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
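
# ----------------------------------------------------------------------------
# The training loops in these listings all rely on a small ValueWindow helper
# to keep running averages of step time and loss. It is not shown here; the
# following is a minimal sketch assuming only the append()/average interface
# the loops above use. The deque-based implementation is an assumption, not
# the original code.
# ----------------------------------------------------------------------------
from collections import deque


class ValueWindow:
  def __init__(self, window_size=100):
    self._values = deque(maxlen=window_size)  # keep only the last N values

  def append(self, x):
    self._values.append(x)

  @property
  def average(self):
    # Average over whatever has been recorded so far; 0 when empty.
    return sum(self._values) / max(len(self._values), 1)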
def save_alignment(path, attn):
  plot_alignment(attn.T, path, info="tacotron, step={}".format(global_step))
def train(log_dir, input_path, checkpoint_path, is_restore):
  # Log the run info.
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model('tacotron', hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if is_restore:
        # Restore from a checkpoint if the user requested it.
        restore_path = checkpoint_path
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint')
      else:
        log('Starting new training')

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_interval = time.time() - start_time
        message = 'Step %d, %.03f sec, loss=%.05f' % (step, time_interval, loss)
        log(message)

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % hparams.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % hparams.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, step=%d, loss=%.5f' % ('tacotron', time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      coord.request_stop(e)
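
# ----------------------------------------------------------------------------
# Every variant calls an add_stats(model) helper to build the merged summary
# op that summary_writer.add_summary(sess.run(stats), step) later evaluates.
# The helper itself is not part of these listings; the sketch below is a
# plausible TF1-style version. The exact set of scalars (and attribute names
# such as model.learning_rate) is an assumption; only the "return a merged
# summary op" contract is implied by the callers.
# ----------------------------------------------------------------------------
def add_stats(model):
  with tf.variable_scope('stats'):
    tf.summary.scalar('loss', model.loss)
    if hasattr(model, 'learning_rate'):
      tf.summary.scalar('learning_rate', model.learning_rate)
    return tf.summary.merge_all()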
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  ps_hosts = args.ps_hosts.split(",")
  worker_hosts = args.worker_hosts.split(",")
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
  server = tf.train.Server(cluster, job_name=args.job_name, task_index=args.task_index)

  # Block further graph execution if the current node is a parameter server.
  if args.job_name == "ps":
    server.join()

  with tf.device(tf.train.replica_device_setter(
      worker_device="/job:worker/task:%d" % args.task_index, cluster=cluster)):

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
      feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
      model = create_model(args.model, hparams)
      model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
      model.add_loss()
      model.add_optimizer(global_step)
      stats = add_stats(model)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2, sharded=True)
    hooks = [tf.train.StopAtStepHook(last_step=1000000)]

  # Train!
  # The supervisor's managed session automatically resumes from the checkpoint.
  is_chief = (args.task_index == 0)
  init_op = tf.global_variables_initializer()
  sv = tf.train.Supervisor(is_chief=(args.task_index == 0),
                           logdir="train_logs",
                           init_op=init_op,
                           summary_op=stats,
                           saver=saver,
                           save_model_secs=600)
  with sv.managed_session(server.target) as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(init_op)

      if args.restore_step and is_chief:
        # Restore from a checkpoint if the user requested it.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0 and is_chief:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
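
# ----------------------------------------------------------------------------
# The distributed variant above reads its cluster layout from `args`. The
# argparse flags below are an illustrative sketch matching the attribute
# accesses in that function (args.ps_hosts, args.worker_hosts, args.job_name,
# args.task_index, args.restore_step); the defaults are placeholders, not
# values taken from the source.
# ----------------------------------------------------------------------------
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--ps_hosts', default='localhost:2222',
                    help='comma-separated list of parameter-server hosts')
parser.add_argument('--worker_hosts', default='localhost:2223,localhost:2224',
                    help='comma-separated list of worker hosts')
parser.add_argument('--job_name', choices=['ps', 'worker'], required=True)
parser.add_argument('--task_index', type=int, default=0)
parser.add_argument('--restore_step', type=int, default=0)
args = parser.parse_args()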
    linear_dim=hparams.num_freq,
    r=hparams.outputs_per_step,
    padding_idx=hparams.padding_idx,
    use_memory_mask=hparams.use_memory_mask,
)
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint["state_dict"])
model.decoder.max_decoder_steps = max_decoder_steps

os.makedirs(dst_dir, exist_ok=True)

with open(text_list_file_path, "rb") as f:
    lines = f.readlines()
    for idx, line in enumerate(lines):
        text = line.decode("utf-8")[:-1]
        words = nltk.word_tokenize(text)
        print("{}: {} ({} chars, {} words)".format(idx, text, len(text), len(words)))
        waveform, alignment, _ = tts(model, text)
        dst_wav_path = join(dst_dir, "{}{}.wav".format(idx, file_name_suffix))
        dst_alignment_path = join(dst_dir, "{}_alignment.png".format(idx))
        plot_alignment(alignment.T, dst_alignment_path,
                       info="tacotron, {}".format(checkpoint_path))
        audio.save_wav(waveform, dst_wav_path)

print("Finished! Check out {} for generated audio samples.".format(dst_dir))
sys.exit(0)
def train(log_dir, args):
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)

  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s' % (restore_path), slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt, mel_loss, linear_loss = sess.run(
          [global_step, model.loss, model.optimize, model.mel_loss, model.linear_loss])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, mel_loss=%.5f, linear_loss=%.5f]' % (
          step, time_window.average, loss, loss_window.average, mel_loss, linear_loss)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          input_seq = sequence_to_text(input_seq)
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step), input_seq,
            info='%s, step=%d, loss=%.5f' % (args.model, step, loss), istrain=1)
          log('Input: %s' % input_seq)

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  log('Checkpoint path: %s' % checkpoint_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())
  sequence_to_text = sequence_to_text2

  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Multi-GPU settings
    GPUs_id = eval(args.GPUs_id)
    num_GPU = len(GPUs_id)
    hparams.num_GPU = num_GPU
    models = []

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    if args.data_type == 'tfrecord':
      with open('./train_data_dict.json', 'r') as f:
        train_data_dict = json.load(f)
      train_data = args.train_data.split(',')
      file_list = []
      pattern = '[.]*\\_id\\_num\\_([0-9]+)[.]+'
      id_num = 0
      for item in train_data:
        file_list.append(train_data_dict[item])
        id_num += int(re.findall(pattern, train_data_dict[item])[0])
      log('train data:%s' % args.train_data)
      feeder = DataFeeder_tfrecord(hparams, file_list)
      inputs, input_lengths, linear_targets, mel_targets, n_frames, wavs, identities = feeder._get_batch_input()
    elif args.data_type == 'npy':
      with open('./train_npy_data_dict.json', 'r') as f:
        train_data_dict = json.load(f)
      train_data = args.train_data.split(',')
      file_list = []
      pattern = '[.]*\\_id\\_num\\_([0-9]+)[.]+'
      id_num = 0
      for item in train_data:
        file_list.append(train_data_dict[item])
        id_num += int(re.findall(pattern, train_data_dict[item])[0])
      log('train data:%s' % args.train_data)
      feeder = DataFeeder_npy(hparams, file_list, coord)
      inputs = feeder.inputs
      input_lengths = feeder.input_lengths
      mel_targets = feeder.mel_targets
      linear_targets = feeder.linear_targets
      wavs = feeder.wavs
      identities = feeder.identities
    else:
      raise ValueError('input data type not specified')

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
      for i, GPU_id in enumerate(GPUs_id):
        with tf.device('/gpu:%d' % GPU_id):
          with tf.name_scope('GPU_%d' % GPU_id):
            models.append(None)
            models[i] = create_model(args.model, hparams)
            models[i].initialize(inputs=inputs, input_lengths=input_lengths,
                                 mel_targets=mel_targets, linear_targets=linear_targets,
                                 identities=identities, id_num=id_num)
            models[i].add_loss()
            models[i].add_optimizer(global_step)
            stats = add_stats(models[i])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(250)
    loss_window = ValueWindow(1000)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=8)

    # Train!
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
      try:
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        sess.run(tf.global_variables_initializer())

        if args.restore_step:
          # Restore from a checkpoint if the user requested it.
          restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
          saver.restore(sess, restore_path)
          log('Resuming from checkpoint: %s' % restore_path)
        else:
          log('Starting new training run')

        if args.data_type == 'tfrecord':
          tf.train.start_queue_runners(sess=sess, coord=coord)
          feeder.start_threads(sess=sess, coord=coord)
        elif args.data_type == 'npy':
          feeder.start_in_session(sess)

        while not coord.should_stop():
          start_time = time.time()
          step, loss, opt, loss_regularity = sess.run([
            global_step, models[0].loss, models[0].optimize, models[0].loss_regularity])
          time_window.append(time.time() - start_time)
          loss_window.append(loss)
          message = 'Step %-7d [%.03f avg_sec/step, loss=%.05f, avg_loss=%.05f, lossw=%.05f]' % (
            step, time_window.average, loss, loss_window.average, loss_regularity)
          log(message)

          # If the gradient seems to explode, restore the previous checkpoint.
          if loss > 2 * loss_window.average or math.isnan(loss):
            log('recover to the previous checkpoint')
            restore_step = int((step - 10) / args.checkpoint_interval) * args.checkpoint_interval
            restore_path = '%s-%d' % (checkpoint_path, restore_step)
            saver.restore(sess, restore_path)
            continue

          if step % args.summary_interval == 0:
            log('Writing summary at step: %d' % step)
            summary_writer.add_summary(sess.run(stats), step)

          if step % args.checkpoint_interval == 0:
            crrt_dir = os.path.join(log_dir, str(step))
            os.makedirs(crrt_dir, exist_ok=True)

            log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
            saver.save(sess, checkpoint_path, global_step=step)

            log('Saving audio and alignment...')
            input_seq, spectrogram, alignment, wav_original, melspectogram, spec_original, mel_original, identity2 = sess.run([
              models[0].inputs[0], models[0].linear_outputs[0], models[0].alignments[0], wavs[0],
              models[0].mel_outputs[0], linear_targets[0], mel_targets[0], identities[0]])

            waveform = audio.inv_spectrogram(spectrogram.T)
            audio.save_wav(waveform, os.path.join(crrt_dir, 'step-%d-audio.wav' % step))
            audio.save_wav(wav_original,
                           os.path.join(crrt_dir, 'step-%d-audio-original-%d.wav' % (step, identity2)))
            np.save(os.path.join(crrt_dir, 'spec.npy'), spectrogram, allow_pickle=False)
            np.save(os.path.join(crrt_dir, 'melspectogram.npy'), melspectogram, allow_pickle=False)
            np.save(os.path.join(crrt_dir, 'spec_original.npy'), spec_original, allow_pickle=False)
            np.save(os.path.join(crrt_dir, 'mel_original.npy'), mel_original, allow_pickle=False)
            plot.plot_alignment(
              alignment, os.path.join(crrt_dir, 'step-%d-align.png' % step),
              info='%s, %s, step=%d, loss=%.5f' % (args.model, time_string(), step, loss))

            # Extract the alignment path to see how good the alignment is.
            transition_params = []
            for i in range(alignment.shape[0]):
              transition_params.append([])
              for j in range(alignment.shape[0]):
                if i == j or j - i == 1:
                  transition_params[-1].append(500)
                else:
                  transition_params[-1].append(0.0)
            alignment[0][0] = 100000
            alignment2 = np.argmax(alignment, axis=0)
            alignment3 = tf.contrib.crf.viterbi_decode(alignment.T, transition_params)
            alignment4 = np.zeros(alignment.shape)
            for i, item in enumerate(alignment3[0]):
              alignment4[item, i] = 1
            plot.plot_alignment(
              alignment4, os.path.join(crrt_dir, 'step-%d-align2.png' % step),
              info='%s, %s, step=%d, loss=%.5f' % (args.model, time_string(), step, loss))

            crrt = 0
            sample_crrt = 0
            sample_last = 0
            for i, item in enumerate(alignment3[0]):
              if item == crrt:
                sample_crrt += hparams.sample_rate * hparams.frame_shift_ms * hparams.outputs_per_step / 1000
              if not item == crrt:
                crrt += 1
                sample_crrt = int(sample_crrt)
                sample_last = int(sample_last)
                wav_crrt = waveform[:sample_crrt]
                wav_crrt2 = waveform[sample_last:sample_crrt]
                audio.save_wav(wav_crrt, os.path.join(crrt_dir, '%d.wav' % crrt))
                audio.save_wav(wav_crrt2, os.path.join(crrt_dir, '%d-2.wav' % crrt))
                sample_last = sample_crrt
                sample_crrt += hparams.sample_rate * hparams.frame_shift_ms * hparams.outputs_per_step / 1000

            input_seq2 = []
            input_seq3 = []
            for item in alignment2:
              input_seq2.append(input_seq[item])
            for item in alignment3[0]:
              input_seq3.append(input_seq[item])

            # Write the alignments and the aligned input sequences to disk.
            path_align1 = os.path.join(crrt_dir, 'step-%d-align1.txt' % step)
            path_align2 = os.path.join(crrt_dir, 'step-%d-align2.txt' % step)
            path_align3 = os.path.join(crrt_dir, 'step-%d-align3.txt' % step)
            path_seq1 = os.path.join(crrt_dir, 'step-%d-input1.txt' % step)
            path_seq2 = os.path.join(crrt_dir, 'step-%d-input2.txt' % step)
            path_seq3 = os.path.join(crrt_dir, 'step-%d-input3.txt' % step)
            with open(path_align1, 'w') as f:
              for row in alignment:
                for item in row:
                  f.write('%.3f' % item)
                  f.write('\t')
                f.write('\n')
            with open(path_align2, 'w') as f:
              for item in alignment2:
                f.write('%.3f' % item)
                f.write('\t')
            with open(path_align3, 'w') as f:
              for item in alignment3[0]:
                f.write('%.3f' % item)
                f.write('\t')
            with open(path_seq1, 'w') as f:
              f.write(sequence_to_text(input_seq))
            with open(path_seq2, 'w') as f:
              f.write(sequence_to_text(input_seq2))
            with open(path_seq3, 'w') as f:
              f.write(sequence_to_text(input_seq3))

            log('Input: %s' % sequence_to_text(input_seq))
            log('Input: %s' % str(input_seq))

      except Exception as e:
        log('Exiting due to exception: %s' % e)
        traceback.print_exc()
        coord.request_stop(e)
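
# ----------------------------------------------------------------------------
# The nested loop above builds a banded transition matrix (weight 500 on the
# diagonal and on the first super-diagonal, 0 elsewhere) before handing it to
# tf.contrib.crf.viterbi_decode. The function below is a vectorized NumPy
# sketch of the same construction, shown only for clarity; it is not part of
# the original code.
# ----------------------------------------------------------------------------
import numpy as np

def banded_transition_params(n, stay_or_advance=500.0):
  t = np.zeros((n, n), dtype=np.float32)
  idx = np.arange(n)
  t[idx, idx] = stay_or_advance                # i == j: stay on the same input
  t[idx[:-1], idx[:-1] + 1] = stay_or_advance  # j - i == 1: advance by one
  return t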
def gst_train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  save_dir = os.path.join(log_dir, 'gst_pretrained/')
  checkpoint_path = os.path.join(save_dir, 'gst_model.ckpt')
  input_path = os.path.join(args.base_dir, args.gst_input)
  plot_dir = os.path.join(log_dir, 'plots')
  wav_dir = os.path.join(log_dir, 'wavs')
  mel_dir = os.path.join(log_dir, 'mel-spectrograms')
  eval_dir = os.path.join(log_dir, 'eval-dir')
  eval_plot_dir = os.path.join(eval_dir, 'plots')
  eval_wav_dir = os.path.join(eval_dir, 'wavs')
  os.makedirs(eval_dir, exist_ok=True)
  os.makedirs(plot_dir, exist_ok=True)
  os.makedirs(wav_dir, exist_ok=True)
  os.makedirs(mel_dir, exist_ok=True)
  os.makedirs(eval_plot_dir, exist_ok=True)
  os.makedirs(eval_wav_dir, exist_ok=True)

  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log(hparams_debug_string())

  # Start by setting a seed for repeatability
  tf.set_random_seed(hparams.random_seed)

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  model, stats = model_train_mode(args, feeder, hparams, global_step)
  eval_model = model_test_mode(args, feeder, hparams, global_step)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Memory allocation on the GPU as needed
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True

  # Train!
  with tf.Session(config=config) as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())
      checkpoint_state = False

      # Saved model restoring
      if args.restore_step:
        # Restore saved model if the user requested it. Default = True.
        try:
          checkpoint_state = tf.train.get_checkpoint_state(save_dir)
        except tf.errors.OutOfRangeError as e:
          log('Cannot restore checkpoint: {}'.format(e))

      if (checkpoint_state and checkpoint_state.model_checkpoint_path):
        log('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
        saver.restore(sess, checkpoint_state.model_checkpoint_path)
      else:
        if not args.restore_step:
          log('Starting new training!')
        else:
          log('No model to load at {}'.format(save_dir))

      feeder.start_in_session(sess)

      while not coord.should_stop() and step < args.gst_train_steps:
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.eval_interval == 0:
          # Run eval and save eval stats
          log('\nRunning evaluation at step {}'.format(step))
          eval_losses = []
          linear_losses = []
          # TODO: FIX TO ENCOMPASS MORE LOSS
          for i in tqdm(range(feeder.test_steps)):
            eloss, linear_loss, mel_p, mel_t, t_len, align, lin_p = sess.run([
              eval_model.loss, eval_model.linear_loss, eval_model.mel_outputs[0],
              eval_model.mel_targets[0], eval_model.targets_lengths[0],
              eval_model.alignments[0], eval_model.linear_outputs[0]])
            eval_losses.append(eloss)
            linear_losses.append(linear_loss)
          eval_loss = sum(eval_losses) / len(eval_losses)
          linear_loss = sum(linear_losses) / len(linear_losses)

          wav = audio.inv_linear_spectrogram(lin_p.T)
          audio.save_wav(wav, os.path.join(eval_wav_dir, 'step-{}-eval-waveform-linear.wav'.format(step)))

          log('Saving eval log to {}..'.format(eval_dir))
          # Save some logs to monitor model improvement on the same unseen sequence
          wav = audio.inv_mel_spectrogram(mel_p.T)
          audio.save_wav(wav, os.path.join(eval_wav_dir, 'step-{}-eval-waveform-mel.wav'.format(step)))
          plot.plot_alignment(
            align, os.path.join(eval_plot_dir, 'step-{}-eval-align.png'.format(step)),
            info='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, eval_loss),
            max_len=t_len // hparams.outputs_per_step)
          plot.plot_spectrogram(
            mel_p, os.path.join(eval_plot_dir, 'step-{}-eval-mel-spectrogram.png'.format(step)),
            info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, eval_loss),
            target_spectrogram=mel_t)

          log('Eval loss for global step {}: {:.3f}'.format(step, eval_loss))
          log('Writing eval summary!')
          add_eval_stats(summary_writer, step, linear_loss, eval_loss)

        if step % args.checkpoint_interval == 0 or step == args.gst_train_steps:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)

          log('Saving audio and alignment...')
          input_seq, mel_pred, alignment, target, target_len = sess.run([
            model.inputs[0], model.mel_outputs[0], model.alignments[0],
            model.mel_targets[0], model.targets_lengths[0]])

          # Save predicted mel spectrogram to disk (debug)
          mel_filename = 'mel-prediction-step-{}.npy'.format(step)
          np.save(os.path.join(mel_dir, mel_filename), mel_pred.T, allow_pickle=False)

          # Save Griffin-Lim inverted wav for debug (mel -> wav)
          wav = audio.inv_mel_spectrogram(mel_pred.T)
          audio.save_wav(wav, os.path.join(wav_dir, 'step-{}-wave-from-mel.wav'.format(step)))

          # Save alignment plot to disk (control purposes)
          plot.plot_alignment(
            alignment, os.path.join(plot_dir, 'step-{}-align.png'.format(step)),
            info='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, loss),
            max_len=target_len // hparams.outputs_per_step)
          # Save real and predicted mel-spectrogram plot to disk (control purposes)
          plot.plot_spectrogram(
            mel_pred, os.path.join(plot_dir, 'step-{}-mel-spectrogram.png'.format(step)),
            info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, loss),
            target_spectrogram=target, max_len=target_len)
          log('Input at step {}: {}'.format(step, sequence_to_text(input_seq)))

      log('GST Taco training complete after {} global steps!'.format(args.gst_train_steps))
      return save_dir

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
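
# ----------------------------------------------------------------------------
# gst_train() calls add_eval_stats(summary_writer, step, linear_loss,
# eval_loss), which is not shown in this listing. The sketch below is a
# plausible TF1-style version that writes the two eval scalars as a manually
# built Summary proto; the tag names are assumptions, only the call signature
# comes from the code above.
# ----------------------------------------------------------------------------
def add_eval_stats(summary_writer, step, linear_loss, eval_loss):
  values = [
    tf.Summary.Value(tag='eval/linear_loss', simple_value=linear_loss),
    tf.Summary.Value(tag='eval/eval_loss', simple_value=eval_loss),
  ]
  summary_writer.add_summary(tf.Summary(value=values), step)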
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  # The input is a pair of paths: one for positive and one for negative data.
  input_path_pos = os.path.join(args.base_dir, args.input_pos)
  input_path_neg = os.path.join(args.base_dir, args.input_neg)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading positive training data from: %s' % input_path_pos)
  log('Loading negative training data from: %s' % input_path_neg)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path_pos, input_path_neg, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs_pos, feeder.input_lengths_pos, feeder.mel_targets_pos,
                     feeder.linear_targets_pos, feeder.mel_targets_neg, feeder.linear_targets_neg,
                     feeder.labels_pos, feeder.labels_neg)
    model.add_loss()
    model.add_optimizer(global_step)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      # summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        # Train the discriminator.
        sess.run(model.d_optimize)
        # Train the generator.
        step, rec_loss, style_loss, d_loss, g_loss, _ = sess.run([
          global_step, model.rec_loss, model.style_loss, model.d_loss,
          model.g_loss, model.g_optimize])
        time_window.append(time.time() - start_time)
        message = 'Step %-7d [%.03f sec/step, rec_loss=%.05f, style_loss=%.05f, d_loss=%.05f, g_loss=%.05f]' % (
          step, time_window.average, rec_loss, style_loss, d_loss, g_loss)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram_pos, spectrogram_neg, alignment_pos, alignment_neg = sess.run([
            model.inputs[0], model.linear_outputs_pos[0], model.linear_outputs_neg[0],
            model.alignments_pos[0], model.alignments_neg[0]])
          waveform_pos = audio.inv_spectrogram(spectrogram_pos.T)
          waveform_neg = audio.inv_spectrogram(spectrogram_neg.T)
          audio.save_wav(waveform_pos, os.path.join(log_dir, 'step-%d-audio_pos.wav' % step))
          audio.save_wav(waveform_neg, os.path.join(log_dir, 'step-%d-audio_neg.wav' % step))
          plot.plot_alignment(
            alignment_pos, os.path.join(log_dir, 'step-%d-align_pos.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, rec_loss))
          plot.plot_alignment(
            alignment_neg, os.path.join(log_dir, 'step-%d-align_neg.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, rec_loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def main():
  parser = argparse.ArgumentParser()
  # parser.add_argument('--base_dir', default=os.path.expanduser('./'))
  parser.add_argument('--wav_path', default='./wav_files', help='the wav files to mimic')
  parser.add_argument('--output_dir', default='./synthesis', help='the output dir')
  parser.add_argument('--output_prefix', default=' ', help='the prefix of the name of the output')
  parser.add_argument('--model_path', default=' ', help='path of the trained model')
  parser.add_argument('--prenet_layer1', default=256, type=int, help='prenet layer 1 size')
  parser.add_argument('--prenet_layer2', default=128, type=int, help='prenet layer 2 size')
  parser.add_argument('--gru_size', default=256, type=int, help='GRU size')
  parser.add_argument('--attention_size', default=256, type=int, help='attention size')
  parser.add_argument('--rnn_size', default=256, type=int, help='RNN size')
  parser.add_argument('--enable_fv1', default=True, type=bool, help='enable_fv1')
  parser.add_argument('--enable_fv2', default=True, type=bool, help='enable_fv2')
  args = parser.parse_args()

  hparams.prenet_layer1 = args.prenet_layer1
  hparams.prenet_layer2 = args.prenet_layer2
  hparams.gru_size = args.gru_size
  hparams.attention_size = args.attention_size
  hparams.rnn_size = args.rnn_size
  hparams.enable_fv1 = args.enable_fv1
  hparams.enable_fv2 = args.enable_fv2

  synthesizer = Synthesizer(hparams)
  synthesizer.load(args.model_path)

  for person_id in os.listdir(args.wav_path):
    # log_dir = os.path.join(args.base_dir, 'logs-%s-%s' % (run_name, args.description))
    os.makedirs(os.path.join(args.output_dir, args.output_prefix + person_id), exist_ok=True)
    current_dir = os.path.join(args.output_dir, args.output_prefix + person_id)
    mel_spectrograms = []
    for wav_file in os.listdir(os.path.join(args.wav_path, person_id)):
      # Load the audio to a numpy array:
      wav = audio.load_wav(os.path.join(args.wav_path, person_id, wav_file))
      # Compute the linear-scale spectrogram from the wav:
      # spectrogram = audio.spectrogram(wav).astype(np.float32)
      # Compute a mel-scale spectrogram from the wav:
      mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T
      mel_spectrograms.append(mel_spectrogram)
      print(wav_file)
      print(np.shape(mel_spectrogram))
    print(np.shape(mel_spectrograms))
    mel_spectrograms = _prepare_targets(mel_spectrograms, 1)

    for text in sentences:
      wav, alignment = synthesizer.synthesize(text=text, mel_spec=mel_spectrograms)
      print(alignment.shape)
      plot.plot_alignment(alignment, os.path.join(current_dir, text + '.png'))
      out = os.path.join(current_dir, text + '.wav')
      audio.save_wav(wav, out)
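
# ----------------------------------------------------------------------------
# _prepare_targets() is referenced in main() but not defined in this listing.
# The sketch below assumes the keithito-style contract: pad every (time, dim)
# target to a common length rounded up to a multiple of `alignment`. It is an
# assumption, not the original helper.
# ----------------------------------------------------------------------------
import numpy as np

def _round_up(x, multiple):
  remainder = x % multiple
  return x if remainder == 0 else x + multiple - remainder

def _prepare_targets(targets, alignment):
  max_len = max(len(t) for t in targets)
  padded_len = _round_up(max_len, alignment)
  return np.stack([
    np.pad(t, [(0, padded_len - len(t)), (0, 0)], mode='constant')
    for t in targets])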
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Graph
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # New attributes of hparams
    # hparams.num_GPU = len(GPUs_id)
    # hparams.datasets = eval(args.datasets)
    hparams.datasets = eval(args.datasets)
    hparams.prenet_layer1 = args.prenet_layer1
    hparams.prenet_layer2 = args.prenet_layer2
    hparams.gru_size = args.gru_size
    hparams.attention_size = args.attention_size
    hparams.rnn_size = args.rnn_size
    hparams.enable_fv1 = args.enable_fv1
    hparams.enable_fv2 = args.enable_fv2
    if args.batch_size:
      hparams.batch_size = args.batch_size

    # Multi-GPU settings
    GPUs_id = eval(args.GPUs_id)
    hparams.num_GPU = len(GPUs_id)
    tower_grads = []
    tower_loss = []
    models = []

    global_step = tf.Variable(-1, name='global_step', trainable=False)
    if hparams.decay_learning_rate:
      learning_rate = _learning_rate_decay(hparams.initial_learning_rate, global_step, hparams.num_GPU)
    else:
      learning_rate = tf.convert_to_tensor(hparams.initial_learning_rate)

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
      input_path = os.path.join(args.base_dir, args.input)
      feeder = DataFeeder(coord, input_path, hparams)
      inputs = feeder.inputs
      inputs = tf.split(inputs, hparams.num_GPU, 0)
      input_lengths = feeder.input_lengths
      input_lengths = tf.split(input_lengths, hparams.num_GPU, 0)
      mel_targets = feeder.mel_targets
      mel_targets = tf.split(mel_targets, hparams.num_GPU, 0)
      linear_targets = feeder.linear_targets
      linear_targets = tf.split(linear_targets, hparams.num_GPU, 0)

    # Set up model:
    with tf.variable_scope('model') as scope:
      optimizer = tf.train.AdamOptimizer(learning_rate, hparams.adam_beta1, hparams.adam_beta2)
      for i, GPU_id in enumerate(GPUs_id):
        with tf.device('/gpu:%d' % GPU_id):
          with tf.name_scope('GPU_%d' % GPU_id):
            if hparams.enable_fv1 or hparams.enable_fv2:
              net = ResCNN(data=mel_targets[i], batch_size=hparams.batch_size, hyparam=hparams)
              net.inference()
              voice_print_feature = tf.reduce_mean(net.features, 0)
            else:
              voice_print_feature = None

            models.append(None)
            models[i] = create_model(args.model, hparams)
            models[i].initialize(inputs=inputs[i], input_lengths=input_lengths[i],
                                 mel_targets=mel_targets[i], linear_targets=linear_targets[i],
                                 voice_print_feature=voice_print_feature)
            models[i].add_loss()

            # L2 weight decay loss.
            if args.weight_decay > 0:
              costs = []
              for var in tf.trainable_variables():
                # if var.op.name.find(r'DW') > 0:
                costs.append(tf.nn.l2_loss(var))
                # tf.summary.histogram(var.op.name, var)
              weight_decay = tf.cast(args.weight_decay, tf.float32)
              cost = models[i].loss
              models[i].loss += tf.multiply(weight_decay, tf.add_n(costs))
              cost_pure_wd = tf.multiply(weight_decay, tf.add_n(costs))
            else:
              cost = models[i].loss
              cost_pure_wd = tf.constant([0])

            tower_loss.append(models[i].loss)
            tf.get_variable_scope().reuse_variables()
            models[i].add_optimizer(global_step, optimizer)
            tower_grads.append(models[i].gradients)

    # Calculate the average gradient across towers.
    gradients = average_gradients(tower_grads)
    stats = add_stats(models[0], gradients, learning_rate)
    time.sleep(10)

    # Apply the averaged gradient.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
      apply_gradient_op = optimizer.apply_gradients(gradients, global_step=global_step)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train!
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
      try:
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        sess.run(tf.global_variables_initializer())

        if args.restore_step:
          # Restore from a checkpoint if the user requested it.
          restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
          saver.restore(sess, restore_path)
          log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
        else:
          log('Starting new training run at commit: %s' % commit, slack=True)

        feeder.start_in_session(sess)

        while not coord.should_stop():
          start_time = time.time()
          model = models[0]
          step, loss, opt, loss_wd, loss_pure_wd = sess.run([
            global_step, cost, apply_gradient_op, model.loss, cost_pure_wd])
          feeder._batch_in_queue -= 1
          log('feed._batch_in_queue: %s' % str(feeder._batch_in_queue), slack=True)
          time_window.append(time.time() - start_time)
          loss_window.append(loss)
          message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, loss_wd=%.05f, loss_pure_wd=%.05f]' % (
            step, time_window.average, loss, loss_window.average, loss_wd, loss_pure_wd)
          log(message, slack=(step % args.checkpoint_interval == 0))

          # If the gradient seems to explode, restore the previous checkpoint.
          if loss > 2 * loss_window.average or math.isnan(loss):
            log('recover to the previous checkpoint')
            # tf.reset_default_graph()
            restore_step = int((step - 10) / args.checkpoint_interval) * args.checkpoint_interval
            restore_path = '%s-%d' % (checkpoint_path, restore_step)
            saver.restore(sess, restore_path)
            continue

          if loss > 100 or math.isnan(loss):
            log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
            raise Exception('Loss Exploded')

          try:
            if step % args.summary_interval == 0:
              log('Writing summary at step: %d' % step)
              summary_writer.add_summary(sess.run(stats), step)
          except:
            pass

          if step % args.checkpoint_interval == 0:
            log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
            saver.save(sess, checkpoint_path, global_step=step)
            log('Saving audio and alignment...')
            input_seq, spectrogram, alignment = sess.run([
              model.inputs[0], model.linear_outputs[0], model.alignments[0]])
            waveform = audio.inv_spectrogram(spectrogram.T)
            audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
            plot.plot_alignment(
              alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
              info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
            log('Input: %s' % sequence_to_text(input_seq))

      except Exception as e:
        log('Exiting due to exception: %s' % e, slack=True)
        traceback.print_exc()
        coord.request_stop(e)
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets,
                     feeder.linear_targets, feeder.stop_token_targets, global_step)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  config.allow_soft_placement = True
  run_options = None  # tf.RunOptions(report_tensor_allocations_upon_oom=True)
  with tf.Session(config=config) as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        checkpoint_state = tf.train.get_checkpoint_state(log_dir)
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        if checkpoint_state is not None:
          saver.restore(sess, checkpoint_state.model_checkpoint_path)
          log('Resuming from checkpoint: %s at commit: %s'
              % (checkpoint_state.model_checkpoint_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      tf.train.write_graph(sess.graph.as_graph_def(), '.',
                           os.path.join(log_dir, 'tacotron_model.pbtxt'), as_text=True)
      tf.train.write_graph(sess.graph.as_graph_def(), '.',
                           os.path.join(log_dir, 'tacotron_model.pb'), as_text=False)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize], options=run_options)
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, mel_outputs, mel_t, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.mel_outputs[0],
            model.mel_targets[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          plot.plot_spectrogram(
            mel_outputs, os.path.join(log_dir, 'step-{}-eval-mel-spectrogram.png'.format(step)),
            title='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, loss),
            target_spectrogram=mel_t)
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  parent_id = args.pid
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  if parent_id:
    log('Downloading model files from drive')
    download_checkpoints(parent_id)

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = '%s |Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          time.asctime(), step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          # Files to be uploaded to drive.
          list_files = [os.path.join(log_dir, 'checkpoint'), os.path.join(log_dir, 'train.log')]
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          prefix = saver.save(sess, checkpoint_path, global_step=step)
          list_files.extend(glob.glob(prefix + '.*'))
          list_files.extend(glob.glob(os.path.join(log_dir, 'events.*')))
          try:
            log('Saving audio and alignment...')
            input_seq, spectrogram, alignment = sess.run([
              model.inputs[0], model.linear_outputs[0], model.alignments[0]])
            waveform = audio.inv_spectrogram(spectrogram.T)
            info = '\n'.join(textwrap.wrap(
              '%s, %s, %s, %s, step=%d, loss=%.5f' % (sequence_to_text(input_seq), args.model,
                                                      commit, time_string(), step, loss),
              70, break_long_words=False))
            audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
            plot.plot_alignment(alignment, os.path.join(log_dir, 'step-%d-align.png' % step), info=info)
            log('Input: %s' % sequence_to_text(input_seq))
            list_files.append(os.path.join(log_dir, 'step-%d-audio.wav' % step))
            list_files.append(os.path.join(log_dir, 'step-%d-align.png' % step))
          except Exception as e:
            log(str(e))
            print(e)

          if parent_id:
            try:
              upload_to_drive(list_files, parent_id)
            except Exception as e:
              print(e)
              with open('drive_log.txt', 'a') as ferr:
                ferr.write('\n\n\n' + time.asctime())
                ferr.write('\n' + ', '.join(list_files))
                ferr.write('\n' + str(e))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  DATA_PATH = {'bznsyp': "BZNSYP", 'ljspeech': "LJSpeech-1.1"}[args.dataset]
  input_path = os.path.join(args.base_dir, 'DATA', DATA_PATH, 'training', 'train.txt')
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.lpc_targets, feeder.stop_token_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=999, keep_checkpoint_every_n_hours=2)

  # Train!
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  with tf.Session(config=config) as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        checkpoint_state = tf.train.get_checkpoint_state(log_dir)
        # restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        if checkpoint_state is not None:
          saver.restore(sess, checkpoint_state.model_checkpoint_path)
          log('Resuming from checkpoint: %s at commit: %s'
              % (checkpoint_state.model_checkpoint_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)
        if args.restore_decoder:
          models = [f for f in os.listdir('pretrain') if f.find('.meta') != -1]
          decoder_ckpt_path = os.path.join('pretrain', models[0].replace('.meta', ''))
          global_vars = tf.global_variables()
          var_list = []
          valid_scope = ['model/inference/decoder', 'model/inference/post_cbhg',
                         'model/inference/dense', 'model/inference/memory_layer']
          for v in global_vars:
            if v.name.find('attention') != -1:
              continue
            if v.name.find('Attention') != -1:
              continue
            for scope in valid_scope:
              if v.name.startswith(scope):
                var_list.append(v)
          decoder_saver = tf.train.Saver(var_list)
          decoder_saver.restore(sess, decoder_ckpt_path)
          print('restore pretrained decoder ...')

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, lpc_targets, alignment = sess.run([
            model.inputs[0], model.lpc_outputs[0], model.alignments[0]])
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          np.save(os.path.join(log_dir, 'step-%d-lpc.npy' % step), lpc_targets)
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = './jenie_Processed/amused/'
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      restore_path = './Trained Weights/model.ckpt-lj'
      print(restore_path)
      saver.restore(sess, restore_path)
      log('[INFO] Resuming from checkpoint: %s ' % (restore_path))

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message)

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step))
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, step=%d, loss=%.5f' % (args.model, time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    if args.model == 'tacotron2':
      model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets,
                       feeder.linear_targets, global_step=global_step)
    else:
      model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step > 0:
        ckpt = '%s-%d' % (checkpoint_path, args.restore_step)
      else:
        ckpt = tf.train.latest_checkpoint(log_dir)
      if ckpt:
        log('Resuming from checkpoint: %s at commit: %s' % (ckpt, commit), slack=True)
        saver.restore(sess, ckpt)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)
        sess.run(tf.global_variables_initializer())

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, lr, opt = sess.run([global_step, model.loss, model.learning_rate, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, lr=%.05f]' % (
          step, time_window.average, loss, loss_window.average, lr)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, target, alignment = sess.run([
            model.inputs[0], model.mel_outputs[0], model.mel_targets[0], model.alignments[0]])
          waveform = audio.inv_mel_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          plot.plot_spectrogram(
            spectrogram, os.path.join(log_dir, 'step-{}-mel-spectrogram.png'.format(step)),
            info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, loss),
            target_spectrogram=target, max_len=None)
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(
            alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
def train(log_dir, args):
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, args.data_paths, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.target_lengths, feeder.prefixes,
                     feeder.speaker_ids, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)
  loss_window = ValueWindow(100)
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        log(message, slack=(step % args.checkpoint_interval == 0))

        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          input_seq, spectrogram, alignment, prefix, input_length, target_length = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0],
            model.prefixes[0], model.input_lengths[0], model.target_lengths[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-%s.wav' % (step, prefix.decode())))
          plot.plot_alignment(
            alignment[alignment.shape[0] - input_length:, :target_length + 1],
            os.path.join(log_dir, 'step-%d-%s.png' % (step, prefix.decode())),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)