def save_states(global_step, mel_outputs, linear_outputs, attn, y, input_lengths, checkpoint_dir=None):
    """Dump intermediate training artifacts (alignment, spectrograms, audio)
    for one sample of the current batch into ``checkpoint_dir``."""
    print("Save intermediate states at step {}".format(global_step))
    # idx = np.random.randint(0, len(input_lengths))
    # Pick the second sample when available, else the only one.
    sample_idx = min(1, len(input_lengths) - 1)
    input_length = input_lengths[sample_idx]

    # Attention alignment plot.
    # alignment = attn[idx].cpu().data.numpy()[:, :input_length]
    alignment = attn[sample_idx].cpu().data.numpy()
    save_alignment(join(checkpoint_dir, "step{}_alignment.png".format(global_step)), alignment)

    # Predicted linear spectrogram plot.
    predicted_spec = linear_outputs[sample_idx].cpu().data.numpy()
    save_spectrogram(
        join(checkpoint_dir, "step{}_predicted_spectrogram.png".format(global_step)),
        predicted_spec)

    # Audio reconstructed from the predicted spectrogram.
    signal = audio.inv_spectrogram(predicted_spec.T)
    audio.save_wav(signal, join(checkpoint_dir, "step{}_predicted.wav".format(global_step)))

    # Ground-truth (target) spectrogram plot.
    target_spec = y[sample_idx].cpu().data.numpy()
    save_spectrogram(
        join(checkpoint_dir, "step{}_target_spectrogram.png".format(global_step)),
        target_spec)
def tts(model, text):
    """Convert text to speech waveform given a Tacotron model. """
    if use_cuda:
        model = model.cuda()
    # TODO: Turning off dropout of decoder's prenet causes serious performance
    # regression, not sure why.
    # model.decoder.eval()
    model.encoder.eval()
    model.postnet.eval()

    # Encode the text and add a batch dimension.
    seq = Variable(torch.from_numpy(np.array(text_to_sequence(text, [hparams.cleaners]))))
    seq = seq.unsqueeze(0)
    if use_cuda:
        seq = seq.cuda()

    # Greedy decoding
    mel_outputs, linear_outputs, alignments = model(seq)

    lin = linear_outputs[0].cpu().data.numpy()
    spectrogram = audio._denormalize(lin)
    alignment = alignments[0].cpu().data.numpy()

    # Predicted audio signal
    waveform = audio.inv_spectrogram(lin.T)

    return waveform, alignment, spectrogram
def synthesize(self, text, save_path=None):
    """Synthesize ``text`` to a wav.

    Returns ``save_path`` when it is given (the wav is written there),
    otherwise returns the raw wav bytes.
    """
    seq = textinput.to_sequence(
        text,
        force_lowercase=hparams.force_lowercase,
        expand_abbreviations=hparams.expand_abbreviations)
    feed = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
    }
    spec = self.session.run(self.model.linear_outputs[0], feed_dict=feed)

    if save_path is None:
        # No target path: render into memory and hand back the bytes.
        buf = io.BytesIO()
        audio.save_wav(audio.inv_spectrogram(spec.T), buf)
        return buf.getvalue()
    audio.save_wav(audio.inv_spectrogram(spec.T), save_path)
    return save_path
def synthesize(self, text):
    """Synthesize ``text`` with the Chainer model and return wav bytes."""
    # Inference mode: disables train-time behavior (e.g. dropout).
    with chainer.using_config('train', False):
        token_ids = textinput.to_sequence(
            text,
            force_lowercase=hparams.force_lowercase,
            expand_abbreviations=hparams.expand_abbreviations)
        spec = self.model.output(token_ids)
        buf = io.BytesIO()
        audio.save_wav(audio.inv_spectrogram(spec.T), buf)
        return buf.getvalue()
def synthesize(self, text):
    """Run the model on ``text`` and return the synthesized wav bytes."""
    names = [name.strip() for name in hparams.cleaners.split(',')]
    encoded = text_to_sequence(text, names)
    feed = {
        self.model.inputs: [np.asarray(encoded, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(encoded)], dtype=np.int32),
    }
    spec = self.session.run(self.model.linear_outputs[0], feed_dict=feed)
    buf = io.BytesIO()
    audio.save_wav(audio.inv_spectrogram(spec.T), buf)
    return buf.getvalue()
def save_audio():
    """Save the most recently processed audio and alignment for the current
    trainer iteration (reads ``model``/``trainer``/``alignment``/``loss``/
    ``input_seq`` from enclosing scope — TODO confirm these globals)."""
    # model instance has spectrogram data which was processed last
    spectrogram = model.spectrogram  # TODO: change this specification
    waveform = audio.inv_spectrogram(spectrogram.T)
    audio.save_wav(
        waveform,
        os.path.join(
            log_dir,
            'iteration_{.updater.iteration}-audio.wav'.format(trainer)))
    plot.plot_alignment(
        alignment,
        os.path.join(
            log_dir,
            'iteration_{.updater.iteration}-align.png'.format(trainer)),
        # BUG FIX: the original called .format() on a %-style template, so the
        # %s/%.5f placeholders were never substituted and '{.updater.iteration}'
        # was applied to args.model (positional arg 0) instead of trainer.
        info='%s, %s, %s, iteration_%d, loss=%.5f' %
        (args.model, commit, time_string(), trainer.updater.iteration, loss))
    log('Input: %s' % textinput.to_string(input_seq))
def synthesize(self, text):
    """Synthesize ``text``; trim trailing silence from spectrogram and wav,
    then return the wav bytes."""
    names = [token.strip() for token in hparams.cleaners.split(',')]
    encoded = text_to_sequence(text, names)
    feed = {
        self.model.inputs: [np.asarray(encoded, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(encoded)], dtype=np.int32),
    }
    # wav = self.session.run(self.wav_output, feed_dict=feed_dict)
    # wav = audio.inv_preemphasis(wav)
    # wav = wav[:audio.find_endpoint(wav)]
    linear = self.session.run(self.linear_output, feed_dict=feed)
    # Cut the spectrogram at its detected endpoint before inversion.
    end_frame = audio.find_endpoint_spectrogram(linear, threshold_db=-10)
    linear = linear[:end_frame]
    wav = audio.inv_spectrogram(linear.T)
    # Trim residual tail silence from the reconstructed waveform too.
    wav = wav[:audio.find_endpoint(wav)]
    buf = io.BytesIO()
    audio.save_wav(wav, buf)
    return buf.getvalue()
def synthesize(self, text1, text2):
    """Synthesize from a two-input model; returns (wav_bytes, align1, align2)."""
    primary = textinput_fr.to_sequence(
        text1,
        force_lowercase=hparams.force_lowercase,
        expand_abbreviations=hparams.expand_abbreviations)
    # Second input is fed verbatim (no abbreviation expansion).
    secondary = textinput_fr.to_sequence(
        text2,
        force_lowercase=hparams.force_lowercase,
        expand_abbreviations=False)
    feed = {
        self.model.inputs1: [np.asarray(primary, dtype=np.int32)],
        self.model.input_lengths1: np.asarray([len(primary)], dtype=np.int32),
        self.model.inputs2: [np.asarray(secondary, dtype=np.int32)],
        self.model.input_lengths2: np.asarray([len(secondary)], dtype=np.int32),
    }
    fetches = [
        self.model.linear_outputs[0],
        self.model.alignments1[0],
        self.model.alignments2[0],
    ]
    spec, alignments1, alignments2 = self.session.run(fetches, feed_dict=feed)
    buf = io.BytesIO()
    audio.save_wav(audio.inv_spectrogram(spec.T), buf)
    return buf.getvalue(), alignments1, alignments2
def train(log_dir, args):
    """Multi-GPU Tacotron training loop with tfrecord/npy data feeders.

    Builds one model replica per GPU, trains on models[0]'s loss, and every
    ``checkpoint_interval`` steps saves a checkpoint plus audio/alignment
    debug artifacts (including a Viterbi-decoded alignment path).
    """
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    log('Checkpoint path: %s' % checkpoint_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())
    sequence_to_text = sequence_to_text2
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Multi-GPU settings
        # NOTE(review): eval() on a CLI arg — assumes GPUs_id is a trusted
        # literal like "[0,1]".
        GPUs_id = eval(args.GPUs_id)
        num_GPU = len(GPUs_id)
        hparams.num_GPU = num_GPU
        models = []
        # Set up DataFeeder:
        coord = tf.train.Coordinator()
        if args.data_type == 'tfrecord':
            with open('./train_data_dict.json', 'r') as f:
                train_data_dict = json.load(f)
            train_data = args.train_data.split(',')
            file_list = []
            # Filenames encode the speaker/identity count as "..._id_num_<N>...".
            pattern = '[.]*\\_id\\_num\\_([0-9]+)[.]+'
            id_num = 0
            for item in train_data:
                file_list.append(train_data_dict[item])
                id_num += int(re.findall(pattern, train_data_dict[item])[0])
            log('train data:%s' % args.train_data)
            feeder = DataFeeder_tfrecord(hparams, file_list)
            inputs, input_lengths, linear_targets, mel_targets, n_frames, wavs, identities = feeder._get_batch_input(
            )
        elif args.data_type == 'npy':
            with open('./train_npy_data_dict.json', 'r') as f:
                train_data_dict = json.load(f)
            train_data = args.train_data.split(',')
            file_list = []
            pattern = '[.]*\\_id\\_num\\_([0-9]+)[.]+'
            id_num = 0
            for item in train_data:
                file_list.append(train_data_dict[item])
                id_num += int(re.findall(pattern, train_data_dict[item])[0])
            log('train data:%s' % args.train_data)
            feeder = DataFeeder_npy(hparams, file_list, coord)
            inputs = feeder.inputs
            input_lengths = feeder.input_lengths
            mel_targets = feeder.mel_targets
            linear_targets = feeder.linear_targets
            wavs = feeder.wavs
            identities = feeder.identities
        else:
            # NOTE(review): raising a string is invalid; this is a latent bug
            # kept as-is in this documentation pass.
            raise ('not spificied the input data type')
        # Set up model: one replica per requested GPU.
        global_step = tf.Variable(0, name='global_step', trainable=False)
        with tf.variable_scope('model') as scope:
            for i, GPU_id in enumerate(GPUs_id):
                with tf.device('/gpu:%d' % GPU_id):
                    with tf.name_scope('GPU_%d' % GPU_id):
                        models.append(None)
                        models[i] = create_model(args.model, hparams)
                        models[i].initialize(inputs=inputs,
                                             input_lengths=input_lengths,
                                             mel_targets=mel_targets,
                                             linear_targets=linear_targets,
                                             identities=identities,
                                             id_num=id_num)
                        models[i].add_loss()
                        models[i].add_optimizer(global_step)
                        stats = add_stats(models[i])
        # Bookkeeping:
        step = 0
        time_window = ValueWindow(250)
        loss_window = ValueWindow(1000)
        saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=8)
        # Train!
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            try:
                summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
                sess.run(tf.global_variables_initializer())
                if args.restore_step:
                    # Restore from a checkpoint if the user requested it.
                    restore_path = '%s-%d' % (checkpoint_path,
                                              args.restore_step)
                    saver.restore(sess, restore_path)
                    log('Resuming from checkpoint: %s' % restore_path)
                else:
                    log('Starting new training run')
                if args.data_type == 'tfrecord':
                    tf.train.start_queue_runners(sess=sess, coord=coord)
                    feeder.start_threads(sess=sess, coord=coord)
                elif args.data_type == 'npy':
                    feeder.start_in_session(sess)
                while not coord.should_stop():
                    start_time = time.time()
                    step, loss, opt, loss_regularity = sess.run([
                        global_step,
                        models[0].loss,
                        models[0].optimize,
                        models[0].loss_regularity,
                    ])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f avg_sec/step, loss=%.05f, avg_loss=%.05f, lossw=%.05f]' % (
                        step, time_window.average, loss, loss_window.average,
                        loss_regularity)
                    log(message)
                    # if the gradient seems to explode, then restore to the previous step
                    if loss > 2 * loss_window.average or math.isnan(loss):
                        log('recover to the previous checkpoint')
                        restore_step = int(
                            (step - 10) / args.checkpoint_interval
                        ) * args.checkpoint_interval
                        restore_path = '%s-%d' % (checkpoint_path,
                                                  restore_step)
                        saver.restore(sess, restore_path)
                        continue
                    if step % args.summary_interval == 0:
                        log('Writing summary at step: %d' % step)
                        summary_writer.add_summary(sess.run(stats), step)
                    if step % args.checkpoint_interval == 0:
                        crrt_dir = os.path.join(log_dir, str(step))
                        os.makedirs(crrt_dir, exist_ok=True)
                        log('Saving checkpoint to: %s-%d' %
                            (checkpoint_path, step))
                        saver.save(sess, checkpoint_path, global_step=step)
                        log('Saving audio and alignment...')
                        input_seq, spectrogram, alignment, wav_original, melspectogram, spec_original, mel_original, \
                            identity2 = sess.run([models[0].inputs[0],
                                                  models[0].linear_outputs[0],
                                                  models[0].alignments[0],
                                                  wavs[0],
                                                  models[0].mel_outputs[0],
                                                  linear_targets[0],
                                                  mel_targets[0],
                                                  identities[0]])
                        waveform = audio.inv_spectrogram(spectrogram.T)
                        audio.save_wav(
                            waveform,
                            os.path.join(crrt_dir, 'step-%d-audio.wav' % step))
                        audio.save_wav(
                            wav_original,
                            os.path.join(
                                crrt_dir, 'step-%d-audio-original-%d.wav' %
                                (step, identity2)))
                        np.save(os.path.join(crrt_dir, 'spec.npy'),
                                spectrogram,
                                allow_pickle=False)
                        np.save(os.path.join(crrt_dir, 'melspectogram.npy'),
                                melspectogram,
                                allow_pickle=False)
                        np.save(os.path.join(crrt_dir, 'spec_original.npy'),
                                spec_original,
                                allow_pickle=False)
                        np.save(os.path.join(crrt_dir, 'mel_original.npy'),
                                mel_original,
                                allow_pickle=False)
                        plot.plot_alignment(
                            alignment,
                            os.path.join(crrt_dir, 'step-%d-align.png' % step),
                            info='%s, %s, step=%d, loss=%.5f' %
                            (args.model, time_string(), step, loss))
                        # Extract a monotonic alignment path and inspect how
                        # well the model aligned (originally a Chinese note).
                        # Transition matrix favoring staying or moving one
                        # step forward (diagonal/monotonic attention).
                        transition_params = []
                        for i in range(alignment.shape[0]):
                            transition_params.append([])
                            for j in range(alignment.shape[0]):
                                if i == j or j - i == 1:
                                    transition_params[-1].append(500)
                                else:
                                    transition_params[-1].append(0.0)
                        # Force the path to start at input position 0.
                        alignment[0][0] = 100000
                        alignment2 = np.argmax(alignment, axis=0)
                        alignment3 = tf.contrib.crf.viterbi_decode(
                            alignment.T, transition_params)
                        alignment4 = np.zeros(alignment.shape)
                        for i, item in enumerate(alignment3[0]):
                            alignment4[item, i] = 1
                        plot.plot_alignment(
                            alignment4,
                            os.path.join(crrt_dir,
                                         'step-%d-align2.png' % step),
                            info='%s, %s, step=%d, loss=%.5f' %
                            (args.model, time_string(), step, loss))
                        # Split the synthesized waveform per input symbol using
                        # the Viterbi path; each frame spans
                        # sample_rate * frame_shift_ms * outputs_per_step / 1000 samples.
                        crrt = 0
                        sample_crrt = 0
                        sample_last = 0
                        for i, item in enumerate(alignment3[0]):
                            if item == crrt:
                                sample_crrt += hparams.sample_rate * hparams.frame_shift_ms * hparams.outputs_per_step\
                                    / 1000
                            if not item == crrt:
                                crrt += 1
                                sample_crrt = int(sample_crrt)
                                sample_last = int(sample_last)
                                wav_crrt = waveform[:sample_crrt]
                                wav_crrt2 = waveform[sample_last:sample_crrt]
                                audio.save_wav(
                                    wav_crrt,
                                    os.path.join(crrt_dir, '%d.wav' % crrt))
                                audio.save_wav(
                                    wav_crrt2,
                                    os.path.join(crrt_dir, '%d-2.wav' % crrt))
                                sample_last = sample_crrt
                                sample_crrt += hparams.sample_rate * hparams.frame_shift_ms * hparams.outputs_per_step \
                                    / 1000
                        # Re-read the input sequence along both alignments.
                        input_seq2 = []
                        input_seq3 = []
                        for item in alignment2:
                            input_seq2.append(input_seq[item])
                        for item in alignment3[0]:
                            input_seq3.append(input_seq[item])
                        # output alignment
                        path_align1 = os.path.join(crrt_dir,
                                                   'step-%d-align1.txt' % step)
                        path_align2 = os.path.join(crrt_dir,
                                                   'step-%d-align2.txt' % step)
                        path_align3 = os.path.join(crrt_dir,
                                                   'step-%d-align3.txt' % step)
                        path_seq1 = os.path.join(crrt_dir,
                                                 'step-%d-input1.txt' % step)
                        path_seq2 = os.path.join(crrt_dir,
                                                 'step-%d-input2.txt' % step)
                        path_seq3 = os.path.join(crrt_dir,
                                                 'step-%d-input3.txt' % step)
                        with open(path_align1, 'w') as f:
                            for row in alignment:
                                for item in row:
                                    f.write('%.3f' % item)
                                    f.write('\t')
                                f.write('\n')
                        with open(path_align2, 'w') as f:
                            for item in alignment2:
                                f.write('%.3f' % item)
                                f.write('\t')
                        with open(path_align3, 'w') as f:
                            for item in alignment3[0]:
                                f.write('%.3f' % item)
                                f.write('\t')
                        with open(path_seq1, 'w') as f:
                            f.write(sequence_to_text(input_seq))
                        with open(path_seq2, 'w') as f:
                            f.write(sequence_to_text(input_seq2))
                        with open(path_seq3, 'w') as f:
                            f.write(sequence_to_text(input_seq3))
                        log('Input: %s' % sequence_to_text(input_seq))
                        log('Input: %s' % str(input_seq))
            except Exception as e:
                log('Exiting due to exception: %s' % e)
                traceback.print_exc()
                coord.request_stop(e)
def train(log_dir, input_path, checkpoint_path, is_restore):
    """Single-GPU Tacotron training loop.

    Args:
        log_dir: directory for summaries and debug audio/alignments.
        input_path: training metadata path fed to DataFeeder.
        checkpoint_path: checkpoint file prefix to save/restore.
        is_restore: when truthy, restore from ``checkpoint_path`` first.
    """
    # Log the info
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log(hparams_debug_string())
    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)
    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model('tacotron', hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)
    # Bookkeeping:
    step = 0
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)
    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            if is_restore:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s' % (checkpoint_path)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint')
            else:
                log('Starting new training')
            feeder.start_in_session(sess)
            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_interval = time.time() - start_time
                # BUG FIX: arguments were previously (step, loss, time_interval),
                # printing the loss in the "sec" slot and vice versa.
                message = 'Step %d, %.03f sec, loss=%.05f' % (step,
                                                              time_interval,
                                                              loss)
                log(message)
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')
                if step % hparams.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)
                if step % hparams.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(
                        waveform,
                        os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, step=%d, loss=%.5f' %
                        ('tacotron', time_string(), step, loss))
                    log('Input: %s' % sequence_to_text(input_seq))
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            coord.request_stop(e)
import numpy as np
from util import audio

# Quick manual check: invert a saved linear spectrogram back to audio.
# NOTE(review): paths are machine-specific — adjust before running elsewhere.
spectrogram = np.load('/home/toan/tacotron/training/ljspeech-spec-00844.npy')
wav = audio.inv_spectrogram(spectrogram.T)
audio.save_wav(wav, '/home/toan/tacotron/test.wav')
def train(log_dir, args):
    """Multi-GPU training with manually averaged tower gradients and optional
    L2 weight decay / voice-print (ResCNN) conditioning.

    Overrides several hparams from CLI args, splits each feeder batch across
    GPUs, averages per-tower gradients on the CPU, and periodically saves
    checkpoints plus audio/alignment debug output.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())
    # graph
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        #new attributes of hparams
        #hparams.num_GPU = len(GPUs_id)
        #hparams.datasets = eval(args.datasets)
        # NOTE(review): eval() on CLI args assumes trusted input.
        hparams.datasets = eval(args.datasets)
        hparams.prenet_layer1 = args.prenet_layer1
        hparams.prenet_layer2 = args.prenet_layer2
        hparams.gru_size = args.gru_size
        hparams.attention_size = args.attention_size
        hparams.rnn_size = args.rnn_size
        hparams.enable_fv1 = args.enable_fv1
        hparams.enable_fv2 = args.enable_fv2
        if args.batch_size:
            hparams.batch_size = args.batch_size
        # Multi-GPU settings
        GPUs_id = eval(args.GPUs_id)
        hparams.num_GPU = len(GPUs_id)
        tower_grads = []
        tower_loss = []
        models = []
        # global_step starts at -1 so the first optimizer apply brings it to 0.
        global_step = tf.Variable(-1, name='global_step', trainable=False)
        if hparams.decay_learning_rate:
            learning_rate = _learning_rate_decay(hparams.initial_learning_rate,
                                                 global_step, hparams.num_GPU)
        else:
            learning_rate = tf.convert_to_tensor(hparams.initial_learning_rate)
        # Set up DataFeeder:
        coord = tf.train.Coordinator()
        with tf.variable_scope('datafeeder') as scope:
            input_path = os.path.join(args.base_dir, args.input)
            feeder = DataFeeder(coord, input_path, hparams)
            # Split each batch evenly across the GPUs along axis 0.
            inputs = feeder.inputs
            inputs = tf.split(inputs, hparams.num_GPU, 0)
            input_lengths = feeder.input_lengths
            input_lengths = tf.split(input_lengths, hparams.num_GPU, 0)
            mel_targets = feeder.mel_targets
            mel_targets = tf.split(mel_targets, hparams.num_GPU, 0)
            linear_targets = feeder.linear_targets
            linear_targets = tf.split(linear_targets, hparams.num_GPU, 0)
        # Set up model: one tower per GPU sharing variables.
        with tf.variable_scope('model') as scope:
            optimizer = tf.train.AdamOptimizer(learning_rate,
                                               hparams.adam_beta1,
                                               hparams.adam_beta2)
            for i, GPU_id in enumerate(GPUs_id):
                with tf.device('/gpu:%d' % GPU_id):
                    with tf.name_scope('GPU_%d' % GPU_id):
                        if hparams.enable_fv1 or hparams.enable_fv2:
                            # Voice-print feature extracted from the mel target.
                            net = ResCNN(data=mel_targets[i],
                                         batch_size=hparams.batch_size,
                                         hyparam=hparams)
                            net.inference()
                            voice_print_feature = tf.reduce_mean(
                                net.features, 0)
                        else:
                            voice_print_feature = None
                        models.append(None)
                        models[i] = create_model(args.model, hparams)
                        models[i].initialize(
                            inputs=inputs[i],
                            input_lengths=input_lengths[i],
                            mel_targets=mel_targets[i],
                            linear_targets=linear_targets[i],
                            voice_print_feature=voice_print_feature)
                        models[i].add_loss()
                        """L2 weight decay loss."""
                        if args.weight_decay > 0:
                            costs = []
                            for var in tf.trainable_variables():
                                #if var.op.name.find(r'DW') > 0:
                                costs.append(tf.nn.l2_loss(var))
                                # tf.summary.histogram(var.op.name, var)
                            weight_decay = tf.cast(args.weight_decay,
                                                   tf.float32)
                            # cost = loss WITHOUT decay; models[i].loss gets decay added.
                            cost = models[i].loss
                            models[i].loss += tf.multiply(
                                weight_decay, tf.add_n(costs))
                            cost_pure_wd = tf.multiply(weight_decay,
                                                       tf.add_n(costs))
                        else:
                            cost = models[i].loss
                            cost_pure_wd = tf.constant([0])
                        tower_loss.append(models[i].loss)
                        # Share variables between towers from here on.
                        tf.get_variable_scope().reuse_variables()
                        models[i].add_optimizer(global_step, optimizer)
                        tower_grads.append(models[i].gradients)
            # calculate average gradient
            gradients = average_gradients(tower_grads)
            stats = add_stats(models[0], gradients, learning_rate)
        time.sleep(10)
        # apply average gradient
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            apply_gradient_op = optimizer.apply_gradients(
                gradients, global_step=global_step)
        # Bookkeeping:
        step = 0
        time_window = ValueWindow(100)
        loss_window = ValueWindow(100)
        saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)
        # Train!
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            try:
                summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
                sess.run(tf.global_variables_initializer())
                if args.restore_step:
                    # Restore from a checkpoint if the user requested it.
                    restore_path = '%s-%d' % (checkpoint_path,
                                              args.restore_step)
                    saver.restore(sess, restore_path)
                    log('Resuming from checkpoint: %s at commit: %s' %
                        (restore_path, commit),
                        slack=True)
                else:
                    log('Starting new training run at commit: %s' % commit,
                        slack=True)
                feeder.start_in_session(sess)
                while not coord.should_stop():
                    start_time = time.time()
                    model = models[0]
                    step, loss, opt, loss_wd, loss_pure_wd = sess.run([
                        global_step, cost, apply_gradient_op, model.loss,
                        cost_pure_wd
                    ])
                    feeder._batch_in_queue -= 1
                    log('feed._batch_in_queue: %s' %
                        str(feeder._batch_in_queue),
                        slack=True)
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, loss_wd=%.05f, loss_pure_wd=%.05f]' % (
                        step, time_window.average, loss, loss_window.average,
                        loss_wd, loss_pure_wd)
                    log(message, slack=(step % args.checkpoint_interval == 0))
                    #if the gradient seems to explode, then restore to the previous step
                    if loss > 2 * loss_window.average or math.isnan(loss):
                        log('recover to the previous checkpoint')
                        #tf.reset_default_graph()
                        restore_step = int(
                            (step - 10) / args.checkpoint_interval
                        ) * args.checkpoint_interval
                        restore_path = '%s-%d' % (checkpoint_path,
                                                  restore_step)
                        saver.restore(sess, restore_path)
                        continue
                    if loss > 100 or math.isnan(loss):
                        log('Loss exploded to %.05f at step %d!' %
                            (loss, step),
                            slack=True)
                        raise Exception('Loss Exploded')
                    try:
                        if step % args.summary_interval == 0:
                            log('Writing summary at step: %d' % step)
                            summary_writer.add_summary(sess.run(stats), step)
                    except:
                        # NOTE(review): bare except silently drops summary
                        # failures — kept as-is in this documentation pass.
                        pass
                    if step % args.checkpoint_interval == 0:
                        log('Saving checkpoint to: %s-%d' %
                            (checkpoint_path, step))
                        saver.save(sess, checkpoint_path, global_step=step)
                        log('Saving audio and alignment...')
                        input_seq, spectrogram, alignment = sess.run([
                            model.inputs[0], model.linear_outputs[0],
                            model.alignments[0]
                        ])
                        waveform = audio.inv_spectrogram(spectrogram.T)
                        audio.save_wav(
                            waveform,
                            os.path.join(log_dir, 'step-%d-audio.wav' % step))
                        plot.plot_alignment(
                            alignment,
                            os.path.join(log_dir, 'step-%d-align.png' % step),
                            info='%s, %s, %s, step=%d, loss=%.5f' %
                            (args.model, commit, time_string(), step, loss))
                        log('Input: %s' % sequence_to_text(input_seq))
            except Exception as e:
                log('Exiting due to exception: %s' % e, slack=True)
                traceback.print_exc()
                coord.request_stop(e)
def train(log_dir, args):
    """Basic Tacotron training loop with stop-token targets; saves a
    checkpoint plus sample audio and an alignment plot every
    ``checkpoint_interval`` steps."""
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    # Log the model's path information (translated from Chinese comment).
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)
    # Initialize the model.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = Tacotron(hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets,
                         feeder.stop_token_targets, global_step)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=1)
    # Start training.
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            feeder.start_in_session(sess)
            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))
                if step % args.summary_interval == 0:
                    summary_writer.add_summary(sess.run(stats), step)
                # Write a checkpoint every checkpoint_interval steps.
                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    # Synthesize a sample wav from the predicted spectrogram.
                    audio.save_wav(
                        waveform,
                        os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    time_string = datetime.now().strftime('%Y-%m-%d %H:%M')
                    # Plot the encoder-decoder alignment.
                    infolog.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, step=%d, loss=%.5f' %
                        (args.model, time_string, step, loss))
                    # Log the text of the synthesized sample.
                    log('Input: %s' % sequence_to_text(input_seq))
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
def train(log_dir, args, trans_ckpt_dir=None):
    """Train a Tacotron model, optionally warm-starting (transfer learning)
    from a checkpoint under ``trans_ckpt_dir``.

    New checkpoints are always written under ``log_dir``; restores read from
    ``trans_ckpt_dir`` when provided, else from ``log_dir``.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    if trans_ckpt_dir != None:
        trans_checkpoint_path = os.path.join(trans_ckpt_dir, 'model.ckpt')
    else:
        # BUG FIX: trans_checkpoint_path was referenced unconditionally below
        # (log + restore) and raised UnboundLocalError whenever trans_ckpt_dir
        # was None; fall back to the regular checkpoint path.
        trans_checkpoint_path = checkpoint_path
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % trans_checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())
    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)
    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)
    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)
    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (trans_checkpoint_path,
                                          args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)
            feeder.start_in_session(sess)
            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')
                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)
                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(
                        waveform,
                        os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, loss))
                    log('Input: %s' % sequence_to_text(input_seq))
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
def train(log_dir, args):
    """Fixed-iteration training loop driven by a tf.data pipeline.

    Re-derives ``log_dir`` from args (the parameter value is overwritten),
    sorts the metadata by its third field, and checkpoints with debug audio
    every ``checkpoint_interval`` steps for up to ``max_iter`` iterations.
    """
    run_name = args.name or args.model
    # NOTE: the log_dir parameter is intentionally replaced here.
    log_dir = os.path.join(args.base_dir, 'logs-%s' % run_name)
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'train.log'), run_name, args.slack_url)
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    with open(args.input, encoding='utf-8') as f:
        # Pipe-separated metadata rows, sorted by the third column
        # (presumably a length/duration field — TODO confirm).
        metadata = [row.strip().split('|') for row in f]
        metadata = sorted(metadata, key=lambda x: x[2])
    data_element = get_dataset(metadata, args.data_dir, hparams)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(data_element['input'],
                         data_element['input_lengths'],
                         data_element['mel_targets'],
                         data_element['linear_targets'])
        model.add_loss()
        model.add_optimizer(global_step)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for _ in range(int(args.max_iter)):
        start_time = time.time()
        step, mel_loss, lin_loss, loss, opt = sess.run([
            global_step, model.mel_loss, model.linear_loss, model.loss,
            model.optimize
        ])
        end_time = time.time()
        message = 'Step %7d [%.03f sec/step, loss = %.05f (mel : %.05f + lin : %.05f)]' % (
            step, end_time - start_time, loss, mel_loss, lin_loss)
        log(message)
        if loss > 100 or math.isnan(loss):
            log('Loss exploded to %.05f at step %d!' % (loss, step))
            raise Exception('Loss Exploded')
        if step % args.checkpoint_interval == 0:
            log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
            saver.save(sess, checkpoint_path, global_step=step)
            log('Saving audio and alignment...')
            input_seq, spectrogram, alignment = sess.run([
                model.inputs[0], model.linear_outputs[0], model.alignments[0]
            ])
            waveform = audio.inv_spectrogram(spectrogram.T)
            audio.save_wav(waveform,
                           os.path.join(log_dir, 'step-%d-audio.wav' % step))
            plot.plot_alignment(alignment,
                                os.path.join(log_dir,
                                             'step-%d-align.png' % step),
                                info='%s, %s, step=%d, loss=%.5f' %
                                (args.model, time_string(), step, loss))
            log('Input: %s' % sequence_to_text(input_seq))
def train(log_dir, args):
    """Adversarial (GAN-style) training on paired positive/negative data.

    Each iteration runs the discriminator optimizer, then the generator
    optimizer together with the losses; checkpoints save audio and alignment
    plots for both the positive and negative branches.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    ## input path is lists of both postive path and negtiva path
    input_path_pos = os.path.join(args.base_dir, args.input_pos)
    input_path_neg = os.path.join(args.base_dir, args.input_neg)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading positive training data from: %s' % input_path_pos)
    log('Loading negative training data from: %s' % input_path_neg)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())
    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path_pos, input_path_neg, hparams)
    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs_pos, feeder.input_lengths_pos,
                         feeder.mel_targets_pos, feeder.linear_targets_pos,
                         feeder.mel_targets_neg, feeder.linear_targets_neg,
                         feeder.labels_pos, feeder.labels_neg)
        model.add_loss()
        model.add_optimizer(global_step)
    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)
    # Train!
    with tf.Session() as sess:
        try:
            #summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)
            feeder.start_in_session(sess)
            while not coord.should_stop():
                start_time = time.time()
                # train d (discriminator step)
                sess.run(model.d_optimize)
                # train g (generator step + fetch all losses)
                step, rec_loss, style_loss, d_loss, g_loss, _ = sess.run([
                    global_step, model.rec_loss, model.style_loss,
                    model.d_loss, model.g_loss, model.g_optimize
                ])
                time_window.append(time.time() - start_time)
                message = 'Step %-7d [%.03f sec/step, rec_loss=%.05f, style_loss=%.05f, d_loss=%.05f, g_loss=%.05f]' % (
                    step, time_window.average, rec_loss, style_loss, d_loss,
                    g_loss)
                log(message, slack=(step % args.checkpoint_interval == 0))
                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    input_seq, spectrogram_pos, spectrogram_neg, alignment_pos, alignment_neg = sess.run(
                        [
                            model.inputs[0], model.linear_outputs_pos[0],
                            model.linear_outputs_neg[0],
                            model.alignments_pos[0], model.alignments_neg[0]
                        ])
                    waveform_pos = audio.inv_spectrogram(spectrogram_pos.T)
                    waveform_neg = audio.inv_spectrogram(spectrogram_neg.T)
                    audio.save_wav(
                        waveform_pos,
                        os.path.join(log_dir, 'step-%d-audio_pos.wav' % step))
                    audio.save_wav(
                        waveform_neg,
                        os.path.join(log_dir, 'step-%d-audio_neg.wav' % step))
                    plot.plot_alignment(
                        alignment_pos,
                        os.path.join(log_dir, 'step-%d-align_pos.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, rec_loss))
                    plot.plot_alignment(
                        alignment_neg,
                        os.path.join(log_dir, 'step-%d-align_neg.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, rec_loss))
                    log('Input: %s' % sequence_to_text(input_seq))
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
def testing(spectrogram_path='olli-mel-00001.npy', out_path='test-audio.wav'):
    """Smoke-test the audio pipeline by resynthesising a saved spectrogram.

    Loads a spectrogram from a ``.npy`` file, inverts it to a waveform and
    writes the result to a wav file.

    Args:
        spectrogram_path: path to the ``.npy`` spectrogram to load.
            Defaults to the previously hard-coded sample file, so existing
            no-argument calls behave exactly as before.
        out_path: destination wav file. Defaults to the previously
            hard-coded 'test-audio.wav'.
    """
    spectrogram = np.load(spectrogram_path)
    # Transposed before inversion, matching every other inv_spectrogram call
    # site in this file — presumably it expects (bins, frames); TODO confirm.
    wav = audio.inv_spectrogram(spectrogram.T)
    audio.save_wav(wav, out_path)
def train(log_dir, args):
    """Run the Tacotron training loop, periodically checkpointing and
    uploading a synthesized audio sample to Slack.

    Args:
        log_dir: directory that receives checkpoints, TF summaries and
            sample wav files.
        args: parsed CLI namespace. Fields used here: git, base_dir, input,
            model, vgg19_pretrained_model, restore_step, summary_interval,
            checkpoint_interval.

    Raises:
        Exception: 'Loss Exploded' when loss > 100 or NaN; any exception is
            logged and forwarded to the coordinator via request_stop.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as _:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as _:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, args.vgg19_pretrained_model,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping:
    # NOTE(review): other train() variants in this file size these windows
    # with ValueWindow(100); here the default is used — confirm intended.
    time_window = ValueWindow()
    loss_window = ValueWindow()
    saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)

    # Train!
    with tf.Session() as sess:
        try:
            train_start_time = time.time()
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                checkpoint_saver = tf.train.import_meta_graph(
                    '%s.%s' % (restore_path, 'meta'))
                checkpoint_saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit))
            else:
                log('Starting new training run at commit: %s' % commit)
            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.summary_interval == 0))

                if loss > 100 or math.isnan(loss):
                    # Abort on divergence; the exception is routed to the
                    # coordinator below.
                    log('Loss exploded to %.05f at step %d!' % (loss, step))
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio...')
                    # Only the first batch element's linear output is used.
                    _, spectrogram, _ = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio_path = os.path.join(log_dir,
                                              'step-%d-audio.wav' % step)
                    audio.save_wav(waveform, audio_path)
                    infolog.upload_to_slack(audio_path, step)
                    # Fix: renamed `min` -> `mins` so the builtin min() is
                    # not shadowed inside this function.
                    time_so_far = time.time() - train_start_time
                    hrs, rest = divmod(time_so_far, 3600)
                    mins, secs = divmod(rest, 60)
                    log('{:.0f} hrs, {:.0f}mins and {:.1f}sec since the training process began'
                        .format(hrs, mins, secs))
                if asked_to_stop(step):
                    coord.request_stop()
        except Exception as e:
            log('@channel: Exiting due to exception: %s' % e)
            traceback.print_exc()
            coord.request_stop(e)
def train(log_dir, args, input):
    """Train a Tacotron variant with either GTA or EAL (explicit alignment
    locking) supervision, checkpointing audio/alignment diagnostics.

    Args:
        log_dir: directory for checkpoints, summaries, wavs and plots.
        input: training-metadata path component, joined onto args.base_dir.
            (NOTE(review): shadows the builtin `input`; kept as-is.)
        args: parsed CLI namespace; uses git, base_dir, variant, eal_dir,
            eal_ckpt, eal_ft, eal_trainAlign, eal_trainJoint, eal_alignScale,
            restore_step, num_steps, summary_interval, checkpoint_interval.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.variant)
    log(hparams_debug_string())

    # Set up DataFeeder: the EAL feeder additionally supplies locked
    # alignments loaded from args.eal_dir.
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        if args.eal_dir:
            from tacotron.datafeeder import DataFeeder_EAL
            feeder = DataFeeder_EAL(coord, input_path, hparams, args.eal_dir)
        else:
            from tacotron.datafeeder import DataFeeder
            feeder = DataFeeder(coord, input_path, hparams)

    # Set up model: eal=True locks decoder attention to precomputed
    # alignments; otherwise ground-truth-aligned (gta) teacher forcing.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.variant, hparams)
        if args.eal_dir:
            model.initialize(feeder.inputs, feeder.input_lengths,
                             feeder.mel_targets, feeder.linear_targets,
                             feeder.pml_targets, is_training=True, eal=True,
                             locked_alignments=feeder.locked_alignments,
                             flag_trainAlign=args.eal_trainAlign,
                             flag_trainJoint=args.eal_trainJoint,
                             alignScale=args.eal_alignScale)
        else:
            model.initialize(feeder.inputs, feeder.input_lengths,
                             feeder.mel_targets, feeder.linear_targets,
                             feeder.pml_targets, is_training=True, gta=True)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model, eal_dir=args.eal_dir)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Set up fixed alignment synthesizer
    alignment_synth = AlignmentSynthesizer()

    # Set up text for synthesis (fixed sentence so attention plots are
    # comparable across checkpoints).
    fixed_sentence = 'Scientists at the CERN laboratory say they have discovered a new particle.'

    # Set up denormalisation parameters for synthesis. Mean/std are optional;
    # when absent the PML synthesizer is given None for both.
    mean_path = os.path.abspath(os.path.join(args.base_dir, input, '..', 'pml_data/mean.dat'))
    std_path = os.path.abspath(os.path.join(args.base_dir, input, '..', 'pml_data/std.dat'))
    log('Loading normalisation mean from: {}'.format(mean_path))
    log('Loading normalisation standard deviation from: {}'.format(std_path))
    mean_norm = None
    std_norm = None
    if os.path.isfile(mean_path) and os.path.isfile(std_path):
        mean_norm = np.fromfile(mean_path, 'float32')
        std_norm = np.fromfile(std_path, 'float32')

    # Train!
    # import pdb
    # flag_pdb = False
    # pdb.set_trace()
    # args.checkpoint_interval = 2
    # args.num_steps = 5
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            # pdb.set_trace()
            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
            elif args.eal_dir and args.eal_ckpt:
                # EAL warm starts: choose which variables to load from the
                # (non-EAL) source checkpoint.
                if args.eal_trainAlign or args.eal_trainJoint:
                    # Load trainables plus batch-norm moving statistics.
                    list_var = tf.trainable_variables() + [v for v in tf.global_variables() if 'moving' in v.name]
                    saver_eal = tf.train.Saver(list_var)
                    saver_eal.restore(sess, args.eal_ckpt)
                    log('Loaded weights and batchNorm cache of checkpoint: %s at commit: %s' % (args.eal_ckpt, commit), slack=True)
                elif args.eal_ft:
                    # Fine-tune: restore everything, optimizer state included.
                    saver.restore(sess, args.eal_ckpt)
                    log('Refining the model from checkpoint: %s at commit: %s' % (args.eal_ckpt, commit), slack=True)
                else:
                    # Fresh optimizer: restore everything except optimizer vars.
                    list_var = [var for var in tf.global_variables() if 'optimizer' not in var.name]
                    saver_eal = tf.train.Saver(list_var)
                    saver_eal.restore(sess, args.eal_ckpt)
                    log('Initializing the weights from checkpoint: %s at commit: %s' % (args.eal_ckpt, commit), slack=True)
                    # args.num_steps *= 2
                    # sess.run(global_step.assign(0))
            else:
                log('Starting new training run at commit: %s' % commit, slack=True)

            feeder.start_in_session(sess)
            step = 0  # initialise step variable so can use in while condition

            while not coord.should_stop() and step <= args.num_steps:
                # pdb.set_trace()
                start_time = time.time()
                if args.eal_trainAlign:
                    # loss_window tracks the alignment loss in this mode.
                    step, loss, loss_align, opt = sess.run([global_step, model.loss, model.loss_align, model.optimize])
                    # try:
                    #     step, loss, loss_align, opt, tmp_a, tmp_ar = sess.run([global_step, model.loss, model.loss_align, model.optimize,
                    #                                                            model.alignments, model.alignments_ref])
                    # except:
                    #     print("Oops!",sys.exc_info()[0],"occured.")
                    #     flag_pdb = True
                    # if flag_pdb or np.isnan(loss_align):
                    #     pdb.set_trace()
                    #     flag_pdb = False
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss_align)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, loss_align=%.05f, avg_loss_align=%.05f]' % (
                        step, time_window.average, loss, loss_align, loss_window.average)
                elif args.eal_trainJoint:
                    # loss_window tracks the joint loss in this mode.
                    step, loss, loss_align, loss_joint, opt = sess.run([global_step, model.loss, model.loss_align, model.loss_joint, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss_joint)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, loss_align=%.05f, avg_loss_joint=%.05f]' % (
                        step, time_window.average, loss, loss_align, loss_window.average)
                else:
                    step, loss, opt = sess.run([global_step, model.loss, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                        step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!'
                        % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    summary_elements = []

                    # if the model has linear spectrogram features, use them to synthesize audio
                    if hasattr(model, 'linear_targets'):
                        input_seq, alignment, target_spectrogram, spectrogram = sess.run([
                            model.inputs[0], model.alignments[0], model.linear_targets[0], model.linear_outputs[0]])
                        output_waveform = audio.inv_spectrogram(spectrogram.T)
                        target_waveform = audio.inv_spectrogram(target_spectrogram.T)
                        audio.save_wav(output_waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
                        audio.save_wav(target_waveform, os.path.join(log_dir, 'step-%d-target-audio.wav' % step))
                    # otherwise, synthesize audio from PML vocoder features
                    elif hasattr(model, 'pml_targets'):
                        input_seq, alignment, target_pml_features, pml_features = sess.run([
                            model.inputs[0], model.alignments[0], model.pml_targets[0], model.pml_outputs[0]])
                        cfg = Configuration(hparams.sample_rate, hparams.pml_dimension)
                        synth = PMLSynthesizer(cfg)
                        output_waveform = synth.pml_to_wav(pml_features, mean_norm=mean_norm, std_norm=std_norm,
                                                           spec_type=hparams.spec_type)
                        target_waveform = synth.pml_to_wav(target_pml_features, mean_norm=mean_norm, std_norm=std_norm,
                                                           spec_type=hparams.spec_type)
                        sp.wavwrite(os.path.join(log_dir, 'step-%d-target-audio.wav' % step), target_waveform,
                                    hparams.sample_rate, norm_max_ifneeded=True)
                        sp.wavwrite(os.path.join(log_dir, 'step-%d-audio.wav' % step), output_waveform,
                                    hparams.sample_rate, norm_max_ifneeded=True)

                    # we need to adjust the output and target waveforms so the values lie in the interval [-1.0, 1.0]
                    # NOTE(review): if neither hasattr branch above fired, these
                    # names would be unbound — presumably one always holds.
                    output_waveform /= 1.05 * np.max(np.abs(output_waveform))
                    target_waveform /= 1.05 * np.max(np.abs(target_waveform))

                    summary_elements.append(
                        tf.summary.audio('ideal-%d' % step, np.expand_dims(target_waveform, 0), hparams.sample_rate),
                    )
                    summary_elements.append(
                        tf.summary.audio('sample-%d' % step, np.expand_dims(output_waveform, 0), hparams.sample_rate),
                    )

                    # get the alignment for the top sentence in the batch
                    random_attention_plot = plot.plot_alignment(alignment, os.path.join(log_dir, 'step-%d-random-align.png' % step),
                                                                info='%s, %s, %s, step=%d, loss=%.5f' % (
                                                                    args.variant, commit, time_string(), step, loss))
                    summary_elements.append(
                        tf.summary.image('attention-%d' % step, random_attention_plot),
                    )

                    # also process the alignment for a fixed sentence for comparison
                    alignment_synth.load('%s-%d' % (checkpoint_path, step), hparams, model_name=args.variant)
                    fixed_alignment = alignment_synth.synthesize(fixed_sentence)
                    fixed_attention_plot = plot.plot_alignment(fixed_alignment, os.path.join(log_dir, 'step-%d-fixed-align.png' % step),
                                                               info='%s, %s, %s, step=%d, loss=%.5f' % (
                                                                   args.variant, commit, time_string(), step, loss))
                    summary_elements.append(
                        tf.summary.image('fixed-attention-%d' % step, fixed_attention_plot),
                    )

                    # save the audio and alignment to tensorboard (audio sample rate is hyperparameter)
                    merged = sess.run(tf.summary.merge(summary_elements))
                    summary_writer.add_summary(merged, step)

                    log('Input: %s' % sequence_to_text(input_seq))
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
def train(log_dir, args):
    """Distributed (between-graph replicated) Tacotron training.

    Spawns either a parameter-server or a worker role depending on
    args.job_name; workers build the graph under a replica device setter
    and train via a tf.train.Supervisor managed session. Only the chief
    (task_index 0) restores/saves checkpoints and writes samples.

    Args:
        log_dir: directory for checkpoints, summaries, wavs and plots.
        args: parsed CLI namespace; uses git, base_dir, input, model,
            ps_hosts, worker_hosts, job_name, task_index, restore_step,
            summary_interval, checkpoint_interval.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    ps_hosts = args.ps_hosts.split(",")
    worker_hosts = args.worker_hosts.split(",")
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster, job_name=args.job_name,
                             task_index=args.task_index)

    # Block further graph execution if current node is parameter server
    if args.job_name == "ps":
        server.join()

    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % args.task_index,
                cluster=cluster)):
        # Set up DataFeeder:
        coord = tf.train.Coordinator()
        with tf.variable_scope('datafeeder') as scope:
            feeder = DataFeeder(coord, input_path, hparams)

        # Set up model:
        global_step = tf.Variable(0, name='global_step', trainable=False)
        with tf.variable_scope('model') as scope:
            model = create_model(args.model, hparams)
            model.initialize(feeder.inputs, feeder.input_lengths,
                             feeder.mel_targets, feeder.linear_targets)
            model.add_loss()
            model.add_optimizer(global_step)
            stats = add_stats(model)

        # Bookkeeping:
        step = 0
        time_window = ValueWindow(100)
        loss_window = ValueWindow(100)
        # sharded=True: each parameter server writes its own checkpoint shard.
        saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2,
                               sharded=True)
        hooks = [tf.train.StopAtStepHook(last_step=1000000)]

        # Train!
        # The supervisor's managed session automatically resumes from the
        # checkpoint in its logdir.
        is_chief = (args.task_index == 0)
        init_op = tf.global_variables_initializer()
        sv = tf.train.Supervisor(is_chief=(args.task_index == 0),
                                 logdir="train_logs",
                                 init_op=init_op,
                                 summary_op=stats,
                                 saver=saver,
                                 save_model_secs=600)

        with sv.managed_session(server.target) as sess:
            try:
                summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
                # NOTE(review): the supervisor already runs init_op for the
                # chief; this explicit run re-initializes — confirm intended.
                sess.run(init_op)
                if args.restore_step and is_chief:
                    # Restore from a checkpoint if the user requested it.
                    restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                    saver.restore(sess, restore_path)
                    log('Resuming from checkpoint: %s at commit: %s' %
                        (restore_path, commit), slack=True)
                else:
                    log('Starting new training run at commit: %s' % commit,
                        slack=True)
                feeder.start_in_session(sess)
                while not coord.should_stop():
                    start_time = time.time()
                    step, loss, opt = sess.run(
                        [global_step, model.loss, model.optimize])
                    time_window.append(time.time() - start_time)
                    loss_window.append(loss)
                    message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                        step, time_window.average, loss, loss_window.average)
                    log(message, slack=(step % args.checkpoint_interval == 0))
                    if loss > 100 or math.isnan(loss):
                        log('Loss exploded to %.05f at step %d!'
                            % (loss, step), slack=True)
                        raise Exception('Loss Exploded')
                    if step % args.summary_interval == 0:
                        log('Writing summary at step: %d' % step)
                        summary_writer.add_summary(sess.run(stats), step)
                    # Only the chief worker writes checkpoints and samples.
                    if step % args.checkpoint_interval == 0 and is_chief:
                        log('Saving checkpoint to: %s-%d' %
                            (checkpoint_path, step))
                        saver.save(sess, checkpoint_path, global_step=step)
                        log('Saving audio and alignment...')
                        input_seq, spectrogram, alignment = sess.run([
                            model.inputs[0], model.linear_outputs[0],
                            model.alignments[0]
                        ])
                        waveform = audio.inv_spectrogram(spectrogram.T)
                        audio.save_wav(
                            waveform,
                            os.path.join(log_dir, 'step-%d-audio.wav' % step))
                        plot.plot_alignment(
                            alignment,
                            os.path.join(log_dir, 'step-%d-align.png' % step),
                            info='%s, %s, %s, step=%d, loss=%.5f' %
                            (args.model, commit, time_string(), step, loss))
                        log('Input: %s' % sequence_to_text(input_seq))
            except Exception as e:
                log('Exiting due to exception: %s' % e, slack=True)
                traceback.print_exc()
                coord.request_stop(e)
# Batch resynthesis: turn saved spectrogram .npy files back into wav files.
# Earlier one-off runs (kept for reference) pointed at a single file or at
# the eval/gta/eal outputs of tacotron-bk2orig; the template below selects
# the eal-scratch eval set. The '%s/%s' slots are (subdirectory, filename).
dirFile_tmp = '/home/dawna/tts/qd212/models/tacotron/results/tacotron-bk2orig-eal-scratch/eval/%s/%s'

# Make sure the wav output directory exists before writing into it.
os.makedirs(dirFile_tmp % ('wav', ''), exist_ok=True)

# Utterance ids to convert (other candidate lists were tried previously).
name_list = [
    'LJ001-0073',
    'LJ003-0229',
    'LJ003-0296',
    'LJ003-0304',
    'LJ004-0208',
]

for name in name_list:
    dirFile_npy = dirFile_tmp % ('npy', name + '.npy')
    dirFile_wav = dirFile_tmp % ('wav', name + '.wav')
    # Load the stored spectrogram, invert it (transposed, matching the
    # other inv_spectrogram call sites in this file) and write the wav.
    target_spectrogram = np.load(dirFile_npy)
    target_waveform = audio.inv_spectrogram(target_spectrogram.T)
    audio.save_wav(target_waveform, dirFile_wav)
def train(log_dir, args):
    """Single-process Tacotron training loop.

    Builds the data feeder and model, then runs optimization steps forever,
    writing summaries every args.summary_interval steps and a checkpoint
    plus a sample wav/alignment plot every args.checkpoint_interval steps.

    Args:
        log_dir: directory for checkpoints, summaries, wavs and plots.
        args: parsed CLI namespace; uses git, base_dir, input, model,
            restore_step, summary_interval, checkpoint_interval.

    Raises:
        Exception: 'Loss Exploded' when loss > 100 or NaN; any exception is
            logged and forwarded to the coordinator via request_stop.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping: rolling averages over the last 100 steps.
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit), slack=True)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)
            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    # Abort on divergence; routed to the coordinator below.
                    log('Loss exploded to %.05f at step %d!'
                        % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    # Diagnostics use only the first element of the batch.
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(waveform,
                                   os.path.join(log_dir,
                                                'step-%d-audio.wav' % step))
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, loss))
                    log('Input: %s' % sequence_to_text(input_seq))
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
def train(log_dir, args):
    """Tacotron training loop that also reports mel/linear loss components.

    Like the other train() variants in this file, but fetches model.mel_loss
    and model.linear_loss each step for logging, and passes the decoded input
    text into plot_alignment. Note: this variant has no loss-explosion guard.

    Args:
        log_dir: directory for checkpoints, summaries, wavs and plots.
        args: parsed CLI namespace; uses base_dir, input, model,
            restore_step, summary_interval, checkpoint_interval.
    """
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s' % (restore_path),
                    slack=True)
            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                # Fetch the component losses alongside the total for logging.
                step, loss, opt, mel_loss, linear_loss = \
                    sess.run([global_step, model.loss, model.optimize,
                              model.mel_loss, model.linear_loss])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f, mel_loss=%.5f, linear_loss=%.5f]' % (
                    step, time_window.average, loss, loss_window.average,
                    mel_loss, linear_loss)
                log(message, slack=(step % args.checkpoint_interval == 0))

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    # Diagnostics use only the first element of the batch.
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(waveform,
                                   os.path.join(log_dir,
                                                'step-%d-audio.wav' % step))
                    input_seq = sequence_to_text(input_seq)
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        input_seq,
                        info='%s, step=%d, loss=%.5f' %
                        (args.model, step, loss), istrain=1)
                    log('Input: %s' % input_seq)
        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)