def __init__(self, is_training=True):
    """Assemble the model graph: batch input, encoder, two decoders, L1 loss.

    Args:
        is_training: Forwarded to `get_batch`, `encode` and `decode2`. When
            true, the global step, Adam optimizer, train op and merged
            summary are created in addition to the forward pass and loss.
    """
    graph = tf.Graph()
    self.graph = graph
    with graph.as_default():
        batch = get_batch(is_training=is_training)
        self.x, self.y, self.z, self.num_batch = batch
        self.decoder_inputs = shift_by_one(self.y)

        # Encoder
        self.memory = encode(self.x, is_training=is_training)

        # Decoder
        self.outputs1 = decode1(self.decoder_inputs, self.memory)
        self.outputs2 = decode2(self.outputs1, is_training=is_training)

        # L1 loss: one term per decoder output, summed.
        loss_y = tf.reduce_mean(tf.abs(self.outputs1 - self.y))
        loss_z = tf.reduce_mean(tf.abs(self.outputs2 - self.z))
        self.loss = loss_y + loss_z

        if not is_training:
            return

        # Training scheme
        step = tf.Variable(0, name='global_step', trainable=False)
        self.global_step = step
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(self.loss, global_step=step)

        # Summary
        tf.summary.scalar('loss', self.loss)
        self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the embedding -> encoder -> decoder graph.

    Args:
        is_training: If true, `self.x`, `self.y` and `self.num_batch` come
            from `get_batch()` and the masked cross-entropy loss, Adam train
            op and merged summary are created. Otherwise `self.x` and
            `self.y` are int32 placeholders of shape (None, hp.max_len) that
            the caller feeds, and only the forward pass is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, hp.max_len))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.max_len))

        # Character embedding for x
        self.enc = embed(self.x, len(roma2idx), hp.embed_size, scope="emb_x")

        # Encoder. The mode flag is forwarded (rather than fixed to True) so
        # the evaluation graph does not run the encoder in training mode.
        self.memory = encode(self.enc, is_training=is_training)

        # Character embedding for decoder_inputs
        self.decoder_inputs = shift_by_one(self.y)
        self.dec = embed(self.decoder_inputs,
                         len(surf2idx),
                         hp.embed_size,
                         scope="emb_decoder_inputs")

        # Decoder. `self.outputs` is used as logits below; presumably the
        # last axis has len(surf2idx) entries -- confirm against `decode`.
        self.outputs = decode(self.dec,
                              self.memory,
                              len(surf2idx),
                              is_training=is_training)
        # The 1e-10 keeps the log finite when a probability is zero.
        self.logprobs = tf.log(tf.nn.softmax(self.outputs) + 1e-10)
        self.preds = tf.arg_max(self.outputs, dimension=-1)

        if is_training:
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.y, logits=self.outputs)
            # Masking: positions where y == 0 do not contribute to the loss.
            self.istarget = tf.to_float(
                tf.not_equal(self.y, tf.zeros_like(self.y)))
            # The 1e-5 avoids division by zero when no position is a target.
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / (
                tf.reduce_sum(self.istarget) + 1e-5)

            # Training scheme
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the network under the "net" scope and, for training, its loss.

    Args:
        is_training: If true, inputs come from `get_batch()` and the loss,
            Adam train op and merged summary are created. Otherwise
            `self.x` and `self.y` are placeholders fed by the caller and
            only the encoder/decoders are built.
    """
    graph = tf.Graph()
    self.graph = graph
    with graph.as_default():
        if not is_training:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))
        else:
            self.x, self.y, self.z, self.num_batch = get_batch()

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if not is_training:
            return

        # Loss: element-wise L1 when hp.loss_type == "l1", otherwise L2.
        if hp.loss_type == "l1":
            def elementwise(pred, target):
                return tf.abs(pred - target)
        else:
            elementwise = tf.squared_difference
        self.loss1 = elementwise(self.outputs1, self.y)
        self.loss2 = elementwise(self.outputs2, self.z)

        # Target masking: zero the loss wherever the target equals 0.
        if hp.target_zeros_masking:
            mask_y = tf.to_float(tf.not_equal(self.y, 0.))
            self.loss1 = self.loss1 * mask_y
            mask_z = tf.to_float(tf.not_equal(self.z, 0.))
            self.loss2 = self.loss2 * mask_z

        self.mean_loss1 = tf.reduce_mean(self.loss1)
        self.mean_loss2 = tf.reduce_mean(self.loss2)
        self.mean_loss = self.mean_loss1 + self.mean_loss2

        # Training scheme
        step = tf.Variable(0, name='global_step', trainable=False)
        self.global_step = step
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(self.mean_loss,
                                                global_step=step)

        # Summary
        for tag, value in (('mean_loss1', self.mean_loss1),
                           ('mean_loss2', self.mean_loss2),
                           ('mean_loss', self.mean_loss)):
            tf.summary.scalar(tag, value)
        self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the encoder/decoder graph with a masked cross-entropy loss.

    Args:
        is_training: Stored on `self.is_training`. If true, inputs come from
            `get_batch()` and the loss, Adam train op and merged summary are
            created. Otherwise `self.x` and `self.y` are placeholders fed by
            the caller and only the forward pass is built.
    """
    graph = tf.Graph()
    self.graph = graph
    self.is_training = is_training
    with graph.as_default():
        if not is_training:  # Evaluation
            self.x = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.max_len))
        else:
            self.x, self.y, self.num_batch = get_batch()

        shifted = shift_by_one(self.y)
        self.decoder_inputs = embed(shifted, len(char2idx),
                                    hp.embed_size)  # (N, T', E)

        with tf.variable_scope('net'):
            # Encoder
            self.memory = encode(
                self.x, is_training=is_training)  # (N, T, hp.n_mels*hp.r)

            # Decoder
            self.outputs = decode(self.decoder_inputs,
                                  self.memory,
                                  is_training=is_training)  # (N, T', E)
            probs = tf.nn.softmax(self.outputs)
            # The 1e-10 keeps the log finite when a probability is zero.
            self.logprobs = tf.log(probs + 1e-10)
            self.preds = tf.arg_max(self.outputs, dimension=-1)

        if not is_training:
            return

        # Loss
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y, logits=self.outputs)

        # Target masking: positions where y == 0 are excluded from the mean.
        self.istarget = tf.to_float(tf.not_equal(self.y, 0))
        masked_total = tf.reduce_sum(self.loss * self.istarget)
        self.mean_loss = masked_total / (tf.reduce_sum(self.istarget) + 1e-7)

        # Training scheme
        step = tf.Variable(0, name='global_step', trainable=False)
        self.global_step = step
        self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
        self.train_op = self.optimizer.minimize(self.mean_loss,
                                                global_step=step)

        # Summary
        tf.summary.scalar('mean_loss', self.mean_loss)
        self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build a multi-GPU training graph or a single-tower evaluation graph.

    Args:
        is_training: If true, the batch from `get_batch()` is split into
            `hp.num_gpus` parts along axis 0, one tower per GPU is built
            with shared variables under the 'net' scope, and per-tower
            losses and gradients are averaged on the CPU before being
            applied. Otherwise `self.x` and `self.decoder_inputs` are
            placeholders fed by the caller and one forward pass is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
            self.decoder_inputs = shift_by_one(self.y)

            # Note that batch size was multiplied by # gpus.
            # Now we split the mini-batch data by # gpus.
            self.x = tf.split(self.x, hp.num_gpus, 0)
            self.y = tf.split(self.y, hp.num_gpus, 0)
            self.z = tf.split(self.z, hp.num_gpus, 0)
            self.decoder_inputs = tf.split(self.decoder_inputs,
                                           hp.num_gpus, 0)

            # Optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            self.losses, self.grads_and_vars_list = [], []
            for i in range(hp.num_gpus):
                # Tower 0 creates the variables; later towers reuse them.
                with tf.variable_scope('net', reuse=bool(i)):
                    with tf.device('/gpu:{}'.format(i)):
                        with tf.name_scope('gpu_{}'.format(i)):
                            # Encoder
                            self.memory = encode(
                                self.x[i],
                                is_training=is_training)  # (N, T, E)

                            # Decoder
                            self.outputs1 = decode1(
                                self.decoder_inputs[i],
                                self.memory)  # (N, T', hp.n_mels*hp.r)
                            self.outputs2 = decode2(
                                self.outputs1, is_training=is_training
                            )  # (N, T', (1+hp.n_fft//2)*hp.r)

                            # Loss
                            if hp.loss_type == "l1":  # L1 loss
                                self.loss1 = tf.abs(self.outputs1 - self.y[i])
                                self.loss2 = tf.abs(self.outputs2 - self.z[i])
                            else:  # L2 loss
                                self.loss1 = tf.squared_difference(
                                    self.outputs1, self.y[i])
                                self.loss2 = tf.squared_difference(
                                    self.outputs2, self.z[i])

                            # Target masking: zero the loss wherever the
                            # target equals 0.
                            if hp.target_zeros_masking:
                                self.loss1 *= tf.to_float(
                                    tf.not_equal(self.y[i], 0.))
                                self.loss2 *= tf.to_float(
                                    tf.not_equal(self.z[i], 0.))

                            self.mean_loss1 = tf.reduce_mean(self.loss1)
                            self.mean_loss2 = tf.reduce_mean(self.loss2)
                            self.mean_loss = self.mean_loss1 + self.mean_loss2

                            self.losses.append(self.mean_loss)
                            self.grads_and_vars = \
                                self.optimizer.compute_gradients(
                                    self.mean_loss)
                            self.grads_and_vars_list.append(
                                self.grads_and_vars)

            with tf.device('/cpu:0'):
                # Aggregate losses, then calculate average loss.
                self.loss = tf.add_n(self.losses) / len(self.losses)

                # Aggregate gradients, then calculate average gradients.
                self.mean_grads_and_vars = []
                for tower_pairs in zip(*self.grads_and_vars_list):
                    var = tower_pairs[-1][1]
                    # compute_gradients yields None for a variable the loss
                    # does not depend on; expand_dims cannot take None.
                    grads = [tf.expand_dims(grad, 0)
                             for grad, _ in tower_pairs
                             if grad is not None]
                    if not grads:
                        # No tower produced a gradient: nothing to apply.
                        continue
                    mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                    self.mean_grads_and_vars.append((mean_grad, var))

                # Training scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.train_op = self.optimizer.apply_gradients(
                    self.mean_grads_and_vars, self.global_step)

            # Summary
            tf.summary.scalar('loss', self.loss)
            self.merged = tf.summary.merge_all()

        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.decoder_inputs = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

            # Same 'net' scope as the training towers, so that variable
            # names agree between the training and evaluation graphs.
            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs,
                    self.memory)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)
def __init__(self, is_training=True):
    """Build the network under "net" and, for training, loss and logging.

    Args:
        is_training: If true, the vocabulary is learned and stored via
            `learn_vocab()` / `store_vocab()`, inputs come from
            `get_batch()`, and the loss, Adam train op and scalar /
            histogram / image summaries are created. Otherwise `self.x` and
            `self.y` are placeholders fed by the caller and only the
            encoder/decoders are built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            # Build vocab
            _, idx2char = learn_vocab()
            store_vocab(idx2char)

            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs, self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            # Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.y)
                self.loss2 = tf.abs(self.outputs2 - self.z)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1, self.y)
                self.loss2 = tf.squared_difference(self.outputs2, self.z)

            # Target masking: zero the loss wherever the target equals 0.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over axis 0
            self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1), 0)
            self.got1_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs1, -1), 0)
            self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1), 0)
            self.got2_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs2, -1), 0)

            ## images: first example only, last axis averaged (kept as a
            ## size-1 axis), plus a new axis inserted at position 1
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training scheme
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i * 255)
            tf.summary.image("gotten_values1", self.got1_i * 255)
            tf.summary.image("expected_values2", self.expected2_i * 255)
            tf.summary.image("gotten_values2", self.got2_i * 255)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the network under "net" and, for training, loss and logging.

    Args:
        is_training: If true, inputs come from `get_batch()` and the loss,
            Adam train op and scalar / histogram / image summaries are
            created. Otherwise `self.x` and `self.y` are placeholders fed
            by the caller and only the encoder/decoders are built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels*hp.r))

        self.decoder_inputs = shift_by_one(self.y)

        with tf.variable_scope("net"):
            # Encoder
            self.memory = encode(self.x, is_training=is_training)  # (N, T, E)

            # Decoder
            self.outputs1 = decode1(
                self.decoder_inputs,
                self.memory,
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            # Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.y)
                self.loss2 = tf.abs(self.outputs2 - self.z)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1, self.y)
                self.loss2 = tf.squared_difference(self.outputs2, self.z)

            # Target masking: zero the loss wherever the target equals 0.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.y, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.z, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over axis 0
            self.expected1_h = tf.reduce_mean(tf.reduce_mean(self.y, -1), 0)
            self.got1_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs1, -1), 0)
            self.expected2_h = tf.reduce_mean(tf.reduce_mean(self.z, -1), 0)
            self.got2_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs2, -1), 0)

            ## images: first example only, last axis averaged (kept as a
            ## size-1 axis), plus a new axis inserted at position 1
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.y[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.z[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training scheme
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i*255)
            tf.summary.image("gotten_values1", self.got1_i*255)
            tf.summary.image("expected_values2", self.expected2_i*255)
            tf.summary.image("gotten_values2", self.got2_i*255)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build a multi-GPU training graph or a single-tower evaluation graph.

    Args:
        is_training: If true, the batch from `get_batch()` is split into
            `hp.num_gpus` parts along axis 0, one tower per GPU is built
            with shared variables under the 'net' scope, and per-tower
            losses and gradients are averaged on the CPU before being
            applied. Otherwise `self.x` and `self.y` are placeholders fed
            by the caller and one forward pass is built under 'net'.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.y, self.z, self.num_batch = get_batch()
            self.decoder_inputs = shift_by_one(self.y)

            # Make sure that batch size was multiplied by # gpus.
            # Now we split the mini-batch data by # gpus.
            self.x = tf.split(self.x, hp.num_gpus, 0)
            self.y = tf.split(self.y, hp.num_gpus, 0)
            self.z = tf.split(self.z, hp.num_gpus, 0)
            self.decoder_inputs = tf.split(self.decoder_inputs,
                                           hp.num_gpus, 0)

            # Sequence lengths for masking (not used further in this
            # method).
            # NOTE(review): self.x is already a list of per-GPU tensors
            # here, so these presumably carry an extra leading axis compared
            # with the (N,) / (N, T, 1) shapes noted below -- confirm.
            self.x_lengths = tf.to_int32(
                tf.reduce_sum(tf.sign(tf.abs(self.x)), -1))  # (N,)
            self.x_masks = tf.to_float(
                tf.expand_dims(tf.sign(tf.abs(self.x)), -1))  # (N, T, 1)

            # Optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            self.losses, self.grads_and_vars_list = [], []
            for i in range(hp.num_gpus):
                # Tower 0 creates the variables; later towers reuse them.
                with tf.variable_scope('net', reuse=bool(i)):
                    with tf.device('/gpu:{}'.format(i)):
                        with tf.name_scope('gpu_{}'.format(i)):
                            # Encoder
                            self.memory = encode(
                                self.x[i],
                                is_training=is_training)  # (N, T, E)

                            # Decoder
                            self.outputs1 = decode1(
                                self.decoder_inputs[i],
                                self.memory,
                                is_training=is_training
                            )  # (N, T', hp.n_mels*hp.r)
                            self.outputs2 = decode2(
                                self.outputs1,
                                is_training=is_training
                            )  # (N, T', (1+hp.n_fft//2)*hp.r)

                            # Loss
                            if hp.loss_type == "l1":  # L1 loss
                                self.loss1 = tf.abs(self.outputs1 - self.y[i])
                                self.loss2 = tf.abs(self.outputs2 - self.z[i])
                            else:  # L2 loss
                                self.loss1 = tf.squared_difference(
                                    self.outputs1, self.y[i])
                                self.loss2 = tf.squared_difference(
                                    self.outputs2, self.z[i])

                            # Target masking: zero the loss wherever the
                            # target equals 0.
                            if hp.target_zeros_masking:
                                self.loss1 *= tf.to_float(
                                    tf.not_equal(self.y[i], 0.))
                                self.loss2 *= tf.to_float(
                                    tf.not_equal(self.z[i], 0.))

                            self.loss1 = tf.reduce_mean(self.loss1)
                            self.loss2 = tf.reduce_mean(self.loss2)
                            self.loss = self.loss1 + self.loss2

                            self.losses.append(self.loss)
                            self.grads_and_vars = \
                                self.optimizer.compute_gradients(self.loss)
                            self.grads_and_vars_list.append(
                                self.grads_and_vars)

            with tf.device('/cpu:0'):
                # Aggregate losses, then calculate average loss.
                self.mean_loss = tf.add_n(self.losses) / len(self.losses)

                # Aggregate gradients, then calculate average gradients.
                self.mean_grads_and_vars = []
                for tower_pairs in zip(*self.grads_and_vars_list):
                    var = tower_pairs[-1][1]
                    grads = [tf.expand_dims(grad, 0)
                             for grad, _ in tower_pairs
                             if grad is not None]
                    if not grads:
                        # No tower produced a gradient for this variable;
                        # tf.concat cannot take an empty list, so skip it.
                        continue
                    mean_grad = tf.reduce_mean(tf.concat(grads, 0), 0)
                    self.mean_grads_and_vars.append((mean_grad, var))

                # Training scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.train_op = self.optimizer.apply_gradients(
                    self.mean_grads_and_vars, self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()

        else:  # Evaluation
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels*hp.r))
            self.decoder_inputs = shift_by_one(self.y)

            with tf.variable_scope('net'):
                # Encoder
                self.memory = encode(self.x,
                                     is_training=is_training)  # (N, T, E)

                # Decoder
                self.outputs1 = decode1(
                    self.decoder_inputs,
                    self.memory,
                    is_training=is_training)  # (N, T', hp.n_mels*hp.r)
                self.outputs2 = decode2(
                    self.outputs1,
                    is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)
def __init__(self, is_training=True):
    """Build a generator/discriminator graph with least-squares losses.

    The generator (scope "Generator") encodes `self.q`, then calls
    `decode1` repeatedly, feeding the running output back in, and passes
    the concatenated steps through `decode2`. The discriminator (scope
    "Discriminator") scores `self.z` and the generator output with shared
    weights.

    NOTE(review): when `is_training` is false only `self.x` and `self.y`
    are assigned here, yet `self.q` and `self.z` are read unconditionally
    below; unless they are set elsewhere the evaluation path presumably
    fails with AttributeError -- confirm.

    Args:
        is_training: If true, inputs come from `get_batch()` and the
            losses, clipped-gradient train ops, step increment op and
            merged summary are created.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.q, self.y, self.z, self.num_batch = get_batch()
        else:  # Evaluation
            self.x = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))
            self.y = tf.placeholder(tf.float32,
                                    shape=(None, None, hp.n_mels * hp.r))

        with tf.variable_scope("Generator"):
            # Encoder
            self.memory_gen = encode(self.q,
                                     is_training=is_training)  # (N, T, E)

            # Decoder
            decode_length = int(
                (hp.bin_size_y[1] * hp.sr - (hp.win_length - 1)) /
                ((hp.hop_length) * hp.r))  # about 50

            self._outputs1_gen = tf.zeros(
                [hp.batch_size, 1, hp.n_mels * hp.r])
            outputs1_gen_list = []
            for j in range(decode_length):
                # The first call creates decode1's variables; later calls
                # reuse them.
                reuse = None if j == 0 else True
                # Each step's output is added to the running tensor, which
                # is also the next step's input.
                self._outputs1_gen += decode1(self._outputs1_gen,
                                              self.memory_gen,
                                              is_training=is_training,
                                              reuse=reuse)
                outputs1_gen_list.append(self._outputs1_gen)

            self.outputs1_gen = tf.concat(outputs1_gen_list, 1)
            self.outputs2_gen = decode2(self.outputs1_gen,
                                        is_training=is_training)

        with tf.variable_scope("Discriminator"):
            self.final_state_real = encode_dis(self.z,
                                               is_training=is_training)
            self.final_state_fake = encode_dis(self.outputs2_gen,
                                               is_training=is_training,
                                               reuse=True)

        if is_training:
            # Discriminator loss: push real scores to 1 and fake scores to 0.
            self.dis_loss_real = tf.reduce_mean(
                tf.squared_difference(self.final_state_real, 1))
            self.dis_loss_fake = tf.reduce_mean(
                tf.squared_difference(self.final_state_fake, 0))
            self.dis_loss = tf.reduce_mean(self.dis_loss_real +
                                           self.dis_loss_fake)

            # Generator loss: push fake scores to 1.
            self.gen_loss = tf.reduce_mean(
                tf.squared_difference(self.final_state_fake, 1))

            # Training scheme: each loss updates only the variables whose
            # name contains its scope name.
            dvars = [
                e for e in self.graph.get_collection('trainable_variables')
                if 'Discriminator' in e.name
            ]
            gvars = [
                e for e in self.graph.get_collection('trainable_variables')
                if 'Generator' in e.name
            ]

            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)

            # Gradients are clipped to a global norm of 5 before applying.
            grad_d, var_d = zip(*self.optimizer.compute_gradients(
                self.dis_loss, var_list=dvars))
            grad_d_clipped, _ = tf.clip_by_global_norm(grad_d, 5.)
            grad_g, var_g = zip(*self.optimizer.compute_gradients(
                self.gen_loss, var_list=gvars))
            grad_g_clipped, _ = tf.clip_by_global_norm(grad_g, 5.)
            self.train_op_dis = self.optimizer.apply_gradients(
                zip(grad_d_clipped, var_d))
            self.train_op_gen = self.optimizer.apply_gradients(
                zip(grad_g_clipped, var_g))

            # Increments global step. Neither train op is given
            # global_step, so this op is what advances it.
            self.inc = tf.assign_add(self.global_step, 1, name='increment')

            # Summary
            tf.summary.scalar('dis_loss_real', self.dis_loss_real)
            tf.summary.scalar('dis_loss_fake', self.dis_loss_fake)
            tf.summary.scalar('dis_loss', self.dis_loss)
            tf.summary.scalar('gen_loss', self.gen_loss)
            # NOTE(review): this summary reads the assign_add op, so
            # evaluating self.merged presumably also increments
            # global_step -- confirm this is intended.
            tf.summary.scalar('step', self.inc)

            self.merged = tf.summary.merge_all()
def __init__(self, is_training=True):
    """Build the `net` encoder/decoder graph and, for training, its loss.

    `self.spectro` is both the encoder input and the target of the first
    decoder output; `self.magnit` is the target of the second.

    Args:
        is_training: If true, inputs come from `get_batch()` and the loss,
            Adam train op and scalar / histogram / image summaries are
            created. Otherwise `self.length` and `self.spectro` are
            placeholders fed by the caller and only the network is built.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            # (batch_size, ?, 258) (batch_size, ?, 400) (batch_size, ?, 5125)
            self.spectro, self.magnit, self.length, self.num_batch = \
                get_batch()  # Get data batch
        else:  # Evaluation
            self.length = tf.placeholder(tf.int32, shape=(None, None))
            self.spectro = tf.placeholder(
                tf.float32, shape=(None, None, hp.n_mels * hp.r))

        # This is the decoder's input.
        self.decoder_inputs = shift_by_one(self.spectro)

        with tf.variable_scope("net"):
            #### Encoder
            self.memory = net.encode(self.spectro,
                                     is_training=is_training)  # (N, T, E)

            #### Decoder
            self.outputs1 = net.decode1(
                self.decoder_inputs,
                self.memory,  # encoder output
                is_training=is_training)  # (N, T', hp.n_mels*hp.r)
            self.outputs2 = net.decode2(
                self.outputs1,
                is_training=is_training)  # (N, T', (1+hp.n_fft//2)*hp.r)

        if is_training:
            #### Loss
            if hp.loss_type == "l1":  # L1 loss
                self.loss1 = tf.abs(self.outputs1 - self.spectro)
                self.loss2 = tf.abs(self.outputs2 - self.magnit)
            else:  # L2 loss
                self.loss1 = tf.squared_difference(self.outputs1,
                                                   self.spectro)
                self.loss2 = tf.squared_difference(self.outputs2,
                                                   self.magnit)

            # Target masking: zero the loss wherever the target equals 0.
            if hp.target_zeros_masking:
                self.loss1 *= tf.to_float(tf.not_equal(self.spectro, 0.))
                self.loss2 *= tf.to_float(tf.not_equal(self.magnit, 0.))

            self.mean_loss1 = tf.reduce_mean(self.loss1)
            self.mean_loss2 = tf.reduce_mean(self.loss2)
            self.mean_loss = self.mean_loss1 + self.mean_loss2

            # Logging
            ## histograms: mean over the last axis, then over axis 0
            self.expected1_h = tf.reduce_mean(
                tf.reduce_mean(self.spectro, -1), 0)
            self.got1_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs1, -1), 0)
            self.expected2_h = tf.reduce_mean(
                tf.reduce_mean(self.magnit, -1), 0)
            self.got2_h = tf.reduce_mean(
                tf.reduce_mean(self.outputs2, -1), 0)

            ## images: first example only, last axis averaged (kept as a
            ## size-1 axis), plus a new axis inserted at position 1
            self.expected1_i = tf.expand_dims(
                tf.reduce_mean(self.spectro[:1], -1, keep_dims=True), 1)
            self.got1_i = tf.expand_dims(
                tf.reduce_mean(self.outputs1[:1], -1, keep_dims=True), 1)
            self.expected2_i = tf.expand_dims(
                tf.reduce_mean(self.magnit[:1], -1, keep_dims=True), 1)
            self.got2_i = tf.expand_dims(
                tf.reduce_mean(self.outputs2[:1], -1, keep_dims=True), 1)

            # Training scheme
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss1', self.mean_loss1)
            tf.summary.scalar('mean_loss2', self.mean_loss2)
            tf.summary.scalar('mean_loss', self.mean_loss)

            tf.summary.histogram('expected_values1', self.expected1_h)
            tf.summary.histogram('gotten_values1', self.got1_h)
            tf.summary.histogram('expected_values2', self.expected2_h)
            tf.summary.histogram('gotten_values2', self.got2_h)

            tf.summary.image("expected_values1", self.expected1_i * 255)
            tf.summary.image("gotten_values1", self.got1_i * 255)
            tf.summary.image("expected_values2", self.expected2_i * 255)
            tf.summary.image("gotten_values2", self.got2_i * 255)

            self.merged = tf.summary.merge_all()