def loss(self, data):
    x_sp = data['sp']
    x_mcc = data['mcc']
    y = data['speaker']

    # normalize input with the min-max normalizer
    x_sp_in_minmax = self.normalizers['sp']['minmax'].forward_process(x_sp)
    x_sp_in = tf.expand_dims(x_sp_in_minmax, 1)  # insert channel dimension
    x_mcc_in_minmax = self.normalizers['mcc']['minmax'].forward_process(x_mcc)
    x_mcc_in = tf.expand_dims(x_mcc_in_minmax, 1)  # insert channel dimension

    # forward pass
    # Use sp as source
    sp_z_mu, sp_z_lv = self.sp_enc(x_sp_in)
    z_sp = GaussianSampleLayer(sp_z_mu, sp_z_lv)
    x_sp_sp = self.sp_dec(z_sp, y)
    x_sp_mcc = self.mcc_dec(z_sp, y)

    # Use mcc as source
    mcc_z_mu, mcc_z_lv = self.mcc_enc(x_mcc_in)
    z_mcc = GaussianSampleLayer(mcc_z_mu, mcc_z_lv)
    x_mcc_sp = self.sp_dec(z_mcc, y)
    x_mcc_mcc = self.mcc_dec(z_mcc, y)

    # loss
    kl_loss_sp = kl_loss(sp_z_mu, sp_z_lv)
    recon_loss_sp = log_loss(x_sp_in, x_sp_sp)
    cross_loss_sp2mcc = log_loss(x_mcc_in, x_sp_mcc)

    kl_loss_mcc = kl_loss(mcc_z_mu, mcc_z_lv)
    recon_loss_mcc = log_loss(x_mcc_in, x_mcc_mcc)
    cross_loss_mcc2sp = log_loss(x_sp_in, x_mcc_sp)

    latent_loss = tf.reduce_mean(tf.abs(sp_z_mu - mcc_z_mu))

    loss = dict()
    loss['D_KL'] = kl_loss_sp + kl_loss_mcc
    loss['recon'] = recon_loss_sp + recon_loss_mcc
    loss['cross'] = cross_loss_sp2mcc + cross_loss_mcc2sp
    loss['latent'] = latent_loss
    # log-likelihood terms are maximized, hence the negative signs
    loss['all'] = -loss['recon'] - loss['cross'] + loss['D_KL'] + loss['latent']

    # summary
    tf.summary.scalar('KL-div-sp', kl_loss_sp)
    tf.summary.scalar('KL-div-mcc', kl_loss_mcc)
    tf.summary.scalar('reconstruction-sp', recon_loss_sp)
    tf.summary.scalar('reconstruction-mcc', recon_loss_mcc)
    tf.summary.scalar('cross-sp2mcc', cross_loss_sp2mcc)
    tf.summary.scalar('cross-mcc2sp', cross_loss_mcc2sp)
    tf.summary.scalar('latent', latent_loss)
    return loss
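# NOTE: `kl_loss` and `log_loss` are used above but defined elsewhere in the repo.
# The sketch below is an assumption about their behaviour, written only to be
# consistent with how the terms enter loss['all'] (KL is minimized, the
# log-likelihood terms are maximized); it is not necessarily the original code.
import math
import tensorflow as tf

def kl_loss(mu, log_var):
    # closed-form KL( N(mu, exp(log_var)) || N(0, I) ), averaged over the batch
    return 0.5 * tf.reduce_mean(
        tf.reduce_sum(tf.exp(log_var) + tf.square(mu) - 1.0 - log_var, axis=-1))

def log_loss(x, xh):
    # Gaussian log-density of x under N(xh, I), averaged over the batch
    c = math.log(2.0 * math.pi)
    return tf.reduce_mean(
        tf.reduce_sum(-0.5 * (c + tf.square(x - xh)), axis=-1))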
def circuit_loop(x, y):
    # encode the input and sample a latent code via the reparameterization trick
    z_mu, z_lv = self._encode(x, is_training=self.is_training)
    z = GaussianSampleLayer(z_mu, z_lv)

    # discriminate the real input
    x_logit, x_feature = self._discriminate(x, is_training=self.is_training)

    # generate a reconstruction conditioned on the speaker label,
    # then re-encode and discriminate it
    xh, xh_sig_logit = self._generate(z, y, is_training=self.is_training)
    zh_mu, zh_lv = self._encode(xh, is_training=self.is_training)
    xh_logit, xh_feature = self._discriminate(xh, is_training=self.is_training)

    return dict(
        z=z,
        z_mu=z_mu,
        z_lv=z_lv,
        xh=xh,
        xh_sig_logit=xh_sig_logit,
        x_logit=x_logit,
        x_feature=x_feature,
        zh_mu=zh_mu,
        zh_lv=zh_lv,
        xh_logit=xh_logit,
        xh_feature=xh_feature,
    )
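# NOTE: `GaussianSampleLayer` is used throughout but not defined in this section.
# A minimal reparameterization-trick sketch that matches the call sites
# (mean and log-variance in, one sample out); treat it as an assumption:
import tensorflow as tf

def GaussianSampleLayer(z_mu, z_lv, name='GaussianSampleLayer'):
    # z = mu + sigma * eps, with eps ~ N(0, I) and sigma = exp(0.5 * log_var)
    with tf.name_scope(name):
        eps = tf.random_normal(tf.shape(z_mu))
        std = tf.exp(0.5 * z_lv)
        return z_mu + eps * std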
def loss(self, x, y):
    with tf.name_scope('loss'):
        z_mu, z_lv = self._encode(x)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._generate(z, y)

        # KL divergence between the posterior q(z|x) and the standard-normal prior
        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                slim.flatten(tf.zeros_like(z_mu)),
                slim.flatten(tf.zeros_like(z_lv)),
            ))
        # reconstruction log-likelihood under a unit-variance Gaussian
        logPx = tf.reduce_mean(
            GaussianLogDensity(
                slim.flatten(x),
                slim.flatten(xh),
                tf.zeros_like(slim.flatten(xh))),
        )

        loss = dict()
        loss['G'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
    return loss
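# NOTE: `GaussianKLD` and `GaussianLogDensity` come from the repo's utility layers
# and are not shown here. The sketches below are assumptions consistent with the
# calls above (per-sample sums over the last axis of the flattened tensors):
import math
import tensorflow as tf

EPSILON = 1e-6

def GaussianLogDensity(x, mu, log_var, name='GaussianLogDensity'):
    # log N(x; mu, exp(log_var)), summed over the feature axis
    with tf.name_scope(name):
        c = math.log(2.0 * math.pi)
        var = tf.exp(log_var)
        x_mu2_over_var = tf.square(x - mu) / (var + EPSILON)
        return tf.reduce_sum(-0.5 * (c + log_var + x_mu2_over_var), axis=-1)

def GaussianKLD(mu1, lv1, mu2, lv2, name='GaussianKLD'):
    # KL( N(mu1, exp(lv1)) || N(mu2, exp(lv2)) ), summed over the feature axis
    with tf.name_scope(name):
        v1, v2 = tf.exp(lv1), tf.exp(lv2)
        mu_diff_sq = tf.square(mu2 - mu1)
        kld = 0.5 * (lv2 - lv1 + (v1 + mu_diff_sq) / (v2 + EPSILON) - 1.0)
        return tf.reduce_sum(kld, axis=-1)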
def loss(self, x, y):
    '''
    Args:
        x: shape=[s, b, c]
        y: shape=[s, b]
    Returns:
        a `dict` of losses
    '''
    z_mu, z_lv = self._encode(x, is_training=self.is_training)
    z = GaussianSampleLayer(z_mu, z_lv)
    xh = self._decode(z, y, is_training=self.is_training)

    with tf.name_scope('loss'):
        with tf.name_scope('E_log_p_x_zy'):
            L_x = -1.0 * tf.reduce_mean(
                GaussianLogDensity(x, xh, tf.zeros_like(x)),
            )
        with tf.name_scope('D_KL_z'):
            L_z = tf.reduce_mean(
                GaussianKLD(
                    z_mu, z_lv,
                    tf.zeros_like(z_mu), tf.zeros_like(z_lv)))
        loss = {
            'L_x': L_x,
            'L_z': L_z,
        }

    tf.summary.scalar('L_x', L_x)
    tf.summary.scalar('L_z', L_z)
    return loss
def loss(self, data):
    x = data[self.feat_type]
    y = data['speaker']

    # normalize input with the min-max normalizer
    x_in_minmax = self.normalizers[self.feat_type]['minmax'].forward_process(x)
    x_in = tf.expand_dims(x_in_minmax, 1)  # insert channel dimension

    # forward pass
    z_mu, z_lv = self.enc(x_in)
    z = GaussianSampleLayer(z_mu, z_lv)
    xb = self.dec(z, y)

    # loss
    KL_loss = kl_loss(z_mu, z_lv)
    recon_loss = log_loss(x_in_minmax, xb)

    loss = dict()
    loss['D_KL'] = KL_loss
    loss['recon'] = recon_loss
    loss['all'] = -loss['recon'] + loss['D_KL']

    # summary
    tf.summary.scalar('KL-div-sp', KL_loss)
    tf.summary.scalar('reconstruction-sp', recon_loss)
    return loss
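# NOTE: `self.normalizers[...]['minmax'].forward_process` is assumed to rescale
# features with statistics computed offline from the training corpus. The class
# below is a hypothetical minimal version (the name and the [0, 1] target range
# are assumptions, not taken from this repository):
import tensorflow as tf

class MinMaxNormalizer(object):
    def __init__(self, xmin, xmax):
        # xmin/xmax: per-dimension minima/maxima gathered from the training corpus
        self.xmin = tf.constant(xmin, dtype=tf.float32)
        self.xmax = tf.constant(xmax, dtype=tf.float32)

    def forward_process(self, x):
        # map features into [0, 1]
        x = (x - self.xmin) / (self.xmax - self.xmin)
        return tf.clip_by_value(x, 0., 1.)

    def backward_process(self, x):
        # inverse mapping, used at synthesis time
        return x * (self.xmax - self.xmin) + self.xmin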
def loss(self, x, y):
    with tf.name_scope('loss'):
        z_mu, z_lv = self._encode(x)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._generate(z, y)

        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                slim.flatten(tf.zeros_like(z_mu)),
                slim.flatten(tf.zeros_like(z_lv)),
            ))
        logPx = tf.reduce_mean(
            GaussianLogDensity(
                slim.flatten(x),
                slim.flatten(xh),
                tf.zeros_like(slim.flatten(xh))),
        )

        # Wasserstein critic outputs for real and generated samples
        dx = self._discriminate(x)
        dxh = self._discriminate(xh)
        W_dist = tf.reduce_mean(dx - dxh)
        g_loss = tf.reduce_mean(-dxh)

        # gradient penalty (WGAN-GP) on random interpolates between x and xh
        batch_size = self.arch['training']['batch_size']
        lam = self.arch['training']['lambda']
        alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
        alpha = alpha_dist.sample((batch_size, 1, 1, 1))
        interpolated = x + alpha * (xh - x)
        inte_logit = self._discriminate(interpolated)
        gradients = tf.gradients(inte_logit, [interpolated, ])[0]
        grad_l2 = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean((grad_l2 - 1)**2)
        gp = lam * gradient_penalty

        loss = dict()
        alpha_gan = self.arch['training']['alpha']  # weight of the adversarial term
        loss['l_E'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx
        loss['l_D'] = -W_dist + gp
        loss['l_G'] = -logPx + alpha_gan * g_loss
        loss['W_dist'] = W_dist
        loss['gp'] = gp

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)
        tf.summary.scalar('W_dist', W_dist)
        tf.summary.scalar("gp_loss", gradient_penalty)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
    return loss
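# NOTE: one plausible way to consume the dict returned above: update the critic with
# loss['l_D'], the generator with loss['l_G'], and the encoder with loss['l_E'], each
# over its own variable collection. The scope names and learning rate below are
# assumptions, not taken from this repository:
import tensorflow as tf

def build_optimizers(loss, lr=1e-4):
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
    e_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
    opt_d = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(loss['l_D'], var_list=d_vars)
    opt_g = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(loss['l_G'], var_list=g_vars)
    opt_e = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(loss['l_E'], var_list=e_vars)
    return opt_d, opt_g, opt_e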
def loss(self, x, y):
    with tf.name_scope('loss'):
        # use one variable_scope per sub-network so that their trainable
        # variables can be collected separately
        with tf.variable_scope("encoder") as scope:
            z_mu, z_lv = self.encoder(x, self.is_training)
            z = GaussianSampleLayer(z_mu, z_lv)

        with tf.variable_scope("generator") as scope:
            xh = self.generator(z, y, self.is_training)
            print("xh shape:", xh.get_shape().as_list())
            #xh = self.nchw_to_nhwc(xh)
            print("xh shape:", xh.get_shape().as_list())

        with tf.variable_scope("discriminator") as scope:
            #x = nchw_to_nhwc(x)
            disc_real, x_through_d = self.discriminator(x, self.is_training)
            print("disc_real shape:", disc_real.get_shape().as_list())
            print("x_through_d:", x_through_d.get_shape().as_list())
            disc_fake, xh_through_d = self.discriminator(xh, self.is_training)

        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                slim.flatten(tf.zeros_like(z_mu)),
                slim.flatten(tf.zeros_like(z_lv)),
            ))
        # feature-matching reconstruction term: Gaussian log-density measured in
        # the discriminator's feature space
        logPx = -tf.reduce_mean(
            GaussianLogDensity(
                x_through_d,
                xh_through_d,
                tf.zeros_like(xh_through_d)),
        )

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        batch_size = self.arch['training']['batch_size']
        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        gen_loss = -tf.reduce_mean(disc_fake)
        disc_loss = tf.reduce_mean(disc_fake - disc_real)

        alpha = tf.random_uniform(shape=[batch_size, 513, 1, 1],
                                  minval=0., maxval=1.)
        self.reuse = False

        # gradient penalty (WGAN-GP) on random interpolates between x and xh
        print("before gradient x shape:", x.get_shape().as_list())
        differences = xh - x
        interpolates = x + (alpha * differences)
        print("interpolates shape:", interpolates.get_shape().as_list())
        pred, inter_h = self.discriminator(interpolates, self.is_training)
        print("pred shape:", pred.get_shape().as_list())
        gradients = tf.gradients(pred, [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=[1, 2]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        disc_loss += self.arch['LAMBDA'] * gradient_penalty
        self.reuse = True

        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss

        loss['xh'] = xh
        loss['l_G'] = g_loss
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = D_KL + logPx + 50. * d_loss
        return loss
def loss(self, x, y, is_training=True):
    batch_size = self.arch['training']['batch_size']
    # per-sample mixing coefficient for the gradient-penalty interpolates
    alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1], minval=0., maxval=1.)

    with tf.name_scope('loss'):
        # use one variable_scope per sub-network so that their trainable
        # variables can be collected separately
        with tf.variable_scope("encoder") as scope:
            z_mu, z_lv = self.encoder(x, is_training)
            z = GaussianSampleLayer(z_mu, z_lv)
            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    #slim.flatten(tf.zeros_like(z_mu)),
                    #slim.flatten(tf.zeros_like(z_lv)),
                    slim.flatten(tf.random_normal(tf.shape(z_mu))),
                    slim.flatten(tf.random_normal(tf.shape(z_lv))),
                ))

        with tf.variable_scope("generator") as scope:
            xh = self.generator(z, y, is_training)
            print("xh shape:", xh.get_shape().as_list())
            xh = self.nchw_to_nhwc(xh)
            print("xh shape:", xh.get_shape().as_list())

        with tf.variable_scope("discriminator") as scope:
            disc_real, x_through_d = self.discriminator(x, is_training)
            print("disc_real shape:", disc_real.get_shape().as_list())
            print("x_through_d:", x_through_d.get_shape().as_list())
            disc_fake, xh_through_d = self.discriminator(xh, is_training,
                                                         reuse=True)

            # feature-matching reconstruction term in the discriminator's feature space
            logPx = -tf.reduce_mean(
                GaussianLogDensity(
                    x_through_d,
                    xh_through_d,
                    tf.zeros_like(xh_through_d),
                ))

            # gradient penalty (WGAN-GP) on random interpolates between x and xh
            print("before gradient x shape:", x.get_shape().as_list())
            differences = xh - x
            differences = self.nhwc_to_nchw(differences)
            print("differences shape:", differences.get_shape().as_list())
            interpolates = self.nhwc_to_nchw(x) + (alpha * differences)
            print("interpolates shape:", interpolates.get_shape().as_list())
            interpolates = tf.transpose(interpolates, [0, 2, 1, 3])
            pred, inter_h = self.discriminator(interpolates, is_training,
                                               reuse=True)
            print("pred shape:", pred.get_shape().as_list())
            gradients = tf.gradients(pred, [interpolates])[0]
            slopes = tf.sqrt(
                tf.reduce_sum(tf.square(gradients), axis=[1]))
            gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
            gradient_penalty = self.arch['LAMBDA'] * gradient_penalty
            tf.summary.histogram("gradient_penalty", gradient_penalty)

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        #disc_real_loss = -tf.reduce_mean(disc_real)
        #disc_fake_loss = -tf.reduce_mean(disc_fake_loss)
        gen_loss = tf.reduce_mean(-disc_fake)
        # WGAN critic objective: minimize E[D(fake)] - E[D(real)]
        disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss + gradient_penalty

        tf.summary.histogram("D_KL", D_KL)
        tf.summary.histogram("logpx", logPx)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        tf.summary.histogram('gen_loss', gen_loss)
        tf.summary.histogram('disc_loss', disc_loss)
        tf.summary.histogram("gradient_penalty", gradient_penalty)
        tf.summary.histogram("Discriminator_loss", d_loss)
        tf.summary.histogram("Generator_loss", g_loss)

        loss['l_G'] = g_loss + logPx
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = (D_KL + logPx) + 50. * g_loss + loss['l_D']
        return loss
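# NOTE: `nchw_to_nhwc` / `nhwc_to_nchw` are presumably simple layout transposes between
# channels-first and channels-last; the free functions below are a sketch of what the
# methods used above are assumed to do:
import tensorflow as tf

def nchw_to_nhwc(x):
    # [batch, channel, height, width] -> [batch, height, width, channel]
    return tf.transpose(x, [0, 2, 3, 1])

def nhwc_to_nchw(x):
    # [batch, height, width, channel] -> [batch, channel, height, width]
    return tf.transpose(x, [0, 3, 1, 2])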
def loss(self, data):
    x_sp = data['sp']
    x_mcc = data['mcc']
    y = data['speaker']
    label = tf.one_hot(
        tf.reduce_mean(y, axis=1, keep_dims=True), self.arch['y_dim'])

    # normalize input with the min-max normalizer
    x_sp_in_minmax = self.normalizers['sp']['minmax'].forward_process(x_sp)
    x_sp_in = tf.expand_dims(x_sp_in_minmax, 1)  # insert channel dimension
    x_mcc_in_minmax = self.normalizers['mcc']['minmax'].forward_process(x_mcc)
    x_mcc_in = tf.expand_dims(x_mcc_in_minmax, 1)  # insert channel dimension

    # forward pass
    # Use sp as source
    sp_z_mu, sp_z_lv = self.sp_enc(x_sp_in)
    z_sp = GaussianSampleLayer(sp_z_mu, sp_z_lv)
    x_sp_sp = self.sp_dec(z_sp, y)
    x_sp_mcc = self.mcc_dec(z_sp, y)

    # speaker classifier on the latent code (sp branch)
    cls_sp_logit = self.latent_cls(sp_z_mu)
    z_sp_pred = tf.nn.softmax(cls_sp_logit)
    sp_corr_pred = tf.equal(tf.argmax(z_sp_pred, 1), tf.reduce_mean(y, axis=1))

    # Use mcc as source
    mcc_z_mu, mcc_z_lv = self.mcc_enc(x_mcc_in)
    z_mcc = GaussianSampleLayer(mcc_z_mu, mcc_z_lv)
    x_mcc_sp = self.sp_dec(z_mcc, y)
    x_mcc_mcc = self.mcc_dec(z_mcc, y)
    x_mcc_mcc_NCHW = tf.expand_dims(x_mcc_mcc, axis=1)

    # WGAN critic on real vs. reconstructed mcc
    real_mcc_logit = self.mcc_dis(x_mcc_in)
    fake_mcc_logit = self.mcc_dis(x_mcc_mcc_NCHW)

    # speaker classifier on the latent code (mcc branch)
    cls_mcc_logit = self.latent_cls(mcc_z_mu)
    z_mcc_pred = tf.nn.softmax(cls_mcc_logit)
    mcc_corr_pred = tf.equal(tf.argmax(z_mcc_pred, 1), tf.reduce_mean(y, axis=1))

    # loss
    kl_loss_sp = kl_loss(sp_z_mu, sp_z_lv)
    recon_loss_sp = log_loss(x_sp_in, x_sp_sp)
    cross_loss_sp2mcc = log_loss(x_mcc_in, x_sp_mcc)

    kl_loss_mcc = kl_loss(mcc_z_mu, mcc_z_lv)
    recon_loss_mcc = log_loss(x_mcc_in, x_mcc_mcc_NCHW)
    cross_loss_mcc2sp = log_loss(x_sp_in, x_mcc_sp)

    latent_loss = tf.reduce_mean(tf.abs(sp_z_mu - mcc_z_mu))
    gradient_penalty_mcc = gradient_penalty_loss(x_mcc_in, x_mcc_mcc, self.mcc_dis)

    cls_loss_sp = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.stop_gradient(label), logits=cls_sp_logit))
    cls_loss_mcc = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.stop_gradient(label), logits=cls_mcc_logit))
    acc = 0.5 * (tf.reduce_mean(tf.cast(sp_corr_pred, tf.float32)) +
                 tf.reduce_mean(tf.cast(mcc_corr_pred, tf.float32)))

    loss = dict()
    loss['D_KL_sp'] = kl_loss_sp
    loss['D_KL_mcc'] = kl_loss_mcc
    loss['recon_sp'] = recon_loss_sp
    loss['recon_mcc'] = recon_loss_mcc
    loss['cross_sp2mcc'] = cross_loss_sp2mcc
    loss['cross_mcc2sp'] = cross_loss_mcc2sp
    loss['latent'] = latent_loss
    loss['wgan_mcc'] = tf.reduce_mean(fake_mcc_logit) - tf.reduce_mean(real_mcc_logit)
    loss['wgan_gp_mcc'] = gradient_penalty_mcc
    loss['cls_loss_sp'] = cls_loss_sp
    loss['cls_loss_mcc'] = cls_loss_mcc

    with tf.name_scope('Summary'):
        tf.summary.scalar('KL-div-sp', kl_loss_sp)
        tf.summary.scalar('KL-div-mcc', kl_loss_mcc)
        tf.summary.scalar('reconstruction-sp', recon_loss_sp)
        tf.summary.scalar('reconstruction-mcc', recon_loss_mcc)
        tf.summary.scalar('cross-sp2mcc', cross_loss_sp2mcc)
        tf.summary.scalar('cross-mcc2sp', cross_loss_mcc2sp)
        tf.summary.scalar('latent', latent_loss)
        tf.summary.scalar('wgan-mcc', loss['wgan_mcc'])
        tf.summary.scalar('wgan-gp-mcc', gradient_penalty_mcc)
        tf.summary.scalar('cls-sp', cls_loss_sp)
        tf.summary.scalar('cls-mcc', cls_loss_mcc)
        tf.summary.scalar('cls-accuracy', acc)

    return loss
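# NOTE: `gradient_penalty_loss` is called above but defined elsewhere in the repo.
# The sketch below is a standard WGAN-GP penalty on random interpolates, written to
# match the call gradient_penalty_loss(x_mcc_in, x_mcc_mcc, self.mcc_dis); treat it
# as an assumption rather than the original implementation:
import tensorflow as tf

def gradient_penalty_loss(x_real, x_fake, discriminator):
    # per-sample mixing coefficient, broadcast over all non-batch axes
    ndim = x_real.get_shape().ndims
    batch_size = tf.shape(x_real)[0]
    alpha = tf.random_uniform([batch_size] + [1] * (ndim - 1), minval=0., maxval=1.)
    # align layouts in case the fake sample lacks the channel axis
    x_fake = tf.reshape(x_fake, tf.shape(x_real))
    interpolates = x_real + alpha * (x_fake - x_real)
    d_inter = discriminator(interpolates)
    gradients = tf.gradients(d_inter, [interpolates])[0]
    slopes = tf.sqrt(
        tf.reduce_sum(tf.square(gradients), axis=list(range(1, ndim))) + 1e-12)
    return tf.reduce_mean(tf.square(slopes - 1.))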