Example #1
    def loss(self, data):

        x_sp = data['sp']
        x_mcc = data['mcc']
        y = data['speaker']

        # normalize input using the min/max normalizers
        x_sp_in_minmax = self.normalizers['sp']['minmax'].forward_process(x_sp)
        x_sp_in = tf.expand_dims(x_sp_in_minmax, 1)  # insert channel dimension
        x_mcc_in_minmax = self.normalizers['mcc']['minmax'].forward_process(x_mcc)
        x_mcc_in = tf.expand_dims(x_mcc_in_minmax, 1)  # insert channel dimension

        # forward pass
        # Use sp as source
        sp_z_mu, sp_z_lv = self.sp_enc(x_sp_in)
        z_sp = GaussianSampleLayer(sp_z_mu, sp_z_lv)
        x_sp_sp = self.sp_dec(z_sp, y)
        x_sp_mcc = self.mcc_dec(z_sp, y)

        # Use mcc as source
        mcc_z_mu, mcc_z_lv = self.mcc_enc(x_mcc_in)
        z_mcc = GaussianSampleLayer(mcc_z_mu, mcc_z_lv)
        x_mcc_sp = self.sp_dec(z_mcc, y)
        x_mcc_mcc = self.mcc_dec(z_mcc, y)

        # loss
        kl_loss_sp = kl_loss(sp_z_mu, sp_z_lv)
        recon_loss_sp = log_loss(x_sp_in, x_sp_sp)
        cross_loss_sp2mcc = log_loss(x_mcc_in, x_sp_mcc)
        kl_loss_mcc = kl_loss(mcc_z_mu, mcc_z_lv)
        recon_loss_mcc = log_loss(x_mcc_in, x_mcc_mcc)
        cross_loss_mcc2sp = log_loss(x_sp_in, x_mcc_sp)
        latent_loss = tf.reduce_mean(tf.abs(sp_z_mu - mcc_z_mu))

        loss = dict()
        loss['D_KL'] = (kl_loss_sp + kl_loss_mcc)
        loss['recon'] = (recon_loss_sp + recon_loss_mcc)
        loss['cross'] = (cross_loss_sp2mcc + cross_loss_mcc2sp)
        loss['latent'] = latent_loss

        loss['all'] = -loss['recon'] - loss['cross'] + loss['D_KL'] + loss['latent']

        # summary
        tf.summary.scalar('KL-div-sp', kl_loss_sp)
        tf.summary.scalar('KL-div-mcc', kl_loss_mcc)
        tf.summary.scalar('reconstruction-sp', recon_loss_sp)
        tf.summary.scalar('reconstruction-mcc', recon_loss_mcc)
        tf.summary.scalar('cross-sp2mcc', cross_loss_sp2mcc)
        tf.summary.scalar('cross-mcc2sp', cross_loss_mcc2sp)
        tf.summary.scalar('latent', latent_loss)
        return loss
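
Example #1 above (and Examples #5 and #9 below) calls kl_loss and log_loss helpers that the snippet does not define. Below is a minimal sketch of what they presumably compute, assuming a standard-normal prior and a unit-variance Gaussian likelihood; the definitions in the original project may differ.

import math

import tensorflow as tf  # TF 1.x, matching the snippets above


def kl_loss(mu, lv):
    # KL( N(mu, exp(lv)) || N(0, I) ), averaged over the batch (assumed form)
    return tf.reduce_mean(
        0.5 * tf.reduce_sum(tf.exp(lv) + tf.square(mu) - 1. - lv, axis=-1))


def log_loss(x, xh):
    # Gaussian log-density of x under mean xh with unit variance (assumed form);
    # Example #1 maximizes this term, hence the minus signs in loss['all'].
    c = -0.5 * math.log(2. * math.pi)
    return tf.reduce_mean(tf.reduce_sum(c - 0.5 * tf.square(x - xh), axis=-1))
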
Example #2

        def circuit_loop(x, y):

            z_mu, z_lv = self._encode(x, is_training=self.is_training)
            z = GaussianSampleLayer(z_mu, z_lv)

            x_logit, x_feature = self._discriminate(
                x, is_training=self.is_training)

            xh, xh_sig_logit = self._generate(z,
                                              y,
                                              is_training=self.is_training)

            zh_mu, zh_lv = self._encode(xh, is_training=self.is_training)

            xh_logit, xh_feature = self._discriminate(
                xh, is_training=self.is_training)

            return dict(
                z=z,
                z_mu=z_mu,
                z_lv=z_lv,
                xh=xh,
                xh_sig_logit=xh_sig_logit,
                x_logit=x_logit,
                x_feature=x_feature,
                zh_mu=zh_mu,
                zh_lv=zh_lv,
                xh_logit=xh_logit,
                xh_feature=xh_feature,
            )
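
Every example draws the latent code through GaussianSampleLayer. A common definition is the reparameterization trick: sample eps ~ N(0, I) and scale it by the predicted standard deviation. The sketch below assumes that convention; the original helper may differ in details.

import tensorflow as tf  # TF 1.x


def GaussianSampleLayer(z_mu, z_lv, name='GaussianSampleLayer'):
    # Reparameterization trick (assumed to match the helper used above):
    # z = mu + exp(lv / 2) * eps, with eps ~ N(0, I)
    with tf.name_scope(name):
        eps = tf.random_normal(tf.shape(z_mu))
        std = tf.sqrt(tf.exp(z_lv))
        return tf.add(z_mu, tf.multiply(std, eps))
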
Example #3
    def loss(self, x, y):
        with tf.name_scope('loss'):
            z_mu, z_lv = self._encode(x)
            z = GaussianSampleLayer(z_mu, z_lv)
            xh = self._generate(z, y)

            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    slim.flatten(tf.zeros_like(z_mu)),
                    slim.flatten(tf.zeros_like(z_lv)),
                ))
            logPx = tf.reduce_mean(
                GaussianLogDensity(slim.flatten(x), slim.flatten(xh),
                                   tf.zeros_like(slim.flatten(xh))), )

        loss = dict()
        loss['G'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)

        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        return loss
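
Examples #3 through #8 evaluate GaussianKLD and GaussianLogDensity on flattened tensors parameterized by mean and log-variance. The sketches below follow the usual closed forms for diagonal Gaussians; the helpers in the source repository may use different numerical-stability terms.

import math

import tensorflow as tf  # TF 1.x

EPSILON = 1e-6  # stability constant (assumed)


def GaussianLogDensity(x, mu, log_var, name='GaussianLogDensity'):
    # log N(x; mu, exp(log_var)), summed over the last axis (assumed form)
    with tf.name_scope(name):
        c = math.log(2. * math.pi)
        var = tf.exp(log_var)
        x_mu2_over_var = tf.div(tf.square(x - mu), var + EPSILON)
        return tf.reduce_sum(-0.5 * (c + log_var + x_mu2_over_var), axis=-1)


def GaussianKLD(mu1, lv1, mu2, lv2, name='GaussianKLD'):
    # KL( N(mu1, exp(lv1)) || N(mu2, exp(lv2)) ), summed over the last axis
    with tf.name_scope(name):
        v1, v2 = tf.exp(lv1), tf.exp(lv2)
        mu_diff_sq = tf.square(mu2 - mu1)
        term = lv2 - lv1 + tf.div(v1 + mu_diff_sq, v2 + EPSILON) - 1.
        return tf.reduce_sum(0.5 * term, axis=-1)
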
Example #4
    def loss(self, x, y):
        '''
        Args:
            x: shape=[s, b, c]
            y: shape=[s, b]
        Returns:
            a `dict` of losses
        '''
        z_mu, z_lv = self._encode(x, is_training=self.is_training)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._decode(z, y, is_training=self.is_training)

        with tf.name_scope('loss'):
            with tf.name_scope('E_log_p_x_zy'):
                L_x = -1.0 * tf.reduce_mean(
                    GaussianLogDensity(x, xh, tf.zeros_like(x)), )
            with tf.name_scope('D_KL_z'):
                L_z = tf.reduce_mean(
                    GaussianKLD(z_mu, z_lv, tf.zeros_like(z_mu),
                                tf.zeros_like(z_lv)))
            loss = {
                'L_x': L_x,
                'L_z': L_z,
            }

        tf.summary.scalar('L_x', L_x)
        tf.summary.scalar('L_z', L_z)
        return loss
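
Each of these loss methods returns a dict of scalar terms rather than a single objective. The sketch below shows how a training routine might combine the two terms of Example #4 into one Adam step; the function name build_train_op and the learning rate are illustrative, not taken from the original code.

import tensorflow as tf  # TF 1.x


def build_train_op(loss, learning_rate=1e-4):
    # Hypothetical trainer for Example #4: minimize the reconstruction term
    # plus the KL term with a single Adam optimizer (learning rate assumed).
    objective = loss['L_x'] + loss['L_z']
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(objective)
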
Example #5
    def loss(self, data):

        x = data[self.feat_type]
        y = data['speaker']

        # normalize input using the min/max normalizer
        x_in_minmax = self.normalizers[self.feat_type]['minmax'].forward_process(x)
        x_in = tf.expand_dims(x_in_minmax, 1)  # insert channel dimension

        # forward pass
        z_mu, z_lv = self.enc(x_in)
        z = GaussianSampleLayer(z_mu, z_lv)
        xb = self.dec(z, y)

        # loss
        KL_loss = kl_loss(z_mu, z_lv)
        recon_loss = log_loss(x_in_minmax, xb)

        loss = dict()
        loss['D_KL'] = KL_loss
        loss['recon'] = recon_loss

        loss['all'] = -loss['recon'] + loss['D_KL']

        # summary
        tf.summary.scalar('KL-div-sp', KL_loss)
        tf.summary.scalar('reconstruction-sp', recon_loss)
        return loss
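
Examples #1, #5, and #9 pass the raw features through a 'minmax' normalizer's forward_process before the encoder. The class below is a hypothetical stand-in for that normalizer, assuming it rescales each feature dimension by precomputed corpus minima and maxima; the backward_process method is likewise an assumption.

import tensorflow as tf  # TF 1.x


class MinMaxNormalizer(object):
    # Hypothetical stand-in for self.normalizers[feat]['minmax'], assuming it
    # rescales features with precomputed per-dimension corpus statistics.
    def __init__(self, xmin, xmax, eps=1e-6):
        self.xmin = tf.constant(xmin, dtype=tf.float32)
        self.xmax = tf.constant(xmax, dtype=tf.float32)
        self.eps = eps

    def forward_process(self, x):
        # map x into [0, 1] (assumed convention)
        return (x - self.xmin) / (self.xmax - self.xmin + self.eps)

    def backward_process(self, x):
        # inverse mapping; this method name is an assumption
        return x * (self.xmax - self.xmin + self.eps) + self.xmin
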
Example #6
    def loss(self, x, y):
        with tf.name_scope('loss'):
            z_mu, z_lv = self._encode(x)
            z = GaussianSampleLayer(z_mu, z_lv)
            xh = self._generate(z, y)

            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    slim.flatten(tf.zeros_like(z_mu)),
                    slim.flatten(tf.zeros_like(z_lv)),
                ))

            logPx = tf.reduce_mean(
                GaussianLogDensity(slim.flatten(x), slim.flatten(xh),
                                   tf.zeros_like(slim.flatten(xh))), )

            dx = self._discriminate(x)
            dxh = self._discriminate(xh)
            W_dist = tf.reduce_mean(dx - dxh)
            g_loss = tf.reduce_mean(-dxh)

            batch_size = self.arch['training']['batch_size']
            lam = self.arch['training']['lambda']

            alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
            alpha = alpha_dist.sample((batch_size, 1, 1, 1))
            interpolated = x + alpha * (xh - x)
            inte_logit = self._discriminate(interpolated)
            gradients = tf.gradients(inte_logit, [
                interpolated,
            ])[0]
            grad_l2 = tf.sqrt(
                tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
            gradient_penalty = tf.reduce_mean((grad_l2 - 1)**2)
            gp = lam * gradient_penalty

        loss = dict()
        alpha = self.arch['training']['alpha']
        loss['l_E'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx
        loss['l_D'] = -W_dist + gp
        loss['l_G'] = -logPx + alpha * g_loss
        loss['W_dist'] = W_dist
        loss['gp'] = gp

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)
        tf.summary.scalar('W_dist', W_dist)
        tf.summary.scalar("gp_loss", gradient_penalty)

        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        return loss
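
Example #6 builds the WGAN-GP term inline, while Example #9 calls a gradient_penalty_loss(x_real, x_fake, discriminator) helper that is not shown. The sketch below reconstructs such a helper using the same interpolation scheme as Example #6 (Gulrajani et al., 2017); the exact signature in the original code is an assumption.

import tensorflow as tf  # TF 1.x


def gradient_penalty_loss(x_real, x_fake, discriminator):
    # WGAN-GP penalty: penalize deviation of the critic's gradient norm from 1
    # at points interpolated between real and generated samples.
    shape = tf.stack([tf.shape(x_real)[0], 1, 1, 1])
    alpha = tf.random_uniform(shape, minval=0., maxval=1.)
    interpolated = x_real + alpha * (x_fake - x_real)
    logit = discriminator(interpolated)
    gradients = tf.gradients(logit, [interpolated])[0]
    grad_l2 = tf.sqrt(
        tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]) + 1e-12)
    return tf.reduce_mean(tf.square(grad_l2 - 1.))
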
Example #7
    def loss(self, x, y):
        with tf.name_scope('loss'):
            # use variable_scope so that trainable variables can be
            # collected per sub-network
            with tf.variable_scope("encoder") as scope:
                z_mu, z_lv = self.encoder(x, self.is_training)
                z = GaussianSampleLayer(z_mu, z_lv)
            with tf.variable_scope("generator") as scope:
                xh = self.generator(z, y, self.is_training)
                print("xh shape:", xh.get_shape().as_list())
                #xh = self.nchw_to_nhwc(xh)
                print("xh shape:", xh.get_shape().as_list())
            with tf.variable_scope("discriminator") as scope:
                #x = nchw_to_nhwc(x)

                disc_real, x_through_d = self.discriminator(
                    x, self.is_training)
                print("disc_real shape:", disc_real.get_shape().as_list())
                print("x_through_d:", x_through_d.get_shape().as_list())
                disc_fake, xh_through_d = self.discriminator(
                    xh, self.is_training)

            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    slim.flatten(tf.zeros_like(z_mu)),
                    slim.flatten(tf.zeros_like(z_lv)),
                ))
            logPx = -tf.reduce_mean(
                GaussianLogDensity(x_through_d, xh_through_d,
                                   tf.zeros_like(xh_through_d)), )

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        batch_size = self.arch['training']['batch_size']

        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        gen_loss = -tf.reduce_mean(disc_fake)
        disc_loss = tf.reduce_mean(disc_fake - disc_real)

        alpha = tf.random_uniform(shape=[batch_size, 513, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        self.reuse = False
        #gradient penalty
        print("before gradient x shape:", x.get_shape().as_list())
        differences = xh - x
        interpolates = x + (alpha * differences)
        print("interpolates shape:", interpolates.get_shape().as_list())
        pred, inter_h = self.discriminator(interpolates, self.is_training)
        print("pred shape:", pred.get_shape().as_list())
        gradients = tf.gradients(pred, [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        disc_loss += self.arch['LAMBDA'] * gradient_penalty
        self.reuse = True
        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss
        loss['xh'] = xh
        loss['l_G'] = g_loss
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = D_KL + logPx + 50. * d_loss
        return loss
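
Examples #7 and #8 invoke the discriminator several times inside one graph, so every call after the first has to reuse the same variables (Example #8 passes reuse=True, Example #7 toggles a self.reuse flag). The wrapper below is a hypothetical illustration of that reuse pattern; the layer stack and scope name are not from the original model.

import tensorflow as tf  # TF 1.x


def discriminator(x, is_training, reuse=False):
    # Hypothetical critic showing the reuse pattern assumed by Examples #7/#8:
    # the first call creates the variables, later calls pass reuse=True to
    # share them. The layer stack is illustrative only (NHWC input assumed);
    # is_training is kept only to mirror the call sites above.
    with tf.variable_scope('discriminator', reuse=reuse):
        h = tf.layers.conv2d(x, 64, 5, strides=2, padding='same',
                             activation=tf.nn.leaky_relu)
        h = tf.layers.flatten(h)
        logit = tf.layers.dense(h, 1)
        return logit, h  # logit plus an intermediate feature, as in the examples
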
Example #8
    def loss(self, x, y, is_training=True):

        batch_size = self.arch['training']['batch_size']
        alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        with tf.name_scope('loss'):
            # use variable_scope so that trainable variables can be
            # collected per sub-network
            with tf.variable_scope("encoder") as scope:
                z_mu, z_lv = self.encoder(x, is_training)
                z = GaussianSampleLayer(z_mu, z_lv)
                D_KL = tf.reduce_mean(
                    GaussianKLD(
                        slim.flatten(z_mu),
                        slim.flatten(z_lv),
                        #slim.flatten(tf.zeros_like(z_mu)),
                        #slim.flatten(tf.zeros_like(z_lv)),
                        slim.flatten(tf.random_normal(tf.shape(z_mu))),
                        slim.flatten(tf.random_normal(tf.shape(z_lv))),
                    ))
            with tf.variable_scope("generator") as scope:
                xh = self.generator(z, y, is_training)
                print("xh shape:", xh.get_shape().as_list())
                xh = self.nchw_to_nhwc(xh)
                print("xh shape:", xh.get_shape().as_list())
            with tf.variable_scope("discriminator") as scope:
                disc_real, x_through_d = self.discriminator(x, is_training)
                print("disc_real shape:", disc_real.get_shape().as_list())
                print("x_through_d:", x_through_d.get_shape().as_list())
                disc_fake, xh_through_d = self.discriminator(xh,
                                                             is_training,
                                                             reuse=True)
                logPx = -tf.reduce_mean(
                    GaussianLogDensity(
                        x_through_d,
                        xh_through_d,
                        tf.zeros_like(xh_through_d),
                    ))
                print("before gradient x shape:", x.get_shape().as_list())
                differences = xh - x
                differences = self.nhwc_to_nchw(differences)
                print("differences shape:", differences.get_shape().as_list())
                interpolates = self.nhwc_to_nchw(x) + (alpha * differences)
                print("interpolates shape:",
                      interpolates.get_shape().as_list())
                interpolates = tf.transpose(interpolates, [0, 2, 1, 3])
                pred, inter_h = self.discriminator(interpolates,
                                                   is_training,
                                                   reuse=True)
                print("pred shape:", pred.get_shape().as_list())
                gradients = tf.gradients(pred, [interpolates])[0]
                slopes = tf.sqrt(
                    tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
                gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
                gradient_penalty = self.arch['LAMBDA'] * gradient_penalty
                tf.summary.histogram("gradient_penalty", gradient_penalty)

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        #disc_real_loss = -tf.reduce_mean(disc_real)
        #disc_fake_loss = -tf.reduce_mean(disc_fake_loss)
        gen_loss = tf.reduce_mean(-disc_fake)
        disc_loss = tf.reduce_mean(disc_real) - tf.reduce_mean(disc_fake)

        #gradient penalty

        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss + gradient_penalty
        tf.summary.histogram("D_KL", D_KL)
        tf.summary.histogram("logpx", logPx)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        tf.summary.histogram('gen_loss', gen_loss)
        tf.summary.histogram('disc_loss', disc_loss)
        tf.summary.histogram("gradient penalty", gradient_penalty)
        tf.summary.histogram("Discriminator_loss", d_loss)
        tf.summary.histogram("Generator_loss", g_loss)

        loss['l_G'] = g_loss + logPx
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = (D_KL + logPx) + 50. * g_loss + loss['l_D']
        return loss
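
Example #8 converts tensors between NHWC and NCHW layouts with nchw_to_nhwc and nhwc_to_nchw methods that are not shown. They are presumably thin tf.transpose wrappers along the lines of this sketch.

import tensorflow as tf  # TF 1.x


def nchw_to_nhwc(x):
    # [batch, channel, height, width] -> [batch, height, width, channel]
    return tf.transpose(x, [0, 2, 3, 1])


def nhwc_to_nchw(x):
    # [batch, height, width, channel] -> [batch, channel, height, width]
    return tf.transpose(x, [0, 3, 1, 2])
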
Example #9
    def loss(self, data):

        x_sp = data['sp']
        x_mcc = data['mcc']
        y = data['speaker']
        label = tf.one_hot(tf.reduce_mean(y, axis=1, keep_dims=True), self.arch['y_dim'])
        
        # normalize input using the min/max normalizers
        x_sp_in_minmax = self.normalizers['sp']['minmax'].forward_process(x_sp)
        x_sp_in = tf.expand_dims(x_sp_in_minmax, 1) # insert channel dimension
        x_mcc_in_minmax = self.normalizers['mcc']['minmax'].forward_process(x_mcc)
        x_mcc_in = tf.expand_dims(x_mcc_in_minmax, 1) # insert channel dimension
       
        # forward pass
        # Use sp as source
        sp_z_mu, sp_z_lv = self.sp_enc(x_sp_in)
        z_sp = GaussianSampleLayer(sp_z_mu, sp_z_lv)
        x_sp_sp = self.sp_dec(z_sp, y)
        x_sp_mcc = self.mcc_dec(z_sp, y)

        cls_sp_logit = self.latent_cls(sp_z_mu)
        z_sp_pred = tf.nn.softmax(cls_sp_logit)
        sp_corr_pred = tf.equal(tf.argmax(z_sp_pred, 1), tf.reduce_mean(y, axis=1))

        # Use mcc as source
        mcc_z_mu, mcc_z_lv = self.mcc_enc(x_mcc_in)
        z_mcc = GaussianSampleLayer(mcc_z_mu, mcc_z_lv)
        x_mcc_sp = self.sp_dec(z_mcc, y)
        x_mcc_mcc = self.mcc_dec(z_mcc, y)
        
        x_mcc_mcc_NCHW = tf.expand_dims(x_mcc_mcc, axis=1)
        real_mcc_logit = self.mcc_dis(x_mcc_in)
        fake_mcc_logit = self.mcc_dis(x_mcc_mcc_NCHW)
        cls_mcc_logit = self.latent_cls(mcc_z_mu)
        z_mcc_pred = tf.nn.softmax(cls_mcc_logit)
        mcc_corr_pred = tf.equal(tf.argmax(z_mcc_pred, 1), tf.reduce_mean(y, axis=1))

        # loss
        kl_loss_sp = kl_loss(sp_z_mu, sp_z_lv)
        recon_loss_sp = log_loss(x_sp_in, x_sp_sp)
        cross_loss_sp2mcc = log_loss(x_mcc_in, x_sp_mcc)
        kl_loss_mcc = kl_loss(mcc_z_mu, mcc_z_lv)
        recon_loss_mcc = log_loss(x_mcc_in, x_mcc_mcc_NCHW)
        cross_loss_mcc2sp = log_loss(x_sp_in, x_mcc_sp)
        latent_loss = tf.reduce_mean(tf.abs(sp_z_mu - mcc_z_mu))

        gradient_penalty_mcc = gradient_penalty_loss(x_mcc_in, x_mcc_mcc, self.mcc_dis)
        cls_loss_sp = \
            tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.stop_gradient(label), logits=cls_sp_logit))
        cls_loss_mcc = \
            tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.stop_gradient(label), logits=cls_mcc_logit))
        
        acc = 0.5 * (tf.reduce_mean(tf.cast(sp_corr_pred, tf.float32)) + tf.reduce_mean(tf.cast(mcc_corr_pred, tf.float32)))

        loss = dict()
        loss['D_KL_sp'] = kl_loss_sp
        loss['D_KL_mcc'] = kl_loss_mcc
        loss['recon_sp'] = recon_loss_sp 
        loss['recon_mcc'] = recon_loss_mcc
        loss['cross_sp2mcc'] = cross_loss_sp2mcc 
        loss['cross_mcc2sp'] = cross_loss_mcc2sp
        loss['latent'] = latent_loss
        loss['wgan_mcc'] = tf.reduce_mean(fake_mcc_logit) - tf.reduce_mean(real_mcc_logit)
        loss['wgan_gp_mcc'] = gradient_penalty_mcc
        loss['cls_loss_sp'] = cls_loss_sp
        loss['cls_loss_mcc'] = cls_loss_mcc

        with tf.name_scope('Summary'):
            tf.summary.scalar('KL-div-sp', kl_loss_sp)
            tf.summary.scalar('KL-div-mcc', kl_loss_mcc)
            tf.summary.scalar('reconstruction-sp', recon_loss_sp)
            tf.summary.scalar('reconstruction-mcc', recon_loss_mcc)
            tf.summary.scalar('cross-sp2mcc', cross_loss_sp2mcc)
            tf.summary.scalar('cross-mcc2sp', cross_loss_mcc2sp)
            tf.summary.scalar('latent', latent_loss)
            tf.summary.scalar('wgan-mcc', loss['wgan_mcc'])
            tf.summary.scalar('wgan-gp-mcc', gradient_penalty_mcc)
            tf.summary.scalar('cls-sp', cls_loss_sp)
            tf.summary.scalar('cls-mcc', cls_loss_mcc)
            tf.summary.scalar('cls-accuracy', acc)
        
        return loss
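
Example #9 returns separate VAE, WGAN, and classifier terms without combining them. Training such a model typically alternates updates over the corresponding variable groups. The sketch below shows one possible wiring; the variable-name filters, objective groupings, and learning rate are assumptions, not taken from the original code.

import tensorflow as tf  # TF 1.x


def build_train_ops(loss, lr=1e-4):
    # Hypothetical wiring for Example #9: one Adam step per player, restricted
    # to that sub-network's variables. The name filters ('enc'/'dec', 'dis',
    # 'cls') and the sign conventions are assumptions.
    tvars = tf.trainable_variables()
    vae_vars = [v for v in tvars if 'enc' in v.name or 'dec' in v.name]
    dis_vars = [v for v in tvars if 'dis' in v.name]
    cls_vars = [v for v in tvars if 'cls' in v.name]

    vae_obj = (loss['D_KL_sp'] + loss['D_KL_mcc']
               - loss['recon_sp'] - loss['recon_mcc']
               - loss['cross_sp2mcc'] - loss['cross_mcc2sp']
               + loss['latent'])
    dis_obj = loss['wgan_mcc'] + loss['wgan_gp_mcc']
    cls_obj = loss['cls_loss_sp'] + loss['cls_loss_mcc']

    opt = tf.train.AdamOptimizer(lr)
    return dict(
        vae=opt.minimize(vae_obj, var_list=vae_vars),
        dis=opt.minimize(dis_obj, var_list=dis_vars),
        cls=opt.minimize(cls_obj, var_list=cls_vars),
    )
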