Example #1
    def loss(self, x, y):
        with tf.name_scope('loss'):
            z_mu, z_lv = self._encode(x)
            z = GaussianSampleLayer(z_mu, z_lv)
            xh = self._generate(z, y)

            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    slim.flatten(tf.zeros_like(z_mu)),
                    slim.flatten(tf.zeros_like(z_lv)),
                ))
            logPx = tf.reduce_mean(
                GaussianLogDensity(slim.flatten(x), slim.flatten(xh),
                                   tf.zeros_like(slim.flatten(xh))), )

        loss = dict()
        loss['G'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)

        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        return loss
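The examples in this listing lean on the helpers GaussianSampleLayer, GaussianKLD, and GaussianLogDensity, which are defined elsewhere in the repository. Below is a minimal sketch of what they compute, assuming the standard diagonal-Gaussian formulas; the repository's actual implementations may differ in reduction axes and numerical-stability constants.

import math
import tensorflow as tf

EPSILON = 1e-6  # stability constant (an assumption)

def GaussianSampleLayer(z_mu, z_lv):
    # Reparameterization trick: z = mu + exp(lv / 2) * eps, eps ~ N(0, I).
    eps = tf.random_normal(tf.shape(z_mu))
    return z_mu + tf.exp(0.5 * z_lv) * eps

def GaussianKLD(mu1, lv1, mu2, lv2):
    # KL( N(mu1, exp(lv1)) || N(mu2, exp(lv2)) ) for diagonal Gaussians,
    # summed over the last (feature) axis.
    v1, v2 = tf.exp(lv1), tf.exp(lv2)
    kld = 0.5 * (lv2 - lv1 + (v1 + tf.square(mu1 - mu2)) / (v2 + EPSILON) - 1.)
    return tf.reduce_sum(kld, axis=-1)

def GaussianLogDensity(x, mu, log_var):
    # log N(x; mu, exp(log_var)) with diagonal covariance,
    # summed over the last (feature) axis.
    c = math.log(2. * math.pi)
    var = tf.exp(log_var)
    log_prob = -0.5 * (c + log_var + tf.square(x - mu) / (var + EPSILON))
    return tf.reduce_sum(log_prob, axis=-1)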
Example #2
    def loss(self, x, y):
        '''
        Args:
            x: shape=[s, b, c]
            y: shape=[s, b]
        Returns:
            a `dict` of losses
        '''
        z_mu, z_lv = self._encode(x, is_training=self.is_training)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._decode(z, y, is_training=self.is_training)

        with tf.name_scope('loss'):
            with tf.name_scope('E_log_p_x_zy'):
                L_x = -1.0 * tf.reduce_mean(
                    GaussianLogDensity(x, xh, tf.zeros_like(x)), )
            with tf.name_scope('D_KL_z'):
                L_z = tf.reduce_mean(
                    GaussianKLD(z_mu, z_lv, tf.zeros_like(z_mu),
                                tf.zeros_like(z_lv)))
            loss = {
                'L_x': L_x,
                'L_z': L_z,
            }

        tf.summary.scalar('L_x', L_x)
        tf.summary.scalar('L_z', L_z)
        return loss
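This variant returns the two ELBO terms separately and leaves it to the caller to combine them. A minimal usage sketch in TF 1.x style, where model, x, and y are hypothetical stand-ins for the surrounding training code:

# Hypothetical wiring; the names model, x, and y are not from the source.
losses = model.loss(x, y)
neg_elbo = losses['L_x'] + losses['L_z']   # reconstruction NLL + KL divergence
train_op = tf.train.AdamOptimizer(1e-4).minimize(neg_elbo)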
Example #3
    def loss(self, x, y):
        with tf.name_scope('loss'):
            z_mu, z_lv = self._encode(x)
            z = GaussianSampleLayer(z_mu, z_lv)
            xh = self._generate(z, y)

            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    slim.flatten(tf.zeros_like(z_mu)),
                    slim.flatten(tf.zeros_like(z_lv)),
                ))

            logPx = tf.reduce_mean(
                GaussianLogDensity(slim.flatten(x), slim.flatten(xh),
                                   tf.zeros_like(slim.flatten(xh))), )

            dx = self._discriminate(x)
            dxh = self._discriminate(xh)
            W_dist = tf.reduce_mean(dx - dxh)
            g_loss = tf.reduce_mean(-dxh)

            batch_size = self.arch['training']['batch_size']
            lam = self.arch['training']['lambda']

            alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
            alpha = alpha_dist.sample((batch_size, 1, 1, 1))
            interpolated = x + alpha * (xh - x)
            inte_logit = self._discriminate(interpolated)
            gradients = tf.gradients(inte_logit, [
                interpolated,
            ])[0]
            grad_l2 = tf.sqrt(
                tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
            gradient_penalty = tf.reduce_mean((grad_l2 - 1)**2)
            gp = lam * gradient_penalty

        loss = dict()
        alpha = self.arch['training']['alpha']
        loss['l_E'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx
        loss['l_D'] = -W_dist + gp
        loss['l_G'] = -logPx + alpha * g_loss
        loss['W_dist'] = W_dist
        loss['gp'] = gp

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)
        tf.summary.scalar('W_dist', W_dist)
        tf.summary.scalar("gp_loss", gradient_penalty)

        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        return loss
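The interpolation block above is the WGAN-GP penalty, lambda * E[(||grad D(x_hat)||_2 - 1)^2], with x_hat drawn uniformly on the line between a real sample and its reconstruction. A standalone sketch of just that term, assuming a critic callable `discriminate` and 4-D NHWC inputs:

def wgan_gradient_penalty(discriminate, x_real, x_fake, batch_size):
    # One interpolation coefficient per sample, broadcast over H, W, C.
    alpha = tf.random_uniform([batch_size, 1, 1, 1], minval=0., maxval=1.)
    x_hat = x_real + alpha * (x_fake - x_real)
    logit = discriminate(x_hat)
    grads = tf.gradients(logit, [x_hat])[0]
    # Per-sample L2 norm of the gradient over all non-batch axes.
    grad_l2 = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
    return tf.reduce_mean((grad_l2 - 1.) ** 2)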
Example #4
    def loss(self, x, y, t):
        # t is the sentence embeddings (actual values)
        with tf.name_scope('loss'):
            t_enc = self._text_encode(x)
            xh = self._generate(t_enc, y)

            tx_loss = tf.reduce_mean(tf.nn.l2_loss(t_enc - t))

            logPx = tf.reduce_mean(
                GaussianLogDensity(slim.flatten(x), slim.flatten(xh),
                                   tf.zeros_like(slim.flatten(xh))), )

            dx = self._discriminate(x)
            dxh = self._discriminate(xh)
            W_dist = tf.reduce_mean(dx - dxh)
            g_loss = tf.reduce_mean(-dxh)

            batch_size = self.arch['training']['batch_size']
            lam = self.arch['training']['lambda']

            alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
            alpha = alpha_dist.sample((batch_size, 1, 1, 1))
            interpolated = x + alpha * (xh - x)
            inte_logit = self._discriminate(interpolated)
            gradients = tf.gradients(inte_logit, [
                interpolated,
            ])[0]
            grad_l2 = tf.sqrt(
                tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
            gradient_penalty = tf.reduce_mean((grad_l2 - 1)**2)
            gp = lam * gradient_penalty

        loss = dict()
        alpha = self.arch['training']['alpha']
        loss['l_T'] = tx_loss - logPx
        loss['logP'] = logPx
        loss['l_D'] = -W_dist + gp
        loss['l_G'] = -logPx + alpha * g_loss
        loss['W_dist'] = W_dist
        loss['gp'] = gp
        loss['tx_loss'] = tx_loss

        tf.summary.scalar('logPx', logPx)
        tf.summary.scalar('W_dist', W_dist)
        tf.summary.scalar("gp_loss", gradient_penalty)
        tf.summary.scalar('text_loss', tx_loss)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        return loss
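Note that tf.nn.l2_loss(v) already reduces to the scalar sum(v ** 2) / 2, so the outer tf.reduce_mean in tx_loss is a no-op; the term is half the total squared embedding error rather than a per-element mean. An equivalent explicit form:

# Equivalent to tf.reduce_mean(tf.nn.l2_loss(t_enc - t)):
tx_loss = 0.5 * tf.reduce_sum(tf.square(t_enc - t))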
Example #5
    def loss(self, x_s, y_s, x_t, y_t):
        def circuit_loop(x, y):

            z_mu, z_lv = self._encode(x, is_training=self.is_training)
            z = GaussianSampleLayer(z_mu, z_lv)

            x_logit, x_feature = self._discriminate(
                x, is_training=self.is_training)

            xh, xh_sig_logit = self._generate(z,
                                              y,
                                              is_training=self.is_training)

            zh_mu, zh_lv = self._encode(xh, is_training=self.is_training)

            xh_logit, xh_feature = self._discriminate(
                xh, is_training=self.is_training)

            return dict(
                z=z,
                z_mu=z_mu,
                z_lv=z_lv,
                xh=xh,
                xh_sig_logit=xh_sig_logit,
                x_logit=x_logit,
                x_feature=x_feature,
                zh_mu=zh_mu,
                zh_lv=zh_lv,
                xh_logit=xh_logit,
                xh_feature=xh_feature,
            )

        s = circuit_loop(x_s, y_s)
        t = circuit_loop(x_t, y_t)
        s2t = circuit_loop(x_s, y_t)

        with tf.name_scope('loss'):

            def mean_sigmoid_cross_entropy_with_logits(logit, truth):
                '''
                truth: 0. or 1.
                '''
                # tf.nn.sigmoid_cross_entropy_with_logits requires keyword arguments.
                return tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logit, labels=truth * tf.ones_like(logit)))

            loss = dict()

            # Parallel
            loss['reconst_t'] = \
                  tf.reduce_mean(t['x_logit']) \
                - tf.reduce_mean(t['xh_logit'])

            # Parallel
            loss['reconst_s'] = \
                  tf.reduce_mean(s['x_logit']) \
                - tf.reduce_mean(s['xh_logit'])

            # Non-parallel
            loss['conv_s2t'] = \
                  tf.reduce_mean(t['x_logit']) \
                - tf.reduce_mean(s2t['xh_logit'])

            # Non-parallel: s v. t
            loss['real_s_t'] = \
                  tf.reduce_mean(t['x_logit']) \
                - tf.reduce_mean(s['x_logit'])

            # Only the conversion term (conv_s2t) is used as the WGAN objective.
            loss['WGAN'] = loss['conv_s2t']

            # VAE's Kullback-Leibler Divergence
            loss['KL(z)'] = \
                tf.reduce_mean(
                    GaussianKLD(
                        s['z_mu'], s['z_lv'],
                        tf.zeros_like(s['z_mu']), tf.zeros_like(s['z_lv']))) +\
                tf.reduce_mean(
                    GaussianKLD(
                        t['z_mu'], t['z_lv'],
                        tf.zeros_like(t['z_mu']), tf.zeros_like(t['z_lv'])))
            loss['KL(z)'] /= 2.0

            # VAE's Reconstruction Neg. Log-Likelihood (on the 'feature' space of Dx)
            loss['Dis'] = \
                tf.reduce_mean(
                    GaussianLogDensity(
                        slim.flatten(x_t),
                        slim.flatten(t['xh']),
                        tf.zeros_like(slim.flatten(x_t)))) +\
                tf.reduce_mean(
                    GaussianLogDensity(
                        slim.flatten(x_s),
                        slim.flatten(s['xh']),
                        tf.zeros_like(slim.flatten(x_s))))
            loss['Dis'] /= -2.0

            # For summaries
            with tf.name_scope('Summary'):
                tf.summary.scalar('DKL_z', loss['KL(z)'])
                tf.summary.scalar('MMSE', loss['Dis'])

                tf.summary.scalar('WGAN', loss['WGAN'])
                tf.summary.scalar('WGAN-s', loss['reconst_s'])
                tf.summary.scalar('WGAN-t', loss['reconst_t'])
                tf.summary.scalar('WGAN-s2t', loss['conv_s2t'])
                tf.summary.scalar('WGAN-t-s', loss['real_s_t'])

                tf.summary.histogram('y', tf.concat([y_t, y_s], 0))
                tf.summary.histogram('z', tf.concat([s['z'], t['z']], 0))

                tf.summary.histogram('z_s', s['z'])
                tf.summary.histogram('z_t', t['z'])

                tf.summary.histogram('z_mu',
                                     tf.concat([s['z_mu'], t['z_mu']], 0))
                tf.summary.histogram('z_mu_s', s['z_mu'])
                tf.summary.histogram('z_mu_t', t['z_mu'])

                tf.summary.histogram('z_lv',
                                     tf.concat([s['z_lv'], t['z_lv']], 0))
                tf.summary.histogram('z_lv_s', s['z_lv'])
                tf.summary.histogram('z_lv_t', t['z_lv'])

                tf.summary.histogram('logit_t_from_t', t['xh_logit'])
                tf.summary.histogram('logit_t_from_s', s2t['xh_logit'])
                tf.summary.histogram('logit_t', t['x_logit'])

                tf.summary.histogram(
                    'logit_t_True_FromT_FromS',
                    tf.concat([t['x_logit'], t['xh_logit'], s2t['xh_logit']],
                              0))
                tf.summary.histogram(
                    'logit_s_v_sh', tf.concat([s['x_logit'], s['xh_logit']],
                                              0))
                tf.summary.histogram(
                    'logit_t_v_th', tf.concat([t['x_logit'], t['xh_logit']],
                                              0))
        return loss
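This function only builds the loss terms; how they are split across the encoder, generator, and critic is decided by the caller. A hypothetical wiring, where the scope names, optimizer, and the choice of which terms feed which objective are illustrative assumptions rather than the source's training code:

# Illustrative only: scope names, learning rate, and objectives are assumptions.
losses = model.loss(x_s, y_s, x_t, y_t)
e_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')

obj_d = -losses['WGAN']                    # critic maximizes the Wasserstein estimate
obj_g = losses['Dis'] + losses['WGAN']     # generator: reconstruction + adversarial term
obj_e = losses['Dis'] + losses['KL(z)']    # encoder: reconstruction + KL divergence

opt = tf.train.RMSPropOptimizer(1e-4)
train_d = opt.minimize(obj_d, var_list=d_vars)
train_g = opt.minimize(obj_g, var_list=g_vars)
train_e = opt.minimize(obj_e, var_list=e_vars)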
Example #6
    def loss(self, x, y):
        with tf.name_scope('loss'):
            # Use variable_scope so that trainable variables can be collected per subnetwork.
            with tf.variable_scope("encoder") as scope:
                z_mu, z_lv = self.encoder(x, self.is_training)
                z = GaussianSampleLayer(z_mu, z_lv)
            with tf.variable_scope("generator") as scope:
                xh = self.generator(z, y, self.is_training)
                print("xh shape:", xh.get_shape().as_list())
                #xh = self.nchw_to_nhwc(xh)
                print("xh shape:", xh.get_shape().as_list())
            with tf.variable_scope("discriminator") as scope:
                #x = nchw_to_nhwc(x)

                disc_real, x_through_d = self.discriminator(
                    x, self.is_training)
                print("disc_real shape:", disc_real.get_shape().as_list())
                print("x_through_d:", x_through_d.get_shape().as_list())
                disc_fake, xh_through_d = self.discriminator(
                    xh, self.is_training)

            D_KL = tf.reduce_mean(
                GaussianKLD(
                    slim.flatten(z_mu),
                    slim.flatten(z_lv),
                    slim.flatten(tf.zeros_like(z_mu)),
                    slim.flatten(tf.zeros_like(z_lv)),
                ))
            logPx = -tf.reduce_mean(
                GaussianLogDensity(x_through_d, xh_through_d,
                                   tf.zeros_like(xh_through_d)), )

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        batch_size = self.arch['training']['batch_size']

        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        gen_loss = -tf.reduce_mean(disc_fake)
        disc_loss = tf.reduce_mean(disc_fake - disc_real)

        alpha = tf.random_uniform(shape=[batch_size, 513, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        self.reuse = False
        #gradient penalty
        print("before gradient x shape:", x.get_shape().as_list())
        differences = xh - x
        interpolates = x + (alpha * differences)
        print("interpolates shape:", interpolates.get_shape().as_list())
        pred, inter_h = self.discriminator(interpolates, self.is_training)
        print("pred shape:", pred.get_shape().as_list())
        gradients = tf.gradients(pred, [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=[1, 2]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        disc_loss += self.arch['LAMBDA'] * gradient_penalty
        self.reuse = True
        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss
        loss['xh'] = xh
        loss['l_G'] = g_loss
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = D_KL + logPx + 50. * d_loss
        return loss
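For reference, the canonical WGAN-GP penalty takes the gradient norm over every non-batch axis; with 4-D inputs that would read as follows (a sketch of the standard formulation, not necessarily the author's intended axes):

# Standard per-sample gradient norm over all non-batch axes of a 4-D tensor.
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)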
Example #7
    def loss(self, x, y, is_training=True):

        batch_size = self.arch['training']['batch_size']
        alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1],
                                  minval=0.,
                                  maxval=1.)
        with tf.name_scope('loss'):
            # Use variable_scope so that trainable variables can be collected per subnetwork.
            with tf.variable_scope("encoder") as scope:
                z_mu, z_lv = self.encoder(x, is_training)
                z = GaussianSampleLayer(z_mu, z_lv)
                D_KL = tf.reduce_mean(
                    GaussianKLD(
                        slim.flatten(z_mu),
                        slim.flatten(z_lv),
                        #slim.flatten(tf.zeros_like(z_mu)),
                        #slim.flatten(tf.zeros_like(z_lv)),
                        slim.flatten(tf.random_normal(tf.shape(z_mu))),
                        slim.flatten(tf.random_normal(tf.shape(z_lv))),
                    ))
            with tf.variable_scope("generator") as scope:
                xh = self.generator(z, y, is_training)
                print("xh shape:", xh.get_shape().as_list())
                xh = self.nchw_to_nhwc(xh)
                print("xh shape:", xh.get_shape().as_list())
            with tf.variable_scope("discriminator") as scope:
                disc_real, x_through_d = self.discriminator(x, is_training)
                print("disc_real shape:", disc_real.get_shape().as_list())
                print("x_through_d:", x_through_d.get_shape().as_list())
                disc_fake, xh_through_d = self.discriminator(xh,
                                                             is_training,
                                                             reuse=True)
                logPx = -tf.reduce_mean(
                    GaussianLogDensity(
                        x_through_d,
                        xh_through_d,
                        tf.zeros_like(xh_through_d),
                    ))
                print("before gradient x shape:", x.get_shape().as_list())
                differences = xh - x
                differences = self.nhwc_to_nchw(differences)
                print("differences shape:", differences.get_shape().as_list())
                interpolates = self.nhwc_to_nchw(x) + (alpha * differences)
                print("interpolates shape:",
                      interpolates.get_shape().as_list())
                interpolates = tf.transpose(interpolates, [0, 2, 1, 3])
                pred, inter_h = self.discriminator(interpolates,
                                                   is_training,
                                                   reuse=True)
                print("pred shape:", pred.get_shape().as_list())
                gradients = tf.gradients(pred, [interpolates])[0]
                slopes = tf.sqrt(
                    tf.reduce_sum(tf.square(gradients), axis=[1]))
                gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
                gradient_penalty = self.arch['LAMBDA'] * gradient_penalty
                tf.summary.histogram("gradient_penalty", gradient_penalty)

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        #disc_real_loss = -tf.reduce_mean(disc_real)
        #disc_fake_loss = -tf.reduce_mean(disc_fake_loss)
        gen_loss = tf.reduce_mean(-disc_fake)
        disc_loss = tf.reduce_mean(disc_real) - tf.reduce_mean(disc_fake)

        #gradient penalty

        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss + gradient_penalty
        tf.summary.histogram("D_KL", D_KL)
        tf.summary.histogram("logpx", logPx)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        tf.summary.histogram('gen_loss', gen_loss)
        tf.summary.histogram('disc_loss', disc_loss)
        tf.summary.histogram("gradient penalty", gradient_penalty)
        tf.summary.histogram("Discriminator_loss", d_loss)
        tf.summary.histogram("Generator_loss", g_loss)

        loss['l_G'] = g_loss + logPx
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = (D_KL + logPx) + 50. * g_loss + loss['l_D']
        return loss
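All of these loss methods also register tf.summary ops as a side effect. A minimal sketch of collecting and writing them during training, where train_op, feeds, and step are hypothetical stand-ins for the surrounding training loop:

# Hypothetical TF 1.x summary wiring; log directory and fetches are assumptions.
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('logdir', tf.get_default_graph())
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    summary, _ = sess.run([merged, train_op], feed_dict=feeds)
    writer.add_summary(summary, global_step=step)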