Example #1
def truncate_fancy(dlat,
                   dlat_avg,
                   model_scale=18,
                   truncation_psi=0.7,
                   minlayer=0,
                   maxlayer=8,
                   do_clip=False):
    layer_idx = np.arange(model_scale)[np.newaxis, :, np.newaxis]
    ones = np.ones(layer_idx.shape, dtype=np.float32)
    coefs = np.where(layer_idx < maxlayer, truncation_psi * ones, ones)
    if minlayer > 0:
        coefs[0, :minlayer, :] = ones[0, :minlayer, :]
    if do_clip:
        return tflib.lerp_clip(dlat_avg, dlat, coefs).eval()
    else:
        return tflib.lerp(dlat_avg, dlat, coefs)
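
For reference, tflib.lerp(a, b, t) computes a + (b - a) * t, and tflib.lerp_clip additionally clips t to [0, 1]. A minimal NumPy sketch of the same layerwise-truncation logic (shapes and the psi/cutoff values are illustrative, not taken from the snippet):

import numpy as np

def lerp(a, b, t):  # same semantics as tflib.lerp
    return a + (b - a) * t

dlat_avg = np.zeros((1, 18, 512), dtype=np.float32)    # stand-in for the tracked W average
dlat = np.random.randn(1, 18, 512).astype(np.float32)  # one mapped latent, per-layer copies

layer_idx = np.arange(18)[np.newaxis, :, np.newaxis]
coefs = np.where(layer_idx < 8, 0.7, 1.0)              # psi=0.7 on layers 0..7, 1.0 above
truncated = lerp(dlat_avg, dlat, coefs)                # pull early layers toward the average
assert np.allclose(truncated[:, 8:], dlat[:, 8:])      # layers past the cutoff pass through
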
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    rotation_offset = 108

    with tf.name_scope('DynamicRange'):
        x = tf.cast(x, tf.float32)
        x = misc.adjust_dynamic_range(x, drange_data, drange_net)
    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            random_vector = tf.random_uniform([tf.shape(x)[0]]) < 0.5
            x = tf.where(random_vector, x, tf.reverse(x, [3]))
            rotation_cos = tf.expand_dims(labels[:, rotation_offset], axis=-1)
            rotation_sin = tf.expand_dims(labels[:, rotation_offset + 1],
                                          axis=-1)
            angle = tf.atan2(rotation_sin, rotation_cos)
            new_rotation_cos = tf.cos(angle)
            new_rotation_sin = tf.sin(angle) * -1
            mirrored_labels = tf.concat([
                labels[:, :rotation_offset], new_rotation_cos,
                new_rotation_sin, labels[:, rotation_offset + 2:]
            ],
                                        axis=1)
            labels = tf.where(random_vector, labels, mirrored_labels)
    with tf.name_scope(
            'FadeLOD'
    ):  # Smooth crossfade between consecutive levels-of-detail.
        s = tf.shape(x)
        y = tf.reshape(x, [-1, s[1], s[2] // 2, 2, s[3] // 2, 2])
        y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)
        y = tf.tile(y, [1, 1, 1, 2, 1, 2])
        y = tf.reshape(y, [-1, s[1], s[2], s[3]])
        x = tflib.lerp(x, y, lod - tf.floor(lod))
    with tf.name_scope(
            'UpscaleLOD'
    ):  # Upscale to match the expected input/output size of the networks.
        s = tf.shape(x)
        factor = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])

    # Multiply the rotation label by 2.0
    # labels = tf.concat([
    #    labels[:, :rotation_offset],
    #    labels[:, rotation_offset:rotation_offset + 2] * 2.0,
    #    labels[:, rotation_offset + 2:]
    # ], axis=-1)
    return x, labels
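
The FadeLOD block box-filters each 2x2 pixel block, re-expands it to full size, and blends the result with the original image by the fractional part of lod. A NumPy sketch of that crossfade (NCHW layout assumed, shapes illustrative):

import numpy as np

def fade_lod(x, lod):
    # NumPy re-implementation of the FadeLOD crossfade for NCHW batches.
    n, c, h, w = x.shape
    y = x.reshape(n, c, h // 2, 2, w // 2, 2).mean(axis=(3, 5), keepdims=True)  # 2x2 box filter
    y = np.tile(y, (1, 1, 1, 2, 1, 2)).reshape(n, c, h, w)                      # nearest re-expand
    t = lod - np.floor(lod)
    return x + (y - x) * t  # lerp: fractional lod blends fine and coarse detail

x = np.random.randn(4, 3, 8, 8).astype(np.float32)
assert np.allclose(fade_lod(x, 0.0), x)  # an integer lod leaves the image unchanged
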
Example #3
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    rotation_offset = 108
    with tf.name_scope('DynamicRange'):
        x = tf.cast(x, tf.float32)
        x = misc.adjust_dynamic_range(x, drange_data, drange_net)
    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            random_vector = tf.random_uniform([tf.shape(x)[0]]) < 0.5
            x = tf.where(random_vector, x, tf.reverse(x, [3]))
            indices_first = tf.range(rotation_offset)
            swaps = tf.constant([0, 7, 6, 5, 4, 3, 2, 1]) + rotation_offset
            indices_last = tf.range(rotation_offset + 8, tf.shape(labels)[1])
            indices = tf.concat([indices_first, swaps, indices_last], axis=0)
            mirrored_labels = tf.gather(labels, indices, axis=1)
            labels = tf.where(random_vector, labels, mirrored_labels)
    with tf.name_scope(
            'FadeLOD'
    ):  # Smooth crossfade between consecutive levels-of-detail.
        s = tf.shape(x)
        y = tf.reshape(x, [-1, s[1], s[2] // 2, 2, s[3] // 2, 2])
        y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)
        y = tf.tile(y, [1, 1, 1, 2, 1, 2])
        y = tf.reshape(y, [-1, s[1], s[2], s[3]])
        x = tflib.lerp(x, y, lod - tf.floor(lod))
    with tf.name_scope(
            'UpscaleLOD'
    ):  # Upscale to match the expected input/output size of the networks.
        s = tf.shape(x)
        factor = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
    with tf.name_scope('BalanceLabels'):
        random_mask = tf.cast(tf.random_uniform([tf.shape(x)[0], 1]) < 0.5,
                              dtype=tf.float32)
        fl = 1  # offsets within the 8-wide rotation block of the two
        fr = 7  # mirror-paired components, zeroed together with probability 0.5
        labels = tf.concat([
            labels[:, :rotation_offset + fl],
            labels[:, rotation_offset + fl:rotation_offset + fl + 1] * random_mask,
            labels[:, rotation_offset + fl + 1:rotation_offset + fr],
            labels[:, rotation_offset + fr:rotation_offset + fr + 1] * random_mask,
            labels[:, rotation_offset + fr + 1:],
        ], axis=-1)
    return x, labels
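
Note that the swap table [0, 7, 6, 5, 4, 3, 2, 1] used by the mirror augmentation is its own inverse, so mirroring twice restores the original labels; a quick NumPy check:

import numpy as np

perm = np.array([0, 7, 6, 5, 4, 3, 2, 1])
assert np.array_equal(perm[perm], np.arange(8))  # applying the swap twice is the identity
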
Example #4
def D_wgan_gp(
    G,
    D,
    opt,
    training_set,
    minibatch_size,
    reals,
    labels,  # pylint: disable=unused-argument
    wgan_lambda=10.0,  # Weight for the gradient penalty term.
    wgan_epsilon=0.001,  # Weight for the epsilon term, \epsilon_{drift}.
    wgan_target=1.0,  # Target value for gradient magnitudes.
):

    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True))
    fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True))
    real_scores_out = autosummary("Loss/scores/real", real_scores_out)
    fake_scores_out = autosummary("Loss/scores/fake", fake_scores_out)
    loss = fake_scores_out - real_scores_out

    with tf.name_scope("GradientPenalty"):
        mixing_factors = tf.random_uniform(
            [minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype
        )
        mixed_images_out = tflib.lerp(
            tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors
        )
        mixed_scores_out = fp32(
            D.get_output_for(mixed_images_out, labels, is_training=True)
        )
        mixed_scores_out = autosummary("Loss/scores/mixed", mixed_scores_out)
        mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
        mixed_grads = opt.undo_loss_scaling(
            fp32(tf.gradients(mixed_loss, [mixed_images_out])[0])
        )
        mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
        mixed_norms = autosummary("Loss/mixed_norms", mixed_norms)
        gradient_penalty = tf.square(mixed_norms - wgan_target)
    loss += gradient_penalty * (wgan_lambda / (wgan_target ** 2))

    with tf.name_scope("EpsilonPenalty"):
        epsilon_penalty = autosummary(
            "Loss/epsilon_penalty", tf.square(real_scores_out)
        )
    loss += epsilon_penalty * wgan_epsilon
    return loss
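
Assembled, the objective implemented above is (with r/f the real/fake scores, t = wgan_target, \lambda = wgan_lambda, and \hat{x} the lerp-mixed images):

L_D = \mathbb{E}[f] - \mathbb{E}[r] + \frac{\lambda}{t^2}\,\mathbb{E}\big[(\lVert\nabla_{\hat{x}} D(\hat{x})\rVert_2 - t)^2\big] + \epsilon_{drift}\,\mathbb{E}[r^2]
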
Example #5
def D_wgan_gp(
    G,
    D,
    opt,
    training_set,
    minibatch_size,
    reals,
    labels,
    wgan_lambda=10.0,
    wgan_epsilon=0.001,
    wgan_target=1.0,
):
    _ = opt, training_set
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    real_scores_out = D.get_output_for(reals, labels, is_training=True)
    fake_scores_out = D.get_output_for(fake_images_out,
                                       labels,
                                       is_training=True)
    real_scores_out = autosummary("Loss/scores/real", real_scores_out)
    fake_scores_out = autosummary("Loss/scores/fake", fake_scores_out)
    loss = fake_scores_out - real_scores_out
    with tf.name_scope("EpsilonPenalty"):
        epsilon_penalty = autosummary("Loss/epsilon_penalty",
                                      tf.square(real_scores_out))
    loss += epsilon_penalty * wgan_epsilon

    with tf.name_scope("GradientPenalty"):
        mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1],
                                           0.0,
                                           1.0,
                                           dtype=fake_images_out.dtype)
        mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype),
                                      fake_images_out, mixing_factors)
        mixed_scores_out = D.get_output_for(mixed_images_out,
                                            labels,
                                            is_training=True)
        mixed_scores_out = autosummary("Loss/scores/mixed", mixed_scores_out)
        mixed_grads = tf.gradients(tf.reduce_sum(mixed_scores_out),
                                   [mixed_images_out])[0]
        mixed_norms = tf.sqrt(
            tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
        mixed_norms = autosummary("Loss/mixed_norms", mixed_norms)
        gradient_penalty = tf.square(mixed_norms - wgan_target)
        reg = gradient_penalty * (wgan_lambda / (wgan_target**2))
    return loss, reg
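
Unlike Example #4, this variant returns the gradient penalty separately as reg, which permits StyleGAN2-style lazy regularization (running the penalty only every few minibatches). A hypothetical sketch of how a training loop might consume the pair; reg_interval and the rescaling are assumptions, not part of this snippet:

def apply_lazy_reg(loss, reg, step, reg_interval=16):
    # Hypothetical helper: add the penalty only every reg_interval steps,
    # scaled by reg_interval so its expected contribution is unchanged.
    if step % reg_interval == 0:
        return loss + reg * reg_interval
    return loss
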
Example #6
def grow(res, lod):
    x = lambda: fromrgb(downscale2d(images_in, 2 ** lod), res)
    if lod > 0:
        x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
    x = block(x(), res)
    y = lambda: x
    if res > 2:
        y = cset(
            y,
            (lod_in > lod),
            lambda: tflib.lerp(
                x,
                fromrgb(downscale2d(images_in, 2 ** (lod + 1)), res - 1),
                lod_in - lod,
            ),
        )
    return y()
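
For context, cset in the original ProGAN/StyleGAN codebase is a small helper that chains tf.cond branches onto a lambda; it is defined essentially as:

import tensorflow as tf  # TF 1.x graph mode, as in the surrounding snippets

def cset(cur_lambda, new_cond, new_lambda):
    # Return a lambda that evaluates new_lambda when new_cond holds, else cur_lambda.
    return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
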
Example #7
def grow(res, lod):
    # Tentatively assign the downsampling path to x (as a lambda).
    x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
    if lod > 0:
        # Not the first level: if the input level lod_in is smaller than the current
        # lod, we can move on to the next resolution, so assign grow() to x;
        # otherwise x stays the downsampling path.
        x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
    x = block(x(), res)
    y = lambda: x  # x() builds this level's block once; wrap the result back into a lambda as y
    if res > 2:
        # Not the last level: if the input level lod_in is larger than the current
        # lod, interpolation is needed, so assign lerp() to y; otherwise y keeps
        # the previous operation.
        y = cset(y, (lod_in > lod), lambda: tflib.lerp(
            x, fromrgb(downscale2d(images_in, 2**(lod + 1)), res - 1),
            lod_in - lod))
    return y()
Example #8
def wgangp(G,
           D,
           aug,
           fake_labels,
           real_images,
           real_labels,
           wgan_epsilon=0.001,
           wgan_lambda=10,
           wgan_target=1,
           **_kwargs):
    minibatch_size = tf.shape(fake_labels)[0]
    fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    G_fake = eval_G(G, fake_latents, fake_labels)
    D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake')
    D_real = eval_D(D, aug, real_images, real_labels, report='real')

    # WGAN loss from "Wasserstein Generative Adversarial Networks".
    with tf.name_scope('Loss_main'):
        G_loss = -D_fake.scores  # pylint: disable=invalid-unary-operand-type
        D_loss = D_fake.scores - D_real.scores

    # Epsilon penalty from "Progressive Growing of GANs for Improved Quality, Stability, and Variation"
    with tf.name_scope('Loss_epsilon'):
        epsilon_penalty = report_stat(aug, 'Loss/epsilon_penalty',
                                      tf.square(D_real.scores))
        D_loss += epsilon_penalty * wgan_epsilon

    # Gradient penalty from "Improved Training of Wasserstein GANs".
    with tf.name_scope('Loss_GP'):
        mix_factors = tf.random_uniform([minibatch_size, 1, 1, 1],
                                        0,
                                        1,
                                        dtype=G_fake.images.dtype)
        mix_images = tflib.lerp(tf.cast(real_images, G_fake.images.dtype),
                                G_fake.images, mix_factors)
        mix_labels = real_labels  # NOTE: Mixing is performed without respect to fake_labels.
        D_mix = eval_D(D, aug, mix_images, mix_labels, report='mix')
        mix_grads = tf.gradients(tf.reduce_sum(D_mix.scores), [mix_images])[0]
        mix_norms = tf.sqrt(tf.reduce_sum(tf.square(mix_grads), axis=[1, 2, 3]))
        mix_norms = report_stat(aug, 'Loss/mix_norms', mix_norms)
        gradient_penalty = tf.square(mix_norms - wgan_target)
        D_reg = gradient_penalty * (wgan_lambda / (wgan_target**2))

    return report_loss(aug, G_loss, D_loss, None, D_reg)
Example #9
def grow(res, lod):
    x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2 ** lod), res)
    if lod > 0:
        x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
    x = block(x(), res)
    y = lambda: x
    y = cset(
        y,
        (lod_in > lod),
        lambda: tflib.lerp(
            x,
            fromrgb(
                naive_downsample_2d(images_in, factor=2 ** (lod + 1)), res - 1
            ),
            lod_in - lod,
        ),
    )
    return y()
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    with tf.name_scope('DynamicRange'):
        x = tf.cast(x, tf.float32)
        x = misc.adjust_dynamic_range(x, drange_data, drange_net)
    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            random_vector = tf.random_uniform([tf.shape(x)[0]]) < 0.5
            x = tf.where(random_vector, x, tf.reverse(x, [3]))
            rotation_offset = 108
            indices_first = tf.range(rotation_offset)
            swaps = tf.constant([0, 7, 6, 5, 4, 3, 2, 1]) + rotation_offset
            indices_last = tf.range(rotation_offset + 8, tf.shape(labels)[1])
            indices = tf.concat([indices_first, swaps, indices_last], axis=0)
            mirrored_labels = tf.gather(labels, indices, axis=1)
            labels = tf.where(random_vector, labels, mirrored_labels)
    with tf.name_scope(
            'FadeLOD'
    ):  # Smooth crossfade between consecutive levels-of-detail.
        s = tf.shape(x)
        y = tf.reshape(x, [-1, s[1], s[2] // 2, 2, s[3] // 2, 2])
        y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)
        y = tf.tile(y, [1, 1, 1, 2, 1, 2])
        y = tf.reshape(y, [-1, s[1], s[2], s[3]])
        x = tflib.lerp(x, y, lod - tf.floor(lod))
    with tf.name_scope(
            'UpscaleLOD'
    ):  # Upscale to match the expected input/output size of the networks.
        s = tf.shape(x)
        factor = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
    with tf.name_scope('RandomizeLabels'):
        keep_probability = 0.80
        labels_bool = tf.cast(labels, tf.bool)
        mask = tf.random.uniform(tf.shape(labels), 0.0,
                                 1.0) > (1 - keep_probability)
        label_remove = tf.cast(tf.math.logical_and(labels_bool, mask),
                               dtype=tf.float32)
        # multiply_interval = (0.7, 1.3)
        # random_multiplier = tf.random.uniform(tf.shape(labels), multiply_interval[0], multiply_interval[1])
        # labels_multiply = label_remove * random_multiplier
        labels = tf.concat([labels[:, :1], label_remove[:, 1:]], axis=-1)
    return x, labels
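
In the RandomizeLabels block each nonzero label entry survives with probability keep_probability; a NumPy sketch of the masking (toy shapes, binary labels assumed):

import numpy as np

keep_probability = 0.80
labels = (np.random.rand(4, 16) < 0.3).astype(np.float32)      # toy binary labels
mask = np.random.rand(*labels.shape) > (1 - keep_probability)  # True with probability 0.80
label_remove = np.logical_and(labels.astype(bool), mask).astype(np.float32)
# Set entries survive with probability keep_probability; zero entries stay zero.
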
Example #11
def D_hinge_gp(
    G,
    D,
    opt,
    training_set,
    minibatch_size,
    reals,
    labels,  # pylint: disable=unused-argument
    wgan_lambda=10.0,  # Weight for the gradient penalty term.
    wgan_target=1.0):  # Target value for gradient magnitudes.

    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True))
    fake_scores_out = fp32(
        D.get_output_for(fake_images_out, labels, is_training=True))
    real_scores_out = autosummary('Loss/scores/real', real_scores_out)
    fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
    loss = tf.maximum(0., 1. + fake_scores_out) + tf.maximum(
        0., 1. - real_scores_out)

    with tf.name_scope('GradientPenalty'):
        mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1],
                                           0.0,
                                           1.0,
                                           dtype=fake_images_out.dtype)
        mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype),
                                      fake_images_out, mixing_factors)
        mixed_scores_out = fp32(
            D.get_output_for(mixed_images_out, labels, is_training=True))
        mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out)
        mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
        mixed_grads = opt.undo_loss_scaling(
            fp32(tf.gradients(mixed_loss, [mixed_images_out])[0]))
        mixed_norms = tf.sqrt(
            tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
        mixed_norms = autosummary('Loss/mixed_norms', mixed_norms)
        gradient_penalty = tf.square(mixed_norms - wgan_target)
    loss += gradient_penalty * (wgan_lambda / (wgan_target**2))
    return loss
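
Relative to D_wgan_gp, only the main term changes, to a two-sided hinge on the scores (the gradient-penalty term is unchanged):

L_D = \mathbb{E}[\max(0,\,1 + f)] + \mathbb{E}[\max(0,\,1 - r)] + \frac{\lambda}{t^2}\,\mathbb{E}\big[(\lVert\nabla_{\hat{x}} D(\hat{x})\rVert_2 - t)^2\big]
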
Example #12
def process_reals(x, labels, lod, mirror_augment, mirror_augment_v,
                  spatial_augmentations, drange_data, drange_net):
    with tf.name_scope('DynamicRange'):
        x = tf.cast(x, tf.float32)
        x = misc.adjust_dynamic_range(x, drange_data, drange_net)
    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            x = tf.where(
                tf.random_uniform([tf.shape(x)[0]]) < 0.5, x,
                tf.reverse(x, [3]))
    if mirror_augment_v:
        with tf.name_scope('MirrorAugment_V'):
            x = tf.where(
                tf.random_uniform([tf.shape(x)[0]]) < 0.5, x,
                tf.reverse(x, [2]))
    if spatial_augmentations:
        with tf.name_scope('SpatialAugmentations'):
            pre = tf.transpose(x, [0, 2, 3, 1])
            post = tf.map_fn(misc.apply_random_aug, pre)
            x = tf.transpose(post, [0, 3, 1, 2])
        if save_image_summaries:  # flag defined outside this snippet
            with tf.name_scope('ImageSummaries'), tf.device('/cpu:0'):
                tf.summary.image("reals_pre-augment", pre)
                tf.summary.image("reals_post-augment", post)

    with tf.name_scope(
            'FadeLOD'
    ):  # Smooth crossfade between consecutive levels-of-detail.
        s = tf.shape(x)
        y = tf.reshape(x, [-1, s[1], s[2] // 2, 2, s[3] // 2, 2])
        y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)
        y = tf.tile(y, [1, 1, 1, 2, 1, 2])
        y = tf.reshape(y, [-1, s[1], s[2], s[3]])
        x = tflib.lerp(x, y, lod - tf.floor(lod))
    with tf.name_scope(
            'UpscaleLOD'
    ):  # Upscale to match the expected input/output size of the networks.
        s = tf.shape(x)
        factor = tf.cast(2**tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
    return x, labels
Example #13
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    with tf.name_scope('DynamicRange'):
        x = tf.cast(x, tf.float32)
        x = misc.adjust_dynamic_range(x, drange_data, drange_net)
    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            x = tf.where(tf.random_uniform([tf.shape(x)[0]]) < 0.5, x, tf.reverse(x, [3]))
    with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail.
        s = tf.shape(x)
        y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2])
        y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)
        y = tf.tile(y, [1, 1, 1, 2, 1, 2])
        y = tf.reshape(y, [-1, s[1], s[2], s[3]])
        x = tflib.lerp(x, y, lod - tf.floor(lod))
    with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks.
        s = tf.shape(x)
        factor = tf.cast(2 ** tf.floor(lod), tf.int32)
        x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
    return x, labels
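
The UpscaleLOD step uses the integer part of lod as a nearest-neighbour upscaling factor of 2**floor(lod); a NumPy sketch (NCHW layout assumed):

import numpy as np

def upscale_lod(x, lod):
    # NumPy sketch of UpscaleLOD: nearest-neighbour upscale by 2**floor(lod).
    n, c, h, w = x.shape
    factor = int(2 ** np.floor(lod))
    x = x.reshape(n, c, h, 1, w, 1)
    x = np.tile(x, (1, 1, 1, factor, 1, factor))
    return x.reshape(n, c, h * factor, w * factor)

x = np.random.randn(2, 3, 4, 4).astype(np.float32)
assert upscale_lod(x, 2.0).shape == (2, 3, 16, 16)
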
Example #14
def grow(x, res, lod):
    y = block(res, x)
    img = lambda: upscale2d(torgb(res, y), 2**lod)
    img = cset(img, (lod_in > lod), lambda: upscale2d(
        tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod),
        2**lod))
    if lod > 0:
        img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
    return img()
    def _evaluate(self, Gs, num_gpus):
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph.
        distance_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                noise_vars = [
                    var for name, var in
                    Gs_clone.components.synthesis.vars.items()
                    if name.startswith('noise')
                ]

                # Generate random latents and interpolation t-values.
                lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] +
                                           Gs_clone.input_shape[1:])
                lerp_t = tf.random_uniform(
                    [self.minibatch_per_gpu], 0.0,
                    1.0 if self.sampling == 'full' else 0.0)

                # Interpolate in W or Z.
                if self.space == 'w':
                    dlat_t01 = Gs_clone.components.mapping.get_output_for(
                        lat_t01, None, is_validation=True)
                    dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1,
                                         lerp_t[:, np.newaxis, np.newaxis])
                    dlat_e1 = tflib.lerp(
                        dlat_t0, dlat_t1,
                        lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1),
                                          dlat_t01.shape)
                else:  # space == 'z'
                    lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                    lat_e1 = slerp(lat_t0, lat_t1,
                                   lerp_t[:, np.newaxis] + self.epsilon)
                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1),
                                         lat_t01.shape)
                    dlat_e01 = Gs_clone.components.mapping.get_output_for(
                        lat_e01, None, is_validation=True)

                # Synthesize images.
                with tf.control_dependencies([
                        var.initializer for var in noise_vars
                ]):  # use same noise inputs for the entire minibatch
                    images = Gs_clone.components.synthesis.get_output_for(
                        dlat_e01, is_validation=True, randomize_noise=False)

                # Crop only the face region.
                c = int(images.shape[2] // 8)
                images = images[:, :, c * 3:c * 7, c * 2:c * 6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                if images.shape[2] > 256:
                    factor = images.shape[2] // 256
                    images = tf.reshape(images, [
                        -1, images.shape[1], images.shape[2] // factor, factor,
                        images.shape[3] // factor, factor
                    ])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                img_e0, img_e1 = images[0::2], images[1::2]
                distance_measure = misc.load_pkl(
                    'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
                )
                distance_expr.append(
                    distance_measure.get_output_for(img_e0, img_e1) *
                    (1 / self.epsilon**2))

        # Sampling loop.
        all_distances = []
        for _ in range(0, self.num_samples, minibatch_size):
            all_distances += tflib.run(distance_expr)
        all_distances = np.concatenate(all_distances, axis=0)

        # Reject outliers.
        lo = np.percentile(all_distances, 1, interpolation='lower')
        hi = np.percentile(all_distances, 99, interpolation='higher')
        filtered_distances = np.extract(
            np.logical_and(lo <= all_distances, all_distances <= hi),
            all_distances)
        self._report_result(np.mean(filtered_distances))
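
The slerp used for Z-space interpolation above is spherical interpolation between normalized latents; the NVIDIA PPL metric defines it roughly as follows (reproduced from memory, so treat it as a sketch):

import tensorflow as tf  # TF 1.x graph mode

def slerp(a, b, t):
    # Spherical interpolation between batches of latents a and b at fraction t.
    a = a / tf.norm(a, axis=-1, keepdims=True)
    b = b / tf.norm(b, axis=-1, keepdims=True)
    d = tf.reduce_sum(a * b, axis=-1, keepdims=True)  # cosine of the angle between a and b
    p = t * tf.math.acos(d)                           # interpolated angle
    c = b - d * a                                     # component of b orthogonal to a
    c = c / tf.norm(c, axis=-1, keepdims=True)
    d = a * tf.cos(p) + c * tf.sin(p)
    return d / tf.norm(d, axis=-1, keepdims=True)
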
Example #16
    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        Gs_kwargs = dict(Gs_kwargs)
        Gs_kwargs.update(self.Gs_overrides)
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph
        distance_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device("/gpu:%d" % gpu_idx):
                Gs_clone = Gs.clone()
                noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith("noise")]

                # Generate random latents and interpolation t-values
                lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:])
                lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == "full" else 0.0)
                labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1])

                # Interpolate in W or Z
                if self.space == "w":
                    dlat_t01 = Gs_clone.get_output_for(lat_t01, labels, **Gs_kwargs, return_dlatents = True)[-1]
                    dlat_t01 = tf.cast(dlat_t01, tf.float32)
                    dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
                    dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis = 1), dlat_t01.shape)
                else:
                    lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                    lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis = 1), lat_t01.shape)
                    dlat_e01 = Gs_clone.get_output_for(lat_e01, labels, **Gs_kwargs, return_dlatents = True)[-1]

                # Synthesize images
                with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch
                    imgs = Gs_clone.get_output_for(dlat_e01, labels, randomize_noise = False, **Gs_kwargs, take_dlatents = True)[0]
                    imgs = tf.cast(imgs, tf.float32)

                # Crop only the face region
                if self.crop:
                    c = int(imgs.shape[2] // 8)
                    imgs = imgs[:, :, c*3 : c*7, c*2 : c*6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images
                factor = imgs.shape[2] // 256
                if factor > 1:
                    imgs = tf.reshape(imgs, [-1, imgs.shape[1], imgs.shape[2] // factor, factor, imgs.shape[3] // factor, factor])
                    imgs = tf.reduce_mean(imgs, axis=[3,5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG
                imgs = (imgs + 1) * (255 / 2)

                # Evaluate perceptual distance
                img_e0, img_e1 = imgs[0::2], imgs[1::2]
                distance_measure = misc.load_pkl("http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl")
                distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))

        # Sampling loop
        all_distances = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            all_distances += tflib.run(distance_expr)
        all_distances = np.concatenate(all_distances, axis = 0)

        # Reject outliers
        lo = np.percentile(all_distances, 1, interpolation = "lower")
        hi = np.percentile(all_distances, 99, interpolation = "higher")
        filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances)
        self._report_result(np.mean(filtered_distances))
def G_style(
    latents_in,                                     # First input: latent vectors (Z) [minibatch, latent_size].
    labels_in,                                      # Second input: conditioning labels [minibatch, label_size].
    truncation_psi          = 0.7,                  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff       = 8,                    # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val      = None,                 # Value of truncation_psi to use during validation.
    truncation_cutoff_val   = None,                 # Value of truncation_cutoff to use during validation.
    dlatent_avg_beta        = 0.995,                # Decay rate for tracking the moving average of W during training. None = disable.
    style_mixing_prob       = 0.9,                  # Probability of mixing styles during training. None = disable.
    is_training             = False,                # Is the network being trained? Enables and disables specific features.
    is_validation           = False,                # Is the network under validation? Determines the value of truncation_psi.
    is_template_graph       = False,                # True = template graph constructed by the Network class, False = actual evaluation.
    components              = dnnlib.EasyDict(),    # Container for sub-networks. Retained between calls.
    **kwargs):                                      # Arguments for the sub-networks (G_mapping and G_synthesis).

    # Validate arguments.
    assert not is_training or not is_validation        # cannot be in training and validation at the same time
    assert isinstance(components, dnnlib.EasyDict)     # components is an EasyDict that will later hold the synthesis and mapping networks
    if is_validation:  # during validation, fall back to truncation_psi_val / truncation_cutoff_val (both default to None, i.e. no truncation)
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1):
        truncation_psi = None  # no truncation during training, or when the truncation psi equals 1
    if is_training or (truncation_cutoff is not None and not tflib.is_tf_expression(truncation_cutoff) and truncation_cutoff <= 0):
        truncation_cutoff = None  # no truncation during training, or when the truncation cutoff is <= 0
    if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1):
        dlatent_avg_beta = None  # no moving-average update outside training, or when the decay rate equals 1
    if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0):
        style_mixing_prob = None  # no style mixing outside training, or when the mixing probability is <= 0

    # Set up sub-networks.
    if 'synthesis' not in components:  # load the synthesis network
        components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs)
    num_layers = components.synthesis.input_shape[1]  # num_layers = 18
    dlatent_size = components.synthesis.input_shape[2]  # dlatent_size = 512
    if 'mapping' not in components:  # load the mapping network
        components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs)

    # Set up variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    # Initialized to 0. lod is defined as lod = resolution_log2 - res, where resolution_log2 (= 10)
    # is the final resolution level and res is the current layer's resolution level (2-10).
    dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)  # moving average of W (the "average face")

    # Evaluate the mapping network output.
    dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)

    # Update the moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)   # dlatent average of the new batch
            update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            # Move from the batch average toward the stored average with factor dlatent_avg_beta
            # (i.e. keep most of the old value); the result becomes the new average-face dlatent.
            # update_op is an op that the session must execute.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)  # ensures update_op completes
            # tf.control_dependencies: the listed ops ([update_op]) run before any op created inside
            # the with-block. tf.identity(x) is such an op, so update_op is guaranteed to run;
            # it returns a new identical tensor, adding one node to the graph.

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs)  # random intermediate latents used for style mixing
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]   # layer indices: [[[0],[1],[2],...,[17]]]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2  # current layers = total layers minus 2*lod_in, since each resolution corresponds to two layers
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,   # if a random draw is below the mixing probability, pick a random crossover layer in [1, cur_layers); otherwise keep all layers
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)
            # layers up to mixing_cutoff keep the values from dlatents; the remaining layers take dlatents2

    # Apply the truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]  # layer indices: [[[0],[1],[2],...,[17]]]
            ones = np.ones(layer_idx.shape, dtype=np.float32)  # ones: [[[1],[1],[1],...,[1]]]
            coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones)
            # interpolation coefficients: truncation_psi for the truncated layers, 1 elsewhere
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)
            # truncation: move from the average face dlatent_avg toward the current dlatents by coefs,
            # and let the result replace dlatents

    # Evaluate the synthesis network output.
    with tf.control_dependencies([tf.assign(components.synthesis.find_var('lod'), lod_in)]):
        images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')  # return the generated images
Example #18
def D_loss(
    G,
    D,
    reals,  # A batch of real images
    labels,  # A batch of labels (default 0s if no labels)
    minibatch_size,  # Size of each minibatch
    loss_type,  # Loss type: logistic, hinge, wgan
    reg_type,  # Regularization type: r1, r2, gp (mixed)
    gamma=10.0,  # Regularization strength
    wgan_epsilon=0.001,  # Wasserstein epsilon (for wgan only)
    wgan_target=1.0):  # Wasserstein target (for wgan only)

    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_imgs_out = G.get_output_for(latents, labels, is_training=True)[0]

    real_scores_out = D.get_output_for(reals, labels, is_training=True)
    fake_scores_out = D.get_output_for(fake_imgs_out, labels, is_training=True)

    real_scores_out = autosummary("Loss/scores/real", real_scores_out)
    fake_scores_out = autosummary("Loss/scores/fake", fake_scores_out)

    if loss_type == "logistic":
        loss = tf.nn.softplus(fake_scores_out)
        loss += tf.nn.softplus(-real_scores_out)
    elif loss_type == "hinge":
        loss = tf.maximum(0.0, 1.0 + fake_scores_out)
        loss += tf.maximum(0.0, 1.0 - real_scores_out)
    elif loss_type == "wgan":
        loss = fake_scores_out - real_scores_out

    if loss_type == "wgan":
        with tf.name_scope("EpsilonPenalty"):
            epsilon_penalty = autosummary("Loss/epsilon_penalty",
                                          tf.square(real_scores_out))
            loss += epsilon_penalty * wgan_epsilon

    reg = None
    with tf.name_scope("GradientPenalty"):
        if reg_type in ["r1", "r2"]:
            if reg_type == "r1":
                grads = tf.gradients(tf.reduce_sum(real_scores_out),
                                     [reals])[0]
            else:
                grads = tf.gradients(tf.reduce_sum(fake_scores_out),
                                     [fake_imgs_out])[0]
            gradient_penalty = tf.reduce_sum(tf.square(grads), axis=[1, 2, 3])
            gradient_penalty = autosummary("Loss/gradient_penalty",
                                           gradient_penalty)
            reg = gradient_penalty * (gamma * 0.5)
        elif reg_type == "gp":
            mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1],
                                               0.0,
                                               1.0,
                                               dtype=fake_imgs_out.dtype)
            mixed_imgs_out = tflib.lerp(tf.cast(reals, fake_imgs_out.dtype),
                                        fake_imgs_out, mixing_factors)
            mixed_scores_out = D.get_output_for(mixed_imgs_out,
                                                labels,
                                                is_training=True)
            mixed_scores_out = autosummary("Loss/scores/mixed",
                                           mixed_scores_out)
            mixed_grads = tf.gradients(tf.reduce_sum(mixed_scores_out),
                                       [mixed_imgs_out])[0]
            mixed_norms = tf.sqrt(
                tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
            mixed_norms = autosummary("Loss/mixed_norms", mixed_norms)
            gradient_penalty = tf.square(mixed_norms - wgan_target)
            reg = gradient_penalty * (gamma / (wgan_target**2))

    return loss, reg
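
The 'r1' branch implements the R1 regularizer of Mescheder et al. (2018), a gradient penalty on real images only, matching the gamma * 0.5 factor in the code:

R_1 = \frac{\gamma}{2}\,\mathbb{E}_{x \sim p_{data}}\big[\lVert\nabla_x D(x)\rVert_2^2\big]
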
    def _evaluate(self, Gs, Gs_kwargs, num_gpus, **kwargs):
        Gs_kwargs = dict(Gs_kwargs)
        Gs_kwargs.update(self.Gs_overrides)
        minibatch_per_gpu = (self.n_samples_per_dim - 1) // num_gpus + 1
        if (not self.no_mapping) and (not self.no_convert):
            Gs = Gs.convert(
                new_func_name='training.ps_sc_networks2.G_main_ps_sc')

        # Construct TensorFlow graph.
        n_continuous = Gs.input_shape[1]
        distance_expr = []
        eval_dim_phs = []
        lat_start_alpha_phs = []
        lat_end_alpha_phs = []
        lat_sample_phs = []
        lerps_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                if self.no_mapping:
                    noise_vars = [
                        var for name, var in Gs_clone.vars.items()
                        if name.startswith('noise')
                    ]
                else:
                    noise_vars = [
                        var for name, var in
                        Gs_clone.components.synthesis.vars.items()
                        if name.startswith('noise')
                    ]

                # Latent pairs placeholder
                eval_dim = tf.placeholder(tf.int32)
                lat_start_alpha = tf.placeholder(
                    tf.float32)  # should be in [0, 1]
                lat_end_alpha = tf.placeholder(
                    tf.float32)  # should be in [0, 1]
                eval_dim_phs.append(eval_dim)
                lat_start_alpha_phs.append(lat_start_alpha)
                lat_end_alpha_phs.append(lat_end_alpha)
                eval_dim_mask = tf.tile(
                    tf.one_hot(eval_dim, n_continuous)[tf.newaxis, :] > 0,
                    [minibatch_per_gpu, 1])
                lerp_t = tf.linspace(lat_start_alpha, lat_end_alpha,
                                     minibatch_per_gpu)  # [b]
                lerps_expr.append(lerp_t)
                lat_sample = tf.placeholder(tf.float32,
                                            shape=Gs_clone.input_shape[1:])
                lat_sample_phs.append(lat_sample)

                # lat_t0 = tf.zeros([minibatch_per_gpu] + Gs_clone.input_shape[1:])
                lat_t0 = tf.tile(lat_sample[tf.newaxis, :],
                                 [minibatch_per_gpu, 1])
                if self.use_bound_4:
                    lat_t0_min2 = tf.zeros_like(lat_t0) - 4
                else:
                    lat_t0_min2 = lat_t0 - 2
                lat_t0 = tf.where(eval_dim_mask, lat_t0_min2,
                                  lat_t0)  # [b, n_continuous]

                lat_t1 = tf.tile(lat_sample[tf.newaxis, :],
                                 [minibatch_per_gpu, 1])
                if self.use_bound_4:
                    lat_t1_add2 = tf.zeros_like(lat_t1) + 4
                else:
                    lat_t1_add2 = lat_t1 + 2
                lat_t1 = tf.where(eval_dim_mask, lat_t1_add2,
                                  lat_t1)  # [b, n_continuous]
                lat_e = tflib.lerp(lat_t0, lat_t1,
                                   lerp_t[:, tf.newaxis])  # [b, n_continuous]

                # labels = tf.reshape(self._get_random_labels_tf(minibatch_per_gpu), [minibatch_per_gpu, -1])
                labels = tf.zeros([minibatch_per_gpu, 0], dtype=tf.float32)
                if self.no_mapping:
                    dlat_e = lat_e
                else:
                    dlat_e = get_return_v(
                        Gs_clone.components.mapping.get_output_for(
                            lat_e, labels, **Gs_kwargs), 1)

                # Synthesize images.
                with tf.control_dependencies([
                        var.initializer for var in noise_vars
                ]):  # use same noise inputs for the entire minibatch
                    if self.no_mapping:
                        images = get_return_v(
                            Gs_clone.get_output_for(dlat_e,
                                                    labels,
                                                    randomize_noise=False,
                                                    **Gs_kwargs), 1)
                    else:
                        images = get_return_v(
                            Gs_clone.components.synthesis.get_output_for(
                                dlat_e, randomize_noise=False, **Gs_kwargs), 1)
                    # print('images.shape:', images.get_shape().as_list())
                    images = tf.cast(images, tf.float32)

                # Crop only the face region.
                if self.crop:
                    c = int(images.shape[2] // 8)
                    images = images[:, :, c * 3:c * 7, c * 2:c * 6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                factor = images.shape[2] // 256
                if factor > 1:
                    images = tf.reshape(images, [
                        -1, images.shape[1], images.shape[2] // factor, factor,
                        images.shape[3] // factor, factor
                    ])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                if images.get_shape().as_list()[1] == 1:
                    images = tf.tile(images, [1, 3, 1, 1])
                img_e0 = images[:-1]
                img_e1 = images[1:]
                distance_measure = misc.load_pkl(
                    'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
                )
                distance_tmp = distance_measure.get_output_for(img_e0, img_e1)
                print('distance_tmp.shape:',
                      distance_tmp.get_shape().as_list())
                distance_expr.append(distance_tmp)

        # Sampling loop
        n_segs_per_dim = (self.n_samples_per_dim - 1) // (
            (minibatch_per_gpu - 1) * num_gpus)
        self.n_samples_per_dim = n_segs_per_dim * (
            (minibatch_per_gpu - 1) * num_gpus) + 1
        alphas = np.linspace(0., 1., num=(n_segs_per_dim * num_gpus) + 1)
        traversals_dim = []
        for n in range(self.n_traversals):
            lat_sample_np = np.random.normal(size=Gs_clone.input_shape[1:])
            all_distances = []
            sum_distances = []
            for i in range(n_continuous):
                self._report_progress(i, n_continuous)
                dim_distances = []
                for j in range(n_segs_per_dim):
                    fd = {}
                    for k_gpu in range(num_gpus):
                        fd.update({
                            eval_dim_phs[k_gpu]: i,
                            lat_start_alpha_phs[k_gpu]: alphas[j * num_gpus + k_gpu],
                            lat_end_alpha_phs[k_gpu]: alphas[j * num_gpus + k_gpu + 1],
                            lat_sample_phs[k_gpu]: lat_sample_np,
                        })
                    distance_expr_out, lerps_expr_out = tflib.run(
                        [distance_expr, lerps_expr], feed_dict=fd)
                    dim_distances += distance_expr_out
                    # dim_distances += tflib.run(distance_expr, feed_dict=fd)
                    # print(lerps_expr_out)
                dim_distances = np.concatenate(dim_distances, axis=0)
                # print('dim_distances.shape:', dim_distances.shape)
                all_distances.append(dim_distances)
                sum_distances.append(np.sum(dim_distances))
            traversals_dim.append(sum_distances)
        traversals_dim = np.array(
            traversals_dim)  # shape: (n_traversals, n_continuous)
        avg_distance_per_dim = np.mean(traversals_dim, axis=0)
        std_distance_per_dim = np.std(traversals_dim, axis=0)
        # pdb.set_trace()
        active_mask = np.array(avg_distance_per_dim) > self.active_thresh
        active_distances = np.extract(active_mask, avg_distance_per_dim)
        active_stds = np.extract(active_mask, std_distance_per_dim)
        sum_distance = np.sum(active_distances)
        mean_distance = np.sum(active_distances) / len(avg_distance_per_dim)
        mean_std = np.sum(active_stds) / len(avg_distance_per_dim)
        norm_dis_std = np.sqrt(mean_distance * mean_distance +
                               mean_std * mean_std)
        print('avg distance per dim:', avg_distance_per_dim)
        print('std distance per dim:', std_distance_per_dim)
        print('sum_distance:', sum_distance)
        print('mean_distance:', mean_distance)
        print('mean_std:', mean_std)
        print('norm_dis_std:', norm_dis_std)
        self._report_result(sum_distance, suffix='_sum_dist')
        self._report_result(mean_distance, suffix='_mean_dist')
        self._report_result(mean_std, suffix='_mean_std')
        self._report_result(norm_dis_std, suffix='_norm_dist_std')
        # pdb.set_trace()
        return {'tpl_per_dim': avg_distance_per_dim}
Example #20
    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        Gs_kwargs = dict(Gs_kwargs)
        Gs_kwargs.update(self.Gs_overrides)
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph.
        distance_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                noise_vars = [
                    var for name, var in
                    Gs_clone.components.synthesis.vars.items()
                    if name.startswith('noise')
                ]

                # Generate random latents and interpolation t-values.
                lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] +
                                           Gs_clone.input_shape[1:])
                lerp_t = tf.random_uniform(
                    [self.minibatch_per_gpu], 0.0,
                    1.0 if self.sampling == 'full' else 0.0)
                labels = tf.reshape(
                    tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu),
                            [1, 2]), [self.minibatch_per_gpu * 2, -1])

                # Interpolate in W or Z.
                if self.space == 'w':
                    dlat_t01 = Gs_clone.components.mapping.get_output_for(
                        lat_t01, labels, **Gs_kwargs)
                    dlat_t01 = tf.cast(dlat_t01, tf.float32)
                    dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1,
                                         lerp_t[:, np.newaxis, np.newaxis])
                    dlat_e1 = tflib.lerp(
                        dlat_t0, dlat_t1,
                        lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1),
                                          dlat_t01.shape)
                else:  # space == 'z'
                    lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                    lat_e1 = slerp(lat_t0, lat_t1,
                                   lerp_t[:, np.newaxis] + self.epsilon)
                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1),
                                         lat_t01.shape)
                    dlat_e01 = Gs_clone.components.mapping.get_output_for(
                        lat_e01, labels, **Gs_kwargs)

                # Synthesize images.
                with tf.control_dependencies([
                        var.initializer for var in noise_vars
                ]):  # use same noise inputs for the entire minibatch
                    images = Gs_clone.components.synthesis.get_output_for(
                        dlat_e01, randomize_noise=False, **Gs_kwargs)
                    images = tf.cast(images, tf.float32)

                # Crop only the face region.
                if self.crop:
                    c = int(images.shape[2] // 8)
                    images = images[:, :, c * 3:c * 7, c * 2:c * 6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                factor = images.shape[2] // 256
                if factor > 1:
                    images = tf.reshape(images, [
                        -1, images.shape[1], images.shape[2] // factor, factor,
                        images.shape[3] // factor, factor
                    ])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                img_e0, img_e1 = images[0::2], images[1::2]
                distance_measure = misc.load_pkl(
                    'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2'
                )  # vgg16_zhang_perceptual.pkl
                distance_expr.append(
                    distance_measure.get_output_for(img_e0, img_e1) *
                    (1 / self.epsilon**2))

        # Sampling loop.
        all_distances = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            all_distances += tflib.run(distance_expr)
        all_distances = np.concatenate(all_distances, axis=0)

        # Reject outliers.
        lo = np.percentile(all_distances, 1, interpolation='lower')
        hi = np.percentile(all_distances, 99, interpolation='higher')
        filtered_distances = np.extract(
            np.logical_and(lo <= all_distances, all_distances <= hi),
            all_distances)
        self._report_result(np.mean(filtered_distances))
Example #21
def D_wgan_gp(
        G,
        D,
        opt,
        training_set,
        minibatch_size,
        reals,
        labels,
        infogan_nz,
        wgan_lambda=10.0,  # Weight for the gradient penalty term.
        wgan_epsilon=0.001,  # Weight for the epsilon term, \epsilon_{drift}.
        wgan_target=1.0,  # Target value for gradient magnitudes.
        gpu_ix=None):

    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fake_images_out = G.get_output_for(latents, labels, is_training=True)
    fake_scores_out = fp32(
        D.get_output_for(fake_images_out, labels, is_training=True))

    if infogan_nz > 0:
        with tf.name_scope('InfoGANLoss'):
            ops = fake_scores_out.graph.get_operations()

            def filter_fn(op):  # Very similar to the corresponding function in hessian_penalties.py
                this_layer = 'QEncoding' in op.name
                this_gpu = 'GPU%d' % gpu_ix in op.name
                this_model = 'D_loss' in op.name
                op_found = this_layer and this_gpu and this_model
                return op_found

            r_op = list(filter(filter_fn, ops))
            for r in r_op:
                print('Using %s' % r.name)
            assert len(
                r_op) == 1, 'Found %s ops with name QEncoding' % len(r_op)
            encoding = fake_scores_out.graph.get_tensor_by_name('%s:0' %
                                                                r_op[0].name)
            print('Regularizing first %s Z components with InfoGAN Loss' %
                  infogan_nz)
            mutual_information_loss = tf.losses.mean_squared_error(
                latents[:, :infogan_nz], encoding)
            mutual_information_loss = autosummary('Loss/InfoGAN',
                                                  mutual_information_loss)
    else:
        mutual_information_loss = 0.0

    real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True))
    real_scores_out = autosummary('Loss/scores/real', real_scores_out)
    fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
    loss = fake_scores_out - real_scores_out

    with tf.name_scope('GradientPenalty'):
        mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1],
                                           0.0,
                                           1.0,
                                           dtype=fake_images_out.dtype)
        mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype),
                                      fake_images_out, mixing_factors)
        mixed_scores_out = fp32(
            D.get_output_for(mixed_images_out, labels, is_training=True))
        mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out)
        mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
        mixed_grads = opt.undo_loss_scaling(
            fp32(tf.gradients(mixed_loss, [mixed_images_out])[0]))
        mixed_norms = tf.sqrt(
            tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
        mixed_norms = autosummary('Loss/mixed_norms', mixed_norms)
        gradient_penalty = tf.square(mixed_norms - wgan_target)
    loss += gradient_penalty * (wgan_lambda / (wgan_target**2))

    with tf.name_scope('EpsilonPenalty'):
        epsilon_penalty = autosummary('Loss/epsilon_penalty',
                                      tf.square(real_scores_out))
    loss += epsilon_penalty * wgan_epsilon
    return loss, mutual_information_loss
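
The InfoGAN term above is a mutual-information surrogate: the mean-squared error between the first infogan_nz latent components and the Q-encoder's reconstruction of them, i.e.

L_I = \mathrm{MSE}\big(z_{1:k},\, Q(G(z))\big), \qquad k = \texttt{infogan\_nz}
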
Example #22
def G_main(
    latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,  # Second input: Conditioning labels [minibatch, label_size].
    latmask,  # mask for split-frame latents blending
    dconst,  # initial (const) layer displacement
    truncation_psi=0.5,  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff=None,  # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val=None,  # Value for truncation_psi to use during validation.
    truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
    style_mixing_prob=0.9,  # Probability of mixing styles during training. None = disable.
    is_training=False,  # Network is under training? Enables and disables specific features.
    is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
    return_dlatents=False,  # Return dlatents in addition to the images?
    is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
    components=dnnlib.EasyDict(
    ),  # Container for sub-networks. Retained between calls.
    mapping_func='G_mapping',  # Build func name for the mapping network.
    synthesis_func='G_synthesis_stylegan2',  # Build func name for the synthesis network.
    **kwargs):  # Arguments for sub-networks (mapping and synthesis).

    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    if is_training or (truncation_psi is not None
                       and not tflib.is_tf_expression(truncation_psi)
                       and truncation_psi == 1):
        truncation_psi = None
    if is_training:
        truncation_cutoff = None
    if not is_training or (dlatent_avg_beta is not None
                           and not tflib.is_tf_expression(dlatent_avg_beta)
                           and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (style_mixing_prob is not None
                           and not tflib.is_tf_expression(style_mixing_prob)
                           and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network(
            'G_synthesis', func_name=globals()[synthesis_func], **kwargs)
    num_layers = components.synthesis.input_shape[1]
    dlatent_size = components.synthesis.input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network('G_mapping',
                                           func_name=globals()[mapping_func],
                                           dlatent_broadcast=num_layers,
                                           **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable('dlatent_avg',
                                  shape=[dlatent_size],
                                  initializer=tf.initializers.zeros(),
                                  trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in,
                                                 labels_in,
                                                 is_training=is_training,
                                                 **kwargs)
    dlatents = tf.cast(dlatents, tf.float32)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            update_op = tf.assign(
                dlatent_avg,
                tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.variable_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(
                latents2, labels_in, is_training=is_training, **kwargs)
            dlatents2 = tf.cast(dlatents2, tf.float32)
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            # original version
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            dlatents = tf.where(
                tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)),
                dlatents, dlatents2)
            """ # Diff Augment version
            mixing_cutoff = tf.where_v2(
                tf.random_uniform([tf.shape(dlatents)[0]], 0.0, 1.0) < style_mixing_prob,
                tf.random_uniform([tf.shape(dlatents)[0]], 1, cur_layers, dtype=tf.int32),
                cur_layers[np.newaxis])[:, np.newaxis, np.newaxis]
            dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)
            """

    # Apply truncation trick.
    if truncation_psi is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            layer_psi = np.ones(layer_idx.shape, dtype=np.float32)
            if truncation_cutoff is None:
                layer_psi *= truncation_psi
            else:
                layer_psi = tf.where(layer_idx < truncation_cutoff,
                                     layer_psi * truncation_psi, layer_psi)
            dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi)

    # Evaluate synthesis network.
    deps = []
    if 'lod' in components.synthesis.vars:
        deps.append(tf.assign(components.synthesis.vars['lod'], lod_in))
    with tf.control_dependencies(deps):
        images_out = components.synthesis.get_output_for(
            dlatents,
            latmask,
            dconst,
            is_training=is_training,
            force_clean_graph=is_template_graph,
            **kwargs)

    # Return requested outputs.
    images_out = tf.identity(images_out, name='images_out')
    if return_dlatents:
        return images_out, dlatents
    return images_out
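
# NOTE: a minimal NumPy sketch of the truncation trick applied above: each
# layer's dlatent is lerped toward the tracked average, with psi applied only
# to layers below the cutoff. num_layers=18 and dlatent_size=512 (typical for
# a 1024px model) are assumptions for illustration.
import numpy as np

num_layers, dlatent_size = 18, 512
truncation_psi, truncation_cutoff = 0.5, 8

dlatent_avg = np.zeros(dlatent_size, dtype=np.float32)
dlatents = np.random.randn(2, num_layers, dlatent_size).astype(np.float32)

layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
layer_psi = np.where(layer_idx < truncation_cutoff, truncation_psi, 1.0).astype(np.float32)
truncated = dlatent_avg + (dlatents - dlatent_avg) * layer_psi  # lerp(avg, w, psi)
print(truncated.shape)  # (2, 18, 512)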
Exemplo n.º 23
0
    def _evaluate(self, Gs, num_gpus):
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph.
        distance_expr = []
        distance_b_expr = []
        distance_c_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                noise_vars = [
                    var for name, var in
                    Gs_clone.components.synthesis.vars.items()
                    if name.startswith('noise')
                ]

                # Generate random latents and interpolation t-values.

                lat_b_t01 = tf.random_normal([self.minibatch_per_gpu * 2] +
                                             Gs_clone.input_shapes[0][1:])
                lat_c_t01 = tf.random_normal([self.minibatch_per_gpu * 2] +
                                             Gs_clone.input_shapes[1][1:])
                lerp_t = tf.random_uniform(
                    [self.minibatch_per_gpu], 0.0,
                    1.0 if self.sampling == 'full' else 0.0)

                # Interpolate in W or Z.
                if self.space == 'w':

                    dlat_b_t01 = Gs_clone.components.mapping_b.get_output_for(
                        lat_b_t01, None, is_validation=True)
                    dlat_c_t01 = Gs_clone.components.mapping_c.get_output_for(
                        lat_c_t01, None, is_validation=True)

                    dlat_b_t0, dlat_b_t1 = dlat_b_t01[0::2], dlat_b_t01[1::2]
                    dlat_c_t0, dlat_c_t1 = dlat_c_t01[0::2], dlat_c_t01[1::2]

                    dlat_b_e0 = tflib.lerp(dlat_b_t0, dlat_b_t1,
                                           lerp_t[:, np.newaxis, np.newaxis])
                    dlat_c_e0 = tflib.lerp(dlat_c_t0, dlat_c_t1,
                                           lerp_t[:, np.newaxis, np.newaxis])
                    dlat_c_e1 = tflib.lerp(
                        dlat_c_t0, dlat_c_t1,
                        lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)

                    tmp_b_e0 = dlat_b_e0[:, 0, :]
                    tmp_b_e1 = tflib.lerp(
                        dlat_b_t0, dlat_b_t1,
                        lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)[:,
                                                                          0, :]

                    a1 = tf.reduce_sum(tmp_b_e0 * tmp_b_e1,
                                       axis=1,
                                       keepdims=True)
                    a2 = tf.reduce_sum(tmp_b_e0 * tmp_b_e0,
                                       axis=1,
                                       keepdims=True)
                    tmp_b_e1 = tmp_b_e1 - a1 / a2 * tmp_b_e0
                    tmp_b_e1 = tmp_b_e1 / tf.sqrt(
                        tf.reduce_sum(
                            tmp_b_e1 * tmp_b_e1, axis=1, keepdims=True))

                    dlat_b_e1 = tmp_b_e0 + self.epsilon * tmp_b_e1
                    dlat_b_e1 = tf.reshape(dlat_b_e1,
                                           [self.minibatch_per_gpu, 1, -1])
                    sh = dlat_b_e0.shape.as_list()
                    dlat_b_e1 = tf.tile(dlat_b_e1, [1, sh[1], 1])

                    # calculate PPL for each latent separately and jointly

                    # change b only
                    dlat_b_e02 = tf.reshape(
                        tf.stack([dlat_b_e0, dlat_b_e1], axis=1),
                        dlat_b_t01.shape)
                    dlat_c_e02 = tf.reshape(
                        tf.stack([dlat_c_e0, dlat_c_e0], axis=1),
                        dlat_c_t01.shape)
                    # change c only

                    dlat_b_e03 = tf.reshape(
                        tf.stack([dlat_b_e0, dlat_b_e0], axis=1),
                        dlat_b_t01.shape)
                    dlat_c_e03 = tf.reshape(
                        tf.stack([dlat_c_e0, dlat_c_e1], axis=1),
                        dlat_c_t01.shape)
                    # change both b and c
                    dlat_b_e04 = tf.reshape(
                        tf.stack([dlat_b_e0, dlat_b_e1], axis=1),
                        dlat_b_t01.shape)
                    dlat_c_e04 = tf.reshape(
                        tf.stack([dlat_c_e0, dlat_c_e1], axis=1),
                        dlat_c_t01.shape)
                else:  # space == 'z'

                    lat_b_t0, lat_b_t1 = lat_b_t01[0::2], lat_b_t01[1::2]
                    lat_c_t0, lat_c_t1 = lat_c_t01[0::2], lat_c_t01[1::2]

                    lat_b_e0 = slerp(lat_b_t0, lat_b_t1, lerp_t[:, np.newaxis])
                    lat_c_e0 = slerp(lat_c_t0, lat_c_t1, lerp_t[:, np.newaxis])

                    lat_b_e1 = slerp(lat_b_t0, lat_b_t1,
                                     lerp_t[:, np.newaxis] + self.epsilon)
                    lat_c_e1 = slerp(lat_c_t0, lat_c_t1,
                                     lerp_t[:, np.newaxis] + self.epsilon)

                    # change b only

                    lat_b_e02 = tf.reshape(
                        tf.stack([lat_b_e0, lat_b_e1], axis=1),
                        lat_b_t01.shape)
                    lat_c_e02 = tf.reshape(
                        tf.stack([lat_c_e0, lat_c_e0], axis=1),
                        lat_c_t01.shape)
                    dlat_b_e02 = Gs_clone.components.mapping_b.get_output_for(
                        lat_b_e02, None, is_validation=True)
                    dlat_c_e02 = Gs_clone.components.mapping_c.get_output_for(
                        lat_c_e02, None, is_validation=True)
                    # change c only

                    lat_b_e03 = tf.reshape(
                        tf.stack([lat_b_e0, lat_b_e0], axis=1),
                        lat_b_t01.shape)
                    lat_c_e03 = tf.reshape(
                        tf.stack([lat_c_e0, lat_c_e1], axis=1),
                        lat_c_t01.shape)
                    dlat_b_e03 = Gs_clone.components.mapping_b.get_output_for(
                        lat_b_e03, None, is_validation=True)
                    dlat_c_e03 = Gs_clone.components.mapping_c.get_output_for(
                        lat_c_e03, None, is_validation=True)
                    # change both b and c

                    lat_b_e04 = tf.reshape(
                        tf.stack([lat_b_e0, lat_b_e1], axis=1),
                        lat_b_t01.shape)
                    lat_c_e04 = tf.reshape(
                        tf.stack([lat_c_e0, lat_c_e1], axis=1),
                        lat_c_t01.shape)
                    dlat_b_e04 = Gs_clone.components.mapping_b.get_output_for(
                        lat_b_e04, None, is_validation=True)
                    dlat_c_e04 = Gs_clone.components.mapping_c.get_output_for(
                        lat_c_e04, None, is_validation=True)

                # Synthesize images.
                with tf.control_dependencies([
                        var.initializer for var in noise_vars
                ]):  # use same noise inputs for the entire minibatch
                    images_2 = Gs_clone.components.synthesis.get_output_for(
                        dlat_b_e02, dlat_c_e02, is_validation=True,
                        randomize_noise=False)[-1]
                    images_3 = Gs_clone.components.synthesis.get_output_for(
                        dlat_b_e03, dlat_c_e03, is_validation=True,
                        randomize_noise=False)[-1]
                    images_4 = Gs_clone.components.synthesis.get_output_for(
                        dlat_b_e04, dlat_c_e04, is_validation=True,
                        randomize_noise=False)[-1]

                # Crop only the face region.
                if self.crop:
                    c = int(images_2.shape[2] // 8)
                    images_2 = images_2[:, :, c * 3:c * 7, c * 2:c * 6]
                    images_3 = images_3[:, :, c * 3:c * 7, c * 2:c * 6]
                    images_4 = images_4[:, :, c * 3:c * 7, c * 2:c * 6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                if images_2.shape[2] > 256:
                    factor = images_2.shape[2] // 256

                    images_2 = tf.reshape(images_2, [
                        -1, images_2.shape[1], images_2.shape[2] // factor,
                        factor, images_2.shape[3] // factor, factor
                    ])
                    images_2 = tf.reduce_mean(images_2, axis=[3, 5])
                    images_3 = tf.reshape(images_3, [
                        -1, images_3.shape[1], images_3.shape[2] // factor,
                        factor, images_3.shape[3] // factor, factor
                    ])
                    images_3 = tf.reduce_mean(images_3, axis=[3, 5])
                    images_4 = tf.reshape(images_4, [
                        -1, images_4.shape[1], images_4.shape[2] // factor,
                        factor, images_4.shape[3] // factor, factor
                    ])
                    images_4 = tf.reduce_mean(images_4, axis=[3, 5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.

                images_2 = (images_2 + 1) * (255 / 2)
                images_3 = (images_3 + 1) * (255 / 2)
                images_4 = (images_4 + 1) * (255 / 2)

                # Evaluate perceptual distance.

                img_2_e0, img_2_e1 = images_2[0::2], images_2[1::2]
                img_3_e0, img_3_e1 = images_3[0::2], images_3[1::2]
                img_4_e0, img_4_e1 = images_4[0::2], images_4[1::2]
                distance_measure = misc.load_pkl(
                    'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl'
                )  # vgg16_zhang_perceptual.pkl
                distance_b_expr.append(
                    distance_measure.get_output_for(img_2_e0, img_2_e1) *
                    (1 / self.epsilon**2))
                distance_c_expr.append(
                    distance_measure.get_output_for(img_3_e0, img_3_e1) *
                    (1 / self.epsilon**2))
                distance_expr.append(
                    distance_measure.get_output_for(img_4_e0, img_4_e1) *
                    (1 / self.epsilon**2))

        # Sampling loop.
        all_distances = []

        b_distance = []
        c_distance = []
        for _ in range(0, self.num_samples, minibatch_size):
            all_distances += tflib.run(distance_expr)

            b_distance += tflib.run(distance_b_expr)
            c_distance += tflib.run(distance_c_expr)
        all_distances = np.concatenate(all_distances, axis=0)

        b_distances = np.concatenate(b_distance, axis=0)
        c_distances = np.concatenate(c_distance, axis=0)
        # Reject outliers.
        lo = np.percentile(all_distances, 1, interpolation='lower')
        hi = np.percentile(all_distances, 99, interpolation='higher')
        filtered_distances = np.extract(
            np.logical_and(lo <= all_distances, all_distances <= hi),
            all_distances)
        self._report_result(np.mean(filtered_distances), suffix='_bc')

        lo = np.percentile(b_distances, 1, interpolation='lower')
        hi = np.percentile(b_distances, 99, interpolation='higher')
        filtered_distances = np.extract(
            np.logical_and(lo <= b_distances, b_distances <= hi), b_distances)
        self._report_result(np.mean(filtered_distances), suffix='_b')

        lo = np.percentile(c_distances, 1, interpolation='lower')
        hi = np.percentile(c_distances, 99, interpolation='higher')
        filtered_distances = np.extract(
            np.logical_and(lo <= c_distances, c_distances <= hi), c_distances)
        self._report_result(np.mean(filtered_distances), suffix='_c')
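
# NOTE: the metric code above calls a module-level slerp() that is not shown
# in this listing. Below is a sketch of spherical interpolation as used by
# StyleGAN's perceptual path length metric; treating this as the omitted
# helper is an assumption.
def slerp(a, b, t):
    a = a / tf.norm(a, axis=-1, keepdims=True)        # project endpoints onto the unit sphere
    b = b / tf.norm(b, axis=-1, keepdims=True)
    d = tf.reduce_sum(a * b, axis=-1, keepdims=True)  # cosine of the angle between them
    p = t * tf.math.acos(d)                           # walk fraction t of the great-circle angle
    c = b - d * a
    c = c / tf.norm(c, axis=-1, keepdims=True)        # unit direction orthogonal to a, toward b
    d = a * tf.math.cos(p) + c * tf.math.sin(p)
    return d / tf.norm(d, axis=-1, keepdims=True)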
Exemplo n.º 24
0
def truncate(dlatents, truncation_psi, maxlayer=8):
    # Pull the first `maxlayer` style layers toward the tracked average dlatent
    # (assumes a 16-layer synthesis network).
    dlatent_avg = tf.get_default_session().run(Gs.own_vars["dlatent_avg"])
    layer_idx = np.arange(16)[np.newaxis, :, np.newaxis]
    ones = np.ones(layer_idx.shape, dtype=np.float32)
    coefs = tf.where(layer_idx < maxlayer, truncation_psi * ones, ones)
    return tf.get_default_session().run(tflib.lerp(dlatent_avg, dlatents, coefs))
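
# NOTE: hypothetical usage, assuming a StyleGAN Gs with a 16-layer synthesis
# network has already been loaded into the default session (the variable
# names here are illustrative, not part of the snippet above):
z = np.random.randn(1, *Gs.input_shape[1:]).astype(np.float32)
w = Gs.components.mapping.run(z, None)     # broadcast dlatents, shape [1, 16, 512]
w_trunc = truncate(w, truncation_psi=0.7)  # pull the first 8 style layers toward the average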
Exemplo n.º 25
0
def G_style(
    latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,  # Second input: Conditioning labels [minibatch, label_size].
    truncation_psi=0.7,  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff=8,  # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val=None,  # Value for truncation_psi to use during validation.
    truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
    is_training=False,  # Network is under training? Enables and disables specific features.
    is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
    components=dnnlib.EasyDict(
    ),  # Container for sub-networks. Retained between calls.
    **kwargs):  # Arguments for sub-networks (G_mapping and G_synthesis).

    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    if is_training or (truncation_psi is not None
                       and not tflib.is_tf_expression(truncation_psi)
                       and truncation_psi == 1):
        truncation_psi = None
    if is_training or (truncation_cutoff is not None
                       and not tflib.is_tf_expression(truncation_cutoff)
                       and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (dlatent_avg_beta is not None
                           and not tflib.is_tf_expression(dlatent_avg_beta)
                           and dlatent_avg_beta == 1):
        dlatent_avg_beta = None

    # Setup components.
    if "synthesis" not in components:
        components.synthesis = tflib.Network(num_inputs=1,
                                             name="G_synthesis",
                                             func_name=G_synthesis,
                                             **kwargs)
    num_layers = components.synthesis.input_shape[1]
    dlatent_size = components.synthesis.input_shape[2]
    if "mapping" not in components:
        components.mapping = tflib.Network(num_inputs=2,
                                           name="G_mapping",
                                           func_name=G_mapping,
                                           dlatent_broadcast=num_layers,
                                           **kwargs)

    # Setup variables.
    dlatent_avg = tf.get_variable(
        "dlatent_avg",
        shape=[dlatent_size],
        initializer=tf.initializers.zeros(),
        trainable=False,
    )

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in,
                                                 **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope("DlatentAvg"):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            update_op = tf.assign(
                dlatent_avg,
                tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope("Truncation"):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            coefs = tf.where(layer_idx < truncation_cutoff,
                             truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    images_out = components.synthesis.get_output_for(dlatents, **kwargs)
    return images_out
def grow(x, res, lod):
    # Recursive progressive-growing helper: crossfade this resolution's RGB
    # output with the upsampled previous one while lod_in is between levels.
    y = block(res, x)
    img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod)
    img = cset(img, (lod_in > lod),
               lambda: naive_upsample_2d(
                   tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)),
                              lod_in - lod), factor=2**lod))
    if lod > 0:
        img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
    return img()
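
# NOTE: grow() depends on helpers defined elsewhere in the StyleGAN sources
# (block, torgb, upsample_2d, naive_upsample_2d). For reference, cset() is the
# conditional-lambda combinator from those sources; reproducing it here is an
# assumption about the omitted context.
def cset(cur_lambda, new_cond, new_lambda):
    # Pick new_lambda when new_cond holds at graph evaluation time, else keep cur_lambda.
    return lambda: tf.cond(new_cond, new_lambda, cur_lambda)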
    def _evaluate(self, Gs, num_gpus):
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph.
        distance_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                try:  # StyleGAN
                    noise_vars = [
                        var for name, var in
                        Gs_clone.components.synthesis.vars.items()
                        if name.startswith('noise')
                    ]
                except AttributeError:  # ProGAN
                    noise_vars = []

                # Generate random latents and interpolation t-values.
                lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] +
                                           Gs_clone.input_shape[1:])
                lerp_t = tf.random_uniform(
                    [self.minibatch_per_gpu], 0.0,
                    1.0 if self.sampling == 'full' else 0.0)

                # Interpolate in W or Z.
                if self.space == 'w':
                    dlat_t01 = Gs_clone.components.mapping.get_output_for(
                        lat_t01, None, is_validation=True)
                    dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1,
                                         lerp_t[:, np.newaxis, np.newaxis])
                    dlat_e1 = tflib.lerp(
                        dlat_t0, dlat_t1,
                        lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1),
                                          dlat_t01.shape)
                else:  # space == 'z'
                    lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                    lat_e1 = slerp(lat_t0, lat_t1,
                                   lerp_t[:, np.newaxis] + self.epsilon)
                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1),
                                         lat_t01.shape)
                    try:  # StyleGAN:
                        dlat_e01 = Gs_clone.components.mapping.get_output_for(
                            lat_e01, None, is_validation=True)
                    except AttributeError:  # ProGAN
                        dlat_e01 = lat_e01

                # Synthesize images.
                with tf.control_dependencies([
                        var.initializer for var in noise_vars
                ]):  # use same noise inputs for the entire minibatch
                    try:  # StyleGAN
                        images = Gs_clone.components.synthesis.get_output_for(
                            dlat_e01,
                            is_validation=True,
                            randomize_noise=False)
                    except AttributeError:  # ProGAN
                        images = Gs_clone.get_output_for(dlat_e01,
                                                         None,
                                                         is_validation=True,
                                                         randomize_noise=False)

                # Upscale images to 256x256 for VGG
                if images.shape[2] < 256:
                    images = tf.transpose(
                        images, [0, 2, 3, 1])  # (B, C, H, W) --> (B, H, W, C)
                    images = tf.compat.v1.image.resize(images, (256, 256))
                    images = tf.transpose(
                        images,
                        [0, 3, 1, 2])  # (B, 256, 256, C) --> (B, C, 256, 256)

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                elif images.shape[2] > 256:
                    factor = images.shape[2] // 256
                    images = tf.reshape(images, [
                        -1, images.shape[1], images.shape[2] // factor, factor,
                        images.shape[3] // factor, factor
                    ])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                img_e0, img_e1 = images[0::2], images[1::2]
                distance_measure = misc.load_pkl(
                    'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2'
                )  # vgg16_zhang_perceptual.pkl
                distance_expr.append(
                    distance_measure.get_output_for(img_e0, img_e1) *
                    (1 / self.epsilon**2))

        # Sampling loop.
        all_distances = []
        for _ in range(0, self.num_samples, minibatch_size):
            all_distances += tflib.run(distance_expr)
        all_distances = np.concatenate(all_distances, axis=0)

        # Reject outliers.
        lo = np.percentile(all_distances, 1, interpolation='lower')
        hi = np.percentile(all_distances, 99, interpolation='higher')
        filtered_distances = np.extract(
            np.logical_and(lo <= all_distances, all_distances <= hi),
            all_distances)
        self._report_result(np.mean(filtered_distances))
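
# NOTE: both _evaluate variants in this listing estimate the perceptual path
# length, PPL = E[ d(G(interp(z1, z2, t)), G(interp(z1, z2, t + eps))) / eps^2 ],
# where d is the LPIPS (vgg16_zhang) distance, interp is slerp in Z or lerp in
# W, and t ~ U(0, 1) for 'full' sampling or t = 0 for endpoint sampling. The
# (1 / self.epsilon**2) factor above implements the division by the squared
# step size.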
Exemplo n.º 28
0
def G_style(
    latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,  # Second input: Conditioning labels [minibatch, label_size].
    truncation_psi=0.7,  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff=8,  # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val=None,  # Value for truncation_psi to use during validation.
    truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
    style_mixing_prob=0.9,  # Probability of mixing styles during training. None = disable.
    is_training=False,  # Network is under training? Enables and disables specific features.
    is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
    is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
    components=dnnlib.EasyDict(
    ),  # Container for sub-networks. Retained between calls.
    **kwargs):  # Arguments for sub-networks (G_mapping and G_synthesis).

    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    if is_training or (truncation_psi is not None
                       and not tflib.is_tf_expression(truncation_psi)
                       and truncation_psi == 1):
        truncation_psi = None
    if is_training or (truncation_cutoff is not None
                       and not tflib.is_tf_expression(truncation_cutoff)
                       and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (dlatent_avg_beta is not None
                           and not tflib.is_tf_expression(dlatent_avg_beta)
                           and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (style_mixing_prob is not None
                           and not tflib.is_tf_expression(style_mixing_prob)
                           and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network('G_synthesis',
                                             func_name=G_synthesis,
                                             **kwargs)
    num_layers = components.synthesis.input_shape[1]
    dlatent_size = components.synthesis.input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network('G_mapping',
                                           func_name=G_mapping,
                                           dlatent_broadcast=num_layers,
                                           **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable('dlatent_avg',
                                  shape=[dlatent_size],
                                  initializer=tf.initializers.zeros(),
                                  trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in,
                                                 **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            update_op = tf.assign(
                dlatent_avg,
                tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(
                latents2, labels_in, **kwargs)
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            dlatents = tf.where(
                tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)),
                dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            coefs = tf.where(layer_idx < truncation_cutoff,
                             truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    with tf.control_dependencies(
        [tf.assign(components.synthesis.find_var('lod'), lod_in)]):
        images_out = components.synthesis.get_output_for(
            dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')
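
# NOTE: a small NumPy sketch of the style-mixing regularization above: layers
# before a random cutoff keep the first set of dlatents, later layers take the
# second. num_layers=18 is an assumption for illustration.
import numpy as np

num_layers, dlatent_size = 18, 512
rng = np.random.default_rng(0)
w1 = rng.normal(size=(1, num_layers, dlatent_size)).astype(np.float32)
w2 = rng.normal(size=(1, num_layers, dlatent_size)).astype(np.float32)

mixing_cutoff = rng.integers(1, num_layers)  # mirrors tf.random_uniform([], 1, cur_layers)
layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
mixed = np.where(layer_idx < mixing_cutoff, w1, w2)
print(mixed.shape)  # (1, 18, 512)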
def G_logistic_ns_pathreg_interpolate(G, D, opt, training_set, minibatch_size, pl_minibatch_shrink=2, pl_decay=0.01,
                                      pl_weight=2.0):
    _ = opt
    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    labels = training_set.get_random_labels_tf(minibatch_size)

    latent_interpolate = tf.random_normal([1] + G.input_shapes[0][1:])
    label_interpolate = training_set.get_random_labels_tf(1)

    # fake_images_out, fake_dlatents_out = G.get_output_for(latents, labels, is_training=True, return_dlatents=True)

    # 1. get dlatents for the batch by running G.mapping_latent (Shape: [4, 16, 512])
    dlatents = G.components.mapping_latent.get_output_for(latents, is_training=True)
    dlatent_interpolate = G.components.mapping_latent.get_output_for(latent_interpolate, is_training=True)

    # 2. define an interpolation magnitude
    # interpolation_mag = 0.5
    interpolation_mag = tf.clip_by_value(tf.random.normal([1], 0.5, 0.15), 0, 1)

    # 3. interpolate between dlatents[0] and dlatent_interpolate (Shape: [1, 16, 512])
    dlatent_interpolate = tf.expand_dims(tflib.lerp(dlatents[0], dlatent_interpolate, interpolation_mag), axis=0)

    # 4. replace dlatents[0] with interpolated dlatent (Shape: [4, 16, 512])
    dlatents_replaced = tf.concat([dlatent_interpolate, dlatents[1:]], axis=0)

    # 5. mix the labels (Shape: [1, 127])
    # wx1 = tf.where(labels[0] > labels[1], labels[0] * interpolation_mag, labels[0])
    # wx2 = tf.where(labels[0] < labels[1], labels[1] * (1 - interpolation_mag), labels[1])
    # mixed_label = tf.expand_dims(tf.clip_by_value(wx1 + wx2, 0, 1), axis=0)
    mixed_label = tf.expand_dims(
        labels[0] * interpolation_mag + label_interpolate[0] * (1 - interpolation_mag), axis=0)

    # 6. replace labels[0] with the new mixed label
    labels = tf.concat([mixed_label, labels[1:]], axis=0)

    # 7. run G.mapping_label
    dlabel = G.components.mapping_label.get_output_for(labels, is_training=True)

    # 8. add the mapped vectors
    fake_dlatents_out = dlatents_replaced + dlabel

    # 9. run G.synthesis with the new dlatents and generate the fake images
    fake_images_out = G.components.synthesis.get_output_for(fake_dlatents_out, is_training=True)

    fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
    loss = tf.nn.softplus(-fake_scores_out)  # -log(sigmoid(fake_scores_out))

    # Path length regularization.
    with tf.name_scope('PathReg'):
        # Evaluate the regularization term using a smaller minibatch to conserve memory.
        if pl_minibatch_shrink > 1:
            pl_minibatch = minibatch_size // pl_minibatch_shrink
            pl_latents = tf.random_normal([pl_minibatch] + G.input_shapes[0][1:])
            pl_labels = training_set.get_random_labels_tf(pl_minibatch)
            fake_images_out, fake_dlatents_out = G.get_output_for(pl_latents, pl_labels, is_training=True,
                                                                  return_dlatents=True)

        # Compute |J*y|.
        pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(np.prod(G.output_shape[2:]))
        pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise), [fake_dlatents_out])[0]
        pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
        pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0.0, dtype=tf.float32)
        pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var)
        pl_update = tf.assign(pl_mean_var, pl_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([pl_update]):
            pl_penalty = tf.square(pl_lengths - pl_mean)
            pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)

        # Apply weight.
        #
        # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
        # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        # = 2 / (r^2) / (log2(r) * 2 - 2)
        # = 1 / (r^2 * (log2(r) - 1))
        # = ln(2) / (r^2 * (ln(r) - ln(2)))
        #
        reg = pl_penalty * pl_weight

    return loss, reg
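
# NOTE: a quick numeric check of the effective weight derived in the comment
# above, for a hypothetical 1024x1024 generator (r = 1024, so log2(r) = 10):
import numpy as np

r = 1024
pl_weight = 2.0
num_pixels = r * r
num_affine_layers = int(np.log2(r)) * 2 - 2            # 18 style layers at 1024px
gamma_pl = pl_weight / num_pixels / num_affine_layers  # ~1.06e-7
assert np.isclose(gamma_pl, 1 / (r**2 * (np.log2(r) - 1)))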