Пример #1
0
def truncate_fancy(dlat,
                   dlat_avg,
                   model_scale=18,
                   truncation_psi=0.7,
                   minlayer=0,
                   maxlayer=8,
                   do_clip=False):
    """Interpolate from `dlat_avg` toward `dlat` with a per-layer weight.

    A float32 weight array of shape (1, model_scale, 1) is built: layers
    with index below `maxlayer` get `truncation_psi`, every other layer
    gets 1.0, and when `minlayer` is positive the first `minlayer` layers
    are reset to 1.0 as well.

    Returns `tflib.lerp(dlat_avg, dlat, weights)` when `do_clip` is false,
    otherwise the `.eval()` of `tflib.lerp_clip(dlat_avg, dlat, weights)`.
    """
    # Layer indices laid out so they broadcast as (1, model_scale, 1).
    layer_pos = np.arange(model_scale)[np.newaxis, :, np.newaxis]
    unit = np.ones(layer_pos.shape, dtype=np.float32)

    weights = np.where(layer_pos < maxlayer, truncation_psi * unit, unit)
    if minlayer > 0:
        # Leave the leading layers untouched by the truncation.
        weights[0, :minlayer, :] = unit[0, :minlayer, :]

    if not do_clip:
        return tflib.lerp(dlat_avg, dlat, weights)
    return tflib.lerp_clip(dlat_avg, dlat, weights).eval()
Пример #2
0
def D_basic(
    images_in,  # First input: Images [minibatch, channel, height, width].
    labels_in,  # Second input: Labels [minibatch, label_size].
    num_channels=1,  # Number of input color channels. Overridden based on dataset.
    resolution=32,  # Input resolution. Overridden based on dataset.
    label_size=0,  # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base=8192,  # Overall multiplier for the number of feature maps.
    fmap_decay=1.0,  # log2 feature map reduction when doubling the resolution.
    fmap_max=512,  # Maximum number of feature maps in any layer.
    nonlinearity='lrelu',  # Activation function: 'relu', 'lrelu'.
    use_wscale=True,  # Enable equalized learning rate?
    mbstd_group_size=4,  # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features=1,  # Number of features for the minibatch standard deviation layer.
    dtype='float32',  # Data type to use for activations and outputs.
    fused_scale='auto',  # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter=[
        1, 2, 1
    ],  # Low-pass filter to apply when resampling activations. None = no filtering.
    structure='auto',  # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
    **_kwargs):  # Ignore unrecognized keyword args.
    """Build the discriminator graph and return its score tensor.

    The images are turned into feature maps by `fromrgb`, passed through
    `block` for res = resolution_log2 .. 3, and the res = 2 block produces
    the scores through a convolution and two dense layers. When
    `label_size` is non-zero the scores are multiplied by `labels_in` and
    summed over axis 1.

    `structure='auto'` resolves to 'linear' for a template graph and to
    'recursive' otherwise. Any other unrecognized value leaves
    `scores_out` as None, so the final dtype check fails.

    Returns:
        The result of `tf.identity(scores_out, name='scores_out')`.

    Raises:
        AssertionError: if `resolution` is not a power of two or is < 4,
            or if the output dtype differs from `dtype`.
        KeyError: if `nonlinearity` is neither 'relu' nor 'lrelu'.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    # Number of feature maps for a stage, capped at fmap_max.
    def nf(stage):
        return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

    # Applies blur2d with blur_filter, or passes x through when blur_filter is falsy.
    def blur(x):
        return blur2d(x, blur_filter) if blur_filter else x

    if structure == 'auto':
        structure = 'linear' if is_template_graph else 'recursive'
    # Activation function and the gain handed to the conv/dense layers.
    act, gain = {
        'relu': (tf.nn.relu, np.sqrt(2)),
        'lrelu': (leaky_relu, np.sqrt(2))
    }[nonlinearity]

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    # Level-of-detail value, read from the non-trainable variable 'lod'.
    lod_in = tf.cast(
        tf.get_variable('lod', initializer=np.float32(0.0), trainable=False),
        dtype)
    scores_out = None

    # Building blocks.
    # fromrgb: 1x1 convolution + bias + activation applied to an image.
    def fromrgb(x, res):  # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return act(
                apply_bias(
                    conv2d(x,
                           fmaps=nf(res - 1),
                           kernel=1,
                           gain=gain,
                           use_wscale=use_wscale)))

    # block: conv + downscaling conv for res >= 3; the final score head for res == 2.
    def block(x, res):  # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if res >= 3:  # 8x8 and up
                with tf.variable_scope('Conv0'):
                    x = act(
                        apply_bias(
                            conv2d(x,
                                   fmaps=nf(res - 1),
                                   kernel=3,
                                   gain=gain,
                                   use_wscale=use_wscale)))
                with tf.variable_scope('Conv1_down'):
                    x = act(
                        apply_bias(
                            conv2d_downscale2d(blur(x),
                                               fmaps=nf(res - 2),
                                               kernel=3,
                                               gain=gain,
                                               use_wscale=use_wscale,
                                               fused_scale=fused_scale)))
            else:  # 4x4
                if mbstd_group_size > 1:
                    x = minibatch_stddev_layer(x, mbstd_group_size,
                                               mbstd_num_features)
                with tf.variable_scope('Conv'):
                    x = act(
                        apply_bias(
                            conv2d(x,
                                   fmaps=nf(res - 1),
                                   kernel=3,
                                   gain=gain,
                                   use_wscale=use_wscale)))
                with tf.variable_scope('Dense0'):
                    x = act(
                        apply_bias(
                            dense(x,
                                  fmaps=nf(res - 2),
                                  gain=gain,
                                  use_wscale=use_wscale)))
                # Output layer: one unit per label, or a single unit without labels.
                with tf.variable_scope('Dense1'):
                    x = apply_bias(
                        dense(x,
                              fmaps=max(label_size, 1),
                              gain=1,
                              use_wscale=use_wscale))
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)
        scores_out = block(x, 2)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            lod = resolution_log2 - res
            x = block(x, res)
            img = downscale2d(img)
            y = fromrgb(img, res - 1)
            # Blend the block output with the features of the downscaled
            # image, weighted by lod_in - lod.
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)
        scores_out = block(x, 2)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':

        # Returns a function that calls tf.cond(new_cond, new_lambda, cur_lambda).
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

        # grow() is entered at res = 2 with the largest lod. Each call first
        # obtains its input (possibly by recursing into the next finer level)
        # and only then applies its own block.
        def grow(res, lod):
            x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
            if lod > 0:
                x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            x = block(x(), res)
            y = lambda: x
            if res > 2:
                # When lod_in > lod, interpolate toward the features of the
                # image downscaled by 2**(lod + 1).
                y = cset(
                    y, (lod_in > lod), lambda: tflib.lerp(
                        x,
                        fromrgb(downscale2d(images_in, 2**(lod + 1)), res - 1),
                        lod_in - lod))
            return y()

        scores_out = grow(2, resolution_log2 - 2)

    # Label conditioning from "Which Training Methods for GANs do actually Converge?"
    if label_size:
        with tf.variable_scope('LabelSwitch'):
            scores_out = tf.reduce_sum(scores_out * labels_in,
                                       axis=1,
                                       keepdims=True)

    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out
Пример #3
0
def G_synthesis(
    dlatents_in,  # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size=512,  # Disentangled latent (W) dimensionality.
    num_channels=3,  # Number of output color channels.
    resolution=1024,  # Output resolution.
    fmap_base=8192,  # Overall multiplier for the number of feature maps.
    fmap_decay=1.0,  # log2 feature map reduction when doubling the resolution.
    fmap_max=512,  # Maximum number of feature maps in any layer.
    use_styles=True,  # Enable style inputs?
    const_input_layer=True,  # First layer is a learned constant?
    use_noise=True,  # Enable noise inputs?
    randomize_noise=True,  # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    nonlinearity='lrelu',  # Activation function: 'relu', 'lrelu'
    use_wscale=True,  # Enable equalized learning rate?
    use_pixel_norm=False,  # Enable pixelwise feature vector normalization?
    use_instance_norm=True,  # Enable instance normalization?
    dtype='float32',  # Data type to use for activations and outputs.
    fused_scale='auto',  # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter=[
        1, 2, 1
    ],  # Low-pass filter to apply when resampling activations. None = no filtering.
    structure='auto',  # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
    force_clean_graph=False,  # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
    **_kwargs):  # Ignore unrecognized keyword args.
    """Build the synthesis network graph and return its image tensor.

    Starting from a 4x4 feature map (a 'const' variable or a dense layer
    on the first dlatent), the network applies `block` for
    res = 3 .. resolution_log2 and converts features to images with
    `torgb`. Every layer ends with `layer_epilogue`.

    `is_template_graph` forces `force_clean_graph`, which in turn turns
    off `randomize_noise` and makes `structure='auto'` resolve to 'linear'
    instead of 'recursive'. Any other unrecognized `structure` leaves
    `images_out` as None, so the final dtype check fails.

    Returns:
        The result of `tf.identity(images_out, name='images_out')`.

    Raises:
        AssertionError: if `resolution` is not a power of two or is < 4,
            or if the output dtype differs from `dtype`.
        KeyError: if `nonlinearity` is neither 'relu' nor 'lrelu'.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    # Number of feature maps for a stage, capped at fmap_max.
    def nf(stage):
        return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

    # Applies blur2d with blur_filter, or passes x through when blur_filter is falsy.
    def blur(x):
        return blur2d(x, blur_filter) if blur_filter else x

    if is_template_graph: force_clean_graph = True
    if force_clean_graph: randomize_noise = False
    if structure == 'auto':
        structure = 'linear' if force_clean_graph else 'recursive'
    # Activation function and the gain handed to the conv/dense layers.
    act, gain = {
        'relu': (tf.nn.relu, np.sqrt(2)),
        'lrelu': (leaky_relu, np.sqrt(2))
    }[nonlinearity]
    # Two layers per resolution from 4x4 up to the output resolution.
    num_layers = resolution_log2 * 2 - 2
    num_styles = num_layers if use_styles else 1
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_styles, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(
        tf.get_variable('lod', initializer=np.float32(0), trainable=False),
        dtype)

    # Noise inputs.
    noise_inputs = []
    if use_noise:
        for layer_idx in range(num_layers):
            res = layer_idx // 2 + 2
            # NOTE(review): use_noise is used as the second dimension here; it is
            # truthy inside this branch, so True acts as 1. Presumably a literal 1
            # was intended -- confirm before passing a non-bool use_noise.
            shape = [1, use_noise, 2**res, 2**res]
            noise_inputs.append(
                tf.get_variable('noise%d' % layer_idx,
                                shape=shape,
                                initializer=tf.initializers.random_normal(),
                                trainable=False))

    # Things to do at the end of each layer.
    # Order: noise, bias, activation, pixel norm, instance norm, style modulation.
    def layer_epilogue(x, layer_idx):
        if use_noise:
            x = apply_noise(x,
                            noise_inputs[layer_idx],
                            randomize_noise=randomize_noise)
        x = apply_bias(x)
        x = act(x)
        if use_pixel_norm:
            x = pixel_norm(x)
        if use_instance_norm:
            x = instance_norm(x)
        if use_styles:
            x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)
        return x

    # Early layers.
    with tf.variable_scope('4x4'):
        if const_input_layer:
            with tf.variable_scope('Const'):
                x = tf.get_variable('const',
                                    shape=[1, nf(1), 4, 4],
                                    initializer=tf.initializers.ones())
                # Repeat the constant once per minibatch entry.
                x = layer_epilogue(
                    tf.tile(tf.cast(x, dtype),
                            [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)
        else:
            with tf.variable_scope('Dense'):
                x = dense(
                    dlatents_in[:, 0],
                    fmaps=nf(1) * 16,
                    gain=gain / 4,
                    use_wscale=use_wscale
                )  # tweak gain to match the official implementation of Progressing GAN
                x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
        with tf.variable_scope('Conv'):
            x = layer_epilogue(
                conv2d(x,
                       fmaps=nf(1),
                       kernel=3,
                       gain=gain,
                       use_wscale=use_wscale), 1)

    # Building blocks for remaining layers.
    # block: upscaling conv (layer res*2-4) followed by a conv (layer res*2-3).
    def block(res, x):  # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = layer_epilogue(
                    blur(
                        upscale2d_conv2d(x,
                                         fmaps=nf(res - 1),
                                         kernel=3,
                                         gain=gain,
                                         use_wscale=use_wscale,
                                         fused_scale=fused_scale)),
                    res * 2 - 4)
            with tf.variable_scope('Conv1'):
                x = layer_epilogue(
                    conv2d(x,
                           fmaps=nf(res - 1),
                           kernel=3,
                           gain=gain,
                           use_wscale=use_wscale), res * 2 - 3)
            return x

    # torgb: 1x1 convolution + bias producing num_channels feature maps.
    def torgb(res, x):  # res = 2..resolution_log2
        lod = resolution_log2 - res
        with tf.variable_scope('ToRGB_lod%d' % lod):
            return apply_bias(
                conv2d(x,
                       fmaps=num_channels,
                       kernel=1,
                       gain=1,
                       use_wscale=use_wscale))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            images_out = upscale2d(images_out)
            # Blend this level's image with the upscaled previous image,
            # weighted by lod_in - lod.
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':

        # Returns a function that calls tf.cond(new_cond, new_lambda, cur_lambda).
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

        def grow(x, res, lod):
            y = block(res, x)
            # Default: this level's image upscaled by 2**lod.
            img = lambda: upscale2d(torgb(res, y), 2**lod)
            # When lod_in > lod: interpolate with the previous level's image first.
            img = cset(
                img, (lod_in > lod), lambda: upscale2d(
                    tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)),
                               lod_in - lod), 2**lod))
            # When lod_in < lod: continue to the next finer level.
            if lod > 0:
                img = cset(img, (lod_in < lod),
                           lambda: grow(y, res + 1, lod - 1))
            return img()

        # NOTE(review): for resolution == 4 this is grow(x, 3, -1); confirm the
        # recursive structure is only used with resolution >= 8.
        images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
def D_stylegan(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 3,            # Number of input color channels. Overridden based on dataset.
    resolution          = 1024,         # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the discriminator graph and return its score tensor.

    The images are turned into feature maps by `fromrgb` and reduced by
    `block` for res = resolution_log2 .. 3 using the selected `structure`.
    The shared 4x4 tail (optional minibatch-stddev layer, 'Conv',
    'Dense0') and the 'Output' dense layer then produce the scores. When
    the label dimension is > 0 the scores are multiplied by `labels_in`
    and summed over axis 1.

    `structure='auto'` resolves to 'linear' for a template graph and to
    'recursive' otherwise. Any other unrecognized value leaves `x`
    unassigned, so the 4x4 section fails.

    Returns:
        The result of `tf.identity(scores_out, name='scores_out')`.

    Raises:
        AssertionError: if `resolution` is not a power of two or is < 4,
            or if the output dtype differs from `dtype`.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    # Number of feature maps for a stage, clipped to [fmap_min, fmap_max].
    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'
    # The activation is passed by name to apply_bias_act.
    act = nonlinearity

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    # Level-of-detail value, read from the non-trainable variable 'lod'.
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)

    # Building blocks for spatial layers.
    def fromrgb(x, res): # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=1), act=act)
    def block(x, res): # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0'):
                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
            with tf.variable_scope('Conv1_down'):
                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            lod = resolution_log2 - res
            x = block(x, res)
            with tf.variable_scope('Downsample_lod%d' % lod):
                img = downsample_2d(img)
            y = fromrgb(img, res - 1)
            # Blend the block output with the features of the downsampled
            # image, weighted by lod_in - lod.
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # Returns a function that calls tf.cond(new_cond, new_lambda, cur_lambda).
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
        # Each call first obtains its input (possibly by recursing into the
        # next finer level) and only then applies its own block.
        def grow(res, lod):
            x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2**lod), res)
            if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            x = block(x(), res); y = lambda: x
            # When lod_in > lod, interpolate toward the features of the image
            # downsampled by 2**(lod + 1).
            y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(naive_downsample_2d(images_in, factor=2**(lod+1)), res - 1), lod_in - lod))
            return y()
        # NOTE(review): for resolution == 4 this is grow(3, -1); confirm the
        # recursive structure is only used with resolution >= 8.
        x = grow(3, resolution_log2 - 3)

    # Final layers at 4x4 resolution.
    with tf.variable_scope('4x4'):
        if mbstd_group_size > 1:
            with tf.variable_scope('MinibatchStddev'):
                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)

    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
    with tf.variable_scope('Output'):
        # One output unit per label, or a single unit when there are no labels.
        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
        if labels_in.shape[1] > 0:
            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
    scores_out = x

    # Output.
    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out
def G_synthesis_stylegan_revised(
    dlatents_in,                        # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Disentangled latent (W) dimensionality.
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    fused_modconv       = True,         # Implement modulated_conv2d_layer() as a single fused op?
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    force_clean_graph   = False,        # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the synthesis network graph and return its image tensor.

    Starting from a tiled 4x4 'const' variable, the network applies a
    modulated convolution at 4x4, then `block` for
    res = 3 .. resolution_log2, and converts features to images with
    `torgb`. Each `layer` adds scaled noise before bias and activation.

    `is_template_graph` forces `force_clean_graph`, which in turn turns
    off `randomize_noise` and makes `structure='auto'` resolve to 'linear'
    instead of 'recursive'. Any other unrecognized `structure` leaves
    `images_out` as None, so the final dtype check fails.

    Returns:
        The result of `tf.identity(images_out, name='images_out')`.

    Raises:
        AssertionError: if `resolution` is not a power of two or is < 4,
            or if the output dtype differs from `dtype`.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    # Number of feature maps for a stage, clipped to [fmap_min, fmap_max].
    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    if is_template_graph: force_clean_graph = True
    if force_clean_graph: randomize_noise = False
    if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'
    # The activation is passed by name to apply_bias_act.
    act = nonlinearity
    num_layers = resolution_log2 * 2 - 2
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs.
    # One non-trainable variable per layer() call: index 0 is the 4x4 conv,
    # indices res*2-5 and res*2-4 are the two convs of block(res).
    noise_inputs = []
    for layer_idx in range(num_layers - 1):
        res = (layer_idx + 5) // 2
        shape = [1, 1, 2**res, 2**res]
        noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))

    # Single convolution layer with all the bells and whistles.
    def layer(x, layer_idx, fmaps, kernel, up=False):
        x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv)
        # Fresh random noise per call, or the stored noise variable for this layer.
        if randomize_noise:
            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
        else:
            noise = tf.cast(noise_inputs[layer_idx], x.dtype)
        # Scalar variable 'noise_strength' (zero-initialized) scales the noise.
        noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
        x += noise * tf.cast(noise_strength, x.dtype)
        return apply_bias_act(x, act=act)

    # Early layers.
    with tf.variable_scope('4x4'):
        with tf.variable_scope('Const'):
            x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
            # Repeat the constant once per minibatch entry.
            x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
        with tf.variable_scope('Conv'):
            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)

    # Building blocks for remaining layers.
    def block(res, x): # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
            with tf.variable_scope('Conv1'):
                x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
            return x
    # torgb: 1x1 modulated convolution (no demodulation) + bias, using dlatent res*2-3.
    def torgb(res, x): # res = 2..resolution_log2
        with tf.variable_scope('ToRGB_lod%d' % (resolution_log2 - res)):
            return apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            with tf.variable_scope('Upsample_lod%d' % lod):
                images_out = upsample_2d(images_out)
            # Blend this level's image with the upsampled previous image,
            # weighted by lod_in - lod.
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # Returns a function that calls tf.cond(new_cond, new_lambda, cur_lambda).
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
        def grow(x, res, lod):
            y = block(res, x)
            # Default: this level's image upsampled by 2**lod.
            img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod)
            # When lod_in > lod: interpolate with the previous level's image first.
            img = cset(img, (lod_in > lod), lambda: naive_upsample_2d(tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)), lod_in - lod), factor=2**lod))
            # When lod_in < lod: continue to the next finer level.
            if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
            return img()
        # NOTE(review): for resolution == 4 this is grow(x, 3, -1); confirm the
        # recursive structure is only used with resolution >= 8.
        images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
def D_basic(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 1,            # Number of input color channels. Overridden based on dataset.
    resolution          = 32,           # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 8192,         # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu'.
    use_wscale          = True,         # Enable equalized learning rate?
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    fused_scale         = 'auto',       # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter         = [1,2,1],      # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the discriminator graph and return its score tensor.

    The images are turned into feature maps by `fromrgb`, passed through
    `block` for res = resolution_log2 .. 3, and the res = 2 block produces
    the scores through a convolution and two dense layers. When
    `label_size` is non-zero the scores are multiplied by `labels_in` and
    summed over axis 1.

    `structure='auto'` resolves to 'linear' for a template graph and to
    'recursive' otherwise. Any other unrecognized value leaves
    `scores_out` as None, so the final dtype check fails.

    Returns:
        The result of `tf.identity(scores_out, name='scores_out')`.

    Raises:
        AssertionError: if `resolution` is not a power of two or is < 4,
            or if the output dtype differs from `dtype`.
        KeyError: if `nonlinearity` is neither 'relu' nor 'lrelu'.
    """

    resolution_log2 = int(np.log2(resolution))  # log2 of the resolution
    assert resolution == 2**resolution_log2 and resolution >= 4  # resolution must be a power of two and at least 4
    # Number of feature maps at a stage; with the default arguments this is
    # 512 for stage <= 4 and halves for every stage above 4.
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    def blur(x): return blur2d(x, blur_filter) if blur_filter else x  # Applies blur2d, or passes x through when blur_filter is falsy
    if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'  # pick 'linear' or 'recursive' based on is_template_graph
    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]  # activation function and its gain
    # Input handling.
    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)  # level-of-detail input; the code below uses lod = resolution_log2 - res
    scores_out = None  # output scores

    # Building blocks.
    def fromrgb(x, res):  # res = 2..resolution_log2; converts an RGB image to feature maps.
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, gain=gain, use_wscale=use_wscale)))
            # 1x1 convolution followed by bias and the activation function
    def block(x, res):  # res = 2..resolution_log2; wrapped in a function so the layers are only created when needed.
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if res >= 3:  # 8x8 resolution and up
                with tf.variable_scope('Conv0'):
                    x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)))  # convolution layer
                with tf.variable_scope('Conv1_down'):
                    x = act(apply_bias(conv2d_downscale2d(blur(x), fmaps=nf(res-2), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)))  # downscaling convolution layer
            else:  # 4x4 resolution: produces the scores
                if mbstd_group_size > 1:
                    x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)  # minibatch standard deviation layer
                with tf.variable_scope('Conv'):
                    x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)))  # convolution
                with tf.variable_scope('Dense0'):
                    x = act(apply_bias(dense(x, fmaps=nf(res-2), gain=gain, use_wscale=use_wscale)))  # dense layer
                with tf.variable_scope('Dense1'):
                    x = apply_bias(dense(x, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale))  # dense layer: one unit per label, or a single unit
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)  # convert the input images to feature maps
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)  # apply the blocks for res = resolution_log2 .. 3 in order
        scores_out = block(x, 2)  # the res = 2 block outputs the scores

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)  # convert the input images to feature maps
        for res in range(resolution_log2, 2, -1):  # res goes from resolution_log2 down to 3
            lod = resolution_log2 - res
            x = block(x, res)
            img = downscale2d(img)  # downscale the image for the next level
            y = fromrgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)  # blend x and y with weight lod_in - lod for a smooth transition between levels
        scores_out = block(x, 2)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # grow() is entered at res = 2 with the largest lod. Each call first
        # obtains its input (possibly by recursing into the next finer level)
        # and only then applies its own block.
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
            # Returns a function that calls tf.cond to pick new_lambda when new_cond holds, cur_lambda otherwise
        def grow(res, lod):
            x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)  # default input: features of the image downscaled by 2**lod
            if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            # When lod > 0 and lod_in < lod, take the input from the next finer level via grow(); otherwise keep the default input.
            x = block(x(), res); y = lambda: x  # evaluate the selected input, apply this level's block, and wrap the result in a function
            if res > 2: y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod))  # when res > 2 and lod_in > lod, interpolate toward the features of the image downscaled by 2**(lod+1); otherwise keep x
            return y()
        scores_out = grow(2, resolution_log2 - 2)  # start at res = 2 with the largest lod, resolution_log2 - 2

    # Label conditioning from "Which Training Methods for GANs do actually Converge?"
    if label_size:
        with tf.variable_scope('LabelSwitch'):
            scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True)

    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out  # output

#----------------------------------------------------------------------------
def G_synthesis(
    dlatents_in,                        # Input: disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Dimensionality of the disentangled latent (W).
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 8192,         # Overall multiplier for the number of feature maps (numerator used by nf()).
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    use_styles          = True,         # Enable style inputs?
    const_input_layer   = True,         # Is the first layer a constant (variable) rather than a dense projection?
    use_noise           = True,         # Enable noise inputs?
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu'.
    use_wscale          = True,         # Enable equalized learning rate?
    use_pixel_norm      = False,        # Enable pixelwise feature vector normalization?
    use_instance_norm   = True,         # Enable instance normalization?
    dtype               = 'float32',    # Data type to use for activations and outputs.
    fused_scale         = 'auto',       # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter         = [1,2,1],      # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    force_clean_graph   = False,        # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the synthesis network graph that turns per-layer latents into images.

    The network starts from a 4x4 feature map (either a variable initialized
    to ones or a dense projection of the first latent, depending on
    `const_input_layer`), then stacks one two-layer block per resolution
    level from 8x8 up to `resolution`. Every layer ends with `layer_epilogue`,
    which applies noise, bias, activation, normalization and style modulation
    as enabled by the corresponding flags. How the RGB output is assembled
    depends on `structure` ('fixed', 'linear' or 'recursive').

    Returns:
        A tensor named 'images_out' with dtype `dtype`
        (presumably [minibatch, num_channels, resolution, resolution] --
        TODO confirm against the conv/upscale helpers).

    Raises:
        AssertionError: if `resolution` is not a power of two or is below 4,
            or if the output dtype does not match `dtype`.
        KeyError: if `nonlinearity` is not 'relu' or 'lrelu'.

    NOTE(review): an unrecognized `structure` value leaves `images_out` as
    None, so the final dtype assert fails with AttributeError rather than a
    descriptive error.
    """

    resolution_log2 = int(np.log2(resolution))  # Exponent such that resolution == 2**resolution_log2.
    assert resolution == 2**resolution_log2 and resolution >= 4   # Resolution must be a power of two and at least 4.
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)  
    # nf() gives the number of feature maps at `stage`: fmap_base / 2**(stage * fmap_decay), capped at fmap_max.
    # With the default arguments this is 512 for stage <= 4 and halves for every stage beyond that.
    def blur(x): return blur2d(x, blur_filter) if blur_filter else x  # Apply blur2d with blur_filter; pass x through unchanged when blur_filter is falsy.
    if is_template_graph: force_clean_graph = True
    if force_clean_graph: randomize_noise = False
    if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'  
    # 'auto' resolves to 'linear' for clean graphs and to 'recursive' otherwise.
    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]  # Activation function and its gain (sqrt(2) for both choices).
    num_layers = resolution_log2 * 2 - 2  # Two layers per resolution level starting at 4x4; e.g. resolution_log2 = 10 gives 18.
    num_styles = num_layers if use_styles else 1  # One style per layer when styles are enabled, otherwise a single entry.
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_styles, dlatent_size])  # Expected shape [minibatch, num_styles, dlatent_size]; (?, 18, 512) with the defaults.
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)  # Non-trainable 'lod' variable (initialized to 0) compared against lod = resolution_log2 - res below.

    # Noise inputs.
    noise_inputs = []
    if use_noise:
        for layer_idx in range(num_layers):
            res = layer_idx // 2 + 2  # [2, 2, 3, 3, ..., resolution_log2, resolution_log2]
            shape = [1, use_noise, 2**res, 2**res]  # use_noise doubles as the second dimension (True acts as 1); spatial size grows from 4x4 to 2**resolution_log2.
            noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))  # Non-trainable noise variable with random-normal initialization.

    # Things to do at the end of each layer.
    def layer_epilogue(x, layer_idx):
        if use_noise:
            x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise)  # Add this layer's noise input.
        x = apply_bias(x)  # Add bias.
        x = act(x)  # Apply the selected activation function.
        if use_pixel_norm:
            x = pixel_norm(x)  # Pixelwise feature vector normalization.
        if use_instance_norm:
            x = instance_norm(x)  # Instance normalization.
        if use_styles:
            x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)  # Style modulation driven by this layer's latent (presumably AdaIN-style; helper defined elsewhere).
        return x

    # Early layers.
    with tf.variable_scope('4x4'):
        if const_input_layer:  # Start the network from a variable instead of from the latent.
            with tf.variable_scope('Const'):
                x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones())  # Variable initialized to ones; shape (1, 512, 4, 4) with the defaults.
                x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)  # Tile across the minibatch, then run the layer-0 epilogue.
        else:
            with tf.variable_scope('Dense'):
                x = dense(dlatents_in[:, 0], fmaps=nf(1)*16, gain=gain/4, use_wscale=use_wscale)  # Project the first latent; gain/4 reportedly matches the official Progressive GAN implementation -- TODO confirm.
                x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
        with tf.variable_scope('Conv'):
            x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1)  # Layer 1: 3x3 convolution followed by the epilogue.

    # Building blocks for remaining layers.
    def block(res, x):  # res goes from 3 to resolution_log2; wrapped in a function so layers are only created when requested.
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):  # Layers 2, 4, 6, ... (index res*2-4): upsampling convolution followed by blur.
                x = layer_epilogue(blur(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)), res*2-4)
            with tf.variable_scope('Conv1'):  # Layers 3, 5, 7, ... (index res*2-3): plain 3x3 convolution.
                x = layer_epilogue(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale), res*2-3)
            return x
    def torgb(res, x): # res goes from 2 to resolution_log2; converts feature maps to an image with num_channels channels.
        lod = resolution_log2 - res
        with tf.variable_scope('ToRGB_lod%d' % lod):
            return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale))  # ToRGB is a 1x1 convolution plus bias.

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):  # res goes from 3 to resolution_log2.
            x = block(res, x)  # Stack every block up to the full output resolution.
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):  # res goes from 3 to resolution_log2.
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            images_out = upscale2d(images_out)  # Upscale the lower-resolution image so it can be blended with img (upscale2d called with its default factor).
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)  
                # Blend img with the upscaled lower-resolution image using weight lod_in - lod
                # (lerp_clip presumably clamps the weight to [0, 1] -- TODO confirm), giving a smooth transition between levels.

    # Recursive structure: complex but efficient.
    # Branches are passed around as zero-argument lambdas so they can be handed to tf.cond.
    if structure == 'recursive':
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)  
            # Returns a function that builds a tf.cond: new_lambda is taken when new_cond holds, cur_lambda otherwise.
        def grow(x, res, lod):
            y = block(res, x)
            img = lambda: upscale2d(torgb(res, y), 2**lod)
            img = cset(img, (lod_in > lod), lambda: upscale2d(tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod), 2**lod))  
            # When lod_in > lod, interpolate between this level's image and the upscaled image of the previous level
            # with weight lod_in - lod, then upscale by 2**lod; otherwise output this level's image upscaled by 2**lod.
            if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))  
            # Unless this is the last level (lod == 0), lod_in < lod hands over to the next resolution (res + 1, lod - 1).
            # This condition is wrapped last, so it is the outermost tf.cond and is checked first.
            return img()
        images_out = grow(x, 3, resolution_log2 - 3)  
        # Start at res = 3 with lod = resolution_log2 - 3; the recursion covers every level up to resolution_log2.
        # NOTE(review): with resolution == 4 this call passes lod = -1 -- confirm whether that case is supported.

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')  # Name the output tensor 'images_out'.