def truncate_fancy(dlat, dlat_avg, model_scale=18, truncation_psi=0.7, minlayer=0, maxlayer=8, do_clip=False):
    """Interpolate `dlat` towards `dlat_avg` with a per-layer coefficient.

    A coefficient array of shape (1, model_scale, 1) is built: layers whose
    index is below `maxlayer` get `truncation_psi`, all other layers get 1.0.
    When `minlayer > 0`, the first `minlayer` layers are reset to 1.0 as well.
    The coefficients are then handed to `tflib.lerp` / `tflib.lerp_clip`
    together with `dlat_avg` and `dlat`.

    Args:
        dlat: Latents to truncate (presumably [batch, model_scale, dlatent_size],
            given the coefficient shape -- TODO confirm against callers).
        dlat_avg: Average latent used as the interpolation origin.
        model_scale: Number of layers the coefficient array covers.
        truncation_psi: Coefficient applied to the truncated layers.
        minlayer: Layers with index below this value are left untruncated.
        maxlayer: Layers with index at or above this value are left untruncated.
        do_clip: Use `tflib.lerp_clip` and call `.eval()` on its result.

    Returns:
        `tflib.lerp_clip(...).eval()` when `do_clip` is truthy, otherwise the
        direct result of `tflib.lerp(...)`.
    """
    layer_ids = np.arange(model_scale)[None, :, None]
    unit = np.ones(layer_ids.shape, dtype=np.float32)

    # psi for the layers below maxlayer, 1.0 elsewhere.
    coefs = np.where(layer_ids < maxlayer, truncation_psi * unit, unit)
    if minlayer > 0:
        # The lowest `minlayer` layers are exempt from truncation.
        coefs[0, :minlayer, :] = unit[0, :minlayer, :]

    if not do_clip:
        return tflib.lerp(dlat_avg, dlat, coefs)
    # NOTE(review): .eval() presumably needs a default TF session -- confirm.
    return tflib.lerp_clip(dlat_avg, dlat, coefs).eval()
def D_basic(
        images_in,                  # First input: Images [minibatch, channel, height, width].
        labels_in,                  # Second input: Labels [minibatch, label_size].
        num_channels=1,             # Number of input color channels. Overridden based on dataset.
        resolution=32,              # Input resolution. Overridden based on dataset.
        label_size=0,               # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
        fmap_base=8192,             # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,             # log2 feature map reduction when doubling the resolution.
        fmap_max=512,               # Maximum number of feature maps in any layer.
        nonlinearity='lrelu',       # Activation function: 'relu', 'lrelu'.
        use_wscale=True,            # Enable equalized learning rate?
        mbstd_group_size=4,         # Group size for the minibatch standard deviation layer, 0 = disable.
        mbstd_num_features=1,       # Number of features for the minibatch standard deviation layer.
        dtype='float32',            # Data type to use for activations and outputs.
        fused_scale='auto',         # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
        blur_filter=[1, 2, 1],      # Low-pass filter to apply when resampling activations. None = no filtering.
        structure='auto',           # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
        is_template_graph=False,    # True = template graph constructed by the Network class, False = actual evaluation.
        **_kwargs):                 # Ignore unrecognized keyword args.
    """Build the discriminator graph and return the `scores_out` tensor.

    `structure='auto'` resolves to 'linear' for template graphs and to
    'recursive' otherwise. `resolution` must be a power of two and at least 4.
    An unknown `nonlinearity` raises KeyError from the activation lookup.
    The minibatch-stddev layer is only inserted when `mbstd_group_size > 1`.
    When `label_size` is non-zero the scores are multiplied by `labels_in`
    and summed over axis 1.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def num_fmaps(stage):
        # Feature-map count for a stage, capped at fmap_max.
        return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

    def lowpass(t):
        # Optional low-pass filtering ahead of downscaling.
        if blur_filter:
            return blur2d(t, blur_filter)
        return t

    if structure == 'auto':
        structure = 'recursive' if not is_template_graph else 'linear'

    activations = {
        'relu': (tf.nn.relu, np.sqrt(2)),
        'lrelu': (leaky_relu, np.sqrt(2)),
    }
    act, gain = activations[nonlinearity]
    shared = dict(gain=gain, use_wscale=use_wscale)  # kwargs common to most layers

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    lod_var = tf.get_variable('lod', initializer=np.float32(0.0), trainable=False)
    lod_in = tf.cast(lod_var, dtype)
    scores_out = None

    # Building blocks.
    def from_rgb(img, res):  # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            features = conv2d(img, fmaps=num_fmaps(res - 1), kernel=1, **shared)
            return act(apply_bias(features))

    def disc_block(t, res):  # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if res < 3:  # 4x4: final conv + two dense layers
                if mbstd_group_size > 1:
                    t = minibatch_stddev_layer(t, mbstd_group_size, mbstd_num_features)
                with tf.variable_scope('Conv'):
                    t = conv2d(t, fmaps=num_fmaps(res - 1), kernel=3, **shared)
                    t = act(apply_bias(t))
                with tf.variable_scope('Dense0'):
                    t = dense(t, fmaps=num_fmaps(res - 2), **shared)
                    t = act(apply_bias(t))
                with tf.variable_scope('Dense1'):
                    t = dense(t, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale)
                    t = apply_bias(t)
                return t

            # 8x8 and up: conv followed by a downscaling conv.
            with tf.variable_scope('Conv0'):
                t = conv2d(t, fmaps=num_fmaps(res - 1), kernel=3, **shared)
                t = act(apply_bias(t))
            with tf.variable_scope('Conv1_down'):
                t = conv2d_downscale2d(lowpass(t), fmaps=num_fmaps(res - 2), kernel=3,
                                       fused_scale=fused_scale, **shared)
                t = act(apply_bias(t))
            return t

    if structure == 'fixed':
        # Fixed structure: simple and efficient, but does not support progressive growing.
        feat = from_rgb(images_in, resolution_log2)
        for res in reversed(range(3, resolution_log2 + 1)):
            feat = disc_block(feat, res)
        scores_out = disc_block(feat, 2)

    elif structure == 'linear':
        # Linear structure: simple but inefficient.
        img = images_in
        feat = from_rgb(img, resolution_log2)
        # lod counts up from 0 while res counts down from resolution_log2.
        for lod, res in enumerate(range(resolution_log2, 2, -1)):
            feat = disc_block(feat, res)
            img = downscale2d(img)
            skip = from_rgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                feat = tflib.lerp_clip(feat, skip, lod_in - lod)
        scores_out = disc_block(feat, 2)

    elif structure == 'recursive':
        # Recursive structure: complex but efficient.
        def either(fallback, cond, preferred):
            # Deferred tf.cond: `preferred` when `cond` holds, else `fallback`.
            return lambda: tf.cond(cond, preferred, fallback)

        def build(res, lod):
            def source():
                return from_rgb(downscale2d(images_in, 2**lod), res)

            if lod > 0:
                source = either(source, (lod_in < lod), lambda: build(res + 1, lod - 1))
            feat = disc_block(source(), res)

            def result():
                return feat

            if res > 2:
                result = either(
                    result, (lod_in > lod),
                    lambda: tflib.lerp(
                        feat,
                        from_rgb(downscale2d(images_in, 2**(lod + 1)), res - 1),
                        lod_in - lod))
            return result()

        scores_out = build(2, resolution_log2 - 2)

    # Label conditioning from "Which Training Methods for GANs do actually Converge?"
    if label_size:
        with tf.variable_scope('LabelSwitch'):
            scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True)

    assert scores_out.dtype == tf.as_dtype(dtype)
    return tf.identity(scores_out, name='scores_out')
def G_synthesis(
        dlatents_in,                # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
        dlatent_size=512,           # Disentangled latent (W) dimensionality.
        num_channels=3,             # Number of output color channels.
        resolution=1024,            # Output resolution.
        fmap_base=8192,             # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,             # log2 feature map reduction when doubling the resolution.
        fmap_max=512,               # Maximum number of feature maps in any layer.
        use_styles=True,            # Enable style inputs?
        const_input_layer=True,     # First layer is a learned constant?
        use_noise=True,             # Enable noise inputs?
        randomize_noise=True,       # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
        nonlinearity='lrelu',       # Activation function: 'relu', 'lrelu'.
        use_wscale=True,            # Enable equalized learning rate?
        use_pixel_norm=False,       # Enable pixelwise feature vector normalization?
        use_instance_norm=True,     # Enable instance normalization?
        dtype='float32',            # Data type to use for activations and outputs.
        fused_scale='auto',         # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
        blur_filter=[1, 2, 1],      # Low-pass filter to apply when resampling activations. None = no filtering.
        structure='auto',           # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
        is_template_graph=False,    # True = template graph constructed by the Network class, False = actual evaluation.
        force_clean_graph=False,    # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
        **_kwargs):                 # Ignore unrecognized keyword args.
    """Build the synthesis network graph and return the `images_out` tensor.

    `is_template_graph` forces `force_clean_graph`, which in turn disables
    `randomize_noise` and makes `structure='auto'` resolve to 'linear'
    (otherwise 'auto' resolves to 'recursive').

    Raises:
        AssertionError: if `resolution` is not a power of two >= 4.
        KeyError: if `nonlinearity` is not 'relu' or 'lrelu'.
        ValueError: if `structure` is not 'auto', 'fixed', 'linear' or 'recursive'.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Feature-map count for a stage, capped at fmap_max.
        return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max)

    def blur(x):
        return blur2d(x, blur_filter) if blur_filter else x

    if is_template_graph:
        force_clean_graph = True
    if force_clean_graph:
        randomize_noise = False
    if structure == 'auto':
        structure = 'linear' if force_clean_graph else 'recursive'
    # Reject unknown structures up front instead of failing later on `None.dtype`
    # after variables have already been created.
    if structure not in ('fixed', 'linear', 'recursive'):
        raise ValueError('Unknown structure: %r' % (structure,))
    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]
    num_layers = resolution_log2 * 2 - 2  # two layers per resolution from 4x4 upwards
    num_styles = num_layers if use_styles else 1
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_styles, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs: one non-trainable variable per layer.
    noise_inputs = []
    if use_noise:
        for layer_idx in range(num_layers):
            res = layer_idx // 2 + 2
            # NOTE: `use_noise` is used as the channel dimension (True acts as 1).
            shape = [1, use_noise, 2**res, 2**res]
            noise_inputs.append(tf.get_variable(
                'noise%d' % layer_idx, shape=shape,
                initializer=tf.initializers.random_normal(), trainable=False))

    # Things to do at the end of each layer.
    def layer_epilogue(x, layer_idx):
        if use_noise:
            x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise)
        x = apply_bias(x)
        x = act(x)
        if use_pixel_norm:
            x = pixel_norm(x)
        if use_instance_norm:
            x = instance_norm(x)
        if use_styles:
            x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)
        return x

    # Early layers.
    with tf.variable_scope('4x4'):
        if const_input_layer:
            with tf.variable_scope('Const'):
                x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones())
                x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)
        else:
            with tf.variable_scope('Dense'):
                # tweak gain to match the official implementation of Progressing GAN
                x = dense(dlatents_in[:, 0], fmaps=nf(1) * 16, gain=gain / 4, use_wscale=use_wscale)
                x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
        with tf.variable_scope('Conv'):
            x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1)

    # Building blocks for remaining layers.
    def block(res, x):  # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = layer_epilogue(
                    blur(upscale2d_conv2d(x, fmaps=nf(res - 1), kernel=3, gain=gain,
                                          use_wscale=use_wscale, fused_scale=fused_scale)),
                    res * 2 - 4)
            with tf.variable_scope('Conv1'):
                x = layer_epilogue(
                    conv2d(x, fmaps=nf(res - 1), kernel=3, gain=gain, use_wscale=use_wscale),
                    res * 2 - 3)
            return x

    def torgb(res, x):  # res = 2..resolution_log2
        lod = resolution_log2 - res
        with tf.variable_scope('ToRGB_lod%d' % lod):
            return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            images_out = upscale2d(images_out)
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        if resolution_log2 == 2:
            # A 4x4 network has no growing blocks; grow(x, 3, -1) would try to
            # build a non-existent 8x8 block. Emit the 4x4 RGB output directly,
            # matching what the 'fixed' and 'linear' paths produce.
            images_out = torgb(2, x)
        else:
            def cset(cur_lambda, new_cond, new_lambda):
                # Deferred tf.cond: new_lambda when new_cond holds, else cur_lambda.
                return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

            def grow(x, res, lod):
                y = block(res, x)
                img = lambda: upscale2d(torgb(res, y), 2**lod)
                img = cset(
                    img, (lod_in > lod),
                    lambda: upscale2d(
                        tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod),
                        2**lod))
                if lod > 0:
                    img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
                return img()

            images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
def D_stylegan(
        images_in,                      # First input: Images [minibatch, channel, height, width].
        labels_in,                      # Second input: Labels [minibatch, label_size].
        num_channels=3,                 # Number of input color channels. Overridden based on dataset.
        resolution=1024,                # Input resolution. Overridden based on dataset.
        label_size=0,                   # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
        fmap_base=16 << 10,             # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,                 # log2 feature map reduction when doubling the resolution.
        fmap_min=1,                     # Minimum number of feature maps in any layer.
        fmap_max=512,                   # Maximum number of feature maps in any layer.
        nonlinearity='lrelu',           # Activation function: 'relu', 'lrelu', etc.
        mbstd_group_size=4,             # Group size for the minibatch standard deviation layer, 0 = disable.
        mbstd_num_features=1,           # Number of features for the minibatch standard deviation layer.
        dtype='float32',                # Data type to use for activations and outputs.
        resample_kernel=[1, 3, 3, 1],   # Low-pass filter to apply when resampling activations. None = no filtering.
        structure='auto',               # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
        is_template_graph=False,        # True = template graph constructed by the Network class, False = actual evaluation.
        **_kwargs):                     # Ignore unrecognized keyword args.
    """Build the discriminator graph and return the `scores_out` tensor.

    `structure='auto'` resolves to 'linear' for template graphs and to
    'recursive' otherwise. The spatial layers reduce the input to 4x4, after
    which an optional minibatch-stddev layer (only when
    `mbstd_group_size > 1`), a conv layer and two dense layers produce the
    scores. With a non-empty label dimension, the scores are multiplied by
    `labels_in` and summed over axis 1.

    Raises:
        AssertionError: if `resolution` is not a power of two >= 4.
        ValueError: if `structure` is not 'auto', 'fixed', 'linear' or 'recursive'.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Feature-map count for a stage, clipped to [fmap_min, fmap_max].
        return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)

    if structure == 'auto':
        structure = 'linear' if is_template_graph else 'recursive'
    # Reject unknown structures up front; previously `x` was simply left
    # undefined and the 4x4 stage failed with a NameError.
    if structure not in ('fixed', 'linear', 'recursive'):
        raise ValueError('Unknown structure: %r' % (structure,))
    act = nonlinearity

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)

    # Building blocks for spatial layers.
    def fromrgb(x, res):  # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return apply_bias_act(conv2d_layer(x, fmaps=nf(res - 1), kernel=1), act=act)

    def block(x, res):  # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0'):
                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res - 1), kernel=3), act=act)
            with tf.variable_scope('Conv1_down'):
                x = apply_bias_act(
                    conv2d_layer(x, fmaps=nf(res - 2), kernel=3, down=True,
                                 resample_kernel=resample_kernel),
                    act=act)
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            lod = resolution_log2 - res
            x = block(x, res)
            with tf.variable_scope('Downsample_lod%d' % lod):
                img = downsample_2d(img)
            y = fromrgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        if resolution_log2 == 2:
            # A 4x4 input has no spatial blocks; grow(3, -1) would downsample
            # by 2**-1. Feed the image straight into the 4x4 stage, matching
            # what the 'fixed' and 'linear' paths produce.
            x = fromrgb(images_in, 2)
        else:
            def cset(cur_lambda, new_cond, new_lambda):
                # Deferred tf.cond: new_lambda when new_cond holds, else cur_lambda.
                return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

            def grow(res, lod):
                x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2**lod), res)
                if lod > 0:
                    x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
                x = block(x(), res)
                y = lambda: x
                y = cset(
                    y, (lod_in > lod),
                    lambda: tflib.lerp(
                        x,
                        fromrgb(naive_downsample_2d(images_in, factor=2**(lod + 1)), res - 1),
                        lod_in - lod))
                return y()

            x = grow(3, resolution_log2 - 3)

    # Final layers at 4x4 resolution.
    with tf.variable_scope('4x4'):
        if mbstd_group_size > 1:
            with tf.variable_scope('MinibatchStddev'):
                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)

    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
    with tf.variable_scope('Output'):
        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
        if labels_in.shape[1] > 0:
            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
    scores_out = x

    # Output.
    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out
def G_synthesis_stylegan_revised(
        dlatents_in,                    # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
        dlatent_size=512,               # Disentangled latent (W) dimensionality.
        num_channels=3,                 # Number of output color channels.
        resolution=1024,                # Output resolution.
        fmap_base=16 << 10,             # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,                 # log2 feature map reduction when doubling the resolution.
        fmap_min=1,                     # Minimum number of feature maps in any layer.
        fmap_max=512,                   # Maximum number of feature maps in any layer.
        randomize_noise=True,           # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
        nonlinearity='lrelu',           # Activation function: 'relu', 'lrelu', etc.
        dtype='float32',                # Data type to use for activations and outputs.
        resample_kernel=[1, 3, 3, 1],   # Low-pass filter to apply when resampling activations. None = no filtering.
        fused_modconv=True,             # Implement modulated_conv2d_layer() as a single fused op?
        structure='auto',               # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
        is_template_graph=False,        # True = template graph constructed by the Network class, False = actual evaluation.
        force_clean_graph=False,        # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
        **_kwargs):                     # Ignore unrecognized keyword args.
    """Build the modulated-convolution synthesis graph and return `images_out`.

    `is_template_graph` forces `force_clean_graph`, which in turn disables
    `randomize_noise` and makes `structure='auto'` resolve to 'linear'
    (otherwise 'auto' resolves to 'recursive').

    Raises:
        AssertionError: if `resolution` is not a power of two >= 4.
        ValueError: if `structure` is not 'auto', 'fixed', 'linear' or 'recursive'.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Feature-map count for a stage, clipped to [fmap_min, fmap_max].
        return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)

    if is_template_graph:
        force_clean_graph = True
    if force_clean_graph:
        randomize_noise = False
    if structure == 'auto':
        structure = 'linear' if force_clean_graph else 'recursive'
    # Reject unknown structures up front instead of failing later on `None.dtype`
    # after variables have already been created.
    if structure not in ('fixed', 'linear', 'recursive'):
        raise ValueError('Unknown structure: %r' % (structure,))
    act = nonlinearity
    num_layers = resolution_log2 * 2 - 2
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs: one non-trainable variable per conv layer (ToRGB layers get none).
    noise_inputs = []
    for layer_idx in range(num_layers - 1):
        res = (layer_idx + 5) // 2
        shape = [1, 1, 2**res, 2**res]
        noise_inputs.append(tf.get_variable(
            'noise%d' % layer_idx, shape=shape,
            initializer=tf.initializers.random_normal(), trainable=False))

    # Single convolution layer with all the bells and whistles.
    def layer(x, layer_idx, fmaps, kernel, up=False):
        x = modulated_conv2d_layer(
            x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up,
            resample_kernel=resample_kernel, fused_modconv=fused_modconv)
        if randomize_noise:
            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
        else:
            noise = tf.cast(noise_inputs[layer_idx], x.dtype)
        # Noise strength is a scalar variable initialised to zero.
        noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
        x += noise * tf.cast(noise_strength, x.dtype)
        return apply_bias_act(x, act=act)

    # Early layers.
    with tf.variable_scope('4x4'):
        with tf.variable_scope('Const'):
            x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
            x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
        with tf.variable_scope('Conv'):
            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)

    # Building blocks for remaining layers.
    def block(res, x):  # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = layer(x, layer_idx=res * 2 - 5, fmaps=nf(res - 1), kernel=3, up=True)
            with tf.variable_scope('Conv1'):
                x = layer(x, layer_idx=res * 2 - 4, fmaps=nf(res - 1), kernel=3)
            return x

    def torgb(res, x):  # res = 2..resolution_log2
        with tf.variable_scope('ToRGB_lod%d' % (resolution_log2 - res)):
            return apply_bias_act(modulated_conv2d_layer(
                x, dlatents_in[:, res * 2 - 3], fmaps=num_channels, kernel=1,
                demodulate=False, fused_modconv=fused_modconv))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            with tf.variable_scope('Upsample_lod%d' % lod):
                images_out = upsample_2d(images_out)
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        if resolution_log2 == 2:
            # A 4x4 network has no growing blocks; grow(x, 3, -1) would try to
            # build a non-existent 8x8 block. Emit the 4x4 RGB output directly,
            # matching what the 'fixed' and 'linear' paths produce.
            images_out = torgb(2, x)
        else:
            def cset(cur_lambda, new_cond, new_lambda):
                # Deferred tf.cond: new_lambda when new_cond holds, else cur_lambda.
                return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

            def grow(x, res, lod):
                y = block(res, x)
                img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod)
                img = cset(
                    img, (lod_in > lod),
                    lambda: naive_upsample_2d(
                        tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)), lod_in - lod),
                        factor=2**lod))
                if lod > 0:
                    img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
                return img()

            images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
def D_basic(
        images_in,                  # First input: Images [minibatch, channel, height, width].
        labels_in,                  # Second input: Labels [minibatch, label_size].
        num_channels=1,             # Number of input color channels. Overridden based on dataset.
        resolution=32,              # Input resolution. Overridden based on dataset.
        label_size=0,               # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
        fmap_base=8192,             # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,             # log2 feature map reduction when doubling the resolution.
        fmap_max=512,               # Maximum number of feature maps in any layer.
        nonlinearity='lrelu',       # Activation function: 'relu', 'lrelu'.
        use_wscale=True,            # Enable equalized learning rate?
        mbstd_group_size=4,         # Group size for the minibatch standard deviation layer, 0 = disable.
        mbstd_num_features=1,       # Number of features for the minibatch standard deviation layer.
        dtype='float32',            # Data type to use for activations and outputs.
        fused_scale='auto',         # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
        blur_filter=[1, 2, 1],      # Low-pass filter to apply when resampling activations. None = no filtering.
        structure='auto',           # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
        is_template_graph=False,    # True = template graph constructed by the Network class, False = actual evaluation.
        **_kwargs):                 # Ignore unrecognized keyword args.
    """Construct the discriminator graph and return the `scores_out` tensor.

    The graph maps `images_in` down to 4x4 through per-resolution blocks and
    finishes with a conv layer and two dense layers. `structure` selects how
    progressive growing is wired; 'auto' picks 'linear' for template graphs
    and 'recursive' otherwise.
    """
    # Exponent of the resolution; the resolution must be a power of two >= 4.
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def fmaps_for(stage):
        # With the default arguments this is 512 for stage <= 4 and halves
        # with every further stage.
        uncapped = int(fmap_base / (2.0 ** (stage * fmap_decay)))
        return min(uncapped, fmap_max)

    def maybe_blur(t):
        # Apply the low-pass filter only when one is configured.
        return blur2d(t, blur_filter) if blur_filter else t

    if structure == 'auto':
        structure = 'linear' if is_template_graph else 'recursive'

    # Activation function and the gain passed to the conv/dense layers.
    act_table = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}
    act, gain = act_table[nonlinearity]
    wargs = {'gain': gain, 'use_wscale': use_wscale}

    # Input handling.
    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    # Level-of-detail input; elsewhere in this function lod = resolution_log2 - res.
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)
    scores_out = None

    def bias_then_act(t):
        return act(apply_bias(t))

    # Building blocks.
    def fromrgb(img, res):  # res = 2..resolution_log2; RGB image -> feature maps via a 1x1 conv.
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return bias_then_act(conv2d(img, fmaps=fmaps_for(res - 1), kernel=1, **wargs))

    def block(t, res):  # res = 2..resolution_log2; layers are created on demand.
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if res >= 3:  # 8x8 and up
                with tf.variable_scope('Conv0'):
                    t = bias_then_act(conv2d(t, fmaps=fmaps_for(res - 1), kernel=3, **wargs))
                with tf.variable_scope('Conv1_down'):  # downscaling conv
                    t = bias_then_act(conv2d_downscale2d(
                        maybe_blur(t), fmaps=fmaps_for(res - 2), kernel=3,
                        fused_scale=fused_scale, **wargs))
            else:  # 4x4: produces the discriminator scores
                if mbstd_group_size > 1:
                    t = minibatch_stddev_layer(t, mbstd_group_size, mbstd_num_features)
                with tf.variable_scope('Conv'):
                    t = bias_then_act(conv2d(t, fmaps=fmaps_for(res - 1), kernel=3, **wargs))
                with tf.variable_scope('Dense0'):
                    t = bias_then_act(dense(t, fmaps=fmaps_for(res - 2), **wargs))
                with tf.variable_scope('Dense1'):
                    t = apply_bias(dense(t, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale))
            return t

    if structure == 'fixed':
        # Fixed structure: simple and efficient, but does not support progressive growing.
        # Builds the full chain from the input resolution down to 4x4.
        h = fromrgb(images_in, resolution_log2)
        res = resolution_log2
        while res > 2:
            h = block(h, res)
            res -= 1
        scores_out = block(h, 2)

    elif structure == 'linear':
        # Linear structure: simple but inefficient.
        img = images_in
        h = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):  # res = resolution_log2..3
            lod = resolution_log2 - res
            h = block(h, res)
            img = downscale2d(img)  # halve the image resolution
            skip = fromrgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                # Clipped linear interpolation between the block output and the
                # downscaled-image path, driven by lod_in.
                h = tflib.lerp_clip(h, skip, lod_in - lod)
        scores_out = block(h, 2)

    elif structure == 'recursive':
        # Recursive structure: complex but efficient.
        # The recursion starts at the largest lod (lowest resolution) and
        # descends towards lod 0.
        def cset(cur_lambda, new_cond, new_lambda):
            # Returns a thunk building tf.cond: new_lambda if new_cond else cur_lambda.
            def deferred():
                return tf.cond(new_cond, new_lambda, cur_lambda)
            return deferred

        def grow(res, lod):
            # Default input for this level: the image downscaled by 2**lod.
            make_input = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
            if lod > 0:
                # When lod_in < lod, take the output of the next finer level instead.
                make_input = cset(make_input, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            h = block(make_input(), res)
            make_output = lambda: h
            if res > 2:
                # When lod_in > lod, blend with the next coarser image path.
                make_output = cset(
                    make_output, (lod_in > lod),
                    lambda: tflib.lerp(
                        h, fromrgb(downscale2d(images_in, 2**(lod + 1)), res - 1), lod_in - lod))
            return make_output()

        # Starts at res = 2, i.e. lod = resolution_log2 - 2 (8 for a 1024x1024 input).
        scores_out = grow(2, resolution_log2 - 2)

    # Label conditioning from "Which Training Methods for GANs do actually Converge?"
    if label_size:
        with tf.variable_scope('LabelSwitch'):
            scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True)

    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out

#----------------------------------------------------------------------------
def G_synthesis(
        dlatents_in,                # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
        dlatent_size=512,           # Disentangled latent (W) dimensionality.
        num_channels=3,             # Number of output color channels.
        resolution=1024,            # Output resolution.
        fmap_base=8192,             # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,             # log2 feature map reduction when doubling the resolution.
        fmap_max=512,               # Maximum number of feature maps in any layer.
        use_styles=True,            # Enable style inputs?
        const_input_layer=True,     # First layer is a learned constant?
        use_noise=True,             # Enable noise inputs?
        randomize_noise=True,       # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
        nonlinearity='lrelu',       # Activation function: 'relu', 'lrelu'.
        use_wscale=True,            # Enable equalized learning rate?
        use_pixel_norm=False,       # Enable pixelwise feature vector normalization?
        use_instance_norm=True,     # Enable instance normalization?
        dtype='float32',            # Data type to use for activations and outputs.
        fused_scale='auto',         # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
        blur_filter=[1, 2, 1],      # Low-pass filter to apply when resampling activations. None = no filtering.
        structure='auto',           # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
        is_template_graph=False,    # True = template graph constructed by the Network class, False = actual evaluation.
        force_clean_graph=False,    # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
        **_kwargs):                 # Ignore unrecognized keyword args.
    """Construct the synthesis network graph and return the `images_out` tensor.

    Starting from a 4x4 stage, each further resolution adds an upscaling conv
    and a plain conv, each followed by `layer_epilogue`. `structure` selects
    how progressive growing is wired; 'auto' picks 'linear' when a clean graph
    is requested and 'recursive' otherwise.

    Raises:
        AssertionError: if `resolution` is not a power of two >= 4.
        KeyError: if `nonlinearity` is not 'relu' or 'lrelu'.
        ValueError: if `structure` is not 'auto', 'fixed', 'linear' or 'recursive'.
    """
    # Exponent of the resolution; the resolution must be a power of two >= 4.
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # With the default arguments this is 512 for stage <= 4 and halves
        # with every further stage.
        return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)

    def blur(x):
        # Apply the low-pass filter only when one is configured.
        return blur2d(x, blur_filter) if blur_filter else x

    if is_template_graph:
        force_clean_graph = True
    if force_clean_graph:
        randomize_noise = False
    if structure == 'auto':
        structure = 'linear' if force_clean_graph else 'recursive'
    # Reject unknown structures up front instead of failing later on `None.dtype`
    # after variables have already been created.
    if structure not in ('fixed', 'linear', 'recursive'):
        raise ValueError('Unknown structure: %r' % (structure,))
    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]
    # Two layers per resolution from 4x4 upwards: 10 * 2 - 2 = 18 at 1024x1024.
    num_layers = resolution_log2 * 2 - 2
    num_styles = num_layers if use_styles else 1
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_styles, dlatent_size])  # (?, 18, 512) with the defaults
    dlatents_in = tf.cast(dlatents_in, dtype)
    # Level-of-detail input; elsewhere in this function lod = resolution_log2 - res.
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs: one non-trainable, randomly initialised variable per layer.
    noise_inputs = []
    if use_noise:
        for layer_idx in range(num_layers):
            res = layer_idx // 2 + 2  # 2, 2, 3, 3, ..., resolution_log2, resolution_log2
            # Shapes run from [1, 1, 4, 4] up to [1, 1, resolution, resolution].
            # NOTE: `use_noise` is used as the channel dimension (True acts as 1).
            shape = [1, use_noise, 2**res, 2**res]
            noise_inputs.append(tf.get_variable(
                'noise%d' % layer_idx, shape=shape,
                initializer=tf.initializers.random_normal(), trainable=False))

    # Things to do at the end of each layer.
    def layer_epilogue(x, layer_idx):
        if use_noise:
            x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise)
        x = apply_bias(x)
        x = act(x)
        if use_pixel_norm:
            x = pixel_norm(x)
        if use_instance_norm:
            x = instance_norm(x)
        if use_styles:
            # Style modulation with this layer's latent (presumably AdaIN -- see style_mod).
            x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)
        return x

    # Early layers.
    with tf.variable_scope('4x4'):
        if const_input_layer:
            # The network starts from a learned constant, tiled over the batch.
            with tf.variable_scope('Const'):
                x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones())
                x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)
        else:
            # The network starts from a dense projection of the first latent.
            with tf.variable_scope('Dense'):
                # tweak gain to match the official implementation of Progressing GAN
                x = dense(dlatents_in[:, 0], fmaps=nf(1) * 16, gain=gain / 4, use_wscale=use_wscale)
                x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
        with tf.variable_scope('Conv'):
            x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1)

    # Building blocks for remaining layers.
    def block(res, x):  # res = 3..resolution_log2; layers are created on demand.
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                # Even layer indices (2, 4, ...): upscaling conv followed by the blur filter.
                x = layer_epilogue(
                    blur(upscale2d_conv2d(x, fmaps=nf(res - 1), kernel=3, gain=gain,
                                          use_wscale=use_wscale, fused_scale=fused_scale)),
                    res * 2 - 4)
            with tf.variable_scope('Conv1'):
                # Odd layer indices (3, 5, ...): plain conv.
                x = layer_epilogue(
                    conv2d(x, fmaps=nf(res - 1), kernel=3, gain=gain, use_wscale=use_wscale),
                    res * 2 - 3)
            return x

    def torgb(res, x):  # res = 2..resolution_log2; feature maps -> RGB via a 1x1 conv.
        lod = resolution_log2 - res
        with tf.variable_scope('ToRGB_lod%d' % lod):
            return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            images_out = upscale2d(images_out)  # double the resolution of the running image
            with tf.variable_scope('Grow_lod%d' % lod):
                # Clipped linear interpolation between the new and the upscaled image.
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        if resolution_log2 == 2:
            # A 4x4 network has no growing blocks; grow(x, 3, -1) would try to
            # build a non-existent 8x8 block. Emit the 4x4 RGB output directly,
            # matching what the 'fixed' and 'linear' paths produce.
            images_out = torgb(2, x)
        else:
            def cset(cur_lambda, new_cond, new_lambda):
                # Returns a thunk building tf.cond: new_lambda if new_cond else cur_lambda.
                return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

            def grow(x, res, lod):
                y = block(res, x)
                # Default: this level's image upscaled to the output resolution.
                img = lambda: upscale2d(torgb(res, y), 2**lod)
                # When lod_in > lod: blend with the upscaled image of the previous level.
                img = cset(
                    img, (lod_in > lod),
                    lambda: upscale2d(
                        tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod),
                        2**lod))
                if lod > 0:
                    # When lod_in < lod: recurse into the next finer resolution.
                    img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
                return img()

            # Starts at res = 3; the recursion covers res = 3..resolution_log2.
            images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')