def decoder(self, z, is_training, reuse=False):
    """Builds the Bernoulli decoder mapping a latent code to an image.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: boolean, forwarded to every batch-norm layer.
      reuse: boolean, whether the "decoder" variable scope reuses variables.

    Returns:
      out: sigmoid of the last transposed convolution, whose requested output
        shape is [batch_size, input_height, input_width, c_dim].
    """
    bs = self.batch_size
    h, w = self.input_height, self.input_width
    quarter_shape = [bs, h // 4, w // 4, 128]
    half_shape = [bs, h // 2, w // 2, 64]
    full_shape = [bs, h, w, self.c_dim]

    with tf.variable_scope("decoder", reuse=reuse):
        # Fully connected stack: linear -> batch norm -> ReLU, twice.
        hidden = linear(z, 1024, scope="de_fc1")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn1")
        hidden = tf.nn.relu(hidden)

        hidden = linear(hidden, 128 * (h // 4) * (w // 4), scope="de_fc2")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn2")
        hidden = tf.nn.relu(hidden)

        # Unflatten, then upsample twice (h/4 -> h/2 -> h).
        hidden = tf.reshape(hidden, quarter_shape)
        hidden = deconv2d(hidden, half_shape, 4, 4, 2, 2, name="de_dc3")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn3")
        hidden = tf.nn.relu(hidden)

        pre_sigmoid = deconv2d(hidden, full_shape, 4, 4, 2, 2, name="de_dc4")
        return tf.nn.sigmoid(pre_sigmoid)
def generator(self, z, is_training, reuse=False):
    """Builds the generator mapping a latent code to an image.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: boolean, forwarded to every batch-norm layer.
      reuse: boolean, whether the "generator" variable scope reuses variables.

    Returns:
      out: sigmoid of the last transposed convolution, whose requested output
        shape is [batch_size, input_height, input_width, c_dim].
    """
    bs = self.batch_size
    h, w = self.input_height, self.input_width
    h4, w4 = h // 4, w // 4

    with tf.variable_scope("generator", reuse=reuse):
        # Two fully connected layers, each followed by batch norm + lrelu.
        hidden = lrelu(
            batch_norm(
                linear(z, 1024, scope="g_fc1"),
                is_training=is_training,
                scope="g_bn1"))
        hidden = lrelu(
            batch_norm(
                linear(hidden, 128 * h4 * w4, scope="g_fc2"),
                is_training=is_training,
                scope="g_bn2"))

        # Unflatten, then upsample twice (h/4 -> h/2 -> h).
        hidden = tf.reshape(hidden, [bs, h4, w4, 128])
        hidden = lrelu(
            batch_norm(
                deconv2d(hidden, [bs, h // 2, w // 2, 64], 4, 4, 2, 2,
                         name="g_dc3"),
                is_training=is_training,
                scope="g_bn3"))
        hidden = deconv2d(hidden, [bs, h, w, self.c_dim], 4, 4, 2, 2,
                          name="g_dc4")
        return tf.nn.sigmoid(hidden)
def discriminator(self, x, is_training, reuse=False):
    """InfoGAN-style discriminator with optional batch normalization.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "discriminator" scope reuses variables.

    Returns:
      out: sigmoid of out_logit, i.e. the prediction in [0, 1], shape [bs, 1].
      out_logit: output of the final linear layer, shape [bs, 1].
      net: activations of the last hidden layer, shape [bs, 1024].
    """
    use_bn = self.discriminator_batchnorm

    with tf.variable_scope("discriminator", reuse=reuse):
        # Convolutional part: [bs, h, w, c] -> [bs, h/4, w/4, 128].
        hidden = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2")
        if use_bn:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)

        # Dense part: flatten to [bs, h * w * 8], then 1024 units, then 1 logit.
        hidden = tf.reshape(hidden, [self.batch_size, -1])
        hidden = linear(hidden, 1024, scope="d_fc3")
        if use_bn:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn3")
        hidden = lrelu(hidden)

        logit = linear(hidden, 1, scope="d_fc4")
        return tf.nn.sigmoid(logit), logit, hidden
def discriminator(self, x, is_training, reuse=False):
    """InfoGAN-style discriminator with selectable normalization.

    Spectral normalization is passed to the conv/linear layers through
    `use_sn`; batch normalization is inserted as separate layers.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "discriminator" scope reuses variables.

    Returns:
      out: sigmoid of out_logit, i.e. the prediction in [0, 1], shape [bs, 1].
      out_logit: output of the final linear layer, shape [bs, 1].
      net: activations of the last hidden layer, shape [bs, 1024].
    """
    normalization = self.discriminator_normalization
    use_sn = normalization == consts.SPECTRAL_NORM

    with tf.variable_scope("discriminator", reuse=reuse):
        # Convolutional part: [bs, h, w, c] -> [bs, h/4, w/4, 128].
        hidden = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn))
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
        if normalization == consts.BATCH_NORM:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)

        # Dense part: flatten to [bs, h * w * 8], then 1024 units, then 1 logit.
        hidden = tf.reshape(hidden, [self.batch_size, -1])
        hidden = linear(hidden, 1024, scope="d_fc3", use_sn=use_sn)
        if normalization == consts.BATCH_NORM:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn3")
        hidden = lrelu(hidden)

        logit = linear(hidden, 1, scope="d_fc4", use_sn=use_sn)
        return tf.nn.sigmoid(logit), logit, hidden
def discriminator(self, x, is_training, reuse=False):
    """BEGAN discriminator (auto-encoder) with selectable normalization.

    This implementation doesn't match the one from the paper, but is similar
    to our "standard" discriminator (same 2 conv layers, using lrelu).
    However, it still has fewer parameters (1.3M vs 8.5M) because of the huge
    linear layer in the standard discriminator.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "discriminator" scope reuses variables.

    Returns:
      out: the sigmoid-activated reconstruction of x, shape [bs, h, w, c].
      recon_error: mean L1 reconstruction error between out and x.
      code: the bottleneck representation of the auto-encoder, shape [bs, 64].
    """
    bs = self.batch_size
    h, w = self.input_height, self.input_width
    normalization = self.discriminator_normalization
    use_sn = normalization == consts.SPECTRAL_NORM

    with tf.variable_scope("discriminator", reuse=reuse):
        # Encoder: [bs, h, w, c] -> [bs, 64].
        hidden = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn))
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
        hidden = tf.reshape(hidden, [bs, -1])  # [bs, h * w * 8]
        code = linear(hidden, 64, scope="d_fc6", use_sn=use_sn)
        if normalization == consts.BATCH_NORM:
            code = batch_norm(code, is_training=is_training, scope="d_bn1")
        code = lrelu(code)

        # Decoder: [bs, 64] -> [bs, h, w, c].
        hidden = linear(code, 128 * (h // 4) * (w // 4), scope="d_fc1",
                        use_sn=use_sn)
        if normalization == consts.BATCH_NORM:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)

        hidden = tf.reshape(hidden, [bs, h // 4, w // 4, 128])
        hidden = deconv2d(hidden, [bs, h // 2, w // 2, 64], 4, 4, 2, 2,
                          name="d_deconv1")
        if normalization == consts.BATCH_NORM:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn3")
        hidden = lrelu(hidden)

        hidden = deconv2d(hidden, [bs, h, w, self.c_dim], 4, 4, 2, 2,
                          name="d_deconv2")
        reconstruction = tf.nn.sigmoid(hidden)

        # Mean absolute difference between reconstruction and input.
        recon_error = tf.reduce_mean(tf.abs(reconstruction - x))
        return reconstruction, recon_error, code
def attention_block(self, entities, reuse, name="attention_block"):
    """Applies scaled dot-product attention across entities.

    Queries, keys and values are each computed from the entities by a linear
    layer, a ReLU and a layer norm. The attended values are projected back to
    the entity dimension by two linear + ReLU layers and added to the input
    before a final layer norm.

    Args:
      entities: A tensor of shape (B, K, D) where K is the number of entities.
      reuse: Whether to reuse the weights.
      name: The name of the variable scope of the block.

    Returns:
      Updated entity representation of shape (B, K, D).
    """
    # Read K and D from the static shape to support a background channel.
    k, z_dim = entities.get_shape().as_list()[1:3]
    bs = self.batch_size
    emb_dim = self.embedding_dim
    flat_entities = tf.reshape(entities, [bs * k, z_dim])

    def project(prefix):
        """linear -> ReLU -> layer norm, reshaped to (B, K, embedding_dim)."""
        projected = tf.nn.relu(
            ops.linear(flat_entities, emb_dim, scope=prefix + "_fc"))
        projected = ops.layer_norm(projected, reuse, prefix + "_ln")
        return tf.reshape(projected, [bs, k, emb_dim])

    with tf.variable_scope(name, reuse=reuse):
        queries = project("q")
        keys = project("k")
        values = project("v")

        # Pairwise scores (B, K, K), scaled by sqrt(embedding_dim) and
        # normalized over the key axis.
        scores = tf.matmul(queries, tf.transpose(keys, [0, 2, 1]))
        scale = tf.sqrt(tf.cast(emb_dim, tf.float32))
        weights = tf.nn.softmax(scores / scale, axis=2)

        attended = tf.matmul(weights, values)
        flat_attended = tf.reshape(attended, [bs * k, emb_dim])

        # Project back to the original space and add the residual connection.
        updated = tf.nn.relu(ops.linear(flat_attended, z_dim, "e_fc1"))
        updated = tf.nn.relu(ops.linear(updated, z_dim, "e_fc2"))
        updated = ops.layer_norm(updated + flat_entities, reuse, "e_ln")
        return tf.reshape(updated, [bs, k, z_dim])
def discriminator(self, x, is_training, reuse=False):
    """BEGAN discriminator (auto-encoder) with optional batch normalization.

    This implementation doesn't match the one from the paper, but is similar
    to our "standard" discriminator (same 2 conv layers, using lrelu).
    However, it still has fewer parameters (1.3M vs 8.5M) because of the huge
    linear layer in the standard discriminator.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "discriminator" scope reuses variables.

    Returns:
      out: the sigmoid-activated reconstruction of x, shape [bs, h, w, c].
      recon_error: mean L1 reconstruction error between out and x.
      code: the bottleneck representation of the auto-encoder, shape [bs, 64].
    """
    bs = self.batch_size
    h, w = self.input_height, self.input_width

    with tf.variable_scope("discriminator", reuse=reuse):
        # Encoder: [bs, h, w, c] -> [bs, 64].
        hidden = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2")
        hidden = tf.reshape(hidden, [bs, -1])  # [bs, h * w * 8]
        code = linear(hidden, 64, scope="d_fc6")
        if self.discriminator_batchnorm:
            code = batch_norm(code, is_training=is_training, scope="d_bn1")
        code = lrelu(code)

        # Decoder: [bs, 64] -> [bs, h, w, c].
        hidden = linear(code, 128 * (h // 4) * (w // 4), scope="d_fc1")
        if self.discriminator_batchnorm:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)

        hidden = tf.reshape(hidden, [bs, h // 4, w // 4, 128])
        hidden = deconv2d(hidden, [bs, h // 2, w // 2, 64], 4, 4, 2, 2,
                          name="d_deconv1")
        if self.discriminator_batchnorm:
            hidden = batch_norm(hidden, is_training=is_training, scope="d_bn3")
        hidden = lrelu(hidden)

        hidden = deconv2d(hidden, [bs, h, w, self.c_dim], 4, 4, 2, 2,
                          name="d_deconv2")
        reconstruction = tf.nn.sigmoid(hidden)

        # Mean absolute difference between reconstruction and input.
        recon_error = tf.reduce_mean(tf.abs(reconstruction - x))
        return reconstruction, recon_error, code
def resnet_cifar_generator(noise, is_training, reuse=None, colors=3):
    """ResNet generator built from a 4x4x256 seed and three "up" blocks.

    Args:
      noise: noise tensor; its static first dimension is used as batch size.
      is_training: forwarded to the generator blocks and the final norm.
      reuse: whether the "generator" scope reuses variables.
      colors: number of output channels of the final convolution.

    Returns:
      The sigmoid-activated output of the final convolution.
    """
    batch_size = noise.get_shape().as_list()[0]
    seed_shape = [batch_size, 4, 4, 256]

    with tf.variable_scope("generator", reuse=reuse):
        # Project the noise to the seed and view it as a rank-4 tensor.
        net = ops.linear(noise, 4 * 4 * 256, scope="fc_noise")
        net = tf.reshape(net, seed_shape, name="fc_reshaped")

        for block_number in (1, 2, 3):
            net = generator_block(
                net,
                in_channels=256,
                out_channels=256,
                scale="up",
                block_scope="B%d" % block_number,
                is_training=is_training,
                reuse=reuse)

        # Output head: norm -> ReLU -> 3x3 conv -> sigmoid.
        net = batch_norm_resnet(net, is_training=is_training, scope="final_norm")
        net = tf.nn.relu(net)
        net = ops.conv2d(net, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
                         name="final_conv")
        image = tf.nn.sigmoid(net)

        print("Generator output shape: ", image)
        return image
def resnet_cifar_discriminator(inputs, is_training, discriminator_normalization,
                               reuse=None):
    """ResNet discriminator made of four 128-channel blocks.

    The first two blocks downscale, the last two keep the resolution.

    Args:
      inputs: image tensor; the last dimension must be 1 or 3.
      is_training: forwarded to the discriminator blocks.
      discriminator_normalization: which type of normalization to apply.
      reuse: whether the "discriminator" scope reuses variables.

    Returns:
      out: sigmoid of out_logit.
      out_logit: output of the final linear layer.
      None: placeholder for the third element of the returned tuple.
    """
    _validate_image_inputs(inputs)
    colors = inputs.get_shape().as_list()[-1]
    assert colors in [1, 3]

    block_scales = ("down", "down", "none", "none")

    with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
        net = inputs
        in_channels = colors
        for block_idx, scale in enumerate(block_scales):
            net = discriminator_block(
                net,
                in_channels=in_channels,
                out_channels=128,
                scale=scale,
                block_scope="B%d" % block_idx,
                is_training=is_training,
                reuse=reuse,
                discriminator_normalization=discriminator_normalization)
            # Every block after the first consumes 128 channels.
            in_channels = 128

        net = tf.nn.relu(net)

        # Pool over the spatial axes. The reference calls this "global sum
        # pooling", but its resnet5 implementation (the only one available;
        # there was none for Cifar) uses the mean, so we do the same.
        pre_logits = tf.reduce_mean(net, axis=[1, 2])

        # Final dense layer producing a single logit.
        spectral = discriminator_normalization == consts.SPECTRAL_NORM
        out_logit = ops.linear(pre_logits, 1, scope="disc_final_fc",
                               use_sn=spectral)
        return tf.nn.sigmoid(out_logit), out_logit, None
def resnet5_discriminator(inputs, is_training, discriminator_normalization,
                          reuse=None):
    """ResNet style discriminator.

    Construct discriminator network from inputs to the final endpoint: one
    input block followed by five more blocks, all of which downscale.

    Args:
      inputs: A tensor of size [batch_size, height, width, channels]. Must be
        floating point.
      is_training: Is the model currently being trained.
      discriminator_normalization: which type of normalization to apply.
      reuse: Whether or not the network variables should be reused. `scope`
        must be given to be reused.

    Returns:
      out: The prediction of the discriminator (in [0, 1]). Shape: [bs, 1]
      out_logit: The pre-sigmoid activations for discrimination
        real/generated, a tensor of size [batch_size, 1]
      None: placeholder for the third element of the returned tuple.

    Raises:
      ValueError: If the input image shape is not 4-dimensional, if the spatial
        dimensions aren't defined at graph construction time, if the spatial
        dimensions aren't square, or if the spatial dimensions aren't a power
        of two.
    """
    _validate_image_inputs(inputs)
    colors = inputs.get_shape().as_list()[-1]
    assert colors in [1, 3]

    ch = 64
    # Magic in/out channel multipliers copied from SN paper.
    channel_multipliers = [(1, 2), (2, 4), (4, 4), (4, 8), (8, 8)]

    with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
        net = discriminator_block(
            inputs,
            in_channels=colors,
            out_channels=ch,
            scale="down",
            block_scope="B0",
            is_training=is_training,
            reuse=reuse,
            discriminator_normalization=discriminator_normalization)

        for block_idx, (in_mult, out_mult) in enumerate(channel_multipliers):
            in_channels = ch * in_mult
            out_channels = ch * out_mult
            print("Resnet5 disc, block %d in=%d out=%d" % (
                block_idx, in_channels, out_channels))
            net = discriminator_block(
                net,
                in_channels=in_channels,
                out_channels=out_channels,
                scale="down",
                block_scope="B%d" % (block_idx + 1),
                is_training=is_training,
                reuse=reuse,
                discriminator_normalization=discriminator_normalization)

        # Output head: ReLU -> spatial mean -> single-logit dense layer.
        net = tf.nn.relu(net)
        pre_logits = tf.reduce_mean(net, axis=[1, 2])
        spectral = discriminator_normalization == consts.SPECTRAL_NORM
        out_logit = ops.linear(pre_logits, 1, scope="disc_final_fc",
                               use_sn=spectral)
        return tf.nn.sigmoid(out_logit), out_logit, None
def resnet5_generator(noise, is_training, reuse=None, colors=3,
                      output_shape=128, unused_ablation_type=""):
    """ResNet generator with five blocks growing a 4x4 seed.

    Only the first log2(output_shape / 4) blocks upscale; the remaining
    blocks keep the resolution.

    Args:
      noise: noise tensor of shape [bs, z_dim].
      is_training: forwarded to the generator blocks and the final norm.
      reuse: whether the "generator" scope reuses variables.
      colors: number of output channels of the final convolution.
      output_shape: target resolution; output_shape / 4 must be a power of
        two between 1 and 32.
      unused_ablation_type: not used by this function.

    Returns:
      The sigmoid-activated output of the final convolution.
    """
    noise_shape = noise.get_shape().as_list()
    assert len(noise_shape) == 2
    batch_size = noise_shape[0]

    seed_size = 4  # Each "up" block doubles this resolution.
    ch = 64  # We want the last block to have 64 channels.

    with tf.variable_scope("generator", reuse=reuse):
        # Project the noise to the seed and view it as a rank-4 tensor.
        net = ops.linear(noise, ch * 8 * seed_size * seed_size,
                         scope="fc_noise")
        net = tf.reshape(net, [batch_size, seed_size, seed_size, ch * 8],
                         name="fc_reshaped")

        # Magic in/out channel multipliers copied from SN paper.
        channel_multipliers = [(8, 8), (8, 4), (4, 4), (4, 2), (2, 1)]

        up_layers = np.log2(float(output_shape) / seed_size)
        assert up_layers.is_integer(), "log2(%d/%d) must be an integer" % (
            output_shape, seed_size)
        assert 0 <= up_layers <= 5, "Invalid output_shape %d" % (
            output_shape)
        up_layers = int(up_layers)

        for block_idx, (in_mult, out_mult) in enumerate(channel_multipliers):
            in_channels = ch * in_mult
            out_channels = ch * out_mult
            print("Resnet5, block %d in=%d out=%d" % (
                block_idx, in_channels, out_channels))
            net = generator_block(
                net,
                in_channels=in_channels,
                out_channels=out_channels,
                scale="up" if block_idx < up_layers else "none",
                block_scope="B%d" % (block_idx + 1),
                is_training=is_training,
                reuse=reuse)

        # Output head: norm -> ReLU -> 3x3 conv -> sigmoid.
        net = batch_norm_resnet(net, is_training=is_training, scope="final_norm")
        net = tf.nn.relu(net)
        net = ops.conv2d(net, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
                         name="final_conv")
        image = tf.nn.sigmoid(net)

        print("Generator output shape: ", image)
        return image
def decoder(self, z, is_training, reuse=False):
    """Builds the Bernoulli decoder mapping a latent code to an image.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: boolean, forwarded to every batch-norm layer.
      reuse: boolean, whether the "decoder" variable scope reuses variables.

    Returns:
      out: sigmoid of the last transposed convolution, whose requested output
        shape is [batch_size, input_height, input_width, c_dim].
    """
    bs = self.batch_size
    h, w = self.input_height, self.input_width
    quarter_shape = [bs, h // 4, w // 4, 128]
    half_shape = [bs, h // 2, w // 2, 64]
    full_shape = [bs, h, w, self.c_dim]

    with tf.variable_scope("decoder", reuse=reuse):
        # Fully connected stack: linear -> batch norm -> ReLU, twice.
        hidden = linear(z, 1024, scope="de_fc1")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn1")
        hidden = tf.nn.relu(hidden)

        hidden = linear(hidden, 128 * (h // 4) * (w // 4), scope="de_fc2")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn2")
        hidden = tf.nn.relu(hidden)

        # Unflatten, then upsample twice (h/4 -> h/2 -> h).
        hidden = tf.reshape(hidden, quarter_shape)
        hidden = deconv2d(hidden, half_shape, 4, 4, 2, 2, name="de_dc3")
        hidden = batch_norm(hidden, is_training=is_training, scope="de_bn3")
        hidden = tf.nn.relu(hidden)

        pre_sigmoid = deconv2d(hidden, full_shape, 4, 4, 2, 2, name="de_dc4")
        return tf.nn.sigmoid(pre_sigmoid)
def sn_generator(z, batch_size, output_height, output_width, output_c_dim,
                 is_training, reuse=False):
    """Returns the output tensor of the SNDCGAN generator.

    Details are available at https://openreview.net/pdf?id=B1QRgziT-.

    Args:
      z: latent code, shape [batch_size, latent_dimensionality]
      batch_size: Batch size.
      output_height: Output image height.
      output_width: Output image width.
      output_c_dim: Number of color channels.
      is_training: boolean, are we in train or eval model.
      reuse: boolean, should params be re-used.

    Returns:
      net: The generated image Tensor with entries in [0, 1].
    """
    # Spatial sizes at each stage, from full resolution down to 1/8.
    s_h, s_w = output_height, output_width
    s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
    s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
    s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)

    # (deconv name, batch-norm scope, requested deconv output shape).
    upsampling_stages = (
        ("g_dc2", "g_bn2", [batch_size, s_h4, s_w4, 256]),
        ("g_dc3", "g_bn3", [batch_size, s_h2, s_w2, 128]),
        ("g_dc4", "g_bn4", [batch_size, s_h, s_w, 64]),
    )

    with tf.variable_scope("generator", reuse=reuse):
        hidden = linear(z, s_h8 * s_w8 * 512, scope="g_fc1")
        hidden = batch_norm_dcgan(hidden, is_training, scope="g_bn1",
                                  epsilon=2e-5)
        hidden = tf.nn.relu(hidden)
        hidden = tf.reshape(hidden, [batch_size, s_h8, s_w8, 512])

        for deconv_name, bn_scope, stage_shape in upsampling_stages:
            hidden = deconv2d(hidden, stage_shape, 4, 4, 2, 2, name=deconv_name)
            hidden = batch_norm_dcgan(hidden, is_training, scope=bn_scope,
                                      epsilon=2e-5)
            hidden = tf.nn.relu(hidden)

        hidden = deconv2d(hidden, [batch_size, s_h, s_w, output_c_dim],
                          3, 3, 1, 1, name="g_dc5")
        image = tf.tanh(hidden)

        # NOTE: the authors' implementation keeps the tanh output in [-1, 1].
        # Our image preprocessing normalizes real images to [0, 1], so the
        # generated image is rescaled to that range as well.
        return tf.div(image + 1.0, 2.0)
def encoder(self, x, is_training, reuse=False):
    """Builds the Gaussian encoder.

    Args:
      x: input tensor fed to the first convolution.
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "encoder" variable scope reuses variables.

    Returns:
      mean: the first z_dim columns of the final linear layer.
      stddev: 1e-6 plus the softplus of the remaining z_dim columns, which
        keeps the standard deviation strictly positive.
    """
    # NOTE: batch norm in the encoder is toggled by the discriminator flag.
    use_bn = self.discriminator_batchnorm
    z_dim = self.z_dim

    with tf.variable_scope("encoder", reuse=reuse):
        hidden = conv2d(x, 64, 4, 4, 2, 2, name="en_conv1")
        hidden = lrelu(hidden)

        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="en_conv2")
        if use_bn:
            hidden = batch_norm(hidden, is_training=is_training, scope="en_bn2")
        hidden = lrelu(hidden)

        hidden = tf.reshape(hidden, [self.batch_size, -1])
        hidden = linear(hidden, 1024, scope="en_fc3")
        if use_bn:
            hidden = batch_norm(hidden, is_training=is_training, scope="en_bn3")
        hidden = lrelu(hidden)

        # One linear layer emits both Gaussian parameters side by side.
        params = linear(hidden, 2 * z_dim, scope="en_fc4")
        mean = params[:, :z_dim]
        stddev = 1e-6 + tf.nn.softplus(params[:, z_dim:])
        return mean, stddev
def encoder(self, x, is_training, reuse=False):
    """Builds the Gaussian encoder.

    Args:
      x: input tensor fed to the first convolution.
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "encoder" variable scope reuses variables.

    Returns:
      mean: the first z_dim columns of the final linear layer.
      stddev: 1e-6 plus the softplus of the remaining z_dim columns, which
        keeps the standard deviation strictly positive.
    """
    # NOTE: batch norm in the encoder is toggled by the discriminator flag.
    use_bn = self.discriminator_batchnorm
    z_dim = self.z_dim

    with tf.variable_scope("encoder", reuse=reuse):
        hidden = conv2d(x, 64, 4, 4, 2, 2, name="en_conv1")
        hidden = lrelu(hidden)

        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="en_conv2")
        if use_bn:
            hidden = batch_norm(hidden, is_training=is_training, scope="en_bn2")
        hidden = lrelu(hidden)

        hidden = tf.reshape(hidden, [self.batch_size, -1])
        hidden = linear(hidden, 1024, scope="en_fc3")
        if use_bn:
            hidden = batch_norm(hidden, is_training=is_training, scope="en_bn3")
        hidden = lrelu(hidden)

        # One linear layer emits both Gaussian parameters side by side.
        params = linear(hidden, 2 * z_dim, scope="en_fc4")
        mean = params[:, :z_dim]
        stddev = 1e-6 + tf.nn.softplus(params[:, z_dim:])
        return mean, stddev
def generator(self, z, is_training, reuse=False):
    """Builds the generator mapping a latent code to an image.

    Args:
      z: latent code tensor fed to the first linear layer.
      is_training: boolean, forwarded to every batch-norm layer.
      reuse: boolean, whether the "generator" variable scope reuses variables.

    Returns:
      out: sigmoid of the last transposed convolution, whose requested output
        shape is [batch_size, input_height, input_width, c_dim].
    """
    bs = self.batch_size
    h, w = self.input_height, self.input_width
    h4, w4 = h // 4, w // 4

    with tf.variable_scope("generator", reuse=reuse):
        # Two fully connected layers, each followed by batch norm + lrelu.
        hidden = lrelu(
            batch_norm(
                linear(z, 1024, scope="g_fc1"),
                is_training=is_training,
                scope="g_bn1"))
        hidden = lrelu(
            batch_norm(
                linear(hidden, 128 * h4 * w4, scope="g_fc2"),
                is_training=is_training,
                scope="g_bn2"))

        # Unflatten, then upsample twice (h/4 -> h/2 -> h).
        hidden = tf.reshape(hidden, [bs, h4, w4, 128])
        hidden = lrelu(
            batch_norm(
                deconv2d(hidden, [bs, h // 2, w // 2, 64], 4, 4, 2, 2,
                         name="g_dc3"),
                is_training=is_training,
                scope="g_bn3"))
        hidden = deconv2d(hidden, [bs, h, w, self.c_dim], 4, 4, 2, 2,
                          name="g_dc4")
        return tf.nn.sigmoid(hidden)
def resnet_stl_discriminator(inputs, is_training, discriminator_normalization,
                             reuse=None):
    """ResNet discriminator for inputs whose size need not be a power of two.

    One input block is followed by four more blocks; all but the last one
    downscale.

    Args:
      inputs: image tensor; the last dimension must be 1 or 3.
      is_training: forwarded to the discriminator blocks.
      discriminator_normalization: which type of normalization to apply.
      reuse: whether the "discriminator" scope reuses variables.

    Returns:
      out: sigmoid of out_logit.
      out_logit: output of the final linear layer.
      None: placeholder for the third element of the returned tuple.
    """
    _validate_image_inputs(inputs, validate_power2=False)
    colors = inputs.get_shape().as_list()[-1]
    assert colors in [1, 3]

    ch = 64
    # In/out channel multipliers copied from SN paper.
    channel_multipliers = [(1, 2), (2, 4), (4, 8), (8, 16)]

    with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
        output = discriminator_block(
            inputs,
            in_channels=colors,
            out_channels=ch,
            scale="down",
            block_scope="B0",
            is_training=is_training,
            reuse=reuse,
            discriminator_normalization=discriminator_normalization)

        for block_idx, (in_mult, out_mult) in enumerate(channel_multipliers):
            in_channels = ch * in_mult
            out_channels = ch * out_mult
            # The message used to say "Resnet5 disc" (copied from
            # resnet5_discriminator), which mislabelled this network in logs.
            print("Resnet STL disc, block %d in=%d out=%d" % (
                block_idx, in_channels, out_channels))
            output = discriminator_block(
                output,
                in_channels=in_channels,
                out_channels=out_channels,
                # Only the first three of these blocks downscale.
                scale="down" if block_idx < 3 else "none",
                block_scope="B%d" % (block_idx + 1),
                is_training=is_training,
                reuse=reuse,
                discriminator_normalization=discriminator_normalization)

        # Output head: ReLU -> spatial mean -> single-logit dense layer.
        output = tf.nn.relu(output)
        pre_logits = tf.reduce_mean(output, axis=[1, 2])
        use_sn = discriminator_normalization == consts.SPECTRAL_NORM
        out_logit = ops.linear(pre_logits, 1, scope="disc_final_fc",
                               use_sn=use_sn)
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, None
def resnet107_discriminator(inputs, is_training, discriminator_normalization,
                            reuse=None):
    """Deep ResNet discriminator with six groups of five blocks.

    A 3x3 convolution maps the input to 16 channels. Each group applies five
    resolution-preserving blocks; the first five groups are each followed by
    a downscaling block that doubles the channel count.

    Args:
      inputs: image tensor; the last dimension must be 1 or 3.
      is_training: forwarded to the discriminator blocks.
      discriminator_normalization: which type of normalization to apply.
      reuse: whether the "discriminator" scope reuses variables.

    Returns:
      out: sigmoid of out_logit.
      out_logit: output of the final linear layer.
      None: placeholder for the third element of the returned tuple.
    """
    _validate_image_inputs(inputs)
    colors = inputs.get_shape().as_list()[-1]
    assert colors in [1, 3]

    ch = 64
    # Arguments shared by every discriminator block.
    shared = dict(
        is_training=is_training,
        reuse=reuse,
        discriminator_normalization=discriminator_normalization)

    with tf.variable_scope("discriminator", values=[inputs], reuse=reuse):
        net = ops.conv2d(inputs, output_dim=ch // 4, k_h=3, k_w=3,
                         d_h=1, d_w=1, name="color_conv")
        in_channels, out_channels = ch // 4, ch // 2

        for superblock in range(6):
            for i in range(5):
                net = discriminator_block(
                    net,
                    in_channels=in_channels,
                    out_channels=in_channels,
                    scale="none",
                    block_scope="B_%d_%d" % (superblock, i),
                    **shared)
            # We want to downscale 5 times, so the last group has no
            # transition block.
            if superblock < 5:
                net = discriminator_block(
                    net,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    scale="down",
                    block_scope="B_%d_up" % superblock,
                    **shared)
                in_channels *= 2
                out_channels *= 2

        # Flatten; the hard-coded size expects 4x4 feature maps with 8 * ch
        # channels at this point.
        net = tf.reshape(net, [-1, 4 * 4 * 8 * ch])
        spectral = discriminator_normalization == consts.SPECTRAL_NORM
        out_logit = ops.linear(net, 1, scope="disc_final_fc", use_sn=spectral)
        return tf.nn.sigmoid(out_logit), out_logit, None
def discriminator(x, batch_size, is_training, discriminator_normalization,
                  reuse=False):
    """Returns the outputs of the DCGAN discriminator.

    Details are available at https://arxiv.org/abs/1511.06434. Notable changes
    include BatchNorm in the discriminator and LeakyReLU for all layers.

    Args:
      x: input images, shape [bs, h, w, channels].
      batch_size: integer, number of samples in batch.
      is_training: boolean, are we in train or eval model.
      discriminator_normalization: which type of normalization to apply; must
        be one of NO_NORMALIZATION, SPECTRAL_NORM or BATCH_NORM.
      reuse: boolean, should params be re-used.

    Returns:
      out: A float (in [0, 1]) with discriminator prediction.
      out_logit: Logits (activations of the last linear layer).
      net: Activations after the last LeakyReLU (output of the conv stack).
    """
    assert discriminator_normalization in [
        consts.NO_NORMALIZATION, consts.SPECTRAL_NORM, consts.BATCH_NORM]

    df_dim = 64  # Dimension of filters in first convolutional layer.
    use_sn = discriminator_normalization == consts.SPECTRAL_NORM
    use_bn = discriminator_normalization == consts.BATCH_NORM

    # (conv name, batch-norm scope, filter multiplier) for layers 2 to 4.
    normalized_layers = (
        ("d_conv2", "d_bn1", 2),
        ("d_conv3", "d_bn2", 4),
        ("d_conv4", "d_bn3", 8),
    )

    with tf.variable_scope("discriminator", reuse=reuse):
        # The first convolution is never batch-normalized.
        hidden = conv2d(x, df_dim, 5, 5, 2, 2, name="d_conv1", use_sn=use_sn)
        hidden = lrelu(hidden)

        for conv_name, bn_scope, multiplier in normalized_layers:
            hidden = conv2d(hidden, df_dim * multiplier, 5, 5, 2, 2,
                            name=conv_name, use_sn=use_sn)
            if use_bn:
                hidden = batch_norm_dcgan(hidden, is_training, scope=bn_scope)
            hidden = lrelu(hidden)

        flat = tf.reshape(hidden, [batch_size, -1])
        out_logit = linear(flat, 1, scope="d_fc4", use_sn=use_sn)
        return tf.nn.sigmoid(out_logit), out_logit, hidden
def resnet107_generator(noise, is_training, reuse=None, colors=3):
    """Deep ResNet generator with six groups of five blocks.

    The noise is projected to a 4x4 seed with 8 * 64 channels. Each group
    applies five resolution-preserving blocks; the first five groups are each
    followed by an upscaling block that halves the channel count.

    Args:
      noise: noise tensor of shape [bs, z_dim].
      is_training: forwarded to the generator blocks.
      reuse: whether the "generator" scope reuses variables.
      colors: number of output channels of the final convolution.

    Returns:
      The sigmoid-activated output of the final convolution.
    """
    # Input is a noise tensor of shape [bs, z_dim].
    assert len(noise.get_shape().as_list()) == 2

    batch_size = noise.get_shape().as_list()[0]
    ch = 64

    with tf.variable_scope("generator", reuse=reuse):
        # Map noise to the actual seed and reshape it to a rank-4 Tensor.
        output = ops.linear(noise, 4 * 4 * 8 * ch, scope="fc_noise")
        output = tf.reshape(output, [batch_size, 4, 4, 8 * ch],
                            name="fc_reshaped")

        in_channels = 8 * ch
        out_channels = 4 * ch
        for superblock in range(6):
            for i in range(5):
                block_scope = "B_%d_%d" % (superblock, i)
                output = generator_block(
                    output,
                    in_channels=in_channels,
                    out_channels=in_channels,
                    scale="none",
                    block_scope=block_scope,
                    is_training=is_training,
                    reuse=reuse)
            # We want to upscale 5 times, so the last group has no
            # transition block.
            if superblock < 5:
                output = generator_block(
                    output,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    scale="up",
                    block_scope="B_%d_up" % superblock,
                    is_training=is_training,
                    reuse=reuse)
                # Floor division keeps the channel counts integers; "/="
                # turns them into floats under true division (Python 3 or
                # `from __future__ import division`).
                in_channels //= 2
                out_channels //= 2

        output = ops.conv2d(output, output_dim=colors, k_h=3, k_w=3,
                            d_h=1, d_w=1, name="final_conv")
        output = tf.nn.sigmoid(output)

        print("Generator output shape: ", output)
        return output
def sn_discriminator(x, batch_size, reuse=False, use_sn=False):
    """Returns the outputs of the SNDCGAN discriminator.

    Details are available at https://openreview.net/pdf?id=B1QRgziT-.

    Args:
      x: input images, shape [bs, h, w, channels].
      batch_size: integer, number of samples in batch.
      reuse: boolean, should params be re-used.
      use_sn: boolean, forwarded to every conv and linear layer to enable
        spectral normalization.

    Returns:
      out: A float (in [0, 1]) with discriminator prediction, shape [bs].
      out_logit: Logits (activations of the last linear layer), shape [bs].
      net: Flattened activations of the last convolutional layer.
    """
    # In compare gan framework, the image preprocess normalize image pixel to
    # range [0, 1], while author used [-1, 1]. Apply this trick to input image
    # instead of changing our preprocessing function.
    x = x * 2.0 - 1.0

    # (name, output channels, kernel size, stride) for each convolution.
    conv_layers = (
        ("d_conv1", 64, 3, 1),
        ("d_conv2", 128, 4, 2),
        ("d_conv3", 128, 3, 1),
        ("d_conv4", 256, 4, 2),
        ("d_conv5", 256, 3, 1),
        ("d_conv6", 512, 4, 2),
        ("d_conv7", 512, 3, 1),
    )

    with tf.variable_scope("discriminator", reuse=reuse):
        # Mapping x from [bs, h, w, c] to [bs, 1].
        normal = tf.random_normal_initializer
        net = x
        for name, channels, kernel, stride in conv_layers:
            net = conv2d(net, channels, kernel, kernel, stride, stride,
                         name=name, initializer=normal, use_sn=use_sn)
            net = lrelu(net, leak=0.1)

        net = tf.reshape(net, [batch_size, -1])
        out_logit = linear(net, 1, scope="d_fc1", use_sn=use_sn)
        # Drop only the trailing unit dimension. A bare tf.squeeze would also
        # remove the batch dimension when batch_size == 1 and return a scalar.
        out_logit = tf.squeeze(out_logit, axis=[1])
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, net
def aggregate_heads(self, heads, reuse, name="aggregate_heads"):
    """Returns the aggregated heads.

    The heads are concatenated along the feature axis and mapped back to the
    per-head feature size by a linear layer, a ReLU and a layer norm.

    Args:
      heads: list of tensors; K and D are read from the static shape of the
        first one. The reshape below expects self.n_heads tensors of shape
        (B, K, D).
      reuse: Whether to reuse the weights.
      name: The name of the variable scope.

    Returns:
      A tensor of shape (B, K, D).
    """
    # Estimate local dimensions to support background channel.
    k, z_dim = heads[0].get_shape().as_list()[1:3]

    with tf.variable_scope(name, reuse=reuse):
        stacked = tf.concat(heads, axis=2)
        flat = tf.reshape(stacked,
                          [self.batch_size * k, self.n_heads * z_dim])
        # The original wrapped `flat` in tf.concat(..., axis=2). On a single
        # tensor that is a degenerate identity (and axis 2 does not exist on
        # this rank-2 tensor), so the tensor is passed to the layer directly.
        aggregated = tf.nn.relu(ops.linear(flat, z_dim, "a_fc1"))
        aggregated = ops.layer_norm(aggregated, reuse, "a_ln")
        return tf.reshape(aggregated, [self.batch_size, k, z_dim])
def generator(z, batch_size, output_height, output_width, output_c_dim,
              is_training, reuse=False):
    """Returns the output tensor of the DCGAN generator.

    Details are available at https://arxiv.org/abs/1511.06434. The generator
    uses BatchNorm followed by ReLU on every layer except the output layer,
    which uses TanH (rescaled to [0, 1]).

    Args:
      z: latent code, shape [batch_size, latent_dimensionality]
      batch_size: Batch size.
      output_height: Output image height.
      output_width: Output image width.
      output_c_dim: Number of color channels.
      is_training: boolean, are we in train or eval model.
      reuse: boolean, should params be re-used.

    Returns:
      net: The generated image Tensor with entries in [0, 1].
    """
    gf_dim = 64  # Dimension of filters in first convolutional layer.

    with tf.variable_scope("generator", reuse=reuse):
        # Spatial sizes at each stage, from full resolution down to 1/16.
        s_h, s_w = output_height, output_width
        s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
        s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
        s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
        s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)

        # (batch-norm scope, deconv name, requested deconv output shape).
        stages = (
            ("g_bn1", "g_dc1", [batch_size, s_h8, s_w8, gf_dim * 4]),
            ("g_bn2", "g_dc2", [batch_size, s_h4, s_w4, gf_dim * 2]),
            ("g_bn3", "g_dc3", [batch_size, s_h2, s_w2, gf_dim * 1]),
            ("g_bn4", "g_dc4", [batch_size, s_h, s_w, output_c_dim]),
        )

        hidden = linear(z, gf_dim * 8 * s_h16 * s_w16, scope="g_fc1")
        hidden = tf.reshape(hidden, [-1, s_h16, s_w16, gf_dim * 8])

        # Each stage normalizes and activates its input, then upsamples it.
        for bn_scope, deconv_name, stage_shape in stages:
            hidden = batch_norm_dcgan(hidden, is_training, scope=bn_scope)
            hidden = tf.nn.relu(hidden)
            hidden = deconv2d(hidden, stage_shape, 5, 5, 2, 2, name=deconv_name)

        # Map the tanh output from [-1, 1] to [0, 1].
        return 0.5 * tf.nn.tanh(hidden) + 0.5
def resnet_stl_generator(noise, is_training, reuse=None, colors=3):
    """ResNet generator built from a 6x6x512 seed and three "up" blocks.

    Args:
      noise: noise tensor; its static first dimension is used as batch size.
      is_training: forwarded to the generator blocks and the final norm.
      reuse: whether the "generator" scope reuses variables.
      colors: number of output channels of the final convolution.

    Returns:
      The sigmoid-activated output of the final convolution.
    """
    batch_size = noise.get_shape().as_list()[0]

    with tf.variable_scope("generator", reuse=reuse):
        # Project the noise to the seed and view it as a rank-4 tensor.
        net = ops.linear(noise, 6 * 6 * 512, scope="fc_noise")
        net = tf.reshape(net, [batch_size, 6, 6, 512], name="fc_reshaped")

        ch = 64
        # In/out channel multipliers copied from SN paper.
        channel_multipliers = [(8, 4), (4, 2), (2, 1)]
        for block_number, (in_mult, out_mult) in enumerate(
                channel_multipliers, start=1):
            net = generator_block(
                net,
                in_channels=ch * in_mult,
                out_channels=ch * out_mult,
                scale="up",
                block_scope="B%d" % block_number,
                is_training=is_training,
                reuse=reuse)

        # Output head: norm -> ReLU -> 3x3 conv -> sigmoid.
        net = batch_norm_resnet(net, is_training=is_training, scope="final_norm")
        net = tf.nn.relu(net)
        net = ops.conv2d(net, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
                         name="final_conv")
        image = tf.nn.sigmoid(net)

        print("Generator output shape: ", image)
        return image
def discriminator(self, x, is_training, reuse=False, batch_size_multiplier=1):
    """InfoGAN-style discriminator that batch-normalizes two halves separately.

    When batch normalization is selected, the batch is split in two along
    axis 0, each half is normalized on its own (sharing the same batch-norm
    variables) and the halves are concatenated again.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch-norm layers.
      reuse: boolean, whether the "discriminator" scope reuses variables.
      batch_size_multiplier: the flatten step reshapes to
        [self.batch_size * batch_size_multiplier, -1].

    Returns:
      out: sigmoid of out_logit, i.e. the prediction in [0, 1].
      out_logit: output of the final linear layer.
      net: activations of the last hidden layer (1024 units).
    """
    normalization = self.discriminator_normalization
    use_sn = normalization == consts.SPECTRAL_NORM

    def split_batch_norm(tensor, scope):
        """Batch-normalizes each half of `tensor` with shared variables."""
        first, second = tf.split(tensor, 2, 0)
        first = batch_norm(first, is_training=is_training, scope=scope)
        second = batch_norm(second, is_training=is_training, scope=scope,
                            reuse=True)
        return tf.concat([first, second], 0)

    with tf.variable_scope("discriminator", reuse=reuse):
        # Convolutional part: [bs, h, w, c] -> [bs, h/4, w/4, 128].
        hidden = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn))
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
        if normalization == consts.BATCH_NORM:
            hidden = split_batch_norm(hidden, "d_bn2")
        hidden = lrelu(hidden)

        # Dense part: flatten to [bs, h * w * 8], then 1024 units, then 1 logit.
        hidden = tf.reshape(
            hidden, [self.batch_size * batch_size_multiplier, -1])
        hidden = linear(hidden, 1024, scope="d_fc3", use_sn=use_sn)
        if normalization == consts.BATCH_NORM:
            hidden = split_batch_norm(hidden, "d_bn3")
        hidden = lrelu(hidden)

        logit = linear(hidden, 1, scope="d_fc4", use_sn=use_sn)
        return tf.nn.sigmoid(logit), logit, hidden