def discriminator(self, x, is_training, reuse=False):
    """Builds the InfoGAN-style discriminator.

    The network is two stride-2 convolutions followed by two fully connected
    layers. Spectral normalization is enabled on the conv/linear layers when
    `self.discriminator_normalization` equals `consts.SPECTRAL_NORM`; batch
    normalization is inserted after `d_conv2` and `d_fc3` when it equals
    `consts.BATCH_NORM`.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.

    Returns:
      A tuple (out, out_logit, net):
        out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
        out_logit: output of the final linear layer, shape [bs, 1].
        net: activations of the 1024-unit hidden layer (after lrelu) that
          feed the final linear layer.
    """
    norm_type = self.discriminator_normalization
    use_sn = norm_type == consts.SPECTRAL_NORM
    use_bn = norm_type == consts.BATCH_NORM

    with tf.variable_scope("discriminator", reuse=reuse):
        # Convolutional stage: [bs, h, w, c] -> [bs, h/4, w/4, 128].
        hidden = conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn)
        hidden = lrelu(hidden)  # [bs, h/2, w/2, 64]
        hidden = conv2d(
            hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
        if use_bn:
            hidden = batch_norm(
                hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)

        # Fully connected stage on the flattened feature map.
        flat = tf.reshape(hidden, [self.batch_size, -1])  # [bs, h * w * 8]
        features = linear(flat, 1024, scope="d_fc3", use_sn=use_sn)
        if use_bn:
            features = batch_norm(
                features, is_training=is_training, scope="d_bn3")
        features = lrelu(features)  # [bs, 1024]

        out_logit = linear(features, 1, scope="d_fc4", use_sn=use_sn)
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, features
def discriminator(self, x, is_training, reuse=False):
    """Builds the InfoGAN-style discriminator (batch-norm flag variant).

    The network is two stride-2 convolutions followed by two fully connected
    layers. Batch normalization is inserted after `d_conv2` and `d_fc3` when
    `self.discriminator_batchnorm` is truthy.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.

    Returns:
      A tuple (out, out_logit, net):
        out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
        out_logit: output of the final linear layer, shape [bs, 1].
        net: activations of the 1024-unit hidden layer (after lrelu) that
          feed the final linear layer.
    """
    use_bn = self.discriminator_batchnorm

    with tf.variable_scope("discriminator", reuse=reuse):
        # Convolutional stage: [bs, h, w, c] -> [bs, h/4, w/4, 128].
        hidden = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2")
        if use_bn:
            hidden = batch_norm(
                hidden, is_training=is_training, scope="d_bn2")
        hidden = lrelu(hidden)

        # Fully connected stage on the flattened feature map.
        flat = tf.reshape(hidden, [self.batch_size, -1])  # [bs, h * w * 8]
        features = linear(flat, 1024, scope="d_fc3")
        if use_bn:
            features = batch_norm(
                features, is_training=is_training, scope="d_bn3")
        features = lrelu(features)  # [bs, 1024]

        out_logit = linear(features, 1, scope="d_fc4")  # [bs, 1]
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, features
def generator(self, z, is_training, reuse=False):
    """Builds the generator: two linear layers followed by two deconvolutions.

    Every layer except the last is followed by batch normalization and lrelu.
    The last deconvolution is followed by a sigmoid.

    Args:
      z: input to the first linear layer (presumably the latent code, shape
        [bs, z_dim] -- confirm against the caller).
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "generator" scope are re-used.

    Returns:
      Sigmoid of the last deconvolution, which is requested with output shape
      [self.batch_size, self.input_height, self.input_width, self.c_dim].
    """
    bs = self.batch_size
    full_h, full_w = self.input_height, self.input_width
    half_h, half_w = full_h // 2, full_w // 2
    quarter_h, quarter_w = full_h // 4, full_w // 4

    with tf.variable_scope("generator", reuse=reuse):
        hidden = linear(z, 1024, scope="g_fc1")
        hidden = lrelu(
            batch_norm(hidden, is_training=is_training, scope="g_bn1"))

        # Project to a flat vector that is reshaped into a 128-channel map at
        # a quarter of the target resolution.
        hidden = linear(hidden, 128 * quarter_h * quarter_w, scope="g_fc2")
        hidden = lrelu(
            batch_norm(hidden, is_training=is_training, scope="g_bn2"))
        hidden = tf.reshape(hidden, [bs, quarter_h, quarter_w, 128])

        # Upsample twice with stride-2 deconvolutions.
        hidden = deconv2d(
            hidden, [bs, half_h, half_w, 64], 4, 4, 2, 2, name="g_dc3")
        hidden = lrelu(
            batch_norm(hidden, is_training=is_training, scope="g_bn3"))
        hidden = deconv2d(
            hidden, [bs, full_h, full_w, self.c_dim], 4, 4, 2, 2,
            name="g_dc4")
        return tf.nn.sigmoid(hidden)
def discriminator(self, x, is_training, reuse=False):
    """Builds the BEGAN discriminator, which is an auto-encoder.

    This implementation doesn't match the one from the paper, but is similar
    to the "standard" discriminator (same 2 conv layers, using lrelu).
    According to the original note it still has fewer parameters (1.3M vs
    8.5M) because of the huge linear layer in the standard discriminator.

    Spectral normalization is enabled on the conv/linear layers when
    `self.discriminator_normalization` equals `consts.SPECTRAL_NORM` (the
    deconvolutions are called without it); batch normalization is inserted
    when it equals `consts.BATCH_NORM`.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.

    Returns:
      A tuple (out, recon_error, code):
        out: sigmoid of the decoder output, i.e. the reconstruction of `x`.
        recon_error: mean absolute difference between `out` and `x` (L1
          reconstruction error of the auto-encoder).
        code: the 64-unit bottleneck representation (after lrelu).
    """
    bs = self.batch_size
    full_h, full_w = self.input_height, self.input_width
    quarter_h, quarter_w = full_h // 4, full_w // 4
    norm_type = self.discriminator_normalization
    use_sn = norm_type == consts.SPECTRAL_NORM
    use_bn = norm_type == consts.BATCH_NORM

    with tf.variable_scope("discriminator", reuse=reuse):
        # Encoder: [bs, h, w, c] -> [bs, 64].
        enc = conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn)
        enc = lrelu(enc)  # [bs, h/2, w/2, 64]
        enc = conv2d(enc, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
        enc = tf.reshape(enc, [bs, -1])  # [bs, h * w * 8]
        code = linear(enc, 64, scope="d_fc6", use_sn=use_sn)  # [bs, 64]
        if use_bn:
            code = batch_norm(code, is_training=is_training, scope="d_bn1")
        code = lrelu(code)

        # Decoder: [bs, 64] -> [bs, h, w, c].
        dec = linear(
            code, 128 * quarter_h * quarter_w, scope="d_fc1", use_sn=use_sn)
        if use_bn:
            dec = batch_norm(dec, is_training=is_training, scope="d_bn2")
        dec = lrelu(dec)
        dec = tf.reshape(dec, [bs, quarter_h, quarter_w, 128])
        dec = deconv2d(
            dec, [bs, full_h // 2, full_w // 2, 64], 4, 4, 2, 2,
            name="d_deconv1")  # [bs, h/2, w/2, 64]
        if use_bn:
            dec = batch_norm(dec, is_training=is_training, scope="d_bn3")
        dec = lrelu(dec)
        dec = deconv2d(
            dec, [bs, full_h, full_w, self.c_dim], 4, 4, 2, 2,
            name="d_deconv2")  # [bs, h, w, c]
        out = tf.nn.sigmoid(dec)

        # Reconstruction loss.
        recon_error = tf.reduce_mean(tf.abs(out - x))
        return out, recon_error, code
def discriminator(x, batch_size, is_training, discriminator_normalization,
                  reuse=False):
    """Builds the DCGAN discriminator and returns its outputs.

    Details are available at https://arxiv.org/abs/1511.06434. Notable changes
    include BatchNorm in the discriminator and LeakyReLU for all layers.

    Args:
      x: input images, shape [bs, h, w, channels].
      batch_size: integer, number of samples in batch.
      is_training: boolean, forwarded to the batch norm layers.
      discriminator_normalization: which type of normalization to apply; must
        be one of consts.NO_NORMALIZATION, consts.SPECTRAL_NORM or
        consts.BATCH_NORM.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.

    Returns:
      A tuple (out, out_logit, net):
        out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
        out_logit: logits (activations of the last linear layer).
        net: activations of the last conv stage, after lrelu.
    """
    assert discriminator_normalization in [
        consts.NO_NORMALIZATION, consts.SPECTRAL_NORM, consts.BATCH_NORM]

    base_filters = 64  # Number of filters in the first convolutional layer.
    use_sn = discriminator_normalization == consts.SPECTRAL_NORM
    use_bn = discriminator_normalization == consts.BATCH_NORM

    # (filter multiplier, conv name, batch norm scope) for conv layers 2-4.
    later_stages = (
        (2, "d_conv2", "d_bn1"),
        (4, "d_conv3", "d_bn2"),
        (8, "d_conv4", "d_bn3"),
    )

    with tf.variable_scope("discriminator", reuse=reuse):
        # The first conv layer never gets batch norm.
        net = conv2d(
            x, base_filters, 5, 5, 2, 2, name="d_conv1", use_sn=use_sn)
        net = lrelu(net)

        for multiplier, conv_name, bn_scope in later_stages:
            net = conv2d(
                net, base_filters * multiplier, 5, 5, 2, 2, name=conv_name,
                use_sn=use_sn)
            if use_bn:
                net = batch_norm_dcgan(net, is_training, scope=bn_scope)
            net = lrelu(net)

        flat = tf.reshape(net, [batch_size, -1])
        out_logit = linear(flat, 1, scope="d_fc4", use_sn=use_sn)
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, net
def discriminator(self, x, is_training, reuse=False):
    """Builds the BEGAN discriminator, which is an auto-encoder.

    This implementation doesn't match the one from the paper, but is similar
    to the "standard" discriminator (same 2 conv layers, using lrelu).
    According to the original note it still has fewer parameters (1.3M vs
    8.5M) because of the huge linear layer in the standard discriminator.

    Batch normalization is inserted after the bottleneck, the first decoder
    linear layer and the first deconvolution when
    `self.discriminator_batchnorm` is truthy.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.

    Returns:
      A tuple (out, recon_error, code):
        out: sigmoid of the decoder output, i.e. the reconstruction of `x`.
        recon_error: mean absolute difference between `out` and `x` (L1
          reconstruction error of the auto-encoder).
        code: the 64-unit bottleneck representation (after lrelu).
    """
    bs = self.batch_size
    full_h, full_w = self.input_height, self.input_width
    quarter_h, quarter_w = full_h // 4, full_w // 4
    use_bn = self.discriminator_batchnorm

    with tf.variable_scope("discriminator", reuse=reuse):
        # Encoder: [bs, h, w, c] -> [bs, 64].
        enc = lrelu(conv2d(x, 64, 4, 4, 2, 2, name="d_conv1"))
        enc = conv2d(enc, 128, 4, 4, 2, 2, name="d_conv2")
        enc = tf.reshape(enc, [bs, -1])  # [bs, h * w * 8]
        code = linear(enc, 64, scope="d_fc6")  # [bs, 64]
        if use_bn:
            code = batch_norm(code, is_training=is_training, scope="d_bn1")
        code = lrelu(code)

        # Decoder: [bs, 64] -> [bs, h, w, c].
        dec = linear(code, 128 * quarter_h * quarter_w, scope="d_fc1")
        if use_bn:
            dec = batch_norm(dec, is_training=is_training, scope="d_bn2")
        dec = lrelu(dec)
        dec = tf.reshape(dec, [bs, quarter_h, quarter_w, 128])
        dec = deconv2d(
            dec, [bs, full_h // 2, full_w // 2, 64], 4, 4, 2, 2,
            name="d_deconv1")  # [bs, h/2, w/2, 64]
        if use_bn:
            dec = batch_norm(dec, is_training=is_training, scope="d_bn3")
        dec = lrelu(dec)
        dec = deconv2d(
            dec, [bs, full_h, full_w, self.c_dim], 4, 4, 2, 2,
            name="d_deconv2")  # [bs, h, w, c]
        out = tf.nn.sigmoid(dec)

        # Reconstruction loss.
        recon_error = tf.reduce_mean(tf.abs(out - x))
        return out, recon_error, code
def encoder(self, x, is_training, reuse=False):
    """Implements the Gaussian encoder.

    Two stride-2 convolutions and a 1024-unit linear layer feed a final linear
    layer with 2 * self.z_dim outputs. The first half of those outputs is the
    mean; the second half is mapped through softplus (plus 1e-6) to give a
    strictly positive standard deviation. Batch normalization is inserted
    when `self.discriminator_batchnorm` is truthy.

    Args:
      x: input images (presumably shape [bs, h, w, channels] -- confirm
        against the caller).
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "encoder" scope are re-used.

    Returns:
      A tuple (mean, stddev), each taken from self.z_dim columns of the final
      linear layer's output.
    """
    z_dim = self.z_dim
    use_bn = self.discriminator_batchnorm

    with tf.variable_scope("encoder", reuse=reuse):
        hidden = conv2d(x, 64, 4, 4, 2, 2, name="en_conv1")
        hidden = lrelu(hidden)
        hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="en_conv2")
        if use_bn:
            hidden = batch_norm(
                hidden, is_training=is_training, scope="en_bn2")
        hidden = lrelu(hidden)

        hidden = tf.reshape(hidden, [self.batch_size, -1])
        hidden = linear(hidden, 1024, scope="en_fc3")
        if use_bn:
            hidden = batch_norm(
                hidden, is_training=is_training, scope="en_bn3")
        hidden = lrelu(hidden)

        params = linear(hidden, 2 * z_dim, scope="en_fc4")
        mean = params[:, :z_dim]
        raw_stddev = params[:, z_dim:]
        # softplus keeps the value positive; the epsilon keeps it off zero.
        stddev = 1e-6 + tf.nn.softplus(raw_stddev)
        return mean, stddev
def sn_discriminator(x, batch_size, reuse=False, use_sn=False):
    """Returns the outputs of the SNDCGAN discriminator.

    Details are available at https://openreview.net/pdf?id=B1QRgziT-.

    Args:
      x: input images, shape [bs, h, w, channels].
      batch_size: integer, number of samples in batch.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.
      use_sn: boolean, forwarded to every conv2d/linear layer to enable
        spectral normalization.

    Returns:
      A tuple (out, out_logit, net):
        out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
        out_logit: logits of the last linear layer with the trailing unit
          dimension removed, shape [batch_size].
        net: flattened activations of the last conv layer (after lrelu),
          shape [batch_size, -1].
    """
    # In compare gan framework, the image preprocess normalize image pixel to
    # range [0, 1], while author used [-1, 1]. Apply this trick to input image
    # instead of changing our preprocessing function.
    x = x * 2.0 - 1.0

    # (output channels, kernel size, stride) for d_conv1 ... d_conv7.
    conv_specs = (
        (64, 3, 1),
        (128, 4, 2),
        (128, 3, 1),
        (256, 4, 2),
        (256, 3, 1),
        (512, 4, 2),
        (512, 3, 1),
    )

    with tf.variable_scope("discriminator", reuse=reuse):
        # Mapping x from [bs, h, w, c] to [bs, 1].
        # The initializer class itself (not an instance) is handed to conv2d,
        # exactly as before.
        normal = tf.random_normal_initializer
        net = x
        for index, (channels, kernel, stride) in enumerate(conv_specs, 1):
            net = conv2d(
                net, channels, kernel, kernel, stride, stride,
                name="d_conv%d" % index, initializer=normal, use_sn=use_sn)
            net = lrelu(net, leak=0.1)

        net = tf.reshape(net, [batch_size, -1])
        out_logit = linear(net, 1, scope="d_fc1", use_sn=use_sn)
        # Squeeze only the unit output dimension. An unrestricted squeeze
        # would also drop the batch dimension when batch_size == 1 and
        # return a scalar instead of a [1]-shaped tensor.
        out_logit = tf.squeeze(out_logit, axis=[1])
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, net
def discriminator(self, x, is_training, reuse=False,
                  batch_size_multiplier=1):
    """Builds the InfoGAN-style discriminator with split batch norm.

    Same layer stack as the InfoGAN discriminator (two stride-2 convolutions,
    two linear layers). When batch normalization is selected, the batch is
    split into two equal parts along axis 0, each part is normalized
    separately with shared variables, and the parts are concatenated again.

    NOTE(review): the split is always into two parts, independent of
    `batch_size_multiplier` -- confirm this is intended when the multiplier
    is not 2.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, forwarded to the batch norm layers.
      reuse: boolean, whether variables of the "discriminator" scope are
        re-used.
      batch_size_multiplier: integer multiplied with self.batch_size to get
        the leading dimension used when flattening the conv features.

    Returns:
      A tuple (out, out_logit, net):
        out: sigmoid of `out_logit`, i.e. the prediction in [0, 1].
        out_logit: output of the final linear layer.
        net: activations of the 1024-unit hidden layer (after lrelu) that
          feed the final linear layer.
    """
    norm_type = self.discriminator_normalization
    use_sn = norm_type == consts.SPECTRAL_NORM
    use_bn = norm_type == consts.BATCH_NORM

    def split_batch_norm(tensor, scope):
        # Normalize each half of the batch on its own; the second call
        # re-uses the variables created by the first.
        first, second = tf.split(tensor, 2, 0)
        first = batch_norm(first, is_training=is_training, scope=scope)
        second = batch_norm(
            second, is_training=is_training, scope=scope, reuse=True)
        return tf.concat([first, second], 0)

    with tf.variable_scope("discriminator", reuse=reuse):
        # Convolutional stage: [bs, h, w, c] -> [bs, h/4, w/4, 128].
        hidden = conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=use_sn)
        hidden = lrelu(hidden)  # [bs, h/2, w/2, 64]
        hidden = conv2d(
            hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=use_sn)
        if use_bn:
            hidden = split_batch_norm(hidden, "d_bn2")
        hidden = lrelu(hidden)

        # Fully connected stage on the flattened feature map.
        rows = self.batch_size * batch_size_multiplier
        flat = tf.reshape(hidden, [rows, -1])  # [bs, h * w * 8]
        features = linear(flat, 1024, scope="d_fc3", use_sn=use_sn)
        if use_bn:
            features = split_batch_norm(features, "d_bn3")
        features = lrelu(features)  # [bs, 1024]

        out_logit = linear(features, 1, scope="d_fc4", use_sn=use_sn)
        out = tf.nn.sigmoid(out_logit)
        return out, out_logit, features