def apply(self, x, y, is_training):
    """Score a batch of images with two conv layers followed by two dense layers.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels; forwarded to `self.batch_norm`.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit
      itself, and the activations that feed the final linear layer.
    """
    spectral = self._spectral_norm
    # The static batch dimension is used to flatten the conv features below.
    num_examples = x.shape.as_list()[0]

    # Resulting shape: [bs, h/2, w/2, 64].
    hidden = conv2d(x, 64, 4, 4, 2, 2, name="d_conv1", use_sn=spectral)
    hidden = lrelu(hidden)

    # Resulting shape: [bs, h/4, w/4, 128].
    hidden = conv2d(hidden, 128, 4, 4, 2, 2, name="d_conv2", use_sn=spectral)
    hidden = self.batch_norm(
        hidden, y=y, is_training=is_training, name="d_bn2")
    hidden = lrelu(hidden)

    # Resulting shape: [bs, h * w * 8].
    hidden = tf.reshape(hidden, [num_examples, -1])

    # Resulting shape: [bs, 1024].
    hidden = linear(hidden, 1024, scope="d_fc3", use_sn=spectral)
    hidden = self.batch_norm(
        hidden, y=y, is_training=is_training, name="d_bn3")
    hidden = lrelu(hidden)

    # Resulting shape: [bs, 1].
    logit = linear(hidden, 1, scope="d_fc4", use_sn=spectral)
    return tf.nn.sigmoid(logit), logit, hidden
def apply(self, z, y, is_training):
    """Generate images from latent codes with two dense and two deconv layers.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels. Ignored by this generator.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    del y  # This architecture is unconditional.
    height, width, channels = self._image_shape
    num_examples = z.shape.as_list()[0]
    quarter_h = height // 4
    quarter_w = width // 4

    hidden = linear(z, 1024, scope="g_fc1")
    hidden = batch_norm(hidden, is_training=is_training, name="g_bn1")
    hidden = lrelu(hidden)

    hidden = linear(hidden, 128 * quarter_h * quarter_w, scope="g_fc2")
    hidden = batch_norm(hidden, is_training=is_training, name="g_bn2")
    hidden = lrelu(hidden)

    # Turn the flat features into a quarter-resolution feature map.
    hidden = tf.reshape(hidden, [num_examples, quarter_h, quarter_w, 128])

    half_res_shape = [num_examples, height // 2, width // 2, 64]
    hidden = deconv2d(hidden, half_res_shape, 4, 4, 2, 2, name="g_dc3")
    hidden = batch_norm(hidden, is_training=is_training, name="g_bn3")
    hidden = lrelu(hidden)

    full_res_shape = [num_examples, height, width, channels]
    hidden = deconv2d(hidden, full_res_shape, 4, 4, 2, 2, name="g_dc4")
    return tf.nn.sigmoid(hidden)
def f(name, x, width, n_out=None):
    """Apply two biased dense layers, each followed by a leaky ReLU.

    Args:
      name: Name of the outer variable scope.
      x: Input tensor handed to the first dense layer.
      width: Output size requested from the first dense layer ("dense1").
      n_out: Output size requested from the second dense layer ("dense2").
        It is forwarded to `ops.linear` unchanged; no fallback is applied
        here when it is left at `None`.

    Returns:
      The activation of the second dense layer.
    """
    layer_plan = (("dense1", width), ("dense2", n_out))
    with tf.variable_scope(name):
        for scope_name, units in layer_plan:
            with tf.variable_scope(scope_name):
                x = ops.lrelu(
                    ops.linear(x, units, use_sn=False, use_bias=True))
    return x
def apply(self, x, y, is_training):
    """Score images with two downscaling ResNet blocks and mean pooling.

    Args:
      x: `Tensor` of shape [batch_size, 28, 28, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels. Required when `self._project_y` is set.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit
      (including the label projection term when enabled), and the spatially
      averaged features that feed the final linear layer.

    Raises:
      ValueError: If the channel count of `x` is not 1 or 3, or if label
        projection is enabled but `y` is None.
    """
    resnet_ops.validate_image_inputs(x, False)
    num_colors = x.shape[3].value
    if num_colors not in [1, 3]:
        raise ValueError(
            "Number of color channels not supported: {}".format(num_colors))

    features = x
    if self._wavelet_deconv:
        # Optional WaveletDeconv layer applied directly to the input.
        features = ops.waveletDeconv(features)

    # Two blocks, to make it the same as the generator. Both downscale; the
    # first consumes the raw colour channels, the second 128 channels.
    for block_number, channels_in in enumerate((num_colors, 128), start=1):
        block = self._resnet_block(
            name="B{}".format(block_number),
            in_channels=channels_in,
            out_channels=128,
            scale="down")
        features = block(features, z=None, y=y, is_training=is_training)

    # Final part - ReLU, then average over the two spatial axes.
    features = tf.nn.relu(features)
    pooled = tf.reduce_mean(features, axis=[1, 2])
    logit = ops.linear(
        pooled, 1, scope="disc_final_fc", use_sn=self._spectral_norm)

    if self._project_y:
        if y is None:
            raise ValueError(
                "You must provide class information y to project.")
        label_embedding = ops.linear(
            y, 128, use_bias=False, scope="embedding_fc",
            use_sn=self._spectral_norm)
        logit += tf.reduce_sum(label_embedding * pooled, axis=1, keepdims=True)

    return tf.nn.sigmoid(logit), logit, pooled
def apply(self, z, y, is_training):
    """Generate images from latent codes with five ResNet blocks.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].

    Raises:
      ValueError: If log2(image_size / 4) is not an integer in [0, 5].
    """
    # Each block upscales by a factor of 2.
    seed_size = 4
    image_size = self._image_shape[0]
    seed_channels = self._ch * self._channels[0]

    # Map noise to the actual seed.
    net = ops.linear(
        z, seed_channels * seed_size * seed_size, scope="fc_noise")
    # Reshape the seed to be a rank-4 Tensor.
    net = tf.reshape(
        net, [-1, seed_size, seed_size, seed_channels], name="fc_reshaped")

    # Number of blocks that have to upscale to reach the target resolution.
    num_up = np.log2(float(image_size) / seed_size)
    if not num_up.is_integer():
        raise ValueError("log2({}/{}) must be an integer.".format(
            image_size, seed_size))
    if not 0 <= num_up <= 5:
        raise ValueError("Invalid image_size {}.".format(image_size))
    num_up = int(num_up)

    for block_idx in range(5):
        # Only the first `num_up` blocks change the resolution.
        block_scale = "up" if block_idx < num_up else "none"
        block = self._resnet_block(
            name="B{}".format(block_idx + 1),
            in_channels=self._ch * self._channels[block_idx],
            out_channels=self._ch * self._channels[block_idx + 1],
            scale=block_scale)
        net = block(net, z=z, y=y, is_training=is_training)

    net = self.batch_norm(
        net, z=z, y=y, is_training=is_training, name="final_norm")
    net = tf.nn.relu(net)
    net = ops.conv2d(
        net, output_dim=self._image_shape[2], k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv")
    return tf.nn.sigmoid(net)
def apply(self, x, y, is_training):
    """Score images with six groups of five ResNet blocks.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit, and
      the flattened features that feed the final linear layer.
    """
    resnet_ops.validate_image_inputs(x)
    num_colors = x.get_shape().as_list()[-1]
    assert num_colors in [1, 3]

    ch = 64
    net = ops.conv2d(
        x, output_dim=ch // 4, k_h=3, k_w=3, d_h=1, d_w=1, name="color_conv")

    width_in = ch // 4
    width_out = ch // 2
    for superblock in range(6):
        # Five resolution- and width-preserving blocks per group.
        for i in range(5):
            block = self._resnet_block(
                name="B_{}_{}".format(superblock, i),
                in_channels=width_in,
                out_channels=width_in,
                scale="none")
            net = block(net, z=None, y=y, is_training=is_training)

        # We want to downscale 5 times, so the last group has no transition.
        if superblock >= 5:
            continue
        # The "_up" suffix is part of the variable scope name, although this
        # block downscales.
        block = self._resnet_block(
            name="B_{}_up".format(superblock),
            in_channels=width_in,
            out_channels=width_out,
            scale="down")
        net = block(net, z=None, y=y, is_training=is_training)
        width_in, width_out = width_in * 2, width_out * 2

    # Final part: flatten to 4 * 4 * 8 * ch features per example.
    net = tf.reshape(net, [-1, 4 * 4 * 8 * ch])
    logit = ops.linear(
        net, 1, scope="disc_final_fc", use_sn=self._spectral_norm)
    return tf.nn.sigmoid(logit), logit, net
def discriminator(self, x, y, is_training, reuse=False, rotation_head=False):
    """Discriminator network with an optional rotation prediction head.

    Args:
      x: an input image tensor.
      y: Tensor with label indices.
      is_training: boolean, whether or not it is a training call.
      reuse: boolean, whether or not to reuse the variables.
      rotation_head: If True add a rotation head on top of the discriminator
        logits.

    Returns:
      When `rotation_head` is False, whatever the parent class discriminator
      returns. Otherwise a tuple of:
        real_probs: the [0, 1] probability tensor of x being real images.
        real_scores: the unbounded score tensor of x being real images.
        rotation_scores: the scores of x being rotated in one of the
          NUM_ROTATIONS directions.
    """
    parent_outputs = super(SSGAN, self).discriminator(
        x, y=y, is_training=is_training, reuse=reuse)
    if not rotation_head:
        return parent_outputs
    real_probs, real_scores, final = parent_outputs

    # Hack to get whether to use spectral norm for the rotation head below.
    # Spectral norm is configured on the architecture (AbstractGenerator or
    # AbstractDiscriminator), and the layer below is part of the architecture,
    # so a throwaway discriminator is built just to read its setting.
    discriminator_classes = {
        c.RESNET5_ARCH: resnet5.Discriminator,
        c.RESNET5_BIGGAN_ARCH: resnet5_biggan.Discriminator,
        c.RESNET_CIFAR: resnet_cifar.Discriminator,
        c.SNDCGAN_ARCH: sndcgan.Discriminator,
    }
    probe = discriminator_classes[self._architecture]()
    use_sn = probe._spectral_norm  # pylint: disable=protected-access

    with tf.variable_scope("discriminator_rotation", reuse=reuse):
        # Flatten the final representation to [batch_size, features].
        flattened = tf.reshape(final, (tf.shape(x)[0], -1))
        rotation_scores = linear(
            flattened, NUM_ROTATIONS, scope="score_classify", use_sn=use_sn)
    return real_probs, real_scores, rotation_scores
def apply(self, z, y, is_training):
    """Build the generator network for the given inputs.

    The network maps the latent code to a 4x4 seed with 8 * 64 channels and
    then runs six groups of five resolution-preserving ResNet blocks. Each of
    the first five groups is followed by an upscaling block that also halves
    the channel count.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].

    Raises:
      ValueError: If `z` is not a rank-2 tensor.
    """
    z_shape = z.get_shape().as_list()
    if len(z_shape) != 2:
        raise ValueError(
            "Expected shape [batch_size, z_dim], got %s." % z_shape)
    ch = 64
    colors = self._image_shape[2]

    # Map noise to the actual seed.
    output = ops.linear(z, 4 * 4 * 8 * ch, scope="fc_noise")
    # Reshape the seed to be a rank-4 Tensor.
    output = tf.reshape(output, [-1, 4, 4, 8 * ch], name="fc_reshaped")

    in_channels = 8 * ch
    out_channels = 4 * ch
    for superblock in range(6):
        for i in range(5):
            block = self._resnet_block(
                name="B_{}_{}".format(superblock, i),
                in_channels=in_channels,
                out_channels=in_channels,
                scale="none")
            output = block(output, z=z, y=y, is_training=is_training)
        # We want to upscale 5 times.
        if superblock < 5:
            block = self._resnet_block(
                name="B_{}_up".format(superblock),
                in_channels=in_channels,
                out_channels=out_channels,
                scale="up")
            output = block(output, z=z, y=y, is_training=is_training)
            # Floor division keeps the channel counts integral. True division
            # (`/=`) would turn them into floats, which are then passed on as
            # layer widths. Every value here is even, so the result is exact.
            in_channels //= 2
            out_channels //= 2

    output = ops.conv2d(
        output, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv")
    output = tf.nn.sigmoid(output)
    return output
def apply(self, x, y, is_training):
    """Score images with six downscaling ResNet blocks and mean pooling.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit, and
      the spatially averaged features that feed the final linear layer.

    Raises:
      ValueError: If the channel count of `x` is not 1 or 3.
    """
    resnet_ops.validate_image_inputs(x)
    num_colors = x.shape[3].value
    if num_colors not in [1, 3]:
        raise ValueError(
            "Number of color channels not supported: {}".format(num_colors))

    # Stem block: colour channels -> self._ch channels.
    stem = self._resnet_block(
        name="B0",
        in_channels=num_colors,
        out_channels=self._ch,
        scale="down")
    net = stem(x, z=None, y=y, is_training=is_training)

    # Five further blocks whose widths are self._ch times the multipliers in
    # self._channels.
    for block_idx in range(5):
        width_in = self._ch * self._channels[block_idx]
        width_out = self._ch * self._channels[block_idx + 1]
        block = self._resnet_block(
            name="B{}".format(block_idx + 1),
            in_channels=width_in,
            out_channels=width_out,
            scale="down")
        net = block(net, z=None, y=y, is_training=is_training)

    net = tf.nn.relu(net)
    pre_logits = tf.reduce_mean(net, axis=[1, 2])
    logit = ops.linear(
        pre_logits, 1, scope="disc_final_fc", use_sn=self._spectral_norm)
    return tf.nn.sigmoid(logit), logit, pre_logits
def apply(self, x, y, is_training):
    """Score images with seven leaky-ReLU conv layers and one linear layer.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels. Unused.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference. Unused.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit, and
      the flattened conv features that feed the final linear layer.
    """
    del is_training, y
    use_sn = self._spectral_norm

    # In compare gan framework, the image preprocess normalize image pixel to
    # range [0, 1], while author used [-1, 1]. Apply this trick to input image
    # instead of changing our preprocessing function.
    x = x * 2.0 - 1.0

    # (output channels, square kernel size, stride) for d_conv1 .. d_conv7.
    conv_specs = (
        (64, 3, 1),
        (128, 4, 2),
        (128, 3, 1),
        (256, 4, 2),
        (256, 3, 1),
        (512, 4, 2),
        (512, 3, 1),
    )
    net = x
    for layer_number, (filters, kernel, stride) in enumerate(conv_specs, 1):
        net = conv2d(
            net, filters, kernel, kernel, stride, stride,
            name="d_conv{}".format(layer_number), use_sn=use_sn)
        net = lrelu(net, leak=0.1)

    # Flatten using the static batch dimension of the input.
    num_examples = x.shape.as_list()[0]
    net = tf.reshape(net, [num_examples, -1])
    logit = linear(net, 1, scope="d_fc1", use_sn=use_sn)
    return tf.nn.sigmoid(logit), logit, net
def apply(self, z, y, is_training):
    """Generate images from a 6x6 seed with three upscaling ResNet blocks.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size, 32, 32, colors] with values in [0, 1].
      NOTE(review): the seed is 6x6 and three blocks use scale="up"; if each
      doubles the resolution the output is 48x48 rather than 32x32 - confirm.
    """
    ch = 64
    colors = self._image_shape[2]
    num_examples = z.get_shape().as_list()[0]

    # (input, output) channel multipliers of `ch` for blocks B1..B3.
    channel_multipliers = [(8, 4), (4, 2), (2, 1)]

    net = ops.linear(z, 6 * 6 * 512, scope="fc_noise")
    net = tf.reshape(net, [num_examples, 6, 6, 512], name="fc_reshaped")

    for block_number, (mult_in, mult_out) in enumerate(channel_multipliers, 1):
        block = self._resnet_block(
            name="B{}".format(block_number),
            in_channels=ch * mult_in,
            out_channels=ch * mult_out,
            scale="up")
        net = block(net, z=z, y=y, is_training=is_training)

    # NOTE(review): other call sites in this file pass `name=` to
    # self.batch_norm; confirm that `scope=` is accepted here.
    net = self.batch_norm(
        net, z=z, y=y, is_training=is_training, scope="final_norm")
    net = tf.nn.relu(net)
    net = ops.conv2d(
        net, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv")
    return tf.nn.sigmoid(net)
def discriminator_with_rotation_head(self, x, y, is_training):
    """Discriminator network with an additional rotation prediction head.

    Args:
      x: an input image tensor.
      y: Tensor with label indices.
      is_training: boolean, whether or not it is a training call.

    Returns:
      real_probs: the [0, 1] probability tensor of x being real images.
      real_scores: the unbounded score tensor of x being real images.
      rotation_scores: the scores of x being rotated in one of the
        NUM_ROTATIONS directions.
    """
    real_probs, real_scores, final = self.discriminator(
        x=x, y=y, is_training=is_training)
    # The rotation head uses the same spectral norm setting as the
    # discriminator architecture.
    use_sn = self._discriminator._spectral_norm  # pylint: disable=protected-access

    with tf.variable_scope("discriminator_rotation", reuse=tf.AUTO_REUSE):
        # Flatten the final representation to [batch_size, features].
        flattened = tf.reshape(final, (tf.shape(x)[0], -1))
        rotation_scores = linear(
            flattened, NUM_ROTATIONS, scope="score_classify", use_sn=use_sn)
    return real_probs, real_scores, rotation_scores
def apply(self, x):
    """Map the auxiliary input to one output per group with a 2-layer MLP.

    Args:
      x: Tensor of shape
        [batch_size, 2 * aux_ip_size * aux_ip_size * aux_ip_channels].

    Returns:
      The output of the second linear layer, which has `self._num_groups`
      units. No activation is applied to it.
    """
    hidden = tf.nn.relu(linear(x, self._aux_ip_channels, scope="aux_fc1"))
    return linear(hidden, self._num_groups, scope="aux_fc2")
def apply(self, x, y, is_training):
    """Score images with ResNet blocks, sum pooling and optional projection.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels. Required when `self._project_y` is set.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit
      (including the label projection term when enabled), and the spatially
      summed features that feed the final linear layer.

    Raises:
      ValueError: If label projection is enabled but `y` is None.
    """
    y_shape = None if y is None else y.shape
    logging.info("[Discriminator] inputs are x=%s, y=%s", x.shape, y_shape)
    resnet_ops.validate_image_inputs(x)

    in_channels, out_channels = self._get_in_out_channels(
        colors=x.shape[-1].value, resolution=x.shape[1].value)
    last_block_idx = len(in_channels) - 1

    net = x
    for block_idx in range(last_block_idx + 1):
        name = "B{}".format(block_idx + 1)
        # Every block except the last one downscales.
        block = self._resnet_block(
            name=name,
            in_channels=in_channels[block_idx],
            out_channels=out_channels[block_idx],
            scale="down" if block_idx != last_block_idx else "none")
        net = block(net, z=None, y=y, is_training=is_training)
        if name in self._blocks_with_attention:
            logging.info("[Discriminator] Applying non-local block to %s",
                         net.shape)
            net = ops.non_local_block(
                net, "non_local_block", use_sn=self._spectral_norm)

    # Final part
    logging.info("[Discriminator] before final processing: %s", net.shape)
    net = tf.nn.relu(net)
    h = tf.math.reduce_sum(net, axis=[1, 2])
    out_logit = ops.linear(h, 1, scope="final_fc", use_sn=self._spectral_norm)
    logging.info("[Discriminator] after final processing: %s", net.shape)

    if self._project_y:
        if y is None:
            raise ValueError(
                "You must provide class information y to project.")
        with tf.variable_scope("embedding_fc"):
            # We do not use ops.linear() below since it does not have an
            # option to override the initializer.
            kernel_shape = [y.shape[1], out_channels[-1]]
            kernel = tf.get_variable(
                "kernel", kernel_shape, tf.float32,
                initializer=tf.initializers.glorot_normal())
            if self._spectral_norm:
                kernel = ops.spectral_norm(kernel)
            embedded_y = tf.matmul(y, kernel)
            logging.info("[Discriminator] embedded_y for projection: %s",
                         embedded_y.shape)
            out_logit += tf.reduce_sum(embedded_y * h, axis=1, keepdims=True)

    return tf.nn.sigmoid(out_logit), out_logit, h
def apply(self, z, y, is_training):
    """Generate images with upscaling ResNet blocks and optional attention.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    def shape_or_none(t):
        return None if t is None else t.shape

    logging.info("[Generator] inputs are z=%s, y=%s", z.shape,
                 shape_or_none(y))
    # Each block upscales by a factor of 2.
    seed_size = 4
    z_dim = z.shape[1].value

    in_channels, out_channels = self._get_in_out_channels()
    num_blocks = len(in_channels)

    # Optional learned embeddings of the latent code and the labels.
    if self._embed_z:
        z = ops.linear(z, z_dim, scope="embed_z", use_sn=False,
                       use_bias=self._embed_bias)
    if self._embed_y:
        y = ops.linear(y, self._embed_y_dim, scope="embed_y", use_sn=False,
                       use_bias=self._embed_bias)

    y_per_block = [y] * num_blocks
    if not self._hierarchical_z:
        # Every block, and the seed, sees the full latent code.
        z0 = z
        z_per_block = [z] * num_blocks
    else:
        # Split z into one chunk for the seed plus one chunk per block.
        z_chunks = tf.split(z, num_blocks + 1, axis=1)
        z0, z_per_block = z_chunks[0], z_chunks[1:]
        if y is not None:
            # Each block is conditioned on its z chunk joined with y.
            y_per_block = [tf.concat([zi, y], 1) for zi in z_per_block]

    logging.info("[Generator] z0=%s, z_per_block=%s, y_per_block=%s",
                 z0.shape, [str(shape_or_none(t)) for t in z_per_block],
                 [str(shape_or_none(t)) for t in y_per_block])

    # Map noise to the actual seed.
    seed_channels = in_channels[0]
    net = ops.linear(
        z0,
        seed_channels * seed_size * seed_size,
        scope="fc_noise",
        use_sn=self._spectral_norm)
    # Reshape the seed to be a rank-4 Tensor.
    net = tf.reshape(
        net, [-1, seed_size, seed_size, seed_channels], name="fc_reshaped")

    for block_idx in range(num_blocks):
        name = "B{}".format(block_idx + 1)
        block = self._resnet_block(
            name=name,
            in_channels=in_channels[block_idx],
            out_channels=out_channels[block_idx],
            scale="up")
        net = block(
            net,
            z=z_per_block[block_idx],
            y=y_per_block[block_idx],
            is_training=is_training)
        if name in self._blocks_with_attention:
            logging.info("[Generator] Applying non-local block to %s",
                         net.shape)
            net = ops.non_local_block(
                net, "non_local_block", use_sn=self._spectral_norm)

    # Final processing of the net.
    # Use unconditional batch norm.
    logging.info("[Generator] before final processing: %s", net.shape)
    net = ops.batch_norm(net, is_training=is_training, name="final_norm")
    net = tf.nn.relu(net)
    net = ops.conv2d(
        net, output_dim=self._image_shape[2], k_h=3, k_w=3, d_h=1, d_w=1,
        name="final_conv", use_sn=self._spectral_norm)
    logging.info("[Generator] after final processing: %s", net.shape)
    # Shift tanh's [-1, 1] output into [0, 1].
    return (tf.nn.tanh(net) + 1.0) / 2.0
def apply(self, z, y, is_training):
    """Record the call and produce images from one sigmoid-activated layer.

    Args:
      z: Tensor with the latent code; its first dimension is used as the
        batch size of the output.
      y: Labels; only recorded, not used in the computation.
      is_training: Only recorded, not used in the computation.

    Returns:
      A tensor of shape [batch_size] + self._image_shape.
    """
    self.call_arg_list.append(dict(z=z, y=y, is_training=is_training))
    num_examples = z.shape[0].value
    output_shape = [num_examples] + list(self._image_shape)
    flat_images = tf.nn.sigmoid(
        arch_ops.linear(z, np.prod(self._image_shape), scope="fc_noise"))
    return tf.reshape(flat_images, output_shape)
def apply(self, x, y, is_training):
    """Record the call and score images with mean pooling plus one layer.

    Args:
      x: Image tensor; it is averaged over axes 1 and 2.
      y: Labels; only recorded, not used in the computation.
      is_training: Only recorded, not used in the computation.

    Returns:
      Tuple of the sigmoid of the logit, the logit, and the pooled features.
    """
    self.call_arg_list.append(dict(x=x, y=y, is_training=is_training))
    pooled = tf.reduce_mean(x, axis=[1, 2])
    logit = arch_ops.linear(pooled, 1)
    return tf.nn.sigmoid(logit), logit, pooled
def apply(self, z, y, is_training):
    """Generate images with one dense layer followed by four deconv layers.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] of one hot encoded labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    def norm_relu(tensor, bn_name):
        # Batch norm (conditioned on z and y) followed by a ReLU.
        tensor = self.batch_norm(
            tensor, z=z, y=y, is_training=is_training, name=bn_name)
        return tf.nn.relu(tensor)

    batch_size = z.shape[0].value
    s_h, s_w, colors = self._image_shape
    # Spatial sizes of the intermediate feature maps.
    s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
    s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
    s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)

    net = linear(z, s_h8 * s_w8 * 512, scope="g_fc1")
    net = norm_relu(net, "g_bn1")
    net = tf.reshape(net, [batch_size, s_h8, s_w8, 512])

    # (height, width, channels, deconv name, batch norm name) of each
    # stride-2 4x4 deconv stage.
    upsampling_stages = (
        (s_h4, s_w4, 256, "g_dc2", "g_bn2"),
        (s_h2, s_w2, 128, "g_dc3", "g_bn3"),
        (s_h, s_w, 64, "g_dc4", "g_bn4"),
    )
    for out_h, out_w, channels, dc_name, bn_name in upsampling_stages:
        net = deconv2d(
            net, [batch_size, out_h, out_w, channels], 4, 4, 2, 2,
            name=dc_name)
        net = norm_relu(net, bn_name)

    # Stride-1 3x3 deconv down to the requested number of colour channels.
    net = deconv2d(
        net, [batch_size, s_h, s_w, colors], 3, 3, 1, 1, name="g_dc5")
    out = tf.tanh(net)

    # This normalization from [-1, 1] to [0, 1] is introduced for consistency
    # with other models.
    return tf.div(out + 1.0, 2.0)
def apply(self, z, y, is_training):
    """Generate images with alternating plain and upscaling ResNet blocks.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    def shape_or_none(t):
        return None if t is None else t.shape

    logging.info("[Generator] inputs are z=%s, y=%s", z.shape,
                 shape_or_none(y))
    seed_size = 4

    if self._embed_y:
        y = ops.linear(y, self._embed_y_dim, scope="embed_y", use_sn=False,
                       use_bias=False)
    if y is not None:
        # From here on both z and y are the concatenation of code and labels.
        y = tf.concat([z, y], axis=1)
        z = y

    in_channels, out_channels = self._get_in_out_channels()
    num_blocks = len(in_channels)
    seed_channels = in_channels[0]

    # Map noise to the actual seed.
    net = ops.linear(
        z, seed_channels * seed_size * seed_size, scope="fc_noise",
        use_sn=self._spectral_norm)
    # Reshape the seed to be a rank-4 Tensor.
    net = tf.reshape(
        net, [-1, seed_size, seed_size, seed_channels], name="fc_reshaped")

    for block_idx in range(num_blocks):
        # Blocks alternate: even indices keep the resolution, odd ones upscale.
        scale = "up" if block_idx % 2 else "none"
        block = self._resnet_block(
            name="B{}".format(block_idx + 1),
            in_channels=in_channels[block_idx],
            out_channels=out_channels[block_idx],
            scale=scale)
        net = block(net, z=z, y=y, is_training=is_training)
        # At resolution 64x64 there is a self-attention block.
        if scale == "up" and net.shape[1].value == 64:
            logging.info("[Generator] Applying non-local block to %s",
                         net.shape)
            net = ops.non_local_block(
                net, "non_local_block", use_sn=self._spectral_norm)

    # Final processing of the net.
    # Use unconditional batch norm.
    logging.info("[Generator] before final processing: %s", net.shape)
    net = ops.batch_norm(net, is_training=is_training, name="final_norm")
    net = tf.nn.relu(net)

    colors = self._image_shape[2]
    if not self._experimental_fast_conv_to_rgb:
        net = ops.conv2d(
            net, output_dim=colors, k_h=3, k_w=3, d_h=1, d_w=1,
            name="final_conv", use_sn=self._spectral_norm)
    else:
        # Convolve to 128 channels and keep only the first `colors` of them.
        net = ops.conv2d(
            net, output_dim=128, k_h=3, k_w=3, d_h=1, d_w=1,
            name="final_conv", use_sn=self._spectral_norm)
        net = net[:, :, :, :colors]

    logging.info("[Generator] after final processing: %s", net.shape)
    # Shift tanh's [-1, 1] output into [0, 1].
    return (tf.nn.tanh(net) + 1.0) / 2.0
def apply(self, z, y, is_training):
    """Generate images with one dense layer followed by four 5x5 deconvs.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size] + self._image_shape with values in [0, 1].
    """
    def norm_relu(tensor, bn_name):
        # Batch norm (conditioned on z and y) followed by a ReLU.
        tensor = self.batch_norm(
            tensor, z=z, y=y, is_training=is_training, name=bn_name)
        return tf.nn.relu(tensor)

    gf_dim = 64  # Dimension of filters in first convolutional layer.
    bs = z.shape[0].value
    s_h, s_w, colors = self._image_shape
    # Spatial sizes of the intermediate feature maps.
    s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
    s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
    s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
    s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)

    seed_channels = gf_dim * 8
    net = linear(z, seed_channels * s_h16 * s_w16, scope="g_fc1")
    net = tf.reshape(net, [-1, s_h16, s_w16, seed_channels])
    net = norm_relu(net, "g_bn1")

    # (height, width, channels, deconv name, batch norm name) of each
    # stride-2 deconv stage that is followed by batch norm and ReLU.
    upsampling_stages = (
        (s_h8, s_w8, gf_dim * 4, "g_dc1", "g_bn2"),
        (s_h4, s_w4, gf_dim * 2, "g_dc2", "g_bn3"),
        (s_h2, s_w2, gf_dim * 1, "g_dc3", "g_bn4"),
    )
    for out_h, out_w, channels, dc_name, bn_name in upsampling_stages:
        net = deconv2d(
            net, [bs, out_h, out_w, channels], 5, 5, 2, 2, name=dc_name)
        net = norm_relu(net, bn_name)

    net = deconv2d(net, [bs, s_h, s_w, colors], 5, 5, 2, 2, name="g_dc4")
    # Shift tanh's [-1, 1] output into [0, 1].
    return 0.5 * tf.nn.tanh(net) + 0.5
def apply(self, x):
    """Map the auxiliary input to a single output with one linear layer.

    Args:
      x: Tensor of shape
        [batch_size, 2 * aux_ip_size * aux_ip_size * aux_ip_channels].

    Returns:
      The output of the linear layer, which has one unit. No activation is
      applied to it.
    """
    return linear(x, 1, scope="aux_fc")
def apply(self, z, y, is_training):
    """Generate 28x28 images from a 7x7 seed with two upscaling blocks.

    Args:
      z: `Tensor` of shape [batch_size, z_dim] with latent code.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels.
      is_training: boolean, are we in train or eval model.

    Returns:
      A tensor of size [batch_size, 28, 28, colors] with values in [0, 1].
    """
    assert self._image_shape[0] == 28
    assert self._image_shape[1] == 28
    num_blocks = 2  # update network to generate 28x28 noise
    z_dim = z.shape[1].value

    # Optional learned embeddings of the latent code and the labels.
    if self._embed_z:
        z = ops.linear(z, z_dim, scope="embed_z", use_sn=self._spectral_norm)
    if self._embed_y:
        y = ops.linear(y, z_dim, scope="embed_y", use_sn=self._spectral_norm)

    y_per_block = [y] * num_blocks
    if not self._hierarchical_z:
        # Every block, and the seed, sees the full latent code.
        z0 = z
        z_per_block = [z] * num_blocks
    else:
        # Split z into one chunk for the seed plus one chunk per block.
        z_chunks = tf.split(z, num_blocks + 1, axis=1)
        z0, z_per_block = z_chunks[0], z_chunks[1:]
        if y is not None:
            # Each block is conditioned on its z chunk joined with y.
            y_per_block = [tf.concat([zi, y], 1) for zi in z_per_block]

    init_channels = 256
    net = ops.linear(
        z0, 7 * 7 * init_channels, scope="fc_noise",
        use_sn=self._spectral_norm)
    net = tf.reshape(net, [-1, 7, 7, init_channels], name="fc_reshaped")

    for block_idx, (block_z, block_y) in enumerate(
            zip(z_per_block, y_per_block)):
        block = self._resnet_block(
            name="B{}".format(block_idx + 1),
            in_channels=init_channels,
            out_channels=init_channels,
            scale="up")
        net = block(net, z=block_z, y=block_y, is_training=is_training)

    # Final processing of the output.
    net = self.batch_norm(
        net, z=z, y=y, is_training=is_training, name="final_norm")
    net = tf.nn.relu(net)
    net = ops.conv2d(
        net,
        output_dim=self._image_shape[2],
        k_h=3,
        k_w=3,
        d_h=1,
        d_w=1,
        name="final_conv",
        use_sn=self._spectral_norm,
    )
    if self._wavelet_deconv:
        # Optional WaveletDeconv layer applied before the output activation.
        net = ops.waveletDeconv(net)
    return tf.nn.sigmoid(net)
def discriminator_with_additonal_heads(self, x, y, is_training):
    """Discriminator architecture with additional heads.

    Possible heads built on top of feature representation of the
    discriminator:
    (1) Classify the image to the correct class.
    (2) Classify the rotation of the image.

    Args:
      x: An input image tensor.
      y: One-hot encoded label. Passing all zeros implies no label was passed.
      is_training: boolean, whether or not it is a training call.

    Returns:
      Tuple of 5 Tensors:
      (1) discriminator predictions (in [0, 1]),
      (2) the corresponding logits,
      (3) predictions (logits) of the rotation of x from the auxiliary head,
          or None when the rotation head is disabled,
      (4) logits of the class prediction from the auxiliary head, or None
          when it is not built,
      (5) indicator (1.0 / 0.0 per example) of whether the row of y sums to
          more than 0.5, i.e. whether a label was passed.
    """
    d_probs, d_logits, x_rep = self.discriminator(
        x, y=y, is_training=is_training)
    use_sn = self.discriminator._spectral_norm  # pylint: disable=protected-access

    # 1.0 for examples that carry a label, 0.0 for all-zero rows of y.
    label_mass = tf.reduce_sum(y, axis=1, keepdims=True)
    has_label = tf.cast(label_mass, tf.float32) > 0.5
    is_label_available = tf.cast(has_label, tf.float32)
    assert x_rep.shape.ndims == 2, x_rep.shape

    # Predict the rotation of the image.
    rotation_logits = None
    if "rotation" in self._self_supervision:
        with tf.variable_scope("discriminator_rotation", reuse=tf.AUTO_REUSE):
            rotation_logits = ops.linear(
                x_rep, NUM_ROTATIONS, scope="score_classify", use_sn=use_sn)
            logging.info("[Discriminator] rotation head %s -> %s",
                         x_rep.shape, rotation_logits)

    # Without projection there is no class head and the logits stay as is.
    if not self._project_y:
        return d_probs, d_logits, rotation_logits, None, is_label_available

    # Predict the class of the image.
    aux_logits = None
    if self._use_predictor:
        with tf.variable_scope("discriminator_predictor", reuse=tf.AUTO_REUSE):
            aux_logits = ops.linear(
                x_rep, y.shape[1], use_bias=True, scope="predictor_linear",
                use_sn=use_sn)
            # Apply the projection discriminator if needed.
            if self._use_soft_pred:
                y_predicted = tf.nn.softmax(aux_logits)
            else:
                y_predicted = tf.one_hot(
                    tf.arg_max(aux_logits, 1), aux_logits.shape[1])
            # Keep provided labels; fill in the prediction where none exists.
            y = (1.0 - is_label_available) * y_predicted + is_label_available * y
            y = tf.stop_gradient(y)
            logging.info(
                "[Discriminator] %s -> aux_logits=%s, y_predicted=%s",
                aux_logits.shape, aux_logits.shape, y_predicted.shape)

    # Projection term: inner product of class embedding and representation.
    class_embedding = self.get_class_embedding(
        y=y, embedding_dim=x_rep.shape[-1].value, use_sn=use_sn)
    d_logits += tf.reduce_sum(class_embedding * x_rep, axis=1, keepdims=True)
    d_probs = tf.nn.sigmoid(d_logits)
    return d_probs, d_logits, rotation_logits, aux_logits, is_label_available
def apply(self, x, y, is_training):
    """Score images with four stride-2 5x5 conv layers and one linear layer.

    Args:
      x: `Tensor` of shape [batch_size, ?, ?, ?] with real or fake images.
      y: `Tensor` of shape [batch_size, num_classes] with one hot encoded
        labels; forwarded to `self.batch_norm`.
      is_training: Boolean, whether the architecture should be constructed for
        training or inference.

    Returns:
      Tuple of 3 Tensors: the sigmoid of the final logit, the final logit, and
      the (unflattened) output of the last conv stage.
    """
    use_sn = self._spectral_norm
    bs = x.shape[0].value
    df_dim = 64  # Dimension of filters in the first convolutional layer.

    # The first conv layer has no batch norm.
    net = conv2d(x, df_dim, 5, 5, 2, 2, name="d_conv1", use_sn=use_sn)
    net = lrelu(net)

    # (filter multiplier, conv name, batch norm name) of the remaining stages.
    conv_stages = (
        (2, "d_conv2", "d_bn1"),
        (4, "d_conv3", "d_bn2"),
        (8, "d_conv4", "d_bn3"),
    )
    for multiplier, conv_name, bn_name in conv_stages:
        net = conv2d(
            net, df_dim * multiplier, 5, 5, 2, 2, name=conv_name,
            use_sn=use_sn)
        net = self.batch_norm(net, y=y, is_training=is_training, name=bn_name)
        net = lrelu(net)

    # Only the input of the linear layer is flattened; `net` itself is
    # returned with its conv shape.
    flattened = tf.reshape(net, [bs, -1])
    out_logit = linear(flattened, 1, scope="d_fc4", use_sn=use_sn)
    return tf.nn.sigmoid(out_logit), out_logit, net