def __loss(self):
    print("****** Compute Loss ******")
    # Per-example negative log-likelihood over the unmasked pixels.
    if self.network_type == 'binary':
        self.loss_nll = bernoulli_loss(self.x, self.pixel_params,
                                       masks=self.masks, output_mean=False)
    else:
        self.loss_nll = mix_logistic_loss(self.x, self.pixel_params,
                                          masks=self.masks, output_mean=False)
    self.bits_per_dim = tf.reduce_mean(
        bits_per_dim_tf(nll=self.loss_nll,
                        dim=tf.reduce_sum(1. - self.masks, axis=[1, 2]) *
                        self.num_channels))
    self.loss_nll = tf.reduce_mean(self.loss_nll)

    # Latent regularizer; self.lam acts as a free-bits style floor so only
    # divergence above it is penalized.
    self.lam = 0.0
    if self.reg_type is None:
        self.loss_reg = 0.
    elif self.reg_type == 'kld':
        self.kld = compute_gaussian_kld(self.z_mu, self.z_log_sigma_sq)
        self.loss_reg = self.beta * tf.maximum(self.lam, self.kld)
    elif self.reg_type == 'mmd':
        # self.mmd = estimate_mmd(tf.random_normal(int_shape(self.z)), self.z)
        self.mmd = estimate_mmd(
            tf.random_normal(tf.stack([256, self.z_dim])), self.z)
        self.loss_reg = self.beta * tf.maximum(self.lam, self.mmd)
    elif self.reg_type == 'tc':
        self.mi, self.tc, self.dwkld = estimate_mi_tc_dwkld(
            self.z, self.z_mu, self.z_log_sigma_sq, N=10000)
        self.loss_reg = self.mi + self.beta * self.tc + self.dwkld

    self.loss = self.loss_nll + self.loss_reg
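# A minimal NumPy sketch of the closed-form KL(q(z|x) || N(0, I)) that
# `compute_gaussian_kld` presumably evaluates above. The exact reduction over
# the batch and latent axes is an assumption, not this repo's implementation.
import numpy as np

def gaussian_kld_sketch(z_mu, z_log_sigma_sq):
    # KL(N(mu, sigma^2) || N(0, 1)) summed over latent dims, mean over batch
    kld = -0.5 * np.sum(
        1. + z_log_sigma_sq - np.square(z_mu) - np.exp(z_log_sigma_sq), axis=1)
    return np.mean(kld)

# The free-bits floor used above then clamps this from below:
# loss_reg = beta * max(lam, kld).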
def __loss(self):
    print("****** Compute Loss ******")
    if self.network_type == 'binary':
        self.loss = bernoulli_loss(self.x, self.pixel_params,
                                   masks=self.masks, output_mean=False)
    else:
        self.loss = mix_logistic_loss(self.x, self.pixel_params,
                                      masks=self.masks, output_mean=False)
    self.bits_per_dim = tf.reduce_mean(
        bits_per_dim_tf(nll=self.loss,
                        dim=tf.reduce_sum(1. - self.masks, axis=[1, 2]) *
                        self.num_channels))
    self.loss = tf.reduce_mean(self.loss)
    self.loss_nll = self.loss
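# A minimal sketch of the nats-to-bits conversion that `bits_per_dim_tf`
# presumably performs, given the call above: an NLL summed over `dim` unmasked
# pixel dimensions, divided by dim * ln(2). This is an assumption from usage,
# not the repo's own implementation.
import numpy as np

def bits_per_dim_sketch(nll, dim):
    # nll: total negative log-likelihood in nats for one example
    # dim: number of unmasked dimensions the nll was summed over
    return nll / (dim * np.log(2.))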
def _loss(self, x, outputs):
    return tf.reduce_mean(bernoulli_loss(x, outputs, sum_all=False))
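# A minimal sketch of the per-example Bernoulli negative log-likelihood that
# `bernoulli_loss` presumably computes. Whether it takes probabilities or
# logits is an assumption; this version takes probabilities in (0, 1).
import numpy as np

def bernoulli_nll_sketch(x, p, eps=1e-7):
    p = np.clip(p, eps, 1. - eps)
    # elementwise binary cross-entropy, summed over pixels per example
    return -np.sum(x * np.log(p) + (1. - x) * np.log(1. - p),
                   axis=tuple(range(1, x.ndim)))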
def __model(self):
    print("****** Building Graph ******")
    # placeholders
    if self.network_type == 'binary':
        self.num_channels = 1
    else:
        self.num_channels = 3
    self.x = tf.placeholder(tf.float32,
                            shape=(self.batch_size, self.img_size,
                                   self.img_size, self.num_channels))
    self.x_bar = tf.placeholder(tf.float32,
                                shape=(self.batch_size, self.img_size,
                                       self.img_size, self.num_channels))
    self.is_training = tf.placeholder(tf.bool, shape=())
    self.dropout_p = tf.placeholder(tf.float32, shape=())
    self.masks = tf.placeholder(tf.float32,
                                shape=(self.batch_size, self.img_size,
                                       self.img_size))
    self.input_masks = tf.placeholder(tf.float32,
                                      shape=(self.batch_size, self.img_size,
                                             self.img_size))
    # choose network size
    if self.img_size == 32:
        if self.network_type == 'large':
            encoder = conv_encoder_32_large_bn
            decoder = conv_decoder_32_large_mixture_logistic
            encoder_q = conv_encoder_32_q
        else:
            raise Exception("unknown network type")
    elif self.img_size == 28:
        if self.network_type == 'binary':
            encoder = conv_encoder_28_binary
            decoder = conv_decoder_28_binary
            encoder_q = conv_encoder_28_binary_q
        else:
            raise Exception("unknown network type")
    kwargs = {
        "nonlinearity": self.nonlinearity,
        "bn": self.bn,
        "kernel_initializer": self.kernel_initializer,
        "kernel_regularizer": self.kernel_regularizer,
        "is_training": self.is_training,
        "counters": self.counters,
    }
    with arg_scope([encoder, decoder, encoder_q], **kwargs):
        self.num_particles = 16
        # Mask out the hidden pixels and append the mask as an extra channel.
        inputs = self.x * broadcast_masks_tf(self.masks,
                                             num_channels=self.num_channels)
        inputs = tf.concat(
            [inputs, broadcast_masks_tf(self.masks, num_channels=1)], axis=-1)
        inputs_pos = tf.concat(
            [self.x, broadcast_masks_tf(self.masks, num_channels=1)], axis=-1)
        # Run the prior (masked) and posterior (full) inputs through the
        # encoder as one batch, then split the outputs.
        inputs = tf.concat([inputs, inputs_pos], axis=0)
        z_mu, z_log_sigma_sq = encoder(inputs, self.z_dim)
        self.z_mu_pr, self.z_mu = (z_mu[:self.batch_size],
                                   z_mu[self.batch_size:])
        self.z_log_sigma_sq_pr, self.z_log_sigma_sq = (
            z_log_sigma_sq[:self.batch_size],
            z_log_sigma_sq[self.batch_size:])
        # Use the prior statistics for sampling.
        self.z_mu, self.z_log_sigma_sq = self.z_mu_pr, self.z_log_sigma_sq_pr

        # Replicate everything once per importance-sampling particle.
        x = tf.tile(self.x, [self.num_particles, 1, 1, 1])
        masks = tf.tile(self.masks, [self.num_particles, 1, 1])
        self.z_mu = tf.tile(self.z_mu, [self.num_particles, 1])
        self.z_mu_pr = tf.tile(self.z_mu_pr, [self.num_particles, 1])
        self.z_log_sigma_sq = tf.tile(self.z_log_sigma_sq,
                                      [self.num_particles, 1])
        self.z_log_sigma_sq_pr = tf.tile(self.z_log_sigma_sq_pr,
                                         [self.num_particles, 1])
        sigma = tf.exp(self.z_log_sigma_sq / 2.)
        self.params = get_trainable_variables(["inference"])

        # Reparameterized sampling: z = mu + sigma * eps, eps ~ N(0, I).
        dist = tf.distributions.Normal(loc=0., scale=1.)
        epsilon = dist.sample(
            sample_shape=[self.batch_size * self.num_particles, self.z_dim],
            seed=None)
        z = self.z_mu + tf.multiply(epsilon, sigma)

        if self.network_type == 'binary':
            self.pixel_params = decoder(z)
        else:
            self.pixel_params = decoder(z,
                                        nr_logistic_mix=self.nr_logistic_mix)
        if self.network_type == 'binary':
            nll = bernoulli_loss(x, self.pixel_params, masks=masks,
                                 output_mean=False)
        else:
            nll = mix_logistic_loss(x, self.pixel_params, masks=masks,
                                    output_mean=False)

        log_prob_pos = dist.log_prob(epsilon)
        epsilon_pr = (z - self.z_mu_pr) / tf.exp(self.z_log_sigma_sq_pr / 2.)
        log_prob_pr = dist.log_prob(epsilon_pr)

        # Regroup the flat (num_particles * batch_size) axis into
        # [num_particles, batch_size, ...].
        log_prob_pr = tf.stack([
            log_prob_pr[self.batch_size * i:self.batch_size * (i + 1)]
            for i in range(self.num_particles)
        ], axis=0)
        log_prob_pos = tf.stack([
            log_prob_pos[self.batch_size * i:self.batch_size * (i + 1)]
            for i in range(self.num_particles)
        ], axis=0)
        log_prob_pr = tf.reduce_sum(log_prob_pr, axis=2)
        log_prob_pos = tf.reduce_sum(log_prob_pos, axis=2)
        nll = tf.stack([
            nll[self.batch_size * i:self.batch_size * (i + 1)]
            for i in range(self.num_particles)
        ], axis=0)

        log_likelihood = -nll
        # log_weights = log_prob_pr + log_likelihood - log_prob_pos
        log_weights = log_likelihood
        # Importance-weighted (IWAE-style) bound: log-mean-exp over particles.
        log_sum_weight = tf.reduce_logsumexp(log_weights, axis=0)
        log_avg_weight = log_sum_weight - tf.log(
            tf.to_float(self.num_particles))
        self.log_avg_weight = log_avg_weight

        # Gradient surrogate: reweight per-particle objectives by squared,
        # gradient-stopped softmax weights.
        normalized_weights = tf.stop_gradient(
            tf.nn.softmax(log_weights, axis=0))
        sq_normalized_weights = tf.square(normalized_weights)
        self.gradients = tf.gradients(
            -tf.reduce_sum(sq_normalized_weights * log_weights, axis=0),
            self.params,
            colocate_gradients_with_ops=True)
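# A minimal NumPy sketch of the importance-weighting arithmetic above: the
# bound is a log-mean-exp over the particle axis, and the gradient surrogate
# reweights per-particle objectives by squared, gradient-stopped softmax
# weights. The squaring matches the code above; reading it as a
# doubly-reparameterized-style estimator is an assumption.
import numpy as np

def log_avg_weight_sketch(log_weights):
    # log_weights: [num_particles, batch_size]; numerically stable log-mean-exp
    m = np.max(log_weights, axis=0, keepdims=True)
    log_sum = m[0] + np.log(np.sum(np.exp(log_weights - m), axis=0))
    return log_sum - np.log(log_weights.shape[0])

def surrogate_sketch(log_weights):
    # softmax over the particle axis, treated as constant (stop_gradient)
    w = np.exp(log_weights - np.max(log_weights, axis=0, keepdims=True))
    w = w / np.sum(w, axis=0, keepdims=True)
    return -np.sum(np.square(w) * log_weights, axis=0)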
def __model(self):
    print("****** Building Graph ******")
    # placeholders
    if self.network_type == 'binary':
        self.num_channels = 1
    else:
        self.num_channels = 3
    self.x = tf.placeholder(tf.float32,
                            shape=(self.batch_size, self.img_size,
                                   self.img_size, self.num_channels))
    self.x_bar = tf.placeholder(tf.float32,
                                shape=(self.batch_size, self.img_size,
                                       self.img_size, self.num_channels))
    self.is_training = tf.placeholder(tf.bool, shape=())
    self.dropout_p = tf.placeholder(tf.float32, shape=())
    self.masks = tf.placeholder(tf.float32,
                                shape=(self.batch_size, self.img_size,
                                       self.img_size))
    self.input_masks = tf.placeholder(tf.float32,
                                      shape=(self.batch_size, self.img_size,
                                             self.img_size))
    # choose network size
    if self.img_size == 32:
        if self.network_type == 'large':
            encoder = conv_encoder_32_large_bn
            decoder = conv_decoder_32_large
            encoder_q = conv_encoder_32_q
        else:
            encoder = conv_encoder_32
            decoder = conv_decoder_32
            forward_pixelcnn = forward_pixel_cnn_32_small
            reverse_pixelcnn = reverse_pixel_cnn_32_small
    elif self.img_size == 28:
        if self.network_type == 'binary':
            encoder = conv_encoder_28_binary
            decoder = conv_decoder_28_binary
            forward_pixelcnn = forward_pixel_cnn_28_binary
            reverse_pixelcnn = reverse_pixel_cnn_28_binary
            encoder_q = conv_encoder_28_binary_q
        else:
            raise Exception("unknown network type")
    kwargs = {
        "nonlinearity": self.nonlinearity,
        "bn": self.bn,
        "kernel_initializer": self.kernel_initializer,
        "kernel_regularizer": self.kernel_regularizer,
        "is_training": self.is_training,
        "counters": self.counters,
    }
    with arg_scope(
        [forward_pixelcnn, reverse_pixelcnn, encoder, decoder, encoder_q],
            **kwargs):
        kwargs_pixelcnn = {
            "nr_resnet": self.nr_resnet,
            "nr_filters": self.nr_filters,
            "nr_logistic_mix": self.nr_logistic_mix,
            "dropout_p": self.dropout_p,
            "bn": False,
        }
        with arg_scope([forward_pixelcnn, reverse_pixelcnn],
                       **kwargs_pixelcnn):
            self.num_particles = 16
            # Replace the hidden pixels with uniform noise and append the
            # input mask as an extra channel.
            inp = self.x * broadcast_masks_tf(
                self.input_masks, num_channels=self.num_channels)
            inp += tf.random_uniform(int_shape(inp), -1, 1) * (
                1 - broadcast_masks_tf(self.input_masks,
                                       num_channels=self.num_channels))
            inp = tf.concat(
                [inp, broadcast_masks_tf(self.input_masks, num_channels=1)],
                axis=-1)
            inputs_pos = tf.concat([
                self.x,
                broadcast_masks_tf(tf.ones_like(self.input_masks),
                                   num_channels=1)
            ], axis=-1)
            # Encode prior (masked) and posterior (full) inputs as one batch.
            inp = tf.concat([inp, inputs_pos], axis=0)
            z_mu, z_log_sigma_sq = encoder(inp, self.z_dim)
            self.z_mu_pr, self.z_mu = (z_mu[:self.batch_size],
                                       z_mu[self.batch_size:])
            self.z_log_sigma_sq_pr, self.z_log_sigma_sq = (
                z_log_sigma_sq[:self.batch_size],
                z_log_sigma_sq[self.batch_size:])

            # Replicate everything once per importance-sampling particle.
            x = tf.tile(self.x, [self.num_particles, 1, 1, 1])
            x_bar = tf.tile(self.x_bar, [self.num_particles, 1, 1, 1])
            input_masks = tf.tile(self.input_masks,
                                  [self.num_particles, 1, 1])
            masks = tf.tile(self.masks, [self.num_particles, 1, 1])
            self.z_mu_pr = tf.tile(self.z_mu_pr, [self.num_particles, 1])
            self.z_log_sigma_sq_pr = tf.tile(self.z_log_sigma_sq_pr,
                                             [self.num_particles, 1])
            self.z_mu = tf.tile(self.z_mu, [self.num_particles, 1])
            self.z_log_sigma_sq = tf.tile(self.z_log_sigma_sq,
                                          [self.num_particles, 1])
            # Use the prior statistics for sampling.
            self.z_mu, self.z_log_sigma_sq = (self.z_mu_pr,
                                              self.z_log_sigma_sq_pr)
            sigma = tf.exp(self.z_log_sigma_sq / 2.)
            self.params = get_trainable_variables(["inference"])
            dist = tf.distributions.Normal(loc=0., scale=1.)
            # Reparameterized sampling: z = mu + sigma * eps, eps ~ N(0, I).
            epsilon = dist.sample(
                sample_shape=[self.batch_size * self.num_particles,
                              self.z_dim],
                seed=None)
            z = self.z_mu + tf.multiply(epsilon, sigma)

            # Condition the forward PixelCNN on the decoder features, the
            # reverse-PixelCNN context, and the input mask.
            decoded_features = decoder(z, output_features=True)
            r_outputs = reverse_pixelcnn(x, masks, context=None, bn=False)
            cond_features = tf.concat([r_outputs, decoded_features], axis=-1)
            cond_features = tf.concat([
                broadcast_masks_tf(input_masks, num_channels=1),
                cond_features
            ], axis=-1)
            self.pixel_params = forward_pixelcnn(x_bar, cond_features,
                                                 bn=False)

            if self.network_type == 'binary':
                nll = bernoulli_loss(x, self.pixel_params, masks=masks,
                                     output_mean=False)
            else:
                nll = mix_logistic_loss(x, self.pixel_params, masks=masks,
                                        output_mean=False)

            log_prob_pos = dist.log_prob(epsilon)
            epsilon_pr = (z - self.z_mu_pr) / tf.exp(
                self.z_log_sigma_sq_pr / 2.)
            log_prob_pr = dist.log_prob(epsilon_pr)

            # Regroup the flat (num_particles * batch_size) axis into
            # [num_particles, batch_size, ...].
            log_prob_pr = tf.stack([
                log_prob_pr[self.batch_size * i:self.batch_size * (i + 1)]
                for i in range(self.num_particles)
            ], axis=0)
            log_prob_pos = tf.stack([
                log_prob_pos[self.batch_size * i:self.batch_size * (i + 1)]
                for i in range(self.num_particles)
            ], axis=0)
            log_prob_pr = tf.reduce_sum(log_prob_pr, axis=2)
            log_prob_pos = tf.reduce_sum(log_prob_pos, axis=2)
            nll = tf.stack([
                nll[self.batch_size * i:self.batch_size * (i + 1)]
                for i in range(self.num_particles)
            ], axis=0)

            log_likelihood = -nll
            # log_weights = log_prob_pr + log_likelihood - log_prob_pos
            log_weights = log_likelihood
            # Importance-weighted bound: log-mean-exp over particles.
            log_sum_weight = tf.reduce_logsumexp(log_weights, axis=0)
            log_avg_weight = log_sum_weight - tf.log(
                tf.to_float(self.num_particles))
            self.log_avg_weight = log_avg_weight

            normalized_weights = tf.stop_gradient(
                tf.nn.softmax(log_weights, axis=0))
            sq_normalized_weights = tf.square(normalized_weights)
            self.gradients = tf.gradients(
                -tf.reduce_sum(sq_normalized_weights * log_weights, axis=0),
                self.params,
                colocate_gradients_with_ops=True)
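# A minimal sketch of what `broadcast_masks_tf` presumably does with a
# [batch, H, W] mask, inferred from its usage above: append a channel axis
# and repeat it num_channels times so the mask can gate a [batch, H, W, C]
# image. This is an assumption, not the repo's implementation.
import numpy as np

def broadcast_masks_sketch(masks, num_channels):
    # masks: [batch, H, W] -> [batch, H, W, num_channels]
    return np.repeat(masks[..., None], num_channels, axis=-1)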