def sample_and_save(prefix='normal'):
    # Nested helper: closes over `self` and `img_path` from the enclosing
    # method. Generates a fixed grid of samples and writes it to disk.
    sample_bs = 100
    bs = 100
    if self.config.dataset == 'cifar100':
        bs = 1000
    z = utils.sample_normal(bs, self.config.nz, device=self.device)
    if self.config.conditional:
        # Repeat each class label enough times to cover the batch.
        num_rep = int(math.ceil(bs / self.num_classes))
        y = [[i] * num_rep for i in range(self.num_classes)]
        y = np.hstack(y)
        y = torch.from_numpy(y).long()
        y = y.to(self.device)
    else:
        y = None
    gen_list = []
    with torch.no_grad():
        # Generate in chunks of sample_bs to bound memory use.
        for i in range(int(bs / sample_bs)):
            z_cur = z[i * sample_bs:(i + 1) * sample_bs]
            if self.config.conditional:
                y_cur = y[i * sample_bs:(i + 1) * sample_bs]
            else:
                y_cur = None
            gen = self.netG(z_cur, y_cur)
            gen_list.append(gen)
    gen = torch.cat(gen_list, dim=0)
    # Map [-1, 1] generator output back to [0, 1] before saving.
    vutils.save_image(gen * 0.5 + 0.5,
                      '{}/{}_{}.png'.format(img_path, prefix, self.itr),
                      nrow=10)
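# `utils.sample_normal(bs, nz, device=...)` is used throughout the PyTorch
# snippets here but not defined in this file. A minimal sketch, assuming it
# simply draws i.i.d. standard-normal latent noise (signature inferred from
# the call sites; the real helper may differ):
def sample_normal(batch_size, nz, device=None):
    # N(0, 1) latent vectors of shape (batch_size, nz).
    return torch.randn(batch_size, nz, device=device)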
def build_graph(self, graph):
    with graph.as_default():
        # TODO: MNIST sizes are hard-coded in
        self.x = x = tf.placeholder(tf.float32, [None, 784])
        x_reshape = tf.reshape(x, [-1, 28, 28, 1])
        net = slim.conv2d(x_reshape, self.num_features,
                          kernel_size=self.kernel_size)
        net = slim.flatten(net)

        # Sample from the latent distribution
        self.q_z_mean = slim.fully_connected(net, self.hidden_size,
                                             activation_fn=None)
        tf.summary.histogram('q_z_mean', self.q_z_mean)
        self.q_z_log_var = slim.fully_connected(net, self.hidden_size,
                                                activation_fn=None)
        tf.summary.histogram('q_z_log_var', self.q_z_log_var)
        z = sample_normal(self.q_z_mean, self.q_z_log_var)
        self.representation = z

        # The decoder
        net = tf.reshape(z, [-1, 1, 1, self.hidden_size])
        net = slim.conv2d_transpose(net, self.num_features,
                                    kernel_size=self.kernel_size)
        net = slim.flatten(net)
        net = slim.fully_connected(net, x.get_shape().as_list()[-1],
                                   activation_fn=None)
        # TODO: figure out the whole logits and Bernoulli dist vs MSE thing
        self.p_x = Bernoulli(logits=net)
        tf.summary.image('generated',
                         tf.reshape(self.p_x.mean(), [-1, 28, 28, 1]),
                         max_outputs=1)

        self.loss = self._vae_loss()
        tf.summary.scalar('loss', self.loss)

        learning_rate = tf.Variable(self.learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)
        global_step = tf.Variable(0, trainable=False, name="global_step")
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate) \
            .minimize(self.loss,
                      var_list=slim.get_model_variables(),
                      global_step=global_step)
    return graph
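# `sample_normal(mean, log_var)` is defined elsewhere in this repo. A minimal
# sketch of what it presumably does -- the standard reparameterization trick,
# so gradients can flow through the sampling step (name and signature taken
# from the call site above; this is an assumption, not the repo's code):
def sample_normal(mean, log_var):
    # z = mu + sigma * eps, with eps ~ N(0, I).
    eps = tf.random_normal(tf.shape(mean))
    return mean + tf.exp(0.5 * log_var) * eps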
def build_graph(self, graph):
    with graph.as_default():
        # TODO: MNIST sizes are hard-coded in
        self.x = x = tf.placeholder(tf.float32, [None, 784])
        shape = x.get_shape().as_list()

        # This layer's size is halfway between the layer before and after.
        # TODO: allow the architecture to be specified at the command line.
        # Integer division so the layer size is an int under Python 3.
        net = slim.fully_connected(x, (shape[-1] - self.hidden_size) // 2)

        # Sample from the latent distribution
        self.q_z_mean = slim.fully_connected(net, self.hidden_size,
                                             activation_fn=None)
        tf.summary.histogram('q_z_mean', self.q_z_mean)
        self.q_z_log_var = slim.fully_connected(net, self.hidden_size,
                                                activation_fn=None)
        tf.summary.histogram('q_z_log_var', self.q_z_log_var)
        z = sample_normal(self.q_z_mean, self.q_z_log_var)
        self.representation = z

        # The decoder
        net = slim.fully_connected(z, (shape[-1] - self.hidden_size) // 2)
        # TODO: figure out the whole logits and Bernoulli dist vs MSE thing
        # Do not include the batch size in creating the final layer
        net = slim.fully_connected(net, shape[-1], activation_fn=None)
        self.p_x = Bernoulli(logits=net)
        tf.summary.image('generated',
                         tf.reshape(self.p_x.mean(), [-1, 28, 28, 1]),
                         max_outputs=1)

        self.loss = self._vae_loss()
        tf.summary.scalar('loss', self.loss)

        learning_rate = tf.Variable(self.learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)
        global_step = tf.Variable(0, trainable=False, name="global_step")
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate) \
            .minimize(self.loss,
                      var_list=slim.get_model_variables(),
                      global_step=global_step)
    return graph
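# `_vae_loss` is implemented elsewhere; both graphs above call it. A minimal
# sketch under the usual assumptions -- the negative ELBO with a Bernoulli
# reconstruction term and the analytic Gaussian KL. The method name comes from
# the call sites; the body is an assumption, not the repo's confirmed code:
def _vae_loss(self):
    # Reconstruction term: log p(x | z), summed over pixels.
    log_likelihood = tf.reduce_sum(self.p_x.log_prob(self.x), axis=1)
    # KL(q(z|x) || N(0, I)) for a diagonal Gaussian posterior.
    kl = -0.5 * tf.reduce_sum(
        1 + self.q_z_log_var
        - tf.square(self.q_z_mean)
        - tf.exp(self.q_z_log_var), axis=1)
    # Minimize the negative ELBO, averaged over the batch.
    return tf.reduce_mean(kl - log_likelihood)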
def create_samples(self, num_samples):
    print('Creating samples')
    bs = 100
    gen_list = []
    with torch.no_grad():
        for i in range(int(num_samples / bs)):
            z_cur = utils.sample_normal(bs, self.config.nz,
                                        device=self.device)
            y_cur = None
            gen = self.netG(z_cur, y_cur)
            gen_list.append(gen.detach().cpu())
    gen = torch.cat(gen_list, dim=0)
    vutils.save_image(gen[0:100] * 0.5 + 0.5,
                      '{}/samples.png'.format(self.logdir), nrow=10)
    print('Sample creation done')
    return gen
# Standalone evaluation script: k-nearest-neighbour class-match accuracy in
# the learned latent space. `utils` and `ZeroShotImageFolder` are
# project-local modules; `model` and `args` are assumed to be defined earlier.
import numpy as np
import torch
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import DataLoader
from torchvision import transforms

model.eval()
transform = transforms.Compose(
    [transforms.CenterCrop((224, 224)),
     transforms.ToTensor()])
dataset = ZeroShotImageFolder(args.data_root, train=False,
                              transform=transform)
dataloader = DataLoader(dataset, batch_size=50)
z_all = []
targets_all = []
for images, target in dataloader:
    # `torch.no_grad()` is a context manager, not a predicate:
    # use `with`, not `if`.
    with torch.no_grad():
        z_inv, mu_logvar, _ = model(images.cuda())
        z_var = utils.sample_normal(mu_logvar)
        z = z_inv + z_var
    z_all.append(z.detach())
    targets_all.append(target)
z_all = torch.cat(z_all, 0).cpu().numpy()
targets_all = torch.cat(targets_all, 0).numpy()

scores = []
for n in [1, 2, 4, 8]:
    # Query with n + 1 neighbours and drop the first column, which is each
    # point itself.
    nbrs = NearestNeighbors(n_neighbors=n).fit(z_all)
    indices = nbrs.kneighbors(z_all, n_neighbors=n + 1,
                              return_distance=False)
    nbr_targets = targets_all[indices[:, 1:]]
    nbr_targets = np.any(nbr_targets == targets_all[:, np.newaxis], axis=-1)
    # Percentage of points with at least one same-class neighbour.
    score = np.sum(nbr_targets) / nbr_targets.shape[0] * 100
    scores.append(score)
    print("n = ", n, ", score = ", score)
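# Note that `utils.sample_normal` here takes a single `mu_logvar` tensor,
# unlike the (batch, nz, device) variant in the GAN snippets. A plausible
# sketch, assuming the tensor concatenates the mean and log-variance halves
# along the feature dimension -- an illustrative guess, not confirmed code:
def sample_normal(mu_logvar):
    mu, log_var = mu_logvar.chunk(2, dim=1)
    # Reparameterized draw from N(mu, exp(log_var)).
    return mu + torch.exp(0.5 * log_var) * torch.randn_like(mu)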
def compute_inception_fid(self):
    self.netG.eval()
    if self.config.use_ema:
        # Swap in the EMA weights for evaluation; restored at the end.
        G_state = copy.deepcopy(self.netG.state_dict())
        self.netG.load_state_dict(self.ema.target_dict)

    # bs = self.config.batchSize
    bs = 32
    samples = []
    labels_gen = []
    num_batches = int(self.config.num_inception_imgs / bs)
    for batch in range(num_batches):
        with torch.no_grad():
            z = utils.sample_normal(bs, self.config.nz, device=self.device)
            if self.config.conditional:
                y = utils.sample_cats(bs, self.num_classes,
                                      device=self.device)
                labels_gen.append(y.cpu().numpy())
            else:
                y = None
            gen = self.netG(z, y)
            # Map [-1, 1] outputs to uint8 HWC images for the evaluator.
            gen = gen * 0.5 + 0.5
            gen = gen * 255.0
            gen = gen.cpu().numpy().astype(np.uint8)
            gen = np.transpose(gen, (0, 2, 3, 1))
            samples.extend(gen)

    if self.config.conditional:
        labels_gen = np.hstack(labels_gen)
        samples = (samples, labels_gen)
        IS_mean, IS_std, fid, intra_fid = \
            self.inception_evaluator.compute_metrics(samples)
        self.log('IS: {} +/- {}'.format(IS_mean, IS_std))
        self.log('FID: {}'.format(fid))
        self.log('Intra FID: {}'.format(intra_fid))
        # Choosing the min FID model
        if self.best_intra_fid > intra_fid:
            self.best_is = IS_mean
            self.best_is_std = IS_std
            self.best_fid = fid
            self.best_intra_fid = intra_fid
            self.save_state('model_best.pth')
    else:
        IS_mean, IS_std, fid = \
            self.inception_evaluator.compute_metrics(samples)
        self.log('IS: {} +/- {}'.format(IS_mean, IS_std))
        self.log('FID: {}'.format(fid))
        # Choosing the min FID model
        if self.best_fid > fid:
            self.best_is = IS_mean
            self.best_is_std = IS_std
            self.best_fid = fid
            self.save_state('model_best.pth')

    self.log('Best IS: {} +/- {}'.format(self.best_is, self.best_is_std))
    self.log('Best FID: {}'.format(self.best_fid))
    if self.config.conditional:
        self.log('Best intra FID: {}'.format(self.best_intra_fid))
    if self.config.use_ema:
        self.netG.load_state_dict(G_state)
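# `utils.sample_cats` is not defined in this file. A minimal sketch, assuming
# it draws class labels uniformly at random (name and signature inferred from
# the call sites above and below):
def sample_cats(batch_size, num_classes, device=None):
    # Uniform categorical labels in [0, num_classes).
    return torch.randint(0, num_classes, (batch_size,), device=device)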
def gan_updates(self, real_data, real_labels, real_indices):
    self.optimizerD.zero_grad()
    batch_size = real_data.size(0)
    noise = utils.sample_normal(batch_size, self.config.nz,
                                device=self.device)
    if self.config.conditional:
        fake_labels = utils.sample_cats(batch_size, self.num_classes,
                                        device=self.device)
    else:
        real_labels = None
        fake_labels = None
    fake_data = self.netG(noise, fake_labels)

    # Discriminator updates
    outD_real = self.netD(real_data, real_labels)
    outD_fake = self.netD(fake_data.detach(), fake_labels)

    if self.weight_update_flag and self.weight_update_type == 'discrete':
        self.disc_vector_cur[real_indices] = torch.squeeze(outD_real)

    if self.weight_update_flag:
        if self.weight_update_type == 'discrete':
            real_weights = self.weight_vector[real_indices].view(-1, 1)
        else:
            real_weights = self.netW(real_data, real_labels) + self.eps
        real_weights = (real_weights / real_weights.sum()) \
            * self.config.batchSize
    else:
        real_weights = torch.ones(real_data.size(0), 1).to(self.device)

    if self.config.conditioning == 'acgan':
        outD_real_cls = outD_real[1]
        outD_real = outD_real[0]
        outD_fake_cls = outD_fake[1]
        outD_fake = outD_fake[0]
        aux_loss_real = self.aux_loss_fn(outD_real_cls, real_labels)
        aux_loss_fake = self.aux_loss_fn(outD_fake_cls, fake_labels)

    errD_real, errD_fake = self.disc_loss_fn(outD_real, outD_fake,
                                             real_weights)
    if self.config.conditioning == 'acgan':
        errD_real = errD_real + aux_loss_real
        errD_fake = errD_fake + aux_loss_fake
    errD_real.backward()
    errD_fake.backward()

    if self.config.regularization == 'gradient_penalty':
        if self.config.conditional:
            fake_data_consistent = self.netG(noise, real_labels)
            gp = losses.gradient_penalty(self.netD, real_data,
                                         fake_data_consistent,
                                         self.config.gp_lamb,
                                         device=self.device,
                                         labels=real_labels)
        else:
            gp = losses.gradient_penalty(self.netD, real_data, fake_data,
                                         self.config.gp_lamb,
                                         device=self.device)
        gp.backward()
    if self.config.regularization == 'ortho':
        losses.orthogonal_regularization(self.netD,
                                         self.config.ortho_strength)
    self.optimizerD.step()
    if self.config.lrdecay:
        self.schedulerD.step()
        self.schedulerG.step()
    disc_loss = errD_real.item() + errD_fake.item()

    # Generator updates
    if self.itr % self.config.disc_iters == 0:
        self.optimizerG.zero_grad()
        outD = self.netD(fake_data, fake_labels)
        if self.config.conditioning == 'acgan':
            outD_cls = outD[1]
            outD = outD[0]
            aux_loss = self.aux_loss_fn(outD_cls, fake_labels)
        errG = self.gen_loss_fn(outD)
        if self.config.conditioning == 'acgan':
            errG = errG + aux_loss
        errG.backward()
        self.optimizerG.step()
        gen_loss = errG.item()
        self.prev_gen_loss = gen_loss
    else:
        gen_loss = self.prev_gen_loss
    return disc_loss, gen_loss
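# `self.disc_loss_fn` and `self.gen_loss_fn` are assigned elsewhere
# (presumably from `losses`). One plausible instantiation -- an
# instance-weighted hinge loss that returns the real and fake terms
# separately, matching the call sites above. This is purely an illustrative
# sketch, not the repo's confirmed definition:
import torch.nn.functional as F

def weighted_hinge_disc_loss(outD_real, outD_fake, real_weights):
    # Per-sample hinge terms; real samples are reweighted before averaging.
    errD_real = (real_weights * F.relu(1.0 - outD_real)).mean()
    errD_fake = F.relu(1.0 + outD_fake).mean()
    return errD_real, errD_fake

def hinge_gen_loss(outD_fake):
    # Generator pushes the discriminator's score on fakes upward.
    return -outD_fake.mean()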
def __init__(self, input_, cont_dim=2, discrete_dim=0, filters=[32, 64],
             hidden_dim=1024, model_name="ConcreteVae"):
    """Constructs a variational autoencoder that supports continuous and
    discrete latent dimensions. Currently only one discrete dimension is
    supported.

    Args:
        input_: the input tensor
        cont_dim: the number of continuous latent dimensions
        discrete_dim: the number of categories in the discrete latent
            dimension
        filters: the number of filters for each convolution
        hidden_dim: the dimension of the fully-connected hidden layer
            between the convolutions and the latent variable
        model_name: the name of the model
    """
    self.input_ = input_
    input_shape = input_.get_shape().as_list()
    print('Input shape {}'.format(input_shape))
    self.model_name = model_name

    # Build the encoder
    # According to karpathy, generative models work better when they
    # discard pooling layers in favor of larger strides
    # (https://cs231n.github.io/convolutional-networks/#pool)
    net = slim.conv2d(self.input_, filters[0], kernel_size=5, stride=2,
                      padding='SAME')
    net = slim.conv2d(net, filters[1], kernel_size=5, stride=2,
                      padding='SAME')
    # Use dropout to reduce overfitting
    # net = slim.dropout(net, 0.9)
    net = slim.flatten(net)

    # Sample from the latent distribution
    q_z_mean = slim.fully_connected(net, cont_dim, activation_fn=None)
    q_z_log_var = slim.fully_connected(net, cont_dim, activation_fn=None)
    # TODO: support multiple categorical variables
    q_category_logits = slim.fully_connected(net, discrete_dim,
                                             activation_fn=None)
    q_category = tf.nn.softmax(q_category_logits)
    self.q_z_mean = q_z_mean
    self.q_z_log_var = q_z_log_var
    self.q_category = q_category
    self.continuous_z = sample_normal(q_z_mean, q_z_log_var)
    self.tau = tf.Variable(5.0, name="temperature")
    self.category = sample_gumbel(q_category_logits, self.tau)
    self.z = tf.concat([self.continuous_z, self.category], axis=1)

    # Build the decoder
    net = tf.reshape(self.z, [-1, 1, 1, cont_dim + discrete_dim])
    net = slim.conv2d_transpose(net, filters[1], kernel_size=5, stride=2,
                                padding='SAME')
    net = slim.conv2d_transpose(net, filters[0], kernel_size=5, stride=2,
                                padding='SAME')
    net = slim.conv2d_transpose(net, input_shape[3], kernel_size=5,
                                padding='VALID')
    net = slim.flatten(net)
    # TODO: figure out the whole logits and Bernoulli dist vs MSE thing
    # Do not include the batch size in creating the final layer
    self.logits = slim.fully_connected(net, np.prod(input_shape[1:]),
                                       activation_fn=None)
    print('Output shape {}'.format(self.logits.get_shape()))
    p_x = Bernoulli(logits=self.logits)
    self.p_x = p_x

    self.loss = self._vae_loss()
    self.learning_rate = tf.Variable(1e-3, name="learning_rate")
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate) \
        .minimize(self.loss, var_list=slim.get_model_variables())
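# `sample_gumbel(logits, tau)` is defined elsewhere in the repo. A minimal
# sketch of Gumbel-Softmax (Concrete) sampling in the style of Jang et al.
# 2017, assuming that is what it implements (name and call signature taken
# from the constructor above):
def sample_gumbel(logits, tau, eps=1e-20):
    # Gumbel(0, 1) noise via inverse transform sampling.
    u = tf.random_uniform(tf.shape(logits), minval=0, maxval=1)
    g = -tf.log(-tf.log(u + eps) + eps)
    # Softmax relaxation of argmax(logits + g); tau -> 0 recovers one-hot.
    return tf.nn.softmax((logits + g) / tau)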