Example #1
def sample_from_gen(args, device, num_classes, gen):
    """Sample fake images and labels from generator.

    Args:
        args (argparse object)
        device (torch.device)
        num_classes (int): for pseudo_y
        gen (nn.Module)

    Returns:
        fake, pseudo_y, z

    """

    z = utils.sample_z(
        args.batch_size, args.gen_dim_z, device, args.gen_distribution
    )
    if args.cGAN:
        pseudo_y = utils.sample_pseudo_labels(
            num_classes, args.batch_size, device
        )
    else:
        pseudo_y = None

    fake = gen(z, pseudo_y)
    return fake, pseudo_y, z
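Note: utils.sample_z itself is not shown on this page. A minimal sketch of what such a helper could look like, assuming it simply draws i.i.d. noise of shape (batch_size, dim_z) in the requested distribution (the project's actual helper may differ):

import torch

def sample_z(batch_size, dim_z, device, distribution='normal'):
    """Hypothetical latent sampler matching the call sites above."""
    if distribution == 'normal':
        return torch.randn(batch_size, dim_z, device=device)
    if distribution == 'uniform':
        # Uniform noise on [-1, 1)
        return 2.0 * torch.rand(batch_size, dim_z, device=device) - 1.0
    raise ValueError('unknown distribution: {}'.format(distribution))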
Example #2
    def build_model(self):
        with tf.device('/gpu:%d' % self.gpu_id):
            ### Placeholder ###
            self.X = tf.placeholder(tf.float32, [None, self.input_dim])
            self.k = tf.placeholder(tf.int32)
            self.keep_prob = tf.placeholder(tf.float32)

            ### Encoding ###
            self.z_mu, self.z_logvar = self.encoder(self.X, self.enc_h_dim_list, self.z_dim, self.keep_prob)
            self.z = sample_z(self.z_mu, self.z_logvar)
 
            ### Decoding ### 
            self.recon_X_logit = self.decoder(self.z, self.dec_h_dim_list, self.input_dim, self.keep_prob, False)
            self.recon_X = tf.nn.tanh(self.recon_X_logit)
            self.output = tf.nn.tanh(self.recon_X_logit)

            #self.logger.info([x.name for x in tf.global_variables()])
            #cost = tf.reduce_mean(tf.square(X-output))

            ### Loss ###
            self.recon_loss = self.recon_loss() 
            self.kl_loss = kl_divergence_normal_distribution(self.z_mu, self.z_logvar)
            self.total_loss = self.recon_loss + self.kl_loss
            #cost_summary = tf.summary.scalar('cost', cost)

            ### Solver ### 
            self.solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.total_loss)

            ### Recommendation metric ###
            #self.recon_X = tf.nn.sigmoid(self.recon_X_logit)

        with tf.device('/cpu:0'):
            self.top_k_op = tf.nn.top_k(self.recon_X, self.k)
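Example #2 calls sample_z(self.z_mu, self.z_logvar), i.e. the usual VAE reparameterization trick. A minimal TensorFlow 1.x sketch under that assumption (not necessarily the project's actual helper):

import tensorflow as tf

def sample_z(mu, logvar):
    # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I),
    # so sampling stays differentiable w.r.t. mu and logvar.
    eps = tf.random_normal(tf.shape(mu))
    return mu + tf.exp(0.5 * logvar) * eps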
Example #3
def generator_rnn(representations, query_poses, sequence_size=12, scope='GQN_RNN'):
    dim_r = representations.get_shape().as_list()
    batch = tf.shape(representations)[0]
    height, width = dim_r[1], dim_r[2]
    cell = GeneratorLSTMCell(input_shape=[height, width, C.GENERATOR_INPUT_CHANNELS], output_channels=C.LSTM_OUTPUT_CHANNELS, canvas_channels=C.LSTM_CANVAS_CHANNELS, kernel_size=C.LSTM_KERNEL_SIZE, name='GeneratorCell')
    outputs = []
    endpoints = {}
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as var_scope:
        if not tf.executing_eagerly():
            if var_scope.caching_device is None:
                var_scope.set_caching_device(lambda op: op.device)

        query_poses = broadcast_pose(query_poses, height, width)
        state = cell.zero_state(batch, tf.float32)

        for step in range(sequence_size):
            z = sample_z(state.lstm.h, scope='sample_eta_pi')
            inputs = _GeneratorCellInput(representations, query_poses, z)
            with tf.name_scope('Generator'):
                (output, state) = cell(inputs, state, 'LSTM_gen')
            ep_canvas = 'canvas_{}'.format(step)
            endpoints[ep_canvas] = output.canvas
            outputs.append(output)
        target_canvas = outputs[-1].canvas
    mu_target = eta_g(target_canvas, channels=C.IMG_CHANNELS, scope='eta_g')
    endpoints['mu_target'] = mu_target
    return mu_target, endpoints
Example #4
 def updateQ(self, sess, states, actions, rewards, states_, dones, states2,
             actions2, batch_size, latent_size):
     feed_dict = {
         self.rewards: rewards,
         self.dones: dones,
         self.target_qnet.states: states_,
         self.qnet.states: states,
         self.qnet.actions: actions,
         self.cgan_reward.states2: states2,
         self.cgan_reward.actions2: actions2,
         self.cgan_reward.Z2: sample_z(batch_size, latent_size),
         self.cgan_state.states2: states2,
         self.cgan_state.actions2: actions2,
         self.cgan_state.Z2: sample_z(batch_size, latent_size),
         self.qnet.states2: states2,
         self.qnet.actions2: actions2
     }
     _ = sess.run(self.opt_Q, feed_dict=feed_dict)
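Example #4 and the other reinforcement-learning examples below feed sample_z(batch_size, latent_size) into the conditional GANs' Z placeholders. A plausible minimal version, assuming plain uniform noise in [-1, 1] (the original helper may use a different distribution):

import numpy as np

def sample_z(batch_size, latent_size):
    # Hypothetical noise sampler for the conditional-GAN latent input.
    return np.random.uniform(-1., 1., size=(batch_size, latent_size))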
Example #5
 def __call__(self, *args, **kwargs):
     with torch.no_grad():
         self.generator.eval()
         z = utils.sample_z(self.batch, self.latent, self.device,
                            self.gen_distribution)
         pseudo_y = utils.sample_pseudo_labels(num_classes, self.batch,
                                               self.device)
         fake_img = self.generator(z, pseudo_y)
     return fake_img
Example #6
def sample_from_gen(args, device, num_classes, gen):
    z = utils.sample_z(args.batch_size, args.gen_dim_z, device,
                       args.gen_distribution)
    if args.cGAN:
        pseudo_y = utils.sample_pseudo_labels(num_classes, args.batch_size,
                                              device)
    else:
        pseudo_y = None

    fake = gen(z, pseudo_y)
    return fake, pseudo_y, z
Example #7
    def build_model(self):
        with tf.device('/gpu:%d' % self.gpu_id):
            ### Placeholder ###
            self.X = tf.placeholder(tf.float32, [None, self.input_dim])
            self.k = tf.placeholder(tf.int32)
            self.z = tf.placeholder(tf.float32, [None, self.z_dim])
            self.keep_prob = tf.placeholder(tf.float32)

            ### Encoding ###
            self.z_mu, self.z_logvar = self.encoder_to_distribution(self.X, self.enc_h_dim_list, self.z_dim, self.keep_prob)
            self.z_sampled = sample_z(self.z_mu, self.z_logvar)

            self.kl_loss = kl_divergence_normal_distribution(self.z_mu, self.z_logvar)

            ### Decoding ###
            self.recon_X_logit = self.decoder(self.z_sampled, self.dec_h_dim_list, self.input_dim, self.keep_prob, False)
            self.recon_X = tf.nn.sigmoid(self.recon_X_logit)
            self.output = tf.nn.tanh(self.recon_X_logit)

            self.recon_loss = self.recon_loss() 

            ### Generating ###
            self.gen_X_logit = self.decoder(self.z, self.dec_h_dim_list, self.input_dim, self.keep_prob, True)
            self.gen_X = tf.nn.sigmoid(self.gen_X_logit)

            ### Discriminating ###
            dis_logit_real = self.discriminator(self.X, self.dis_h_dim_list, 1, self.keep_prob, False)
            dis_logit_fake = self.discriminator(self.gen_X, self.dis_h_dim_list, 1, self.keep_prob, True)

            self.dec_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_logit_fake, labels=tf.ones_like(dis_logit_fake))) 

            self.dis_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_logit_real, labels=tf.ones_like(dis_logit_real))) 
            self.dis_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_logit_fake, labels=tf.zeros_like(dis_logit_fake))) 

            # Improved part  
            dis_logit_recon = self.discriminator(self.recon_X, self.dis_h_dim_list, 1, self.keep_prob, True)
            self.dec_loss_recon = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_logit_recon, labels=tf.ones_like(dis_logit_recon)))
            self.dis_loss_recon = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_logit_recon, labels=tf.zeros_like(dis_logit_recon))) 
            
            ### Loss ###
            self.enc_loss = self.kl_loss + self.recon_loss 
            self.dec_loss = self.recon_loss + self.dec_loss_fake + self.dec_loss_recon
            self.dis_loss = self.dis_loss_real + self.dis_loss_fake + self.dis_loss_recon
            
            ### Theta ###
            enc_theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='enc')
            dec_theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='dec')
            dis_theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='dis')

            ### Solver ###
            self.enc_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
            self.dec_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
            self.dis_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
Example #8
    def updateR(self, sess, states, actions, rewards, states2, actions2,
                batch_size, latent_size):
        feed_dict = {
            self.cgan_reward.states: states,
            self.cgan_reward.actions: actions,
            self.cgan_reward.Z: sample_z(batch_size, latent_size),
            self.cgan_reward.X: rewards[..., np.newaxis]
        }
        _ = sess.run(self.opt_reward_model_D, feed_dict=feed_dict)

        feed_dict = {
            self.cgan_reward.states: states,
            self.cgan_reward.actions: actions,
            self.cgan_reward.Z: sample_z(batch_size, latent_size),
            self.cgan_reward.states2: states2,
            self.cgan_reward.actions2: actions2,
            self.cgan_reward.Z2: sample_z(batch_size, latent_size),
            self.cgan_state.states2: states2,
            self.cgan_state.actions2: actions2,
            self.cgan_state.Z2: sample_z(batch_size, latent_size),
            self.qnet.states2: states2,
            self.qnet.actions2: actions2
        }
        _ = sess.run(self.opt_reward_model_G, feed_dict=feed_dict)
Example #9
    def updateS(self, sess, states, actions, states_, states2, actions2,
                batch_size, latent_size):
        feed_dict = {
            self.cgan_state.states: states,
            self.cgan_state.actions: actions,
            self.cgan_state.Z: sample_z(batch_size, latent_size),
            self.cgan_state.X: states_
        }
        _ = sess.run(self.opt_state_model_D, feed_dict=feed_dict)

        feed_dict = {
            self.cgan_state.states: states,
            self.cgan_state.actions: actions,
            self.cgan_state.Z: sample_z(batch_size, latent_size),
            self.cgan_reward.states2: states2,
            self.cgan_reward.actions2: actions2,
            self.cgan_reward.Z2: sample_z(batch_size, latent_size),
            self.cgan_state.states2: states2,
            self.cgan_state.actions2: actions2,
            self.cgan_state.Z2: sample_z(batch_size, latent_size),
            self.qnet.states2: states2,
            self.qnet.actions2: actions2
        }
        _ = sess.run(self.opt_state_model_G, feed_dict=feed_dict)
Example #10
def main():
    args = parse_arguments()
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
        config = OmegaConf.create(config)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    labels = [int(l) for l in args.labels.split(',')]

    generator = instantiate(config.generator)
    generator.load_state_dict(
        torch.load(config.resume_checkpoint)['g_model_dict'])

    for y in labels:
        y = torch.tensor(y, device=device)
        z = sample_z(generator.z_dim, 1, device)
        x_fake = generator(z, y)

        show_tensor_images(x_fake)
Example #11
def compute_loss(batch, grid, mask, z_params_full, z_params_masked, h, w,
                 decoder):
    ## compute loss
    z_full = sample_z(z_params_full)  # size bsize * hidden
    z_full = z_full.unsqueeze(1).expand(-1, h * w, -1)

    # resize context to have one context per input coordinate
    grid_input = grid.view(1, h * w, -1).expand(batch.size(0), -1, -1)
    target_input = torch.cat([z_full, grid_input], dim=-1)

    reconstructed_image_mean, reconstructed_image_variance = decoder(
        target_input)  # bsize,h*w,1
    reconstruction_loss = -(
        log_normal(x=batch.view(batch.size(0), 3, h * w).transpose(1, 2),
                   m=reconstructed_image_mean,
                   v=reconstructed_image_variance) *
        (1 - mask.view(-1, h * w))).sum(dim=1).mean()

    kl_loss = kl_normal(z_params_full, z_params_masked).mean()
    return reconstruction_loss, kl_loss, reconstructed_image_mean, reconstructed_image_variance
Example #12
def compute_loss(batch, grid, mask, z_params_full, z_params_masked, h, w,
                 decoder):
    ## compute loss
    z_full = sample_z(z_params_full)  # size bsize * hidden
    z_full = z_full.unsqueeze(1).expand(-1, h * w, -1)

    # resize context to have one context per input coordinate
    grid_input = grid.view(1, h * w, -1).expand(batch.size(0), -1, -1)
    target_input = torch.cat([z_full, grid_input], dim=-1)

    reconstructed_image = decoder(target_input)  # bsize,h*w,1

    reconstruction_loss = (
        F.binary_cross_entropy(reconstructed_image,
                               batch.view(batch.size(0), h * w, 1),
                               reduction='none') *
        (1 - mask.view(-1, h * w, 1))).sum(dim=1).mean()

    kl_loss = kl_normal(z_params_full, z_params_masked).mean()

    return reconstruction_loss, kl_loss, reconstructed_image
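Examples #11 and #12 pass a single z_params object to sample_z. Assuming z_params is a (mu, logvar) pair, which is consistent with the kl_normal(z_params_full, z_params_masked) call, a minimal PyTorch sketch would be:

import torch

def sample_z(z_params):
    # Hypothetical helper: z_params is assumed to be a (mu, logvar) pair.
    mu, logvar = z_params
    eps = torch.randn_like(mu)
    return mu + torch.exp(0.5 * logvar) * eps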
Example #13
    for i in range(num_epoch):

        seed = seed + 1
        mini = 0

        minibatches = random_mini_batches(toydata.T,
                                          mini_batch_size=mb_size,
                                          seed=seed)

        for minibatch in minibatches:
            mini = mini + 1
            X_mb = minibatch.T
            count = count + 1

            # Train auto-encoders
            z_mb = sample_z(X_mb.shape[0], z_dim)
            sess.run([R_solver], feed_dict={X: X_mb, z: z_mb})

            # Train discriminator
            for k in range(n_critics):
                z_mb_critics = sample_z(X_mb.shape[0], z_dim)
                X_mb_critics = random_batches(toydata, X_mb.shape[0])
                sess.run([D_solver],
                         feed_dict={
                             X: X_mb_critics,
                             z: z_mb_critics
                         })

            # Train generator
            z_mb = sample_z(X_mb.shape[0], z_dim)
            sess.run([G_solver], feed_dict={X: X_mb, z: z_mb})
Example #14
    def train(self, sess, states_B, actions_B, rewards_B, states1_B, dones_B,
              states_M, actions_M, batch_size, latent_size):
        dones_B = dones_B.astype(np.float64)
        #Joint dictionary
        feed_dict_joint = {
            self.cgan_state.states2: states_M,
            self.cgan_state.actions2: actions_M,
            self.cgan_state.Z2: sample_z(batch_size, latent_size),
            self.cgan_reward.states2: states_M,
            self.cgan_reward.actions2: actions_M,
            self.cgan_reward.Z2: sample_z(batch_size, latent_size)
        }

        #Update critic
        #Dict for critic
        feed_dict_critic = {
            self.states: states_B,
            self.actions: actions_B,
            self.rewards: rewards_B,
            self.states1: states1_B,
            self.dones: dones_B
        }

        _, l_Q = sess.run([self.opt_Q, self.loss_Q],
                          feed_dict=merge_two_dicts(feed_dict_joint,
                                                    feed_dict_critic))

        #Get another Z sample
        feed_dict_joint[self.cgan_state.Z2] = sample_z(batch_size, latent_size)
        feed_dict_joint[self.cgan_reward.Z2] = sample_z(
            batch_size, latent_size)

        #Update actor
        _ = sess.run(self.opt_actor, feed_dict={self.states: states_B})

        #Update state model (discriminator)
        feed_dict_state_model = {
            self.cgan_state.states: states_B,
            self.cgan_state.actions: actions_B,
            self.cgan_state.Z: sample_z(batch_size, latent_size),
            self.cgan_state.X: states1_B
        }
        _ = sess.run(self.opt_state_model_D, feed_dict=feed_dict_state_model)

        #Update state model (generator)
        feed_dict_state_model.pop(self.cgan_state.X)
        feed_dict_state_model[self.cgan_state.Z] = sample_z(
            batch_size, latent_size)

        _ = sess.run(self.opt_state_model_G,
                     feed_dict=merge_two_dicts(feed_dict_joint,
                                               feed_dict_state_model))

        #Get another Z sample
        feed_dict_joint[self.cgan_state.Z2] = sample_z(batch_size, latent_size)
        feed_dict_joint[self.cgan_reward.Z2] = sample_z(
            batch_size, latent_size)

        #Update reward model (discriminator)
        feed_dict_reward_model = {
            self.cgan_reward.states: states_B,
            self.cgan_reward.actions: actions_B,
            self.cgan_reward.Z: sample_z(batch_size, latent_size),
            self.cgan_reward.X: rewards_B[..., np.newaxis]
        }
        _ = sess.run(self.opt_reward_model_D, feed_dict=feed_dict_reward_model)

        #Update reward model (generator)
        feed_dict_reward_model.pop(self.cgan_reward.X)
        feed_dict_reward_model[self.cgan_reward.Z] = sample_z(
            batch_size, latent_size)

        _ = sess.run(self.opt_reward_model_G,
                     feed_dict=merge_two_dicts(feed_dict_joint,
                                               feed_dict_reward_model))
Example #15
    loss_Z = 0
    for X, _ in train_dataloader:
        X = select_white_line_images(X, proba_white_line)

        # put a mask on images
        input_masked = X.to(device) * mask

        # Freeze H network
        freeze(net_H)
        H, skip_connect_layers = net_H(input_masked)

        # freeze G network
        freeze(net_G)

        # generate init z
        z_t = sample_z(X.shape[0], z_size=10)
        z_t.requires_grad = True

        for _ in range(STEPS):
            ###########################
            # ##### updating z  ##### #
            ###########################
            z = z_t.clone().detach()

            X_hat = net_G(H,
                          z_t.permute(0, 2, 1).unsqueeze(3),
                          skip_connect_layers)
            loss = criterion(X_hat * ~mask, X.to(device) * ~mask)
            loss.backward()

            with torch.no_grad():
Example #16
def train(dataloaders,
          models,
          optimizers,
          train_config,
          device,
          start_epoch=0):
    ''' Train function for BigGAN '''
    # unpack modules
    train_dataloader, val_dataloader = dataloaders
    generator, discriminator = models
    g_optimizer, d_optimizer = optimizers

    log_dir = os.path.join(train_config.log_dir,
                           datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(log_dir, mode=0o775, exist_ok=False)

    loss = BigGANLoss(device=device)

    for epoch in range(start_epoch, train_config.epochs):

        # training epoch
        epoch_steps = 0
        mean_g_loss = 0.0
        mean_d_loss = 0.0
        generator.train()
        discriminator.train()
        pbar = tqdm(train_dataloader,
                    position=0,
                    desc='train [G loss: -.-----][D loss: -.-----]')
        for (x, y) in pbar:
            x = x.to(device)
            y = y.to(device)
            z = sample_z(generator.z_dim, x.shape[0], device)

            with torch.cuda.amp.autocast(enabled=(device == 'cuda')):
                g_loss, d_loss, x_fake = loss(generator, discriminator, x, y,
                                              z)

            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()

            mean_g_loss += g_loss.item()
            mean_d_loss += d_loss.item()
            epoch_steps += 1
            pbar.set_description(
                desc=
                f'train [G loss: {mean_g_loss/epoch_steps:.5f}][D loss: {mean_d_loss/epoch_steps:.5f}]'
            )

        if (epoch + 1) % train_config.save_every == 0:
            print(f'Epoch {epoch}: saving checkpoint')
            torch.save(
                {
                    'g_model_dict': generator.state_dict(),
                    'd_model_dict': discriminator.state_dict(),
                    'g_optim_dict': g_optimizer.state_dict(),
                    'd_optim_dict': d_optimizer.state_dict(),
                    'epoch': epoch,
                }, os.path.join(log_dir, f'epoch={epoch}.pt'))

        # validation epoch
        epoch_steps = 0
        mean_g_loss = 0.0
        mean_d_loss = 0.0
        generator.eval()
        discriminator.eval()
        pbar = tqdm(val_dataloader,
                    position=0,
                    desc='val [G loss: -.-----][D loss: -.-----]')
        for (x, y) in pbar:
            x = x.to(device)
            y = y.to(device)
            z = sample_z(generator.z_dim, x.shape[0], device)

            with torch.no_grad():
                with torch.cuda.amp.autocast(enabled=(device == 'cuda')):
                    g_loss, d_loss, x_fake = loss(generator, discriminator, x,
                                                  y, z)

            mean_g_loss += g_loss.item()
            mean_d_loss += d_loss.item()
            epoch_steps += 1
            pbar.set_description(
                desc=
                f'val [G loss: {mean_g_loss/epoch_steps:.5f}][D loss: {mean_d_loss/epoch_steps:.5f}]'
            )
Example #17
for i in range(steps):

  ob = obs[i:i+1] # (1, 64, 64, 1)
  action = oh_actions[i:i+1] # (1, n)

  z = vae.encode(ob) # (1, 32) VAE done!
  rnn_z = np.expand_dims(z, axis=0) # (1, 1, 32)
  action = np.expand_dims(action, axis=0) # (1, 1, n)


  input_x = np.concatenate([rnn_z, action], axis=2) # (1, 1, 32+n)
  feed = {rnn.input_x: input_x, rnn.initial_state: state} # predict the next state and next z.

  if pz is not None: # decode from the z
    frame = vae.decode(pz[None])
    frame2 = vae.decode(z)
    #neglogp = neg_likelihood(logmix, mean, logstd, z.reshape(32,1))
    #imsave(output_dir + '/%s_origin_%.2f.png' % (pad_num(i), np.exp(-neglogp)), 255.*ob.reshape(64, 64))
    #imsave(output_dir + '/%s_reconstruct.png' % pad_num(i), 255. * frame[0].reshape(64, 64))
    img = concat_img(255.*ob, 255*frame2, 255.*frame)
    imsave(output_dir + '/%s.png' % pad_num(i), img)

  (logmix, mean, logstd, state) = rnn.sess.run([rnn.out_logmix, rnn.out_mean,
                                                rnn.out_logstd, rnn.final_state], feed)



  # Sample the next frame's state.
  pz = sample_z(logmix, mean, logstd, OUTWIDTH, T)
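Example #17 draws the next latent from an MDN-RNN via sample_z(logmix, mean, logstd, OUTWIDTH, T). A hedged NumPy sketch of temperature-controlled sampling from a mixture of Gaussians, assuming logmix, mean and logstd all have shape (OUTWIDTH, num_mixtures):

import numpy as np

def sample_z(logmix, mean, logstd, outwidth, temperature):
    # Hypothetical sketch: draw one mixture-of-Gaussians sample per latent
    # dimension, softening the mixture weights by the temperature.
    logits = logmix / temperature
    logits = logits - logits.max(axis=1, keepdims=True)
    probs = np.exp(logits)
    probs = probs / probs.sum(axis=1, keepdims=True)

    z = np.zeros(outwidth)
    for i in range(outwidth):
        k = np.random.choice(probs.shape[1], p=probs[i])
        z[i] = mean[i, k] + np.exp(logstd[i, k]) * np.random.randn() * np.sqrt(temperature)
    return z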
Example #18
if __name__ == "__main__":
    # Create the models
    net_H = Net_H().to(device)
    net_Z = Net_Z().to(device)
    net_G = Net_G().to(device)

    # Initialize models weights
    net_H.apply(weights_init)
    net_Z.apply(weights_init)
    net_G.apply(weights_init)

    # Print the models
    print(net_H)
    print(net_Z)
    print(net_G)
    """Test outputs size"""
    z = torch.zeros(10, 1, 10).to(device)
    X = torch.zeros(10, 1, 32, 32).to(device)
    with torch.no_grad():
        h, skip_connect_layers = net_H(X)
        z = net_Z(h.squeeze(3).permute(0, 2, 1), z)
        output = net_G(h, z[:, :, :10].permute(0, 2, 1).unsqueeze(3),
                       skip_connect_layers)

    print(h.shape)  # expected: [10, 4000, 1, 1]
    print(z.shape)  # expected: [10, 1, 10]
    print(output.shape)  # expected: [10, 1, 32, 32]
    """Test vector z shape"""
    print(sample_z(10, z_size=10).size())  # expected size [10,1,10]
Example #19
 def train_G(self, batch_size, loop=1):
     for i in range(loop):
         _, loss = self.sess.run(
             [self.G_solver, self.G_loss],
             feed_dict={self.zs: sample_z(batch_size, self.z_dim)})
     return loss
Example #20
def main():
    # set GPU card
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # load anime face
    data_dir = '../anime_face/data_64/images/'
    data_extra_dir = '../anime_face/extra_data/images/'
    ds = dataset()
    ds.load_data(data_dir, verbose=0)
    ds.load_data(data_extra_dir, verbose=0)
    ds.shuffle()

    # reset graph
    tf.reset_default_graph()

    # set session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # build model
    model = GAN(sess, gf_dim=128)

    # training
    z_plot = sample_z(36, 100)

    # initial fake image
    z = sample_z(bs, 100)
    i = 1
    while True:
        if (i == 1) or (i <= 100
                        and i % 20 == 0) or (i <= 200 and i % 50 == 0) or (
                            i <= 1000 and i % 100 == 0) or (i % 200 == 0):
            g_samples = model.generate(z_plot)
            plot_samples(g_samples,
                         save=True,
                         filename=str(i),
                         folder_path='out2/',
                         h=6,
                         w=6)

        # train discriminator more
        for _ in range(5):
            real_img = ds.next_batch(bs)
            z = sample_z(bs, 100)
            fake_img = model.generate(z)
            # train D
            D_loss = model.train_D(real_img, fake_img)

        G_loss = model.train_G(bs)

        if (i % 100) == 0:
            model.save(model_name='WGAN_v2')
            z_loss = sample_z(64, 100)
            g_loss = model.generate(sample_z(32, 100))
            g, d = model.sess.run([model.G_loss, model.D_loss],
                                  feed_dict={
                                      model.xs: ds.random_sample(32),
                                      model.gs: g_loss,
                                      model.zs: z_loss
                                  })
            print(str(i) + ' iteration:')
            print('D_loss:', d)
            print('G_loss:', g, '\n')

        i = i + 1
Example #21
def train(args):
    random.seed(8722)
    torch.manual_seed(4565)
    measure_history = deque([0]*3000, 3000)
    convergence_history = []
    prev_measure = 1
    iter = 0
    thresh = 0.5
    
    graph = bn.create_bayes_net()
    bce_loss = torch.nn.BCEWithLogitsLoss()
    lr = args.lr
    iters = args.load_step
    prepare_paths(args)
    u_dist = utils.create_uniform(-1, 1)
    fixed_z = utils.sample_z(u_dist, (args.batch_size, args.z))
    (netG, optimG), (netD, optimD), (netL, optimL) = init_models(args)
    pretrain_labeler(args, netL, optimL)
    data_loader = datagen.load_celeba_50k_attrs(args)
    for epoch in range(args.epochs):
        for i, (data, _, attrs) in enumerate(data_loader):
            data = data.cuda()
            attrs = torch.squeeze(attrs>0).float().cuda()
            """ Labeler """
            netL.zero_grad()
            real_labels = netL(data)
            real_label_loss = bce_loss(real_labels, attrs).mean()
            real_label_loss.backward()
            optimL.step()
            """ Discriminator """
            for p in netD.parameters():
                p.requires_grad = True
            z = utils.sample_z(u_dist, (args.batch_size, args.z))       
            marginals = rand_marginals(args, graph)
            netD.zero_grad()
            with torch.no_grad():
                g_fake = netG(z, marginals)
            _, d_fake = netD(g_fake, marginals)
            _, d_real = netD(data, attrs)
          
            real_loss_d = (d_real - data).abs().mean()
            fake_loss_d = (d_fake - g_fake).abs().mean()

            lossD = real_loss_d - args.k * fake_loss_d
            lossD.backward()
            optimD.step()
            """ Generator """
            for p in netD.parameters():
                p.requires_grad = False
            marginals = rand_marginals(args, graph)
            netG.zero_grad()
            netL.zero_grad()
            z = utils.sample_z(u_dist, (args.batch_size, args.z))       
            g_fake = netG(z, marginals)
            _, d_fake = netD(g_fake, marginals)
            lossG = (d_fake - g_fake).abs().mean()
            
            ## Ok lets penalize distance from marginals (labeler)
            with torch.no_grad():
                marginals = rand_marginals(args, graph)
                z = utils.sample_z(u_dist, (args.batch_size, args.z))       
                g_fake = netG(z, marginals)
            fake_labels = netL(g_fake)
            fake_label_loss = bce_loss(fake_labels, marginals).mean()
            loss_gac = fake_label_loss + lossG
            loss_gac.backward()
            optimG.step(); optimL.step()

            lagrangian = (args.gamma*real_loss_d - fake_loss_d).detach()
            args.k += args.lambda_k * lagrangian
            args.k = max(min(1, args.k), 0)
        
            convg_measure = real_loss_d.item() + lagrangian.abs()
            measure_history.append(convg_measure)
            if iter % args.print_step == 0:
                print ("Iter: {}, D loss: {}, G Loss: {}, AC loss: {}".format(iter,
                       lossD.item(), lossG.item(), fake_label_loss.item()))
                save_images(args, g_fake.detach(), d_real.detach(), iter)
           
            """update training parameters"""
            lr = args.lr * 0.95 ** (iter//3000)
 
            for p in optimG.param_groups + optimD.param_groups:
                p['lr'] = lr

            if iter % 1000 == 0:
                pathG = 'experiments/{}/models/netG_{}.pt'.format(args.name, iter)
                pathD = 'experiments/{}/models/netD_{}.pt'.format(args.name, iter)
                utils.save_model(pathG, netG, optimG, args.k)
                utils.save_model(pathD, netD, optimD, args.k)
            iter += 1
Example #22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env-interface", type=str, default='gym!atari')
    parser.add_argument("--environment", type=str, default='CartPole-v0')
    parser.add_argument("--action-size", type=int, default=2)
    parser.add_argument("--input-shape", type=list, default=[None, 4])
    parser.add_argument("--target-update-freq", type=int, default=200)
    parser.add_argument("--epsilon-max", type=float, default=1.)
    parser.add_argument("--epsilon-min", type=float, default=.01)
    parser.add_argument("--epsilon-decay", type=float, default=.001)

    parser.add_argument("--learning-rate", type=float, default=.99)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=30000)

    parser.add_argument("--replay-mem-size", type=int, default=1000000)

    parser.add_argument("--K",
                        type=int,
                        default=1,
                        help='The number of steps to train the environment')
    parser.add_argument(
        "--L",
        type=int,
        default=1,
        help='The number of Q-learning steps for hypothetical rollouts')
    parser.add_argument("--latent-size",
                        type=int,
                        default=4,
                        help='Size of vector for Z')

    args = parser.parse_args()

    env = env_interface(args.env_interface,
                        args.environment,
                        pixel_feature=False,
                        render=True)

    #args.action_size = env.action_space.n
    args.action_size = env.action_size
    args.input_shape = [None] + list(env.obs_space_shape)

    print(args)

    # Other parameters
    epsilon = args.epsilon_max

    # Replay memory
    memory = Memory(args.replay_mem_size)

    # Time step
    time_step = 0.

    # Initialize the GANs
    cgan_state = CGAN(input_shape=args.input_shape,
                      action_size=args.action_size,
                      latent_size=args.latent_size,
                      gen_input_shape=args.input_shape)
    cgan_reward = CGAN(input_shape=args.input_shape,
                       action_size=args.action_size,
                       latent_size=args.latent_size,
                       gen_input_shape=[None, 1])

    qnet = qnetwork(input_shape=args.input_shape,
                    action_size=args.action_size,
                    scope='qnet')
    target_qnet = qnetwork(input_shape=args.input_shape,
                           action_size=args.action_size,
                           scope='target_qnet')
    update_ops = update_target_graph('qnet', 'target_qnet')

    rand_no = np.random.rand()
    #env = gym.wrappers.Monitor(env, '/tmp/cartpole-experiment-' + str(rand_no), force=True, video_callable=False)
    init = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(args.epochs):
            total_reward = 0
            observation = env.reset()
            for t in range(1000000):
                #env.render()
                action = qnet.get_action(sess, observation)
                if np.random.rand() < epsilon:
                    #action = env.action_space.sample()
                    action = np.random.randint(args.action_size)
                observation1, reward, done, info = env.step(action)
                total_reward += reward

                # Add to memory
                memory.add([observation, action, reward, observation1, done])

                # Reduce epsilon
                time_step += 1.
                epsilon = args.epsilon_min + (
                    args.epsilon_max - args.epsilon_min) * np.exp(
                        -args.epsilon_decay * time_step)

                # Training step
                batch = np.array(memory.sample(args.batch_size))
                qnet.train(sess, batch, args.learning_rate, target_qnet)

                # Training step: environment model
                for k in range(args.K):
                    batch = np.array(memory.sample(args.batch_size))

                    states = np.vstack(batch[:, 0])
                    actions = np.array(batch[:, 1])
                    rewards = batch[:, 2]
                    states1 = np.vstack(batch[:, 3])

                    _, D_loss_state = sess.run(
                        [cgan_state.D_solver, cgan_state.D_loss],
                        feed_dict={
                            cgan_state.states: states,
                            cgan_state.actions: actions,
                            cgan_state.Z: sample_z(len(batch),
                                                   args.latent_size),
                            cgan_state.X: states1
                        })
                    _, G_loss_state = sess.run(
                        [cgan_state.G_solver, cgan_state.G_loss],
                        feed_dict={
                            cgan_state.states: states,
                            cgan_state.actions: actions,
                            cgan_state.Z: sample_z(len(batch),
                                                   args.latent_size)
                        })

                    _, D_loss_reward = sess.run(
                        [cgan_reward.D_solver, cgan_reward.D_loss],
                        feed_dict={
                            cgan_reward.states: states,
                            cgan_reward.actions: actions,
                            cgan_reward.Z: sample_z(len(batch),
                                                    args.latent_size),
                            cgan_reward.X: rewards[..., np.newaxis]
                        })
                    _, G_loss_reward = sess.run(
                        [cgan_reward.G_solver, cgan_reward.G_loss],
                        feed_dict={
                            cgan_reward.states: states,
                            cgan_reward.actions: actions,
                            cgan_reward.Z: sample_z(len(batch),
                                                    args.latent_size)
                        })
                    #print D_loss_state, G_loss_state, D_loss_reward, G_loss_state

                # Training step: imagination rollouts
                if time_step == 0.:
                    print "time_step 0 here"
                if time_step >= 0.:
                    for l in range(args.L):
                        batch = np.array(memory.sample(args.batch_size))
                        assert len(batch) > 0

                        states1 = np.vstack(batch[:, 3])
                        actions = np.random.randint(args.action_size,
                                                    size=len(batch))
                        dones = np.array([False] * len(batch))

                        G_sample_state = sess.run(cgan_state.G_sample,
                                                  feed_dict={
                                                      cgan_state.states:
                                                      states1,
                                                      cgan_state.actions:
                                                      actions,
                                                      cgan_state.Z:
                                                      sample_z(
                                                          len(batch),
                                                          args.latent_size)
                                                  })
                        G_sample_reward = sess.run(cgan_reward.G_sample,
                                                   feed_dict={
                                                       cgan_reward.states:
                                                       states1,
                                                       cgan_reward.actions:
                                                       actions,
                                                       cgan_reward.Z:
                                                       sample_z(
                                                           len(batch),
                                                           args.latent_size)
                                                   })
                        qnet.train(sess, None, args.learning_rate, target_qnet,
                                   states1, actions, G_sample_reward,
                                   G_sample_state, dones)

                # Set observation
                observation = observation1

                # Update?
                if int(time_step) % args.target_update_freq == 0:
                    #print "Updating target..."
                    sess.run(update_ops)

                if done:
                    print "Episode finished after {} timesteps".format(
                        t + 1), 'epoch', epoch, 'total_rewards', total_reward
                    break
Example #23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--action-dim", type=int, default=1)
    parser.add_argument("--state-dim", type=int, default=1)
    parser.add_argument("--input-shape", type=list, default=[None, 1])
    parser.add_argument("--epochs", type=int, default=30000)
    parser.add_argument('--tau',
                        help='soft target update parameter',
                        default=0.001)
    parser.add_argument("--action-bound", type=float, default=1.)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--gamma", type=float, default=.99)

    parser.add_argument("--K",
                        type=int,
                        default=1,
                        help='The number of steps to train the environment')
    parser.add_argument(
        "--L",
        type=int,
        default=1,
        help='The number of Q-learning steps for hypothetical rollouts')
    parser.add_argument("--latent-size",
                        type=int,
                        default=4,
                        help='Size of vector for Z')

    args = parser.parse_args()

    # Initialize environment
    env = gym.make(args.environment)
    args.state_dim = env.observation_space.shape[0]
    args.input_shape = [None, args.state_dim]
    args.action_dim = env.action_space.shape[0]
    #assert args.action_dim == 1
    args.action_bound = env.action_space.high
    print(args)

    # Networks
    actor_source = actor(state_shape=[None, args.state_dim],\
        action_shape=[None, args.action_dim],\
        output_bound=args.action_bound[0],\
        scope='actor_source')
    critic_source = critic(state_shape=[None, args.state_dim],\
        action_shape=[None, args.action_dim],\
        scope='critic_source')
    actor_target = actor(state_shape=[None, args.state_dim],\
        action_shape=[None, args.action_dim],\
        output_bound=args.action_bound[0],\
        scope='actor_target')
    critic_target = critic(state_shape=[None, args.state_dim],\
        action_shape=[None, args.action_dim],\
        scope='critic_target')

    # Initialize the GANs
    cgan_state = CGAN(input_shape=args.input_shape,\
        action_size=args.action_dim,\
        latent_size=args.latent_size,\
        gen_input_shape=args.input_shape,\
        continuous_action=True)
    cgan_reward = CGAN(input_shape=args.input_shape,\
        action_size=args.action_dim,\
        latent_size=args.latent_size,\
        gen_input_shape=[None, 1],\
        continuous_action=True)

    # Update and copy operators
    update_target_actor = update_target_graph2('actor_source', 'actor_target',
                                               args.tau)
    update_target_critic = update_target_graph2('critic_source',
                                                'critic_target', args.tau)

    copy_target_actor = update_target_graph2('actor_source', 'actor_target',
                                             1.)
    copy_target_critic = update_target_graph2('critic_source', 'critic_target',
                                              1.)

    # Replay memory
    memory = Memory(args.replay_mem_size)

    # Actor noise
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(args.action_dim))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(copy_target_critic)
        sess.run(copy_target_actor)

        for epoch in range(args.epochs):
            state = env.reset()
            total_rewards = 0.0
            while True:
                #env.render()
                # Choose an action
                action = sess.run(
                    actor_source.action,
                    feed_dict={actor_source.states: state[np.newaxis, ...]
                               })[0] + actor_noise()
                # Execute action
                state1, reward, done, _ = env.step(action)
                total_rewards += float(reward)
                # Store tuple in replay memory
                memory.add([state[np.newaxis, ...],\
                    action[np.newaxis, ...],\
                    reward,\
                    state1[np.newaxis, ...],\
                    done])

                # Training step: update actor critic using real experience
                batch = np.array(memory.sample(args.batch_size))
                assert len(batch) > 0
                states = np.concatenate(batch[:, 0], axis=0)
                actions = np.concatenate(batch[:, 1], axis=0)
                rewards = batch[:, 2]
                states1 = np.concatenate(batch[:, 3], axis=0)
                dones = batch[:, 4]

                # Update the critic
                actions1 = sess.run(actor_target.action,\
                    feed_dict={actor_target.states:states1})
                targetQ = np.squeeze(sess.run(critic_target.Q,\
                    feed_dict={critic_target.states:states1,\
                        critic_target.actions:actions1}), axis=-1)
                targetQ = rewards + (
                    1. - dones.astype(np.float32)) * args.gamma * targetQ
                targetQ = targetQ[..., np.newaxis]
                _, critic_loss = sess.run([critic_source.critic_solver,\
                    critic_source.loss],\
                    feed_dict={critic_source.states:states,\
                        critic_source.actions:actions,\
                        critic_source.targetQ:targetQ})

                # Update the actor
                critic_grads = sess.run(critic_source.grads,\
                    feed_dict={critic_source.states:states,\
                        critic_source.actions:actions})[0]# Grab gradients from critic
                _ = sess.run(actor_source.opt,\
                    feed_dict={actor_source.states:states,\
                        actor_source.dQ_by_da:critic_grads})

                # Update target networks
                sess.run(update_target_critic)
                sess.run(update_target_actor)

                # Training step: update the environment model using real experience (i.e., update the conditional GANs)
                for k in range(args.K):
                    batch = np.array(memory.sample(args.batch_size))

                    states = np.concatenate(batch[:, 0], axis=0)
                    actions = np.concatenate(batch[:, 1], axis=0)
                    rewards = batch[:, 2]
                    states1 = np.concatenate(batch[:, 3], axis=0)

                    _, D_loss_state = sess.run([cgan_state.D_solver, cgan_state.D_loss],\
                        feed_dict={cgan_state.states:states,\
                            cgan_state.actions:actions,\
                            cgan_state.Z:sample_z(len(batch),\
                            args.latent_size),\
                            cgan_state.X:states1})

                    _, G_loss_state = sess.run([cgan_state.G_solver,\
                        cgan_state.G_loss],\
                        feed_dict={cgan_state.states:states,\
                            cgan_state.actions:actions,\
                            cgan_state.Z:sample_z(len(batch),\
                            args.latent_size)})

                    _, D_loss_reward = sess.run([cgan_reward.D_solver,\
                        cgan_reward.D_loss],\
                        feed_dict={cgan_reward.states:states,\
                            cgan_reward.actions:actions,\
                            cgan_reward.Z:sample_z(len(batch),\
                            args.latent_size),\
                            cgan_reward.X:rewards[..., np.newaxis]})

                    _, G_loss_reward = sess.run([cgan_reward.G_solver,\
                        cgan_reward.G_loss],\
                        feed_dict={cgan_reward.states:states,\
                            cgan_reward.actions:actions,\
                            cgan_reward.Z:sample_z(len(batch),\
                            args.latent_size)})
                    #print D_loss_state, G_loss_state, D_loss_reward, G_loss_state

                # Training step: update actor critic using imagination rollouts
                for l in range(args.L):
                    batch = np.array(memory.sample(args.batch_size))
                    states_ = np.concatenate(batch[:, 3], axis=0)
                    actions = np.random.uniform(env.action_space.low[0],\
                        env.action_space.high[0],\
                        size=(len(batch),\
                        env.action_space.shape[0]))
                    dones = np.array([False] * len(batch))

                    G_sample_state = sess.run(cgan_state.G_sample,\
                        feed_dict={cgan_state.states:states_,\
                            cgan_state.actions:actions,\
                            cgan_state.Z:sample_z(len(batch),\
                            args.latent_size)})
                    G_sample_reward = sess.run(cgan_reward.G_sample,\
                        feed_dict={cgan_reward.states:states_,\
                            cgan_reward.actions:actions,\
                            cgan_reward.Z:sample_z(len(batch),\
                            args.latent_size)})
                    G_sample_reward = np.squeeze(G_sample_reward, axis=-1)

                    # Update the critic
                    actions1 = sess.run(actor_target.action,\
                        feed_dict={actor_target.states:G_sample_state})
                    targetQ = np.squeeze(sess.run(critic_target.Q,\
                        feed_dict={critic_target.states:G_sample_state,\
                            critic_target.actions:actions1}), axis=-1)
                    targetQ = G_sample_reward + (
                        1. - dones.astype(np.float32)) * args.gamma * targetQ
                    targetQ = targetQ[..., np.newaxis]
                    _, critic_loss = sess.run([critic_source.critic_solver,\
                        critic_source.loss],\
                        feed_dict={critic_source.states:states_,\
                            critic_source.actions:actions,\
                            critic_source.targetQ:targetQ})

                    # Update the actor
                    critic_grads = sess.run(critic_source.grads,\
                        feed_dict={critic_source.states:states_,\
                            critic_source.actions:actions})[0]# Grab gradients from critic
                    _ = sess.run(actor_source.opt,\
                        feed_dict={actor_source.states:states_,\
                            actor_source.dQ_by_da:critic_grads})

                    # Update target networks
                    sess.run(update_target_critic)
                    sess.run(update_target_actor)

                state = np.copy(state1)
                if done:
                    print('epoch', epoch, 'total rewards', total_rewards)
                    break
Example #24
    def build_model(self):
        with tf.device('/gpu:%d' % self.gpu_id):
            self.X = tf.placeholder(tf.float32, [None, self.input_dim])
            self.k = tf.placeholder(tf.int32)
            self.keep_prob = tf.placeholder(tf.float32)

            ### Encoding ###
            self.z_mu, self.z_logvar = self.encoder(self.X,
                                                    self.enc_h_dim_list,
                                                    self.z_dim, self.keep_prob)
            self.z = sample_z(self.z_mu, self.z_logvar)

            ### Decoding/Generating ###
            self.recon_X_logit = self.decoder(self.z, self.dec_h_dim_list,
                                              self.input_dim, self.keep_prob,
                                              False)
            gen_X_logit = self.decoder(tf.random_normal(tf.shape(self.z)),
                                       self.dec_h_dim_list, self.input_dim,
                                       self.keep_prob, True)
            self.recon_X = tf.nn.sigmoid(self.recon_X_logit)
            gen_X = tf.nn.sigmoid(gen_X_logit)

            ### Discriminating ###
            dis_logit_real, dis_prob_real = self.discriminator(
                self.X, self.dis_h_dim_list, 1, self.keep_prob, False)
            #dis_logit_real, dis_prob_real = self.discriminator(self.recon_X, self.dis_h_dim_list, 1, self.keep_prob, False)
            dis_logit_fake, dis_prob_fake = self.discriminator(
                gen_X, self.dis_h_dim_list, 1, self.keep_prob, True)

            self.logger.info([x.name for x in tf.global_variables()])
            print([x.name for x in tf.global_variables() if 'enc' in x.name])
            print([x.name for x in tf.global_variables() if 'dec' in x.name])
            print([x.name for x in tf.global_variables() if 'dis' in x.name])

            enc_theta = ([x for x in tf.global_variables() if 'enc' in x.name])
            dec_theta = ([x for x in tf.global_variables() if 'dec' in x.name])
            dis_theta = ([x for x in tf.global_variables() if 'dis' in x.name])

            #cost = tf.reduce_mean(tf.square(X-output))
            ### Loss ###
            #self.recon_loss = tf.losses.mean_squared_error(self.X, self.recon_X)
            self.recon_loss = self.recon_loss()
            self.kl_loss = kl_divergence_normal_distribution(
                self.z_mu, self.z_logvar)

            self.dec_loss_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=dis_logit_fake,
                    labels=tf.ones_like(dis_logit_fake)))

            self.dis_loss_real = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=dis_logit_real,
                    labels=tf.ones_like(dis_logit_real)))
            self.dis_loss_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=dis_logit_fake,
                    labels=tf.zeros_like(dis_logit_fake)))

            self.enc_loss = self.recon_loss + self.kl_loss
            self.dec_loss = self.dec_loss_fake + self.recon_loss
            self.dis_loss = self.dis_loss_real + self.dis_loss_fake
            #cost_summary = tf.summary.scalar('cost', cost)

            #self.total_loss = self.enc_loss + self.dec_loss + self.dis_loss
            self.enc_solver = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
            self.dec_solver = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
            self.dis_solver = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
            """
            self.enc_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
            self.dec_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
            self.dis_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
            """
            """
            self.enc_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
            self.dec_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
            self.dis_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
            """
            ### Recommendation metric ###
        with tf.device('/cpu:0'):
            self.top_k_op = tf.nn.top_k(self.recon_X, self.k)
Example #25
def train(context_encoder,
          context_to_dist,
          decoder,
          aggregator,
          train_loader,
          test_loader,
          optimizer,
          n_epochs,
          device,
          save_path,
          summary_writer,
          save_every=10,
          h=28,
          w=28,
          log=1):
    context_encoder.train()
    decoder.train()
    grid = make_mesh_grid(h, w).to(device)  # size h,w,2

    for epoch in range(n_epochs):
        running_loss = 0.0
        last_log_time = time.time()

        # Training
        train_loss = 0.0
        for batch_idx, (batch, _) in enumerate(train_loader):
            batch = batch.to(device)
            if ((batch_idx % 100) == 0) and batch_idx > 1:
                print(
                    "epoch {} | batch {} | mean running loss {:.2f} | {:.2f} batch/s"
                    .format(epoch, batch_idx, running_loss / 100,
                            100 / (time.time() - last_log_time)))
                last_log_time = time.time()
                running_loss = 0.0

            mask = random_mask_uniform(bsize=batch.size(0),
                                       h=h,
                                       w=w,
                                       device=batch.device)
            z_params_full, z_params_masked = all_forward(
                batch, grid, mask, context_encoder, aggregator,
                context_to_dist)
            reconstruction_loss, kl_loss, reconstructed_image = compute_loss(
                batch, grid, mask, z_params_full, z_params_masked, h, w,
                decoder)
            loss = reconstruction_loss + kl_loss

            if batch_idx % 100 == 0:
                print("reconstruction {:.2f} | kl {:.2f}".format(
                    reconstruction_loss, kl_loss))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # add loss
            running_loss += loss.item()
            train_loss += loss.item()

        print("Epoch train loss : {}".format(train_loss / len(train_loader)))
        if summary_writer is not None:
            summary_writer.add_scalar("train/loss",
                                      train_loss / len(train_loader),
                                      global_step=epoch)
        if (epoch % save_every == 0) and log and epoch > 0:
            save_model(save_path, "NP_model_epoch_{}.pt".format(epoch),
                       context_encoder, context_to_dist, decoder, aggregator,
                       device)
        ## TEST
        test_loss = 0.0

        for batch_idx, (batch, _) in enumerate(test_loader):
            batch = batch.to(device)
            mask = random_mask_uniform(bsize=batch.size(0),
                                       h=h,
                                       w=w,
                                       device=batch.device)
            with torch.no_grad():
                z_params_full, z_params_masked = all_forward(
                    batch, grid, mask, context_encoder, aggregator,
                    context_to_dist)
                reconstruction_loss, kl_loss, reconstructed_image = compute_loss(
                    batch, grid, mask, z_params_full, z_params_masked, h, w,
                    decoder)
                loss = reconstruction_loss + kl_loss
                test_loss += loss.item()

        if summary_writer is not None:
            summary_writer.add_scalar("test/loss",
                                      test_loss / len(test_loader),
                                      global_step=epoch)
        print("TEST loss | epoch {} | {:.2f}".format(
            epoch, test_loss / len(test_loader)))

        # do examples

        example_batch, _ = next(iter(test_loader))
        example_batch = example_batch[:10].to(device)
        for n_pixels in [50, 150, 450]:
            mask = random_mask(example_batch.size(0),
                               h,
                               w,
                               n_pixels,
                               device=example_batch.device)
            z_params_full, z_params_masked = all_forward(
                example_batch, grid, mask, context_encoder, aggregator,
                context_to_dist)

            z_context = torch.cat([
                sample_z(z_params_masked).unsqueeze(1).expand(-1, h * w, -1)
                for i in range(3)
            ],
                                  dim=0)
            z_context = torch.cat([
                z_context,
                grid.view(1, h * w, 2).expand(z_context.size(0), -1, -1)
            ],
                                  dim=2)
            decoded_images = decoder(z_context).view(-1, 1, h, w)
            stacked_images = display_images(original_image=example_batch,
                                            mask=mask,
                                            reconstructed_image=decoded_images)

            image = torch.tensor(stacked_images)

            if summary_writer is not None:
                summary_writer.add_image(
                    "test_image/{}_pixels".format(n_pixels),
                    image,
                    global_step=epoch)

    return