# Per-epoch step: train, evaluate, step both schedulers, checkpoint both
# models and write an image decoded from random latents.
# NOTE(review): the enclosing loop/function header is outside this view.
train(e)
mdn_rnn_test_loss, vae_test_loss = test(e)

# Each scheduler is stepped with the test loss of its own model.
vae_scheduler.step(vae_test_loss)
mdrnn_scheduler.step(mdn_rnn_test_loss)

# Both checkpoints share one file name; only the directory differs.
checkpoint_name = 'checkpoint_{e}.tar'.format(e=e)
vae_checkpoint_fname = join(new_vae_dir, checkpoint_name)
mdn_rnn_checkpoint_fname = join(new_rnn_dir, checkpoint_name)

vae_snapshot = {
    'epoch': e,
    'state_dict': vae.state_dict(),
    'precision': vae_test_loss,
    'optimizer': vae_optimizer.state_dict(),
    'scheduler': vae_scheduler.state_dict(),
}
torch.save(vae_snapshot, vae_checkpoint_fname)

mdrnn_snapshot = {
    'state_dict': mdrnn.state_dict(),
    'optimizer': mdrnn_optimizer.state_dict(),
    'scheduler': mdrnn_scheduler.state_dict(),
    'precision': mdn_rnn_test_loss,
    'epoch': e,
}
torch.save(mdrnn_snapshot, mdn_rnn_checkpoint_fname)

# Decode random latent vectors into an image file; no gradients are needed.
with torch.no_grad():
    sample = torch.randn(RED_SIZE, LSIZE).to(device)
    sample = vae.decoder(sample).cpu()
    sample_path = join(new_samples_dir, 'sample_' + str(e) + '.png')
    # NOTE(review): the literal 64 only matches the RED_SIZE latents drawn
    # above when RED_SIZE == 64 - confirm against the constants module.
    save_image(sample.view(64, 3, RED_SIZE, RED_SIZE), sample_path)
pbar.update(BSIZE) pbar.close() return cum_loss * BSIZE / len(loader.dataset) train = partial(data_pass, train=True) test = partial(data_pass, train=False) for e in range(epochs): cur_best = None train(e) test_loss = test(e) scheduler.step(test_loss) earlystopping.step(test_loss) is_best = not cur_best or test_loss < cur_best if is_best: cur_best = test_loss checkpoint_fname = join(rnn_dir, 'checkpoint.tar') save_checkpoint({ "state_dict": mdrnn.state_dict(), "optimizer": optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'earlystopping': earlystopping.state_dict(), "precision": test_loss, "epoch": e}, is_best, checkpoint_fname, rnn_file) if earlystopping.stop: print("End of Training because of early stopping at epoch {}".format(e)) break
pbar.close() return cum_loss * BSIZE / len(loader.dataset) train = partial(data_pass, train=True, include_reward=args.include_reward) test = partial(data_pass, train=False, include_reward=args.include_reward) for e in range(epochs): cur_best = None train(e) test_loss = test(e) scheduler.step(test_loss) earlystopping.step(test_loss) is_best = not cur_best or test_loss < cur_best if is_best: cur_best = test_loss checkpoint_fname = join(rnn_dir, 'checkpoint.tar') save_checkpoint({ "state_dict": mdrnn.state_dict(), "optimizer": optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'earlystopping': earlystopping.state_dict(), "precision": test_loss, "epoch": e}, is_best, checkpoint_fname, rnn_file) if earlystopping.stop: print("End of Training because of early stopping at epoch {}".format(e)) break
def train_mdrnn(logdir, traindir, epochs=10, testdir=None):
    """ Train the MDRNN on sequences encoded by a previously trained VAE.

    The VAE checkpoint is read from ``<logdir>/vae/best.tar``. The directory
    ``<logdir>/mdrnn`` is created when missing, and its ``checkpoint.tar`` /
    ``best.tar`` paths are handed to ``save_checkpoint`` after every epoch.
    When ``<logdir>/mdrnn/best.tar`` already exists, the model, optimizer,
    scheduler and early-stopping states are restored from it before training.

    :args logdir: directory containing the ``vae`` checkpoint folder
    :args traindir: directory given to the training RolloutSequenceDataset
    :args epochs: maximum number of epochs, converted with ``int``
    :args testdir: directory given to the test RolloutSequenceDataset;
        ``traindir`` is used when this is falsy

    :raises AssertionError: if ``<logdir>/vae/best.tar`` does not exist
    """
    BSIZE = 80  # maybe should change this back to their initial one of 16
    noreload = False  # Best model is not reloaded if specified
    SEQ_LEN = 32
    epochs = int(epochs)

    testdir = testdir if testdir else traindir
    torch.manual_seed(123)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Loading VAE
    vae_file = join(logdir, 'vae', 'best.tar')
    assert exists(vae_file), "No trained VAE in the logdir..."
    # map_location lets a checkpoint saved on GPU be opened on a CPU-only host.
    state = torch.load(vae_file, map_location=device)
    print("Loading VAE at epoch {} "
          "with test error {}".format(
              state['epoch'], state['precision']))

    vae = VAE(3, LSIZE).to(device)
    vae.load_state_dict(state['state_dict'])

    # Loading model
    rnn_dir = join(logdir, 'mdrnn')
    rnn_file = join(rnn_dir, 'best.tar')

    if not exists(rnn_dir):
        mkdir(rnn_dir)

    mdrnn = MDRNN(LSIZE, ASIZE, RSIZE, 5)
    mdrnn.to(device)
    optimizer = torch.optim.RMSprop(mdrnn.parameters(), lr=1e-3, alpha=.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
    earlystopping = EarlyStopping('min', patience=30)

    if exists(rnn_file) and not noreload:
        rnn_state = torch.load(rnn_file, map_location=device)
        print("Loading MDRNN at epoch {} "
              "with test error {}".format(
                  rnn_state["epoch"], rnn_state["precision"]))
        mdrnn.load_state_dict(rnn_state["state_dict"])
        optimizer.load_state_dict(rnn_state["optimizer"])
        # Scheduler and early-stopping states must come from the MDRNN
        # checkpoint, not from the VAE checkpoint held in `state`.
        scheduler.load_state_dict(rnn_state['scheduler'])
        earlystopping.load_state_dict(rnn_state['earlystopping'])

    # Data Loading
    transform = transforms.Lambda(
        lambda x: np.transpose(x, (0, 3, 1, 2)) / 255)
    train_loader = DataLoader(
        RolloutSequenceDataset(traindir, SEQ_LEN, transform, buffer_size=30),
        batch_size=BSIZE, num_workers=8, shuffle=True)
    test_loader = DataLoader(
        RolloutSequenceDataset(testdir, SEQ_LEN, transform, train=False,
                               buffer_size=10),
        batch_size=BSIZE, num_workers=8)

    def to_latent(obs, next_obs):
        """ Transform observations to latent space.

        :args obs: 5D torch tensor (batch, SEQ_LEN, 3, SIZE, SIZE)
        :args next_obs: 5D torch tensor (batch, SEQ_LEN, 3, SIZE, SIZE)

        :returns: (latent_obs, latent_next_obs)
            - latent_obs: 3D torch tensor (batch, SEQ_LEN, LSIZE)
            - latent_next_obs: 3D torch tensor (batch, SEQ_LEN, LSIZE)
        """
        # Use the real batch size: the loaders do not drop the last batch,
        # which can hold fewer than BSIZE sequences.
        batch_size = obs.size(0)
        with torch.no_grad():
            # NOTE(review): recent PyTorch releases deprecate f.upsample in
            # favour of f.interpolate; kept as-is because the targeted
            # PyTorch version is not visible here.
            obs, next_obs = [
                f.upsample(x.view(-1, 3, SIZE, SIZE), size=RED_SIZE,
                           mode='bilinear', align_corners=True)
                for x in (obs, next_obs)]

            # The VAE call returns the two distribution parameters after
            # its first output.
            (obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma) = [
                vae(x)[1:] for x in (obs, next_obs)]

            # Sample a latent from each (mu, logsigma) pair and restore the
            # (batch, sequence) leading dimensions.
            latent_obs, latent_next_obs = [
                (x_mu + x_logsigma.exp() * torch.randn_like(x_mu)).view(
                    batch_size, SEQ_LEN, LSIZE)
                for x_mu, x_logsigma in
                [(obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma)]]
        return latent_obs, latent_next_obs

    def get_loss(latent_obs, action, reward, terminal, latent_next_obs):
        """ Compute losses.

        The loss that is computed is:
        (GMMLoss(latent_next_obs, GMMPredicted) + MSE(reward, predicted_reward) +
             BCE(terminal, logit_terminal)) / (LSIZE + 2)
        The LSIZE + 2 factor is here to counteract the fact that the GMMLoss
        scales approximately linearly with LSIZE. All losses are averaged both
        on the batch and the sequence dimensions (the two first dimensions).

        :args latent_obs: (BSIZE, SEQ_LEN, LSIZE) torch tensor
        :args action: (BSIZE, SEQ_LEN, ASIZE) torch tensor
        :args reward: (BSIZE, SEQ_LEN) torch tensor
        :args terminal: torch tensor, presumably (BSIZE, SEQ_LEN) like reward
        :args latent_next_obs: (BSIZE, SEQ_LEN, LSIZE) torch tensor

        :returns: dictionary of losses, containing the gmm, the mse, the bce
            and the averaged loss.
        """
        # Swap the first two dimensions of every input before the MDRNN call.
        latent_obs, action,\
            reward, terminal,\
            latent_next_obs = [arr.transpose(1, 0)
                               for arr in [latent_obs, action,
                                           reward, terminal,
                                           latent_next_obs]]
        mus, sigmas, logpi, rs, ds = mdrnn(action, latent_obs)
        gmm = gmm_loss(latent_next_obs, mus, sigmas, logpi)
        bce = f.binary_cross_entropy_with_logits(ds, terminal)
        mse = f.mse_loss(rs, reward)
        loss = (gmm + bce + mse) / (LSIZE + 2)
        return dict(gmm=gmm, bce=bce, mse=mse, loss=loss)

    def data_pass(epoch, train):  # pylint: disable=too-many-locals
        """ One pass through the data.

        :args epoch: epoch index, only used to label the progress bar
        :args train: when True the MDRNN is put in train mode and optimized
            on the training loader; otherwise it is put in eval mode and
            evaluated on the test loader without gradients

        :returns: cumulated loss times BSIZE, divided by the dataset length
        """
        if train:
            mdrnn.train()
            loader = train_loader
        else:
            mdrnn.eval()
            loader = test_loader

        loader.dataset.load_next_buffer()

        cum_loss = 0
        cum_gmm = 0
        cum_bce = 0
        cum_mse = 0

        pbar = tqdm(total=len(loader.dataset), desc="Epoch {}".format(epoch))
        for i, data in enumerate(loader):
            obs, action, reward, terminal, next_obs = [arr.to(device)
                                                       for arr in data]

            # transform obs
            latent_obs, latent_next_obs = to_latent(obs, next_obs)

            if train:
                losses = get_loss(latent_obs, action, reward,
                                  terminal, latent_next_obs)

                optimizer.zero_grad()
                losses['loss'].backward()
                optimizer.step()
            else:
                with torch.no_grad():
                    losses = get_loss(latent_obs, action, reward,
                                      terminal, latent_next_obs)

            cum_loss += losses['loss'].item()
            cum_gmm += losses['gmm'].item()
            cum_bce += losses['bce'].item()
            cum_mse += losses['mse'].item()

            # Running means over the batches seen so far; gmm is also
            # divided by LSIZE for display.
            pbar.set_postfix_str("loss={loss:10.6f} bce={bce:10.6f} "
                                 "gmm={gmm:10.6f} mse={mse:10.6f}".format(
                                     loss=cum_loss / (i + 1),
                                     bce=cum_bce / (i + 1),
                                     gmm=cum_gmm / LSIZE / (i + 1),
                                     mse=cum_mse / (i + 1)))
            pbar.update(BSIZE)
        pbar.close()
        return cum_loss * BSIZE / len(loader.dataset)

    train = partial(data_pass, train=True)
    test = partial(data_pass, train=False)

    # Best test loss seen so far. It has to be initialised once, before the
    # loop: resetting it every epoch would flag every epoch as the best one.
    cur_best = None
    for e in range(epochs):
        train(e)
        test_loss = test(e)
        scheduler.step(test_loss)
        earlystopping.step(test_loss)

        # Compare against None explicitly so a best loss of 0.0 is not
        # mistaken for "no best yet".
        is_best = cur_best is None or test_loss < cur_best
        if is_best:
            cur_best = test_loss
        checkpoint_fname = join(rnn_dir, 'checkpoint.tar')
        save_checkpoint({
            "state_dict": mdrnn.state_dict(),
            "optimizer": optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'earlystopping': earlystopping.state_dict(),
            "precision": test_loss,
            "epoch": e}, is_best, checkpoint_fname,
                        rnn_file)

        if earlystopping.stop:
            print("End of Training because of early stopping at epoch {}".format(e))
            break