import sys

import numpy as np
import torch
import yaml


def main():
    make_ckpt_data_sample_dirs()

    # Load the experiment configuration from the YAML file given on the
    # command line.
    config_path = sys.argv[1]
    with open(config_path, "r") as config_fp:
        config = yaml.full_load(config_fp)
    input_dims = config["input_dims"]
    input_size = np.prod(input_dims)
    hidden_size = config["hidden_size"]
    latent_size = config["latent_size"]
    device = torch.device(config["device"])
    num_epochs = config["epochs"]
    save_freq = config["save_freq"]

    if config["dataset"] == "mnist":
        train_loader, val_loader, test_loader = load_mnist()
        data_cmap = "Greys_r"
    else:
        train_loader, val_loader, test_loader = load_centered_dspirtes()
        data_cmap = "RGB"

    model = VAE(
        input_size=input_size,
        hidden_size=hidden_size,
        latent_size=latent_size,
        device=device,
    )
    optimizer = torch.optim.Adam(model.parameters())
    model.to(device)

    train_model(
        model,
        train_loader,
        val_loader,
        num_epochs,
        optimizer,
        device,
        image_dims=input_dims,
        save_freq=save_freq,
        data_cmap=data_cmap,
    )

    test_loss = test_model(model, test_loader, device)
    print(f"Test loss: {test_loss}")

    plot_reconstructions(model, next(iter(test_loader)), device, num_epochs,
                         input_dims, data_cmap)
    plot_samples_from_prior(model, device, num_epochs, input_dims, data_cmap)
    save_checkpoint(model, num_epochs)
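# Example config file for main() above. The key names match those read from
# the YAML; the values are purely illustrative (MNIST-shaped defaults), not
# taken from the original project.
import yaml

example_config = {
    "input_dims": [1, 28, 28],  # channels x height x width
    "hidden_size": 400,
    "latent_size": 20,
    "device": "cpu",
    "epochs": 20,
    "save_freq": 5,
    "dataset": "mnist",
}

with open("config.yaml", "w") as config_fp:
    yaml.safe_dump(example_config, config_fp)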
from tqdm import tqdm


def train_model(
    model,
    train_loader,
    val_loader,
    epochs,
    optimizer,
    device,
    image_dims,
    save_freq=5,
    data_cmap="Greys_r",
):
    print("Training model...")
    for i in range(epochs):
        model.train()
        tqdm_loader = tqdm(train_loader, desc=f"Epoch {i}")
        running_loss = 0
        total_images = 0
        for x, _ in tqdm_loader:
            x = x.to(device)

            # Standard VAE training step: forward pass, ELBO loss, backprop.
            optimizer.zero_grad()
            output, mean, logvar = model(x)
            loss, _, _ = vae_loss(x, output, mean, logvar)
            loss.backward()
            optimizer.step()

            # Track the running per-image loss for the progress bar.
            batch_size = x.shape[0]
            running_loss += loss.item() * batch_size
            total_images += batch_size
            tqdm_loader.set_postfix({"training_loss": running_loss / total_images})

        model.eval()
        val_loss = test_model(model, val_loader, device)
        print(f"\tValidation loss: {val_loss}")

        if i % save_freq == 0:
            save_checkpoint(model, i)
            plot_reconstructions(model, next(iter(val_loader)), device, i,
                                 image_dims, data_cmap)
            plot_samples_from_prior(model, device, i, image_dims, data_cmap)
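# train_model() unpacks three values from vae_loss(), which is not shown
# here. Below is a minimal sketch of the conventional implementation
# (total loss, reconstruction term, KL term), assuming the decoder produces
# sigmoid outputs the same shape as the input; the repo's actual vae_loss
# may differ.
import torch
import torch.nn.functional as F


def vae_loss_sketch(x, output, mean, logvar):
    batch = x.shape[0]
    # Reconstruction term: per-pixel binary cross-entropy, summed over
    # pixels and averaged over the batch.
    recon = F.binary_cross_entropy(
        output.view(batch, -1), x.view(batch, -1), reduction="sum"
    ) / batch
    # KL divergence between N(mean, exp(logvar)) and the standard normal prior.
    kl = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp()) / batch
    return recon + kl, recon, kl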
autoencoder.summary()

# Train autoencoder
autoencoder.fit(x_train_noisy, x_train,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(x_test_noisy, x_test),
                callbacks=[TensorBoard(log_dir='/tmp/denoising')])

# Use autoencoder to denoise test images
denoised_imgs = autoencoder.predict(x_test_noisy)

# Plot denoised images
utils.plot_reconstructions(x_test_noisy, denoised_imgs, n=10)

#-------------------------------------------------------------------------------
# Part 3.3 - Image generation with variational autoencoders
#-------------------------------------------------------------------------------

# Parameters
# batch_size = 256
# epochs = 50
# original_dim = 784
# intermediate_dim = 256
# latent_dim = 2
# epsilon_std = 1.0

# Build variational autoencoder (vae)
# vae, encoder, generator, vae_loss = lab10.build_vae(batch_size,
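# The denoising fit earlier in this snippet assumes x_train_noisy and
# x_test_noisy were built beforehand. A common construction (an assumption
# here, not necessarily how this lab does it) is additive Gaussian noise
# clipped back to the valid [0, 1] pixel range:
import numpy as np

noise_factor = 0.5  # illustrative value
x_train_noisy = np.clip(
    x_train + noise_factor * np.random.normal(size=x_train.shape), 0.0, 1.0)
x_test_noisy = np.clip(
    x_test + noise_factor * np.random.normal(size=x_test.shape), 0.0, 1.0)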
import os
import sys

import numpy as np
import torch
from tensorboardX import SummaryWriter


def train_vae(args):
    # Set up experiment directory and save paths.
    EXPERIMENT_DIR = os.path.join(SAVE_DIR, args.name)
    if not os.path.exists(EXPERIMENT_DIR):
        os.mkdir(EXPERIMENT_DIR)
    PARAM_PATH = os.path.join(EXPERIMENT_DIR, "epoch{}.pth.tar")
    LOGS_PATH = os.path.join(EXPERIMENT_DIR, "logs.json")
    IMG_DIR = os.path.join(EXPERIMENT_DIR, "img")
    if not os.path.exists(IMG_DIR):
        os.mkdir(IMG_DIR)
    RECONSTRUCTIONS_PATH = os.path.join(IMG_DIR, "reconstructions_epoch{}.png")
    INTERPOLATION_PATH = os.path.join(IMG_DIR, "interpolation_epoch{}.png")

    # Data loaders
    n_epochs = args.num_epochs
    train_data_loader, test_data_loader = setup_datasets(
        DATA_DIR, image_loader, args.batch_size)

    # Model
    ae = VariationalAutoencoder(args.z_dim).to(args.device)
    if args.device == "cuda":
        # Support multiple GPUs
        # ae = torch.nn.DataParallel(ae, args.gpu_ids)
        pass

    # Loss function: deep-feature-consistent VAE loss by default; plain
    # pixel-space VAE loss for experiments whose name contains "pix".
    loss_fn = DFC_VAE_Loss([1., 1., 1., 0., 0.], args.device)
    if "pix" in args.name:
        loss_fn = VAE_Loss()

    optimizer = torch.optim.Adam(ae.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)

    # Logs
    writer = SummaryWriter()
    min_loss = float('inf')
    not_improved = 0  # improvement counter

    for epoch in range(n_epochs):
        torch.cuda.empty_cache()
        ae.train()
        for step, (images, y) in enumerate(train_data_loader):
            global_step = step + epoch * len(train_data_loader)

            # Compute VAE outputs and pass them into the loss function.
            optimizer.zero_grad()
            out, mu, logvar = ae(images.to(args.device))
            loss_comp = loss_fn(out, images.to(args.device), mu, logvar)

            # Backpropagate. The pixel-space loss returns a single tensor;
            # the DFC loss returns a dict of components that must be summed.
            if "pix" in args.name:
                loss_comp.backward()
                loss = loss_comp
            else:
                loss = sum(loss_comp.values())
                loss.backward()
            optimizer.step()

            # Log losses
            writer.add_scalar('train/loss', loss.item(), global_step)
            if "pix" not in args.name:
                writer.add_scalars(
                    'train',
                    {k: v.item() for k, v in loss_comp.items()},
                    global_step)
            if step % 10 == 0:
                to_print = "Epoch [{}/{}]; Step [{}/{}]; Train Loss: {:.7f}".format(
                    epoch + 1, n_epochs, step, len(train_data_loader), loss.item())
                sys.stdout.write(to_print + '\n')
                sys.stdout.flush()

        # Evaluate on the test set.
        print("Evaluating...")
        torch.cuda.empty_cache()
        with torch.no_grad():
            ae.eval()
            losses = []
            for step, (images, y) in enumerate(test_data_loader):
                out, mu, logvar = ae(images.to(args.device))
                loss_i = loss_fn(out, images.to(args.device), mu, logvar)
                if "pix" not in args.name:
                    loss_i = sum(loss_i.values())
                losses.append(loss_i.item())
            test_loss = np.array(losses).mean()
            scheduler.step(test_loss)
            writer.add_scalar('test/loss', test_loss,
                              epoch * len(train_data_loader))
            to_print = "Epoch [{}/{}]; Test Loss: {:.7f}".format(
                epoch + 1, n_epochs, test_loss)
            sys.stdout.write(to_print + '\n')
            sys.stdout.flush()
            writer.export_scalars_to_json(LOGS_PATH)

        # End of epoch: save the model and plot reconstructions and
        # interpolations if the test loss decreased.
        if epoch > 0 and test_loss < min_loss:
            try:
                torch.save(ae.state_dict(), PARAM_PATH.format(epoch + 1))
                min_loss = test_loss
                not_improved = 0
                plot_interpolation(test_data_loader, ae, args.device,
                                   INTERPOLATION_PATH, epoch)
                plot_reconstructions(test_data_loader, ae, args.device,
                                     RECONSTRUCTIONS_PATH, epoch)
                print("Saved model, plotted reconstruction and interpolation, "
                      "and reset improvement counter")
            except Exception:
                print('Error occurred while saving after epoch {}'.format(epoch + 1))
        else:
            not_improved += 1
            print("Training has not improved on test set for {} epochs".format(
                not_improved))

    writer.close()
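# train_vae() calls plot_interpolation(), which is not shown here. Below is
# a minimal sketch of what such a helper typically does: encode two test
# images, walk a straight line between their latent means, and decode each
# point. The encode()/decode() methods and the call signature are
# assumptions, not the repo's actual API.
import torch
from torchvision.utils import save_image


def plot_interpolation_sketch(loader, model, device, path, epoch, steps=8):
    model.eval()
    with torch.no_grad():
        images, _ = next(iter(loader))
        x = images[:2].to(device)
        mu, logvar = model.encode(x)  # assumed encoder interface
        # Linearly interpolate between the two latent means.
        alphas = torch.linspace(0, 1, steps, device=device).unsqueeze(1)
        z = (1 - alphas) * mu[0] + alphas * mu[1]
        decoded = model.decode(z)  # assumed decoder interface
        save_image(decoded.cpu(), path.format(epoch + 1), nrow=steps)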
autoencoder.summary()

# Train autoencoder
autoencoder.fit(x_train, x_train,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(x_test, x_test),
                callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])

# Use autoencoder to reconstruct test images
reconstructed_imgs = autoencoder.predict(x_test)

# Plot reconstructions
utils.plot_reconstructions(x_test, reconstructed_imgs, n=10)

#-------------------------------------------------------------------------------
# Part 3.2 - Image denoising with autoencoders
#-------------------------------------------------------------------------------

# Parameters
# batch_size = 256
# epochs = 50
# original_dim = 784
# encoding_dim = 32

# Build autoencoder
# autoencoder = lab10.build_autoencoder(original_dim, encoding_dim)

# Compile autoencoder with loss function
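# The commented-out call above references lab10.build_autoencoder(
# original_dim, encoding_dim). A plausible minimal implementation is a
# single dense-bottleneck autoencoder; this is an assumption about the lab
# code, not a copy of it.
from keras.layers import Dense, Input
from keras.models import Model


def build_autoencoder_sketch(original_dim, encoding_dim):
    # Dense encoder down to the bottleneck, dense decoder back up.
    inputs = Input(shape=(original_dim,))
    encoded = Dense(encoding_dim, activation='relu')(inputs)
    decoded = Dense(original_dim, activation='sigmoid')(encoded)
    autoencoder = Model(inputs, decoded)
    # Per-pixel binary cross-entropy suits [0, 1]-scaled images.
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder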