def test_glow_save_load(self):
    """Round-trip a Glow model through save/load and check outputs match.

    Fix: the original wrote ``config.json`` and the ``<name>.pt`` checkpoint
    into the test file's own directory and never deleted them, polluting the
    source tree on every run. Artifacts now go into a temporary directory
    that is removed automatically.
    """
    import tempfile

    # ----------------------------------------------------------------------
    # Data preparation and run through dummy network
    # ----------------------------------------------------------------------
    data = torch.rand(2, 3, 32, 32)
    net = Glow(context_blocks=3, flow_steps=3, input_channels=3,
               hidden_channels=32, quantization=65536)
    output, _ = net(data)

    # ----------------------------------------------------------------------
    # Save and load back model (inside a throwaway directory)
    # ----------------------------------------------------------------------
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Save the model configuration
        with open(os.path.join(tmp_dir, "config.json"), 'w', encoding='utf-8') as file:
            json.dump(net.config, file)

        # Save the model state dictionary under the name recorded in the config
        filename = os.path.join(tmp_dir, net.config["name"] + ".pt")
        torch.save(net.state_dict(), filename)

        # Load it back (load_model presumably reads config.json + the .pt
        # file from the given directory — matches the layout written above)
        loaded_model, config = load_model(tmp_dir)

    # ----------------------------------------------------------------------
    # Assert the output is as expected
    # ----------------------------------------------------------------------
    new_output, _ = loaded_model(data)
    error = torch.mean(torch.abs(new_output - output)).item()
    self.assertLessEqual(error, 5e-5)
    self.assertEqual(config, net.config)
def main(args):
    """Train a Glow normalizing flow on the ICLEVR dataset (Lab7 task 1).

    Sets up wandb logging, seeds every RNG for reproducibility, builds the
    train/test datasets and the Glow model, then delegates the whole
    training loop to ``train``.
    """
    # Set up main device and scale batch size
    wandb.init(project='dlp-lab7-task1-nf')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Set random seeds (Python, NumPy, and all torch devices) so runs are
    # reproducible for a given --seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    trainset = ICLEVRLoader(mode="train")
    print('trainset: ', trainset)

    # NOTE(review): hard-coded absolute paths — this script only runs on the
    # original author's machine; consider making these CLI arguments.
    datasetDir_path = '/home/arg/courses/machine_learning/homework/deep_learning_and_practice/Lab7/dataset/task_1'
    datasetImgDir_path = '/home/arg/courses/machine_learning/homework/deep_learning_and_practice/Lab7/dataset/task_1/images'
    testset = Lab7_Dataset(img_path=datasetImgDir_path, json_path=os.path.join(datasetDir_path, 'test.json'))
    print('testset: ', testset)

    trainloader = data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels, num_levels=args.num_levels, num_steps=args.num_steps)
    net = net.to(device)
    wandb.watch(net)  # let wandb track gradients/parameters of the model
    # if device == 'cuda':
    #     net = torch.nn.DataParallel(net, args.gpu_ids)
    #     cudnn.benchmark = args.benchmark

    # NOTE(review): start_epoch is set but unused below — training always
    # runs args.num_epochs from scratch; the resume path is commented out.
    start_epoch = 1
    # if args.resume:
    #     # Load checkpoint.
    #     print('Resuming from checkpoint at ckpts/best.pth.tar...')
    #     assert os.path.isdir('ckpts'), 'Error: no checkpoint directory found!'
    #     checkpoint = torch.load('ckpts/best.pth.tar')
    #     net.load_state_dict(checkpoint['net'])
    #     global best_loss
    #     global global_step
    #     best_loss = checkpoint['test_loss']
    #     start_epoch = checkpoint['epoch']
    #     global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear LR warm-up: scale factor ramps from 0 to 1 over args.warm_up steps
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    train(args.num_epochs, net, trainloader, device, optimizer, scheduler, loss_fn, args.max_grad_norm)
def test_invertibility_glow(self):
    """Glow must invert its own forward pass, and applying then removing
    weight norm must leave both directions numerically unchanged."""
    # ----------------------------------------------------------------------
    # Prepare some dummy data
    # ----------------------------------------------------------------------
    x = torch.rand(2, 3, 32, 32)

    # ----------------------------------------------------------------------
    # Prepare the layer with default init
    # ----------------------------------------------------------------------
    flow = Glow(context_blocks=3, flow_steps=3, input_channels=3,
                hidden_channels=32, quantization=65536)

    # ----------------------------------------------------------------------
    # Forward then reverse: reconstruction error should be tiny and the
    # log-determinant non-trivial
    # ----------------------------------------------------------------------
    latent, log_det = flow(x)
    reconstruction = flow.reverse(latent)
    reco_err = (reconstruction - x).abs().mean().item()
    self.assertLessEqual(reco_err, 1e-5)
    self.assertNotEqual(log_det.sum().item(), 0)

    # ----------------------------------------------------------------------
    # Apply and remove weight norm and check the results don't change
    # ----------------------------------------------------------------------
    flow.apply_weight_norm()
    latent_wn, _ = flow(x)
    flow.remove_weight_norm()
    reconstruction_after = flow.reverse(latent)

    forward_drift = (latent_wn - latent).abs().mean().item()
    reverse_drift = (reconstruction_after - reconstruction).abs().mean().item()
    self.assertLessEqual(forward_drift, 5e-5)
    self.assertLessEqual(reverse_drift, 1e-8)
def test_training_glow(self):
    """A few optimization steps must not break Glow's invertibility."""
    # ----------------------------------------------------------------------
    # Prepare the layer
    # ----------------------------------------------------------------------
    flow = Glow(context_blocks=2, flow_steps=6, input_channels=3,
                hidden_channels=64, quantization=65536,
                lu_decomposition=True)

    # ----------------------------------------------------------------------
    # Train for a couple of batches of random images
    # ----------------------------------------------------------------------
    opt = torch.optim.Adam(flow.parameters(), 0.0001)
    criterion = NLLFlowLoss(sigma=1.0, quantization=65536, bits_per_dim=True)
    for _ in range(20):
        opt.zero_grad()
        batch = torch.rand(2, 3, 32, 32)
        latent, log_det = flow(batch)
        criterion(latent, log_det).backward()
        opt.step()

    # ----------------------------------------------------------------------
    # Assess the network is still invertible after training
    # ----------------------------------------------------------------------
    flow.eval()
    sample = torch.rand(2, 3, 32, 32)
    latent, log_det = flow(sample)
    recovered = flow.reverse(latent)
    reco_err = (recovered - sample).abs().mean().item()
    self.assertLessEqual(reco_err, 1e-5)
    self.assertNotEqual(log_det.sum().item(), 0)
def main(args):
    """Train a Glow model on a CelebA sample dataset.

    Seeds all RNGs, builds train/test loaders from file lists, optionally
    resumes from ``ckpts/best.pth.tar``, then alternates ``train`` and
    ``test`` for ``args.num_epochs`` epochs.
    """
    # Set up main device and scale batch size (effective batch = per-GPU
    # batch times number of GPUs when DataParallel is used)
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seeds for reproducibility across Python/NumPy/torch
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    trainset = ImgDatasets(root_dir='data/celeba_sample', files='train_files.txt', mode=args.mode)
    trainloader = data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
    testset = ImgDatasets(root_dir='data/celeba_sample', files='test_files.txt', mode=args.mode)
    testloader = data.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels, num_levels=args.num_levels,
               num_steps=args.num_steps, mode=args.mode)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    start_epoch = 0
    if args.resume:
        # Load checkpoint and restore the module-level training state
        # (best_loss / global_step are globals shared with train/test).
        print('Resuming from checkpoint at ckpts/best.pth.tar...')
        assert os.path.isdir('ckpts'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('ckpts/best.pth.tar')
        net.load_state_dict(checkpoint['net'])
        global best_loss
        global global_step
        best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']
        # Approximate resume point: one optimizer step per training sample
        global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear LR warm-up over the first args.warm_up steps
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, scheduler, loss_fn, args.max_grad_norm)
        test(epoch, net, testloader, device, loss_fn, args.mode)
def main(args):
    """Load a trained Glow checkpoint, run style transfer on saved
    conditioning data, and write the original/synthesized/condition images
    to ``args.output_dir``.

    Fixes over the original:
    - Output paths were built by raw string concatenation, inconsistently
      (``output_dir + 'original.png'`` vs ``output_dir + '/synthesized.png'``),
      so two of the three files were written to a mangled path unless
      ``output_dir`` ended with a slash. All paths now use ``os.path.join``.
    - ``os.makedirs`` created a hard-coded ``'inference_data'`` directory
      instead of the directory actually written to; it now creates
      ``args.output_dir``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Restore the model with the hyper-parameters saved in the checkpoint
    ckpt = torch.load(args.ckpt_path)
    ckpt_args = ckpt["args"]
    net = Glow(num_channels=ckpt_args.num_channels,
               num_levels=ckpt_args.num_levels,
               num_steps=ckpt_args.num_steps,
               img_size=ckpt_args.img_size,
               dec_size=ckpt_args.dec_size).to(device)
    if device == 'cuda':
        # Wrap before load_state_dict: the checkpoint was saved from a
        # DataParallel model ('module.'-prefixed keys)
        net = torch.nn.DataParallel(net, ckpt_args.gpu_ids)
    net.load_state_dict(ckpt['net'])

    # Conditioning data: source images plus the conditioning image tensor
    cond_data = torch.load(args.cond_data)
    original, cond_img = cond_data["original"], cond_data["cond_img"].to(device)

    # Style transfer
    synth_img, target = style_transfer(net, original, cond_img,
                                       target_index=args.index)

    # Create the directory the images are actually written to
    os.makedirs(args.output_dir, exist_ok=True)

    origin_concat = torchvision.utils.make_grid(original, nrow=4, padding=2, pad_value=255)
    img_concat = torchvision.utils.make_grid(synth_img, nrow=4, padding=2, pad_value=255)
    torchvision.utils.save_image(origin_concat, os.path.join(args.output_dir, 'original.png'))
    torchvision.utils.save_image(img_concat, os.path.join(args.output_dir, 'synthesized.png'))
    torchvision.utils.save_image(target, os.path.join(args.output_dir, 'cond_img.png'))
def main(args):
    """Train a Glow model on CIFAR-10.

    Seeds all RNGs, builds augmented train / plain test loaders, optionally
    resumes from ``ckpts/best.pth.tar``, then alternates ``train`` and
    ``test`` for ``args.num_epochs`` epochs.
    """
    # Set up main device and scale batch size (effective batch = per-GPU
    # batch times number of GPUs when DataParallel is used)
    device = 'cuda' if torch.cuda.is_available() and args.gpu_ids else 'cpu'
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seeds for reproducibility across Python/NumPy/torch
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # No normalization applied, since Glow expects inputs in (0, 1)
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor()
    ])

    trainset = torchvision.datasets.CIFAR10(root='data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
    testset = torchvision.datasets.CIFAR10(root='data', train=False, download=True, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

    # Model
    print('Building model..')
    net = Glow(num_channels=args.num_channels, num_levels=args.num_levels, num_steps=args.num_steps)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net, args.gpu_ids)
        cudnn.benchmark = args.benchmark

    start_epoch = 0
    if args.resume:
        # Load checkpoint and restore the module-level training state
        # (best_loss / global_step are globals shared with train/test).
        print('Resuming from checkpoint at ckpts/best.pth.tar...')
        assert os.path.isdir('ckpts'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('ckpts/best.pth.tar')
        net.load_state_dict(checkpoint['net'])
        global best_loss
        global global_step
        best_loss = checkpoint['test_loss']
        start_epoch = checkpoint['epoch']
        # Approximate resume point: one optimizer step per training sample
        global_step = start_epoch * len(trainset)

    loss_fn = util.NLLLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # Linear LR warm-up over the first args.warm_up steps
    scheduler = sched.LambdaLR(optimizer, lambda s: min(1., s / args.warm_up))

    for epoch in range(start_epoch, start_epoch + args.num_epochs):
        train(epoch, net, trainloader, device, optimizer, scheduler, loss_fn, args.max_grad_norm)
        test(epoch, net, testloader, device, loss_fn, args.num_samples)
else: flag = False except: print("error") index += 1 #print(x.size()) return x, 0 transform = transforms.Compose( [transforms.Scale((32, 32)), transforms.ToTensor()]) for i in range(3): net = Glow(num_channels=512, num_levels=3, num_steps=16) device = 'cuda' if torch.cuda.is_available() else 'cpu' device = 'cpu' #net.to(device) if i == 0: net.load_state_dict({ k.replace('module.', ''): v for k, v in torch.load("ckpts/-2.pth.tar")['net'].items() }) if i == 1: net.load_state_dict({ k.replace('module.', ''): v for k, v in torch.load("ckpts/-1.pth.tar")['net'].items() }) net.eval() #testset = dataset(-2, transform, test=True,rotation_data=True)
from datetime import datetime from models import RealNVP, Glow from training import Trainer if __name__ == '__main__': # Path to the database data_dir = "/path/to/training/database/" # The device (GPU/CPU) on which to execute the code device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # The model to train model = Glow(context_blocks=4, flow_steps=8, input_channels=3, hidden_channels=256, quantization=256, lu_decomposition=False) model.to(device) # Path to the directory where the results will be saved saving_directory = \ os.path.join(os.getcwd(), 'results', datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) trainer = Trainer(model=model, data_path=data_dir, batch_size=4, learning_rate=0.0001, saving_directory=saving_directory,