def __init__(self, args):
    """Build the conditional-GAN trainer: networks, optimizers, losses,
    the MNIST data tensors, and a fixed (noise, label) grid used to render
    sample images during training.

    Args:
        args: parsed CLI namespace (epoch, batch_size, dirs, lrG/lrD,
              beta1/beta2, gan_type, dataset, gpu_mode).
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100  # 10 rows (shared noise) x 10 columns (labels)
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        # move networks and loss modules onto the GPU
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist as full (X, Y) tensors (not a DataLoader)
    self.data_X, self.data_Y = utils.load_mnist(args.dataset)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition: each block of y_dim rows shares one noise
    # vector, so within a block only the class label varies
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i*self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i*self.y_dim + j] = self.sample_z_[i*self.y_dim]

    # labels 0..9 tiled over every block, then one-hot encoded via scatter_
    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i
    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i*self.y_dim: (i+1)*self.y_dim] = temp
    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)

    # volatile=True marks inference-only Variables (legacy pre-0.4 PyTorch API)
    if self.gpu_mode:
        self.sample_z_, self.sample_y_ = Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_ = Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True)
def __init__(self, args):
    """Build the BEGAN trainer: BEGAN control hyper-parameters, networks,
    optimizers, the chosen dataset loader, and a fixed noise batch for
    rendering samples during training.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 64
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # BEGAN parameters
    self.gamma = 0.75     # target diversity ratio
    self.lambda_ = 0.001  # proportional gain for updating k
    self.k = 0.           # balance term, updated each training step

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        # self.L1_loss = torch.nn.L1loss().cuda()  # BEGAN does not work well when using L1loss().
    # else:
    #     self.L1_loss = torch.nn.L1loss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset (MNIST / FashionMNIST via torchvision, celebA via utils)
    if self.dataset == 'mnist':
        self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True,
                                                     transform=transforms.Compose([transforms.ToTensor()])),
                                      batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'fashion-mnist':
        self.data_loader = DataLoader(
            datasets.FashionMNIST('data/fashion-mnist', train=True, download=True,
                                  transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'celebA':
        self.data_loader = utils.load_celebA('data/celebA',
                                             transform=transforms.Compose([transforms.CenterCrop(160),
                                                                           transforms.Scale(64),
                                                                           transforms.ToTensor()]),
                                             batch_size=self.batch_size, shuffle=True)
    self.z_dim = 62

    # fixed noise (volatile=True: legacy inference-only Variable)
    if self.gpu_mode:
        self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
    else:
        self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
def __init__(self, args):
    """Build the EBGAN trainer: energy-margin hyper-parameters, networks,
    optimizers, the chosen dataset loader, and a fixed noise batch.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 64
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # EBGAN parameters
    self.pt_loss_weight = 0.1  # weight of the pulling-away term
    self.margin = max(1, self.batch_size / 64.)  # margin for loss function
    # usually margin of 1 is enough, but for large batch size it must be larger than 1

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset
    if self.dataset == 'mnist':
        self.data_loader = DataLoader(datasets.MNIST('data/mnist', train=True, download=True,
                                                     transform=transforms.Compose([transforms.ToTensor()])),
                                      batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'fashion-mnist':
        self.data_loader = DataLoader(
            datasets.FashionMNIST('data/fashion-mnist', train=True, download=True,
                                  transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'celebA':
        self.data_loader = utils.load_celebA('data/celebA',
                                             transform=transforms.Compose([transforms.CenterCrop(160),
                                                                           transforms.Scale(64),
                                                                           transforms.ToTensor()]),
                                             batch_size=self.batch_size, shuffle=True)
    self.z_dim = 62

    # fixed noise (volatile=True: legacy inference-only Variable)
    if self.gpu_mode:
        self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)).cuda(), volatile=True)
    else:
        self.sample_z_ = Variable(torch.rand((self.batch_size, self.z_dim)), volatile=True)
def __init__(self, args):
    """Build the RMSprop-based (WGAN-style) conditional trainer: networks,
    optimizers, losses, dataset loader, and fixed (noise, label) samples
    for visualisation.

    Fixes over the previous revision:
      * loss functions are now also created in CPU mode (they only existed
        under ``gpu_mode`` before, so CPU runs crashed on first use),
      * ``z_dim``/``y_dim`` are set for fashion-mnist too (later code reads
        them unconditionally),
      * the fixed noise batch always has ``sample_num`` rows — the CPU
        branch used ``batch_size``, desynchronising it from ``sample_y_``,
      * ``sample_z_`` is no longer wrapped in ``Variable`` twice on CPU.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.lambda_ = 0.25
    self.n_critic = 5  # the number of iterations of the critic per generator iteration
    self.lambda_cl = 0.2
    self.c = 0.01  # weight-clipping bound

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    # RMSprop as in the WGAN paper (Adam variants kept for reference):
    # self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    # self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))
    self.G_optimizer = optim.RMSprop(self.G.parameters(), lr=args.lrG)
    self.D_optimizer = optim.RMSprop(self.D.parameters(), lr=args.lrD)

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
    else:
        # previously missing: CPU runs had no loss attributes at all
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset
    if self.dataset == 'mnist':
        self.data_loader = DataLoader(datasets.MNIST(
            'data/mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size, shuffle=True)
        self.z_dim = 62
        self.y_dim = 10
    elif self.dataset == 'fashion-mnist':
        self.data_loader = DataLoader(datasets.FashionMNIST(
            'data/fashion-mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size, shuffle=True)
        # previously missing — later code reads z_dim / y_dim unconditionally
        self.z_dim = 62
        self.y_dim = 10
    elif self.dataset == 'celebA':
        self.data_loader = utils.load_celebA(
            'data/celebA',
            transform=transforms.Compose([
                transforms.CenterCrop(160),
                transforms.Scale(64),
                transforms.ToTensor()
            ]),
            batch_size=self.batch_size, shuffle=True)
        from load_attr import load_attr
        attr = load_attr()
        self.attr = torch.FloatTensor(attr)
        self.z_dim = 62
        self.y_dim = 1

    # fixed noise — sample_num rows in BOTH branches (was batch_size on CPU)
    if self.gpu_mode:
        self.sample_z_ = Variable(torch.rand(
            (self.sample_num, self.z_dim)).cuda(), volatile=True)
    else:
        self.sample_z_ = Variable(torch.rand(
            (self.sample_num, self.z_dim)), volatile=True)

    # fixed condition
    if self.dataset == 'mnist':
        # labels 0..9 tiled and one-hot encoded
        temp = torch.zeros((10, 1))
        for i in range(self.y_dim):
            temp[i, 0] = i
        temp_y = torch.zeros((self.sample_num, 1))
        for i in range(10):
            temp_y[i * self.y_dim:(i + 1) * self.y_dim] = temp
        self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
        self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)
    elif self.dataset == 'celebA':
        # first half of the grid gets the positive attribute
        self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
        self.sample_y_[:50, 0] = 1
        # self.sample_y_[25:75, 1] = 1

    if self.gpu_mode:
        self.sample_y_ = Variable(self.sample_y_.cuda(), volatile=True)
    else:
        # sample_z_ is already a Variable; only wrap the labels
        self.sample_y_ = Variable(self.sample_y_, volatile=True)
def __init__(self, args):
    """Trainer over a fixed 1000-image CIFAR-10 subset, with a learnable
    latent mixture (mu / sigma / weight) optimised jointly with the
    discriminator.

    Fixes over the previous revision:
      * the first DataLoader — immediately overwritten by the subset
        loader — is removed,
      * mu / sigma / weight are built deterministically with vectorised
        NumPy expressions instead of random arrays that were then
        overwritten element by element (same final values, no wasted
        global-RNG draws),
      * ``next(iter(...))`` replaces direct ``__iter__``/``__next__`` calls.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    # self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.input_size = args.input_size
    self.z_dim = 64
    self.c = 0.01  # clipping value
    self.n_critic = 5  # the number of iterations of the critic per generator iteration

    # load dataset
    self.dataset = datasets.CIFAR10(
        root='data/cifar10', download=True,
        transform=transforms.Compose([
            transforms.Resize(self.input_size),
            transforms.CenterCrop(self.input_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]))
    # train on a fixed 1000-image subset of the 50k training images.
    # NOTE(review): the loader batch size is hard-coded to 64 while
    # self.batch_size sizes mu/sigma/weight below — confirm intentional.
    indices = list(range(50000))
    subset_indices = indices[:1000]
    train_sampler = SubsetRandomSampler(subset_indices)
    self.data_loader = torch.utils.data.DataLoader(self.dataset,
                                                   batch_size=64,
                                                   sampler=train_sampler,
                                                   num_workers=1,
                                                   drop_last=True)
    data = next(iter(self.data_loader))[0]  # one batch, to read the channel count

    # deterministic mixture initialisation (identical values to the old loops):
    #   mu[i][j]     = -32.0 + j
    #   sigma[i][j]  = (1 / (j + 1)) ** 2
    #   weight[i][j] = 1 / z_dim
    self.mu = np.tile(np.arange(self.z_dim, dtype=np.float64) - 32.0,
                      (self.batch_size, 1))
    self.sigma = np.tile((1.0 / (np.arange(self.z_dim, dtype=np.float64) + 1.0)) ** 2,
                         (self.batch_size, 1))
    # self.sigma.fill(50)
    self.weight = np.full((self.batch_size, self.z_dim), 1.0 / self.z_dim)

    # the mixture parameters live on the GPU and require gradients
    self.mu = Variable(torch.FloatTensor(self.mu).cuda(), requires_grad=True)
    self.sigma = Variable(torch.FloatTensor(self.sigma).cuda(), requires_grad=True)
    self.weight = Variable(torch.FloatTensor(self.weight).cuda(), requires_grad=True)

    # networks init
    self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1],
                       input_size=self.input_size)
    self.D = discriminator(input_dim=data.shape[1], output_dim=1,
                           input_size=self.input_size)
    # the mixture parameters are trained together with the discriminator
    params = list(self.D.parameters()) + [self.sigma, self.mu, self.weight]
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(params, lr=args.lrD,
                                  betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # fixed noise
    self.sample_z_ = torch.rand((self.batch_size, self.z_dim))
    if self.gpu_mode:
        self.sample_z_ = self.sample_z_.cuda()
def __init__(self, args):
    """WGAN-GP-style trainer over pickled k-fold data (module ``pl``);
    round ``repeat`` selects the data source, and at ``repeat == 1`` the
    networks are resumed from the given checkpoint paths.

    Changes over the previous revision: a batch is drawn with
    ``next(iter(...))`` instead of direct ``__iter__``/``__next__`` calls,
    and dead commented-out loader variants were removed.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.datasetname = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.input_size = args.input_size
    self.folder = args.folder
    self.z_dim = 62
    self.lambda_ = 10  # gradient-penalty weight
    self.n_critic = 5  # the number of iterations of the critic per generator iteration
    self.repeat = args.repeat

    # load dataset: the seed fold at round 0, accumulated results afterwards
    if self.repeat == 0:
        self.dataset = pl.read_from_data_for_k_folder(
            'pickle_seed.out', self.folder)
    else:
        self.dataset = pl.read_from_data_for_k_folder_add_size(
            'result_collection', self.folder,
            32 + (self.repeat - 1) * 16, 16, self.repeat)
    self.data_loader = tutils.data.DataLoader(self.dataset,
                                              batch_size=self.batch_size,
                                              shuffle=True,
                                              drop_last=True)
    data = next(iter(self.data_loader))[0]  # one batch, to read the channel count

    # networks init
    self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1],
                       input_size=self.input_size)
    self.D = discriminator(input_dim=data.shape[1], output_dim=1,
                           input_size=self.input_size)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # resume both networks from the previous round's checkpoints
    if self.repeat == 1:
        self.G.load_state_dict(torch.load(args.netG_path))
        self.D.load_state_dict(torch.load(args.netD_path))

    # fixed noise
    self.sample_z_ = torch.rand((self.batch_size, self.z_dim))
    if self.gpu_mode:
        self.sample_z_ = self.sample_z_.cuda()
def main():
    """Entry point: parse args, set up (optionally distributed) training,
    build the encoder / projection head / supervised head, then either
    pretrain + finetune or finetune only, and evaluate on the test split.
    """
    # Arguments
    args = parser.parse_args()

    # Setup Distributed Training
    device, local_rank = setup(distributed=args.distributed)

    # Get Dataloaders for Dataset of choice
    dataloaders, args = get_dataloaders(args)

    # Setup logging, saving models, summaries
    args = experiment_config(parser, args)

    # Get available models from /model/network.py
    model_names = sorted(name for name in models.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models.__dict__[name]))

    # If model exists
    if any(args.model in model_name for model_name in model_names):
        # Load model
        base_encoder = getattr(models, args.model)(
            args, num_classes=args.n_classes)  # Encoder
        proj_head = models.projection_MLP(args)
        sup_head = models.Sup_Head(args)
    else:
        raise NotImplementedError("Model Not Implemented: {}".format(
            args.model))

    # Remove last FC layer from resnet; features feed the heads directly
    base_encoder.fc = nn.Sequential()

    # Place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(6)  # n cpu threads / n processes per node

        base_encoder = DistributedDataParallel(base_encoder.cuda(),
                                               device_ids=[local_rank],
                                               output_device=local_rank,
                                               find_unused_parameters=True,
                                               broadcast_buffers=False)
        proj_head = DistributedDataParallel(proj_head.cuda(),
                                            device_ids=[local_rank],
                                            output_device=local_rank,
                                            find_unused_parameters=True,
                                            broadcast_buffers=False)
        sup_head = DistributedDataParallel(sup_head.cuda(),
                                           device_ids=[local_rank],
                                           output_device=local_rank,
                                           find_unused_parameters=True,
                                           broadcast_buffers=False)

        # Only print from process (rank) 0
        args.print_progress = True if int(
            os.environ.get('RANK')) == 0 else False
    else:
        # If non Distributed use DataParallel
        if torch.cuda.device_count() > 1:
            base_encoder = nn.DataParallel(base_encoder)
            proj_head = nn.DataParallel(proj_head)
            sup_head = nn.DataParallel(sup_head)

        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')

        base_encoder.to(device)
        proj_head.to(device)
        sup_head.to(device)

        args.print_progress = True

    # Print Network Structure and Params
    if args.print_progress:
        print_network(base_encoder, args)  # prints out the network architecture etc
        logging.info('\npretrain/train: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.finetune:
        ''' Pretraining / Finetuning / Evaluate '''
        if not args.supervised:
            # Pretrain the encoder and projection head
            proj_head.apply(init_weights)
            pretrain(base_encoder, proj_head, dataloaders, args)
        else:
            supervised(base_encoder, sup_head, dataloaders, args)

        print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir))

        # Load the pretrained model
        checkpoint = torch.load(args.load_checkpoint_dir)

        # Load the encoder parameters
        base_encoder.load_state_dict(checkpoint['encoder'])

        # Initalize weights of the supervised / classification head
        sup_head.apply(init_weights)

        # Supervised Finetuning of the supervised classification head
        finetune(base_encoder, sup_head, dataloaders, args)

        # Evaluate the pretrained model and trained supervised head
        test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head,
                                                      dataloaders, 'test',
                                                      args.finetune_epochs,
                                                      args)

        print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
            test_loss, test_acc, test_acc_top5))

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
    else:
        ''' Finetuning / Evaluate '''
        # Do not Pretrain, just finetune and inference
        print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir))

        # Load the pretrained model
        checkpoint = torch.load(args.load_checkpoint_dir)

        # Load the encoder parameters
        base_encoder.load_state_dict(checkpoint['encoder'])  # .cuda()

        # Initalize weights of the supervised / classification head
        sup_head.apply(init_weights)

        # Supervised Finetuning of the supervised classification head
        finetune(base_encoder, sup_head, dataloaders, args)

        # Evaluate the pretrained model and trained supervised head
        test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head,
                                                      dataloaders, 'test',
                                                      args.finetune_epochs,
                                                      args)

        print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
            test_loss, test_acc, test_acc_top5))

        if args.distributed:  # cleanup
            torch.distributed.destroy_process_group()
def train(self):
    """Train the super-resolution network with SGD + MSE loss, logging
    per-batch losses, saving sample reconstructions each epoch, and
    checkpointing every ``save_epochs`` epochs.

    Fixes over the previous revision:
      * the per-epoch test prediction no longer calls ``.cuda()``
        unconditionally (it crashed when ``gpu_mode`` was False),
      * ``nn.utils.clip_grad_norm`` (deprecated) -> ``clip_grad_norm_``,
      * loop variables no longer shadow the ``iter``/``input`` builtins.
    """
    # networks
    self.model = Net(num_channels=self.num_channels, base_filter=64,
                     num_residuals=18)

    # weigh initialization
    self.model.weight_init()

    # optimizer
    self.momentum = 0.9
    self.weight_decay = 0.0001
    self.clip = 0.4  # gradient-norm clipping threshold
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr,
                               momentum=self.momentum,
                               weight_decay=self.weight_decay)

    # loss function
    if self.gpu_mode:
        self.model.cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.model)
    print('----------------------------------------------')

    # load dataset
    train_data_loader = self.load_dataset(dataset='train')
    test_data_loader = self.load_dataset(dataset='test')

    # set the logger
    log_dir = os.path.join(self.save_dir, 'logs')
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logger = Logger(log_dir)

    ################# Train #################
    print('Training is started.')
    avg_loss = []
    step = 0

    # fixed test image used for the per-epoch qualitative check
    test_input, test_target, test_bicubic = test_data_loader.dataset.__getitem__(2)
    test_input = test_input.unsqueeze(0)
    test_target = test_target.unsqueeze(0)

    self.model.train()
    for epoch in range(self.num_epochs):
        # learning rate is decayed by a factor of 10 every 20 epochs
        if (epoch + 1) % 20 == 0:
            for param_group in self.optimizer.param_groups:
                param_group["lr"] /= 10.0
            print("Learning rate decay: lr={}".format(
                self.optimizer.param_groups[0]["lr"]))

        epoch_loss = 0
        for batch_idx, (lr_batch, hr_batch, bi) in enumerate(train_data_loader):
            # input data (bicubic interpolated image)
            if self.gpu_mode:
                x_ = Variable(hr_batch.cuda())
                y_ = Variable(
                    utils.img_interp(lr_batch, self.scale_factor).cuda())
            else:
                x_ = Variable(hr_batch)
                y_ = Variable(utils.img_interp(lr_batch, self.scale_factor))

            # update network
            self.optimizer.zero_grad()
            recon_image = self.model(y_)
            loss = self.MSE_loss(recon_image, x_)
            loss.backward()

            # gradient clipping (clip_grad_norm_ is the non-deprecated form)
            nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()

            # log
            epoch_loss += loss.item()
            print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
                  ((epoch + 1), (batch_idx + 1), len(train_data_loader),
                   loss.item()))

            # tensorboard logging
            # logger.scalar_summary('loss', loss.item(), step + 1)
            step += 1

        # avg. loss per epoch
        avg_loss.append(epoch_loss / len(train_data_loader))

        # prediction on the fixed test image — respect gpu_mode (the old
        # code called .cuda() unconditionally and crashed on CPU)
        test_y = utils.img_interp(test_input, self.scale_factor)
        if self.gpu_mode:
            test_y = test_y.cuda()
        recon_imgs = self.model(Variable(test_y))
        recon_img = recon_imgs[0].cpu().data
        gt_img = test_target[0]
        lr_img = test_input[0]
        bc_img = utils.img_interp(test_input[0], self.scale_factor)

        # calculate psnrs
        bc_psnr = utils.PSNR(bc_img, gt_img)
        recon_psnr = utils.PSNR(recon_img, gt_img)

        # save result images
        result_imgs = [gt_img, lr_img, bc_img, recon_img]
        psnrs = [None, None, bc_psnr, recon_psnr]
        utils.plot_test_result(result_imgs, psnrs, epoch + 1,
                               save_dir=self.save_dir, is_training=True)
        print("Saving training result images at epoch %d" % (epoch + 1))

        # Save trained parameters of model
        if (epoch + 1) % self.save_epochs == 0:
            self.save_model(epoch + 1)

    # Plot avg. loss
    utils.plot_loss([avg_loss], self.num_epochs, save_dir=self.save_dir)
    print("Training is finished.")

    # Save final trained parameters of model
    self.save_model(epoch=None)
image_size=opt.patch_size * opt.upscale_factor) D = torch.nn.DataParallel(D, device_ids=gpus_list) ###Feature Extractor if opt.feature_extractor == 'VGG': feature_extractor = FeatureExtractor(models.vgg19(pretrained=True)) else: feature_extractor = FeatureExtractorResnet( models.resnet152(pretrained=True)) ###LOSS MSE_loss = nn.MSELoss() BCE_loss = nn.BCELoss() print('---------- Generator architecture -------------') utils.print_network(model) print('---------- Discriminator architecture ---------') utils.print_network(D) print('-----------------------------------------------') if opt.load_pretrained: model_name = os.path.join(opt.save_folder + opt.pretrained_sr) if os.path.exists(model_name): #model= torch.load(model_name, map_location=lambda storage, loc: storage) model.load_state_dict( torch.load(model_name, map_location=lambda storage, loc: storage)) print('Pre-trained SR model is loaded.') if opt.load_pretrained_D: D_name = os.path.join(opt.save_folder + opt.pretrained_D) if os.path.exists(D_name):
def __init__(self, args):
    """BEGAN trainer setup: control hyper-parameters (gamma, lambda, k),
    generator/discriminator with Adam optimizers, dataset loader, and a
    fixed noise batch for sample rendering.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 64
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # BEGAN parameters
    self.gamma = 0.75     # target diversity ratio
    self.lambda_ = 0.001  # proportional gain for updating k
    self.k = 0.           # balance term, updated every training step

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        # self.L1_loss = torch.nn.L1loss().cuda()  # BEGAN does not work well when using L1loss().
    # else:
    #     self.L1_loss = torch.nn.L1loss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset
    if self.dataset == 'mnist':
        self.data_loader = DataLoader(datasets.MNIST(
            'data/mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'fashion-mnist':
        self.data_loader = DataLoader(datasets.FashionMNIST(
            'data/fashion-mnist', train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'celebA':
        self.data_loader = utils.load_celebA(
            'data/celebA',
            transform=transforms.Compose([
                transforms.CenterCrop(160),
                transforms.Scale(64),
                transforms.ToTensor()
            ]),
            batch_size=self.batch_size, shuffle=True)
    self.z_dim = 62

    # fixed noise (volatile=True: legacy inference-only Variable)
    if self.gpu_mode:
        self.sample_z_ = Variable(torch.rand(
            (self.batch_size, self.z_dim)).cuda(), volatile=True)
    else:
        self.sample_z_ = Variable(torch.rand(
            (self.batch_size, self.z_dim)), volatile=True)
def build_model(self):
    """Construct both generator/discriminator pairs ('stop up' and
    'stop down' directions), their Adam optimizers, and the loss
    functions; in gpu_mode, wrap the nets in DataParallel and move
    everything to the GPU.
    """
    # networks
    self.stopup_G = Generator(num_channels=self.num_channels,
                              base_filter=64, stop='up')
    self.stopdown_G = Generator(num_channels=self.num_channels,
                                base_filter=64, stop='down')
    # discriminators take channel-concatenated pairs -> 2 * num_channels
    self.stopup_D = NLayerDiscriminator(num_channels=2 * self.num_channels,
                                        base_filter=64,
                                        image_size=self.patch_size)
    self.stopdown_D = NLayerDiscriminator(num_channels=2 * self.num_channels,
                                          base_filter=64,
                                          image_size=self.patch_size)

    print('---------- Networks architecture -------------')
    utils.print_network(self.stopup_G)
    utils.print_network(self.stopdown_D)
    print('----------------------------------------------')

    # weigh initialization
    self.stopup_G.weight_init()
    self.stopdown_G.weight_init()
    self.stopup_D.weight_init()
    self.stopdown_D.weight_init()

    # optimizer (one Adam per network, shared lr and betas)
    self.stopup_G_optimizer = optim.Adam(self.stopup_G.parameters(),
                                         lr=self.lr, betas=(0.5, 0.999))
    self.stopdown_G_optimizer = optim.Adam(self.stopdown_G.parameters(),
                                           lr=self.lr, betas=(0.5, 0.999))
    self.stopup_D_optimizer = optim.Adam(self.stopup_D.parameters(),
                                         lr=self.lr, betas=(0.5, 0.999))
    self.stopdown_D_optimizer = optim.Adam(self.stopdown_D.parameters(),
                                           lr=self.lr, betas=(0.5, 0.999))

    # loss function
    # NOTE(review): the GPU branch defines only L1_loss/criterionGAN while
    # the CPU branch also defines MSE_loss/BCE_loss — confirm the latter
    # two are unused on the GPU path.
    if self.gpu_mode:
        self.stopup_G = nn.DataParallel(self.stopup_G)
        self.stopdown_G = nn.DataParallel(self.stopdown_G)
        self.stopup_D = nn.DataParallel(self.stopup_D)
        self.stopdown_D = nn.DataParallel(self.stopdown_D)
        self.stopup_G.cuda()
        self.stopdown_G.cuda()
        self.stopup_D.cuda()
        self.stopdown_D.cuda()
        self.L1_loss = nn.L1Loss().cuda()
        self.criterionGAN = GANLoss().cuda()
    else:
        self.L1_loss = nn.L1Loss()
        self.MSE_loss = nn.MSELoss()
        self.BCE_loss = nn.BCELoss()
        self.criterionGAN = GANLoss()
    return
def __init__(self, args):
    """Conditional GAN trainer with an auxiliary classifier C (resnet18):
    builds G/D/C with their optimizers and losses, loads the MNIST
    train/test tensors, and prepares a fixed (noise, label) grid for
    sampling.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100  # 10 rows (shared noise) x 10 columns (labels)
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # networks init (alternative classifiers kept for reference)
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    # self.C = lenet(self.dataset)
    self.C = resnet18(self.dataset)
    # self.C = resnet18pa(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))
    # self.C_optimizer = optim.SGD(self.C.parameters(), lr=0.01)
    self.C_optimizer = optim.Adadelta(
        self.C.parameters())  # , lr=0.1, rho=0.9, eps=1e-8

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.C.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    utils.print_network(self.C)
    print('-----------------------------------------------')

    # load mnist (train tensors plus a held-out test split for C)
    if self.dataset == 'mnist':
        self.data_X, self.data_Y, self.X_test, self.y_test_vec = utils.load_mnist(
            args.dataset)
        self.z_dim = 100
        self.y_dim = 10

    # fixed noise & condition: each block of y_dim rows shares one noise
    # vector so that only the class label varies within a block
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i * self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i * self.y_dim + j] = self.sample_z_[i * self.y_dim]

    # labels 0..9 tiled over every block, then one-hot encoded
    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i
    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i * self.y_dim:(i + 1) * self.y_dim] = temp
    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)

    # volatile=True: inference-only Variables (legacy pre-0.4 PyTorch API)
    if self.gpu_mode:
        self.sample_z_, self.sample_y_ = Variable(
            self.sample_z_.cuda(), volatile=True), Variable(
                self.sample_y_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_ = Variable(
            self.sample_z_, volatile=True), Variable(
                self.sample_y_, volatile=True)
# Optionally resume the discriminator from the latest checkpoint; the
# map_location lambda keeps a GPU-saved checkpoint loadable on CPU.
if args.latest_discriminator_model != '':
    if torch.cuda.is_available():
        D.load_state_dict(torch.load(args.latest_discriminator_model))
    else:
        D.load_state_dict(
            torch.load(args.latest_discriminator_model,
                       map_location=lambda storage, loc: storage))

# Pretrained VGG19 used as a fixed feature extractor (feature_mode=True)
VGG = networks.VGG19(init_weights=args.vgg_model, feature_mode=True)
G.to(device)
D.to(device)
VGG.to(device)
G.train()
D.train()
VGG.eval()  # VGG is never trained — eval mode only

print('---------- Networks initialized -------------')
utils.print_network(G)
utils.print_network(D)
utils.print_network(VGG)
print('-----------------------------------------------')

# loss
BCE_loss = nn.BCELoss().to(device)
L1_loss = nn.L1Loss().to(device)

# Adam optimizer
G_optimizer = optim.Adam(G.parameters(), lr=args.lrG,
                         betas=(args.beta1, args.beta2))
D_optimizer = optim.Adam(D.parameters(), lr=args.lrD,
                         betas=(args.beta1, args.beta2))
def __init__(self, args, SUPERVISED=True):
    """infoGAN trainer: networks, the joint mutual-information optimizer,
    losses, MNIST tensors, and two fixed sample sets — one varying the
    class label, one sweeping the two continuous codes.

    Args:
        args: parsed CLI namespace.
        SUPERVISED: if True, label info is directly used for the
                    categorical code instead of being sampled.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.SUPERVISED = SUPERVISED  # if it is true, label info is directly used for code
    self.len_discrete_code = 10   # categorical distribution (i.e. label)
    self.len_continuous_code = 2  # gaussian distribution (e.g. rotation, thickness)

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))
    # the mutual-information term updates G and D jointly
    self.info_optimizer = optim.Adam(itertools.chain(
        self.G.parameters(), self.D.parameters()),
        lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist
    self.data_X, self.data_Y = utils.load_mnist(args.dataset)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition: blocks of y_dim rows share one noise vector
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))  # 100*62
    for i in range(10):
        self.sample_z_[i * self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i * self.y_dim + j] = self.sample_z_[i * self.y_dim]

    # labels 0..9 tiled, then one-hot encoded
    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i
    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i * self.y_dim:(i + 1) * self.y_dim] = temp
    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)  # 100*10
    self.sample_c_ = torch.zeros(
        (self.sample_num, self.len_continuous_code))  # 100*2

    # manipulating two continuous code: one noise vector repeated for all
    # rows, label fixed to class 0, codes swept over a 10x10 grid
    temp_z_ = torch.rand((1, self.z_dim))
    self.sample_z2_ = temp_z_
    for i in range(self.sample_num - 1):
        self.sample_z2_ = torch.cat([self.sample_z2_, temp_z_])  # 100*62

    y = np.zeros(self.sample_num, dtype=np.int64)
    y_one_hot = np.zeros((self.sample_num, self.len_discrete_code))
    y_one_hot[np.arange(self.sample_num), y] = 1
    self.sample_y2_ = torch.from_numpy(y_one_hot).type(
        torch.FloatTensor)  # 100*10

    temp_c = torch.linspace(-1, 1, 10)
    self.sample_c2_ = torch.zeros((self.sample_num, 2))
    for i in range(10):
        for j in range(10):
            self.sample_c2_[i * 10 + j, 0] = temp_c[i]
            self.sample_c2_[i * 10 + j, 1] = temp_c[j]  # 100*2

    # volatile=True: inference-only Variables (legacy pre-0.4 PyTorch API)
    if self.gpu_mode:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True), \
            Variable(self.sample_c_.cuda(), volatile=True), Variable(self.sample_z2_.cuda(), volatile=True), \
            Variable(self.sample_y2_.cuda(), volatile=True), Variable(self.sample_c2_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True), \
            Variable(self.sample_c_, volatile=True), Variable(self.sample_z2_, volatile=True), \
            Variable(self.sample_y2_, volatile=True), Variable(self.sample_c2_, volatile=True)
def __init__(self): # parameters # self.epoch = args.epoch # self.sample_num = 100 # self.batch_size = args.batch_size # self.save_dir = args.save_dir # self.result_dir = args.result_dir # self.dataset = args.dataset # self.log_dir = args.log_dir # self.gpu_mode = args.gpu_mode # self.model_name = args.gan_type # self.input_size = args.input_size # self.z_dim = 62 # self.lambda_ = 10 # self.n_critic = 5 # the number of iterations of the critic per generator iteration self.epoch = 4 self.sample_num = 100 self.batch_size = 4 self.save_dir = 'models' self.result_dir = 'results' self.dataset = 'pems' self.log_dir = 'logs' self.gpu_mode = True self.model_name = "WGAN_gp" # self.input_size = args.input_size self.z_dim = 8 self.lambda_ = 1 self.n_critic = 5 # the number of iterations of the critic per generator iteration # load dataset # self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size) # data = self.data_loader.__iter__().__next__()[0] dataset = Loader() self.data_loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True) data = self.data_loader.__iter__().__next__() print("dataset_length:", self.data_loader.dataset.__len__()) print('*' * 80) # (307,4) print(data.shape) print('*' * 80) self.input_size = [data.shape[2], data.shape[3]] # networks init self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size) self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size) self.G_optimizer = optim.Adam(self.G.parameters(), lr=0.0002, betas=(0.5, 0.999)) self.D_optimizer = optim.Adam(self.D.parameters(), lr=0.0002, betas=(0.5, 0.999)) if self.gpu_mode: self.G.cuda() self.D.cuda() print('---------- Networks architecture -------------') utils.print_network(self.G) utils.print_network(self.D) print('-----------------------------------------------') # fixed noise self.sample_z_ = torch.rand((self.batch_size, self.z_dim)) if self.gpu_mode: self.sample_z_ = self.sample_z_.cuda()
def __init__(self, args):
    """Build a GAN trainer with train/valid loaders for several datasets.

    Fix: the image-dimension chain below previously had no branch for
    'fashion-mnist', so `height`/`width`/`pix_level` were never set for it
    (its train_data is 3-D, so the `len(shape) == 4` fallback also missed)
    and network construction crashed. Fashion-MNIST now shares the MNIST
    branch (both are 28x28 grayscale).

    Args:
        args: argparse-style namespace (root, epoch, batch_size, dirs,
              dataset, z_dim, model_name, lrG/lrD, beta1/beta2).
    """
    # parameters
    self.root = args.root
    self.epoch = args.epoch
    self.sample_num = 16
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.z_dim = args.z_dim
    self.model_name = args.model_name
    self.args = args

    # load dataset: each branch creates a train loader and a validation loader
    if self.dataset == 'mnist':
        dset = datasets.MNIST('data/mnist', train=True, download=True,
                              transform=transforms.Compose(
                                  [transforms.ToTensor()]))
        valid_dset = datasets.MNIST('data/mnist', train=False, download=True,
                                    transform=transforms.Compose(
                                        [transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'cifar10':
        dset = datasets.CIFAR10(root='data/cifar10', train=True, download=True,
                                transform=transforms.Compose([
                                    transforms.Scale(64),
                                    transforms.ToTensor()
                                ]))
        valid_dset = datasets.CIFAR10(root='data/cifar10', train=False,
                                      download=True,
                                      transform=transforms.Compose([
                                          transforms.Scale(64),
                                          transforms.ToTensor()
                                      ]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'image-net':
        dset = datasets.ImageFolder('./data/tinyimage/train/',
                                    transform=transforms.ToTensor())
        valid_dset = datasets.ImageFolder('./data/tinyimage/test/',
                                          transform=transforms.ToTensor())
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=64, shuffle=True)
    elif self.dataset == 'fashion-mnist':
        dset = datasets.FashionMNIST('data/fashion-mnist', train=True,
                                     download=True,
                                     transform=transforms.Compose(
                                         [transforms.ToTensor()]))
        valid_dset = datasets.FashionMNIST('data/fashion-mnist', train=False,
                                           download=True,
                                           transform=transforms.Compose(
                                               [transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'celebA':
        # TODO: add test data — currently a 90/10 split of one folder
        dset = datasets.ImageFolder('./data/resized_celebA/',
                                    transform=transforms.ToTensor())
        valid_dats = datasets.ImageFolder('./data/resized_celebA/',
                                          transform=transforms.ToTensor())
        num_train = len(dset)
        indices = list(range(num_train))
        valid_size = 0.1
        split = int(np.floor(valid_size * num_train))
        train_idx, valid_idx = indices[split:], indices[:split]
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      sampler=train_sampler)
        self.valid_loader = DataLoader(valid_dats, batch_size=self.batch_size,
                                       sampler=valid_sampler)

    # image dimensions
    # FIX: fashion-mnist shares the mnist branch (28x28 grayscale); it
    # previously fell through and left height/width/pix_level unset.
    if self.dataset == 'mnist' or self.dataset == 'fashion-mnist':
        self.height, self.width = dset.train_data.shape[1:3]
        self.pix_level = 1
    elif self.dataset == 'cifar10':
        self.height = 64
        self.width = 64
        self.pix_level = dset.train_data.shape[3]
    elif self.dataset == 'image-net':
        self.height = 64
        self.width = 64
        self.pix_level = 3
    elif self.dataset == 'celebA':
        self.height = 64
        self.width = 64
        self.pix_level = 3
    elif len(dset.train_data.shape) == 4:
        self.pix_level = dset.train_data.shape[3]

    # batches per epoch (celebA excludes the 10% validation split)
    if self.dataset == 'celebA':
        self.iter_batch_epoch = floor(
            (0.9 * len(self.data_loader.dataset)) // self.batch_size)
    else:
        self.iter_batch_epoch = len(self.data_loader.dataset) // self.batch_size

    # networks init
    self.G = Generator(self.dataset, self.z_dim, self.height, self.width,
                       self.pix_level)
    self.D = Discriminator(self.dataset, self.height, self.width,
                           self.pix_level)
    self.FC = FeatureExtractor()
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    # D and the feature extractor are updated together
    self.D_optimizer = optim.Adam(chain(self.D.parameters(),
                                        self.FC.parameters()),
                                  lr=args.lrD,
                                  betas=(args.beta1, args.beta2))

    if torch.cuda.is_available():
        self.G.cuda()
        self.D.cuda()
        self.FC.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    utils.print_network(self.FC)
    print('-----------------------------------------------')
D = networks.discriminator(args.in_ndc, args.out_ndc, args.ndf) if args.latest_discriminator_model != '': if torch.cuda.is_available(): D.load_state_dict(torch.load(args.latest_discriminator_model)) else: D.load_state_dict(torch.load(args.latest_discriminator_model, map_location=lambda storage, loc: storage)) VGG = networks.VGG19(init_weights=args.vgg_model, feature_mode=True) G.to(device) D.to(device) VGG.to(device) G.train() D.train() VGG.eval() print('---------- Networks initialized -------------') utils.print_network(G) utils.print_network(D) utils.print_network(VGG) print('-----------------------------------------------') # loss BCE_loss = nn.BCELoss().to(device) L1_loss = nn.L1Loss().to(device) # Adam optimizer G_optimizer = optim.Adam(G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) D_optimizer = optim.Adam(D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2)) G_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=G_optimizer, milestones=[args.train_epoch // 2, args.train_epoch // 4 * 3], gamma=0.1) D_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=D_optimizer, milestones=[args.train_epoch // 2, args.train_epoch // 4 * 3], gamma=0.1) pre_train_hist = {}
]) train_loader_src = utils.data_load(os.path.join('data', args.src_data), 'train', transform, args.batch_size, shuffle=True, drop_last=True) train_loader_tgt = utils.data_load(os.path.join('data', args.tgt_data), 'train', transform, args.batch_size, shuffle=True, drop_last=True) test_loader_src = utils.data_load(os.path.join('data', args.src_data), 'test', transform, 1, shuffle=True, drop_last=True) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') A2BG = net.generator(args.in_ngc, args.out_ngc, args.ngf) B2AG = net.generator(args.in_ngc, args.out_ngc, args.ngf) AD = net.discriminator(args.in_ndc, args.out_ndc, args.ndf) BD = net.discriminator(args.in_ndc, args.out_ndc, args.ndf) print('---------- Networks initialized -------------') utils.print_network(A2BG) utils.print_network(AD) print('-----------------------------------------------') vgg16 = models.vgg16(pretrained=True) vgg16 = net.VGG(vgg16.features[:23]).to(device) A2BG.to(device) B2AG.to(device) AD.to(device) BD.to(device) A2BG.train() B2AG.train() AD.train() BD.train()
if torch.cuda.is_available(): tmpD.load_state_dict( torch.load(targets_dir[i] + args.latest_discriminator_model)) else: tmpD.load_state_dict( torch.load(targets_dir[i] + args.latest_discriminator_model, map_location=lambda storage, loc: storage)) tmpD.to(device) tmpD.train() D.append(tmpD) VGG = networks.VGG19(init_weights=args.vgg_model, feature_mode=True) VGG.to(device) VGG.eval() print('---------- Networks initialized -------------') utils.print_network(G) utils.print_network(D[0]) utils.print_network(VGG) print('-----------------------------------------------') # loss BCE_loss = nn.BCELoss().to(device) L1_loss = nn.L1Loss().to(device) # Adam optimizer G_optimizer = optim.Adam(G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2)) G_scheduler = optim.lr_scheduler.MultiStepLR( optimizer=G_optimizer, milestones=[args.train_epoch // 2, args.train_epoch // 4 * 3],
def __init__(self, args):
    """Build a GAN trainer with ExponentialLR schedulers and a TensorBoard
    writer, loading train/validate splits through DataloadtoGAN.

    Args:
        args: argparse-style namespace (epoch, batch_size, dirs, dataset,
              gan_type, gpu_mode, input_size, lrG/lrD, beta1/beta2).
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    # self.dataset = args.dataset
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.input_size = args.input_size
    self.z_dim = 62
    self.train_hist = {}

    # load dataset
    # self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
    # data = self.data_loader.__iter__().__next__()[0]
    """dataset"""
    # self.data_loader = testToGAN(self.dataset,'train')
    print(
        '-------------------load train dataset--------------------------------------'
    )
    self.data_loader = DataloadtoGAN(self.dataset, 'train')
    print(
        '---------------------------------------------------------------------------'
    )
    print(
        '-------------------load validate dataset-----------------------------------'
    )
    self.valdata = DataloadtoGAN(self.dataset, 'validate')
    print(
        '---------------------------------------------------------------------------'
    )
    # reset dataset: self.dataset is overwritten with the scheduler name —
    # presumably reused as a run/experiment tag; verify against callers
    self.dataset = 'ExponentialLR'
    # peek one batch to size the networks
    data = next(iter(self.data_loader))[0]
    # print('data.shape:',data.shape)#data.shape: torch.Size([64, 1, 64, 21])

    # networks init
    self.G = generator(input_dim=self.z_dim,
                       output_dim=data.shape[1],
                       input_size=self.input_size)
    self.D = discriminator(input_dim=data.shape[1],
                           output_dim=1,
                           input_size=self.input_size)
    self.G_optimizer = optim.Adam(self.G.parameters(),
                                  lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(),
                                  lr=args.lrD,
                                  betas=(args.beta1, args.beta2))

    # lr_scheduler
    # self.G_scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.G_optimizer, mode='max', factor=0.1, patience=4, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=3, min_lr=0, eps=1e-08)
    # self.D_scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.D_optimizer, mode='max', factor=0.1, patience=4, verbose=True, threshold=0.0001, threshold_mode='rel', cooldown=3, min_lr=0, eps=1e-08)
    # same epoch interval ruduce lr
    # self.G_scheduler = optim.lr_scheduler.StepLR(self.G_optimizer, 20, gamma=0.1, last_epoch=-1)
    # self.D_scheduler = optim.lr_scheduler.StepLR(self.D_optimizer, 20, gamma=0.1, last_epoch=-1)
    # ExponentialLR: multiply lr by 0.9 every epoch
    self.G_scheduler = optim.lr_scheduler.ExponentialLR(
        self.G_optimizer, 0.9)
    self.D_scheduler = optim.lr_scheduler.ExponentialLR(
        self.D_optimizer, 0.9)

    # NOTE(review): real/fake targets are deliberately SWAPPED relative to the
    # commented-out line above (real=0, fake=1) — looks like an intentional
    # label-flip trick; confirm before "fixing".
    # self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    self.y_real_, self.y_fake_ = torch.zeros(self.batch_size,
                                             1), torch.ones(
                                                 self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(
        ), self.y_fake_.cuda()

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # fixed noise used for periodic sample generation
    self.sample_z_ = torch.rand((self.batch_size, self.z_dim))
    if self.gpu_mode:
        self.sample_z_ = self.sample_z_.cuda()

    self.writer = SummaryWriter()  #log_dir=log_dir,
    self.X = 0
def train(self):
    """Train the SR network with an MSE loss on shaved (border-cropped)
    targets, saving per-epoch result images and periodic checkpoints.

    Fixes:
      - `loss.data[0]` (pre-0.4 PyTorch) replaced with `loss.item()`,
        matching the rest of this file.
      - the per-epoch prediction no longer calls `.cuda()` unconditionally;
        it previously crashed when `self.gpu_mode` was False.
      - `iter` / `input` no longer shadow builtins.
    """
    # networks
    self.model = Net(num_channels=self.num_channels, base_filter=64)

    # weight initialization
    self.model.weight_init(mean=0.0, std=0.001)

    # optimizer
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)

    # loss function
    if self.gpu_mode:
        self.model.cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.model)
    print('----------------------------------------------')

    # load dataset
    train_data_loader = self.load_dataset(dataset='train')
    test_data_loader = self.load_dataset(dataset='test')

    # set the logger
    log_dir = os.path.join(self.save_dir, 'logs')
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logger = Logger(log_dir)

    ################# Train #################
    print('Training is started.')
    avg_loss = []
    step = 0

    # fixed test pair used for the per-epoch visual check
    test_input, test_target = test_data_loader.dataset.__getitem__(2)
    test_input = test_input.unsqueeze(0)
    test_target = test_target.unsqueeze(0)

    self.model.train()
    for epoch in range(self.num_epochs):
        epoch_loss = 0
        for batch_idx, (lr_input, target) in enumerate(train_data_loader):
            # input data (bicubic interpolated image)
            if self.gpu_mode:
                # exclude border pixels from loss computation
                x_ = Variable(utils.shave(target, border_size=8).cuda())
                y_ = Variable(
                    utils.img_interp(lr_input, self.scale_factor).cuda())
            else:
                x_ = Variable(utils.shave(target, border_size=8))
                y_ = Variable(utils.img_interp(lr_input, self.scale_factor))

            # update network
            self.optimizer.zero_grad()
            recon_image = self.model(y_)
            loss = self.MSE_loss(recon_image, x_)
            loss.backward()
            self.optimizer.step()

            # log
            batch_loss = loss.item()
            epoch_loss += batch_loss
            print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
                  ((epoch + 1), (batch_idx + 1), len(train_data_loader),
                   batch_loss))

            # tensorboard logging
            logger.scalar_summary('loss', batch_loss, step + 1)
            step += 1

        # avg. loss per epoch
        avg_loss.append(epoch_loss / len(train_data_loader))

        # prediction on the fixed test image
        test_in = utils.img_interp(test_input, self.scale_factor)
        if self.gpu_mode:
            test_in = test_in.cuda()
        recon_imgs = self.model(Variable(test_in))
        recon_img = recon_imgs[0].cpu().data
        gt_img = utils.shave(test_target[0], border_size=8)
        lr_img = test_input[0]
        bc_img = utils.shave(
            utils.img_interp(test_input[0], self.scale_factor),
            border_size=8)

        # calculate psnrs
        bc_psnr = utils.PSNR(bc_img, gt_img)
        recon_psnr = utils.PSNR(recon_img, gt_img)

        # save result images
        result_imgs = [gt_img, lr_img, bc_img, recon_img]
        psnrs = [None, None, bc_psnr, recon_psnr]
        utils.plot_test_result(result_imgs, psnrs, epoch + 1,
                               save_dir=self.save_dir, is_training=True)
        print("Saving training result images at epoch %d" % (epoch + 1))

        # Save trained parameters of model
        if (epoch + 1) % self.save_epochs == 0:
            self.save_model(epoch + 1)

    # Plot avg. loss
    utils.plot_loss([avg_loss], self.num_epochs, save_dir=self.save_dir)
    print("Training is finished.")

    # Save final trained parameters of model
    self.save_model(epoch=None)
help="Path to the file storing network configuration") parser.add_argument("-d", "--data", type=str, default='', help="Data file") args = parser.parse_args() ##################### # Build the model # ##################### net = load_network(args.network) print("Loaded network: ") print_network(net) # allocate symbolic variables for theano graph computations X_batch = T.tensor4('x') data = np.load(args.data) if args.mean_file: mean = np.load(args.mean_file) if args.mean_file: data = data - mean x_test = np.rollaxis(data, 3, 1) # allocate shared variables for images, labels and learing rate x_shared = theano.shared(np.zeros((x_test.shape[0], 3, IMAGE_SIZE, IMAGE_SIZE), dtype=theano.config.floatX),
def train(self):
    """SRGAN training: optional 50-epoch MSE pre-training of the generator,
    then adversarial training of G and D with MSE + VGG-feature + GAN losses.
    Saves per-epoch sample images, periodic checkpoints, and loss plots.
    """
    # load dataset
    train_data_loader = self.load_dataset(dataset=self.train_dataset,
                                          is_train=True)
    test_data_loader = self.load_dataset(dataset=self.test_dataset[0],
                                         is_train=False)

    # networks
    self.G = Generator(num_channels=self.num_channels, base_filter=64,
                       num_residuals=16)
    self.D = Discriminator(num_channels=self.num_channels, base_filter=64,
                           image_size=self.crop_size)

    # weight initialization
    self.G.weight_init()
    self.D.weight_init()

    # For the content loss: pretrained VGG19 as a fixed feature extractor
    self.feature_extractor = FeatureExtractor(
        models.vgg19(pretrained=True))

    # optimizer — D deliberately uses SGD at lr/100 (see commented Adam line)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=self.lr,
                                  betas=(0.9, 0.999))
    # self.D_optimizer = optim.Adam(self.D.parameters(), lr=self.lr, betas=(0.9, 0.999))
    self.D_optimizer = optim.SGD(self.D.parameters(), lr=self.lr / 100,
                                 momentum=0.9, nesterov=True)

    # loss function
    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.feature_extractor.cuda()
        self.MSE_loss = nn.MSELoss().cuda()
        self.BCE_loss = nn.BCELoss().cuda()
    else:
        self.MSE_loss = nn.MSELoss()
        self.BCE_loss = nn.BCELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('----------------------------------------------')

    # set the logger
    G_log_dir = os.path.join(self.save_dir, 'G_logs')
    if not os.path.exists(G_log_dir):
        os.mkdir(G_log_dir)
    G_logger = Logger(G_log_dir)

    D_log_dir = os.path.join(self.save_dir, 'D_logs')
    if not os.path.exists(D_log_dir):
        os.mkdir(D_log_dir)
    D_logger = Logger(D_log_dir)

    ################# Pre-train generator #################
    self.epoch_pretrain = 50

    # Load pre-trained parameters of generator; pre-train only if absent
    if not self.load_model(is_pretrain=True):
        # Pre-training generator for 50 epochs with pure content (MSE) loss
        print('Pre-training is started.')
        self.G.train()
        for epoch in range(self.epoch_pretrain):
            for iter, (lr, hr, _) in enumerate(train_data_loader):
                # input data (low resolution image); single-channel models
                # train on the first channel only
                if self.num_channels == 1:
                    x_ = Variable(
                        utils.norm(hr[:, 0].unsqueeze(1), vgg=True))
                    y_ = Variable(
                        utils.norm(lr[:, 0].unsqueeze(1), vgg=True))
                else:
                    x_ = Variable(utils.norm(hr, vgg=True))
                    y_ = Variable(utils.norm(lr, vgg=True))

                if self.gpu_mode:
                    x_ = x_.cuda()
                    y_ = y_.cuda()

                # Train generator
                self.G_optimizer.zero_grad()
                recon_image = self.G(y_)

                # Content losses
                content_loss = self.MSE_loss(recon_image, x_)

                # Back propagation
                G_loss_pretrain = content_loss
                G_loss_pretrain.backward()
                self.G_optimizer.step()

                # log
                print("Epoch: [%2d] [%4d/%4d] G_loss_pretrain: %.8f" %
                      ((epoch + 1), (iter + 1), len(train_data_loader),
                       G_loss_pretrain.item()))

        print('Pre-training is finished.')

        # Save pre-trained parameters of generator
        self.save_model(is_pretrain=True)

    ################# Adversarial train #################
    print('Training is started.')

    # Avg. losses
    G_avg_loss = []
    D_avg_loss = []
    step = 0

    # fixed test triple (low-res, high-res, bicubic) for visual checks
    test_lr, test_hr, test_bc = test_data_loader.dataset.__getitem__(2)
    test_lr = test_lr.unsqueeze(0)
    test_hr = test_hr.unsqueeze(0)
    test_bc = test_bc.unsqueeze(0)

    self.G.train()
    self.D.train()
    for epoch in range(self.num_epochs):
        # learning rate is decayed by a factor of 10 every 20 epoch
        if (epoch + 1) % 20 == 0:
            for param_group in self.G_optimizer.param_groups:
                param_group["lr"] /= 10.0
            print("Learning rate decay for G: lr={}".format(
                self.G_optimizer.param_groups[0]["lr"]))
            for param_group in self.D_optimizer.param_groups:
                param_group["lr"] /= 10.0
            print("Learning rate decay for D: lr={}".format(
                self.D_optimizer.param_groups[0]["lr"]))

        G_epoch_loss = 0
        D_epoch_loss = 0
        for iter, (lr, hr, _) in enumerate(train_data_loader):
            # input data (low resolution image)
            mini_batch = lr.size()[0]

            if self.num_channels == 1:
                x_ = Variable(utils.norm(hr[:, 0].unsqueeze(1), vgg=True))
                y_ = Variable(utils.norm(lr[:, 0].unsqueeze(1), vgg=True))
            else:
                x_ = Variable(utils.norm(hr, vgg=True))
                y_ = Variable(utils.norm(lr, vgg=True))

            if self.gpu_mode:
                x_ = x_.cuda()
                y_ = y_.cuda()

                # labels
                real_label = Variable(torch.ones(mini_batch).cuda())
                fake_label = Variable(torch.zeros(mini_batch).cuda())
            else:
                # labels
                real_label = Variable(torch.ones(mini_batch))
                fake_label = Variable(torch.zeros(mini_batch))

            # Reset gradient
            self.D_optimizer.zero_grad()

            # Train discriminator with real data
            D_real_decision = self.D(x_)
            D_real_loss = self.BCE_loss(D_real_decision[:, 0], real_label)

            # Train discriminator with fake data
            recon_image = self.G(y_)
            D_fake_decision = self.D(recon_image)
            D_fake_loss = self.BCE_loss(D_fake_decision[:, 0], fake_label)

            D_loss = D_real_loss + D_fake_loss

            # Back propagation
            D_loss.backward()
            self.D_optimizer.step()

            # Reset gradient
            self.G_optimizer.zero_grad()

            # Train generator (fresh forward pass after the D update)
            recon_image = self.G(y_)
            D_fake_decision = self.D(recon_image)

            # Adversarial loss: G wants D to output "real" on its samples
            GAN_loss = self.BCE_loss(D_fake_decision[:, 0], real_label)

            # Content losses
            # NOTE(review): the .cuda() calls below are unconditional — this
            # path would fail when gpu_mode is False; confirm intended usage.
            mse_loss = self.MSE_loss(recon_image, x_)
            x_VGG = Variable(utils.norm(hr, vgg=True).cuda())
            recon_VGG = Variable(
                utils.norm(recon_image.data, vgg=True).cuda())
            real_feature = self.feature_extractor(x_VGG)
            fake_feature = self.feature_extractor(recon_VGG)
            vgg_loss = self.MSE_loss(fake_feature, real_feature.detach())

            # Back propagation: weighted sum of pixel, perceptual, GAN terms
            G_loss = mse_loss + 6e-3 * vgg_loss + 1e-3 * GAN_loss
            G_loss.backward()
            self.G_optimizer.step()

            # log
            G_epoch_loss += G_loss.item()
            D_epoch_loss += D_loss.item()
            print("Epoch: [%2d] [%4d/%4d] G_loss: %.8f, D_loss: %.8f" %
                  ((epoch + 1), (iter + 1), len(train_data_loader),
                   G_loss.item(), D_loss.item()))

            # tensorboard logging
            #G_logger.scalar_summary('losses', G_loss.item(), step + 1)
            #D_logger.scalar_summary('losses', D_loss.item(), step + 1)
            step += 1

        # avg. loss per epoch
        G_avg_loss.append(G_epoch_loss / len(train_data_loader))
        D_avg_loss.append(D_epoch_loss / len(train_data_loader))

        # prediction on the fixed test image
        if self.num_channels == 1:
            y_ = Variable(utils.norm(test_lr[:, 0].unsqueeze(1), vgg=True))
        else:
            y_ = Variable(utils.norm(test_lr, vgg=True))

        if self.gpu_mode:
            y_ = y_.cuda()

        recon_img = self.G(y_)
        sr_img = utils.denorm(recon_img[0].cpu().data, vgg=True)

        # save result image
        save_dir = os.path.join(self.save_dir, 'train_result')
        utils.save_img(sr_img, epoch + 1, save_dir=save_dir,
                       is_training=True)
        print('Result image at epoch %d is saved.' % (epoch + 1))

        # Save trained parameters of model
        if (epoch + 1) % self.save_epochs == 0:
            self.save_model(epoch + 1)

    # calculate psnrs against the last generated sr_img
    if self.num_channels == 1:
        gt_img = test_hr[0][0].unsqueeze(0)
        lr_img = test_lr[0][0].unsqueeze(0)
        bc_img = test_bc[0][0].unsqueeze(0)
    else:
        gt_img = test_hr[0]
        lr_img = test_lr[0]
        bc_img = test_bc[0]

    bc_psnr = utils.PSNR(bc_img, gt_img)
    recon_psnr = utils.PSNR(sr_img, gt_img)

    # plot result images
    result_imgs = [gt_img, lr_img, bc_img, sr_img]
    psnrs = [None, None, bc_psnr, recon_psnr]
    utils.plot_test_result(result_imgs, psnrs, self.num_epochs,
                           save_dir=save_dir, is_training=True)
    print('Training result image is saved.')

    # Plot avg. loss
    utils.plot_loss([G_avg_loss, D_avg_loss], self.num_epochs,
                    save_dir=self.save_dir)
    print("Training is finished.")

    # Save final trained parameters of model
    self.save_model(epoch=None)
opt = parser.parse_args() print(opt) if torch.cuda.is_available() and not opt.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") ###### Definition of variables ###### # Networks netG_A2B = Generator() netG_B2A = Generator() netD_A = Discriminator() netD_B = Discriminator() print('---------- Networks initialized -------------') print_network(netG_A2B) print_network(netG_B2A) print_network(netD_A) print_network(netD_B) print('-----------------------------------------------') if opt.cuda: netG_A2B.cuda() netG_B2A.cuda() netD_A.cuda() netD_B.cuda() netG_A2B.apply(weights_init_normal) netG_B2A.apply(weights_init_normal) netD_A.apply(weights_init_normal) netD_B.apply(weights_init_normal)
def __init__(self, args, SUPERVISED=True):
    """InfoGAN trainer: networks, three optimizers (G, D, and a joint
    info optimizer for the mutual-information term), losses, and the fixed
    noise/code batches used for periodic sample generation.

    Args:
        args: argparse-style namespace (epoch, batch_size, dirs, gan_type,
              gpu_mode, input_size, lrG/lrD, beta1/beta2, dataset).
        SUPERVISED: if True, label info is directly used as the discrete code.
    """
    # parameters
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.input_size = args.input_size
    self.z_dim = 62
    self.SUPERVISED = SUPERVISED  # if it is true, label info is directly used for code
    self.len_discrete_code = 10  # categorical distribution (i.e. label)
    self.len_continuous_code = 2  # gaussian distribution (e.g. rotation, thickness)
    self.sample_num = self.len_discrete_code**2  # 10x10 sample grid

    # load dataset; peek one batch to size the networks
    self.data_loader = dataloader(self.dataset, self.input_size,
                                  self.batch_size)
    data = self.data_loader.__iter__().__next__()[0]

    # networks init
    self.G = generator(input_dim=self.z_dim,
                       output_dim=data.shape[1],
                       input_size=self.input_size,
                       len_discrete_code=self.len_discrete_code,
                       len_continuous_code=self.len_continuous_code)
    self.D = discriminator(input_dim=data.shape[1],
                           output_dim=1,
                           input_size=self.input_size,
                           len_discrete_code=self.len_discrete_code,
                           len_continuous_code=self.len_continuous_code)
    self.G_optimizer = optim.Adam(self.G.parameters(),
                                  lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(),
                                  lr=args.lrD,
                                  betas=(args.beta1, args.beta2))
    # info loss updates BOTH G and D (mutual-information term)
    self.info_optimizer = optim.Adam(itertools.chain(
        self.G.parameters(), self.D.parameters()),
                                     lr=args.lrD,
                                     betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # fixed noise & condition: each row shares one z so only the class code
    # varies along a row
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(self.len_discrete_code):
        self.sample_z_[i * self.len_discrete_code] = torch.rand(
            1, self.z_dim)
        for j in range(1, self.len_discrete_code):
            self.sample_z_[i * self.len_discrete_code +
                           j] = self.sample_z_[i * self.len_discrete_code]

    # class indices 0..9, tiled over the sample grid
    temp = torch.zeros((self.len_discrete_code, 1))
    for i in range(self.len_discrete_code):
        temp[i, 0] = i

    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(self.len_discrete_code):
        temp_y[i * self.len_discrete_code:(i + 1) *
               self.len_discrete_code] = temp

    # one-hot encode the class indices
    self.sample_y_ = torch.zeros(
        (self.sample_num,
         self.len_discrete_code)).scatter_(1,
                                           temp_y.type(torch.LongTensor),
                                           1)
    self.sample_c_ = torch.zeros(
        (self.sample_num, self.len_continuous_code))

    # manipulating two continuous code: one shared z, class fixed to 0,
    # continuous codes swept over a 10x10 grid in [-1, 1]^2
    self.sample_z2_ = torch.rand(
        (1, self.z_dim)).expand(self.sample_num, self.z_dim)
    self.sample_y2_ = torch.zeros(self.sample_num, self.len_discrete_code)
    self.sample_y2_[:, 0] = 1

    temp_c = torch.linspace(-1, 1, 10)
    self.sample_c2_ = torch.zeros((self.sample_num, 2))
    for i in range(self.len_discrete_code):
        for j in range(self.len_discrete_code):
            self.sample_c2_[i * self.len_discrete_code + j, 0] = temp_c[i]
            self.sample_c2_[i * self.len_discrete_code + j, 1] = temp_c[j]

    if self.gpu_mode:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            self.sample_z_.cuda(), self.sample_y_.cuda(), self.sample_c_.cuda(), self.sample_z2_.cuda(), \
            self.sample_y2_.cuda(), self.sample_c2_.cuda()
def main():
    """CartoonGAN training: optional VGG-feature reconstruction pre-training
    of the generator, then adversarial training with real/fake/edge-promoted
    discriminator targets. Saves sample transfers, checkpoints, and history.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if torch.backends.cudnn.enabled:
        torch.backends.cudnn.benchmark = True
    prepare_result()
    make_edge_promoting_img()

    # data_loader — source images are resized; target pairs keep native size
    src_transform = transforms.Compose([
        transforms.Resize((args.input_size, args.input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    tgt_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    train_loader_src = utils.data_load(os.path.join('data', args.src_data),
                                       'train', src_transform,
                                       args.batch_size, shuffle=True,
                                       drop_last=True)
    # 'pair' folder: target image and its edge-smoothed twin side by side
    train_loader_tgt = utils.data_load(os.path.join('data', args.tgt_data),
                                       'pair', tgt_transform,
                                       args.batch_size, shuffle=True,
                                       drop_last=True)
    test_loader_src = utils.data_load(os.path.join('data', args.src_data),
                                      'test', src_transform, 1,
                                      shuffle=True, drop_last=True)

    # network — optionally resume both nets from the latest checkpoints
    G = networks.generator(args.in_ngc, args.out_ngc, args.ngf, args.nb)
    if args.latest_generator_model != '':
        if torch.cuda.is_available():
            G.load_state_dict(torch.load(args.latest_generator_model))
        else:
            # cpu mode
            G.load_state_dict(
                torch.load(args.latest_generator_model,
                           map_location=lambda storage, loc: storage))
    D = networks.discriminator(args.in_ndc, args.out_ndc, args.ndf)
    if args.latest_discriminator_model != '':
        if torch.cuda.is_available():
            D.load_state_dict(torch.load(args.latest_discriminator_model))
        else:
            D.load_state_dict(
                torch.load(args.latest_discriminator_model,
                           map_location=lambda storage, loc: storage))
    # Pretrained VGG19 used as a fixed feature extractor for the content loss
    VGG = networks.VGG19(init_weights=args.vgg_model, feature_mode=True)
    G.to(device)
    D.to(device)
    VGG.to(device)
    G.train()
    D.train()
    VGG.eval()
    print('---------- Networks initialized -------------')
    utils.print_network(G)
    utils.print_network(D)
    utils.print_network(VGG)
    print('-----------------------------------------------')

    # loss
    BCE_loss = nn.BCELoss().to(device)
    L1_loss = nn.L1Loss().to(device)

    # Adam optimizer; LR steps down 10x at 1/2 and 3/4 of training
    G_optimizer = optim.Adam(G.parameters(), lr=args.lrG,
                             betas=(args.beta1, args.beta2))
    D_optimizer = optim.Adam(D.parameters(), lr=args.lrD,
                             betas=(args.beta1, args.beta2))
    G_scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer=G_optimizer,
        milestones=[args.train_epoch // 2, args.train_epoch // 4 * 3],
        gamma=0.1)
    D_scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer=D_optimizer,
        milestones=[args.train_epoch // 2, args.train_epoch // 4 * 3],
        gamma=0.1)

    pre_train_hist = {}
    pre_train_hist['Recon_loss'] = []
    pre_train_hist['per_epoch_time'] = []
    pre_train_hist['total_time'] = []
    """ Pre-train reconstruction """
    # Pre-train only when not resuming from a generator checkpoint
    if args.latest_generator_model == '':
        print('Pre-training start!')
        start_time = time.time()
        for epoch in range(args.pre_train_epoch):
            epoch_start_time = time.time()
            Recon_losses = []
            for x, _ in train_loader_src:
                x = x.to(device)

                # train generator G: match VGG features of G(x) to those of x
                # ((x + 1) / 2 maps the [-1, 1] normalized images into [0, 1])
                G_optimizer.zero_grad()
                x_feature = VGG((x + 1) / 2)
                G_ = G(x)
                G_feature = VGG((G_ + 1) / 2)
                Recon_loss = 10 * L1_loss(G_feature, x_feature.detach())
                Recon_losses.append(Recon_loss.item())
                pre_train_hist['Recon_loss'].append(Recon_loss.item())
                Recon_loss.backward()
                G_optimizer.step()

            per_epoch_time = time.time() - epoch_start_time
            pre_train_hist['per_epoch_time'].append(per_epoch_time)
            print('[%d/%d] - time: %.2f, Recon loss: %.3f' %
                  ((epoch + 1), args.pre_train_epoch, per_epoch_time,
                   torch.mean(torch.FloatTensor(Recon_losses))))

        total_time = time.time() - start_time
        pre_train_hist['total_time'].append(total_time)
        with open(os.path.join(args.name + '_results', 'pre_train_hist.pkl'),
                  'wb') as f:
            pickle.dump(pre_train_hist, f)

        # dump 5 train and 5 test reconstructions for a quick visual check
        with torch.no_grad():
            G.eval()
            for n, (x, _) in enumerate(train_loader_src):
                x = x.to(device)
                G_recon = G(x)
                result = torch.cat((x[0], G_recon[0]), 2)
                path = os.path.join(
                    args.name + '_results', 'Reconstruction',
                    args.name + '_train_recon_' + str(n + 1) + '.png')
                plt.imsave(path,
                           (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
                if n == 4:
                    break
            for n, (x, _) in enumerate(test_loader_src):
                x = x.to(device)
                G_recon = G(x)
                result = torch.cat((x[0], G_recon[0]), 2)
                path = os.path.join(
                    args.name + '_results', 'Reconstruction',
                    args.name + '_test_recon_' + str(n + 1) + '.png')
                plt.imsave(path,
                           (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
                if n == 4:
                    break
    else:
        print('Load the latest generator model, no need to pre-train')

    train_hist = {}
    train_hist['Disc_loss'] = []
    train_hist['Gen_loss'] = []
    train_hist['Con_loss'] = []
    train_hist['per_epoch_time'] = []
    train_hist['total_time'] = []
    print('training start!')
    start_time = time.time()
    # discriminator targets — D outputs a (input_size/4)^2 patch map
    real = torch.ones(args.batch_size, 1, args.input_size // 4,
                      args.input_size // 4).to(device)
    fake = torch.zeros(args.batch_size, 1, args.input_size // 4,
                       args.input_size // 4).to(device)
    for epoch in range(args.train_epoch):
        epoch_start_time = time.time()
        G.train()
        Disc_losses = []
        Gen_losses = []
        Con_losses = []
        for (x, _), (y, _) in zip(train_loader_src, train_loader_tgt):
            # split the paired target image into original (y) and
            # edge-smoothed (e) halves
            e = y[:, :, :, args.input_size:]
            y = y[:, :, :, :args.input_size]
            x, y, e = x.to(device), y.to(device), e.to(device)

            # train D: real targets -> real; generated and edge-smoothed -> fake
            D_optimizer.zero_grad()
            D_real = D(y)
            D_real_loss = BCE_loss(D_real, real)
            G_ = G(x)
            D_fake = D(G_)
            D_fake_loss = BCE_loss(D_fake, fake)
            D_edge = D(e)
            D_edge_loss = BCE_loss(D_edge, fake)
            Disc_loss = D_real_loss + D_fake_loss + D_edge_loss
            Disc_losses.append(Disc_loss.item())
            train_hist['Disc_loss'].append(Disc_loss.item())
            Disc_loss.backward()
            D_optimizer.step()

            # train G: fool D plus VGG-feature content loss against the source
            G_optimizer.zero_grad()
            G_ = G(x)
            D_fake = D(G_)
            D_fake_loss = BCE_loss(D_fake, real)
            x_feature = VGG((x + 1) / 2)
            G_feature = VGG((G_ + 1) / 2)
            Con_loss = args.con_lambda * L1_loss(G_feature,
                                                 x_feature.detach())
            Gen_loss = D_fake_loss + Con_loss
            Gen_losses.append(D_fake_loss.item())
            train_hist['Gen_loss'].append(D_fake_loss.item())
            Con_losses.append(Con_loss.item())
            train_hist['Con_loss'].append(Con_loss.item())
            Gen_loss.backward()
            G_optimizer.step()

        # per-epoch LR schedule step
        G_scheduler.step()
        D_scheduler.step()
        per_epoch_time = time.time() - epoch_start_time
        train_hist['per_epoch_time'].append(per_epoch_time)
        print(
            '[%d/%d] - time: %.2f, Disc loss: %.3f, Gen loss: %.3f, Con loss: %.3f'
            % ((epoch + 1), args.train_epoch, per_epoch_time,
               torch.mean(torch.FloatTensor(Disc_losses)),
               torch.mean(torch.FloatTensor(Gen_losses)),
               torch.mean(torch.FloatTensor(Con_losses))))

        # every other epoch (and at the end): dump sample transfers and
        # save the latest checkpoints
        if epoch % 2 == 1 or epoch == args.train_epoch - 1:
            with torch.no_grad():
                G.eval()
                for n, (x, _) in enumerate(train_loader_src):
                    x = x.to(device)
                    G_recon = G(x)
                    result = torch.cat((x[0], G_recon[0]), 2)
                    path = os.path.join(
                        args.name + '_results', 'Transfer',
                        str(epoch + 1) + '_epoch_' + args.name + '_train_' +
                        str(n + 1) + '.png')
                    plt.imsave(
                        path,
                        (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
                    if n == 4:
                        break
                for n, (x, _) in enumerate(test_loader_src):
                    x = x.to(device)
                    G_recon = G(x)
                    result = torch.cat((x[0], G_recon[0]), 2)
                    path = os.path.join(
                        args.name + '_results', 'Transfer',
                        str(epoch + 1) + '_epoch_' + args.name + '_test_' +
                        str(n + 1) + '.png')
                    plt.imsave(
                        path,
                        (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
                    if n == 4:
                        break
                torch.save(
                    G.state_dict(),
                    os.path.join(args.name + '_results',
                                 'generator_latest.pkl'))
                torch.save(
                    D.state_dict(),
                    os.path.join(args.name + '_results',
                                 'discriminator_latest.pkl'))

    total_time = time.time() - start_time
    train_hist['total_time'].append(total_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (torch.mean(torch.FloatTensor(
              train_hist['per_epoch_time'])), args.train_epoch, total_time))
    print("Training finish!... save training results")
    torch.save(G.state_dict(),
               os.path.join(args.name + '_results', 'generator_param.pkl'))
    torch.save(
        D.state_dict(),
        os.path.join(args.name + '_results', 'discriminator_param.pkl'))
    with open(os.path.join(args.name + '_results', 'train_hist.pkl'),
              'wb') as f:
        pickle.dump(train_hist, f)
def __init__(self, args):
    """Build the trainer: hyper-parameters from `args`, data loaders,
    G/E/D networks, their Adam optimizers, and (optionally) saved weights.

    Args:
        args: parsed command-line namespace (epoch, batch_size, save_dir,
              result_dir, log_dir, z_dim, model_name, load_model, dataset,
              root, lrG/lrD/lrE, beta1, beta2).
    """
    # parameters
    self.root = args.root
    self.epoch = args.epoch
    self.sample_num = 16  # number of fixed samples rendered per snapshot
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.log_dir = args.log_dir
    self.z_dim = args.z_dim
    self.model_name = args.model_name
    self.load_model = args.load_model
    self.dataset = args.dataset

    # load dataset: each branch builds a train loader and (except celebA)
    # a validation loader over the dataset's test split.
    if self.dataset == 'mnist':
        dset = datasets.MNIST('data/mnist', train=True, download=True,
                              transform=transforms.Compose(
                                  [transforms.ToTensor()]))
        valid_dset = datasets.MNIST('data/mnist', train=False, download=True,
                                    transform=transforms.Compose(
                                        [transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'cifar10':
        # CIFAR images are upscaled to 64x64 and normalized to [-1, 1].
        dset = datasets.CIFAR10(root='data/cifar10', train=True, download=True,
                                transform=transforms.Compose([
                                    transforms.Scale(64),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                                         std=(0.5, 0.5, 0.5))
                                ]))
        valid_dset = datasets.CIFAR10(root='data/cifar10', train=False,
                                      download=True,
                                      transform=transforms.Compose([
                                          transforms.Scale(64),
                                          transforms.ToTensor(),
                                          transforms.Normalize(
                                              mean=(0.5, 0.5, 0.5),
                                              std=(0.5, 0.5, 0.5))
                                      ]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'svhn':
        # load SVHN dataset (73257, 3, 32, 32)
        dset = datasets.SVHN(root='data/svhn', split='train', download=True,
                             transform=transforms.Compose([
                                 transforms.Scale(64),
                                 transforms.ToTensor(),
                                 transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                                      std=(0.5, 0.5, 0.5))
                             ]))
        valid_dset = datasets.SVHN(root='data/svhn', split='test',
                                   download=True,
                                   transform=transforms.Compose([
                                       transforms.Scale(64),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                                            std=(0.5, 0.5, 0.5))
                                   ]))
        self.data_loader = DataLoader(dset,
                                      batch_size=self.batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'fashion-mnist':
        dset = datasets.FashionMNIST('data/fashion-mnist', train=True,
                                     download=True,
                                     transform=transforms.Compose(
                                         [transforms.ToTensor()]))
        valid_dset = datasets.FashionMNIST('data/fashion-mnist', train=False,
                                           download=True,
                                           transform=transforms.Compose(
                                               [transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'celebA':
        # TODO: add test data
        # NOTE(review): this branch never sets self.valid_loader, and the
        # shape probe below reads dset.train_data which a celebA dataset
        # presumably lacks -- verify before running with dataset='celebA'.
        dset = utils.load_celebA('data/celebA',
                                 transform=transforms.Compose([
                                     transforms.CenterCrop(160),
                                     transforms.Scale(64),
                                     transforms.ToTensor()
                                 ]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size,
                                      shuffle=True)

    # image dimensions, probed from the raw dataset arrays.
    if self.dataset == 'svhn':
        # SVHN stores data as (N, C, H, W).
        self.height, self.width = dset.data.shape[2:4]
        self.pix_level = dset.data.shape[1]
    else:
        # MNIST-style datasets store (N, H, W) or (N, H, W, C).
        self.height, self.width = dset.train_data.shape[1:3]
        if len(dset.train_data.shape) == 3:
            self.pix_level = 1
        elif self.dataset == 'cifar10':
            # CIFAR was upscaled to 64x64 by the transform above, so the
            # raw 32x32 probe is overridden here.
            self.height = 64
            self.width = 64
            self.pix_level = dset.train_data.shape[3]
        elif len(dset.train_data.shape) == 4:
            self.pix_level = dset.train_data.shape[3]

    # networks init
    self.G = Generator(self.z_dim, self.pix_level)
    self.E = Encoder(self.z_dim, self.pix_level)
    self.D = Discriminator(self.pix_level)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))
    self.E_optimizer = optim.Adam(self.E.parameters(), lr=args.lrE,
                                  betas=(args.beta1, args.beta2))

    if torch.cuda.is_available():
        self.G.cuda()
        self.D.cuda()
        self.E.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    utils.print_network(self.E)
    print('-----------------------------------------------')

    # load in saved model
    if self.load_model:
        self.load()
def __init__(self, args):
    """Build the imbalanced-class GAN trainer: hyper-parameters, the
    (imbalanced) data loader, G/D/C networks with their Adam optimizers,
    and the fixed noise/condition tensors used for sampling.

    Args:
        args: parsed command-line namespace (epoch, batch_size, save_dir,
              result_dir, dataset, log_dir, gpu_mode, gan_type, input_size,
              use_fake_data, fake_num, imbalance, lrG/lrD/lrC, beta1, beta2).
    """
    # parameters
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.input_size = args.input_size
    self.z_dim = 62
    self.class_num = 11  # last one for balance or imbalance
    self.minority = 6
    self.minority_label = 0.98  # 1-100/5000
    self.c = 0.01  # clipping value
    self.n_critic_g = 5  # the number of iterations of the critic per generator iteration
    self.n_critic_c = 5  # the number of iterations of the critic per classifier iteration
    self.use_fake_data = args.use_fake_data
    # FIX: the original assigned args.fake_num to self.use_fake_data,
    # clobbering the flag set on the previous line; the sibling trainer
    # keeps the flag and the count in separate attributes.
    self.fake_num = args.fake_num
    if self.fake_num:
        self.sample_num = self.fake_num
    else:
        self.sample_num = (self.class_num - 1)**2

    # load dataset
    # self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
    self.data_loader = load_data(self.dataset, args.imbalance,
                                 self.batch_size, True, False)  # imbalanced dataset
    # one batch just to discover the channel count for the networks
    data = self.data_loader.__iter__().__next__()[0]

    # networks init
    self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1],
                       input_size=self.input_size, class_num=self.class_num)
    self.D = discriminator(input_dim=data.shape[1], output_dim=1,
                           input_size=self.input_size, class_num=self.class_num)
    self.C = classifier(input_dim=data.shape[1], output_dim=self.class_num,
                        input_size=self.input_size, class_num=self.class_num)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))
    self.C_optimizer = optim.Adam(self.C.parameters(), lr=args.lrC,
                                  betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.C.cuda()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    utils.print_network(self.C)
    print('-----------------------------------------------')

    # fixed noise (each group 0,1,2,...,9 shares the same z) & condition
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(self.class_num - 1):
        self.sample_z_[i * (self.class_num - 1)] = torch.rand(1, self.z_dim)
        for j in range(1, self.class_num - 1):
            self.sample_z_[i * (self.class_num - 1) + j] = \
                self.sample_z_[i * (self.class_num - 1)]

    if self.use_fake_data:
        # only generate samples of the minority class
        self.temp_y = torch.ones((self.sample_num, 1)) * self.minority
    else:
        # one row of labels 0..class_num-2, repeated per noise group
        temp = torch.zeros((self.class_num - 1, 1))
        for i in range(self.class_num - 1):
            temp[i, 0] = i
        self.temp_y = torch.zeros((self.sample_num, 1))
        for i in range(self.class_num - 1):
            self.temp_y[i * (self.class_num - 1):(i + 1) *
                        (self.class_num - 1)] = temp

    # one-hot labels; the extra last column marks minority samples
    self.sample_y_ = torch.zeros(
        (self.sample_num, self.class_num)).scatter_(
            1, self.temp_y.type(torch.LongTensor), 1)
    for i in range(self.sample_num):
        if self.sample_y_[i][self.minority] == 1:
            self.sample_y_[i][-1] = self.minority_label

    if self.gpu_mode:
        self.sample_z_, self.sample_y_ = self.sample_z_.cuda(
        ), self.sample_y_.cuda()
parser.add_argument("-m", "--mean-file", type=str, default='', help="Path to the file storing network configuration") parser.add_argument("-d", "--data", type=str, default='', help="Data file") args = parser.parse_args() ##################### # Build the model # ##################### net = load_network(args.network) print("Loaded network: ") print_network(net) # allocate symbolic variables for theano graph computations X_batch = T.tensor4('x') data = np.load(args.data) if args.mean_file: mean = np.load(args.mean_file) if args.mean_file: data = data - mean x_test = np.rollaxis(data, 3, 1) # allocate shared variables for images, labels and learing rate x_shared = theano.shared(np.zeros(
def __init__(self, args):
    """Build the BiGAN trainer: hyper-parameters from `args`, the G/D/E
    networks (fully-connected or convolutional), and two Adam solvers
    (one joint solver for E+G, one for D).

    Args:
        args: parsed command-line namespace (epoch, batch_size, save_dir,
              result_dir, log_dir, gpu_mode, lr, beta1, beta2, slope,
              decay, dropout, network_type, dataset, dataset_path,
              z_dim, h_dim).

    Raises:
        Exception: if args.network_type is neither 'FC' nor 'CNN'.
    """
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.learning_rate = args.lr
    self.beta1 = args.beta1
    self.beta2 = args.beta2
    self.slope = args.slope
    self.decay = args.decay
    self.dropout = args.dropout
    self.network_type = args.network_type
    self.dataset = args.dataset
    self.dataset_path = args.dataset_path

    # BIGAN parameters
    self.z_dim = args.z_dim  # dimension of feature space
    self.h_dim = args.h_dim  # dimension of the hidden layer

    # flattened data dimensionality per dataset
    if args.dataset == 'mnist':
        self.X_dim = 28 * 28  # dimension of data
        self.num_channels = 1
    elif args.dataset == 'robot_world':
        self.X_dim = 16 * 16 * 3  # dimension of data
        self.num_channels = 3
    # NOTE(review): any other dataset falls through with X_dim/num_channels
    # unset; the CNN branch below would then raise AttributeError.

    if args.network_type == 'FC':
        # networks init
        self.G = Generator_FC(self.z_dim, self.h_dim, self.X_dim)
        self.D = Discriminator_FC(self.z_dim, self.h_dim, self.X_dim)
        self.E = Encoder_FC(self.z_dim, self.h_dim, self.X_dim)
    elif args.network_type == 'CNN':
        params = {
            'slope': self.slope,
            'dropout': self.dropout,
            'batch_size': self.batch_size,
            'num_channels': self.num_channels,
            'dataset': self.dataset
        }
        self.G = Generator_CNN(self.z_dim, self.h_dim, self.X_dim, params)
        self.D = Discriminator_CNN(self.z_dim, self.h_dim, self.X_dim, params)
        self.E = Encoder_CNN(self.z_dim, self.h_dim, self.X_dim, params)
    else:
        raise Exception("[!] There is no option for " + args.network_type)

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.E.cuda()

    # Encoder and Generator are trained jointly against the Discriminator,
    # hence the single chained solver for E+G.
    self.G_solver = optim.Adam(chain(self.E.parameters(),
                                     self.G.parameters()),
                               lr=self.learning_rate,
                               betas=[self.beta1, self.beta2],
                               weight_decay=self.decay)
    self.D_solver = optim.Adam(self.D.parameters(),
                               lr=self.learning_rate,
                               betas=[self.beta1, self.beta2],
                               weight_decay=self.decay)

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.E)
    utils.print_network(self.D)
    print('-----------------------------------------------')
def __init__(self, args):
    """Build an MSE-loss (LSGAN-style) trainer: hyper-parameters, G/D
    networks with Adam optimizers, the dataset loader, and a fixed noise
    batch for sampling.

    Args:
        args: parsed command-line namespace (epoch, batch_size, save_dir,
              result_dir, dataset, dataroot_dir, log_dir, gpu_mode,
              gan_type, lrG, lrD, beta1, beta2).
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 64  # number of images per result grid
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.dataroot_dir = args.dataroot_dir
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.MSE_loss = nn.MSELoss().cuda()  # least-squares adversarial loss
    else:
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load dataset; data lives under <dataroot_dir>/<dataset>
    data_dir = os.path.join(self.dataroot_dir, self.dataset)
    if self.dataset == 'mnist':
        self.data_loader = DataLoader(datasets.MNIST(
            data_dir, train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size,
            shuffle=True)
    elif self.dataset == 'fashion-mnist':
        self.data_loader = DataLoader(datasets.FashionMNIST(
            data_dir, train=True, download=True,
            transform=transforms.Compose([transforms.ToTensor()])),
            batch_size=self.batch_size,
            shuffle=True)
    elif self.dataset == 'celebA':
        # celebA is center-cropped then scaled to 64x64
        self.data_loader = utils.CustomDataLoader(
            data_dir,
            transform=transforms.Compose([
                transforms.CenterCrop(160),
                transforms.Scale(64),
                transforms.ToTensor()
            ]),
            batch_size=self.batch_size,
            shuffle=True)
    self.z_dim = 62  # latent noise dimensionality

    # fixed noise, reused across epochs so result grids are comparable
    if self.gpu_mode:
        self.sample_z_ = Variable(torch.rand(
            (self.batch_size, self.z_dim)).cuda(), volatile=True)
    else:
        self.sample_z_ = Variable(torch.rand(
            (self.batch_size, self.z_dim)), volatile=True)
def __init__(self, args, SUPERVISED=True):
    """Build the infoGAN trainer: hyper-parameters, G/D networks, three
    optimizers (G, D, and a joint G+D optimizer for the mutual-information
    term), MNIST data, and fixed noise/code tensors for two sample grids
    (varying label vs. varying continuous codes).

    Args:
        args: parsed command-line namespace (epoch, batch_size, save_dir,
              result_dir, dataset, log_dir, gpu_mode, gan_type, lrG, lrD,
              beta1, beta2).
        SUPERVISED: if True, real label info is used directly as the
              discrete latent code.
    """
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100  # 10x10 grid of fixed samples
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.SUPERVISED = SUPERVISED  # if it is true, label info is directly used for code
    self.len_discrete_code = 10  # categorical distribution (i.e. label)
    self.len_continuous_code = 2  # gaussian distribution (e.g. rotation, thickness)

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))
    # the information term updates both G and D, hence the chained optimizer
    self.info_optimizer = optim.Adam(itertools.chain(self.G.parameters(),
                                                     self.D.parameters()),
                                     lr=args.lrD,
                                     betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist
    self.data_X, self.data_Y = utils.load_mnist(args.dataset)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition (grid 1: same z per row, label varies per column)
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i*self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i*self.y_dim + j] = self.sample_z_[i*self.y_dim]

    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i

    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i*self.y_dim: (i+1)*self.y_dim] = temp

    # one-hot encode the label column into sample_y_
    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)
    self.sample_c_ = torch.zeros((self.sample_num, self.len_continuous_code))

    # manipulating two continuous code (grid 2: one z + label 0 throughout,
    # the two continuous codes sweep a 10x10 lattice over [-1, 1])
    temp_z_ = torch.rand((1, self.z_dim))
    self.sample_z2_ = temp_z_
    for i in range(self.sample_num - 1):
        self.sample_z2_ = torch.cat([self.sample_z2_, temp_z_])

    y = np.zeros(self.sample_num, dtype=np.int64)
    y_one_hot = np.zeros((self.sample_num, self.len_discrete_code))
    y_one_hot[np.arange(self.sample_num), y] = 1
    self.sample_y2_ = torch.from_numpy(y_one_hot).type(torch.FloatTensor)

    temp_c = torch.linspace(-1, 1, 10)
    self.sample_c2_ = torch.zeros((self.sample_num, 2))
    for i in range(10):
        for j in range(10):
            self.sample_c2_[i*10+j, 0] = temp_c[i]
            self.sample_c2_[i*10+j, 1] = temp_c[j]

    if self.gpu_mode:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True), \
            Variable(self.sample_c_.cuda(), volatile=True), Variable(self.sample_z2_.cuda(), volatile=True), \
            Variable(self.sample_y2_.cuda(), volatile=True), Variable(self.sample_c2_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True), \
            Variable(self.sample_c_, volatile=True), Variable(self.sample_z2_, volatile=True), \
            Variable(self.sample_y2_, volatile=True), Variable(self.sample_c2_, volatile=True)
def __init__(self, args):
    """Build the feature-matching GAN trainer: hyper-parameters, dataset
    loaders, G/E/D networks plus a Feature extractor, and their Adam
    optimizers (D and FC share one optimizer).

    Args:
        args: parsed command-line namespace (root, epoch, batch_size,
              save_dir, result_dir, dataset, log_dir, z_dim, model_name,
              load_model, lrG/lrD/lrE, beta1, beta2).
    """
    # parameters
    self.root = args.root
    self.epoch = args.epoch
    self.sample_num = 16  # number of fixed samples rendered per snapshot
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.z_dim = args.z_dim
    self.model_name = args.model_name + '_7'  # variant tag for output dirs
    self.load_model = args.load_model
    self.args = args

    # load dataset: train loader plus a validation loader on the test split
    if self.dataset == 'mnist':
        dset = datasets.MNIST('data/mnist', train=True, download=True,
                              transform=transforms.Compose([transforms.ToTensor()]))
        valid_dset = datasets.MNIST('data/mnist', train=False, download=True,
                                    transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=64, shuffle=True)
    elif self.dataset == 'emnist':
        dset = datasets.EMNIST('data/emnist', split='balanced', train=True,
                               download=True,
                               transform=transforms.Compose([transforms.ToTensor()]))
        valid_dset = datasets.EMNIST('data/emnist', split='balanced', train=False,
                                     download=True,
                                     transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'cifar10':
        # FIX: CIFAR-10 was being downloaded into 'data/mnist'; use its own
        # directory, consistent with the other trainers in this file.
        dset = datasets.CIFAR10(root='data/cifar10', train=True, download=True,
                                transform=transforms.Compose([transforms.ToTensor()]))
        valid_dset = datasets.CIFAR10(root='data/cifar10', train=False, download=True,
                                      transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'svhn':
        dset = datasets.SVHN(root='data/svhn', split='train', download=True,
                             transform=transforms.Compose([transforms.ToTensor()]))
        valid_dset = datasets.SVHN(root='data/svhn', split='test', download=True,
                                   transform=transforms.Compose([transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_dset, batch_size=self.batch_size,
                                       shuffle=True)
    elif self.dataset == 'fashion-mnist':
        dset = datasets.FashionMNIST('data/fashion-mnist', train=True, download=True,
                                     transform=transforms.Compose(
                                         [transforms.ToTensor()]))
        valid_dset = datasets.FashionMNIST('data/fashion-mnist', train=False,
                                           download=True,
                                           transform=transforms.Compose(
                                               [transforms.ToTensor()]))
        self.data_loader = DataLoader(
            dset, batch_size=self.batch_size, shuffle=True)
        self.valid_loader = DataLoader(
            valid_dset, batch_size=self.batch_size, shuffle=True)
    elif self.dataset == 'celebA':
        # TODO: add test data
        # NOTE(review): this branch sets no valid_loader, and the shape probe
        # below reads dset.train_data which celebA presumably lacks -- verify.
        dset = utils.load_celebA('data/celebA',
                                 transform=transforms.Compose(
                                     [transforms.CenterCrop(160),
                                      transforms.Scale(64),
                                      transforms.ToTensor()]))
        self.data_loader = DataLoader(dset, batch_size=self.batch_size, shuffle=True)

    # image dimensions, probed from the raw dataset arrays
    if self.dataset == 'svhn':
        # SVHN stores data as (N, C, H, W)
        self.height, self.width = dset.data.shape[2:4]
        self.pix_level = dset.data.shape[1]
    else:
        # MNIST-style datasets store (N, H, W) or (N, H, W, C)
        self.height, self.width = dset.train_data.shape[1:3]
        if len(dset.train_data.shape) == 3:
            self.pix_level = 1
        # elif self.dataset == 'cifar10':
        #     self.height = 2* self.height
        #     self.width = 2 * self.width
        #     self.pix_level = dset.train_data.shape[3]
        elif len(dset.train_data.shape) == 4:
            self.pix_level = dset.train_data.shape[3]

    print("Data shape is height:{}, width:{}, and pixel level:{}\n".format(
        self.height, self.width, self.pix_level))

    # networks init
    self.G = Generator(self.dataset, self.z_dim, self.height, self.width, self.pix_level)
    self.E = Encoder(self.dataset, self.z_dim, self.height, self.width, self.pix_level)
    self.D = Discriminator(self.dataset, self.height, self.width, self.pix_level)
    self.FC = Feature(self.dataset, self.height, self.width, self.pix_level)

    # NOTE(review): beta1 is scaled by 1.2 for all three optimizers --
    # presumably a deliberate tuning choice; confirm before changing.
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1 * 1.2, args.beta2))
    # D and the feature extractor are updated together
    self.D_optimizer = optim.Adam(chain(self.D.parameters(), self.FC.parameters()),
                                  lr=args.lrD, betas=(args.beta1 * 1.2, args.beta2))
    self.E_optimizer = optim.Adam(self.E.parameters(), lr=args.lrE,
                                  betas=(args.beta1 * 1.2, args.beta2))

    if torch.cuda.is_available():
        self.G.cuda()
        self.D.cuda()
        self.E.cuda()
        self.FC.cuda()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    utils.print_network(self.E)
    utils.print_network(self.FC)
    print('-----------------------------------------------')
def __init__(self, args):
    """Build the conditional-VAE trainer for imbalanced data:
    hyper-parameters, the (imbalanced) data loader, the CVAE network and
    its Adam optimizer, and fixed noise/condition tensors for sampling.

    Args:
        args: parsed command-line namespace (epoch, batch_size, save_dir,
              result_dir, dataset, log_dir, gpu_mode, gan_type, input_size,
              use_fake_data, fake_num, imbalance).
    """
    # parameters
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.input_size = args.input_size
    self.z_dim = 2  # 2-D latent so the manifold can be visualized
    self.class_num = 10
    self.minority = 6  # class treated as the minority class
    self.use_fake_data = args.use_fake_data
    self.fake_num = args.fake_num
    if self.use_fake_data:
        self.sample_num = self.fake_num
    else:
        self.sample_num = self.class_num**2

    self.conditional = True

    # load dataset
    # self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
    self.data_loader = load_data(self.dataset,
                                 imbalance=args.imbalance,
                                 batch_size=self.batch_size,
                                 shuffle=True)
    # NOTE(review): `data` is never read afterwards in this block --
    # presumably a leftover shape probe.
    data = self.data_loader.__iter__().__next__()[0]

    # networks init
    self.vae = VAE(encoder_layer_sizes=[784, 256],
                   latent_size=self.z_dim,
                   decoder_layer_sizes=[256, 784],
                   conditional=self.conditional,
                   num_labels=self.class_num if self.conditional else 0)
    self.optimizer = optim.Adam(self.vae.parameters(), lr=0.0002)
    # self.optimizer = optim.Adam(self.vae.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.loss_fn = loss_fn

    if self.gpu_mode:
        self.vae.cuda()

    print('---------- Networks architecture -------------')
    utils.print_network(self.vae)
    print('-----------------------------------------------')

    # fixed noise (each group 0,1,2,...,9 shares the same z) & condition
    # NOTE(review): the index i * class_num assumes sample_num >=
    # class_num**2; with use_fake_data and a small fake_num this would
    # index out of range -- verify against the callers.
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(self.class_num):
        self.sample_z_[i * self.class_num] = torch.rand(1, self.z_dim)
        for j in range(1, self.class_num):
            self.sample_z_[i * self.class_num + j] = self.sample_z_[i * self.class_num]

    if self.use_fake_data:
        # only generate samples of the minority class
        self.temp_y = torch.ones((self.sample_num, 1)) * self.minority
    else:
        # one column of labels 0..class_num-1, repeated per noise group
        temp = torch.zeros((self.class_num, 1))
        for i in range(self.class_num):
            temp[i, 0] = i
        self.temp_y = torch.zeros((self.sample_num, 1))
        for i in range(self.class_num):
            self.temp_y[i * self.class_num:(i + 1) *
                        self.class_num] = temp

    # one-hot encode the label column
    self.sample_y_ = torch.zeros(
        (self.sample_num, self.class_num)).scatter_(
            1, self.temp_y.type(torch.LongTensor), 1)

    if self.gpu_mode:
        self.sample_z_, self.sample_y_ = self.sample_z_.cuda(
        ), self.sample_y_.cuda()
def train(self):
    """Train the super-resolution model on the luminance (Y) channel.

    Builds the model/optimizer/loss, iterates over the training loader,
    shaves the target borders to match the network's output size, logs
    the per-step loss to TensorBoard, checkpoints every `save_epochs`
    epochs, and saves the final weights and a loss plot.
    """
    # load networks************************************************************************
    self.Choose_Model(self.Model_index)
    utils.print_network(self.model)
    #os.environ["CUDA_VISIBLE_DEVICES"] = '0,1,2,3'
    os.environ["CUDA_VISIBLE_DEVICES"] = '5,6,7,8'

    # optimizer
    self.momentum = 0.9
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr,
                               momentum=self.momentum)
    self.model.float()

    # loss function
    if self.loss_func == 'mse':
        self.loss = nn.MSELoss()
    elif self.loss_func == 'ssim':
        self.loss = pytorch_ssim.SSIM(window_size=11)

    if self.gpu_mode:
        # wrap in DataParallel so the batch is split across visible GPUs
        self.model = nn.DataParallel(self.model)
        self.model.cuda()
        self.loss.cuda()

    # load dataset
    train_data_loader = self.load_dataset(dataset='train')
    #val_data_loader = self.load_dataset(dataset='test')

    # set the logger
    log_dir = os.path.join(self.save_dir, 'logs')
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    logger = Logger(log_dir)
    #ckpt_dir = self.ckpt_dir
    #if not os.path.exists(ckpt_dir):
    #    os.makedirs(ckpt_dir, mode=0o777)

    ################# Train start#################
    print('Training is started.')
    avg_loss = []
    step = 0
    self.model.train()
    ### debug ###
    ### debug end ###
    for epoch in range(self.num_epochs):
        epoch_loss = 0
        for iter, data in enumerate(train_data_loader):
            LR = data['img_LR']
            HR = data['img_HR']
            #only use Y channel
            input_Y = LR[:, 0:1, :, :]
            target_Y = HR[:, 0:1, :, :]

            # Shave the target border so its size matches the network
            # output; the shave width depends on the scale factor.
            # NOTE(review): the ==6 and ==2 branches are identical --
            # presumably intentional per-architecture output sizes; verify.
            if self.scale_factor == 4:
                target_Y = utils.shave(
                    target_Y, border_size=2 * self.scale_factor
                )  #according to size of the output image passed the network
            elif self.scale_factor == 6:
                target_Y = utils.shave(target_Y,
                                       border_size=2 * self.scale_factor - 1)
            elif self.scale_factor == 2:
                target_Y = utils.shave(target_Y,
                                       border_size=2 * self.scale_factor - 1)
            else:
                target_Y = utils.shave(target_Y,
                                       border_size=2 * self.scale_factor - 2)

            if self.save_inImg == True:
                #save the net input image (first sample of the batch)
                saveinY = (input_Y.numpy()[0, :, :, :].transpose(1, 2, 0) *
                           255).astype(numpy.uint8)
                scipy.misc.imsave('lrin.png', saveinY[:, :, 0])
                savetarY = (
                    target_Y.numpy()[0, :, :, :].transpose(1, 2, 0) *
                    255).astype(numpy.uint8)
                scipy.misc.imsave('tarin.png', savetarY[:, :, 0])

            if self.gpu_mode:
                target = Variable(target_Y.cuda())
                input = Variable(input_Y.cuda())
            else:
                target = Variable(target_Y)
                # target = Variable(utils.shave(target_Y, border_size=2*self.scale_factor))
                input = Variable(input_Y)

            ############## ORIGINAL ###############
            self.optimizer.zero_grad()
            recon_image = self.model(input)
            # if self.scale_factor ==2:
            #     recon_image = recon_image[:,:,1:-1,1:-1]
            # elif self.scale_factor == 3:
            #     recon_image = recon_image[:, :, 0:-1, 0:-1]

            #### SSIM loss ##############
            # loss = 1-self.loss(recon_image, target)
            loss = self.loss(recon_image, target)
            # print loss.data
            loss.backward()
            self.optimizer.step()

            # log
            epoch_loss += loss.data

            # tensorboard logging
            logger.scalar_summary('loss', loss.data, step + 1)
            step += 1

        if epoch % self.save_epochs == 0:
            #onnx_name = 'x' + str(self.scale_factor) + '_' + self.model_name + '_epoch_' + str(epoch) + '.onnx'
            #torch.onnx.export(self.model, input, onnx_name, export_params=True, verbose=True)
            self.save_model(epoch)
            #save_path = os.path.join(ckpt_dir, "{}_{}.pth".format(self.ckpt_name, epoch))
            #torch.save(self.model.state_dict(), save_path)
            #self.validation(epoch, val_data_loader)

        avg_loss.append(epoch_loss / len(train_data_loader))
        # NOTE(review): prints the summed epoch loss, not the average
        print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
              ((epoch + 1), (iter + 1), len(train_data_loader), epoch_loss))

    # Plot avg. loss
    utils.plot_loss([avg_loss], self.num_epochs, save_dir=self.save_dir)
    print("Training is finished.")

    # Save final trained parameters of model
    self.save_model(epoch=None)
if args.latest_discriminator_model != '': if torch.cuda.is_available(): D.load_state_dict(torch.load(args.latest_discriminator_model)) else: D.load_state_dict( torch.load(args.latest_discriminator_model, map_location=lambda storage, loc: storage)) VGG = networks.VGG19(init_weights=args.vgg_model, feature_mode=True) G_decoder.to(device) D.to(device) VGG.to(device) G_decoder.train() D.train() VGG.eval() print('---------- Networks initialized -------------') utils.print_network(G_decoder) utils.print_network(D) utils.print_network(VGG) print('-----------------------------------------------') # loss BCE_loss = nn.BCELoss().to(device) L1_loss = nn.L1Loss().to(device) # Adam optimizer G_optimizer = optim.Adam( [para for para in G_decoder.parameters() if para.requires_grad], lr=args.lrG, betas=(args.beta1, args.beta2)) D_optimizer = optim.Adam(D.parameters(), lr=args.lrD,