def main(_):
    """Entry point: set up run folders and logging, build the GAN, then train or test."""
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Reuse the folder stamp of a loaded model, otherwise stamp a new run by time.
    cur_time = datetime.now().strftime("%Y%m%d-%H%M") if FLAGS.load_model is None else FLAGS.load_model
    model_dir, log_dir, sample_dir, test_dir = utils.make_folders(
        is_train=FLAGS.is_train, cur_time=cur_time)

    # Module-level logger writing into this run's log folder.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    utils.init_logger(logger=logger, log_dir=log_dir, is_train=FLAGS.is_train, name='main')
    utils.print_main_parameters(logger, flags=FLAGS, is_train=FLAGS.is_train)

    # Dataset (resized_factor fixed at 0.25 here).
    data = Dataset(name=FLAGS.dataset, is_train=FLAGS.is_train,
                   resized_factor=0.25, log_dir=log_dir)

    # Model selection: WGAN-GP, or DCGAN as the default.
    if FLAGS.method.lower() == 'wgan-gp':
        model = WGAN_GP()
    else:
        # Total optimizer steps derived from epochs * images / batch size.
        total_iters = int(np.ceil(FLAGS.epoch * data.num_images / FLAGS.batch_size))
        model = DCGAN(image_shape=data.image_shape,
                      data_path=data(),
                      batch_size=FLAGS.batch_size,
                      z_dim=FLAGS.z_dim,
                      lr=FLAGS.learning_rate,
                      beta1=FLAGS.beta1,
                      total_iters=total_iters,
                      is_train=FLAGS.is_train,
                      log_dir=log_dir)

    # Solver drives the train/test loops for the chosen model.
    solver = Solver(model=model, dataset_name=data.name, batch_size=FLAGS.batch_size,
                    z_dim=FLAGS.z_dim, log_dir=log_dir)

    # Keep only the most recent checkpoint on disk.
    saver = tf.train.Saver(max_to_keep=1)

    if FLAGS.is_train:
        train(solver, data, saver, logger, sample_dir, model_dir, log_dir)
    else:
        test(solver, saver, test_dir, model_dir, log_dir)
def main(_):
    """Entry point for the Pix2pix generation task.

    Sets up run folders and logging, constructs the dataset, model and
    solver, then dispatches to train() or test() based on FLAGS.is_train.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Initialize model and log folders: reuse a previous run's stamp when
    # continuing from a saved model, otherwise create a fresh timestamp.
    if FLAGS.load_model is None:
        cur_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    else:
        cur_time = FLAGS.load_model
    model_dir, log_dir, sample_dir, _, test_dir = utils.make_folders(
        isTrain=FLAGS.is_train,
        curTime=cur_time,
        subfolder=os.path.join('generation', FLAGS.method))

    # Logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    utils.init_logger(logger=logger, logDir=log_dir, isTrain=FLAGS.is_train, name='egmain')
    print_main_parameters(logger, flags=FLAGS, is_train=FLAGS.is_train)

    # Initialize dataset
    data = Dataset(name=FLAGS.dataset,
                   track='Generative_Dataset',
                   isTrain=FLAGS.is_train,
                   resizedFactor=FLAGS.resize_factor,
                   logDir=log_dir)

    # Initialize model
    model = Pix2pix(decode_img_shape=data.decode_img_shape,
                    output_shape=data.single_img_shape,
                    num_classes=data.num_classes,
                    data_path=data(is_train=FLAGS.is_train),
                    batch_size=FLAGS.batch_size,
                    lr=FLAGS.learning_rate,
                    total_iters=FLAGS.iters,
                    is_train=FLAGS.is_train,
                    log_dir=log_dir,
                    resize_factor=FLAGS.resize_factor,
                    lambda_1=FLAGS.lambda_1)

    # Initialize solver
    solver = Solver(model=model, data=data, is_train=FLAGS.is_train)

    # Keep only the most recent checkpoint on disk.
    saver = tf.compat.v1.train.Saver(max_to_keep=1)

    # Fix: test truthiness directly instead of `is True` (PEP 8).
    if FLAGS.is_train:
        train(solver, saver, logger, model_dir, log_dir, sample_dir)
    else:
        test(solver, saver, model_dir, test_dir)
def main(_):
    """Entry point for the session-based Pix2pix generation task.

    Creates run folders and a logger, opens a TF session, builds the
    dataset/model/solver, then runs train() or test() per FLAGS.is_train.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Initialize model and log folders: reuse the loaded model's stamp when
    # resuming, otherwise create a fresh timestamp for this run.
    if FLAGS.load_model is None:
        cur_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    else:
        cur_time = FLAGS.load_model
    model_dir, log_dir, sample_dir, _, test_dir = utils.make_folders(
        is_train=FLAGS.is_train, cur_time=cur_time, subfolder='generation')

    # Logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    utils.init_logger(logger=logger, log_dir=log_dir, is_train=FLAGS.is_train, name='main')
    print_main_parameters(logger, flags=FLAGS, is_train=FLAGS.is_train)

    # Initialize Session
    sess = tf.compat.v1.Session()

    # Initialize dataset
    data = eg_dataset.Dataset(name='generation',
                              resize_factor=FLAGS.resize_factor,
                              is_train=FLAGS.is_train,
                              log_dir=log_dir,
                              is_debug=False)

    # Initialize model; total iterations derived from epochs * images / batch.
    pix2pix = Pix2pix(input_img_shape=data.input_img_shape,
                      gen_mode=FLAGS.gen_mode,
                      iden_model_dir=FLAGS.load_iden_model,
                      session=sess,
                      lr=FLAGS.learning_rate,
                      total_iters=int(np.ceil((FLAGS.epoch * data.num_train_imgs) / FLAGS.batch_size)),
                      is_train=FLAGS.is_train,
                      log_dir=log_dir,
                      lambda_1=FLAGS.lambda_1,
                      num_class=data.num_seg_class)

    # Initialize solver
    solver = Solver(data=data, gen_model=pix2pix, session=sess, flags=FLAGS, log_dir=log_dir)

    # Fix: test truthiness directly instead of `is True` (PEP 8).
    if FLAGS.is_train:
        train(solver, logger, model_dir, log_dir, sample_dir)
    else:
        test(solver, model_dir, log_dir, test_dir)
def main(_):
    """Entry point for the classifier experiments.

    Builds MNIST (logistic / neural_network) or CIFAR10 (cnn) data per
    FLAGS.model, then trains or tests over the optimizer/dropout grid.

    Raises:
        NotImplementedError: if FLAGS.model is not a supported model name.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Evaluation optimizers and dropout configurations swept during training.
    optimizer_options = ['SGDNesterov', 'Adagrad', 'RMSProp', 'AdaDelta', 'Adam']
    dropout_options = [False, True]

    # Initialize model and log folders (reuse stamp when loading a model).
    if FLAGS.load_model is None:
        cur_time = datetime.now().strftime("%Y%m%d-%H%M")
    else:
        cur_time = FLAGS.load_model
    model_dir, log_dir = make_folders(is_train=FLAGS.is_train,
                                      base=FLAGS.model,
                                      cur_time=cur_time)
    init_logger(log_dir=log_dir, is_train=FLAGS.is_train)

    # Hoist: .lower() was recomputed in each comparison.
    model_name = FLAGS.model.lower()
    if model_name in ('logistic', 'neural_network'):
        # Initialize MNIST dataset and print basic information.
        data = MNIST(log_dir=log_dir)
        # Fix: `True if FLAGS.is_train else False` was a redundant ternary.
        data.info(use_logging=bool(FLAGS.is_train), show_img=False)
    elif model_name == 'cnn':
        # Initialize CIFAR10 dataset and print basic information.
        data = CIFAR10(log_dir=log_dir, is_train=FLAGS.is_train)
        data.info(use_logging=bool(FLAGS.is_train), show_img=False, smooth=True)
        # Data preprocessing [whiten or subtract_mean].
        data.preprocessing(use_whiten=FLAGS.is_whiten)
    else:
        raise NotImplementedError(f'Unsupported model: {FLAGS.model}')

    if FLAGS.is_train:
        train(data, optimizer_options, dropout_options, model_dir, log_dir)
    else:
        test(data, optimizer_options, dropout_options, model_dir, log_dir)
def main(_):
    """Entry point for the U-Net experiment: prepare folders, data, model, then run."""
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Folder stamp: continue an existing run or start a new timestamped one.
    cur_time = datetime.now().strftime("%Y%m%d-%H%M") if FLAGS.load_model is None else FLAGS.load_model
    model_dir, log_dir, sample_dir, test_dir = utils.make_folders(
        is_train=FLAGS.is_train, cur_time=cur_time)
    init_logger(log_dir=log_dir, is_train=FLAGS.is_train)

    # Dataset
    data = Dataset(name=FLAGS.dataset, log_dir=log_dir)
    data.info(use_logging=True, log_dir=log_dir)

    # TensorFlow session
    sess = tf.Session()

    # U-Net model
    model = Model(input_shape=data.input_shape,
                  output_shape=data.output_shape,
                  lr=FLAGS.learning_rate,
                  weight_decay=FLAGS.weight_decay,
                  total_iters=FLAGS.iters,
                  is_train=FLAGS.is_train,
                  log_dir=log_dir,
                  name='U-Net')

    # Solver and checkpoint saver (only the latest checkpoint is kept).
    solver = Solver(sess, model, data.mean_value)
    saver = tf.train.Saver(max_to_keep=1)

    if FLAGS.is_train:
        train(data, solver, saver, model_dir, log_dir, sample_dir)
    else:
        test(data, solver, saver, model_dir, test_dir)
def main(_):
    """Entry point for the segmentation task.

    Builds a UNet (or DenseUNet for 'v5' methods), then trains or tests
    according to FLAGS.is_train.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index

    # Initialize model and log folders (reuse stamp when loading a model).
    if FLAGS.load_model is None:
        curTime = datetime.now().strftime("%Y%m%d-%H%M%S")
    else:
        curTime = FLAGS.load_model
    modelDir, logDir, sampleDir, valDir, testDir = utils.make_folders(
        isTrain=FLAGS.is_train, curTime=curTime, subfolder=FLAGS.method)

    # Logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    utils.init_logger(logger=logger, logDir=logDir, isTrain=FLAGS.is_train, name='main')
    utils.print_main_parameters(logger, flags=FLAGS, isTrain=FLAGS.is_train)

    # Initialize dataset
    data = Dataset(name=FLAGS.dataset, isTrain=FLAGS.is_train,
                   resizedFactor=FLAGS.resize_factor, logDir=logDir)

    # Initialize model.
    # Fix: `not 'v5' in FLAGS.method` -> `'v5' not in FLAGS.method` (PEP 8).
    if 'v5' not in FLAGS.method:
        model = UNet(decodeImgShape=data.decodeImgShape,
                     outputShape=data.singleImgShape,
                     numClasses=data.numClasses,
                     dataPath=data(isTrain=FLAGS.is_train),
                     batchSize=FLAGS.batch_size,
                     lr=FLAGS.learning_rate,
                     weightDecay=FLAGS.weight_decay,
                     totalIters=FLAGS.iters,
                     isTrain=FLAGS.is_train,
                     logDir=logDir,
                     method=FLAGS.method,
                     multi_test=FLAGS.multi_test,
                     advanced_multi_test=FLAGS.advanced_multi_test,
                     resize_factor=FLAGS.resize_factor,
                     use_dice_loss=FLAGS.use_dice_loss,
                     lambda_one=FLAGS.lambda_one,
                     name='UNet')
    else:
        model = DenseUNet(decodeImgShape=data.decodeImgShape,
                          outputShape=data.singleImgShape,
                          numClasses=data.numClasses,
                          dataPath=data(isTrain=FLAGS.is_train),
                          batchSize=FLAGS.batch_size,
                          lr=FLAGS.learning_rate,
                          weightDecay=FLAGS.weight_decay,
                          totalIters=FLAGS.iters,
                          isTrain=FLAGS.is_train,
                          logDir=logDir,
                          method=FLAGS.method,
                          multi_test=FLAGS.multi_test,
                          resize_factor=FLAGS.resize_factor,
                          use_dice_loss=FLAGS.use_dice_loss,
                          use_batch_norm=FLAGS.use_batch_norm,
                          lambda_one=FLAGS.lambda_one,
                          name='DenseUNet')

    # Initialize solver
    solver = Solver(model=model, data=data, is_train=FLAGS.is_train,
                    multi_test=FLAGS.multi_test)

    # Keep only the most recent checkpoint on disk.
    saver = tf.compat.v1.train.Saver(max_to_keep=1)

    # Fix: test truthiness directly instead of `is True` (PEP 8).
    if FLAGS.is_train:
        train(solver, saver, logger, modelDir, logDir, sampleDir)
    else:
        test(solver, saver, modelDir, valDir, testDir, data)
def train():
    """Stage-2 GAN training: refine AE reconstructions into final images.

    Loads a pre-trained autoencoder (frozen, eval mode), then adversarially
    trains a refinement generator `net_ig` against a patch discriminator
    `net_id`, keeping an EMA copy of the generator for sampling and
    periodically logging, checkpointing, and computing FID.

    NOTE(review): relies on module-level names (make_folders, PairedMultiDataset,
    InfiniteSamplerWrapper, AE, Discriminator, copy_G_params, load_params,
    true_randperm, d_hinge_loss, g_hinge_loss, make_matrix, AverageMeter) —
    defined elsewhere in this project.
    """
    from benchmark import calc_fid, extract_feature_from_generator_fn, load_patched_inception_v3, real_image_loader, image_generator, image_generator_perm
    import lpips
    from config import IM_SIZE_GAN, BATCH_SIZE_GAN, NFC, NBR_CLS, DATALOADER_WORKERS, EPOCH_GAN, ITERATION_AE, GAN_CKECKPOINT
    from config import SAVE_IMAGE_INTERVAL, SAVE_MODEL_INTERVAL, LOG_INTERVAL, SAVE_FOLDER, TRIAL_NAME, DATA_NAME, MULTI_GPU
    from config import FID_INTERVAL, FID_BATCH_NBR, PRETRAINED_AE_PATH
    from config import data_root_colorful, data_root_sketch_1, data_root_sketch_2, data_root_sketch_3

    # Inception network for FID; real-image features are cached lazily below.
    real_features = None
    inception = load_patched_inception_v3().cuda()
    inception.eval()

    # LPIPS perceptual distance used as the G reconstruction loss (non-shoe data).
    percept = lpips.PerceptualLoss(model='net-lin', net='vgg', use_gpu=True)

    # Create output folders and an (initially empty) text log file.
    saved_image_folder = saved_model_folder = None
    log_file_path = None
    if saved_image_folder is None:
        saved_image_folder, saved_model_folder = make_folders(
            SAVE_FOLDER, 'GAN_' + TRIAL_NAME)
        log_file_path = saved_image_folder + '/../gan_log.txt'
        log_file = open(log_file_path, 'w')
        log_file.close()

    # Paired (rgb, 3 sketch variants) dataset; infinite sampler -> iterate forever.
    dataset = PairedMultiDataset(data_root_colorful,
                                 data_root_sketch_1,
                                 data_root_sketch_2,
                                 data_root_sketch_3,
                                 im_size=IM_SIZE_GAN,
                                 rand_crop=True)
    print('the dataset contains %d images.' % len(dataset))
    dataloader = iter(
        DataLoader(dataset,
                   BATCH_SIZE_GAN,
                   sampler=InfiniteSamplerWrapper(dataset),
                   num_workers=DATALOADER_WORKERS,
                   pin_memory=True))

    # Separate unpaired folders used only for the qualitative sample matrix.
    from datasets import ImageFolder
    from datasets import trans_maker_augment as trans_maker
    dataset_rgb = ImageFolder(data_root_colorful, trans_maker(512))
    dataset_skt = ImageFolder(data_root_sketch_3, trans_maker(512))

    # Load the frozen stage-1 autoencoder; its outputs seed the refiner.
    net_ae = AE(nfc=NFC, nbr_cls=NBR_CLS)
    if PRETRAINED_AE_PATH is None:
        # Default to the checkpoint produced by this trial's AE stage.
        PRETRAINED_AE_PATH = 'train_results/' + 'AE_' + TRIAL_NAME + '/models/%d.pth' % ITERATION_AE
    else:
        from config import PRETRAINED_AE_ITER
        PRETRAINED_AE_PATH = PRETRAINED_AE_PATH + '/models/%d.pth' % PRETRAINED_AE_ITER
    net_ae.load_state_dicts(PRETRAINED_AE_PATH)
    net_ae.cuda()
    net_ae.eval()

    # Pick the refinement generator architecture by dataset.
    RefineGenerator = None
    if DATA_NAME == 'celeba':
        from models import RefineGenerator_face as RefineGenerator
    elif DATA_NAME == 'art' or DATA_NAME == 'shoe':
        from models import RefineGenerator_art as RefineGenerator
    net_ig = RefineGenerator(nfc=NFC, im_size=IM_SIZE_GAN).cuda()
    net_id = Discriminator(nc=3).cuda(
    )  # we use the patch_gan, so the im_size for D should be 512 even if training image size is 1024

    if MULTI_GPU:
        net_ae = nn.DataParallel(net_ae)
        net_ig = nn.DataParallel(net_ig)
        net_id = nn.DataParallel(net_id)

    # EMA copy of generator parameters, used for sampling/checkpoints.
    net_ig_ema = copy_G_params(net_ig)

    opt_ig = optim.Adam(net_ig.parameters(), lr=2e-4, betas=(0.5, 0.999))
    opt_id = optim.Adam(net_id.parameters(), lr=2e-4, betas=(0.5, 0.999))

    # Resume GAN training from a checkpoint if configured.
    # NOTE(review): config name GAN_CKECKPOINT is misspelled upstream — kept as-is.
    if GAN_CKECKPOINT is not None:
        ckpt = torch.load(GAN_CKECKPOINT)
        net_ig.load_state_dict(ckpt['ig'])
        net_id.load_state_dict(ckpt['id'])
        net_ig_ema = ckpt['ig_ema']
        opt_ig.load_state_dict(ckpt['opt_ig'])
        opt_id.load_state_dict(ckpt['opt_id'])

    ## running-average loss meters for logging
    losses_g_img = AverageMeter()
    losses_d_img = AverageMeter()
    losses_mse = AverageMeter()
    # NOTE(review): losses_rec_s is logged and reset but never .update()d
    # anywhere in this loop, so its logged value is always the initial one.
    losses_rec_s = AverageMeter()
    losses_rec_ae = AverageMeter()

    # Fixed visualization batch, captured on the first iteration of each epoch.
    fixed_skt = fixed_rgb = fixed_perm = None
    fid = [[0, 0]]  # history of [fid, fid_with_permuted_style]

    for epoch in range(EPOCH_GAN):
        # 10000 iterations per epoch (hard-coded; matches the save condition below).
        for iteration in tqdm(range(10000)):
            rgb_img, skt_img_1, skt_img_2, skt_img_3 = next(dataloader)
            rgb_img = rgb_img.cuda()

            # Randomly pick one of the three sketch variants.
            # NOTE(review): randint(0, 3) is inclusive, so variant 3 is chosen
            # with probability 1/2 (rd in {2, 3} both hit the else branch).
            rd = random.randint(0, 3)
            if rd == 0:
                skt_img = skt_img_1.cuda()
            elif rd == 1:
                skt_img = skt_img_2.cuda()
            else:
                skt_img = skt_img_3.cuda()

            if iteration == 0:
                # Freeze a small batch for consistent progress snapshots.
                fixed_skt = skt_img_3[:8].clone().cuda()
                fixed_rgb = rgb_img[:8].clone()
                fixed_perm = true_randperm(fixed_rgb.shape[0], 'cuda')

            ### 1. train D on real vs. refined-fake with hinge loss
            gimg_ae, style_feats = net_ae(skt_img, rgb_img)
            g_image = net_ig(gimg_ae, style_feats)
            pred_r = net_id(rgb_img)
            pred_f = net_id(g_image.detach())
            loss_d = d_hinge_loss(pred_r, pred_f)
            net_id.zero_grad()
            loss_d.backward()
            opt_id.step()

            # Track AE reconstruction quality (monitoring only; AE is frozen).
            loss_rec_ae = F.mse_loss(gimg_ae, rgb_img) + F.l1_loss(
                gimg_ae, rgb_img)
            losses_rec_ae.update(loss_rec_ae.item(), BATCH_SIZE_GAN)

            ### 2. train G: hinge adversarial loss + reconstruction term
            pred_g = net_id(g_image)
            loss_g = g_hinge_loss(pred_g)

            if DATA_NAME == 'shoe':
                # Pixel losses work better on the shoe data.
                loss_mse = 10 * (F.l1_loss(g_image, rgb_img) +
                                 F.mse_loss(g_image, rgb_img))
            else:
                # LPIPS perceptual loss at 256x256.
                loss_mse = 10 * percept(
                    F.adaptive_avg_pool2d(g_image, output_size=256),
                    F.adaptive_avg_pool2d(rgb_img, output_size=256)).sum()
            losses_mse.update(loss_mse.item() / BATCH_SIZE_GAN, BATCH_SIZE_GAN)

            loss_all = loss_g + loss_mse

            if DATA_NAME == 'shoe':
                ### the grey image reconstruction: with permuted (mismatched)
                ### styles, the generated image should still match in luminance.
                perm = true_randperm(BATCH_SIZE_GAN)
                img_ae_perm, style_feats_perm = net_ae(skt_img, rgb_img[perm])

                gimg_grey = net_ig(img_ae_perm, style_feats_perm)
                gimg_grey = gimg_grey.mean(dim=1, keepdim=True)
                real_grey = rgb_img.mean(dim=1, keepdim=True)
                loss_rec_grey = F.mse_loss(gimg_grey, real_grey)
                loss_all += 10 * loss_rec_grey

            net_ig.zero_grad()
            loss_all.backward()
            opt_ig.step()

            # EMA update of the generator parameters (decay 0.999).
            for p, avg_p in zip(net_ig.parameters(), net_ig_ema):
                avg_p.mul_(0.999).add_(p.data, alpha=0.001)

            ### 3. logging
            losses_g_img.update(pred_g.mean().item(), BATCH_SIZE_GAN)
            losses_d_img.update(pred_r.mean().item(), BATCH_SIZE_GAN)

            if iteration % SAVE_IMAGE_INTERVAL == 0:  #show the current images
                with torch.no_grad():
                    # Sample with the EMA weights, then restore live weights.
                    backup_para_g = copy_G_params(net_ig)
                    load_params(net_ig, net_ig_ema)

                    gimg_ae, style_feats = net_ae(fixed_skt, fixed_rgb)
                    gmatch = net_ig(gimg_ae, style_feats)

                    gimg_ae_perm, style_feats = net_ae(fixed_skt, fixed_rgb[fixed_perm])
                    gmismatch = net_ig(gimg_ae_perm, style_feats)

                    gimg = torch.cat([
                        F.interpolate(fixed_rgb, IM_SIZE_GAN),
                        F.interpolate(fixed_skt.repeat(1, 3, 1, 1), IM_SIZE_GAN),
                        gmatch,
                        F.interpolate(gimg_ae, IM_SIZE_GAN), gmismatch,
                        F.interpolate(gimg_ae_perm, IM_SIZE_GAN)
                    ])

                    vutils.save_image(
                        gimg,
                        f'{saved_image_folder}/img_iter_{epoch}_{iteration}.jpg',
                        normalize=True,
                        range=(-1, 1))
                    del gimg

                    make_matrix(
                        dataset_rgb, dataset_skt, net_ae, net_ig, 5,
                        f'{saved_image_folder}/img_iter_{epoch}_{iteration}_matrix.jpg'
                    )

                    load_params(net_ig, backup_para_g)

            if iteration % LOG_INTERVAL == 0:
                log_msg = 'Iter: [{0}/{1}] G: {losses_g_img.avg:.4f} D: {losses_d_img.avg:.4f} MSE: {losses_mse.avg:.4f} Rec: {losses_rec_s.avg:.5f} FID: {fid:.4f}'.format(
                    epoch,
                    iteration,
                    losses_g_img=losses_g_img,
                    losses_d_img=losses_d_img,
                    losses_mse=losses_mse,
                    losses_rec_s=losses_rec_s,
                    fid=fid[-1][0])

                print(log_msg)
                print('%.5f' % (losses_rec_ae.avg))

                if log_file_path is not None:
                    log_file = open(log_file_path, 'a')
                    log_file.write(log_msg + '\n')
                    log_file.close()

                losses_g_img.reset()
                losses_d_img.reset()
                losses_mse.reset()
                losses_rec_s.reset()
                losses_rec_ae.reset()

            if iteration % SAVE_MODEL_INTERVAL == 0 or iteration + 1 == 10000:
                print('Saving history model')
                torch.save(
                    {
                        'ig': net_ig.state_dict(),
                        'id': net_id.state_dict(),
                        'ae': net_ae.state_dict(),
                        'ig_ema': net_ig_ema,
                        'opt_ig': opt_ig.state_dict(),
                        'opt_id': opt_id.state_dict(),
                    }, '%s/%d.pth' % (saved_model_folder, epoch))

            if iteration % FID_INTERVAL == 0 and iteration > 1:
                print("calculating FID ...")
                fid_batch_images = FID_BATCH_NBR
                if real_features is None:
                    # Real-image statistics are computed once and cached on disk.
                    # NOTE(review): the cache file is written with pickle but
                    # named '.npy' — misleading extension, kept for compatibility.
                    if os.path.exists('%s_fid_feats.npy' % (DATA_NAME)):
                        real_features = pickle.load(
                            open('%s_fid_feats.npy' % (DATA_NAME), 'rb'))
                    else:
                        real_features = extract_feature_from_generator_fn(
                            real_image_loader(dataloader,
                                              n_batches=fid_batch_images),
                            inception)
                        real_mean = np.mean(real_features, 0)
                        real_cov = np.cov(real_features, rowvar=False)
                        pickle.dump(
                            {
                                'feats': real_features,
                                'mean': real_mean,
                                'cov': real_cov
                            }, open('%s_fid_feats.npy' % (DATA_NAME), 'wb'))
                        real_features = pickle.load(
                            open('%s_fid_feats.npy' % (DATA_NAME), 'rb'))

                # FID of matched-style samples.
                sample_features = extract_feature_from_generator_fn(
                    image_generator(dataset,
                                    net_ae,
                                    net_ig,
                                    n_batches=fid_batch_images),
                    inception,
                    total=fid_batch_images)
                cur_fid = calc_fid(sample_features,
                                   real_mean=real_features['mean'],
                                   real_cov=real_features['cov'])
                # FID of style-permuted samples.
                sample_features_perm = extract_feature_from_generator_fn(
                    image_generator_perm(dataset,
                                         net_ae,
                                         net_ig,
                                         n_batches=fid_batch_images),
                    inception,
                    total=fid_batch_images)
                cur_fid_perm = calc_fid(sample_features_perm,
                                        real_mean=real_features['mean'],
                                        real_cov=real_features['cov'])

                fid.append([cur_fid, cur_fid_perm])
                print('fid:', fid)

                if log_file_path is not None:
                    log_file = open(log_file_path, 'a')
                    log_msg = 'fid: %.5f, %.5f' % (fid[-1][0], fid[-1][1])
                    log_file.write(log_msg + '\n')
                    log_file.close()
def train(netG, netD, opt_G, opt_D, opt_E):
    """Alternating GAN training loop with a latent-reconstruction head.

    Args:
        netG: generator exposing r_sampler(z) (a distribution over r given z)
            and generate(r).
        netD: discriminator; netD(x) returns (real/fake logits, z estimate),
            netD.discriminate(x) returns logits only.
        opt_G, opt_D, opt_E: optimizers for generator, discriminator, and
            the encoder part updated with the generator step.

    NOTE(review): reads module-level globals (device, Z_DIM, R_DIM, BATCH_SIZE,
    MAX_ITERATION, SAVE_*_INTERVAL, LOG_INTERVAL, SAVE_FOLDER, TRIAL_NAME,
    dataloader, loss_bce, loss_mse, KL_Loss, kl_divergence, M_r, LAMBDA_G,
    BETA_KL, pjoin, save_* helpers) — defined elsewhere in this module.
    """
    # Running sums for periodic console logging (reset every LOG_INTERVAL).
    D_real = D_fake = D_z_kl = G_real = Z_recon = R_kl = 0

    # Fixed latent batch so saved images are comparable across iterations.
    fixed_z = torch.randn(64, Z_DIM).to(device)
    saved_image_folder, saved_model_folder = make_folders(SAVE_FOLDER, TRIAL_NAME)

    for n_iter in tqdm.tqdm(range(0, MAX_ITERATION+1)):
        # Periodic image snapshots and model checkpoints.
        if n_iter % SAVE_IMAGE_INTERVAL == 0:
            save_image_from_z(netG, fixed_z, pjoin(saved_image_folder, "z_%d.jpg"%n_iter))
            save_image_from_r(netG, R_DIM, pjoin(saved_image_folder, "r_%d.jpg"%n_iter))
        if n_iter % SAVE_MODEL_INTERVAL == 0:
            save_model(netG, netD, pjoin(saved_model_folder, "%d.pth"%n_iter))

        ### 0. prepare data
        real_image = next(dataloader)[0].to(device)
        z = torch.randn(BATCH_SIZE, Z_DIM).to(device)
        # e(r|z) as the likelihood of r given z
        r_sampler = netG.r_sampler(z)
        g_image = netG.generate(r_sampler.sample())

        ### 1. Train Discriminator on real and generated data
        netD.zero_grad()
        pred_f = netD.discriminate(g_image.detach())
        pred_r, rec_z = netD(real_image)
        # BCE on sigmoid logits: real -> 1, fake -> 0.
        d_loss = loss_bce(torch.sigmoid(pred_r), torch.ones(pred_r.size()).to(device)) \
            + loss_bce(torch.sigmoid(pred_f), torch.zeros(pred_f.size()).to(device))
        # KL regularizer on the discriminator's z reconstruction.
        q_loss = KL_Loss(rec_z)
        #d_loss.backward()
        total_loss = d_loss + q_loss
        total_loss.backward()
        opt_D.step()

        # record the loss values
        D_real += torch.sigmoid(pred_r).mean().item()
        D_fake += torch.sigmoid(pred_f).mean().item()
        D_z_kl += q_loss.item()

        ### 2. Train Generator
        netD.zero_grad()
        netG.zero_grad()
        # q(z|x) as the posterior of z given x
        pred_g, z_posterior = netD(g_image)
        # GAN loss for generator
        g_loss = LAMBDA_G * loss_bce(torch.sigmoid(pred_g), torch.ones(pred_g.size()).to(device))
        # reconstruction loss of z
        ## TODO
        ## question here: as stated in the paper-algorithm-1: this part should be a - log(q(z|x)) instead of mse
        recon_loss = loss_mse(z_posterior, z)
        # kl loss between e(r|z) || m(r) as a variational inference
        #kl_loss = BETA_KL * torch.distributions.kl.kl_divergence(r_likelihood, M_r).mean()
        kl_loss = BETA_KL * kl_divergence(r_sampler, M_r).mean()
        total_loss = g_loss + recon_loss + kl_loss
        total_loss.backward()
        # Encoder step first, then generator step — both share total_loss grads.
        opt_E.step()
        opt_G.step()

        # record the loss values
        G_real += torch.sigmoid(pred_g).mean().item()
        Z_recon += recon_loss.item()
        R_kl += kl_loss.item()

        if n_iter % LOG_INTERVAL == 0 and n_iter > 0:
            # Report averages over the last LOG_INTERVAL iterations, then reset.
            print("D(x): %.5f    D(G(z)): %.5f    D_kl: %.5f    G(z): %.5f    Z_rec: %.5f    R_kl: %.5f"%\
                (D_real/LOG_INTERVAL, D_fake/LOG_INTERVAL, D_z_kl/LOG_INTERVAL, G_real/LOG_INTERVAL, Z_recon/LOG_INTERVAL, R_kl/LOG_INTERVAL))
            D_real = D_fake = D_z_kl = G_real = Z_recon = R_kl = 0
def train():
    """Stage-1 autoencoder training with self-supervised style/content learning.

    Alternates two updates per iteration: (1) a self-supervised step that
    enforces style consistency (cosine), content consistency (MSE across
    sketch augmentations), and an instance-classification loss; (2) a plain
    autoencoder reconstruction step on paired rgb/sketch data. Checkpoints
    and sample images are written periodically.

    NOTE(review): relies on module-level names (PairedMultiDataset,
    SelfSupervisedDataset, InfiniteSamplerWrapper, StyleEncoder,
    ContentEncoder, Decoder, AverageMeter, loss_for_list_perm, true_randperm,
    make_folders) — defined elsewhere in this project.
    """
    from config import IM_SIZE_AE, BATCH_SIZE_AE, NFC, NBR_CLS, DATALOADER_WORKERS, ITERATION_AE
    from config import SAVE_IMAGE_INTERVAL, SAVE_MODEL_INTERVAL, SAVE_FOLDER, TRIAL_NAME, LOG_INTERVAL
    from config import DATA_NAME
    from config import data_root_colorful, data_root_sketch_1, data_root_sketch_2, data_root_sketch_3

    # Paired (rgb + 3 sketch variants) stream for the AE step.
    dataset = PairedMultiDataset(data_root_colorful,
                                 data_root_sketch_1,
                                 data_root_sketch_2,
                                 data_root_sketch_3,
                                 im_size=IM_SIZE_AE,
                                 rand_crop=True)
    print(len(dataset))
    dataloader = iter(DataLoader(dataset, BATCH_SIZE_AE, \
        sampler=InfiniteSamplerWrapper(dataset), num_workers=DATALOADER_WORKERS, pin_memory=True))

    # Self-supervised stream: augmented sketches + per-image class indices.
    dataset_ss = SelfSupervisedDataset(data_root_colorful,
                                       data_root_sketch_3,
                                       im_size=IM_SIZE_AE,
                                       nbr_cls=NBR_CLS,
                                       rand_crop=True)
    print(len(dataset_ss), len(dataset_ss.frame))
    dataloader_ss = iter(DataLoader(dataset_ss, BATCH_SIZE_AE, \
        sampler=InfiniteSamplerWrapper(dataset_ss), num_workers=DATALOADER_WORKERS, pin_memory=True))

    style_encoder = StyleEncoder(nfc=NFC, nbr_cls=NBR_CLS).cuda()
    content_encoder = ContentEncoder(nfc=NFC).cuda()
    decoder = Decoder(nfc=NFC).cuda()

    opt_c = optim.Adam(content_encoder.parameters(), lr=2e-4, betas=(0.5, 0.999))
    opt_s = optim.Adam(style_encoder.parameters(), lr=2e-4, betas=(0.5, 0.999))
    opt_d = optim.Adam(decoder.parameters(), lr=2e-4, betas=(0.5, 0.999))

    # Fresh classification head for the current class subset.
    style_encoder.reset_cls()
    style_encoder.final_cls.cuda()

    # Optionally resume all three networks + optimizers from a checkpoint.
    from config import PRETRAINED_AE_PATH, PRETRAINED_AE_ITER
    if PRETRAINED_AE_PATH is not None:
        PRETRAINED_AE_PATH = PRETRAINED_AE_PATH + '/models/%d.pth' % PRETRAINED_AE_ITER

        ckpt = torch.load(PRETRAINED_AE_PATH)
        print(PRETRAINED_AE_PATH)
        style_encoder.load_state_dict(ckpt['s'])
        content_encoder.load_state_dict(ckpt['c'])
        decoder.load_state_dict(ckpt['d'])
        opt_c.load_state_dict(ckpt['opt_c'])
        opt_s.load_state_dict(ckpt['opt_s'])
        opt_d.load_state_dict(ckpt['opt_d'])
        print('loaded pre-trained AE')

    # Re-init the classification head after loading (it is dataset-subset-specific).
    style_encoder.reset_cls()
    style_encoder.final_cls.cuda()
    opt_s_cls = optim.Adam(style_encoder.final_cls.parameters(), lr=2e-4, betas=(0.5, 0.999))

    saved_image_folder, saved_model_folder = make_folders(
        SAVE_FOLDER, 'AE_' + TRIAL_NAME)
    log_file_path = saved_image_folder + '/../ae_log.txt'
    log_file = open(log_file_path, 'w')
    log_file.close()

    ## for logging
    losses_sf_consist = AverageMeter()
    losses_cf_consist = AverageMeter()
    losses_cls = AverageMeter()
    losses_rec_rd = AverageMeter()
    losses_rec_org = AverageMeter()
    losses_rec_grey = AverageMeter()

    import lpips
    # LPIPS perceptual distance for reconstruction losses (non-shoe data).
    percept = lpips.PerceptualLoss(model='net-lin', net='vgg', use_gpu=True)

    for iteration in tqdm(range(ITERATION_AE)):
        # Rotate to the next class subset periodically; rebuild the dataloader,
        # the classification head, and its optimizer, and lower the base LRs.
        if iteration % (
            (NBR_CLS * 100) // BATCH_SIZE_AE) == 0 and iteration > 1:
            dataset_ss._next_set()
            dataloader_ss = iter(
                DataLoader(dataset_ss,
                           BATCH_SIZE_AE,
                           sampler=InfiniteSamplerWrapper(dataset_ss),
                           num_workers=DATALOADER_WORKERS,
                           pin_memory=True))
            style_encoder.reset_cls()
            opt_s_cls = optim.Adam(style_encoder.final_cls.parameters(),
                                   lr=2e-4,
                                   betas=(0.5, 0.999))
            opt_s.param_groups[0]['lr'] = 1e-4
            opt_d.param_groups[0]['lr'] = 1e-4

        ### 1. train the encoder with self-supervision methods
        rgb_img_rd, rgb_img_org, skt_org, skt_bold, skt_erased, skt_erased_bold, img_idx = next(
            dataloader_ss)
        rgb_img_rd = rgb_img_rd.cuda()
        rgb_img_org = rgb_img_org.cuda()
        img_idx = img_idx.cuda()

        # All sketch variants are resized to 512 before encoding.
        skt_org = F.interpolate(skt_org, size=512).cuda()
        skt_bold = F.interpolate(skt_bold, size=512).cuda()
        skt_erased = F.interpolate(skt_erased, size=512).cuda()
        skt_erased_bold = F.interpolate(skt_erased_bold, size=512).cuda()

        style_encoder.zero_grad()
        decoder.zero_grad()
        content_encoder.zero_grad()

        style_vector_rd, pred_cls_rd = style_encoder(rgb_img_rd)
        style_vector_org, pred_cls_org = style_encoder(rgb_img_org)

        content_feats = content_encoder(skt_org)
        content_feats_bold = content_encoder(skt_bold)
        content_feats_erased = content_encoder(skt_erased)
        content_feats_eb = content_encoder(skt_erased_bold)

        # Decode from a randomly chosen sketch augmentation.
        rd = random.randint(0, 3)
        gimg_rd = None
        if rd == 0:
            gimg_rd = decoder(content_feats, style_vector_rd)
        elif rd == 1:
            gimg_rd = decoder(content_feats_bold, style_vector_rd)
        elif rd == 2:
            gimg_rd = decoder(content_feats_erased, style_vector_rd)
        elif rd == 3:
            gimg_rd = decoder(content_feats_eb, style_vector_rd)

        # Content features should be invariant to sketch augmentation.
        loss_cf_consist = loss_for_list_perm(F.mse_loss, content_feats_bold, content_feats) +\
            loss_for_list_perm(F.mse_loss, content_feats_erased, content_feats) +\
            loss_for_list_perm(F.mse_loss, content_feats_eb, content_feats)

        # Style features: pull augmented/original pairs together, push apart
        # styles from shuffled (different-image) pairs.
        loss_sf_consist = 0
        for loss_idx in range(3):
            loss_sf_consist += -F.cosine_similarity(style_vector_rd[loss_idx], style_vector_org[loss_idx].detach()).mean() + \
                F.cosine_similarity(style_vector_rd[loss_idx], style_vector_org[loss_idx][torch.randperm(BATCH_SIZE_AE)].detach()).mean()

        # Instance classification: both views must predict the same image id.
        loss_cls = F.cross_entropy(pred_cls_rd, img_idx) + F.cross_entropy(
            pred_cls_org, img_idx)

        loss_rec_rd = F.mse_loss(gimg_rd, rgb_img_org)
        if DATA_NAME != 'shoe':
            loss_rec_rd += percept(
                F.adaptive_avg_pool2d(gimg_rd, output_size=256),
                F.adaptive_avg_pool2d(rgb_img_org, output_size=256)).sum()
        else:
            loss_rec_rd += F.l1_loss(gimg_rd, rgb_img_org)

        loss_total = loss_cls + loss_sf_consist + loss_rec_rd + loss_cf_consist  #+ loss_kl_c + loss_kl_s
        loss_total.backward()

        opt_s.step()
        opt_s_cls.step()
        opt_c.step()
        opt_d.step()

        ### 2. train as AutoEncoder
        rgb_img, skt_img_1, skt_img_2, skt_img_3 = next(dataloader)
        rgb_img = rgb_img.cuda()

        # Randomly pick a sketch variant.
        # NOTE(review): randint(0, 3) is inclusive — variant 3 is picked with
        # probability 1/2 (rd in {2, 3} both hit the else branch).
        rd = random.randint(0, 3)
        if rd == 0:
            skt_img = skt_img_1
        elif rd == 1:
            skt_img = skt_img_2
        else:
            skt_img = skt_img_3
        skt_img = F.interpolate(skt_img, size=512).cuda()

        style_encoder.zero_grad()
        decoder.zero_grad()
        content_encoder.zero_grad()

        style_vector, _ = style_encoder(rgb_img)
        content_feats = content_encoder(skt_img)
        gimg = decoder(content_feats, style_vector)

        loss_rec_org = F.mse_loss(gimg, rgb_img)
        if DATA_NAME != 'shoe':
            loss_rec_org += percept(
                F.adaptive_avg_pool2d(gimg, output_size=256),
                F.adaptive_avg_pool2d(rgb_img, output_size=256)).sum()
        #else:
        #    loss_rec_org += F.l1_loss(gimg, rgb_img)

        loss_rec = loss_rec_org
        if DATA_NAME == 'shoe':
            ### the grey image reconstruction: decoding with permuted styles
            ### should still match the real image's luminance channel.
            perm = true_randperm(BATCH_SIZE_AE)
            gimg_perm = decoder(content_feats, [s[perm] for s in style_vector])
            gimg_grey = gimg_perm.mean(dim=1, keepdim=True)
            real_grey = rgb_img.mean(dim=1, keepdim=True)
            loss_rec_grey = F.mse_loss(gimg_grey, real_grey)
            loss_rec += loss_rec_grey
        loss_rec.backward()

        opt_s.step()
        opt_d.step()
        opt_c.step()

        ### Logging
        losses_cf_consist.update(loss_cf_consist.mean().item(), BATCH_SIZE_AE)
        losses_sf_consist.update(loss_sf_consist.mean().item(), BATCH_SIZE_AE)
        losses_cls.update(loss_cls.mean().item(), BATCH_SIZE_AE)
        losses_rec_rd.update(loss_rec_rd.item(), BATCH_SIZE_AE)
        losses_rec_org.update(loss_rec_org.item(), BATCH_SIZE_AE)
        if DATA_NAME == 'shoe':
            losses_rec_grey.update(loss_rec_grey.item(), BATCH_SIZE_AE)

        if iteration % LOG_INTERVAL == 0:
            log_msg = 'Train Stage 1: AE: \nrec_rd: %.4f  rec_org: %.4f  cls: %.4f  style_consist: %.4f  content_consist: %.4f  rec_grey: %.4f'%(losses_rec_rd.avg, \
                losses_rec_org.avg, losses_cls.avg, losses_sf_consist.avg, losses_cf_consist.avg, losses_rec_grey.avg)
            print(log_msg)

            if log_file_path is not None:
                log_file = open(log_file_path, 'a')
                log_file.write(log_msg + '\n')
                log_file.close()

            losses_sf_consist.reset()
            losses_cls.reset()
            losses_rec_rd.reset()
            losses_rec_org.reset()
            losses_cf_consist.reset()
            losses_rec_grey.reset()

        if iteration % SAVE_IMAGE_INTERVAL == 0:
            # Snapshot of the self-supervised step: input, sketch, reconstruction.
            vutils.save_image(torch.cat([
                rgb_img_rd,
                F.interpolate(skt_org.repeat(1, 3, 1, 1), size=512), gimg_rd
            ]),
                              '%s/rd_%d.jpg' % (saved_image_folder, iteration),
                              normalize=True,
                              range=(-1, 1))
            if DATA_NAME != 'shoe':
                with torch.no_grad():
                    # Also visualize style-permuted decodes (no gradients needed).
                    perm = true_randperm(BATCH_SIZE_AE)
                    gimg_perm = decoder([c for c in content_feats],
                                        [s[perm] for s in style_vector])
                    vutils.save_image(torch.cat([
                        rgb_img,
                        F.interpolate(skt_img.repeat(1, 3, 1, 1), size=512),
                        gimg, gimg_perm
                    ]),
                                      '%s/org_%d.jpg' %
                                      (saved_image_folder, iteration),
                                      normalize=True,
                                      range=(-1, 1))

        if iteration % SAVE_MODEL_INTERVAL == 0:
            print('Saving history model')
            torch.save(
                {
                    's': style_encoder.state_dict(),
                    'd': decoder.state_dict(),
                    'c': content_encoder.state_dict(),
                    'opt_c': opt_c.state_dict(),
                    'opt_s_cls': opt_s_cls.state_dict(),
                    'opt_s': opt_s.state_dict(),
                    'opt_d': opt_d.state_dict(),
                }, '%s/%d.pth' % (saved_model_folder, iteration))

    # Final checkpoint after the full training run.
    torch.save(
        {
            's': style_encoder.state_dict(),
            'd': decoder.state_dict(),
            'c': content_encoder.state_dict(),
            'opt_c': opt_c.state_dict(),
            'opt_s_cls': opt_s_cls.state_dict(),
            'opt_s': opt_s.state_dict(),
            'opt_d': opt_d.state_dict(),
        }, '%s/%d.pth' % (saved_model_folder, ITERATION_AE))
def save_user_image(user_photo):
    """Download *user_photo* into the input folder and return its I/O paths.

    Returns:
        tuple: (input_image_path, output_image_path) for the downloaded file
        and the location where the processed result should be written.
    """
    make_folders()
    # Both paths share the same file name, derived from the photo's file id.
    file_name = f'{user_photo.file_id}.jpg'
    input_image_path = os.path.join('input', file_name)
    output_image_path = os.path.join('output', file_name)
    user_photo.download(input_image_path)
    return input_image_path, output_image_path