def run(**args):
    args = EasyDict(args)
    train = EasyDict(run_func_name = "training.training_loop.training_loop") # training loop options
    sched = EasyDict() # TrainingSchedule options
    vis = EasyDict() # visualize.eval() options
    grid = EasyDict(size = "1080p", layout = "random") # setup_snapshot_img_grid() options
    sc = dnnlib.SubmitConfig() # dnnlib.submit_run() options

    # Environment configuration
    tf_config = {
        "rnd.np_random_seed": 1000,
        "allow_soft_placement": True,
        "gpu_options.per_process_gpu_memory_fraction": 1.0
    }
    if args.gpus != "":
        num_gpus = len(args.gpus.split(","))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    assert num_gpus in [1, 2, 4, 8]
    sc.num_gpus = num_gpus

    # Networks configuration
    cG = set_net("G", reg_interval = 4)
    cD = set_net("D", reg_interval = 16)

    # Dataset configuration
    ratios = {
        "clevr": 0.75,
        "lsun-bedrooms": 0.72,
        "cityscapes": 0.5,
        "ffhq": 1.0
    }
    args.ratio = ratios.get(args.dataset, args.ratio)
    dataset_args = EasyDict(tfrecord_dir = args.dataset, max_imgs = args.max_images,
        ratio = args.ratio, num_threads = args.num_threads)
    for arg in ["data_dir", "mirror_augment", "total_kimg"]:
        cset(train, arg, args[arg])

    # Training and optimization configuration
    for arg in ["eval", "train", "recompile", "last_snapshots"]:
        cset(train, arg, args[arg])

    # Round down to the closest multiple of the minibatch size for validity
    args.batch_size -= args.batch_size % args.minibatch_size
    args.minibatch_std_size -= args.minibatch_std_size % args.minibatch_size
    args.latent_size -= args.latent_size % args.component_num
    if args.latent_size == 0:
        print(bcolored("Error: latent-size is too small. Must be a multiple of component-num.", "red"))
        exit()

    sched_args = {
        "G_lrate": "g_lr",
        "D_lrate": "d_lr",
        "minibatch_size": "batch_size",
        "minibatch_gpu": "minibatch_size"
    }
    for arg, cmd_arg in sched_args.items():
        cset(sched, arg, args[cmd_arg])
    cset(train, "clip", args.clip)

    # Logging and metrics configuration
    metrics = [metric_defaults[x] for x in args.metrics]
    cset(cG.args, "truncation_psi", args.truncation_psi)
    for arg in ["summarize", "keep_samples"]:
        cset(train, arg, args[arg])

    # Visualization
    args.imgs = args.images
    args.ltnts = args.latents
    vis_types = ["imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var", "style_mix"]
    # Set of all the enabled visualization types
    vis.vis_types = {arg for arg in vis_types if args[arg]}
    vis_args = {
        "grid": "vis_grid",
        "num": "vis_num",
        "rich_num": "vis_rich_num",
        "section_size": "vis_section_size",
        "intrp_density": "interpolation_density",
        "intrp_per_component": "interpolation_per_component",
        "alpha": "blending_alpha"
    }
    for arg, cmd_arg in vis_args.items():
        cset(vis, arg, args[cmd_arg])

    # Networks architecture
    cset(cG.args, "architecture", args.g_arch)
    cset(cD.args, "architecture", args.d_arch)
    cset([cG.args, cD.args], "resnet_mlp", args.resnet_mlp)
    cset(cG.args, "tanh", args.tanh)

    # Latent sizes
    if args.component_num > 1:
        if not (args.attention or args.merge):
            print(bcolored("Error: component-num > 1 but the model is not using components.", "red"))
            print(bcolored("Either add --attention for GANsformer or --merge for k-GAN.", "red"))
            exit()
        args.latent_size = int(args.latent_size / args.component_num)
    cD.args.latent_size = cG.args.latent_size = cG.args.dlatent_size = args.latent_size
    cset([cG.args, cD.args, train, vis], "component_num", args.component_num)

    # Mapping network
    for arg in ["layersnum", "lrmul", "dim", "shared"]:
        cset(cG.args, arg, args["mapping_{}".format(arg)])

    # StyleGAN settings
    for arg in ["style", "latent_stem", "fused_modconv", "local_noise"]:
        cset(cG.args, arg, args[arg])
    cD.args.mbstd_group_size = args.minibatch_std_size

    # GANsformer
    cset([cG.args, train], "attention", args.transformer)
    cset(cD.args, "attention", args.d_transformer)
    cset([cG.args, cD.args], "num_heads", args.num_heads)

    args.norm = args.normalize
    for arg in ["norm", "integration", "ltnt_gate", "img_gate", "kmeans",
                "kmeans_iters", "asgn_direct", "mapping_ltnt2ltnt"]:
        cset(cG.args, arg, args[arg])
    for arg in ["attention_inputs", "use_pos"]:
        cset([cG.args, cD.args], arg, args[arg])

    # Positional encoding
    for arg in ["dim", "init", "directions_num"]:
        field = "pos_{}".format(arg)
        cset([cG.args, cD.args], field, args[field])

    # k-GAN
    for arg in ["layer", "type", "channelwise"]:
        field = "merge_{}".format(arg)
        cset(cG.args, field, args[field])
    cset([cG.args, train], "merge", args.merge)

    # Attention
    for arg in ["start_res", "end_res", "ltnt2ltnt", "img2img", "local_attention"]:
        cset(cG.args, arg, args["g_{}".format(arg)])
        cset(cD.args, arg, args["d_{}".format(arg)])
    cset(cG.args, "img2ltnt", args.g_img2ltnt)
    cset(cD.args, "ltnt2img", args.d_ltnt2img)

    # Mixing and dropout
    for arg in ["style_mixing", "component_mixing", "component_dropout", "attention_dropout"]:
        cset(cG.args, arg, args[arg])

    # Loss and regularization
    gloss_args = {
        "loss_type": "g_loss",
        "reg_weight": "g_reg_weight",
        "pathreg": "pathreg",
    }
    dloss_args = {
        "loss_type": "d_loss",
        "reg_type": "d_reg",
        "gamma": "gamma"
    }
    for arg, cmd_arg in gloss_args.items():
        cset(cG.loss_args, arg, args[cmd_arg])
    for arg, cmd_arg in dloss_args.items():
        cset(cD.loss_args, arg, args[cmd_arg])

    ##### Experiments management:
    # Whenever we start a new experiment we store its results in a directory named 'args.expname:000'.
    # When we rerun a training or evaluation command it restores the model from that directory by default.
    # If we wish to restart the model training, we can set --restart, and data will then be stored in a new
    # directory: 'args.expname:001' after the first restart, 'args.expname:002' after the second, etc.

    # Find the latest directory that matches the experiment
    exp_dir = sorted(glob.glob("{}/{}:*".format(args.result_dir, args.expname)))
    run_id = 0
    if len(exp_dir) > 0:
        run_id = int(exp_dir[-1].split(":")[-1])
    # If restarting, then work over a new directory
    if args.restart:
        run_id += 1

    run_name = "{}:{:03d}".format(args.expname, run_id)
    train.printname = "{} ".format(misc.bold(args.expname))

    snapshot, kimg, resume = None, 0, False
    pkls = sorted(glob.glob("{}/{}/network*.pkl".format(args.result_dir, run_name)))
    # Load a particular snapshot if specified
    if args.pretrained_pkl:
        # Soft links support
        snapshot = glob.glob(args.pretrained_pkl)[0]
        if os.path.islink(snapshot):
            snapshot = os.readlink(snapshot)
        # Extract the training step from the snapshot filename if possible
        try:
            kimg = int(snapshot.split("-")[-1].split(".")[0])
        except:
            pass
    # Otherwise find the latest snapshot in the directory
    elif len(pkls) > 0:
        snapshot = pkls[-1]
        kimg = int(snapshot.split("-")[-1].split(".")[0])
        resume = True

    if snapshot:
        print(misc.bcolored("Resuming {}, kimg {}".format(snapshot, kimg), "white"))
        train.resume_pkl = snapshot
        train.resume_kimg = kimg
    else:
        print(misc.bcolored("Start model training from scratch.", "white"))

    # Run environment configuration
    sc.run_dir_root = args.result_dir
    sc.run_desc = args.expname
    sc.run_id = run_id
    sc.run_name = run_name
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True

    kwargs = EasyDict(train)
    kwargs.update(cG = cG, cD = cD)
    kwargs.update(dataset_args = dataset_args, vis_args = vis, sched_args = sched,
        grid_args = grid, metric_arg_list = metrics, tf_config = tf_config)
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.resume = resume
    # If reloading new options from the command line, there is no need to load the original configuration file
    kwargs.load_config = not args.reload
    dnnlib.submit_run(**kwargs)
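# The cset/nset helpers used throughout these launch scripts are defined elsewhere in the
# repository. Below is a minimal sketch of plausible implementations, inferred purely from
# how they are called above (the real source may differ): cset copies a command-line value
# into one or more config dicts only when the value is set, and nset fills in a default
# only when the argument was not provided on the command line.
def cset(dicts, name, value):
    # Skip unset values so config defaults are preserved
    if value is None:
        return
    # Support both a single config dict and a list of config dicts
    if not isinstance(dicts, list):
        dicts = [dicts]
    for d in dicts:
        d[name] = value

def nset(args, name, value):
    # Fill in a default: assign only if the argument is still unset
    if args.get(name) is None:
        args[name] = value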
def run(opt): """Sets-up all of the parameters necessary to start a ProgressiveGAN training job.""" desc = build_job_name( opt) # Description string included in result subdir name. train = EasyDict(run_func_name='training.training_loop.training_loop' ) # Options for training loop. G = EasyDict(func_name='training.networks_progan.G_paper' ) # Options for generator network. D = EasyDict(func_name='training.networks_progan.D_paper' ) # Options for discriminator network. G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer. D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer. G_loss = EasyDict( func_name='training.loss.G_wgan') # Options for generator loss. D_loss = EasyDict( func_name='training.loss.D_wgan_gp') # Options for discriminator loss. sched = EasyDict() # Options for TrainingSchedule. grid = EasyDict( size='1080p', layout='random') # Options for setup_snapshot_image_grid(). submit_config = dnnlib.SubmitConfig() # Options for dnnlib.submit_run(). tf_config = { 'rnd.np_random_seed': opt.seed } # Options for tflib.init_tf(). metrics = [] # Metrics to run during training. if 'FID' in opt.metrics: metrics.append(metric_base.fid50k) if 'PPL' in opt.metrics: metrics.append(metric_base.ppl_zend_v2) train.network_metric_ticks = opt.compute_metrics_ticks train.interp_snapshot_ticks = opt.compute_interp_ticks find_dataset(opt.dataset) # Optionally resume from checkpoint: if opt.resume_exp is not None: results_dir = os.path.join(os.getcwd(), config.result_dir) _resume_pkl = find_pkl(results_dir, opt.resume_exp, opt.resume_snapshot) train.resume_run_id = opt.resume_exp train.resume_snapshot = _resume_pkl train.resume_kimg = int(_resume_pkl.split('.pkl')[0][-6:]) if f'hessian_penalty_{opt.dataset}' not in _resume_pkl and opt.hp_lambda > 0: print( 'When fine-tuning a job that was originally trained without the Hessian Penalty, ' 'hp_start_kimg is relative to the kimg of the checkpoint being resumed from. ' 'Hessian Penalty will be phased-in starting at absolute ' f'kimg={opt.hp_start_kimg + train.resume_kimg}.') opt.hp_start_kimg += train.resume_kimg # Set up dataset hyper-parameters: dataset = EasyDict(tfrecord_dir=os.path.join(os.getcwd(), config.data_dir, opt.dataset), resolution=opt.resolution) train.mirror_augment = False # Set up network hyper-parameters: G.latent_size = opt.nz D.infogan_nz = opt.infogan_nz G.infogan_lambda = opt.infogan_lambda D.infogan_lambda = opt.infogan_lambda # When computing the multi-layer Hessian Penalty, we retrieve intermediate activations by accessing the # corresponding tensor's name. 
Below are the names of various activations in G that we can retrieve: activation_type = 'norm' progan_generator_layer_index_to_name = { 1: f'4x4/Dense/Post_{activation_type}', 2: f'4x4/Conv/Post_{activation_type}', 3: f'8x8/Conv0_up/Post_{activation_type}', 4: f'8x8/Conv1/Post_{activation_type}', 5: f'16x16/Conv0_up/Post_{activation_type}', 6: f'16x16/Conv1/Post_{activation_type}', 7: f'32x32/Conv0_up/Post_{activation_type}', 8: f'32x32/Conv1/Post_{activation_type}', 9: f'64x64/Conv0_up/Post_{activation_type}', 10: f'64x64/Conv1/Post_{activation_type}', 11: f'128x128/Conv0_up/Post_{activation_type}', 12: f'128x128/Conv1/Post_{activation_type}', 13: 'images_out' # final full-resolution RGB activation } # Convert from layer indices to layer names (which we'll need to compute the Hessian Penalty): layers_to_reg = [ progan_generator_layer_index_to_name[layer_ix] for layer_ix in sorted(opt.layers_to_reg) ] # Store the Hessian Penalty parameters in their own dictionary: HP = EasyDict(hp_lambda=opt.hp_lambda, epsilon=opt.epsilon, num_rademacher_samples=opt.num_rademacher_samples, layers_to_reg=layers_to_reg, warmup_nimg=opt.warmup_kimg * 1000, hp_start_nimg=opt.hp_start_kimg * 1000) # How long to train for (as measured by thousands of real images processed, not gradient steps): train.total_kimg = opt.total_kimg # We ran the original experiments using 4 GPUs per job. If using a different number, # we try to scale batch sizes up or down accordingly in the for-loop below. Note that # using other batch sizes is somewhat untested, though! submit_config.num_gpus = opt.num_gpus sched.minibatch_base = 32 sched.minibatch_dict = { 4: 2048, 8: 1024, 16: 512, 32: 256, 64: 128, 128: 96, 256: 32, 512: 16 } for res, batch_size in sched.minibatch_dict.items(): sched.minibatch_dict[res] = int(batch_size * opt.num_gpus / 4) # Set-up WandB if optionally using it instead of TensorBoard: if opt.dashboard_api == 'wandb': init_wandb(opt=opt, name=desc, group=opt.dataset, entity=opt.wandb_entity) # Start the training job: kwargs = EasyDict(train) kwargs.update(HP_args=HP, G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss) kwargs.update(dataset_args=dataset, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config) kwargs.submit_config = copy.deepcopy(submit_config) kwargs.submit_config.run_dir_root = dnnlib.submission.submit.get_template_from_path( config.result_dir) kwargs.submit_config.run_dir_ignore += config.run_dir_ignore kwargs.submit_config.run_desc = desc dnnlib.submit_run(**kwargs)
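# The resume logic above recovers the training step from the snapshot filename. It assumes
# ProGAN-style names ending in a zero-padded six-digit kimg count; a minimal sketch of that
# parsing, using a hypothetical filename:
def _kimg_from_pkl(pkl_name):
    # 'network-snapshot-006000.pkl' -> '006000' -> 6000
    return int(pkl_name.split('.pkl')[0][-6:])

assert _kimg_from_pkl('network-snapshot-006000.pkl') == 6000

# Similarly, the minibatch scaling loop above is linear in the GPU count (baseline: 4 GPUs).
# For a hypothetical 1-GPU job, the per-resolution dict above becomes:
#   {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 24, 256: 8, 512: 4}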
def run(dataset, data_dir, result_dir, config_id, num_gpus, total_kimg,
        gamma, mirror_augment, metrics):
    train = EasyDict(run_func_name='training.training_loop.training_loop')  # Options for training loop.
    G = EasyDict(func_name='training.invGAN.G_main')  # Options for generator network.
    D = EasyDict(func_name='training.invGAN.D_stylegan2')  # Options for discriminator network.
    G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8)  # Options for generator optimizer.
    D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8)  # Options for discriminator optimizer.
    G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg_inv')  # Options for generator loss.
    D_loss = EasyDict(func_name='training.loss.D_logistic_r1')  # Options for discriminator loss.
    sched = EasyDict()  # Options for TrainingSchedule.
    grid = EasyDict(size='8k', layout='random')  # Options for setup_snapshot_image_grid().
    sc = dnnlib.SubmitConfig()  # Options for dnnlib.submit_run().
    tf_config = {'rnd.np_random_seed': 1000}  # Options for tflib.init_tf().

    train.data_dir = data_dir
    train.total_kimg = total_kimg
    train.mirror_augment = mirror_augment
    train.image_snapshot_ticks = train.network_snapshot_ticks = 10
    sched.G_lrate_base = sched.D_lrate_base = 0.002
    sched.minibatch_size_base = 32
    sched.minibatch_gpu_base = 4
    D_loss.gamma = 10

    G.synthesis_func = 'G_quotient'
    # G.latents_size = 4096 * 3
    G.dlatent_size = 4096 * 3
    G.latent_size = 512
    G.mapping_fmaps = 512
    G.fmap_final = 3

    metrics = [metric_defaults[x] for x in metrics]
    desc = 'InvGan'
    desc += '-' + dataset
    dataset_args = EasyDict(tfrecord_dir=dataset)

    assert num_gpus in [1, 2, 4, 8]
    sc.num_gpus = num_gpus
    desc += '-%dgpu' % num_gpus

    assert config_id in _valid_configs
    desc += '-' + config_id

    # Configs A-E: Shrink networks to match original StyleGAN.
    if config_id != 'config-f':
        G.fmap_base = D.fmap_base = 8 << 10

    # Config E: Set gamma to 100 and override G & D architecture.
    if config_id.startswith('config-e'):
        D_loss.gamma = 100

    # Configs A-D: Enable progressive growing and switch to networks that support it.
    if config_id in ['config-a', 'config-b', 'config-c', 'config-d']:
        sched.lod_initial_resolution = 8
        sched.G_lrate_base = sched.D_lrate_base = 0.001
        sched.G_lrate_dict = sched.D_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}
        sched.minibatch_size_base = 32  # (default)
        sched.minibatch_size_dict = {8: 256, 16: 128, 32: 64, 64: 32}
        sched.minibatch_gpu_base = 4  # (default)
        sched.minibatch_gpu_dict = {8: 32, 16: 16, 32: 8, 64: 4}
        G.synthesis_func = 'G_synthesis_stylegan_revised'
        D.func_name = 'training.networks_stylegan2.D_stylegan'

    # Configs A-C: Disable path length regularization.
    if config_id in ['config-a', 'config-b', 'config-c']:
        G_loss = EasyDict(func_name='training.loss.G_logistic_ns')

    # Configs A-B: Disable lazy regularization.
    if config_id in ['config-a', 'config-b']:
        train.lazy_regularization = False

    # Config A: Switch to original StyleGAN networks.
    if config_id == 'config-a':
        pass
        # G = EasyDict(func_name='training.networks_stylegan.G_style')
        # D = EasyDict(func_name='training.networks_stylegan.D_basic')

    if gamma is not None:
        D_loss.gamma = gamma

    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    kwargs = EasyDict(train)
    kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt,
                  G_loss_args=G_loss, D_loss_args=D_loss)
    kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid,
                  metric_arg_list=metrics, tf_config=tf_config)
    kwargs.submit_config = copy.deepcopy(sc)
    kwargs.submit_config.run_dir_root = result_dir
    kwargs.submit_config.run_desc = desc
    dnnlib.submit_run(**kwargs)
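# Hypothetical invocation of the launcher above, for illustration only (the argument values
# are examples; 'fid50k' is assumed to be a key in metric_defaults, as in the StyleGAN2
# codebase this launcher derives from):
#
#   run(dataset='ffhq', data_dir='datasets', result_dir='results', config_id='config-f',
#       num_gpus=8, total_kimg=25000, gamma=None, mirror_augment=True, metrics=['fid50k'])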
def run(**args): args = EasyDict(args) train = EasyDict(run_func_name="training.training_loop.training_loop" ) # training loop options sched = EasyDict() # TrainingSchedule options vis = EasyDict() # visualize.eval() options grid = EasyDict(size="1080p", layout="random") # setup_snapshot_img_grid() options sc = dnnlib.SubmitConfig() # dnnlib.submit_run() options # If the flag is specified without arguments (--arg), set to True for arg in [ "summarize", "keep_samples", "style", "fused_modconv", "local_noise" ]: if args[arg] is None: args[arg] = True if not args.train and not args.eval: misc.log( "Warning: Neither --train nor --eval are provided. Therefore, we only print network shapes", "red") if args.gansformer_default: task = args.dataset pretrained = "gdrive:{}-snapshot.pkl".format(task) if pretrained not in pretrained_networks.gdrive_urls: pretrained = None nset(args, "recompile", pretrained is not None) nset(args, "pretrained_pkl", pretrained) nset(args, "mirror_augment", task in ["cityscapes", "ffhq"]) nset(args, "transformer", True) nset(args, "components_num", {"clevr": 8}.get(task, 16)) nset(args, "latent_size", {"clevr": 128}.get(task, 512)) nset(args, "normalize", "layer") nset(args, "integration", "mul") nset(args, "kmeans", True) nset(args, "use_pos", True) nset(args, "mapping_ltnt2ltnt", task != "clevr") nset(args, "style", task != "clevr") nset(args, "g_arch", "resnet") nset(args, "mapping_resnet", True) gammas = {"ffhq": 10, "cities": 20, "clevr": 40, "bedrooms": 100} nset(args, "gamma", gammas.get(task, 10)) if args.baseline == "GAN": nset(args, "style", False) nset(args, "latent_stem", True) if args.baseline == "SAGAN": nset(args, "style", False) nset(args, "latent_stem", True) nset(args, "g_img2img", 5) if args.baseline == "kGAN": nset(args, "kgan", True) nset(args, "merge_layer", 5) nset(args, "merge_type", "softmax") nset(args, "components_num", 8) # Environment configuration tf_config = { "rnd.np_random_seed": 1000, "allow_soft_placement": True, "gpu_options.per_process_gpu_memory_fraction": 1.0 } if args.gpus != "": num_gpus = len(args.gpus.split(",")) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus assert num_gpus in [1, 2, 4, 8] sc.num_gpus = num_gpus # Networks configuration cG = set_net("G", reg_interval=4) cD = set_net("D", reg_interval=16) # Dataset configuration # For bedrooms, we choose the most common ratio in the # dataset and crop the other images into that ratio. ratios = { "clevr": 0.75, "bedrooms": 188 / 256, "cityscapes": 0.5, "ffhq": 1.0 } args.ratio = ratios.get(args.dataset, args.ratio) dataset_args = EasyDict(tfrecord_dir=args.dataset, max_imgs=args.train_images_num, num_threads=args.num_threads) for arg in ["data_dir", "mirror_augment", "total_kimg", "ratio"]: cset(train, arg, args[arg]) # Training and Optimizations configuration for arg in ["eval", "train", "recompile", "last_snapshots"]: cset(train, arg, args[arg]) # Round to the closest multiply of minibatch size for validity args.batch_size -= args.batch_size % args.minibatch_size args.minibatch_std_size -= args.minibatch_std_size % args.minibatch_size args.latent_size -= args.latent_size % args.components_num if args.latent_size == 0: misc.error( "--latent-size is too small. 
Must best a multiply of components-num" ) sched_args = { "G_lrate": "g_lr", "D_lrate": "d_lr", "minibatch_size": "batch_size", "minibatch_gpu": "minibatch_size" } for arg, cmd_arg in sched_args.items(): cset(sched, arg, args[cmd_arg]) cset(train, "clip", args.clip) # Logging and metrics configuration metrics = [metric_defaults[x] for x in args.metrics] cset(cG.args, "truncation_psi", args.truncation_psi) for arg in ["keep_samples", "num_heads"]: cset(vis, arg, args[arg]) for arg in ["summarize", "eval_images_num"]: cset(train, arg, args[arg]) # Visualization args.vis_imgs = args.vis_images args.vis_ltnts = args.vis_latents vis_types = [ "imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var", "style_mix" ] # Set of all the set visualization types option vis.vis_types = {arg for arg in vis_types if args["vis_{}".format(arg)]} vis_args = { "attention": "transformer", "grid": "vis_grid", "num": "vis_num", "rich_num": "vis_rich_num", "section_size": "vis_section_size", "intrp_density": "interpolation_density", # "intrp_per_component": "interpolation_per_component", "alpha": "blending_alpha" } for arg, cmd_arg in vis_args.items(): cset(vis, arg, args[cmd_arg]) # Networks architecture cset(cG.args, "architecture", args.g_arch) cset(cD.args, "architecture", args.d_arch) cset(cG.args, "tanh", args.tanh) # Latent sizes if args.components_num > 1: if not (args.transformer or args.kgan): misc.error( "--components-num > 1 but the model is not using components. " + "Either add --transformer for GANsformer or --kgan for k-GAN.") args.latent_size = int(args.latent_size / args.components_num) cD.args.latent_size = cG.args.latent_size = cG.args.dlatent_size = args.latent_size cset([cG.args, cD.args, vis], "components_num", args.components_num) # Mapping network for arg in ["layersnum", "lrmul", "dim", "resnet", "shared_dim"]: field = "mapping_{}".format(arg) cset(cG.args, field, args[field]) # StyleGAN settings for arg in ["style", "latent_stem", "fused_modconv", "local_noise"]: cset(cG.args, arg, args[arg]) cD.args.mbstd_group_size = args.minibatch_std_size # GANsformer cset(cG.args, "transformer", args.transformer) cset(cD.args, "transformer", args.d_transformer) args.norm = args.normalize for arg in [ "norm", "integration", "ltnt_gate", "img_gate", "iterative", "kmeans", "kmeans_iters", "mapping_ltnt2ltnt" ]: cset(cG.args, arg, args[arg]) for arg in ["use_pos", "num_heads"]: cset([cG.args, cD.args], arg, args[arg]) # Positional encoding for arg in ["dim", "init", "directions_num"]: field = "pos_{}".format(arg) cset([cG.args, cD.args], field, args[field]) # k-GAN for arg in ["layer", "type", "same"]: field = "merge_{}".format(arg) cset(cG.args, field, args[field]) cset([cG.args, train], "merge", args.kgan) if args.kgan and args.transformer: misc.error( "Either have --transformer for GANsformer or --kgan for k-GAN, not both" ) # Attention for arg in ["start_res", "end_res", "ltnt2ltnt", "img2img"]: # , "local_attention" cset(cG.args, arg, args["g_{}".format(arg)]) cset(cD.args, arg, args["d_{}".format(arg)]) cset(cG.args, "img2ltnt", args.g_img2ltnt) # cset(cD.args, "ltnt2img", args.d_ltnt2img) # Mixing and dropout for arg in [ "style_mixing", "component_mixing", "component_dropout", "attention_dropout" ]: cset(cG.args, arg, args[arg]) # Loss and regularization gloss_args = { "loss_type": "g_loss", "reg_weight": "g_reg_weight", # "pathreg": "pathreg", } dloss_args = {"loss_type": "d_loss", "reg_type": "d_reg", "gamma": "gamma"} for arg, cmd_arg in gloss_args.items(): cset(cG.loss_args, arg, 
args[cmd_arg]) for arg, cmd_arg in dloss_args.items(): cset(cD.loss_args, arg, args[cmd_arg]) ##### Experiments management: # Whenever we start a new experiment we store its result in a directory named 'args.expname:000'. # When we rerun a training or evaluation command it restores the model from that directory by default. # If we wish to restart the model training, we can set --restart and then we will store data in a new # directory: 'args.expname:001' after the first restart, then 'args.expname:002' after the second, etc. # Find the latest directory that matches the experiment exp_dir = sorted(glob.glob("{}/{}-*".format(args.result_dir, args.expname))) run_id = 0 if len(exp_dir) > 0: run_id = int(exp_dir[-1].split("-")[-1]) # If restart, then work over a new directory if args.restart: run_id += 1 run_name = "{}-{:03d}".format(args.expname, run_id) train.printname = "{} ".format(misc.bold(args.expname)) snapshot, kimg, resume = None, 0, False pkls = sorted( glob.glob("{}/{}/network*.pkl".format(args.result_dir, run_name))) # Load a particular snapshot is specified if args.pretrained_pkl is not None and args.pretrained_pkl != "None": # Soft links support if args.pretrained_pkl.startswith("gdrive"): if args.pretrained_pkl not in pretrained_networks.gdrive_urls: misc.error( "--pretrained_pkl {} not available in the catalog (see pretrained_networks.py)" ) snapshot = args.pretrained_pkl else: snapshot = glob.glob(args.pretrained_pkl)[0] if os.path.islink(snapshot): snapshot = os.readlink(snapshot) # Extract training step from the snapshot if specified try: kimg = int(snapshot.split("-")[-1].split(".")[0]) except: pass # Find latest snapshot in the directory elif len(pkls) > 0: snapshot = pkls[-1] kimg = int(snapshot.split("-")[-1].split(".")[0]) resume = True if snapshot: misc.log( "Resuming {}, from {}, kimg {}".format(run_name, snapshot, kimg), "white") train.resume_pkl = snapshot train.resume_kimg = kimg else: misc.log("Start model training from scratch", "white") # Run environment configuration sc.run_dir_root = args.result_dir sc.run_desc = args.expname sc.run_id = run_id sc.run_name = run_name sc.submit_target = dnnlib.SubmitTarget.LOCAL sc.local.do_not_copy_source_files = True kwargs = EasyDict(train) kwargs.update(cG=cG, cD=cD) kwargs.update(dataset_args=dataset_args, vis_args=vis, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config) kwargs.submit_config = copy.deepcopy(sc) kwargs.resume = resume kwargs.load_config = args.reload dnnlib.submit_run(**kwargs)
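# The experiments-management convention above resolves run directories such as
# '<result_dir>/<expname>-000', '<expname>-001', ... A minimal sketch of the run-id
# resolution, using the same sort/split logic as above (directory names are hypothetical):
def _next_run_id(existing_dirs, restart):
    # Latest run id is taken from the lexicographically last matching directory
    run_id = 0
    if existing_dirs:
        run_id = int(sorted(existing_dirs)[-1].split("-")[-1])
    # --restart moves on to a fresh directory instead of resuming in place
    return run_id + 1 if restart else run_id

assert _next_run_id(["results/clevr-exp-000", "results/clevr-exp-001"], restart=True) == 2
assert _next_run_id([], restart=False) == 0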
def setup_config(run_dir, **args):
    args = EasyDict(args)               # command-line options
    train = EasyDict(run_dir=run_dir)   # training loop options
    vis = EasyDict(run_dir=run_dir)     # visualization loop options

    if args.reload:
        config_fn = os.path.join(run_dir, "training_options.json")
        if os.path.exists(config_fn):
            # Load config from the experiment's existing file (and so ignore command-line arguments)
            with open(config_fn, "rt") as f:
                config = json.load(f)
            return config
        misc.log(f"Warning: --reload is set for a new experiment {args.expname}," +
                 f" but the configuration file to reload from {config_fn} doesn't exist.", "red")

    # GANformer and baselines default settings
    # ----------------------------------------------------------------------------
    if args.ganformer_default:
        task = args.dataset
        nset(args, "mirror_augment", task in ["cityscapes", "ffhq"])

        nset(args, "transformer", True)
        nset(args, "components_num", {"clevr": 8}.get(task, 16))
        nset(args, "latent_size", {"clevr": 128}.get(task, 512))

        nset(args, "normalize", "layer")
        nset(args, "integration", "mul")
        nset(args, "kmeans", True)
        nset(args, "use_pos", True)
        nset(args, "mapping_ltnt2ltnt", task != "clevr")
        nset(args, "style", task != "clevr")

        nset(args, "g_arch", "resnet")
        nset(args, "mapping_resnet", True)

        gammas = {"ffhq": 10, "cityscapes": 20, "clevr": 40, "bedrooms": 100}
        nset(args, "gamma", gammas.get(task, 10))

    if args.baseline == "GAN":
        nset(args, "style", False)
        nset(args, "latent_stem", True)

    ## k-GAN and SAGAN are not currently supported in the pytorch version.
    ## See the TF version for implementation of these baselines!
    # if args.baseline == "SAGAN":
    #     nset(args, "style", False)
    #     nset(args, "latent_stem", True)
    #     nset(args, "g_img2img", 5)

    # if args.baseline == "kGAN":
    #     nset(args, "kgan", True)
    #     nset(args, "merge_layer", 5)
    #     nset(args, "merge_type", "softmax")
    #     nset(args, "components_num", 8)

    # General setup
    # ----------------------------------------------------------------------------
    # If a flag is specified without arguments (--arg), set it to True
    for arg in ["cuda_bench", "allow_tf32", "keep_samples", "style", "local_noise"]:
        if args[arg] is None:
            args[arg] = True

    if not any([args.train, args.eval, args.vis]):
        misc.log("Warning: None of --train, --eval or --vis are provided. Therefore, we only print network shapes", "red")
    for arg in ["train", "eval", "vis", "last_snapshots"]:
        cset(train, arg, args[arg])

    if args.gpus != "":
        num_gpus = len(args.gpus.split(","))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
    if not (num_gpus >= 1 and num_gpus & (num_gpus - 1) == 0):
        misc.error("Number of GPUs must be a power of two")
    args.num_gpus = num_gpus

    # CUDA settings
    for arg in ["batch_size", "batch_gpu", "allow_tf32"]:
        cset(train, arg, args[arg])
    cset(train, "cudnn_benchmark", args.cuda_bench)

    # Data setup
    # ----------------------------------------------------------------------------
    # For bedrooms, we choose the most common ratio in the
    # dataset and crop the other images into that ratio.
    ratios = {
        "clevr": 0.75,
        "bedrooms": 188 / 256,
        "cityscapes": 0.5,
        "ffhq": 1.0
    }
    args.ratio = args.ratio or ratios.get(args.dataset, 1.0)
    args.crop_ratio = 0.5 if args.resolution > 256 and args.ratio < 0.5 else None

    args.printname = args.expname
    for arg in ["total_kimg", "printname"]:
        cset(train, arg, args[arg])

    dataset_args = EasyDict(class_name="training.dataset.ImageFolderDataset",
                            path=f"{args.data_dir}/{args.dataset}",
                            max_items=args.train_images_num,
                            resolution=args.resolution,
                            ratio=args.ratio,
                            mirror_augment=args.mirror_augment)
    dataset_args.loader_args = EasyDict(num_workers=args.num_threads,
                                        pin_memory=True,
                                        prefetch_factor=2)

    # Optimization setup
    # ----------------------------------------------------------------------------
    cG = set_net("Generator", ["mapping", "synthesis"], args.g_lr, 4)
    cD = set_net("Discriminator", ["mapping", "block", "epilogue"], args.d_lr, 16)
    cset([cG, cD], "crop_ratio", args.crop_ratio)

    mbstd = min(args.batch_gpu, 4)  # other hyperparams behave more predictably if mbstd group size remains fixed
    cset(cD.epilogue_kwargs, "mbstd_group_size", mbstd)

    # Automatic tuning
    if args.autotune:
        batch_size = max(min(args.num_gpus * min(4096 // args.resolution, 32), 64),
                         args.num_gpus)  # keep gpu memory consumption at bay
        batch_gpu = args.batch_size // args.num_gpus
        nset(args, "batch_size", batch_size)
        nset(args, "batch_gpu", batch_gpu)

        fmap_decay = 1 if args.resolution >= 512 else 0.5
        lr = 0.002 if args.resolution >= 1024 else 0.0025
        gamma = 0.0002 * (args.resolution ** 2) / args.batch_size  # heuristic formula

        cset([cG.synthesis_kwargs, cD], "dim_base", int(fmap_decay * 32768))
        nset(args, "g_lr", lr)
        cset(cG.opt_args, "lr", args.g_lr)
        nset(args, "d_lr", lr)
        cset(cD.opt_args, "lr", args.d_lr)
        nset(args, "gamma", gamma)

        train.ema_rampup = 0.05
        train.ema_kimg = batch_size * 10 / 32

    if args.batch_size % (args.batch_gpu * args.num_gpus) != 0:
        misc.error("--batch-size should be divisible by --batch-gpu * 'num_gpus'")

    # Loss and regularization settings
    loss_args = EasyDict(class_name="training.loss.StyleGAN2Loss",
                         g_loss=args.g_loss,
                         d_loss=args.d_loss,
                         r1_gamma=args.gamma,
                         pl_weight=args.pl_weight)

    # if args.fp16:
    #     cset([cG.synthesis_kwargs, cD], "num_fp16_layers", 4)  # enable mixed-precision training
    #     cset([cG.synthesis_kwargs, cD], "conv_clamp", 256)  # clamp activations to avoid float16 overflow
    #     cset([cG.synthesis_kwargs, cD.block_args], "fp16_channels_last", args.nhwc)

    # Evaluation and visualization
    # ----------------------------------------------------------------------------
    from metrics import metric_main
    for metric in args.metrics:
        if not metric_main.is_valid_metric(metric):
            misc.error(f"Unknown metric: {metric}. The valid metrics are: {metric_main.list_valid_metrics()}")

    for arg in ["num_gpus", "metrics", "eval_images_num", "truncation_psi"]:
        cset(train, arg, args[arg])
    for arg in ["keep_samples", "num_heads"]:
        cset(vis, arg, args[arg])

    args.vis_imgs = args.vis_images
    args.vis_ltnts = args.vis_latents
    vis_types = ["imgs", "ltnts", "maps", "layer_maps", "interpolations", "noise_var", "style_mix"]
    # Set of all the enabled visualization types
    vis.vis_types = list({arg for arg in vis_types if args[f"vis_{arg}"]})

    vis_args = {
        "attention": "transformer",
        "grid": "vis_grid",
        "num": "vis_num",
        "rich_num": "vis_rich_num",
        "section_size": "vis_section_size",
        "intrp_density": "interpolation_density",
        # "intrp_per_component": "interpolation_per_component",
        "alpha": "blending_alpha"
    }
    for arg, cmd_arg in vis_args.items():
        cset(vis, arg, args[cmd_arg])

    # Networks setup
    # ----------------------------------------------------------------------------
    # Networks architecture
    cset(cG.synthesis_kwargs, "architecture", args.g_arch)
    cset(cD, "architecture", args.d_arch)

    # Latent sizes
    if args.components_num > 0:
        if not args.transformer:  # or args.kgan
            misc.error("--components-num > 0 but the model is not using components. " +
                       "Add --transformer for GANformer (which uses latent components).")
        if args.latent_size % args.components_num != 0:
            misc.error(f"--latent-size ({args.latent_size}) should be divisible by " +
                       f"--components-num ({args.components_num})")
        args.latent_size = int(args.latent_size / args.components_num)

    cG.z_dim = cG.w_dim = args.latent_size
    cset([cG, vis], "k", args.components_num + 1)  # We add a component to modulate features globally

    # Mapping network
    args.mapping_layer_dim = args.mapping_dim
    for arg in ["num_layers", "layer_dim", "resnet", "shared", "ltnt2ltnt"]:
        field = f"mapping_{arg}"
        cset(cG.mapping_kwargs, arg, args[field])

    # StyleGAN settings
    for arg in ["style", "latent_stem", "local_noise"]:
        cset(cG.synthesis_kwargs, arg, args[arg])

    # GANformer
    cset([cG.synthesis_kwargs, cG.mapping_kwargs], "transformer", args.transformer)

    # Attention-related settings
    for arg in ["use_pos", "num_heads", "ltnt_gate", "attention_dropout"]:
        cset([cG.mapping_kwargs, cG.synthesis_kwargs], arg, args[arg])

    # Attention types and layers
    for arg in ["start_res", "end_res"]:  # , "local_attention", "ltnt2ltnt", "img2img", "img2ltnt"
        cset(cG.synthesis_kwargs, arg, args[f"g_{arg}"])

    # Mixing and dropout
    for arg in ["style_mixing", "component_mixing"]:
        cset(loss_args, arg, args[arg])
    cset(cG, "component_dropout", args["component_dropout"])

    # Extra transformer options
    args.norm = args.normalize
    for arg in ["norm", "integration", "img_gate", "iterative", "kmeans", "kmeans_iters"]:
        cset(cG.synthesis_kwargs, arg, args[arg])

    # Positional encoding
    # args.pos_dim = args.pos_dim or args.latent_size
    for arg in ["dim", "type", "init", "directions_num"]:
        field = f"pos_{arg}"
        cset(cG.synthesis_kwargs, field, args[field])

    # k-GAN (not currently supported in the pytorch version)
    # for arg in ["layer", "type", "same"]:
    #     field = "merge_{}".format(arg)
    #     cset(cG.args, field, args[field])
    # cset(cG.synthesis_kwargs, "merge", args.kgan)
    # if args.kgan and args.transformer:
    #     misc.error("Either have --transformer for GANformer or --kgan for k-GAN, not both")

    config = EasyDict(train)
    config.update(cG=cG, cD=cD, loss_args=loss_args, dataset_args=dataset_args, vis_args=vis)

    # Save config file
    with open(os.path.join(run_dir, "training_options.json"), "wt") as f:
        json.dump(config, f, indent=2)

    return config
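# Hypothetical usage of the --reload path above: because setup_config() saves
# training_options.json into the run directory, a later call with reload=True restores the
# exact same configuration regardless of the other command-line arguments (directory name
# and remaining arguments here are placeholders):
#
#   config = setup_config("results/clevr-exp-000", reload=True, expname="clevr-exp", ...)
#   # -> returns the dict loaded from results/clevr-exp-000/training_options.json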