# NOTE: imports reconstructed from usage. The repo-local module paths (callbacks, evaluate, utils,
# visual_plt, continual_learner, data, encoder, exemplars, param_stamp, replayer, train, vae_models)
# are assumptions based on how their members are used below; the later variants additionally rely on
# project-specific helpers (EBM, train_cl_noboundary, pidfile, dataloaders, models, and the
# train/validate/test loops) that are defined elsewhere.
import logging
import os
import random
import time
import warnings

import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter

import callbacks as cb
import evaluate
import utils
import visual_plt
from continual_learner import ContinualLearner
from data import get_multitask_experiment
from encoder import Classifier
from exemplars import ExemplarHandler
from param_stamp import get_param_stamp, get_param_stamp_from_args
from replayer import Replayer
from train import train_cl
from vae_models import AutoEncoder


def run(args):

    # Set default arguments & check for incompatible options
    args.lr_gen = args.lr if args.lr_gen is None else args.lr_gen
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if [iCaRL] is selected, select all accompanying options
    if hasattr(args, "icarl") and args.icarl:
        args.use_exemplars = True
        args.add_exemplars = True
        args.bce = True
        args.bce_distill = True
    # -if XdG is selected but not the Task-IL scenario, give error
    if (not args.scenario == "task") and args.xdg:
        raise ValueError("'XdG' is only compatible with the Task-IL scenario.")
    # -if EWC, SI, XdG or iCaRL is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.xdg or args.icarl):
        raise NotImplementedError(
            "EWC, SI, XdG and iCaRL are not supported with feedback connections."
        )
    # -if binary classification loss is selected together with 'feedback', give error
    if args.feedback and args.bce:
        raise NotImplementedError(
            "Binary classification loss not supported with feedback connections."
        )
    # -if XdG is selected together with both replay and EWC, give error (either one of them alone with XdG is fine)
    if args.xdg and (not args.replay == "none") and (args.ewc or args.si):
        raise NotImplementedError(
            "XdG is not supported with both '{}' replay and EWC / SI.".format(
                args.replay))
        # --> problem is that applying different task-masks interferes with gradient calculation
        #     (should be possible to overcome by calculating backward step on EWC/SI-loss also for each mask separately)
    # -if 'BCEdistill' is selected for other than scenario=="class", give error
    if args.bce_distill and not args.scenario == "class":
        raise ValueError(
            "BCE-distill can only be used for class-incremental learning.")
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    scenario = args.scenario
    # If Task-IL scenario is chosen with single-headed output layer, train as in the Domain-IL scenario
    # (but note that when XdG is used, task-identity information is being used so the actual scenario is still Task-IL)
    if args.singlehead and args.scenario == "task":
        scenario = "domain"

    # If only the param-stamp is wanted, get it printed to screen and exit
    if hasattr(args, "get_stamp") and args.get_stamp:
        _ = get_param_stamp_from_args(args=args)
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets, test_datasets), config, classes_per_task = get_multitask_experiment(
        name=args.experiment,
        scenario=scenario,
        tasks=args.tasks,
        data_dir=args.d_dir,
        verbose=True,
        exception=True if args.seed == 0 else False,
    )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier; if requested, with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> so that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.xdg and args.gating_prop > 0 else False,
            binaryCE=args.bce,
            binaryCE_distill=args.bce_distill,
        ).to(device)

    # Define optimizer (only include parameters that "require grad")
    model.optim_list = [{
        'params': filter(lambda p: p.requires_grad, model.parameters()),
        'lr': args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".format(
                args.optimizer))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------#
    #----- CL-STRATEGY: EXEMPLARS -----#
    #----------------------------------#

    # Store in model whether, how many and in what way to store exemplars
    if isinstance(model, ExemplarHandler) and (args.use_exemplars or args.add_exemplars
                                               or args.replay == "exemplars"):
        model.memory_budget = args.budget
        model.norm_exemplars = args.norm_exemplars
        model.herding = args.herding

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully-connected layer
    if isinstance(model, ContinualLearner) and (args.xdg and args.gating_prop > 0):
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop * n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, Replayer):
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative" and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.g_z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params': filter(lambda p: p.requires_grad, generator.parameters()),
            'lr': args.lr_gen
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = get_param_stamp(
        args,
        model.name,
        verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting in visdom
    # -define [precision_dict] to keep track of performance during training, for storing and for later plotting in pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    precision_dict_exemplars = evaluate.initiate_precision_dict(
        args.tasks) if args.use_exemplars else None
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{replay}{syn}{ewc}{xdg}{icarl}{bud}".format(
            fb="1M-" if args.feedback else "",
            replay="{}{}".format(args.replay, "D" if args.distill else ""),
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda,
                "-O{}".format(args.gamma) if args.online else "")
            if args.ewc else "",
            xdg="" if (not args.xdg) or args.gating_prop == 0 else
            "-XdG{}".format(args.gating_prop),
            icarl="-iCaRL" if (args.use_exemplars and args.add_exemplars
                               and args.bce and args.bce_distill) else "",
            bud="-bud{}".format(args.budget) if
            (args.use_exemplars or args.add_exemplars
             or args.replay == "exemplars") else "",
        )
        visdom = {'env': env_name, 'graph': graph_name}
        visdom_exemplars = {
            'env': env_name,
            'graph': "{}-EX".format(graph_name)
        } if args.use_exemplars else None
    else:
        visdom = visdom_exemplars = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(
            log=args.loss_log,
            visdom=visdom,
            model=model if args.feedback else generator,
            tasks=args.tasks,
            iters_per_task=args.iters if args.feedback else args.g_iters,
            replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(
            log=args.sample_log,
            visdom=visdom,
            config=config,
            test_datasets=test_datasets,
            sample_size=args.sample_n,
            iters_per_task=args.iters if args.feedback else args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        precision_dict=None,
        iters_per_task=args.iters,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -pdf / reporting: summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -with exemplars (both for visdom & reporting / pdf)
    eval_cb_exemplars = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        visdom=visdom_exemplars,
        classes_per_task=classes_per_task,
        precision_dict=precision_dict_exemplars,
        scenario=scenario,
        iters_per_task=args.iters,
        with_exemplars=True,
    ) if args.use_exemplars else None
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]
    eval_cbs_exemplars = [eval_cb_exemplars]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training: " + args.name)
    print("Total tasks: " + str(args.tasks_to_complete))
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        args.tasks_to_complete,
        args.name,
        model,
        train_datasets,
        test_datasets,
        replay_mode=args.replay,
        scenario=scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
        eval_cbs_exemplars=eval_cbs_exemplars,
        use_exemplars=args.use_exemplars,
        add_exemplars=args.add_exemplars,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
    time_file.write('{}\n'.format(training_time))
    time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print("\n\n--> Evaluation ({}-incremental learning scenario):".format(
        args.scenario))

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model,
            test_datasets[i],
            verbose=False,
            test_size=None,
            task=i + 1,
            with_exemplars=False,
            allowed_classes=list(
                range(classes_per_task * i, classes_per_task * (i + 1)))
            if scenario == "task" else None) for i in range(args.tasks)
    ]
    print("\n Precision on test-set (softmax classification):")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}'.format(
        args.tasks, average_precs))

    # -with exemplars
    if args.use_exemplars:
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task=i + 1,
                with_exemplars=True,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task * (i + 1)))
                if scenario == "task" else None) for i in range(args.tasks)
        ]
        print("\n Precision on test-set (classification using exemplars):")
        for i in range(args.tasks):
            print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
        average_precs_ex = sum(precs) / args.tasks
        print('=> average precision over all {} tasks: {:.4f}'.format(
            args.tasks, average_precs_ex))
    print("\n")

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test-set
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(
        average_precs_ex if args.use_exemplars else average_precs))
    output_file.close()
    # -precision-dict
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(
        precision_dict_exemplars if args.use_exemplars else precision_dict,
        file_name)
    # Average precision on full test-set not evaluated using exemplars (i.e., using softmax on final layer)
    if args.use_exemplars:
        output_file = open(
            "{}/prec_noex-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(average_precs))
        output_file.close()
        # -precision-dict:
        file_name = "{}/dict_noex-{}".format(args.r_dir, param_stamp)
        utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        pp = visual_plt.open_pdf("{}/{}.pdf".format(args.p_dir, param_stamp))
        # -show samples and reconstructions (either from main model or from separate generator)
        if args.feedback or args.replay == "generative":
            evaluate.show_samples(model if args.feedback else generator,
                                  config,
                                  size=args.sample_n,
                                  pdf=pp)
            for i in range(args.tasks):
                evaluate.show_reconstruction(
                    model if args.feedback else generator,
                    test_datasets[i],
                    config,
                    pdf=pp,
                    task=i + 1)
        # -show metrics reflecting progression during training
        figure_list = []  # -> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        if args.use_exemplars:
            figure = visual_plt.plot_lines(
                precision_dict_exemplars["all_tasks"],
                x_axes=precision_dict_exemplars["x_task"],
                line_names=[
                    'task {}'.format(i + 1) for i in range(args.tasks)
                ])
            figure_list.append(figure)
        # -add figures to pdf
        for figure in figure_list:
            pp.savefig(figure)
        # -close pdf
        pp.close()
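
#--------------------------------------------------------------------------------------------------#
# [Sketch, not part of the original script] How the per-task XdG masks built in run() above could be
# applied when switching tasks. It assumes, as the code suggests, that each entry of
# [model.excit_buffer_list] multiplicatively gates the units of one hidden layer; the helper name
# `apply_xdg_mask` is hypothetical.


def apply_xdg_mask(model, task_id):
    """Close the gates of the units that run() randomly selected for [task_id]."""
    for i, excit_buffer in enumerate(model.excit_buffer_list):
        gate = torch.ones(len(excit_buffer))    # start with every unit fully open
        gate[model.mask_dict[task_id][i]] = 0.  # zero out this task's gated units
        excit_buffer.copy_(gate.to(excit_buffer.device))  # update registered buffer in place

# Usage would be e.g. apply_xdg_mask(model, task_id=3) before training or testing on task 3.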
def run(args):

    # Set default arguments
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if XdG is selected but not the incremental task learning scenario, give error
    if (not args.scenario == "task") and args.gating_prop > 0:
        raise ValueError(
            "'XdG' only works for the incremental task learning scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.gating_prop > 0):
        raise NotImplementedError(
            "EWC, SI and XdG are not supported with feedback connections.")
    # -if XdG is selected together with replay of any kind, give error
    if args.gating_prop > 0 and (not args.replay == "none"):
        raise NotImplementedError(
            "XdG is not supported with '{}' replay.".format(args.replay))
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets, test_datasets), config, classes_per_task = get_multitask_experiment(
        name=args.experiment,
        scenario=args.scenario,
        tasks=args.tasks,
        data_dir=args.d_dir,
        verbose=True,
        exception=True if args.seed == 0 else False,
    )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier; if requested, with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> so that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.gating_prop > 0 else False,
        ).to(device)

    # Define optimizer (only include parameters that "require grad")
    model.optim_list = [{
        'params': filter(lambda p: p.requires_grad, model.parameters()),
        'lr': args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".format(
                args.optimizer))

    # Set loss-function for reconstruction
    if args.feedback:
        model.recon_criterion = nn.BCELoss(reduction='mean')

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        model.fisher_n = args.fisher_n
        model.gamma = args.gamma
        model.online = args.online
        model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully-connected layer
    if isinstance(model, ContinualLearner) and args.gating_prop > 0:
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop * n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    model.replay_targets = "soft" if args.distill else "hard"
    model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative" and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params': filter(lambda p: p.requires_grad, generator.parameters()),
            'lr': args.lr
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
        # -set reconstruction criterion
        generator.recon_criterion = nn.BCELoss(reduction='mean')
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = utils.get_param_stamp(
        args,
        model.name,
        verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting
    # -open pdf
    pp = visual_plt.open_pdf("{}/{}.pdf".format(
        args.p_dir, param_stamp)) if args.pdf else None
    # -define [precision_dict] to keep track of performance during training for later plotting
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{mode}{syn}{ewc}{XdG}".format(
            fb="1M-" if args.feedback else "",
            mode=args.replay,
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda,
                "-O{}".format(args.gamma) if args.online else "")
            if args.ewc else "",
            XdG="" if args.gating_prop == 0 else
            "-XdG{}".format(args.gating_prop))
        visdom = {'env': env_name, 'graph': graph_name}
    else:
        visdom = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(log=args.loss_log,
                        visdom=visdom,
                        model=model if args.feedback else generator,
                        tasks=args.tasks,
                        iters_per_task=args.g_iters,
                        replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(log=args.sample_log,
                      visdom=visdom,
                      config=config,
                      test_datasets=test_datasets,
                      sample_size=args.sample_n,
                      iters_per_task=args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        iters_per_task=args.iters,
        scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner)
        and (args.gating_prop > 0) else False)
    # -pdf: for summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner)
        and (args.gating_prop > 0) else False)
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:")
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        model,
        train_datasets,
        replay_mode=args.replay,
        scenario=args.scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        collate_fn=utils.label_squeezing_collate_fn,
        visualize=True if args.visdom else False,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
    time_file.write('{}\n'.format(training_time))
    time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print('\n\n--> Evaluation ("incremental {} learning scenario"):'.format(
        args.scenario))

    # Generation (plot in pdf)
    if (pp is not None) and train_gen:
        evaluate.show_samples(generator, config, size=args.sample_n, pdf=pp)
    if (pp is not None) and args.feedback:
        evaluate.show_samples(model, config, size=args.sample_n, pdf=pp)

    # Reconstruction (plot in pdf)
    if (pp is not None) and (train_gen or args.feedback):
        for i in range(args.tasks):
            if args.feedback:
                evaluate.show_reconstruction(model,
                                             test_datasets[i],
                                             config,
                                             pdf=pp,
                                             task=i + 1)
            else:
                evaluate.show_reconstruction(generator,
                                             test_datasets[i],
                                             config,
                                             pdf=pp,
                                             task=i + 1)

    # Classifier (print on screen & write to file)
    if args.scenario == "task":
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task_mask=True if isinstance(model, ContinualLearner)
                and args.gating_prop > 0 else False,
                task=i + 1,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task * (i + 1))))
            for i in range(args.tasks)
        ]
    else:
        precs = [
            evaluate.validate(model,
                              test_datasets[i],
                              verbose=False,
                              test_size=None,
                              task=i + 1) for i in range(args.tasks)
        ]
    print("\n Precision on test-set:")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}\n'.format(
        args.tasks, average_precs))

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test-set (no restrictions on which nodes can be predicted: "incremental" / "singlehead")
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(average_precs))
    output_file.close()
    # Precision-dictionary
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if pp is not None:
        # -create list to store all figures to be plotted
        figure_list = []
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf
        for figure in figure_list:
            pp.savefig(figure)

    # Close pdf
    if pp is not None:
        pp.close()
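
#--------------------------------------------------------------------------------------------------#
# [Sketch, not part of the original script] The distillation loss implied by setting
# `replay_targets = "soft"` and `KD_temp` above: replayed inputs are trained to match the previous
# model's temperature-scaled predictions (Hinton et al., 2015). The function name `distill_loss`
# is hypothetical.
import torch.nn.functional as F


def distill_loss(scores, target_scores, T=2.):
    """Cross-entropy between softened current predictions [scores] (logits of the model being
    trained) and softened stored predictions [target_scores] (logits of the previous model)."""
    log_p = F.log_softmax(scores / T, dim=1)
    q = F.softmax(target_scores / T, dim=1)
    # multiply by T**2 so the gradient magnitude stays comparable across temperatures
    return -(q * log_p).sum(dim=1).mean() * T**2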
def run(args, verbose=False):

    # Set default arguments & check for incompatible options
    args.lr_gen = args.lr if args.lr_gen is None else args.lr_gen
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if [iCaRL] is selected, select all accompanying options
    if hasattr(args, "icarl") and args.icarl:
        args.use_exemplars = True
        args.add_exemplars = True
        args.bce = True
        args.bce_distill = True
    # -if XdG is selected but not the Task-IL scenario, give error
    if (not args.scenario == "task") and args.xdg:
        raise ValueError("'XdG' is only compatible with the Task-IL scenario.")
    # -if EWC, SI, XdG, A-GEM or iCaRL is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.xdg or args.icarl
                          or args.agem):
        raise NotImplementedError(
            "EWC, SI, XdG, A-GEM and iCaRL are not supported with feedback connections."
        )
    # -if A-GEM is selected without any replay, give warning
    if args.agem and args.replay == "none":
        warnings.warn(
            "The '--agem' flag is selected, but without any type of replay. "
            "For the original A-GEM method, also select --replay='exemplars'.")
    # -if EWC, SI, XdG, A-GEM or iCaRL is selected together with offline-replay, give error
    if args.replay == "offline" and (args.ewc or args.si or args.xdg
                                     or args.icarl or args.agem):
        raise NotImplementedError(
            "Offline replay cannot be combined with EWC, SI, XdG, A-GEM or iCaRL."
        )
    # -if binary classification loss is selected together with 'feedback', give error
    if args.feedback and args.bce:
        raise NotImplementedError(
            "Binary classification loss not supported with feedback connections."
        )
    # -if XdG is selected together with both replay and EWC, give error (either one of them alone with XdG is fine)
    if (args.xdg and args.gating_prop > 0) and (
            not args.replay == "none") and (args.ewc or args.si):
        raise NotImplementedError(
            "XdG is not supported with both '{}' replay and EWC / SI.".format(
                args.replay))
        # --> problem is that applying different task-masks interferes with gradient calculation
        #     (should be possible to overcome by calculating backward step on EWC/SI-loss also for each mask separately)
    # -if 'BCEdistill' is selected for other than scenario=="class", give error
    if args.bce_distill and not args.scenario == "class":
        raise ValueError(
            "BCE-distill can only be used for class-incremental learning.")
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    scenario = args.scenario
    # If Task-IL scenario is chosen with single-headed output layer, train as in the Domain-IL scenario
    # (but note that when XdG is used, task-identity information is being used so the actual scenario is still Task-IL)
    if args.singlehead and args.scenario == "task":
        scenario = "domain"

    # If only the param-stamp is wanted, get it printed to screen and exit
    if hasattr(args, "get_stamp") and args.get_stamp:
        print(get_param_stamp_from_args(args=args))
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")
    if verbose:
        print("CUDA is {}used".format("" if cuda else "NOT(!!) "))

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    if verbose:
        print("\nPreparing the data...")
    (train_datasets, test_datasets), config, classes_per_task = get_multitask_experiment(
        name=args.experiment,
        scenario=scenario,
        tasks=args.tasks,
        data_dir=args.d_dir,
        verbose=verbose,
        exception=True if args.seed == 0 else False,
    )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier; if requested, with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> so that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.xdg and args.gating_prop > 0 else False,
            binaryCE=args.bce,
            binaryCE_distill=args.bce_distill,
            AGEM=args.agem,
        ).to(device)

    # Define optimizer (only include parameters that "require grad")
    model.optim_list = [{
        'params': filter(lambda p: p.requires_grad, model.parameters()),
        'lr': args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".format(
                args.optimizer))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------#
    #----- CL-STRATEGY: EXEMPLARS -----#
    #----------------------------------#

    # Store in model whether, how many and in what way to store exemplars
    if isinstance(model, ExemplarHandler) and (args.use_exemplars or args.add_exemplars
                                               or args.replay == "exemplars"):
        model.memory_budget = args.budget
        model.norm_exemplars = args.norm_exemplars
        model.herding = args.herding

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully-connected layer
    if isinstance(model, ContinualLearner) and (args.xdg and args.gating_prop > 0):
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop * n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, Replayer):
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative" and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.g_z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params': filter(lambda p: p.requires_grad, generator.parameters()),
            'lr': args.lr_gen
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    if verbose:
        print("\nParameter-stamp...")
    param_stamp = get_param_stamp(
        args,
        model.name,
        verbose=verbose,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    if verbose:
        # -main model
        utils.print_model_info(model, title="MAIN MODEL")
        # -generator
        if generator is not None:
            utils.print_model_info(generator, title="GENERATOR")

    # Prepare for keeping track of statistics required for metrics (also used for plotting in pdf)
    if args.pdf or args.metrics:
        # -define [metrics_dict] to keep track of performance during training for storing & for later plotting in pdf
        metrics_dict = evaluate.initiate_metrics_dict(n_tasks=args.tasks,
                                                      scenario=args.scenario)
        # -evaluate randomly initialized model on all tasks & store accuracies in [metrics_dict] (for calculating metrics)
        if not args.use_exemplars:
            metrics_dict = evaluate.intial_accuracy(
                model,
                test_datasets,
                metrics_dict,
                classes_per_task=classes_per_task,
                scenario=scenario,
                test_size=None,
                no_task_mask=False)
    else:
        metrics_dict = None

    # Prepare for plotting in visdom
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{replay}{syn}{ewc}{xdg}{icarl}{bud}".format(
            fb="1M-" if args.feedback else "",
            replay="{}{}{}".format(args.replay, "D" if args.distill else "",
                                   "-aGEM" if args.agem else ""),
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda,
                "-O{}".format(args.gamma) if args.online else "")
            if args.ewc else "",
            xdg="" if (not args.xdg) or args.gating_prop == 0 else
            "-XdG{}".format(args.gating_prop),
            icarl="-iCaRL" if (args.use_exemplars and args.add_exemplars
                               and args.bce and args.bce_distill) else "",
            bud="-bud{}".format(args.budget) if
            (args.use_exemplars or args.add_exemplars
             or args.replay == "exemplars") else "",
        )
        visdom = {'env': env_name, 'graph': graph_name}
    else:
        visdom = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(
            log=args.loss_log,
            visdom=visdom,
            model=model if args.feedback else generator,
            tasks=args.tasks,
            iters_per_task=args.iters if args.feedback else args.g_iters,
            replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(
            log=args.sample_log,
            visdom=visdom,
            config=config,
            test_datasets=test_datasets,
            sample_size=args.sample_n,
            iters_per_task=args.iters if args.feedback else args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cbs = [
        cb._eval_cb(log=args.prec_log,
                    test_datasets=test_datasets,
                    visdom=visdom,
                    iters_per_task=args.iters,
                    test_size=args.prec_n,
                    classes_per_task=classes_per_task,
                    scenario=scenario,
                    with_exemplars=False)
    ] if (not args.use_exemplars) else [None]
    # --> during training on a task, evaluation cannot be with exemplars as those are only selected after training
    #     (instead, evaluation for visdom is only done after each task, by including a callback-function in [metric_cbs])

    # Callbacks for calculating statistics required for metrics
    # -pdf / reporting: summary plots (i.e., only after each task) (when using exemplars, also for visdom)
    metric_cbs = [
        cb._metric_cb(log=args.iters,
                      test_datasets=test_datasets,
                      classes_per_task=classes_per_task,
                      metrics_dict=metrics_dict,
                      scenario=scenario,
                      iters_per_task=args.iters,
                      with_exemplars=args.use_exemplars),
        cb._eval_cb(log=args.iters,
                    test_datasets=test_datasets,
                    visdom=visdom,
                    iters_per_task=args.iters,
                    test_size=args.prec_n,
                    classes_per_task=classes_per_task,
                    scenario=scenario,
                    with_exemplars=True) if args.use_exemplars else None
    ]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    if verbose:
        print("\nTraining...")
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        model,
        train_datasets,
        replay_mode=args.replay,
        scenario=scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
        metric_cbs=metric_cbs,
        use_exemplars=args.use_exemplars,
        add_exemplars=args.add_exemplars,
        param_stamp=param_stamp,
    )
    # Get total training-time in seconds, and write to file
    if args.time:
        training_time = time.time() - start
        time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
        time_file.write('{}\n'.format(training_time))
        time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    if verbose:
        print("\n\nEVALUATION RESULTS:")

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model,
            test_datasets[i],
            verbose=False,
            test_size=None,
            task=i + 1,
            with_exemplars=False,
            allowed_classes=list(
                range(classes_per_task * i, classes_per_task * (i + 1)))
            if scenario == "task" else None) for i in range(args.tasks)
    ]
    average_precs = sum(precs) / args.tasks
    # -print on screen
    if verbose:
        print("\n Precision on test-set{}:".format(
            " (softmax classification)" if args.use_exemplars else ""))
        for i in range(args.tasks):
            print(" - Task {} [{}-{}]: {:.4f}".format(
                i + 1, classes_per_task * i, classes_per_task * (i + 1) - 1,
                precs[i]))
        print('=> Average precision over all {} tasks: {:.4f}\n'.format(
            args.tasks, average_precs))

    # -with exemplars
    if args.use_exemplars:
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task=i + 1,
                with_exemplars=True,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task * (i + 1)))
                if scenario == "task" else None) for i in range(args.tasks)
        ]
        average_precs_ex = sum(precs) / args.tasks
        # -print on screen
        if verbose:
            print(" Precision on test-set (classification using exemplars):")
            for i in range(args.tasks):
                print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
            print('=> Average precision over all {} tasks: {:.4f}\n'.format(
                args.tasks, average_precs_ex))

    if args.metrics:
        # Accuracy matrix
        if args.scenario in ('task', 'domain'):
            R = pd.DataFrame(data=metrics_dict['acc per task'],
                             index=[
                                 'after task {}'.format(i + 1)
                                 for i in range(args.tasks)
                             ])
            R.loc['at start'] = metrics_dict['initial acc per task'] if (
                not args.use_exemplars) else ['NA' for _ in range(args.tasks)]
            R = R.reindex(
                ['at start'] +
                ['after task {}'.format(i + 1) for i in range(args.tasks)])
            BWTs = [(R.loc['after task {}'.format(args.tasks),
                           'task {}'.format(i + 1)] -
                     R.loc['after task {}'.format(i + 1),
                           'task {}'.format(i + 1)])
                    for i in range(args.tasks - 1)]
            FWTs = [
                0. if args.use_exemplars else
                (R.loc['after task {}'.format(i + 1), 'task {}'.format(i + 2)] -
                 R.loc['at start', 'task {}'.format(i + 2)])
                for i in range(args.tasks - 1)
            ]
            forgetting = []
            for i in range(args.tasks - 1):
                forgetting.append(
                    max(R.iloc[1:args.tasks, i]) - R.iloc[args.tasks, i])
            R.loc['FWT (per task)'] = ['NA'] + FWTs
            R.loc['BWT (per task)'] = BWTs + ['NA']
            R.loc['F (per task)'] = forgetting + ['NA']
            BWT = sum(BWTs) / (args.tasks - 1)
            F = sum(forgetting) / (args.tasks - 1)
            FWT = sum(FWTs) / (args.tasks - 1)
            metrics_dict['BWT'] = BWT
            metrics_dict['F'] = F
            metrics_dict['FWT'] = FWT
            # -print on screen
            if verbose:
                print("Accuracy matrix")
                print(R)
                print("\nFWT = {:.4f}".format(FWT))
                print("BWT = {:.4f}".format(BWT))
                print(" F = {:.4f}\n\n".format(F))
        else:
            if verbose:
                # Accuracy matrix based only on classes in that task (i.e., evaluation as if Task-IL scenario)
                R = pd.DataFrame(
                    data=metrics_dict['acc per task (only classes in task)'],
                    index=[
                        'after task {}'.format(i + 1)
                        for i in range(args.tasks)
                    ])
                R.loc['at start'] = metrics_dict[
                    'initial acc per task (only classes in task)'] if not args.use_exemplars else [
                        'NA' for _ in range(args.tasks)
                    ]
                R = R.reindex(
                    ['at start'] +
                    ['after task {}'.format(i + 1) for i in range(args.tasks)])
                print(
                    "Accuracy matrix, based on only classes in that task ('as if Task-IL scenario')"
                )
                print(R)

                # Accuracy matrix, always based on all classes
                R = pd.DataFrame(
                    data=metrics_dict['acc per task (all classes)'],
                    index=[
                        'after task {}'.format(i + 1)
                        for i in range(args.tasks)
                    ])
                R.loc['at start'] = metrics_dict[
                    'initial acc per task (only classes in task)'] if not args.use_exemplars else [
                        'NA' for _ in range(args.tasks)
                    ]
                R = R.reindex(
                    ['at start'] +
                    ['after task {}'.format(i + 1) for i in range(args.tasks)])
                print("\nAccuracy matrix, always based on all classes")
                print(R)

                # Accuracy matrix, based on all classes up to the trained task
                R = pd.DataFrame(
                    data=metrics_dict['acc per task (all classes up to trained task)'],
                    index=[
                        'after task {}'.format(i + 1)
                        for i in range(args.tasks)
                    ])
                print(
                    "\nAccuracy matrix, based on all classes up to the trained task"
                )
                print(R)

            # Accuracy matrix, based on all classes up to the task being evaluated
            # (this is the accuracy-matrix used for calculating the metrics in the Class-IL scenario)
            R = pd.DataFrame(
                data=metrics_dict['acc per task (all classes up to evaluated task)'],
                index=[
                    'after task {}'.format(i + 1) for i in range(args.tasks)
                ])
            R.loc['at start'] = metrics_dict[
                'initial acc per task (only classes in task)'] if not args.use_exemplars else [
                    'NA' for _ in range(args.tasks)
                ]
            R = R.reindex(
                ['at start'] +
                ['after task {}'.format(i + 1) for i in range(args.tasks)])
            BWTs = [(R.loc['after task {}'.format(args.tasks),
                           'task {}'.format(i + 1)] -
                     R.loc['after task {}'.format(i + 1),
                           'task {}'.format(i + 1)])
                    for i in range(args.tasks - 1)]
            FWTs = [
                0. if args.use_exemplars else
                (R.loc['after task {}'.format(i + 1), 'task {}'.format(i + 2)] -
                 R.loc['at start', 'task {}'.format(i + 2)])
                for i in range(args.tasks - 1)
            ]
            forgetting = []
            for i in range(args.tasks - 1):
                forgetting.append(
                    max(R.iloc[1:args.tasks, i]) - R.iloc[args.tasks, i])
            R.loc['FWT (per task)'] = ['NA'] + FWTs
            R.loc['BWT (per task)'] = BWTs + ['NA']
            R.loc['F (per task)'] = forgetting + ['NA']
            BWT = sum(BWTs) / (args.tasks - 1)
            F = sum(forgetting) / (args.tasks - 1)
            FWT = sum(FWTs) / (args.tasks - 1)
            metrics_dict['BWT'] = BWT
            metrics_dict['F'] = F
            metrics_dict['FWT'] = FWT
            # -print on screen
            if verbose:
                print(
                    "\nAccuracy matrix, based on all classes up to the evaluated task"
                )
                print(R)
                print("\n=> FWT = {:.4f}".format(FWT))
                print("=> BWT = {:.4f}".format(BWT))
                print("=> F = {:.4f}\n".format(F))

    if verbose and args.time:
        print("=> Total training time = {:.1f} seconds\n".format(training_time))

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test-set
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(
        average_precs_ex if args.use_exemplars else average_precs))
    output_file.close()
    # -metrics-dict
    if args.metrics:
        file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
        utils.save_object(metrics_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        plot_name = "{}/{}.pdf".format(args.p_dir, param_stamp)
        pp = visual_plt.open_pdf(plot_name)
        # -show samples and reconstructions (either from main model or from separate generator)
        if args.feedback or args.replay == "generative":
            evaluate.show_samples(model if args.feedback else generator,
                                  config,
                                  size=args.sample_n,
                                  pdf=pp)
            for i in range(args.tasks):
                evaluate.show_reconstruction(
                    model if args.feedback else generator,
                    test_datasets[i],
                    config,
                    pdf=pp,
                    task=i + 1)
        # -show metrics reflecting progression during training
        figure_list = []  # -> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        key = "acc per task ({} task)".format(
            "all classes up to trained" if scenario == 'class' else "only classes in")
        plot_list = []
        for i in range(args.tasks):
            plot_list.append(metrics_dict[key]["task {}".format(i + 1)])
        figure = visual_plt.plot_lines(
            plot_list,
            x_axes=metrics_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([metrics_dict["average"]],
                                       x_axes=metrics_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf
        for figure in figure_list:
            pp.savefig(figure)
        # -close pdf
        pp.close()
        # -print name of generated plot on screen
        if verbose:
            print("\nGenerated plot: {}\n".format(plot_name))
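
#--------------------------------------------------------------------------------------------------#
# [Sketch, not part of the original script] A standalone restatement of the transfer metrics that
# run() computes from the accuracy matrix above, assuming acc[i][j] is the accuracy on task j+1
# after training on task i+1 and acc0[j] is the accuracy of the randomly initialized model on
# task j+1. The helper name `transfer_metrics` is hypothetical.


def transfer_metrics(acc, acc0):
    """Return average backward transfer (BWT), forward transfer (FWT) and forgetting (F)."""
    n = len(acc)
    bwt = [acc[n - 1][j] - acc[j][j] for j in range(n - 1)]
    fwt = [acc[j][j + 1] - acc0[j + 1] for j in range(n - 1)]
    f = [max(acc[i][j] for i in range(n - 1)) - acc[n - 1][j] for j in range(n - 1)]
    return sum(bwt) / (n - 1), sum(fwt) / (n - 1), sum(f) / (n - 1)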
def run(args):
    if not args.single_test:
        import pidfile
        resfile = pidfile.exclusive_dirfn(os.path.join(args.r_dir, args.save_dir))

    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters

    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # set cuda
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    scenario = args.scenario

    #-------------------------------------------------------------------------------------------------
    # DATA
    #-------------------------------------------------------------------------------------------------
    (train_datasets, test_datasets), config = get_multitask_experiment(
        args,
        name=args.experiment,
        scenario=scenario,
        tasks=args.tasks,
        data_dir=args.d_dir,
        verbose=True,
        exception=True if args.seed == 0 else False,
    )
    args.tasks = len(config['labels_per_task'])
    args.labels_per_task = config['labels_per_task']
    if not args.task_boundary:
        args.iterations_per_virtual_epc = config['iterations_per_virtual_epc']
        args.task_dict = config['task_dict']

    #-------------------------------------------------------------------------------------------------
    # MODEL
    #-------------------------------------------------------------------------------------------------
    if args.ebm:
        model = EBM(args,
                    image_size=config['size'],
                    image_channels=config['channels'],
                    classes=config['num_classes'],
                    fc_units=args.fc_units).to(device)
    else:
        model = Classifier(args,
                           image_size=config['size'],
                           image_channels=config['channels'],
                           classes=config['num_classes'],
                           fc_units=args.fc_units).to(device)

    if args.experiment == 'cifar100':
        model = utils.init_params(model, args)
        for param in model.convE.parameters():
            param.requires_grad = False

    if args.pretrain:
        checkpoint = torch.load(args.pretrain)
        best_acc = checkpoint['best_acc']
        checkpoint_state = checkpoint['state_dict']
        print('-----------------------------------------------------------------------------')
        print('load pretrained model %s' % args.pretrain)
        print('best_acc', best_acc)
        print('-----------------------------------------------------------------------------')
        model_dict = model.fcE.state_dict()
        # strip the 'module.' prefix (added by DataParallel) and keep only matching keys
        checkpoint_state = {k[7:]: v for k, v in checkpoint_state.items() if k[7:] in model_dict}
        del checkpoint_state['classifier.weight']
        del checkpoint_state['classifier.bias']
        if 'y_ebm.weight' in checkpoint_state:
            del checkpoint_state['y_ebm.weight']
        model_dict.update(checkpoint_state)
        model.fcE.load_state_dict(model_dict)
        for param in model.fcE.model.parameters():
            param.requires_grad = False

    model.optim_list = [{
        'params': filter(lambda p: p.requires_grad, model.parameters()),
        'lr': args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError("Unrecognized optimizer, '{}' is not currently a valid option".format(args.optimizer))

    #-------------------------------------------------------------------------------------------------
    # CL-STRATEGY: ALLOCATION
    #-------------------------------------------------------------------------------------------------
    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    #-------------------------------------------------------------------------------------------------
    # Get parameter-stamp (and print on screen)
    #-------------------------------------------------------------------------------------------------
    param_stamp = get_param_stamp(args, model.name, verbose=True)
    param_stamp = param_stamp + '--' + args.model_name

    # -define [precision_dict] to keep track of performance during training, both for storing
    #  and for later plotting to pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)

    #-------------------------------------------------------------------------------------------------
    # CALLBACKS
    #-------------------------------------------------------------------------------------------------
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log, model=model, tasks=args.tasks, iters_per_task=args.iters)
    ]
    eval_cb = cb._eval_cb(log=args.prec_log, test_datasets=test_datasets, visdom=args.visdom,
                          precision_dict=None, iters_per_task=args.iters, test_size=args.prec_n,
                          labels_per_task=config['labels_per_task'], scenario=scenario)
    eval_cb_full = cb._eval_cb(log=args.iters, test_datasets=test_datasets,
                               precision_dict=precision_dict, iters_per_task=args.iters,
                               labels_per_task=config['labels_per_task'], scenario=scenario)
    eval_cbs = [eval_cb, eval_cb_full]

    #-------------------------------------------------------------------------------------------------
    # TRAINING
    #-------------------------------------------------------------------------------------------------
    print("--> Training:")
    start = time.time()
    if args.task_boundary:
        train_cl(args, model, train_datasets, scenario=scenario,
                 labels_per_task=config['labels_per_task'], iters=args.iters,
                 batch_size=args.batch, eval_cbs=eval_cbs, loss_cbs=solver_loss_cbs)
    else:
        train_cl_noboundary(args, model, train_datasets, scenario=scenario,
                            labels_per_task=config['labels_per_task'], iters=args.iters,
                            batch_size=args.batch, eval_cbs=eval_cbs, loss_cbs=solver_loss_cbs)
    training_time = time.time() - start

    #-------------------------------------------------------------------------------------------------
    # EVALUATION
    #-------------------------------------------------------------------------------------------------
    print("\n\n--> Evaluation ({}-incremental learning scenario):".format(args.scenario))
    if args.ebm:
        precs = [
            evaluate.validate_ebm(args, model, test_datasets[i], verbose=False, test_size=None,
                                  task=i + 1, with_exemplars=False, current_task=args.tasks)
            for i in range(args.tasks)
        ]
    else:
        precs = [
            evaluate.validate(args, model, test_datasets[i], verbose=False, test_size=None,
                              task=i + 1, with_exemplars=False, current_task=args.tasks)
            for i in range(args.tasks)
        ]
    print("\n Precision on test-set (softmax classification):")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('average precision over all {} tasks: {:.4f}'.format(args.tasks, average_precs))

    #-------------------------------------------------------------------------------------------------
    # OUTPUT
    #-------------------------------------------------------------------------------------------------
    if not os.path.exists(os.path.join(args.r_dir, args.save_dir)):
        os.makedirs(os.path.join(args.r_dir, args.save_dir))
    output_file = open("{}/{}/{}.txt".format(args.r_dir, args.save_dir, param_stamp), 'w')
    output_file.write("Training time {} \n".format(training_time))
    for i in range(args.tasks):
        output_file.write(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    output_file.write("\n")
    output_file.write(' - Average {}\n'.format(average_precs))
    output_file.close()
    file_name = "{}/{}/{}".format(args.r_dir, args.save_dir, param_stamp)
    utils.save_object(precision_dict, file_name)

    if args.pdf:
        pp = visual_plt.open_pdf("{}/{}/{}.pdf".format(args.r_dir, args.save_dir, param_stamp))
        # -show metrics reflecting progression during training
        figure_list = []  # -> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"], x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines(
            [precision_dict["average"]], x_axes=precision_dict["x_task"],
            line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf (and close this pdf)
        for figure in figure_list:
            pp.savefig(figure)
        pp.close()

    if not args.single_test:
        resfile.done()
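# A minimal sketch (an assumption, inferred only from how [precision_dict] is used above) of the
# structure that evaluate.initiate_precision_dict(n_tasks) presumably returns: one precision
# trace per task under "all_tasks", their running average under "average", and the shared
# x-axis of evaluation points under "x_task".
def initiate_precision_dict_sketch(n_tasks):
    return {
        "all_tasks": [[] for _ in range(n_tasks)],  # per-task precision traces
        "average": [],                              # average precision over tasks seen so far
        "x_task": [],                               # iteration count at each evaluation point
    }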
def train_and_evaluate(cfg):
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")

    # initialize the tensorboard summary writer
    writer = SummaryWriter(experiment_dir + '/tboard')

    # get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(cfg, val_split=.2)

    # Load the model
    model = models.get_model(cfg)
    if cfg.ssl_pretrained_exp_path:
        ssl_exp_dir = os.path.join('experiments', 'self-supervised', cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight), map_location=device)
        # the stored dict has three entries: epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        model.load_state_dict(state_dict, strict=False)
        # Only finetune the fc layer
        for name, param in model.named_parameters():
            if 'fc' not in name:
                param.requires_grad = False
    model = model.to(device)

    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)

    # follow the same setting as the RotNet paper
    optimizer = optim.SGD(model.parameters(), lr=float(cfg.lr), momentum=float(cfg.momentum),
                          weight_decay=5e-4, nesterov=True)
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160, 200], gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()

    best_loss = 1000
    for epoch in range(cfg.num_epochs + 1):
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train, optimizer, scheduler,
                                      criterion, experiment_dir, writer)

        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val, criterion, experiment_dir, writer)
        logging.info('Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(epoch + 1, val_loss, val_acc))

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0:
            utils.save_checkpoint(
                {'Epoch': epoch, 'state_dict': model.state_dict(), 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_{}rot_epoch{}_checkpoint.pth'.format(cfg.network.lower(), str(cfg.num_rot), str(epoch)),
                best_model='{}_{}rot_epoch{}_best.pth'.format(cfg.network.lower(), str(cfg.num_rot), str(epoch)))
    writer.close()

    logging.info('\nEvaluate on test')
    test_loss, test_acc = test(model, device, dloader_test, criterion, experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(test_loss, test_acc))

    # save the configuration file within that experiment directory
    utils.save_yaml(cfg, save_path=os.path.join(experiment_dir, 'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
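# A minimal, self-contained sketch of the linear-probe pattern used above, under stated
# assumptions: torchvision's resnet18 stands in for models.get_model, and the checkpoint path
# and key layout ('state_dict' with plain 'fc.*' keys) are hypothetical. Drop the pretext
# classifier keys, load the rest with strict=False, then freeze everything except the new head.
import torch
from torchvision import models as tv_models

def load_ssl_backbone_sketch(ckpt_path='ssl_checkpoint.pth', num_classes=10):
    model = tv_models.resnet18(num_classes=num_classes)
    state = torch.load(ckpt_path, map_location='cpu')['state_dict']
    # the pretext task's classifier does not match the downstream task, so drop it
    state.pop('fc.weight', None)
    state.pop('fc.bias', None)
    model.load_state_dict(state, strict=False)  # strict=False tolerates the missing fc keys
    for name, param in model.named_parameters():
        param.requires_grad = 'fc' in name  # finetune only the fc layer
    return model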
def test_valid(self):
    fileName = self.param
    print(fileName)
    self.assertEqual(validate(fileName), 0)
def train(model, device, train_loader, val_loader, test_loader, optimizer, epoch,
          batch_log_interval=10, patience=20, min_delta=0.003):
    """
    Runs the training script of the model.

    Args:
        model (obj): which model to train
        device (torch.device): device to run on (cpu, or cuda if enabled)
        train_loader (torch.utils.data.dataloader.DataLoader): dataloader object
        val_loader (torch.utils.data.dataloader.DataLoader): dataloader object for validation
        test_loader (torch.utils.data.dataloader.DataLoader): dataloader object for testing at
            the would-be early-stopping iteration
        optimizer (torch.optim obj): which optimizer to use
        epoch (int): which epoch we're on
        batch_log_interval (int): how often to log results
        patience (int): how many iterations/batches (not epochs) we tolerate a val_loss
            improvement < min_delta
        min_delta (float): early-stopping threshold; if the val_loss improvement stays below
            min_delta for [patience] iterations, we consider early stopping to have occurred

    Returns:
        stop (bool): whether early stopping would have occurred
    """
    print('min_delta:', min_delta)
    no_improvement_count = 0
    stop = False
    print('IN TRAIN, DEVICE:', device)
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = model.loss(output, target)
        loss.backward()
        optimizer.step()

        # for each iteration (i.e., batch), get the validation loss for early stopping
        if batch_idx == 0:
            val_loss = validate(model, device, val_loader)
        else:
            prev_val_loss = val_loss
            val_loss = validate(model, device, val_loader)

            # no improvement on this batch (a signed difference, so a worsening val_loss
            # also counts as no improvement)
            if (prev_val_loss - val_loss) < min_delta:
                no_improvement_count += 1
            else:
                no_improvement_count = 0

            # trigger early stopping
            if no_improvement_count == patience:
                print('Early Stopping Triggered at iteration {} within epoch. Done Training. '
                      'val_loss = {:.6f}, prev_val_loss = {:.6f}'.format(batch_idx, val_loss, prev_val_loss))
                test(model, device, test_loader)
                stop = True
                return stop, batch_idx

        if batch_idx % batch_log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return stop, None
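# A tiny worked example of the patience rule above, on an illustrative loss sequence: each step
# whose improvement (prev - curr) falls below min_delta increments the counter, any sufficient
# improvement resets it, and training stops once the counter reaches [patience].
def early_stop_demo(losses=(1.00, 0.90, 0.899, 0.8985, 0.898), patience=3, min_delta=0.003):
    no_improvement = 0
    for prev, curr in zip(losses, losses[1:]):
        no_improvement = no_improvement + 1 if (prev - curr) < min_delta else 0
        if no_improvement == patience:
            return True  # stop: three near-flat steps in a row
    return False

print(early_stop_demo())  # -> True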
def train_and_evaluate(cfg):
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")

    # initialize the tensorboard summary writer
    logs = os.path.join('experiments', cfg.exp_type, 'tboard_sup_demo')
    writer = SummaryWriter(logs + '/rotnet_without_pretrain')

    # get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(cfg)

    # Load the model
    model = models.get_model(cfg)

    if cfg.use_pretrained:
        pretrained_path = os.path.join('experiments', 'supervised', cfg.pretrained_dir, cfg.pretrained_weights)
        state_dict = torch.load(pretrained_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        logging.info('loading pretrained_weights {}'.format(cfg.pretrained_weights))

    if cfg.use_ssl:
        ssl_exp_dir = os.path.join('experiments', 'self-supervised', cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight), map_location=device)
        # the stored dict has three entries: epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        print(state_dict.keys())
        # drop the pretext-task classifier and the later conv blocks, so they are re-initialized
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        del state_dict['layer4.0.conv1.weight']
        del state_dict['layer4.0.conv2.weight']
        del state_dict['layer4.1.conv1.weight']
        del state_dict['layer4.1.conv2.weight']
        del state_dict['layer3.0.conv1.weight']
        del state_dict['layer3.0.conv2.weight']
        del state_dict['layer3.1.conv1.weight']
        del state_dict['layer3.1.conv2.weight']
        model.load_state_dict(state_dict, strict=False)

        # Only finetune the fc layer and the later conv blocks
        # (a membership test per substring; a bare `'fc' or 'layer3.0.conv' in name` would
        #  always be truthy, see the demonstration after this function)
        finetune_layers = ('fc', 'layer3.0.conv', 'layer3.1.conv', 'layer4.0.conv', 'layer4.1.conv')
        for name, param in model.named_parameters():
            param.requires_grad = any(layer in name for layer in finetune_layers)

    model = model.to(device)

    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)

    # follow the same setting as the RotNet paper
    if cfg.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=float(cfg.lr), momentum=float(cfg.momentum),
                              weight_decay=5e-4, nesterov=True)
    elif cfg.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=float(cfg.lr))
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160, 200], gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()

    global iter_cnt
    iter_cnt = 0
    best_loss = 1000
    for epoch in range(cfg.num_epochs):
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train, optimizer, scheduler,
                                      criterion, experiment_dir, writer)

        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val, criterion, experiment_dir, writer)
        logging.info('Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(epoch, val_loss, val_acc))

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0 or is_best:
            utils.save_checkpoint(
                {'Epoch': epoch, 'state_dict': model.state_dict(), 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_epoch{}_checkpoint.pth'.format(cfg.network.lower(), str(epoch)),
                best_model='{}_best.pth'.format(cfg.network.lower()))
    writer.close()

    logging.info('\nEvaluate test result on best ckpt')
    state_dict = torch.load(os.path.join(experiment_dir, '{}_best.pth'.format(cfg.network.lower())),
                            map_location=device)
    model.load_state_dict(state_dict, strict=False)
    test_loss, test_acc = test(model, device, dloader_test, criterion, experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(test_loss, test_acc))

    # save the configuration file within that experiment directory
    utils.save_yaml(cfg, save_path=os.path.join(experiment_dir, 'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
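# A short aside on the substring test above: in Python, `'fc' or 'layer4.0.conv' in name`
# parses as `'fc' or ('layer4.0.conv' in name)`, and the non-empty string 'fc' is always
# truthy, so every parameter would get requires_grad=True. The membership test has to be
# applied per substring.
layers = ('fc', 'layer3.0.conv', 'layer3.1.conv', 'layer4.0.conv', 'layer4.1.conv')
name = 'layer1.0.conv1.weight'                     # a parameter that should stay frozen
print(bool('fc' or 'layer4.0.conv' in name))       # True  -- the always-truthy form
print(any(layer in name for layer in layers))      # False -- the intended test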
def train_cl(model, train_datasets, test_datasets, replay_mode="none", classes_per_task=None,
             iters=2000, batch_size=32, generator=None, gen_iters=0, gen_loss_cbs=list(),
             loss_cbs=list(), eval_cbs=list(), sample_cbs=list(), use_exemplars=True,
             add_exemplars=False, eval_cbs_exemplars=list(), savepath='./'):
    '''Train a model (with a "train_a_batch" method) on multiple tasks, with the replay strategy
    specified by [replay_mode].

    [model]             <nn.Module> main model to optimize across all tasks
    [train_datasets]    <list> with for each task the training <DataSet>
    [replay_mode]       <str>, choice from "generative", "exact", "current", "offline" and "none"
    [classes_per_task]  <int>, # of classes per task
    [iters]             <int>, # of optimization-steps (i.e., # of batches) per task
    [generator]         None or <nn.Module>, if a separate generative model should be trained
                        (for [gen_iters] per task)
    [*_cbs]             <list> of callback functions to evaluate training progress'''

    # Set model in training-mode
    model.train()

    # Use cuda?
    cuda = model._is_on_cuda()
    device = model._device()

    # Initiate possible sources for replay (no replay for 1st task)
    Exact = Generative = Current = False
    previous_model = None

    # Register starting param-values (needed for "intelligent synapses")
    if isinstance(model, ContinualLearner) and (model.si_c > 0):
        for n, p in model.named_parameters():
            if p.requires_grad:
                n = n.replace('.', '__')
                model.register_buffer('{}_SI_prev_task'.format(n), p.data.clone())

    # Loop over all tasks
    for task, train_dataset in enumerate(train_datasets, 1):

        # If offline replay-setting, create large database of all tasks so far
        if replay_mode == "offline":
            train_dataset = ConcatDataset(train_datasets[:task])

        # Add exemplars (if available) to current dataset (if requested)
        if add_exemplars and task > 1:
            # --- AD-HOC SOLUTION: permMNIST needs transform to tensor, while splitMNIST does not ---
            if len(train_datasets) > 6:
                target_transform = lambda y, x=classes_per_task: torch.tensor(y % x)
            else:
                target_transform = lambda y, x=classes_per_task: y % x
            # -----------------------------------------------------------------------------------------
            exemplar_dataset = ExemplarDataset(model.exemplar_sets, target_transform=target_transform)
            training_dataset = ConcatDataset([train_dataset, exemplar_dataset])
        else:
            training_dataset = train_dataset

        # Prepare <dicts> to store running importance estimates and param-values before update
        # ("Synaptic Intelligence")
        if isinstance(model, ContinualLearner) and (model.si_c > 0):
            W = {}
            p_old = {}
            for n, p in model.named_parameters():
                if p.requires_grad:
                    n = n.replace('.', '__')
                    W[n] = p.data.clone().zero_()
                    p_old[n] = p.data.clone()

        # Find [active_classes]
        active_classes = None  # -> for the Domain-IL scenario, all classes are always active

        # Reset state of optimizer(s) for every task (if requested)
        if model.optim_type == "adam_reset":
            model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
        if (generator is not None) and generator.optim_type == "adam_reset":
            generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))

        # Initialize # iters left on current data-loader(s)
        iters_left = iters_left_previous = 1

        # Define tqdm progress bar(s)
        progress = tqdm.tqdm(range(1, iters + 1))
        if generator is not None:
            progress_gen = tqdm.tqdm(range(1, gen_iters + 1))

        # Loop over all iterations
        iters_to_use = iters if (generator is None) else max(iters, gen_iters)
        for batch_index in range(1, iters_to_use + 1):

            # Update # iters left on current data-loader(s) and, if needed, create new one(s)
            iters_left -= 1
            if iters_left == 0:
                data_loader = iter(utils.get_data_loader(training_dataset, batch_size, cuda=cuda, drop_last=True))
                # NOTE: [train_dataset]    is the training-set of the current task
                #       [training_dataset] is the training-set of the current task with stored
                #                          exemplars added (if requested)
                iters_left = len(data_loader)
            if Exact:
                iters_left_previous -= 1
                if iters_left_previous == 0:
                    batch_size_to_use = min(batch_size, len(ConcatDataset(previous_datasets)))
                    data_loader_previous = iter(utils.get_data_loader(ConcatDataset(previous_datasets),
                                                                      batch_size_to_use, cuda=cuda, drop_last=True))
                    iters_left_previous = len(data_loader_previous)

            # -----------------Collect data------------------#

            #####-----CURRENT BATCH-----#####
            x, y = next(data_loader)           # --> sample training data of current task
            x, y = x.to(device), y.to(device)  # --> transfer them to correct device
            scores = None

            #####-----REPLAYED BATCH-----#####
            if not Exact and not Generative and not Current:
                x_ = y_ = scores_ = None  # -> if no replay

            ##-->> Exact Replay <<--##
            if Exact:
                scores_ = None
                # Sample replayed training data, move to correct device
                x_, y_ = next(data_loader_previous)
                x_ = x_.to(device)
                y_ = y_.to(device) if (model.replay_targets == "hard") else None
                # If required, get target scores (i.e., [scores_]) -- using previous model, with no_grad()
                if (model.replay_targets == "soft"):
                    with torch.no_grad():
                        scores_ = previous_model(x_)

            ##-->> Generative / Current Replay <<--##
            if Generative or Current:
                # Get replayed data (i.e., [x_]) -- either current data or use previous generator
                x_ = x if Current else previous_generator.sample(batch_size)
                # Get target scores and labels (i.e., [scores_] / [y_]) -- using previous model, with no_grad()
                with torch.no_grad():
                    scores_ = previous_model(x_)
                _, y_ = torch.max(scores_, dim=1)
                # Only keep predicted y/scores if required (as otherwise unnecessary computations will be done)
                y_ = y_ if (model.replay_targets == "hard") else None
                scores_ = scores_ if (model.replay_targets == "soft") else None

            # ---> Train MAIN MODEL
            if batch_index <= iters:
                # Train the main model with this batch
                loss_dict = model.train_a_batch(x, y, x_=x_, y_=y_, scores=scores, scores_=scores_,
                                                active_classes=active_classes, rnt=1. / task)

                # Update running parameter-importance estimates in W
                if isinstance(model, ContinualLearner) and (model.si_c > 0):
                    for n, p in model.named_parameters():
                        if p.requires_grad:
                            n = n.replace('.', '__')
                            if p.grad is not None:
                                W[n].add_(-p.grad * (p.detach() - p_old[n]))
                            p_old[n] = p.detach().clone()

                # Fire callbacks (for visualization of training-progress / evaluating performance after each task)
                for loss_cb in loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress, batch_index, loss_dict, task=task)
                for eval_cb in eval_cbs:
                    if eval_cb is not None:
                        eval_cb(model, batch_index, task=task)
                if model.label == "VAE":
                    for sample_cb in sample_cbs:
                        if sample_cb is not None:
                            sample_cb(model, batch_index, task=task)

            # ---> Train GENERATOR
            if generator is not None and batch_index <= gen_iters:
                # Train the generator with this batch
                loss_dict = generator.train_a_batch(x, y, x_=x_, y_=y_, scores_=scores_,
                                                    active_classes=active_classes, rnt=1. / task)
                # Fire callbacks on each iteration
                for loss_cb in gen_loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress_gen, batch_index, loss_dict, task=task)
                for sample_cb in sample_cbs:
                    if sample_cb is not None:
                        sample_cb(generator, batch_index, task=task)

        ##----------> UPON FINISHING EACH TASK...

        # Close progress-bar(s)
        progress.close()
        if generator is not None:
            progress_gen.close()

        # EWC: estimate Fisher Information matrix (FIM) and update term for quadratic penalty
        if isinstance(model, ContinualLearner) and (model.ewc_lambda > 0):
            # -find allowed classes
            allowed_classes = None
            # -estimate FI-matrix
            model.estimate_fisher(training_dataset, allowed_classes=allowed_classes)

        # SI: calculate and update the normalized path integral
        if isinstance(model, ContinualLearner) and (model.si_c > 0):
            model.update_omega(W, model.epsilon)

        # EXEMPLARS: update exemplar sets
        if (add_exemplars or use_exemplars) or replay_mode == "exemplars":
            exemplars_per_class = int(np.floor(model.memory_budget / (classes_per_task * task)))
            # reduce exemplar-sets
            model.reduce_exemplar_sets(exemplars_per_class)
            # for each new class trained on, construct exemplar-set
            new_classes = list(range(classes_per_task))
            for class_id in new_classes:
                start = time.time()
                # create new dataset containing only all examples of this class
                class_dataset = SubDataset(original_dataset=train_dataset, sub_labels=[class_id])
                # based on this dataset, construct new exemplar-set for this class
                model.construct_exemplar_set(dataset=class_dataset, n=exemplars_per_class)
                print("Constructed exemplar-set for class {}: {} seconds".format(class_id, round(time.time() - start)))
            model.compute_means = True
            # evaluate this way of classifying on test set
            for eval_cb in eval_cbs_exemplars:
                if eval_cb is not None:
                    eval_cb(model, iters, task=task)

        # REPLAY: update source for replay
        previous_model = copy.deepcopy(model).eval()
        if replay_mode == 'generative':
            Generative = True
            previous_generator = copy.deepcopy(generator).eval() if generator is not None else previous_model
        elif replay_mode == 'current':
            Current = True
        elif replay_mode in ('exemplars', 'exact'):
            Exact = True
            if replay_mode == "exact":
                previous_datasets = train_datasets[:task]
            else:
                target_transform = (lambda y, x=classes_per_task: y % x)
                previous_datasets = [ExemplarDataset(model.exemplar_sets, target_transform=target_transform)]

        # Evaluate precision on all test-sets so far and append to [output] / [output5]
        # (assumed to be module-level lists defined elsewhere)
        precs = [evaluate.validate(model, test_datasets[i], verbose=False, test_size=None, task=i + 1,
                                   with_exemplars=False, allowed_classes=None)
                 for i in range(len(test_datasets))]
        output.append(precs)
        precs5 = [evaluate.validate5(model, test_datasets[i], verbose=False, test_size=None, task=i + 1,
                                     with_exemplars=False, allowed_classes=None)
                  for i in range(len(test_datasets))]
        output5.append(precs5)
        os.makedirs(savepath + '/top5', exist_ok=True)
        savepath1 = savepath + '/' + str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + '.csv'
        f = open(savepath1, 'w')
        writer = csv.writer(f)
        writer.writerows(output)
        f.close()
        savepath5 = savepath + '/top5/' + str(datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')) + '.csv'
        f = open(savepath5, 'w')
        writer = csv.writer(f)
        writer.writerows(output5)
        f.close()
        print(savepath)
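# A tiny worked example of the fixed-budget exemplar schedule above, with an assumed budget of
# 2000 stored images and 10 classes per task: the per-class allowance shrinks as more classes
# are seen, since the total budget is divided over all classes so far.
import numpy as np

memory_budget, classes_per_task = 2000, 10
for task in range(1, 5):
    exemplars_per_class = int(np.floor(memory_budget / (classes_per_task * task)))
    print('task {}: {} exemplars per class'.format(task, exemplars_per_class))
# -> task 1: 200, task 2: 100, task 3: 66, task 4: 50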
def train_cl(model, train_datasets, replay_mode="none", scenario="class", classes_per_task=None,
             iters=2000, batch_size=32, generator=None, gen_iters=0, gen_loss_cbs=list(),
             loss_cbs=list(), eval_cbs=list(), sample_cbs=list(), use_exemplars=True,
             add_exemplars=False, eval_cbs_exemplars=list(), sparsity=0., x_tasks=5,
             test_datasets=None):
    '''Train a model (with a "train_a_batch" method) on multiple tasks, with the replay strategy
    specified by [replay_mode].

    [model]             <nn.Module> main model to optimize across all tasks
    [train_datasets]    <list> with for each task the training <DataSet>
    [replay_mode]       <str>, choice from "generative", "exact", "current", "offline" and "none"
    [scenario]          <str>, choice from "task", "domain" and "class"
    [classes_per_task]  <int>, # of classes per task
    [iters]             <int>, # of optimization-steps (i.e., # of batches) per task
    [generator]         None or <nn.Module>, if a separate generative model should be trained
                        (for [gen_iters] per task)
    [*_cbs]             <list> of callback functions to evaluate training progress'''

    # Set model in training-mode
    model.train()

    # Use cuda?
    cuda = model._is_on_cuda()
    device = model._device()

    # Initiate possible sources for replay (no replay for 1st task)
    Exact = Generative = Current = False
    previous_model = None

    # Register starting param-values (needed for "intelligent synapses")
    if isinstance(model, ContinualLearner) and (model.si_c > 0):
        for n, p in model.named_parameters():
            if p.requires_grad:
                n = n.replace('.', '__')
                model.register_buffer('{}_SI_prev_task'.format(n), p.data.clone())

    # Loop over all tasks
    for task, train_dataset in enumerate(train_datasets, 1):

        # If offline replay-setting, create large database of all tasks so far
        if replay_mode == "offline" and (not scenario == "task"):
            train_dataset = ConcatDataset(train_datasets[:task])
        # -but if "offline"+"task"-scenario: all tasks so far are included in 'exact replay' and
        #  there is no current batch
        if replay_mode == "offline" and scenario == "task":
            Exact = True
            previous_datasets = train_datasets

        # Add exemplars (if available) to current dataset (if requested)
        if add_exemplars and task > 1:
            # --- AD-HOC SOLUTION: permMNIST needs transform to tensor, while splitMNIST does not ---
            if len(train_datasets) > 6:
                target_transform = (lambda y, x=classes_per_task: torch.tensor(y % x)) if (
                    scenario == "domain") else (lambda y: torch.tensor(y))
            else:
                target_transform = (lambda y, x=classes_per_task: y % x) if scenario == "domain" else None
            # -----------------------------------------------------------------------------------------
            exemplar_dataset = ExemplarDataset(model.exemplar_sets, target_transform=target_transform)
            training_dataset = ConcatDataset([train_dataset, exemplar_dataset])
        else:
            training_dataset = train_dataset

        # Prepare <dicts> to store running importance estimates and param-values before update
        # ("Synaptic Intelligence")
        if isinstance(model, ContinualLearner) and (model.si_c > 0):
            W = {}
            p_old = {}
            for n, p in model.named_parameters():
                if p.requires_grad:
                    n = n.replace('.', '__')
                    W[n] = p.data.clone().zero_()
                    p_old[n] = p.data.clone()

        # Find [active_classes]
        active_classes = None  # -> for the Domain-IL scenario, all classes are always active
        if scenario == "task":
            # -for the Task-IL scenario, create a <list> with for each task so far a <list> of its active classes
            active_classes = [list(range(classes_per_task * i, classes_per_task * (i + 1))) for i in range(task)]
        elif scenario == "class":
            # -for the Class-IL scenario, create one <list> with the active classes of all tasks so far
            active_classes = list(range(classes_per_task * task))

        # Reset state of optimizer(s) for every task (if requested)
        if model.optim_type == "adam_reset":
            model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
        if (generator is not None) and generator.optim_type == "adam_reset":
            generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))

        # Initialize # iters left on current data-loader(s)
        iters_left = iters_left_previous = 1
        if scenario == "task":
            up_to_task = task if replay_mode == "offline" else task - 1
            iters_left_previous = [1] * up_to_task
            data_loader_previous = [None] * up_to_task

        # Define tqdm progress bar(s)
        progress = tqdm.tqdm(range(1, iters + 1))
        if generator is not None:
            progress_gen = tqdm.tqdm(range(1, gen_iters + 1))

        # Loop over all iterations
        iters_to_use = iters if (generator is None) else max(iters, gen_iters)
        for batch_index in range(1, iters_to_use + 1):

            # Update # iters left on current data-loader(s) and, if needed, create new one(s)
            iters_left -= 1
            if iters_left == 0:
                data_loader = iter(utils.get_data_loader(training_dataset, batch_size, cuda=cuda, drop_last=True))
                # NOTE: [train_dataset]    is the training-set of the current task
                #       [training_dataset] is the training-set of the current task with stored
                #                          exemplars added (if requested)
                iters_left = len(data_loader)
            if Exact:
                if scenario == "task":
                    up_to_task = task if replay_mode == "offline" else task - 1
                    batch_size_replay = int(np.ceil(batch_size / up_to_task)) if (up_to_task > 1) else batch_size
                    # -in Task-IL scenario, need separate replay for each task
                    for task_id in range(up_to_task):
                        batch_size_to_use = min(batch_size_replay, len(previous_datasets[task_id]))
                        iters_left_previous[task_id] -= 1
                        if iters_left_previous[task_id] == 0:
                            data_loader_previous[task_id] = iter(utils.get_data_loader(
                                previous_datasets[task_id], batch_size_to_use, cuda=cuda, drop_last=True))
                            iters_left_previous[task_id] = len(data_loader_previous[task_id])
                else:
                    iters_left_previous -= 1
                    if iters_left_previous == 0:
                        batch_size_to_use = min(batch_size, len(ConcatDataset(previous_datasets)))
                        data_loader_previous = iter(utils.get_data_loader(ConcatDataset(previous_datasets),
                                                                          batch_size_to_use, cuda=cuda,
                                                                          drop_last=True))
                        iters_left_previous = len(data_loader_previous)

            # -----------------Collect data------------------#

            #####-----CURRENT BATCH-----#####
            if replay_mode == "offline" and scenario == "task":
                x = y = scores = None
            else:
                x, y = next(data_loader)                                            # --> sample training data of current task
                y = y - classes_per_task * (task - 1) if scenario == "task" else y  # --> ITL: adjust y-targets to 'active range'
                x, y = x.to(device), y.to(device)                                   # --> transfer them to correct device

                # If --bce & --bce-distill & scenario=="class", calculate scores of current batch with previous model
                binary_distillation = hasattr(model, "binaryCE") and model.binaryCE and model.binaryCE_distill
                if binary_distillation and scenario == "class" and (previous_model is not None):
                    with torch.no_grad():
                        scores = previous_model(x)[:, :(classes_per_task * (task - 1))]
                else:
                    scores = None

            #####-----REPLAYED BATCH-----#####
            if not Exact and not Generative and not Current:
                x_ = y_ = scores_ = None  # -> if no replay

            ##-->> Exact Replay <<--##
            if Exact:
                scores_ = None
                if scenario in ("domain", "class"):
                    # Sample replayed training data, move to correct device
                    x_, y_ = next(data_loader_previous)
                    x_ = x_.to(device)
                    y_ = y_.to(device) if (model.replay_targets == "hard") else None
                    # If required, get target scores (i.e., [scores_]) -- using previous model, with no_grad()
                    if (model.replay_targets == "soft"):
                        with torch.no_grad():
                            scores_ = previous_model(x_)
                        scores_ = scores_[:, :(classes_per_task * (task - 1))] if scenario == "class" else scores_
                        # -> when scenario=="class", zero probabilities will be added in the [utils.loss_fn_kd]-function
                elif scenario == "task":
                    # Sample replayed training data, wrap in (cuda-)Variables and store in lists
                    x_ = list()
                    y_ = list()
                    up_to_task = task if replay_mode == "offline" else task - 1
                    for task_id in range(up_to_task):
                        x_temp, y_temp = next(data_loader_previous[task_id])
                        x_.append(x_temp.to(device))
                        # -only keep [y_] if required (as otherwise unnecessary computations will be done)
                        if model.replay_targets == "hard":
                            y_temp = y_temp - (classes_per_task * task_id)  # -> adjust y-targets to 'active range'
                            y_.append(y_temp.to(device))
                        else:
                            y_.append(None)
                    # If required, get target scores (i.e., [scores_]) -- using previous model
                    if (model.replay_targets == "soft") and (previous_model is not None):
                        scores_ = list()
                        for task_id in range(up_to_task):
                            with torch.no_grad():
                                scores_temp = previous_model(x_[task_id])
                            scores_temp = scores_temp[:, (classes_per_task * task_id):(classes_per_task * (task_id + 1))]
                            scores_.append(scores_temp)

            ##-->> Generative / Current Replay <<--##
            if Generative or Current:
                # Get replayed data (i.e., [x_]) -- either current data or use previous generator
                x_ = x if Current else previous_generator.sample(batch_size)

                # Get target scores and labels (i.e., [scores_] / [y_]) -- using previous model, with no_grad()
                # -if there is no task-specific mask, obtain all predicted scores at once
                if (not hasattr(previous_model, "mask_dict")) or (previous_model.mask_dict is None):
                    with torch.no_grad():
                        all_scores_ = previous_model(x_)
                # -depending on chosen scenario, collect relevant predicted scores (per task, if required)
                if scenario in ("domain", "class") and (
                        (not hasattr(previous_model, "mask_dict")) or (previous_model.mask_dict is None)):
                    scores_ = all_scores_[:, :(classes_per_task * (task - 1))] if scenario == "class" else all_scores_
                    _, y_ = torch.max(scores_, dim=1)
                else:
                    # NOTE: it's possible to have scenario=domain with task-mask (so actually it's the Task-IL scenario)
                    # -[x_] needs to be evaluated according to each previous task, so make a list with an entry per task
                    scores_ = list()
                    y_ = list()
                    for task_id in range(task - 1):
                        # -if there is a task-mask (i.e., XdG is used), obtain predicted scores for each task separately
                        if hasattr(previous_model, "mask_dict") and previous_model.mask_dict is not None:
                            previous_model.apply_XdGmask(task=task_id + 1)
                            with torch.no_grad():
                                all_scores_ = previous_model(x_)
                        if scenario == "domain":
                            temp_scores_ = all_scores_
                        else:
                            temp_scores_ = all_scores_[:, (classes_per_task * task_id):(classes_per_task * (task_id + 1))]
                        _, temp_y_ = torch.max(temp_scores_, dim=1)
                        scores_.append(temp_scores_)
                        y_.append(temp_y_)

                # Only keep predicted y/scores if required (as otherwise unnecessary computations will be done)
                y_ = y_ if (model.replay_targets == "hard") else None
                scores_ = scores_ if (model.replay_targets == "soft") else None

            # ---> Train MAIN MODEL
            if batch_index <= iters:
                # Train the main model with this batch
                loss_dict = model.train_a_batch(x, y, x_=x_, y_=y_, scores=scores, scores_=scores_,
                                                active_classes=active_classes, task=task, rnt=1. / task)

                # Update running parameter-importance estimates in W
                if isinstance(model, ContinualLearner) and (model.si_c > 0):
                    for n, p in model.named_parameters():
                        if p.requires_grad:
                            n = n.replace('.', '__')
                            if p.grad is not None:
                                W[n].add_(-p.grad * (p.detach() - p_old[n]))
                            p_old[n] = p.detach().clone()

                # Fire callbacks (for visualization of training-progress / evaluating performance after each task)
                for loss_cb in loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress, batch_index, loss_dict, task=task)
                for eval_cb in eval_cbs:
                    if eval_cb is not None:
                        eval_cb(model, batch_index, task=task)
                if model.label == "VAE":
                    for sample_cb in sample_cbs:
                        if sample_cb is not None:
                            sample_cb(model, batch_index, task=task)

            # ---> Train GENERATOR
            if generator is not None and batch_index <= gen_iters:
                # Train the generator with this batch
                loss_dict = generator.train_a_batch(x, y, x_=x_, y_=y_, scores_=scores_,
                                                    active_classes=active_classes, task=task, rnt=1. / task)
                # Fire callbacks on each iteration
                for loss_cb in gen_loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress_gen, batch_index, loss_dict, task=task)
                for sample_cb in sample_cbs:
                    if sample_cb is not None:
                        sample_cb(generator, batch_index, task=task)

        ##----------> UPON FINISHING EACH TASK...

        # Close progress-bar(s)
        progress.close()
        if generator is not None:
            progress_gen.close()

        ######
        if test_datasets:
            print("\n\n--> Evaluation ({}-incremental learning scenario: Before pruning):".format(scenario))
            # Evaluate precision of final model on full test-set
            precs = [evaluate.validate(
                model, test_datasets[i], verbose=False, test_size=None, task=i + 1, with_exemplars=False,
                allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1))) if scenario == "task" else None
            ) for i in range(x_tasks)]
            print("\n Precision on test-set (softmax classification):")
            for i in range(x_tasks):
                print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
            average_precs = sum(precs) / x_tasks
            print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs))
            # -with exemplars
            if use_exemplars:
                precs = [evaluate.validate(
                    model, test_datasets[i], verbose=False, test_size=None, task=i + 1, with_exemplars=True,
                    allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1))) if scenario == "task" else None
                ) for i in range(x_tasks)]
                print("\n Precision on test-set (classification using exemplars):")
                for i in range(x_tasks):
                    print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
                average_precs_ex = sum(precs) / x_tasks
                print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs_ex))
            print("\n")

            # -------------------------------------------------------------
            # pruning: global magnitude-based pruning over all LinearExcitability layers
            total = 0
            for m in model.modules():
                if isinstance(m, LinearExcitability):
                    print("module")
                    print(m)
                    total += m.weight.data.numel()
            x_weights = torch.zeros(total)
            index = 0
            for m in model.modules():
                if isinstance(m, LinearExcitability):
                    size = m.weight.data.numel()
                    x_weights[index:(index + size)] = m.weight.data.view(-1).abs().clone()
                    index += size
            y, i = torch.sort(x_weights)
            thre_index = int(total * sparsity)
            thre = y[thre_index]
            pruned = 0
            print('Pruning threshold: {}'.format(thre))
            zero_flag = False
            for k, m in enumerate(model.modules()):
                if isinstance(m, LinearExcitability):
                    weight_copy = m.weight.data.abs().clone()
                    mask = weight_copy.gt(thre).float()
                    pruned = pruned + mask.numel() - torch.sum(mask)
                    m.weight.data.mul_(mask)
                    if int(torch.sum(mask)) == 0:
                        zero_flag = True
                    print('layer index: {:d} \t total params: {:d} \t remaining params: {:d}'.format(
                        k, mask.numel(), int(torch.sum(mask))))
            print('Total params: {}, Pruned params: {}, Pruned ratio: {}'.format(total, pruned, pruned / total))
            # -------------------------------------------------------------

            print("\n\n--> Evaluation ({}-incremental learning scenario: After pruning):".format(scenario))
            # Evaluate precision of final model on full test-set
            precs = [evaluate.validate(
                model, test_datasets[i], verbose=False, test_size=None, task=i + 1, with_exemplars=False,
                allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1))) if scenario == "task" else None
            ) for i in range(x_tasks)]
            print("\n Precision on test-set (softmax classification):")
            for i in range(x_tasks):
                print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
            average_precs = sum(precs) / x_tasks
            print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs))
            # -with exemplars
            if use_exemplars:
                precs = [evaluate.validate(
                    model, test_datasets[i], verbose=False, test_size=None, task=i + 1, with_exemplars=True,
                    allowed_classes=list(range(classes_per_task * i, classes_per_task * (i + 1))) if scenario == "task" else None
                ) for i in range(x_tasks)]
                print("\n Precision on test-set (classification using exemplars):")
                for i in range(x_tasks):
                    print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
                average_precs_ex = sum(precs) / x_tasks
                print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs_ex))
            print("\n")
        ######

        # EWC: estimate Fisher Information matrix (FIM) and update term for quadratic penalty
        if isinstance(model, ContinualLearner) and (model.ewc_lambda > 0):
            # -find allowed classes
            allowed_classes = list(
                range(classes_per_task * (task - 1), classes_per_task * task)
            ) if scenario == "task" else (list(range(classes_per_task * task)) if scenario == "class" else None)
            # -if needed, apply correct task-specific mask
            if model.mask_dict is not None:
                model.apply_XdGmask(task=task)
            # -estimate FI-matrix
            model.estimate_fisher(training_dataset, allowed_classes=allowed_classes)

        # SI: calculate and update the normalized path integral
        if isinstance(model, ContinualLearner) and (model.si_c > 0):
            model.update_omega(W, model.epsilon)

        # EXEMPLARS: update exemplar sets
        if (add_exemplars or use_exemplars) or replay_mode == "exemplars":
            exemplars_per_class = int(np.floor(model.memory_budget / (classes_per_task * task)))
            # reduce exemplar-sets
            model.reduce_exemplar_sets(exemplars_per_class)
            # for each new class trained on, construct exemplar-set
            new_classes = list(range(classes_per_task)) if scenario == "domain" else list(
                range(classes_per_task * (task - 1), classes_per_task * task))
            for class_id in new_classes:
                start = time.time()
                # create new dataset containing only all examples of this class
                class_dataset = SubDataset(original_dataset=train_dataset, sub_labels=[class_id])
                # based on this dataset, construct new exemplar-set for this class
                model.construct_exemplar_set(dataset=class_dataset, n=exemplars_per_class)
                print("Constructed exemplar-set for class {}: {} seconds".format(class_id, round(time.time() - start)))
            model.compute_means = True
            # evaluate this way of classifying on test set
            for eval_cb in eval_cbs_exemplars:
                if eval_cb is not None:
                    eval_cb(model, iters, task=task)

        # REPLAY: update source for replay
        previous_model = copy.deepcopy(model).eval()
        if replay_mode == 'generative':
            Generative = True
            previous_generator = copy.deepcopy(generator).eval() if generator is not None else previous_model
        elif replay_mode == 'current':
            Current = True
        elif replay_mode in ('exemplars', 'exact'):
            Exact = True
            if replay_mode == "exact":
                previous_datasets = train_datasets[:task]
            else:
                if scenario == "task":
                    previous_datasets = []
                    for task_id in range(task):
                        previous_datasets.append(ExemplarDataset(
                            model.exemplar_sets[(classes_per_task * task_id):(classes_per_task * (task_id + 1))],
                            target_transform=lambda y, x=classes_per_task * task_id: y + x))
                else:
                    target_transform = (lambda y, x=classes_per_task: y % x) if scenario == "domain" else None
                    previous_datasets = [ExemplarDataset(model.exemplar_sets, target_transform=target_transform)]
prefix = f'baseline_{currentTime}' if args.baseline else f'{args.alpha}_{args.beta}_{args.eta}_{currentTime}'

if args.baseline:
    criterion = nn.CrossEntropyLoss().cuda()
else:
    criterion = MixedEntropy(alpha=args.alpha, beta=args.beta).cuda()

# training
if args.eval:
    if args.resume:
        dicts = torch.load(args.resume)
        model_dict = dicts['state_dict']
        model.load_state_dict(model_dict)
        model.cuda()
    else:
        raise RuntimeError('please specify the path to the model')
    acc = validate(test_loader, model, args)
    print(f'the accuracy is: {acc}')
else:
    model.train()
    for epoch in range(120):
        print(f'training epoch {epoch}!')
        total_train_loss = train(train_loader, model, criterion, optimizer, scheduler, epoch, args)
        total_train_loss /= len(train_dataset)
        writer.add_scalar('Train/Loss', total_train_loss, epoch)
        if epoch in [20, 40, 60, 80, 100]:
            state_dict = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
optimizer = get_optimizer(opt, model)
criterion = nn.CrossEntropyLoss()

best_accuracy = 0
iter = 0
for epoch in range(opt.epoch):
    # train for one epoch
    iter, loss = train(train_loader, model, criterion, optimizer, epoch, iter=iter)

    # evaluate
    loss, accuracy = validate(val_loader, model, criterion, epoch)
    is_best = accuracy > best_accuracy
    best_accuracy = max(accuracy, best_accuracy)

    # Save latest/best weights in the model directory
    save_checkpoint(
        {
            "state_dict": model,
            "epoch": epoch + 1,
            "accuracy": accuracy,
            "optimizer": optimizer.state_dict(),
        }, is_best, opt.SAVE_DIR, 'checkpoint.pth')
    print('accuracy: {:.2f}%'.format(100 * accuracy))
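# A minimal sketch of what a save_checkpoint helper consistent with the call above might look
# like. This is an assumption, not the repo's actual implementation (the best-model filename
# 'model_best.pth' is hypothetical): always write the latest state to [filename] and, when
# [is_best] is set, copy it to a separate best-model file.
import os
import shutil
import torch

def save_checkpoint_sketch(state, is_best, save_dir, filename='checkpoint.pth'):
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, filename)
    torch.save(state, path)  # always keep the latest checkpoint
    if is_best:
        shutil.copyfile(path, os.path.join(save_dir, 'model_best.pth'))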