def run_meta_learning(task_generator, prm):
    """Meta-train a shared prior model over an unlimited stream of tasks.

    Repeatedly draws meta-batches of tasks from `task_generator` and updates
    the shared prior in place via `run_meta_iteration`.

    Args:
        task_generator: object that creates meta-batches of task data
            (consumed inside `run_meta_iteration`).
        prm: experiment-parameters object; reads `n_meta_train_epochs` and
            `test_type` here (other fields are used by callees).

    Returns:
        The learned prior model.
    """
    # -------------------------------------------------------------------------
    # Setting-up
    # -------------------------------------------------------------------------
    # Create a 'dummy' model to hold the set of parameters of the shared prior:
    prior_model = get_model(prm)

    n_meta_iterations = prm.n_meta_train_epochs

    # -------------------------------------------------------------------------
    # Main script
    # -------------------------------------------------------------------------
    # Update log file:
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    start_time = timeit.default_timer()

    # Training loop:
    test_acc_avg = 0.0
    for i_iter in range(n_meta_iterations):
        prior_model, posteriors_models, test_acc_avg = run_meta_iteration(
            i_iter, prior_model, task_generator, prm)
    # Note: test_acc_avg is the last checked test accuracy in a meta-training
    # batch (not the final evaluation, which is done on the meta-test tasks).

    stop_time = timeit.default_timer()

    # Update log file:
    cmn.write_final_result(test_acc_avg, stop_time - start_time, prm,
                           result_name=prm.test_type)

    # Return learned prior:
    return prior_model
def run_learning(task_data, meta_model, prm, verbose=1):
    """Adapt the meta-learned model to a new task and evaluate it (meta-test).

    Starts from the meta-learned weights, takes `prm.n_meta_test_grad_steps`
    SGD steps on the task's training data, then evaluates on the test set.

    Args:
        task_data: dict with 'train' and 'test' data loaders for the task.
        meta_model: model holding the meta-learned initial weights.
        prm: experiment parameters; reads `loss_type`, `alpha`,
            `n_meta_test_grad_steps`, `num_epochs`.
        verbose: if 1, write the total step count to the log file.

    Returns:
        (test_err, task_model): test error (1 - accuracy) and the adapted model.
    """
    # Loss criterion:
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create a model for the task and load the meta-learned initial point:
    task_model = get_model(prm)
    task_model.load_state_dict(meta_model.state_dict())

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_batches = len(train_loader)

    # In meta-testing, use plain SGD with step-size alpha:
    task_optimizer = SGD(task_model.parameters(), lr=prm.alpha)

    # -------------------------------------------------------------------------
    # Learning function
    # -------------------------------------------------------------------------
    def run_meta_test_learning(task_model, train_loader):
        # Take a fixed number of gradient steps on the task's training data.
        task_model.train()
        train_loader_iter = iter(train_loader)

        for i_grad_step in range(prm.n_meta_test_grad_steps):
            # Get batch (cycling over the loader when it is exhausted):
            batch_data = data_gen.get_next_batch_cyclic(train_loader_iter, train_loader)
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Empirical loss and gradient step with the task weights:
            outputs = task_model(inputs)
            task_objective = loss_criterion(outputs, targets)
            grad_step(task_objective, task_optimizer)

        return task_model

    # -------------------------------------------------------------------------
    # Test evaluation function
    # -------------------------------------------------------------------------
    def run_test(model, test_loader):
        # Evaluate average loss and accuracy on the task's test set.
        model.eval()
        test_loss = 0
        n_correct = 0
        for batch_data in test_loader:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm, is_test=True)
            outputs = model(inputs)
            test_loss += loss_criterion(outputs, targets)  # sum of per-batch mean losses
            n_correct += count_correct(outputs, targets)

        n_test_samples = len(test_loader.dataset)
        n_test_batches = len(test_loader)
        test_loss = test_loss.data[0] / n_test_batches
        test_acc = n_correct / n_test_samples
        print('\nTest set: Average loss: {:.4}, Accuracy: {:.3} ( {}/{})\n'.
              format(test_loss, test_acc, n_correct, n_test_samples))
        return test_acc

    # Update log file:
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches * prm.num_epochs), prm)

    start_time = timeit.default_timer()

    # Adapt to the task, then evaluate:
    task_model = run_meta_test_learning(task_model, train_loader)
    test_acc = run_test(task_model, test_loader)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc, stop_time - start_time, prm, verbose=verbose)

    test_err = 1 - test_acc
    return test_err, task_model
def run_meta_iteration(i_iter, prior_model, task_generator, prm, init_from_prior=True):
    """Run one meta-training iteration over a freshly drawn meta-batch of tasks.

    Draws `prm.meta_batch_size` tasks, creates a posterior model per task
    (optionally initialized from the prior), then takes `prm.n_inner_steps`
    joint gradient steps on the prior and all posteriors.

    Args:
        i_iter: index of the current meta-iteration (used for lr scheduling
            and logging).
        prior_model: shared prior model (its parameters are updated in place).
        task_generator: object creating the meta-batch of task data loaders.
        prm: experiment parameters.
        init_from_prior: if True (default; the original hard-coded behavior),
            each posterior starts from a copy of the prior's weights.

    Returns:
        (prior_model, posteriors_models, test_acc_avg) where `test_acc_avg`
        is the last meta-batch test accuracy measured (0.0 if never evaluated).
    """
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion:
    loss_criterion = get_loss_criterion(prm.loss_type)

    meta_batch_size = prm.meta_batch_size
    n_inner_steps = prm.n_inner_steps
    n_meta_iterations = prm.n_meta_train_epochs

    # Generate the data sets of the training-tasks for the meta-batch:
    mb_data_loaders = task_generator.create_meta_batch(prm, meta_batch_size,
                                                       meta_split='meta_train')

    # For each task, prepare an iterator to generate training batches:
    mb_iterators = [iter(mb_data_loaders[ii]['train'])
                    for ii in range(meta_batch_size)]

    # Create posterior models for each task; they re-adjust to the new tasks
    # in each meta-batch:
    posteriors_models = [get_model(prm) for _ in range(meta_batch_size)]
    if init_from_prior:
        for post_model in posteriors_models:
            post_model.load_state_dict(prior_model.state_dict())

    # Gather all tasks' posterior params:
    all_post_param = sum([list(posterior_model.parameters())
                          for posterior_model in posteriors_models], [])

    # Create a single optimizer for all parameters (posteriors + prior):
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    test_acc_avg = 0.0
    for i_inner_step in range(n_inner_steps):

        # Get objective based on tasks in meta-batch:
        total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                              mb_iterators, posteriors_models,
                                              loss_criterion, prm.n_train_tasks)

        # Take gradient step with prior + posteriors:
        grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_iter)

        # Print training status:
        log_interval = 20
        if i_inner_step % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(
                cmn.status_string(i_iter, n_meta_iterations, i_inner_step,
                                  n_inner_steps, batch_acc,
                                  total_objective.data[0]) +
                ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t'.format(
                    info['avg_empirical_loss'], info['avg_intra_task_comp']))

        # Periodically evaluate on the test sets of the meta-batch tasks
        # (gated on i_iter, so when it fires it runs after every inner step):
        log_interval_eval = 10
        if i_iter % log_interval_eval == 0 and i_iter > 0:
            test_acc_avg = run_test(mb_data_loaders, posteriors_models,
                                    loss_criterion, prm)
            print('Meta-iter: {} \t Meta-Batch Test Acc: {:1.3}\t'.format(
                i_iter, test_acc_avg))
    # End of inner steps

    return prior_model, posteriors_models, test_acc_avg
def run_prior_learning(task_data, prm, prior_model):
    """Train the data-dependent prior model on a task's 'data_prior' split.

    Minimizes the Monte-Carlo-averaged empirical loss of the prior model on
    the 'data_prior' data loader for `prm.data_prior_test_epochs` epochs.

    Args:
        task_data: dict with a 'data_prior' data loader.
        prm: experiment parameters; reads `optim_func`, `optim_args`,
            `lr_schedule`, `loss_type`, `n_MC`, `lr`, `data_prior_test_epochs`.
        prior_model: the prior model to train (updated in place).

    Returns:
        The trained prior model.
    """
    print('DP_test_data_prior_learning: setting-up')

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion:
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Optimizer over the prior parameters only:
    all_params = list(prior_model.parameters())
    all_optimizer = optim_func(all_params, **optim_args)

    data_prior_loader = task_data['data_prior']

    # -------------------------------------------------------------------------
    # Training epoch function
    # -------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One pass over the 'data_prior' loader with a grad step per batch.
        prior_model.train()

        for batch_idx, batch_data in enumerate(data_prior_loader):
            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations: the loss is averaged over n_MC forward
            # passes of the same batch (presumably the model's forward pass is
            # stochastic — verify against the model class):
            n_MC = prm.n_MC
            task_empirical_loss = 0
            for i_MC in range(n_MC):
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)
                outputs = prior_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)
                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)

            # Total objective is the MC-averaged empirical loss:
            total_objective = task_empirical_loss

            # Take gradient step with the prior:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            log_interval = 20
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                print(
                    'number meta batch:{} \t avg_empiric_loss:{:.3f} \t batch accuracy:{:.3f}'
                    .format(batch_idx, total_objective, batch_acc))
    # end run_epoch()

    # Training loop:
    for i_epoch in range(prm.data_prior_test_epochs):
        run_train_epoch(i_epoch)
        print('Data_dependent_prior testing epoch:{}'.format(i_epoch))

    return prior_model
def run_meta_learning(train_data_loaders, prm):
    """Meta-train shared initial weights (theta) over a fixed set of tasks.

    Each epoch shuffles the tasks into meta-batches and takes one
    meta-gradient step (via `meta_step`) per meta-batch.

    Args:
        train_data_loaders: list of per-task dicts, each with a 'train' loader.
        prm: experiment parameters; reads `optim_func`, `optim_args`,
            `lr_schedule`, `loss_type`, `meta_batch_size`,
            `n_meta_train_iterations`, `lr`.

    Returns:
        The meta-learned model (holding theta).
    """
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_tasks = len(train_data_loaders)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create optimizer for meta-params (theta)
    meta_params = list(model.parameters())
    meta_optimizer = optim_func(meta_params, **optim_args)

    # Number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in train_data_loaders]
    n_batches_per_task = np.max(n_batch_list)
    # note: if some tasks have less data than other tasks - they may be sampled more than once in an epoch

    # -------------------------------------------------------------------------
    # Training epoch function
    # -------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One meta-training epoch over all meta-batches of tasks.
        # NOTE: reads `num_epochs` from the enclosing scope; it is defined
        # further below, before the first call to this function.

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(train_data_loaders[ii]['train']) for ii in range(n_tasks)]

        # The task order to take batches from (balanced: each task appears
        # exactly once per shuffled round):
        task_order = []
        task_ids_list = list(range(n_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list

        # Each meta-batch includes several tasks;
        # we take a grad step with theta after each meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) -------------------#
        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start: (meta_batch_start + prm.meta_batch_size)]
            # May be less than prm.meta_batch_size at the last meta-batch;
            # it is OK if some task appears several times in the meta-batch.
            n_tasks_in_batch = len(task_ids_in_meta_batch)  # NOTE(review): currently unused

            mb_data_loaders = [train_data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]

            # Get objective based on tasks in meta-batch:
            total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

            # Take gradient step with the meta-parameters (theta) based on validation data:
            grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, num_epochs, i_meta_batch, n_meta_batches,
                                        batch_acc, get_value(total_objective)))
        # end meta-batches loop
    # end run_epoch()

    # -------------------------------------------------------------------------
    # Main script
    # -------------------------------------------------------------------------
    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(train_data_loaders)), prm)

    start_time = timeit.default_timer()

    # Number of epochs chosen so that roughly `n_meta_train_iterations`
    # meta-batches are run in total:
    num_epochs = int(np.ceil(prm.n_meta_train_iterations / np.ceil(n_tasks / prm.meta_batch_size)))

    # Training loop:
    for i_epoch in range(num_epochs):
        run_train_epoch(i_epoch)

    stop_time = timeit.default_timer()

    # Update Log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return learned meta-parameters:
    return model
def run_meta_learning(data_loaders, prm):
    """Meta-train a shared prior together with per-task posteriors.

    Creates one posterior model per training task plus a shared prior model,
    and jointly optimizes all of them with `get_objective` over shuffled
    meta-batches of tasks. The prior's weights are saved every epoch.

    Args:
        data_loaders: list of per-task dicts with 'train' and 'test' loaders.
        prm: experiment parameters; reads `optim_func`, `optim_args`,
            `lr_schedule`, `loss_type`, `from_pretrain`, `data_source`,
            `meta_batch_size`, `n_meta_train_epochs`, `lr`, `result_dir`,
            `test_type`.

    Returns:
        The learned prior model.
    """
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion:
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_train_tasks = len(data_loaders)

    # Create posterior models for each task:
    posteriors_models = [get_model(prm) for _ in range(n_train_tasks)]

    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    prior_model = get_model(prm)

    # Optionally initialize all models from pre-trained weights:
    if prm.from_pretrain:
        if prm.data_source == "CIFAR100":
            pretrain_path = "pretrained_cifar100/epoch-46-acc0.478.pth"
        elif prm.data_source == 'Caltech256':
            pretrain_path = "pretrained_caltech256/epoch-7-acc0.303.pth"
        else:
            # Was a silent `pretrain_path = None` fall-through that crashed
            # inside load_model_state; fail with a clear message instead.
            raise ValueError(
                'No pretrained weights available for data source: {}'.format(prm.data_source))
        for post_model in posteriors_models:
            load_model_state(post_model, pretrain_path, pop_softmax=True)
        load_model_state(prior_model, pretrain_path, pop_softmax=True)
        write_to_log("load pretrained from" + pretrain_path, prm)

    # Gather all tasks' posterior params:
    all_post_param = sum([list(posterior_model.parameters())
                          for posterior_model in posteriors_models], [])

    # Create optimizer for all parameters (posteriors + prior):
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    # Number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]
    n_batches_per_task = np.max(n_batch_list)

    # -------------------------------------------------------------------------
    # Training epoch function
    # -------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One epoch over balanced meta-batches of tasks; each task appears
        # roughly the same number of times. If some tasks have less data than
        # others, they may be sampled more than once in an epoch.

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(data_loaders[ii]['train'])
                           for ii in range(n_train_tasks)]

        # The task order to take batches from:
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list

        # Each meta-batch includes several tasks;
        # we take a grad step after each meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) -------------------#
        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(meta_batch_start + prm.meta_batch_size)]
            # The last meta-batch may be smaller than prm.meta_batch_size;
            # it is OK if some tasks appear several times in the meta-batch.

            mb_data_loaders = [data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]
            mb_posteriors_models = [posteriors_models[task_id] for task_id in task_ids_in_meta_batch]

            # Get objective based on tasks in meta-batch:
            total_objective, info = get_objective(prior_model, prm, mb_data_loaders,
                                                  mb_iterators, mb_posteriors_models,
                                                  loss_criterion, n_train_tasks)

            # Take gradient step with the shared prior and all tasks' posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print training status of current batch:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                write_to_log(
                    cmn.status_string(i_epoch, prm.n_meta_train_epochs, i_meta_batch,
                                      n_meta_batches, batch_acc,
                                      get_value(total_objective)) +
                    ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}, w_kld : {:.4}, b_kld : {:.4}'.
                    format(info['avg_empirical_loss'], info['avg_intra_task_comp'],
                           info['meta_comp'], info['w_kld'], info['b_kld']), prm)
        # end meta-batches loop
    # end run_epoch()

    # -------------------------------------------------------------------------
    # Test evaluation function -
    # Evaluate the mean accuracy on the test sets of the training tasks
    # -------------------------------------------------------------------------
    def run_test():
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_test_Bayes(model, test_loader,
                                                     loss_criterion, prm, verbose=0)
                n_tests += 1
                test_acc_avg += test_acc
            else:
                print('Train Task {}, Test set: {} - No test data'.format(
                    i_task, prm.test_type))
        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -------------------------------------------------------------------------
    # Main script
    # -------------------------------------------------------------------------
    # Update Log file:
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)), prm)

    start_time = timeit.default_timer()

    # Training loop (prior is checkpointed and evaluated every epoch):
    test_acc_avg = 0.0
    for i_epoch in range(prm.n_meta_train_epochs):
        run_train_epoch(i_epoch)
        save_path = os.path.join(prm.result_dir, 'Epoch_{}_model.pth'.format(i_epoch))
        save_model_state(prior_model, save_path)
        test_acc_avg = run_test()
        print("Epoch {}: test_acc is {}".format(i_epoch, test_acc_avg))

    stop_time = timeit.default_timer()

    # Update Log file (test_acc_avg already holds the last epoch's evaluation,
    # so the redundant extra run_test() call was removed):
    cmn.write_final_result(test_acc_avg, stop_time - start_time, prm,
                           result_name=prm.test_type)

    # Return learned prior:
    return prior_model
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Learn a posterior model for a new task, regularized by the given prior.

    Trains a posterior for `prm.n_meta_test_epochs` epochs on the task's
    training data, minimizing the MC-averaged empirical loss plus the
    complexity term from `get_bayes_task_objective`, and tracks the best
    test accuracy seen across epochs.

    Args:
        task_data: dict with 'train' and 'test' data loaders for the task.
        prior_model: the learned prior model (used in the objective).
        prm: experiment parameters; reads `optim_func`, `optim_args`,
            `lr_schedule`, `loss_type`, `n_MC`, `task_complex_w`, `lr`,
            `n_meta_test_epochs`, `test_type`.
        init_from_prior: if True, initialize the posterior from the prior's
            weights.
        verbose: if 1, write the total step count to the log file.

    Returns:
        (test_err, best_acc_comp, best_acc_loss, post_model): test error
        (1 - best accuracy), the mean task-complexity and test loss recorded
        at the best epoch, and the trained posterior model.
    """
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion:
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create posterior model for the new task:
    post_model = get_model(prm)
    if init_from_prior:
        post_model.load_state_dict(prior_model.state_dict())

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    # Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------
    # Training epoch function
    # -------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One epoch over the task's training batches; returns mean
        # task-complexity and mean total loss over the epoch.
        log_interval = 500

        post_model.train()
        train_info = {}
        train_info["task_comp"] = 0.0
        train_info["total_loss"] = 0.0
        cnt = 0
        for batch_idx, batch_data in enumerate(train_loader):
            cnt += 1
            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations:
            n_MC = prm.n_MC
            task_empirical_loss = 0
            task_complexity = 0
            for i_MC in range(n_MC):
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # Empirical loss:
                outputs = post_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)

                # Bayesian objective (hyper_kl = 0 when testing):
                curr_empirical_loss, curr_complexity, task_info = get_bayes_task_objective(
                    prm, prior_model, post_model, n_train_samples,
                    curr_empirical_loss, noised_prior=False)

                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
                task_complexity += (1 / n_MC) * curr_complexity
                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)

            # Total objective:
            total_objective = task_empirical_loss + prm.task_complex_w * task_complexity
            train_info["task_comp"] += task_complexity.data[0]
            train_info["total_loss"] += total_objective.data[0]

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                write_to_log(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs, batch_idx,
                                      n_batches, batch_acc, total_objective.data[0]) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}, w_kld {:.4}, b_kld {:.4}'
                    .format(task_empirical_loss.data[0], task_complexity.data[0],
                            task_info["w_kld"], task_info["b_kld"]), prm)

        train_info["task_comp"] /= cnt
        train_info["total_loss"] /= cnt
        return train_info

    # Update Log file:
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches * prm.n_meta_test_epochs), prm)

    start_time = timeit.default_timer()

    # Training loop, tracking the best test accuracy across epochs:
    best_acc = -1
    best_acc_loss = -1
    best_acc_comp = -1
    for i_epoch in range(prm.n_meta_test_epochs):
        train_info = run_train_epoch(i_epoch)
        test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                             loss_criterion, prm)
        if test_acc > best_acc:
            best_acc = test_acc
            best_acc_loss = test_loss
            best_acc_comp = train_info["task_comp"]

    # (The redundant extra run_test_Bayes call after the loop, whose results
    # were discarded, has been removed — the report uses best_acc.)
    stop_time = timeit.default_timer()
    cmn.write_final_result(best_acc, stop_time - start_time, prm,
                           result_name=prm.test_type, verbose=verbose)

    test_err = 1 - best_acc
    return test_err, best_acc_comp, best_acc_loss, post_model
def run_learning(data_loader, prm, verbose=1, initial_model=None):
    """Standard (non-Bayesian) supervised training of a single model.

    Trains for `prm.num_epochs` epochs on the training set, optionally
    freezing some sub-modules, then evaluates on the test set.

    Args:
        data_loader: dict with 'train' and 'test' loaders and
            'n_train_samples'.
        prm: experiment parameters; reads `optim_func`, `optim_args`,
            `lr_schedule`, `loss_type`, `num_epochs`, `lr`, and optionally
            `func_model`, `freeze_list` / `not_freeze_list`.
        verbose: 0 suppresses log-file updates.
        initial_model: optional model whose weights initialize the new model.

    Returns:
        (test_err, model): test error (1 - accuracy) and the trained model.
    """
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion:
    loss_criterion = get_loss_criterion(prm.loss_type)

    # The data-sets:
    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)

    # Create model:
    if hasattr(prm, 'func_model') and prm.func_model:
        import Models.deterministic_models as func_models
        model = func_models.get_model(prm)
    else:
        model = get_model(prm)

    # Load initial weights:
    if initial_model:
        model.load_state_dict(initial_model.state_dict())

    # Gather modules list:
    modules_list = list(model.named_children())
    if hasattr(model, 'net'):
        # Extract the modules from the 'net' field and drop the wrapper itself.
        # BUGFIX: the original used `m[0] is not 'net'` — identity comparison
        # with a string literal is implementation-dependent; use `!=`.
        modules_list += list(model.net.named_children())
        modules_list = [m for m in modules_list if m[0] != 'net']

    # Determine which parameters are optimized and which are frozen:
    if hasattr(prm, 'freeze_list'):
        freeze_list = prm.freeze_list
        optimized_modules = [named_module[1] for named_module in modules_list
                             if not named_module[0] in freeze_list]
        optimized_params = sum([list(mo.parameters()) for mo in optimized_modules], [])
    elif hasattr(prm, 'not_freeze_list'):
        not_freeze_list = prm.not_freeze_list
        optimized_modules = [named_module[1] for named_module in modules_list
                             if named_module[0] in not_freeze_list]
        optimized_params = sum([list(mo.parameters()) for mo in optimized_modules], [])
    else:
        optimized_params = model.parameters()

    # Get optimizer:
    optimizer = optim_func(optimized_params, **optim_args)

    # -------------------------------------------------------------------------
    # Training epoch function
    # -------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One pass over the training loader with a grad step per batch.
        log_interval = 500

        model.train()
        for batch_idx, batch_data in enumerate(train_loader):
            # Get batch:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate loss:
            outputs = model(inputs)
            loss = loss_criterion(outputs, targets)

            # Take gradient step:
            grad_step(loss, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                        n_batches, batch_acc, get_value(loss)))

    # Update Log file:
    update_file = verbose != 0
    cmn.write_to_log(cmn.get_model_string(model), prm, update_file=update_file)
    cmn.write_to_log('Total number of steps: {}'.format(n_batches * prm.num_epochs),
                     prm, update_file=update_file)
    cmn.write_to_log('Number of training samples: {}'.format(data_loader['n_train_samples']),
                     prm, update_file=update_file)

    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc = run_test(model, test_loader, loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc, stop_time - start_time, prm,
                           verbose=verbose, result_name='Standard')

    test_err = 1 - test_acc
    return test_err, model
def run_prior_learning(data_loaders, prm, prior_model):
    """Train the data-dependent prior on the 'data_prior' splits of all tasks.

    Each epoch shuffles tasks into meta-batches and minimizes the average
    MC-estimated empirical loss of the prior model over each meta-batch.

    Args:
        data_loaders: list of per-task dicts, each with a 'data_prior' loader
            (and, as used by `get_risk`, 'train' and 'n_train_samples').
        prm: experiment parameters; reads `optim_func`, `optim_args`,
            `lr_schedule`, `loss_type`, `meta_batch_size`, `n_MC`, `lr`,
            `data_prior_train_epochs`.
        prior_model: the prior model to train (updated in place).

    Returns:
        The trained prior model.
    """
    # Unpack parameters:
    print('DP_train_data_prior_learning: setting-up')
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_train_tasks = len(data_loaders)

    prior_model = prior_model  # NOTE(review): no-op self-assignment, kept as-is

    # Create optimizer over the prior parameters only:
    prior_params = list(prior_model.parameters())
    all_params = prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    # number of sample-batches in each task:
    n_batch_list = [
        len(data_loader['data_prior']) for data_loader in data_loaders
    ]
    n_batches_per_task = np.max(n_batch_list)

    # -------------------------------------------------------------------------
    # Training epoch function
    # -------------------------------------------------------------------------
    def run_train_epoch(i_epoch, i_step=0):
        # One epoch: shuffle tasks into meta-batches; one grad step per
        # meta-batch. `i_step` only counts meta-batches locally (its final
        # value is never returned — vestigial).

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [
            iter(data_loaders[ii]['data_prior']) for ii in range(n_train_tasks)
        ]

        # The task order to take batches from:
        # Balanced - i.e., each task will appear roughly the same number of times.
        # note: if some tasks have less data than other tasks - they may be
        # sampled more than once in an epoch
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        # create a list of n_batches_per_task * n_train_tasks task ids:
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this ensures each training sample in each task is drawn in
        # each epoch (exactly once if all tasks have the same sample count).

        # ----------- meta-batches loop (batches of tasks) -------------------#
        # Each meta-batch includes at most prm.meta_batch_size tasks;
        # we take a grad step with the prior after each meta-batch.
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(
                meta_batch_start + prm.meta_batch_size)]
            # May be smaller than prm.meta_batch_size at the last one;
            # it is OK if some tasks appear several times in the meta-batch.

            mb_data_loaders = [
                data_loaders[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_iterators = [
                train_iterators[task_id] for task_id in task_ids_in_meta_batch
            ]
            i_step += 1

            # Get objective based on tasks in meta-batch:
            empirical_error = get_risk(prior_model, prm, mb_data_loaders,
                                       mb_iterators, loss_criterion,
                                       n_train_tasks)

            grad_step(empirical_error, all_optimizer, lr_schedule, prm.lr,
                      i_epoch)
            log_interval = 20
            if i_meta_batch % log_interval == 0:
                print('number meta batch:{} \t avg_empiric_loss:{:.3f}'.format(
                    i_meta_batch, empirical_error))
        # end meta-batches loop
    # end run_epoch()

    def get_risk(prior_model, prm, mb_data_loaders, mb_iterators,
                 loss_criterion, n_train_tasks):
        '''Calculate the average MC-estimated empirical loss of the prior
        model over the tasks in the meta-batch.

        note: it is OK if some tasks appear several times in the meta-batch.
        '''
        n_tasks_in_mb = len(mb_data_loaders)

        sum_empirical_loss = 0
        sum_intra_task_comp = 0  # NOTE(review): never updated/used
        correct_count = 0
        sample_count = 0

        # ----------- loop over tasks in meta-batch --------------------------#
        for i_task in range(n_tasks_in_mb):
            n_samples = mb_data_loaders[i_task]['n_train_samples']  # NOTE(review): unused

            # get sample-batch data from current task to calculate the
            # empirical loss estimate:
            batch_data = data_gen.get_next_batch_cyclic(
                mb_iterators[i_task], mb_data_loaders[i_task]['train'])

            prior_model.train()

            # Monte-Carlo iterations (loss averaged over n_MC forward passes;
            # presumably the forward pass is stochastic — verify against the
            # model class):
            n_MC = prm.n_MC
            task_empirical_loss = 0

            # ----------- Monte-Carlo loop ------------------------------------#
            for i_MC in range(n_MC):
                # get batch variables:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # Empirical Loss on current task:
                outputs = prior_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)
                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
            # end Monte-Carlo loop

            sum_empirical_loss += task_empirical_loss
        # end loop over tasks in meta-batch

        avg_empirical_loss = (1 / n_tasks_in_mb) * sum_empirical_loss
        return avg_empirical_loss

    # Training loop:
    for i_epoch in range(prm.data_prior_train_epochs):
        run_train_epoch(i_epoch)
        print('Data_dependent_prior training epoch:{}'.format(i_epoch))

    return prior_model
def run_meta_learning(data_loaders, prm, data_prior_model):
    """Meta-train a shared prior over the given training tasks, starting from a
    data-dependent prior model.

    Args:
        data_loaders: list (one entry per training task) of dicts with at least
            'train' and 'test' loaders.
        prm: experiment parameters object (optimizer factory/args, lr schedule,
            loss type, meta_batch_size, n_meta_train_epochs, result_dir, ...).
        data_prior_model: model used as the shared prior's starting point.

    Returns:
        The (trained) prior model.

    Side effects: writes progress to the log, dumps per-log-interval training
    curves to 'run_train_data_prior_bound_data.pkl' in prm.result_dir, and
    writes the final averaged test accuracy of the training tasks.
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_train_tasks = len(data_loaders)

    # Create posterior models for each task:
    # posteriors_models = [get_model(prm) for _ in range(n_train_tasks)]

    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    # prior_model = get_model(prm)
    prior_model = data_prior_model
    # NOTE(review): every posterior is the SAME object as the prior (aliases, not
    # copies) — so 'all_params' below contains each prior parameter
    # n_train_tasks + 1 times. Presumably intentional in this variant (prior ==
    # posterior); confirm the optimizer tolerates duplicated parameters.
    posteriors_models = [prior_model for _ in range(n_train_tasks)]
    # post_model.load_state_dict(prior_model.state_dict())
    # posteriors_models = [posterior_model.load_state_dict(prior_model.state_dict()) for posterior_model in posteriors_models]

    # Gather all tasks posterior params:
    all_post_param = sum([
        list(posterior_model.parameters())
        for posterior_model in posteriors_models
    ], [])

    # Create optimizer for all parameters (posteriors + prior)
    prior_params = list(prior_model.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]
    # Use the max over tasks so every sample of the largest task is seen each epoch.
    n_batches_per_task = np.max(n_batch_list)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One meta-training epoch: iterate over meta-batches of tasks and take a
        # joint gradient step on (prior + posteriors) per meta-batch.

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [
            iter(data_loaders[ii]['train']) for ii in range(n_train_tasks)
        ]

        # The task order to take batches from:
        # The meta-batch will be balanced - i.e, each task will appear roughly the same number of times
        # note: if some tasks have less data that other tasks - it may be sampled more than once in an epoch
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this method ensures each training sample in each task is drawn in each epoch.
        # If all the tasks have the same number of sample, then each sample is drawn exactly once in an epoch.

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        meta_batch_starts = list(range(0, len(task_order),
                                       prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(
                meta_batch_start + prm.meta_batch_size)]
            # meta-batch size may be less than prm.meta_batch_size at the last one
            # note: it is OK if some tasks appear several times in the meta-batch
            mb_data_loaders = [
                data_loaders[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_iterators = [
                train_iterators[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_posteriors_models = [
                posteriors_models[task_id]
                for task_id in task_ids_in_meta_batch
            ]

            # Get objective based on tasks in meta-batch:
            total_objective, info = get_objective(
                prior_model, prm, mb_data_loaders, mb_iterators,
                mb_posteriors_models, loss_criterion, n_train_tasks)

            # Take gradient step with the shared prior and all tasks' posteriors:
            grad_step(total_objective, all_optimizer, lr_schedule, prm.lr,
                      i_epoch)

            # Print status:
            log_interval = 50
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                write_to_log(
                    cmn.status_string(i_epoch, prm.n_meta_train_epochs,
                                      i_meta_batch, n_meta_batches, batch_acc,
                                      get_value(total_objective)) +
                    ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}'
                    .format(info['avg_empirical_loss'],
                            info['avg_intra_task_comp'], info['meta_comp']),
                    prm)
                # Record training curves (dumped to a pickle after training).
                data_objective.append(get_value(total_objective))
                data_accuracy.append(batch_acc)
                data_emp_loss.append(info['avg_empirical_loss'])
                data_task_comp.append(info['avg_intra_task_comp'])
                data_meta_comp.append(info['meta_comp'])
            # write_to_log('Train Task {}, Test set: {} - Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'.format(
            #     i_task, prm.test_type, test_loss, test_acc, n_test_samples), prm)
        # end meta-batches loop

    # end run_epoch()

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function -
    #  Evaluate the mean loss on samples from the test sets of the training tasks
    # --------------------------------------------------------------------------------------------
    def run_test():
        # Average test accuracy of the (per-task) posterior models over all
        # training tasks that have test data.
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_test_Bayes(model, test_loader,
                                                     loss_criterion, prm)
                n_tests += 1
                test_acc_avg += test_acc
                n_test_samples = len(test_loader.dataset)
                write_to_log(
                    'Train Task {}, Test set: {} - Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'
                    .format(i_task, prm.test_type, test_loss, test_acc,
                            n_test_samples), prm)
            else:
                print('Train Task {}, Test set: {} - No test data'.format(
                    i_task, prm.test_type))
        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)),
                 prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    # Curve buffers appended to inside run_train_epoch (closure).
    data_objective = []
    data_accuracy = []
    data_emp_loss = []
    data_task_comp = []
    data_meta_comp = []
    for i_epoch in range(prm.n_meta_train_epochs):
        run_train_epoch(i_epoch)
    stop_time = timeit.default_timer()

    # Persist the recorded curves for later analysis.
    with open(
            os.path.join(prm.result_dir,
                         'run_train_data_prior_bound_data.pkl'), 'wb') as f:
        pickle.dump(
            {
                'data_objective': data_objective,
                "data_accuracy": data_accuracy,
                'data_emp_loss': data_emp_loss,
                'data_task_comp': data_task_comp,
                'data_meta_comp': data_meta_comp
            }, f)

    # Test:
    test_acc_avg = run_test()

    # Update Log file:
    cmn.write_final_result(test_acc_avg,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type)

    # Return learned prior:
    return prior_model
def run_meta_learning(prm, task_generator):
    """MAML-style meta-training with an unbounded stream of tasks.

    Each meta-iteration draws a fresh meta-batch of tasks from
    `task_generator` and takes one gradient step on the shared initial
    point (theta).

    Args:
        prm: experiment parameters object (optim_func/optim_args,
            lr_schedule, loss_type, meta_batch_size,
            n_meta_train_iterations, ...).
        task_generator: object providing create_meta_batch(prm, size,
            meta_split=...) -> list of per-task data-loader dicts.

    Returns:
        The trained meta-parameters model (theta).
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule
    n_iterations = prm.n_meta_train_iterations

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create optimizer for meta-params (theta)
    meta_params = list(model.parameters())
    meta_optimizer = optim_func(meta_params, **optim_args)

    meta_batch_size = prm.meta_batch_size

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_meta_iteration(i_iter):
        # In each meta-iteration we draw a meta-batch of several tasks
        # Then we take a grad step with theta.

        # Generate the data sets of the training-tasks for meta-batch:
        mb_data_loaders = task_generator.create_meta_batch(
            prm, meta_batch_size, meta_split='meta_train')

        # For each task, prepare an iterator to generate training batches:
        mb_iterators = [
            iter(mb_data_loaders[ii]['train'])
            for ii in range(meta_batch_size)
        ]

        # Get objective based on tasks in meta-batch:
        total_objective, info = meta_step(prm, model, mb_data_loaders,
                                          mb_iterators, loss_criterion)

        # Take gradient step with the meta-parameters (theta) based on validation data:
        grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_iter)

        # Print status:
        log_interval = 5
        if (i_iter) % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            # FIX: was `total_objective.data[0]`, which raises IndexError for
            # 0-dim loss tensors on PyTorch >= 0.4; use .item() as done
            # elsewhere in this file.
            print(
                cmn.status_string(i_iter, n_iterations, 1, 1, batch_acc,
                                  total_objective.item()))

    # end run_meta_iteration()

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_iter in range(n_iterations):
        run_meta_iteration(i_iter)

    stop_time = timeit.default_timer()

    # Update Log file:
    # Note: no final evaluation is run here, so 0.0 is recorded as the result.
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return learned meta-parameters:
    return model
def run_learning(data_loader, prm, prior_model=None, init_from_prior=True,
                 verbose=1):
    """Train a Bayesian posterior model on a single task.

    Args:
        data_loader: dict with 'train'/'test' loaders and 'n_train_samples'.
        prm: experiment parameters object (optim_func/optim_args, lr_schedule,
            loss_type, n_MC, num_epochs, ...).
        prior_model: optional prior; when given, a KL-style complexity term is
            added to the objective via get_bayes_task_objective.
        init_from_prior: when True (and a prior is given), the posterior starts
            as a deep copy of the prior.
        verbose: 0 silences log-file updates.

    Returns:
        (test_err, post_model): final test error (1 - accuracy) and the
        trained posterior model.
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    train_loader = data_loader['train']
    test_loader = data_loader['test']
    n_batches = len(train_loader)
    n_train_samples = data_loader['n_train_samples']

    # get model:
    if prior_model and init_from_prior:
        # init from prior model:
        post_model = deepcopy(prior_model)
    else:
        post_model = get_model(prm)

    # post_model.set_eps_std(0.0) # DEBUG: turn off randomness

    # Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):

        # # Adjust randomness (eps_std)
        # if hasattr(prm, 'use_randomness_schedeule') and prm.use_randomness_schedeule:
        #     if i_epoch > prm.randomness_full_epoch:
        #         eps_std = 1.0
        #     elif i_epoch > prm.randomness_init_epoch:
        #         eps_std = (i_epoch - prm.randomness_init_epoch) / (prm.randomness_full_epoch - prm.randomness_init_epoch)
        #     else:
        #         eps_std = 0.0  # turn off randomness
        #     post_model.set_eps_std(eps_std)

        # post_model.set_eps_std(0.00) # debug

        complexity_term = 0

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # Monte-Carlo iterations: average the empirical loss over n_MC
            # stochastic forward passes.
            empirical_loss = 0
            n_MC = prm.n_MC
            for i_MC in range(n_MC):
                # get batch:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # calculate objective:
                outputs = post_model(inputs)
                empirical_loss_c = loss_criterion(outputs, targets)
                empirical_loss += (1 / n_MC) * empirical_loss_c

            # complexity/prior term:
            if prior_model:
                empirical_loss, complexity_term = get_bayes_task_objective(
                    prm, prior_model, post_model, n_train_samples,
                    empirical_loss)
            else:
                complexity_term = 0.0

            # Total objective:
            objective = empirical_loss + complexity_term

            # Take gradient step:
            grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 500
            if batch_idx % log_interval == 0:
                # batch_acc uses the outputs of the last MC pass.
                batch_acc = correct_rate(outputs, targets)
                # FIX: was `objective.data[0]`, which raises IndexError for
                # 0-dim loss tensors on PyTorch >= 0.4; use get_value() as the
                # rest of this print already does.
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc,
                                      get_value(objective)) +
                    ' Loss: {:.4}\t Comp.: {:.4}'.format(
                        get_value(empirical_loss),
                        get_value(complexity_term)))

    # -------------------------------------------------------------------------------------------
    #  Main Script
    # -------------------------------------------------------------------------------------------

    #  Update Log file
    update_file = not verbose == 0
    cmn.write_to_log(cmn.get_model_string(post_model),
                     prm,
                     update_file=update_file)
    cmn.write_to_log('Total number of steps: {}'.format(n_batches *
                                                        prm.num_epochs),
                     prm,
                     update_file=update_file)
    cmn.write_to_log('Number of training samples: {}'.format(
        data_loader['n_train_samples']),
                     prm,
                     update_file=update_file)

    start_time = timeit.default_timer()

    # Run training epochs:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type)

    test_err = 1 - test_acc
    return test_err, post_model
def run_meta_iteration(i_iter, prior_model, task_generator, prm):
    """One meta-iteration: draw a meta-batch of tasks, adapt fresh posteriors
    from the prior with several inner steps, and update the prior from the
    accumulated posterior gradients.

    Args:
        i_iter: index of the current meta-iteration (used for lr schedules/logs).
        prior_model: shared prior model; updated in place via prior_optimizer.
        task_generator: provides create_meta_batch(prm, size, meta_split=...).
        prm: experiment parameters object.

    Returns:
        (prior_model, posteriors_models, test_acc_avg) — the prior, the
        adapted per-task posteriors, and the last meta-batch test accuracy.
    """
    # In each meta-iteration we draw a meta-batch of several tasks
    # Then we take a grad step with prior.

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    meta_batch_size = prm.meta_batch_size
    n_inner_steps = prm.n_inner_steps
    n_meta_iterations = prm.n_meta_train_epochs

    # Generate the data sets of the training-tasks for meta-batch:
    mb_data_loaders = task_generator.create_meta_batch(
        prm, meta_batch_size, meta_split='meta_train')

    # For each task, prepare an iterator to generate training batches:
    mb_iterators = [
        iter(mb_data_loaders[ii]['train']) for ii in range(meta_batch_size)
    ]

    # The posteriors models will adjust to new tasks in each meta-batch
    # Create posterior models for each task:
    posteriors_models = [get_model(prm) for _ in range(meta_batch_size)]
    init_from_prior = True
    if init_from_prior:
        # Start each posterior from the current prior weights.
        for post_model in posteriors_models:
            post_model.load_state_dict(prior_model.state_dict())

    # # Gather all tasks posterior params:
    # all_post_param = sum([list(posterior_model.parameters()) for posterior_model in posteriors_models], [])
    #
    # # Create optimizer for all parameters (posteriors + prior)
    # prior_params = list(prior_model.parameters())
    # all_params = all_post_param + prior_params
    # all_optimizer = optim_func(all_params, **optim_args)
    # # all_optimizer = optim_func(prior_params, **optim_args) ## DeBUG

    # NOTE(review): `optim_args` is passed positionally (a config dict), not
    # **unpacked — presumably optim_func here is an EntropySGD-style optimizer
    # taking a config dict; confirm against prm.optim_func.
    prior_params = filter(lambda p: p.requires_grad, prior_model.parameters())
    prior_optimizer = optim_func(prior_params, optim_args)
    # `object` here is presumably a project module (not the builtin) providing
    # grad_init/feval for the EntropySGD-style updates — TODO confirm.
    if prior_optimizer.param_groups[0]['params'][0].grad is None:
        # Initialize prior gradients once, so prior_get_grad can accumulate.
        object.grad_init(prm, prior_model, loss_criterion,
                         iter(mb_data_loaders[0]['train']),
                         mb_data_loaders[0]['train'], prior_optimizer)
    # all_params = all_post_param + prior_params
    # One optimizer per posterior model (trainable params only).
    all_posterior_optimizers = [
        optim_func(
            filter(lambda p: p.requires_grad,
                   posteriors_models[i].parameters()), optim_args)
        for i in range(meta_batch_size)
    ]

    test_acc_avg = 0.0
    for i_inner_step in range(n_inner_steps):
        # Get objective based on tasks in meta-batch:
        # total_objective, info = get_objective(prior_model, prm, mb_data_loaders, mb_iterators,
        #                                       posteriors_models, loss_criterion, prm.n_train_tasks)
        task_loss_list, info = get_objective(prior_model, prm, mb_data_loaders,
                                             object.feval, mb_iterators,
                                             posteriors_models, loss_criterion,
                                             meta_batch_size,
                                             range(meta_batch_size))

        # Take gradient step with the meta-parameters (theta) based on validation data:
        # grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_iter)
        prior_optimizer.zero_grad()
        for i_task in range(meta_batch_size):
            # An int entry marks a task with no loss this step — skip it.
            if isinstance(task_loss_list[i_task], int):
                continue
            # Inner (task-level) gradient step on the posterior.
            grad_step(task_loss_list[i_task], posteriors_models[i_task],
                      loss_criterion, all_posterior_optimizers[i_task], prm,
                      mb_iterators[i_task], mb_data_loaders[i_task]['train'],
                      lr_schedule, prm.lr, i_iter)
            # if i_meta_batch==n_meta_batches-1:
            # Accumulate this task's contribution into the prior's gradients.
            prior_get_grad(prior_optimizer, all_posterior_optimizers[i_task])

        # prior_grad_step(prior_model, prior_optimizer, all_posterior_optimizers, prm.meta_batch_size, prm,
        #                 lr_schedule, prm.prior_lr, i_epoch)
        # Update the prior from the accumulated task gradients.
        prior_grad_step(prior_optimizer, prm.meta_batch_size, prm,
                        prm.prior_lr_schedule, prm.prior_lr, i_iter)

        # Print status:
        log_interval = 1
        # if (i_inner_step) % log_interval == 0:
        #     batch_acc = info['correct_count'] / info['sample_count']
        #     print(cmn.status_string(i_iter, n_meta_iterations, i_inner_step, n_inner_steps, batch_acc, total_objective.data[0]) +
        #           ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t'.
        #           format(info['avg_empirical_loss'], info['avg_intra_task_comp']))
        if (i_inner_step) % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(
                cmn.status_string(i_iter, prm.n_meta_train_epochs,
                                  i_inner_step, n_inner_steps, batch_acc) +
                ' Empiric-Loss: {:.4f}'.format(info['avg_empirical_loss']))

        # Print status = on test set of meta-batch:
        # NOTE(review): this evaluation is placed inside the inner-step loop but
        # gated on i_iter (not i_inner_step), so with log_interval_eval == 1 it
        # runs every inner step — confirm intended placement.
        log_interval_eval = 1
        if (i_iter) % log_interval_eval == 0:
            test_acc_avg = run_test(mb_data_loaders, posteriors_models,
                                    loss_criterion, prm)
            print('Meta-iter: {} \t Meta-Batch Test Acc: {:1.3}\t'.format(
                i_iter, test_acc_avg))
    # End of inner steps

    return prior_model, posteriors_models, test_acc_avg
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Meta-test adaptation: train a posterior for a new task starting from a
    learned prior, with a Bayesian (empirical loss + complexity) objective.

    Args:
        task_data: dict with 'train' and 'test' loaders for the new task.
        prior_model: the learned prior; the complexity term is computed
            against it and (optionally) the posterior is initialized from it.
        prm: experiment parameters object (optim_func/optim_args, lr_schedule,
            loss_type, n_MC, n_meta_test_epochs, result_dir, ...).
        init_from_prior: when True, copy the prior weights into the posterior.
        verbose: 1 writes step counts to the log.

    Returns:
        (test_err, test_loss, test_bound, post_model): test error
        (1 - accuracy), test loss, last-epoch objective value (bound proxy),
        and the trained posterior model.

    Side effects: dumps per-log-interval training curves to
    'run_test_data_prior_bound_data.pkl' in prm.result_dir.
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        post_model.load_state_dict(prior_model.state_dict())

        # prior_model_dict = prior_model.state_dict()
        # post_model_dict = post_model.state_dict()
        #
        # # filter out unnecessary keys:
        # prior_model_dict = {k: v for k, v in prior_model_dict.items() if '_log_var' in k or '_mu' in k}
        # # overwrite entries in the existing state dict:
        # post_model_dict.update(prior_model_dict)
        #
        # # load the new state dict
        # post_model.load_state_dict(post_model_dict)

        # add_noise_to_model(post_model, prm.kappa_factor)

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    # Get optimizer:
    optimizer = optim_func(post_model.parameters(), **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # Returns the last batch's total objective value (used as a bound proxy).
        log_interval = 500

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations: average loss/complexity over n_MC
            # stochastic forward passes.
            n_MC = prm.n_MC
            task_empirical_loss = 0
            task_complexity = 0
            for i_MC in range(n_MC):
                # get batch:
                inputs, targets = data_gen.get_batch_vars(batch_data, prm)

                # Calculate empirical loss:
                outputs = post_model(inputs)
                curr_empirical_loss = loss_criterion(outputs, targets)

                curr_empirical_loss, curr_complexity = get_bayes_task_objective(
                    prm,
                    prior_model,
                    post_model,
                    n_train_samples,
                    curr_empirical_loss,
                    noised_prior=False)

                task_empirical_loss += (1 / n_MC) * curr_empirical_loss
                task_complexity += (1 / n_MC) * curr_complexity

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)

            # Total objective:
            total_objective = task_empirical_loss + task_complexity

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                print(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs,
                                      batch_idx, n_batches, batch_acc,
                                      total_objective.item()) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}'.format(
                        task_empirical_loss.item(), task_complexity.item()))
                # Record training curves (dumped to a pickle after training).
                data_objective.append(total_objective.item())
                data_accuracy.append(batch_acc)
                data_emp_loss.append(task_empirical_loss.item())
                data_task_comp.append(task_complexity.item())
        # NOTE(review): refers to the last batch's objective; raises NameError
        # if train_loader is empty — confirm loaders are always non-empty.
        return total_objective.item()

    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches *
                                               prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Curve buffers appended to inside run_train_epoch (closure).
    data_objective = []
    data_accuracy = []
    data_emp_loss = []
    data_task_comp = []

    # Training loop:
    for i_epoch in range(prm.n_meta_test_epochs):
        test_bound = run_train_epoch(i_epoch)

    # Persist the recorded curves for later analysis.
    with open(
            os.path.join(prm.result_dir, 'run_test_data_prior_bound_data.pkl'),
            'wb') as f:
        pickle.dump(
            {
                'data_objective': data_objective,
                "data_accuracy": data_accuracy,
                'data_emp_loss': data_emp_loss,
                'data_task_comp': data_task_comp
            }, f)

    # Test:
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, test_loss, test_bound, post_model
def run_learning(task_data, prior_model, prm, init_from_prior=True, verbose=1):
    """Meta-test adaptation variant using an EntropySGD-style optimizer:
    train a posterior for a new task starting from the learned prior.

    Args:
        task_data: dict with at least 'train' and 'test' loaders; note the
            WHOLE dict is passed to get_objective (see run_train_epoch).
        prior_model: learned prior; posterior optionally initialized from it.
        prm: experiment parameters object (optim_func/optim_args,
            lr_schedule_test, loss_type, n_meta_test_epochs, ...).
        init_from_prior: when True, copy the prior weights into the posterior.
        verbose: 1 writes step counts to the log.

    Returns:
        (test_err, post_model): test error (1 - accuracy) and the trained
        posterior model.
    """
    # prm.optim_func, prm.optim_args = optim.EntropySGD, {'llr':0.01, 'lr':0.1, 'momentum':0.9, 'damp':0, 'weight_decay':1e-3, 'nesterov':True,
    #                                                     'L':20, 'eps':1e-3, 'g0':1e-4, 'g1':1e-3}
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    # prm.optim_args['llr'] = 0.1
    # prm.optim_args['L'] = 20
    # # prm.optim_args['weight_decay'] = 1e-3
    # # prm.optim_args['g1'] = 0
    # prm.optim_args['g0'] = 1e-4
    # Note: uses the TEST lr schedule here (lr_schedule_test), unlike meta-training.
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule_test
    # prm.optim_func, prm.optim_args = optim.Adam, {'lr': prm.lr}  # 'weight_decay': 1e-4
    # lr_schedule = {'decay_factor': 0.1, 'decay_epochs': [15, 20]}

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create posterior model for the new task:
    post_model = get_model(prm)

    if init_from_prior:
        post_model.load_state_dict(prior_model.state_dict())

        # prior_model_dict = prior_model.state_dict()
        # post_model_dict = post_model.state_dict()
        #
        # # filter out unnecessary keys:
        # prior_model_dict = {k: v for k, v in prior_model_dict.items() if '_log_var' in k or '_mu' in k}
        # # overwrite entries in the existing state dict:
        # post_model_dict.update(prior_model_dict)
        #
        # # load the new state dict
        # post_model.load_state_dict(post_model_dict)

        # add_noise_to_model(post_model, prm.kappa_factor)

    # The data-sets of the new task:
    # NOTE(review): train_loader is the whole task dict here (not
    # task_data['train']), matching get_objective's expected loader-dict shape.
    train_loader = task_data
    test_loader = task_data['test']
    # n_train_samples = len(train_loader['train'].dataset)
    # NOTE(review): this is len() of the task dict (its number of keys), not
    # the number of train batches; it is only used in logging below — confirm
    # whether len(train_loader['train']) was intended.
    n_batches = len(train_loader)

    # Get optimizer:
    # optim_args passed positionally (config dict), EntropySGD-style.
    optimizer = optim_func(
        filter(lambda p: p.requires_grad, post_model.parameters()), optim_args)
    # optimizer = optim_func(filter(lambda p: p.requires_grad, post_model.parameters()), optim_args['lr'])

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # One pass over the task's training batches.
        # log_interval = 500

        post_model.train()
        train_iterators = iter(train_loader['train'])

        for batch_idx, batch_data in enumerate(train_loader['train']):
            # Single-task objective via the shared meta-objective helper
            # (lists of length 1, task id [0]).
            task_loss, info = get_objective(prior_model, prm, [train_loader],
                                            object.feval, [train_iterators],
                                            [post_model], loss_criterion, 1,
                                            [0])
            grad_step(task_loss[0], post_model, loss_criterion, optimizer, prm,
                      train_iterators, train_loader['train'], lr_schedule,
                      prm.optim_args['lr'], i_epoch)
            # for log_var in post_model.parameters():
            #     if log_var.requires_grad is False:
            #         log_var.data = log_var.data - (i_epoch + 1) * math.log(1 + prm.gamma1)

            # Print status:
            log_interval = 10
            if (batch_idx) % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(
                    cmn.status_string(i_epoch, prm.n_meta_train_epochs,
                                      batch_idx, n_batches, batch_acc) +
                    ' Empiric-Loss: {:.4f}'.format(info['avg_empirical_loss']))

    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches *
                                               prm.n_meta_test_epochs), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_test_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc, test_loss = run_test_Bayes(post_model, test_loader,
                                         loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, post_model
def run_meta_learning(data_loaders, prm):
    """Meta-train a shared prior over a fixed set of training tasks using
    per-task posterior optimizers (EntropySGD-style) and explicit
    prior-gradient accumulation.

    Args:
        data_loaders: list (one per training task) of dicts with 'train' and
            'test' loaders.
        prm: experiment parameters object (optim_func/optim_args with an 'L'
            entry, lr_schedule, loss_type, meta_batch_size,
            n_meta_train_epochs, ...).

    Returns:
        The trained prior model.
    """
    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_train_tasks = len(data_loaders)

    # Create a 'dummy' model to generate the set of parameters of the shared prior:
    prior_model = get_model(prm)

    # Create posterior models for each task:
    # Each posterior is a fresh model with the prior's weights transferred in.
    posteriors_models = [
        transfer_weights(prior_model, get_model(prm))
        for _ in range(n_train_tasks)
    ]

    # Gather all tasks posterior params:
    # all_post_param = sum([list(posterior_model.parameters()) for posterior_model in posteriors_models], [])

    # Create optimizer for all parameters (posteriors + prior)
    # NOTE(review): optim_args passed positionally (config dict) — presumably
    # an EntropySGD-style optimizer; `object` is presumably a project module
    # (not the builtin) providing grad_init/feval — TODO confirm.
    prior_params = filter(lambda p: p.requires_grad, prior_model.parameters())
    prior_optimizer = optim_func(prior_params, optim_args)
    # Initialize prior gradients so prior_get_grad can accumulate into them.
    object.grad_init(prm, prior_model, loss_criterion,
                     iter(data_loaders[0]['train']), data_loaders[0]['train'],
                     prior_optimizer)
    # all_params = all_post_param + prior_params
    # One optimizer per posterior model (trainable params only).
    all_posterior_optimizers = [
        optim_func(
            filter(lambda p: p.requires_grad,
                   posteriors_models[i].parameters()), optim_args)
        for i in range(n_train_tasks)
    ]

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in data_loaders]
    n_batches_per_task = np.max(n_batch_list)
    # 'L' (EntropySGD inner-loop length) kept for the commented-out schedule below.
    L = prm.optim_args['L']

    # -------------------------------------------------------------------------------------------
    #  Training epoch function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):
        # optim_args['L'] = L-5*i_epoch

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [
            iter(data_loaders[ii]['train']) for ii in range(n_train_tasks)
        ]

        # The task order to take batches from:
        # The meta-batch will be balanced - i.e, each task will appear roughly the same number of times
        # note: if some tasks have less data that other tasks - it may be sampled more than once in an epoch
        task_order = []
        task_ids_list = list(range(n_train_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list
        # Note: this method ensures each training sample in each task is drawn in each epoch.
        # If all the tasks have the same number of sample, then each sample is drawn exactly once in an epoch.

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        # meta_batch_starts = list(range(0, len(task_order), n_train_tasks))
        meta_batch_starts = list(range(0, len(task_order),
                                       prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start:(
                meta_batch_start + prm.meta_batch_size)]
            # meta-batch size may be less than prm.meta_batch_size at the last one
            # note: it is OK if some tasks appear several times in the meta-batch
            mb_data_loaders = [
                data_loaders[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_iterators = [
                train_iterators[task_id] for task_id in task_ids_in_meta_batch
            ]
            mb_posteriors_models = [
                posteriors_models[task_id]
                for task_id in task_ids_in_meta_batch
            ]

            # (Two earlier, identical commented-out drafts of the
            # get_objective / per-task grad_step sequence were condensed here.)
            #task_loss_list, info = get_objective(prior_model, prm, mb_data_loaders, object.feval,
            #                                     mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks, task_ids_in_meta_batch)
            # Take gradient step with the shared prior and all tasks' posteriors:
            # for i_task in range(n_train_tasks):
            #     prior_optimizer.zero_grad()
            #for i_task in range(n_train_tasks):
            #    if isinstance(task_loss_list[i_task], int):
            #        continue
            #    grad_step(task_loss_list[i_task], posteriors_models[i_task], loss_criterion, all_posterior_optimizers[i_task], prm,
            #              train_iterators[i_task], data_loaders[i_task]['train'], lr_schedule, prm.lr, i_epoch)

            # Get objective based on tasks in meta-batch:
            # Returns a per-task list; tasks outside the meta-batch are int
            # placeholders (skipped below).
            task_loss_list, info = get_objective(prior_model, prm,
                                                 mb_data_loaders, object.feval,
                                                 mb_iterators,
                                                 mb_posteriors_models,
                                                 loss_criterion, n_train_tasks,
                                                 task_ids_in_meta_batch)

            # Take gradient step with the shared prior and all tasks' posteriors:
            # for i_task in range(n_train_tasks):
            prior_optimizer.zero_grad()
            for i_task in range(n_train_tasks):
                if isinstance(task_loss_list[i_task], int):
                    continue
                # Task-level gradient step on the posterior.
                grad_step(task_loss_list[i_task], posteriors_models[i_task],
                          loss_criterion, all_posterior_optimizers[i_task],
                          prm, train_iterators[i_task],
                          data_loaders[i_task]['train'], lr_schedule, prm.lr,
                          i_epoch)
                # if i_meta_batch==n_meta_batches-1:
                # if i_epoch == prm.n_meta_train_epochs-1:
                # Accumulate this task's contribution into the prior's gradients.
                prior_get_grad(prior_optimizer,
                               all_posterior_optimizers[i_task])

            # task_loss_list, info = get_objective(prior_model, prm, mb_data_loaders, object.feval,
            #                                      mb_iterators, mb_posteriors_models, loss_criterion, n_train_tasks, task_ids_in_meta_batch)
            # prior_grad_step(prior_optimizer, prm.meta_batch_size, prm,prm.prior_lr_schedule, prm.prior_lr, i_epoch)
            # Update the prior from the accumulated gradients, then re-sync
            # every posterior to the updated prior.
            prior_updates(prior_optimizer, n_train_tasks, prm)
            for post_model in posteriors_models:
                post_model.load_state_dict(prior_model.state_dict())

            # Print status:
            log_interval = 10
            if (i_meta_batch) % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(
                    cmn.status_string(i_epoch, prm.n_meta_train_epochs,
                                      i_meta_batch, n_meta_batches, batch_acc)
                    +
                    ' Empiric-Loss: {:.4f}'.format(info['avg_empirical_loss']))
            # for i in range(20):
            #     prior_grad_step(prior_optimizer, prm.meta_batch_size, prm, lr_schedule, prm.prior_lr, i_epoch)
        # end meta-batches loop

    # end run_epoch()

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function -
    #  Evaluate the mean loss on samples from the test sets of the training tasks
    # --------------------------------------------------------------------------------------------
    def run_test():
        # Average test accuracy of the per-task posteriors over all training
        # tasks that have test data.
        test_acc_avg = 0.0
        n_tests = 0
        for i_task in range(n_train_tasks):
            model = posteriors_models[i_task]
            test_loader = data_loaders[i_task]['test']
            if len(test_loader) > 0:
                test_acc, test_loss = run_test_Bayes(model, test_loader,
                                                     loss_criterion, prm)
                n_tests += 1
                test_acc_avg += test_acc
                n_test_samples = len(test_loader.dataset)
                write_to_log(
                    'Train Task {}, Test set: {} - Average loss: {:.4}, Accuracy: {:.3} (of {} samples)\n'
                    .format(i_task, prm.test_type, test_loss, test_acc,
                            n_test_samples), prm)
            else:
                print('Train Task {}, Test set: {} - No test data'.format(
                    i_task, prm.test_type))
        if n_tests > 0:
            test_acc_avg /= n_tests
        return test_acc_avg

    # -----------------------------------------------------------------------------------------------------------#
    #  Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(prior_model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(data_loaders)),
                 prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.n_meta_train_epochs):
        # if (i_epoch+1) % 50 == 0:
        #     prm.lr = prm.lr/2
        # for post_model in posteriors_models:
        #     post_model.load_state_dict(prior_model.state_dict())
        run_train_epoch(i_epoch)
    # for post_model in posteriors_models:
    #     post_model.load_state_dict(prior_model.state_dict())
    # prior_update(prior_optimizer,prm.meta_batch_size,prm)
    stop_time = timeit.default_timer()

    # Test:
    test_acc_avg = run_test()

    # Update Log file:
    cmn.write_final_result(test_acc_avg,
                           stop_time - start_time,
                           prm,
                           result_name=prm.test_type)

    # Return learned prior:
    return prior_model