def run_train_epoch(i_epoch):
    """Run one training epoch of the stochastic (Bayesian) posterior model.

    For each batch, the empirical loss is estimated by averaging prm.n_MC
    Monte-Carlo forward passes through post_model; when a prior_model is
    given, a complexity (prior/KL) term is added via get_bayes_task_objective,
    and a gradient step is taken on the total objective.

    Note: uses the enclosing scope's post_model, train_loader, prior_model,
    optimizer, lr_schedule, n_train_samples, n_batches and prm.
    """
    complexity_term = 0
    log_interval = 500  # batches between status prints
    post_model.train()
    for batch_idx, batch_data in enumerate(train_loader):

        # Monte-Carlo estimate of the empirical loss:
        empirical_loss = 0
        n_MC = prm.n_MC
        for i_MC in range(n_MC):
            # get batch:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)
            # calculate objective:
            outputs = post_model(inputs)
            empirical_loss += (1 / n_MC) * loss_criterion(outputs, targets)

        # complexity/prior term:
        if prior_model:
            empirical_loss, complexity_term = get_bayes_task_objective(
                prm, prior_model, post_model, n_train_samples, empirical_loss)
        else:
            complexity_term = 0.0

        # Total objective:
        objective = empirical_loss + complexity_term

        # Take gradient step:
        grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

        # Print status:
        # (was `objective.data[0]`, which fails on modern PyTorch; use the
        #  same get_value() helper as the rest of the file)
        if batch_idx % log_interval == 0:
            batch_acc = correct_rate(outputs, targets)
            print(
                cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                  n_batches, batch_acc, get_value(objective)) +
                ' Loss: {:.4}\t Comp.: {:.4}'.format(
                    get_value(empirical_loss), get_value(complexity_term)))
def get_params_statistics(param_list):
    """Print and return the mean and STD of each named parameter tensor.

    param_list is a sequence of (name, values) pairs; returns two numpy
    arrays (means, stds) aligned with param_list order.
    """
    n_params = len(param_list)
    means = np.zeros(n_params)
    stds = np.zeros(n_params)
    for idx, (name, values) in enumerate(param_list):
        mean_val = get_value(values.mean())
        std_val = get_value(values.std())
        means[idx] = mean_val
        stds[idx] = std_val
        print('Parameter name: {}, mean value: {:.3}, STD: {:.3}'.format(
            name, mean_val, std_val))
    return means, stds
def run_test_max_posterior(model, test_loader, loss_criterion, prm):
    """Evaluate the stochastic model using its posterior-mean weights.

    The weight noise is temporarily switched off (eps_std = 0) for each
    forward pass, then restored. Returns an info dict with accuracy,
    correct count, sample count and average loss.
    """
    sample_total = len(test_loader.dataset)
    model.eval()
    loss_sum = 0
    correct_sum = 0
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm, is_test=True)
        prev_eps_std = model.set_eps_std(0.0)  # deterministic (max-posterior) pass
        outputs = model(inputs)
        model.set_eps_std(prev_eps_std)  # restore stochastic behaviour
        loss_sum += loss_criterion(outputs, targets)  # mean loss per batch, summed
        correct_sum += count_correct(outputs, targets)
    loss_sum /= sample_total
    return {
        'test_acc': correct_sum / sample_total,
        'n_correct': correct_sum,
        'test_type': 'max_posterior',
        'n_test_samples': sample_total,
        'test_loss': get_value(loss_sum),
    }
def get_objective(prior_model, prm, mb_data_loaders, feval, mb_iterators,
                  mb_posteriors_models, loss_criterion, n_train_tasks,
                  task_ids_in_meta_batch):
    ''' Calculate objective based on tasks in meta-batch '''
    # note: it is OK if some tasks appear several times in the meta-batch
    n_tasks_in_mb = len(mb_data_loaders)
    correct_count = 0
    sample_count = 0
    sum_empirical_loss = 0
    # Per-task accumulated loss (indexed by global task id):
    task_loss_list = [0 for _ in range(n_train_tasks)]

    # ----------- loop over tasks in meta-batch ---------------------------#
    for i_task in range(n_tasks_in_mb):
        # The posterior model corresponding to the task in the batch:
        post_model = mb_posteriors_models[i_task]
        post_model.train()
        loss, cor_count, sum_count = feval(prm, post_model, loss_criterion,
                                           mb_iterators[i_task],
                                           mb_data_loaders[i_task]['train'])
        correct_count += cor_count
        sample_count += sum_count
        # Accumulate this task's loss under its global id:
        task_loss_list[task_ids_in_meta_batch[i_task]] += loss
        sum_empirical_loss += loss
    # end loop over tasks in meta-batch

    avg_empirical_loss = (1 / n_tasks_in_mb) * sum_empirical_loss
    info = {
        'sample_count': get_value(sample_count),
        'correct_count': get_value(correct_count),
        'avg_empirical_loss': get_value(avg_empirical_loss)
    }
    return task_loss_list, info
def run_test(model, test_loader, loss_criterion, prm):
    """Standard evaluation pass; prints a summary and returns the accuracy."""
    model.eval()
    loss_total = 0
    correct_total = 0
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm, is_test=True)
        predictions = model(inputs)
        loss_total += loss_criterion(predictions, targets)  # mean loss per batch, summed
        correct_total += count_correct(predictions, targets)
    n_samples = len(test_loader.dataset)
    n_batches = len(test_loader)
    avg_loss = get_value(loss_total) / n_batches
    accuracy = correct_total / n_samples
    print('\nTest set: Average loss: {:.4}, Accuracy: {:.3} ( {}/{})\n'.format(
        avg_loss, accuracy, correct_total, n_samples))
    return accuracy
def run_test_majority_vote(model, test_loader, loss_criterion, prm, n_votes=9):
    """Evaluate by majority vote over n_votes stochastic forward passes.

    Each sample's predicted class is the one chosen most often across the
    votes. Returns an info dict with accuracy, counts and average loss.
    """
    # Fix: this assignment was commented out in the original, yet
    # n_test_samples is used below -> NameError at runtime.
    n_test_samples = len(test_loader.dataset)
    model.eval()
    test_loss = 0
    n_correct = 0
    # Hoisted out of the batch loop - prm does not change per batch:
    data_info = data_gen.get_info(prm)
    n_labels = data_info['n_classes']
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm, is_test=True)
        batch_size = inputs.shape[0]
        votes = cmn.zeros_gpu((batch_size, n_labels))
        for i_vote in range(n_votes):
            outputs = model(inputs)
            test_loss += loss_criterion(outputs, targets)
            # index of the max output = predicted class of this vote:
            pred = outputs.data.max(1, keepdim=True)[1]
            for i_sample in range(batch_size):
                pred_val = pred[i_sample].cpu().numpy()[0]
                votes[i_sample, pred_val] += 1
        # class with the most votes for each sample:
        majority_pred = votes.max(1, keepdim=True)[1]
        n_correct += majority_pred.eq(
            targets.data.view_as(majority_pred)).cpu().sum()
    test_loss /= n_test_samples
    test_acc = n_correct / n_test_samples
    info = {
        'test_acc': test_acc,
        'n_correct': n_correct,
        'test_type': 'majority_vote',
        'n_test_samples': n_test_samples,
        'test_loss': get_value(test_loss)
    }
    return info
def run_train_epoch(i_epoch):
    """One pass over the training set with standard (deterministic) SGD steps."""
    log_interval = 500  # batches between status prints
    model.train()
    for batch_idx, batch_data in enumerate(train_loader):
        # Fetch batch and compute the loss:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm)
        outputs = model(inputs)
        batch_loss = loss_criterion(outputs, targets)
        # Parameter update:
        grad_step(batch_loss, optimizer, lr_schedule, prm.lr, i_epoch)
        # Periodic status report:
        if batch_idx % log_interval == 0:
            batch_acc = correct_rate(outputs, targets)
            print(cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                    n_batches, batch_acc, get_value(batch_loss)))
def run_test_avg_vote(model, test_loader, loss_criterion, prm, n_votes=5):
    """Evaluate by averaging the raw outputs of n_votes stochastic passes.

    The predicted class is the argmax of the summed output scores.
    Returns an info dict with accuracy, counts and average loss.
    """
    n_test_samples = len(test_loader.dataset)
    n_test_batches = len(test_loader)
    model.eval()
    loss_sum = 0
    correct_sum = 0
    for batch_data in test_loader:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm, is_test=True)
        batch_size = min(prm.test_batch_size, n_test_samples)
        data_info = data_gen.get_info(prm)
        n_labels = data_info['n_classes']
        score_sum = cmn.zeros_gpu((batch_size, n_labels))
        for _ in range(n_votes):
            outputs = model(inputs)
            loss_sum += loss_criterion(outputs, targets)
            score_sum += outputs.data
        avg_pred = score_sum.max(1, keepdim=True)[1]
        correct_sum += avg_pred.eq(targets.data.view_as(avg_pred)).cpu().sum()
    loss_sum /= n_test_samples
    test_acc = correct_sum / n_test_samples
    return {
        'test_acc': test_acc,
        'n_correct': correct_sum,
        'test_type': 'AvgVote',
        'n_test_samples': n_test_samples,
        'test_loss': get_value(loss_sum)
    }
def get_objective(prior_model, prm, mb_data_loaders, mb_iterators,
                  mb_posteriors_models, loss_criterion, n_train_tasks):
    ''' Calculate objective based on tasks in meta-batch '''
    # note: it is OK if some tasks appear several times in the meta-batch
    n_tasks_in_mb = len(mb_data_loaders)
    sum_empirical_loss = 0
    sum_intra_task_comp = 0
    correct_count = 0
    sample_count = 0

    # KLD between hyper-posterior and hyper-prior:
    hyper_kl = (1 / (2 * prm.kappa_prior**2)) * net_norm(
        prior_model, p=2)  # net_norm is L2-regularization
    # Hyper-prior term:
    meta_complex_term = get_meta_complexity_term(hyper_kl, prm, n_train_tasks)

    sum_w_kld = 0.0
    sum_b_kld = 0.0

    # ----------- loop over tasks in meta-batch ---------------------------#
    for i_task in range(n_tasks_in_mb):
        n_samples = mb_data_loaders[i_task]['n_train_samples']
        # get sample-batch data from current task for the empirical loss estimate:
        batch_data = data_gen.get_next_batch_cyclic(
            mb_iterators[i_task], mb_data_loaders[i_task]['train'])
        # The posterior model corresponding to the task in the batch:
        post_model = mb_posteriors_models[i_task]
        post_model.train()

        # Monte-Carlo iterations:
        n_MC = prm.n_MC
        task_empirical_loss = 0
        task_complexity = 0
        # ----------- Monte-Carlo loop --------------------------------#
        for i_MC in range(n_MC):
            # get batch variables:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Empirical Loss on current task:
            outputs = post_model(inputs)
            curr_empirical_loss = loss_criterion(outputs, targets)
            correct_count += count_correct(outputs, targets)
            sample_count += inputs.size(0)

            # Intra-task complexity of current task:
            curr_empirical_loss, curr_complexity, task_info = get_bayes_task_objective(
                prm, prior_model, post_model, n_samples, curr_empirical_loss,
                hyper_kl, n_train_tasks=n_train_tasks)
            sum_w_kld += task_info["w_kld"]
            sum_b_kld += task_info["b_kld"]

            task_empirical_loss += (1 / n_MC) * curr_empirical_loss
            task_complexity += (1 / n_MC) * curr_complexity
        # end Monte-Carlo loop

        sum_empirical_loss += task_empirical_loss
        sum_intra_task_comp += task_complexity
    # end loop over tasks in meta-batch

    avg_empirical_loss = (1 / n_tasks_in_mb) * sum_empirical_loss
    avg_intra_task_comp = (1 / n_tasks_in_mb) * sum_intra_task_comp
    # Fix: the originals used `+=` on avg_w_kld / avg_b_kld, which were never
    # initialized -> NameError. Plain assignment is the intended average.
    avg_w_kld = (1 / n_tasks_in_mb) * sum_w_kld
    avg_b_kld = (1 / n_tasks_in_mb) * sum_b_kld

    # Approximated total objective:
    total_objective = avg_empirical_loss \
        + prm.task_complex_w * avg_intra_task_comp \
        + prm.meta_complex_w * meta_complex_term

    info = {
        'sample_count': get_value(sample_count),
        'correct_count': get_value(correct_count),
        'avg_empirical_loss': get_value(avg_empirical_loss),
        'avg_intra_task_comp': get_value(avg_intra_task_comp),
        'meta_comp': get_value(meta_complex_term),
        'w_kld': avg_w_kld,
        'b_kld': avg_b_kld
    }
    return total_objective, info
def run_train_epoch(i_epoch):
    """One meta-training epoch: iterate over meta-batches of tasks and take
    a gradient step on the meta-parameters (theta) after each meta-batch."""
    # For each task, prepare an iterator to generate training batches:
    train_iterators = [iter(train_data_loaders[t]['train'])
                       for t in range(n_tasks)]

    # Build a balanced task order by repeatedly shuffling all task ids:
    task_order = []
    task_ids_list = list(range(n_tasks))
    for _ in range(n_batches_per_task):
        random.shuffle(task_ids_list)
        task_order += task_ids_list

    # Each meta-batch groups several tasks; theta is updated per meta-batch:
    meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
    n_meta_batches = len(meta_batch_starts)

    # ----------- meta-batches loop (batches of tasks) --------------------#
    for i_meta_batch, mb_start in enumerate(meta_batch_starts):
        task_ids_in_meta_batch = task_order[mb_start:
                                            mb_start + prm.meta_batch_size]
        # may be smaller than prm.meta_batch_size for the last meta-batch;
        # a task may also appear more than once:
        n_tasks_in_batch = len(task_ids_in_meta_batch)

        mb_data_loaders = [train_data_loaders[tid]
                           for tid in task_ids_in_meta_batch]
        mb_iterators = [train_iterators[tid]
                        for tid in task_ids_in_meta_batch]

        # Objective over the tasks in this meta-batch:
        total_objective, info = meta_step(prm, model, mb_data_loaders,
                                          mb_iterators, loss_criterion)

        # Gradient step on theta based on validation data:
        grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

        # Periodic status report:
        log_interval = 200
        if i_meta_batch % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(cmn.status_string(i_epoch, num_epochs, i_meta_batch,
                                    n_meta_batches, batch_acc,
                                    get_value(total_objective)))
def run_train_epoch(i_epoch):
    """One meta-training epoch for the Bayesian meta-learner: update the
    shared prior and all tasks' posteriors once per meta-batch of tasks."""
    # For each task, prepare an iterator to generate training batches:
    train_iterators = [iter(data_loaders[t]['train'])
                       for t in range(n_train_tasks)]

    # Build a balanced task order: every task appears roughly the same number
    # of times. If a task has less data than others, it may be re-sampled
    # within an epoch; with equal-size tasks, each sample is drawn exactly once.
    task_order = []
    task_ids_list = list(range(n_train_tasks))
    for _ in range(n_batches_per_task):
        random.shuffle(task_ids_list)
        task_order += task_ids_list

    # Each meta-batch groups several tasks; a grad step with theta is taken
    # after each meta-batch:
    meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
    n_meta_batches = len(meta_batch_starts)

    # ----------- meta-batches loop (batches of tasks) --------------------#
    for i_meta_batch, mb_start in enumerate(meta_batch_starts):
        # May be smaller than prm.meta_batch_size for the last meta-batch;
        # a task may also appear more than once:
        task_ids_in_meta_batch = task_order[mb_start:
                                            mb_start + prm.meta_batch_size]

        mb_data_loaders = [data_loaders[tid]
                           for tid in task_ids_in_meta_batch]
        mb_iterators = [train_iterators[tid]
                        for tid in task_ids_in_meta_batch]
        mb_posteriors_models = [posteriors_models[tid]
                                for tid in task_ids_in_meta_batch]

        # Objective over the tasks in this meta-batch:
        total_objective, info = get_objective(
            prior_model, prm, mb_data_loaders, mb_iterators,
            mb_posteriors_models, loss_criterion, n_train_tasks)

        # Gradient step with the shared prior and all tasks' posteriors:
        grad_step(total_objective, all_optimizer, lr_schedule, prm.lr, i_epoch)

        # Periodic training-status report:
        log_interval = 200
        if i_meta_batch % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            write_to_log(
                cmn.status_string(i_epoch, prm.n_meta_train_epochs,
                                  i_meta_batch, n_meta_batches, batch_acc,
                                  get_value(total_objective)) +
                ' Empiric-Loss: {:.4}\t Task-Comp. {:.4}\t Meta-Comp.: {:.4}, w_kld : {:.4}, b_kld : {:.4}'.
                format(info['avg_empirical_loss'], info['avg_intra_task_comp'],
                       info['meta_comp'], info['w_kld'], info['b_kld']),
                prm)
def learn(data_set, complexity_type):
    """Toy 2D experiment: jointly learn a Gaussian prior and per-task Gaussian
    posteriors over a weight vector by minimizing empirical loss plus a
    complexity term, then plot the learned distributions.

    Args:
        data_set: list of per-task numpy arrays, each of shape
            (n_samples, n_dim) (n_dim is presumably 2, since the plots use
            dimensions 0 and 1 -- TODO confirm with the caller).
        complexity_type: one of 'PAC_Bayes_McAllaster', 'Variational_Bayes',
            'KL' -- selects the complexity term; anything else raises
            ValueError.
    """
    n_tasks = len(data_set)
    n_dim = data_set[0].shape[1]
    n_samples_list = [task_data.shape[0] for task_data in data_set]
    from Utils import config
    if config.USE_GPU:
        # Define prior (mean and log-STD of a diagonal Gaussian):
        w_P_mu = Variable(torch.randn(n_dim).cuda(), requires_grad=True)
        w_P_log_sigma = Variable(torch.randn(n_dim).cuda(), requires_grad=True)
        # Init posteriors (one row per task):
        w_mu = Variable(torch.randn(n_tasks, n_dim).cuda(), requires_grad=True)
        w_log_sigma = Variable(torch.randn(n_tasks, n_dim).cuda(), requires_grad=True)
    else:
        # Define prior:
        w_P_mu = Variable(torch.randn(n_dim), requires_grad=True)
        w_P_log_sigma = Variable(torch.randn(n_dim), requires_grad=True)
        # Init posteriors:
        w_mu = Variable(torch.randn(n_tasks, n_dim), requires_grad=True)
        w_log_sigma = Variable(torch.randn(n_tasks, n_dim), requires_grad=True)

    learning_rate = 1e-1

    # create your optimizer (optimizes prior and all posteriors jointly):
    optimizer = optim.Adam([w_mu, w_log_sigma, w_P_mu, w_P_log_sigma],
                           lr=learning_rate)

    n_epochs = 800
    batch_size = 128

    for i_epoch in range(n_epochs):

        # Sample data batch:
        b_task = np.random.randint(0, n_tasks)  # sample a random task index
        batch_size_curr = min(n_samples_list[b_task], batch_size)
        batch_inds = np.random.choice(n_samples_list[b_task], batch_size_curr,
                                      replace=False)
        task_data = torch.from_numpy(data_set[b_task][batch_inds])
        from Utils import config
        if config.USE_GPU:
            task_data = Variable(task_data.cuda(), requires_grad=False)
            # Re-Parametrization: w = mu + sigma * eps, eps ~ N(0, I)
            w_sigma = torch.exp(w_log_sigma[b_task])
            epsilon = Variable(torch.randn(n_dim).cuda(), requires_grad=False)
            w = w_mu[b_task] + w_sigma * epsilon
        else:
            task_data = Variable(task_data, requires_grad=False)
            # Re-Parametrization:
            w_sigma = torch.exp(w_log_sigma[b_task])
            epsilon = Variable(torch.randn(n_dim), requires_grad=False)
            w = w_mu[b_task] + w_sigma * epsilon

        # Empirical Loss (mean squared distance of w to the task samples):
        empirical_loss = (w - task_data).pow(2).mean()

        # Complexity terms -- closed-form KL between diagonal Gaussians,
        # accumulated over all tasks:
        sigma_sqr_prior = torch.exp(2 * w_P_log_sigma)
        complex_term_sum = 0
        for i_task in range(n_tasks):
            sigma_sqr_post = torch.exp(2 * w_log_sigma[i_task])
            kl_dist = torch.sum(w_P_log_sigma - w_log_sigma[i_task] +
                                ((w_mu[i_task] - w_P_mu).pow(2) +
                                 sigma_sqr_post) / (2 * sigma_sqr_prior) - 0.5)
            n_samples = n_samples_list[i_task]
            if complexity_type == 'PAC_Bayes_McAllaster':
                delta = 0.95
                complex_term_sum += torch.sqrt(
                    (1 / (2 * n_samples)) *
                    (kl_dist + np.log(2 * np.sqrt(n_samples) / delta)))
            elif complexity_type == 'Variational_Bayes':
                complex_term_sum += (1 / n_samples) * kl_dist
            elif complexity_type == 'KL':
                complex_term_sum += kl_dist
            else:
                raise ValueError('Invalid complexity_type')

        # Hyper-prior: L2-style penalty on the prior parameters:
        hyper_prior_factor = 1e-6 * np.sqrt(1 / n_tasks)
        hyper_prior = torch.sum(sigma_sqr_prior + w_P_mu.pow(2)) * hyper_prior_factor

        # Total objective:
        complex_term = (1 / n_tasks) * complex_term_sum
        objective = empirical_loss + complex_term + hyper_prior

        # Gradient step:
        optimizer.zero_grad()  # zero the gradient buffers
        objective.backward()
        optimizer.step()  # Does the update

        if i_epoch % 100 == 0:
            print('Step: {0}, objective: {1}'.format(i_epoch,
                                                     get_value(objective)))

    # Switch back to numpy:
    w_mu = w_mu.data.cpu().numpy()
    w_log_sigma = w_log_sigma.data.cpu().numpy()
    w_sigma = np.exp(w_log_sigma)
    w_P_mu = w_P_mu.data.cpu().numpy()
    w_P_log_sigma = w_P_log_sigma.data.cpu().numpy()
    w_P_sigma = np.exp(w_P_log_sigma)

    # Plots:
    fig1 = plt.figure()
    ax = plt.subplot(111, aspect='equal')
    # plot prior (mean point + one-sigma ellipse):
    plt.plot(w_P_mu[0], w_P_mu[1], 'o', label='prior mean ')
    ell = Ellipse(xy=(w_P_mu[0], w_P_mu[1]), width=w_P_sigma[0],
                  height=w_P_sigma[1], angle=0, color='blue')
    ell.set_facecolor('none')
    ax.add_artist(ell)
    for i_task in range(n_tasks):
        # plot task data points:
        plt.plot(data_set[i_task][:, 0], data_set[i_task][:, 1], '.',
                 label='Task {0} samples'.format(1 + i_task))
        # plot posterior (mean point + one-sigma ellipse):
        plt.plot(w_mu[i_task][0], w_mu[i_task][1], 'o',
                 label='posterior {0} mean'.format(1 + i_task))
        ell = Ellipse(xy=(w_mu[i_task][0], w_mu[i_task][1]),
                      width=w_sigma[i_task][0], height=w_sigma[i_task][1],
                      angle=0, color='black')
        ell.set_facecolor('none')
        ax.add_artist(ell)

    # plt.plot(0, 0, 'x', label='hyper-prior ')
    # plt.legend(loc='upper left')
    plt.legend()
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')