# Accumulate this step's scalar metrics into the running per-checkpoint buffers.
for key, val in step_vals.items():
    checkpoint_vals[key].append(val)

# Periodically (and always on the final step) evaluate, print, and persist a
# checkpoint record.
if (step % checkpoint_freq == 0) or (step == n_steps - 1):
    results = {
        'step': step,
        'epoch': step / steps_per_epoch,
    }
    # Average the buffered training metrics over the interval since the last
    # checkpoint.
    for key, val in checkpoint_vals.items():
        results[key] = np.mean(val)
    # Evaluate accuracy on every eval loader (in/out splits of each env).
    evals = zip(eval_loader_names, eval_loaders, eval_weights)
    for name, loader, weights in evals:
        acc = misc.accuracy(algorithm, loader, weights, device)
        results[name + '_acc'] = acc
    # Re-print the header row only when the set of result columns changes.
    results_keys = sorted(results.keys())
    if results_keys != last_results_keys:
        misc.print_row(results_keys, colwidth=12)
        last_results_keys = results_keys
    misc.print_row([results[key] for key in results_keys], colwidth=12)
    # Persist the full record (including hparams/args) as one JSON line.
    results.update({'hparams': hparams, 'args': vars(args)})
    epochs_path = os.path.join(args.output_dir, 'results.jsonl')
    with open(epochs_path, 'a') as f:
        f.write(json.dumps(results, sort_keys=True) + "\n")
    # Snapshot of model weights; presumably saved by code below this view —
    # TODO(review): confirm the checkpoint-save call follows.
    algorithm_dict = algorithm.state_dict()
def MULDENS_accuracy(algorithm, eval_dict, test_envs,
                     correct_models_selected_for_each_domain, device, acc_flags):
    """Evaluate a MulDEns multi-network ensemble on observed and test domains.

    Parameters:
        algorithm: object exposing ``MULDENS_networks`` (an indexable
            collection of models, each with ``.eval()``).
        eval_dict: maps loader names (e.g. ``'env0_in0'``, ``'env0_out0'``)
            to ``(loader, weights)`` pairs.
        test_envs: list of unobserved (held-out) environment ids.
        correct_models_selected_for_each_domain: per-'_out0'-loader model
            assignment; entries may be NaN when no model was selected.
        device: torch device passed through to the accuracy helpers.
        acc_flags: dict with booleans ``'compute_test_beta'`` (select the best
            model per test env via beta scores) and ``'ensemble_for_obs'``
            (report per-model + ensemble accuracies for every domain).

    Returns:
        dict of named accuracy/prediction entries.

    NOTE(review): relies on ``accuracy``, ``ensemble_accuracy`` and
    ``MULDENS_beta_grads`` defined elsewhere in this module.
    """
    compute_test_beta = acc_flags['compute_test_beta']  # False -> ensemble over test envs
    ensemble_for_obs = acc_flags['ensemble_for_obs']
    eval_loader_names = list(eval_dict.keys())
    # '_out' split loader names, needed both for observed-domain model lookup
    # and for the beta computation; hoisted here because the beta branch used
    # it even when the non-ensemble branch (which originally defined it) was
    # skipped, causing a NameError.
    eval_out_loader_names = [n for n in eval_loader_names if '_in' not in n]

    for network_i in algorithm.MULDENS_networks:
        network_i.eval()

    # Invert the domain -> model assignment into model -> list of domains.
    num_models = len(algorithm.MULDENS_networks)
    domains_selected_for_each_model = [[] for _ in range(num_models)]
    for model in range(num_models):
        for i, ms in enumerate(correct_models_selected_for_each_domain):
            # Robust NaN guard: the original `ms is not np.nan` identity check
            # only catches the exact np.nan singleton.
            if not (isinstance(ms, float) and np.isnan(ms)) and ms == model:
                domains_selected_for_each_model[model].append(i)

    results = {}
    # For observed domains we know which model to select, so we can read the
    # accuracy straight from the corresponding model.
    if ensemble_for_obs:
        # Ensemble AND per-model accuracies for both observed and unobserved
        # domains.
        for i in range(len(eval_loader_names) // 2):
            for split in ['_out0']:
                name = 'env' + str(i) + split
                loader = eval_dict[name][0]
                weights = eval_dict[name][1]
                if i in test_envs:
                    # For the test env we need the 'in' split, not 'out'.
                    name = 'unobs_' + 'env' + str(i) + '_in0'
                    loader = eval_dict['env' + str(i) + '_in0'][0]
                    weights = eval_dict['env' + str(i) + '_in0'][1]
                for m in range(num_models):
                    acc = accuracy(algorithm.MULDENS_networks[m], loader, weights, device)
                    results[name + '_m_' + str(m) + '_acc'] = acc
                ensemble_result_dict = ensemble_accuracy(
                    algorithm.MULDENS_networks, loader, weights, device)
                results[name + '_ens_acc'] = ensemble_result_dict['acc']
                results[name + '_preds_ens'] = ensemble_result_dict['preds']
                results[name + '_labels'] = ensemble_result_dict['labels']
                results[name + '_entropies'] = ensemble_result_dict['pred_entropies']
    else:
        for name in eval_loader_names:
            # Parse the env number from 'env{N}_...' — robust for N >= 10,
            # unlike the original single-character `int(name[3])`.
            env_idx = int(name[3:name.index('_')])
            if env_idx not in test_envs:
                loader = eval_dict[name][0]
                weights = eval_dict[name][1]
                if '_in' in name:
                    # Look up the selected model via the '_out0' loader of the
                    # SAME env. (The original built this from the enumerate
                    # index over ALL loader names, which is not the env id.)
                    model_domain_name = 'env' + str(env_idx) + '_out0'
                    model_num_idx = eval_out_loader_names.index(model_domain_name)
                else:
                    model_num_idx = eval_out_loader_names.index(name)
                model_num = int(correct_models_selected_for_each_domain[model_num_idx])
                acc = accuracy(algorithm.MULDENS_networks[model_num], loader, weights, device)
                results[name + '_acc'] = acc

    # For unobserved domains, either pick the best model per test env from the
    # beta scores, or report every model plus their ensemble.
    # beta has shape (num_test_envs x num_models).
    if compute_test_beta:
        beta = torch.zeros((len(test_envs), num_models))
        for j, test_env in enumerate(test_envs):
            test_env_loader = eval_dict['env' + str(test_env) + '_out0'][0]
            for i, domain_idx in enumerate(domains_selected_for_each_model):
                loaders = [eval_dict[eval_out_loader_names[d]][0] for d in domain_idx]
                if len(domain_idx) != 0:
                    # Row index is the POSITION of the env within test_envs
                    # (j), matching the `beta[i, :]` read below. The original
                    # wrote `beta[test_env, i]`, which raises IndexError for
                    # any env id >= len(test_envs) and otherwise fills the
                    # wrong row.
                    beta[j, i] = MULDENS_beta_grads(
                        loaders, test_env_loader, algorithm.MULDENS_networks[i], device)
                else:
                    beta[j, i] = 0
        for i, test_env in enumerate(test_envs):
            beta_test_env = beta[i, :]
            # Plain int so it can index a list/ModuleList of networks.
            best_model_num = int(np.argmax(beta_test_env))
            for split in ['_in', '_out']:
                name = 'env' + str(test_env) + split + str(0)
                loader = eval_dict[name][0]
                weights = eval_dict[name][1]
                acc = accuracy(algorithm.MULDENS_networks[best_model_num],
                               loader, weights, device)
                results[name + '_acc'] = acc
    else:
        # Without betas, report results from all the models and also an
        # ensemble of them.
        for test_env in test_envs:
            for split in ['_in', '_out']:
                name = 'env' + str(test_env) + split + str(0)
                loader = eval_dict[name][0]
                weights = eval_dict[name][1]
                for m in range(num_models):
                    acc = accuracy(algorithm.MULDENS_networks[m], loader, weights, device)
                    results[name + '_m_' + str(m) + '_acc'] = acc
                ensemble_results = ensemble_accuracy(
                    algorithm.MULDENS_networks, loader, weights, device)
                results[name + '_ens_acc'] = ensemble_results['acc']
                results[name + '_preds_models'] = ensemble_results['preds']
                results[name + '_labels'] = ensemble_results['labels']
    return results