def tune_parameters(self, valid_loader, out_valid_loader, num_samples=1000):
    """Grid-search the input-perturbation magnitude and fit a logistic
    regressor on the resulting scores, keeping the combination with the
    best TNR on the held-out half of the validation samples.

    Side effects: sets ``self.magnitude`` and ``self.regressor``.
    """
    self.regressor = None
    num_train = num_samples // 2
    candidate_magnitudes = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
    best = {'tnr': -np.inf, 'magnitude': None, 'regressor': None}
    for candidate in candidate_magnitudes:
        self.magnitude = candidate
        scores_in = detect_utils.get_scores(self, valid_loader, num_samples)
        scores_out = detect_utils.get_scores(self, out_valid_loader,
                                             num_samples)
        # First half of each score set trains the regressor
        # (label 0 = in-distribution, 1 = out-of-distribution) ...
        train_X = np.concatenate(
            (scores_in[:num_train], scores_out[:num_train]), axis=0)
        train_Y = np.concatenate((np.zeros(num_train), np.ones(num_train)))
        fitted = sklearn.linear_model.LogisticRegressionCV(
            n_jobs=-1).fit(train_X, train_Y)
        # ... and the second half evaluates it (column 1 = P(OOD)).
        proba_in = fitted.predict_proba(scores_in[num_train:num_samples])[:, 1]
        proba_out = fitted.predict_proba(
            scores_out[num_train:num_samples])[:, 1]
        tnr = callog.metric(-proba_in, -proba_out)['TNR']
        if tnr > best['tnr']:
            best = {'tnr': tnr, 'magnitude': candidate, 'regressor': fitted}
    self.magnitude, self.regressor = best['magnitude'], best['regressor']
def detection_performance(regressor, X, Y, outf):
    """
    Measure the detection performance of a fitted GMM-style scorer.

    Scores in ``X`` are assumed negative, so ``np.log((-1) * X)`` maps them
    into the scorer's input space; the density is inverted so larger
    ``y_pred`` means more likely out-of-distribution.

    :param regressor: fitted model exposing ``score_samples`` (log-density)
    :param X: raw (negative) detection scores, first axis = samples
    :param Y: labels, 0 = in-distribution, 1 = out-of-distribution
    :param outf: output directory holding the confidence files read by callog
    :return: detection metrics dict from ``callog.metric``
    """
    num_samples = X.shape[0]
    # 1 - prob(belonging to the gmm distribution)
    y_pred = 1 - np.exp(regressor.score_samples(np.log((-1) * X)))
    # Context managers guarantee both files are flushed and closed even if a
    # write fails; the original leaked the handles on error. Dead
    # commented-out scoring variants were removed.
    with open('%s/confidence_TMP_In.txt' % outf, 'w') as l1, \
            open('%s/confidence_TMP_Out.txt' % outf, 'w') as l2:
        for i in range(num_samples):
            if Y[i] == 0:
                l1.write("{}\n".format(-y_pred[i]))
            else:
                l2.write("{}\n".format(-y_pred[i]))
    results = callog.metric(outf, ['TMP'])
    return results
def detection_performance(regressor, X, Y, outf):
    """
    Measure the detection performance of a fitted binary classifier.

    :param regressor: fitted classifier exposing ``predict_proba``
    :param X: feature matrix, shape (num_samples, n_features)
    :param Y: labels, 0 = known/in-distribution, 1 = novel/out-of-distribution
    :param outf: output directory holding the confidence files read by callog
    :return: detection metrics dict from ``callog.metric``
    """
    num_samples = X.shape[0]
    # predict_proba column 1 = probability of the "novel" class.
    y_pred = regressor.predict_proba(X)[:, 1]
    # Dead novel_count/known_count tallies (incremented but never read)
    # were removed; `with` makes the file handles exception-safe.
    with open('%s/confidence_TMP_In.txt' % outf, 'w') as l1, \
            open('%s/confidence_TMP_Out.txt' % outf, 'w') as l2:
        for i in range(num_samples):
            if Y[i] == 0:
                l1.write("{}\n".format(-y_pred[i]))
            else:
                l2.write("{}\n".format(-y_pred[i]))
    results = callog.metric(outf, ['TMP'])
    return results
def test(testset, testlabel, model, outf):
    """Score `testset` with model(x - vae(x)), dump per-sample confidences
    split by label into the TMP confidence files, then print and
    wandb-log the resulting callog metrics."""
    scores = []
    cursor = 0
    with torch.no_grad():
        # Drop the final partial batch, exactly like the original loop.
        n_batches = int(np.floor(testset.size(0) / args.batch_size))
        for _ in range(n_batches):
            inputs = testset[cursor:cursor + args.batch_size].cuda()
            targets = testlabel[cursor:cursor + args.batch_size].cuda()
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)
            preds = model(inputs - vae(inputs))
            scores.append(torch.sigmoid(preds).squeeze(dim=1))
            cursor += args.batch_size
    output = torch.cat(scores)
    l1 = open('%s/confidence_TMP_In.txt' % outf, 'w')
    l2 = open('%s/confidence_TMP_Out.txt' % outf, 'w')
    for i in range(output.shape[0]):
        sink = l1 if testlabel[i] == 0 else l2
        sink.write("{}\n".format(-output[i]))
    l1.close()
    l2.close()
    results = callog.metric(outf, ['TMP'])
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    for mtype in mtypes:
        print(' {mtype:6s}'.format(mtype=mtype), end='')
    print('\n{val:6.2f}'.format(val=100. * results['TMP']['TNR']), end='')
    print(' {val:6.2f}'.format(val=100. * results['TMP']['AUROC']), end='')
    print(' {val:6.2f}'.format(val=100. * results['TMP']['DTACC']), end='')
    print(' {val:6.2f}'.format(val=100. * results['TMP']['AUIN']), end='')
    print(' {val:6.2f}\n'.format(val=100. * results['TMP']['AUOUT']), end='')
    for key in mtypes:
        wandb.log({key: 100. * results['TMP'][key]})
def detection_performance(regressor, X, Y, outf):
    """
    Measure the detection performance
    return: detection metrics
    """
    # Column 1 of predict_proba = probability of the positive (OOD) class.
    prob_novel = regressor.predict_proba(X)[:, 1]
    in_file = open("%s/confidence_TMP_In.txt" % outf, "w")
    out_file = open("%s/confidence_TMP_Out.txt" % outf, "w")
    for i in range(X.shape[0]):
        sink = in_file if Y[i] == 0 else out_file
        sink.write("{}\n".format(-prob_novel[i]))
    in_file.close()
    out_file.close()
    return callog.metric(outf, ["TMP"])
def detection_performance(y_pred, Y, outf):
    """
    Measure the detection performance
    return: detection metrics
    """
    # Detach tensors, keep probability of the positive (OOD) class only.
    ood_prob = y_pred.detach().cpu().numpy().astype(np.float64)[:, 1]
    labels = Y.detach().cpu().numpy().astype(np.float64)
    in_f = open('%s/confidence_TMP_In.txt' % outf, 'w')
    out_f = open('%s/confidence_TMP_Out.txt' % outf, 'w')
    for idx in range(labels.shape[0]):
        sink = in_f if labels[idx] == 0 else out_f
        sink.write("{}\n".format(-ood_prob[idx]))
    in_f.close()
    out_f.close()
    return callog.metric(outf, ['TMP'])
def get_auroc(output, target_var, SaveDir):
    """Collapse a 3-way head into binary scores (known vs class 2), append
    per-sample confidences to the TMP logs, and return callog metrics.

    NOTE: relabels ``target_var`` in place ({0,1} -> 0, 2 -> 1).
    """
    binary_logits = torch.zeros([output.shape[0], 2])
    binary_logits[:, 0] = torch.max(output[:, 0:2], 1)[0]
    binary_logits[:, 1] = output[:, 2]
    # Order matters: fold class 1 into 0 before mapping class 2 to 1.
    target_var[np.nonzero(target_var.cpu().numpy() == 1)] = 0
    target_var[np.nonzero(target_var.cpu().numpy() == 2)] = 1
    probs = torch.nn.Softmax(dim=1)(binary_logits)
    y_pred = probs.detach().cpu().numpy().astype(np.float64)[:, 1]
    Y = target_var.detach().cpu().numpy().astype(np.float64)
    l1 = open('%s/confidence_TMP_In.txt' % SaveDir, 'a')
    l2 = open('%s/confidence_TMP_Out.txt' % SaveDir, 'a')
    for idx in range(Y.shape[0]):
        sink = l1 if Y[idx] == 0 else l2
        sink.write("{}\n".format(-y_pred[idx]))
    # Files are opened in append mode by design; flush so callog sees
    # everything written so far.
    l1.flush()
    l2.flush()
    return callog.metric(SaveDir, ['TMP'])
def tune_parameters(self, valid_loader, out_valid_loader, num_samples=1000):
    """Sweep ODIN's perturbation magnitude and softmax temperature, keeping
    the pair that maximizes TNR on the validation loaders.

    Side effects: sets ``self.magnitude`` and ``self.temperature``.
    """
    magnitudes = [
        0, 0.0005, 0.001, 0.0014, 0.002, 0.0024, 0.005, 0.01, 0.05, 0.1, 0.2
    ]
    temperatures = [1, 10, 100, 1000]
    best = (-np.inf, None, None)  # (tnr, magnitude, temperature)
    for mag, temp in product(magnitudes, temperatures):
        self.magnitude, self.temperature = mag, temp
        in_scores = detect_utils.get_scores(self, valid_loader, num_samples)
        out_scores = detect_utils.get_scores(self, out_valid_loader,
                                             num_samples)
        tnr = callog.metric(-in_scores, -out_scores)['TNR']
        if tnr > best[0]:
            best = (tnr, mag, temp)
    self.magnitude, self.temperature = best[1], best[2]
print('\n Final Accuracy: {}/{} ({:.2f}%)\n'.format(
    correct, total, 100. * correct / total))


def generate_non_target():
    """Write the max-softmax confidence of every non-target (OOD) sample
    to the Base_Out confidence log."""
    model.eval()
    seen = 0
    conf_file = open('%s/confidence_Base_Out.txt' % args.outf, 'w')
    for data, target in nt_test_loader:
        seen += data.size(0)
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        batch_output = model(data)
        for i in range(data.size(0)):
            # confidence score: max_y p(y|x)
            output = batch_output[i].view(1, -1)
            soft_out = F.softmax(output)
            soft_out = torch.max(soft_out.data)
            conf_file.write("{}\n".format(soft_out))


print('generate log from in-distribution data')
generate_target()
print('generate log from out-of-distribution data')
generate_non_target()
print('calculate metrics')
callog.metric(args.outf)
# pass # else : f2.write("{}\n".format(soft_out)) # f2.write("{}\n".format(F.sigmoid(batch_output[i,num_classes]).item())) # if soft_out > 0.9 : f6.write("{}\n".format(pred[i])) # f8.write("{}\n".format(F.sigmoid(batch_output[i,num_classes]).item())) f12.write("{}".format(bags.clone().cpu().data.detach().numpy())) if args.out_dataset == 'cifar100': if args.mode == 'sigmoid': mode = 'bce' elif args.mode == 'softmax': mode = 'ce' write_output(batch_output, target, args.outf, 0, num_classes=100, mode=mode, cifar100=True) print('generate log from in-distribution data') acc = generate_target() print('generate log from out-of-distribution data') generate_non_target() print('calculate metrics') callog.metric(args.outf, acc.cpu().numpy())
def test_ODIN(model, test_loader, out_test_loader, net_type, args):
    """Sweep ODIN temperature/magnitude for one model and one OOD loader.

    Writes one metric row per setting to save_fig/save_result.txt, prints
    the best setting's metrics, and returns (best_TNR, best_AUROC).
    NOTE(review): assumes callog.metric(outf, save_fig, T, m, ['PoT'])
    returns a dict keyed 'PoT' -> metric name — confirm against callog.
    """
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    outf = "./output/" + str(args.gpu)
    # set the path to pre-trained model and output
    pre_trained_net = './pre_trained/dog/' + net_type + '.pth.tar'
    save_fig = "./save_fig/dog"
    outf = outf + net_type
    Path(outf).mkdir(exist_ok=True, parents=True)
    Path(save_fig).mkdir(exist_ok=True, parents=True)
    torch.cuda.manual_seed(0)
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    # check the in-distribution dataset
    model = model.to(device).eval()
    # measure the performance; reduced grid (full grid kept for reference):
    # M_list = [0, 0.0005, 0.001, 0.0014, 0.002, 0.0024, 0.005, 0.01, 0.05, 0.1, 0.2]
    # T_list = [1, 10, 100, 1000]
    M_list = [0.001, 0.0014, 0.002]
    T_list = [100]
    base_line_list = []
    # One slot per OOD dataset (e.g. [0, 0, 0] for three OOD sets).
    ODIN_best_tnr = [0]
    ODIN_best_results = [0]
    ODIN_best_temperature = [-1]
    ODIN_best_magnitude = [-1]
    f = open(os.path.join(save_fig, "save_result.txt"), "w")
    # Header row of metric names.
    init_txt = ""
    for mtype in mtypes:
        init_txt += ' {mtype:6s}'.format(mtype=mtype)
    f.write(init_txt)
    # Baseline pass (temperature=1, magnitude=0) before the sweep.
    lib_generation.get_posterior(model, net_type, test_loader, 0, 1, outf,
                                 True)
    lib_generation.get_posterior(model, net_type, out_test_loader, 0, 1, outf,
                                 False)
    ODIN_results = callog.metric(outf, save_fig, 1, 0, ['PoT'])
    result = "\n{:6.2f}{:6.2f}{:6.2f}{:6.2f}{:6.2f}".format(
        100. * ODIN_results["PoT"]["TNR"],
        100. * ODIN_results["PoT"]["AUROC"],
        100. * ODIN_results["PoT"]["DTACC"],
        100. * ODIN_results["PoT"]["AUIN"],
        100. * ODIN_results["PoT"]["AUOUT"],
    )
    f.write(result)
    out_count = 0
    # Seed "best so far" with the baseline result.
    ODIN_best_results[out_count] = ODIN_results
    ODIN_best_tnr[out_count] = ODIN_results['PoT']['TNR']
    ODIN_best_temperature[out_count] = 1
    ODIN_best_magnitude[out_count] = 0
    for T in T_list:
        for m in M_list:
            magnitude = m
            temperature = T
            lib_generation.get_posterior(model, net_type, test_loader,
                                         magnitude, temperature, outf, True)
            print('Temperature: ' + str(temperature) + ' / noise: ' +
                  str(magnitude))
            lib_generation.get_posterior(model, net_type, out_test_loader,
                                         magnitude, temperature, outf, False)
            if temperature == 1 and magnitude == 0:
                # Baseline setting: recorded separately, not ranked.
                test_results = callog.metric(outf, save_fig, stypes=['PoT'])
                base_line_list.append(test_results)
            else:
                ODIN_results = callog.metric(outf, save_fig, temperature,
                                             magnitude, ['PoT'])
                result = "\n{:6.2f}{:6.2f}{:6.2f}{:6.2f}{:6.2f}".format(
                    100. * ODIN_results["PoT"]["TNR"],
                    100. * ODIN_results["PoT"]["AUROC"],
                    100. * ODIN_results["PoT"]["DTACC"],
                    100. * ODIN_results["PoT"]["AUIN"],
                    100. * ODIN_results["PoT"]["AUOUT"],
                )
                f.write(result)
                # Keep the setting with the highest TNR.
                if ODIN_best_tnr[out_count] < ODIN_results['PoT']['TNR']:
                    ODIN_best_results[out_count] = ODIN_results
                    ODIN_best_tnr[out_count] = ODIN_results['PoT']['TNR']
                    ODIN_best_temperature[out_count] = temperature
                    ODIN_best_magnitude[out_count] = magnitude
    f.close()
    # print the results (best setting only; baseline print block removed)
    print('ODIN method: in_distribution: ')
    count_out = 0
    for results in ODIN_best_results:
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100. * results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUROC']),
              end='')
        print('')
        count_out += 1
    # `results` here is the last entry of ODIN_best_results (a single-slot
    # list in this configuration).
    best_TNR = results['PoT']['TNR']
    best_AUROC = results['PoT']['AUROC']
    return best_TNR, best_AUROC
def main():
    """ODIN temperature/magnitude sweep over six dermatology OOD sets.

    For each OOD set, selects the best (T, magnitude) by validation TNR
    ('PoV') and prints baseline and best-ODIN test metrics ('PoT').
    """
    # set the path to pre-trained model and output
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    if os.path.isdir(args.outf) == False:
        os.mkdir(args.outf)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)
    out_dist_list = [
        'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet', 'nct'
    ]
    # load networks (all heads use 8 classes)
    if args.net_type == 'densenet_121':
        model = densenet_121.Net(models.densenet121(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/densenet-121/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
    elif args.net_type == 'mobilenet':
        model = mobilenet.Net(models.mobilenet_v2(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/mobilenet/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
        print("Done!")
    elif args.net_type == 'resnet_50':
        model = resnet_50.Net(models.resnet50(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/resnet-50/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
        print("Done!")
    elif args.net_type == 'vgg_16':
        model = vgg_16.Net(models.vgg16_bn(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/vgg-16/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
        print("Done!")
    else:
        raise Exception(f"There is no net_type={args.net_type} available.")
    # ImageNet normalization constants.
    in_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    print('load model: ' + args.net_type)
    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)
    # measure the performance
    M_list = [
        0, 0.0005, 0.001, 0.0014, 0.002, 0.0024, 0.005, 0.01, 0.05, 0.1, 0.2
    ]
    T_list = [1, 10, 100, 1000]
    base_line_list = []
    # One slot per OOD dataset (6 total: [0,0,0]*2 == six zeros).
    ODIN_best_tnr = [0, 0, 0] * 2
    ODIN_best_results = [0, 0, 0] * 2
    ODIN_best_temperature = [-1, -1, -1] * 2
    ODIN_best_magnitude = [-1, -1, -1] * 2
    for T in T_list:
        for m in M_list:
            magnitude = m
            temperature = T
            lib_generation.get_posterior(model, args.net_type, test_loader,
                                         magnitude, temperature, args.outf,
                                         True)
            out_count = 0
            print('Temperature: ' + str(temperature) + ' / noise: ' +
                  str(magnitude))
            for out_dist in out_dist_list:
                out_test_loader = data_loader.getNonTargetDataSet(
                    out_dist, args.batch_size, in_transform, args.dataroot)
                print('Out-distribution: ' + out_dist)
                lib_generation.get_posterior(model, args.net_type,
                                             out_test_loader, magnitude,
                                             temperature, args.outf, False)
                if temperature == 1 and magnitude == 0:
                    # Baseline setting goes into its own list.
                    test_results = callog.metric(args.outf, ['PoT'])
                    base_line_list.append(test_results)
                else:
                    # Select on validation split, report on test split.
                    val_results = callog.metric(args.outf, ['PoV'])
                    if ODIN_best_tnr[out_count] < val_results['PoV']['TNR']:
                        ODIN_best_tnr[out_count] = val_results['PoV']['TNR']
                        ODIN_best_results[out_count] = callog.metric(
                            args.outf, ['PoT'])
                        ODIN_best_temperature[out_count] = temperature
                        ODIN_best_magnitude[out_count] = magnitude
                out_count += 1
    # print the results
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    print('Baseline method: in_distribution: ' + args.dataset + '==========')
    count_out = 0
    for results in base_line_list:
        print('out_distribution: ' + out_dist_list[count_out])
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100. * results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUROC']),
              end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['DTACC']),
              end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUIN']),
              end='')
        print(' {val:6.2f}\n'.format(val=100. * results['PoT']['AUOUT']),
              end='')
        print('')
        count_out += 1
    print('ODIN method: in_distribution: ' + args.dataset + '==========')
    count_out = 0
    for results in ODIN_best_results:
        print('out_distribution: ' + out_dist_list[count_out])
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100. * results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUROC']),
              end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['DTACC']),
              end='')
        print(' {val:6.2f}'.format(val=100. * results['PoT']['AUIN']),
              end='')
        print(' {val:6.2f}\n'.format(val=100. * results['PoT']['AUOUT']),
              end='')
        print('temperature: ' + str(ODIN_best_temperature[count_out]))
        print('magnitude: ' + str(ODIN_best_magnitude[count_out]))
        print('')
        count_out += 1
def generate_non_target():
    """Average softmax over `eva_iter` stochastic forward passes
    (MC-dropout when enabled) and log the max-class confidence of every
    non-target sample to the Base_Out file."""
    model.eval()
    if args.network == 'mc-dropout':
        model.apply(apply_dropout)
    seen = 0
    conf_out = open('%s/confidence_Base_Out.txt' % outf, 'w')
    with torch.no_grad():
        for data, targets in nt_test_loader:
            seen += data.size(0)
            data, targets = data.to(device), targets.to(device)
            mean_probs = 0
            for _ in range(args.eva_iter):
                mean_probs = mean_probs + F.softmax(model(data), dim=1)
            mean_probs = mean_probs / args.eva_iter
            for i in range(data.size(0)):
                # confidence score: max_y p(y|x)
                sample_probs = mean_probs[i].view(1, -1)
                conf_out.write("{}\n".format(torch.max(sample_probs).item()))
    conf_out.close()


print('generate log from in-distribution data')
generate_target()
print('generate log from out-of-distribution data')
generate_non_target()
print('calculate metrics for OOD')
callog.metric(outf, 'OOD')
print('calculate metrics for mis')
callog.metric(outf, 'mis')
def result_summary(res_dict, args_dict, TNR_target=0.05, skip_pattern=None,
                   include_pattern='.*', pvalue_record=None):
    """Summarize per-reduction OOD detection results into report rows.

    Walks the in-distribution entry of ``res_dict`` to collect accuracies,
    calibrates a rejection threshold (alpha) for the target FPR, then
    reports TPR95/AUROC per out-distribution plus an 'avg' row.

    NOTE(review): assumes res_dict maps dataset -> reduction -> metric
    meters with `.mean`/`.std` tensors, and that `alphas` is a sorted list
    aligned with the *_roc curves — confirm against the meter producers.

    :return: list of OrderedDict rows suitable for a results table
    """
    from utils.meters import simple_auc
    from _collections import OrderedDict
    ## if not configured setup logging for external caller
    if not logging.getLogger('').handlers:
        setup_logging()
    in_dist = args_dict['dataset']
    alphas = args_dict['alphas']
    logging.info(f'Report for {args_dict["model"]} - {in_dist}')
    logging.info(f'Tag: {args_dict["tag"]}')
    result_dict = OrderedDict(
        model=args_dict["model"],
        in_dist=args_dict['dataset'],
        LDA=args_dict.get('LDA'),
        joint=args_dict['measure_joint_distribution'],
        tag=args_dict['tag'],
        channles_sellect=args_dict.get('channel_selection_fn'))
    # read indist results to calibrate alpha value for target TNR
    rows = []
    accuracies = {'model': {}}
    for reduction_name, reduction_metrics in res_dict[in_dist].items():
        accuracies[reduction_name] = {}
        if reduction_name.endswith('_acc'):
            acc = reduction_metrics.mean.cpu().numpy()
            std = reduction_metrics.std.cpu().numpy()
            acc_name = reduction_name.replace('_acc', '')
            # Re-home the accuracy under the reduction it describes.
            if acc_name == 'model':
                reduction_name = 'model'
            if acc_name.endswith('rescaled-smx'):
                reduction_name = acc_name[:-13]
                acc_name = 'model_rescaled_smx'
            elif acc_name.endswith('-pval'):
                reduction_name = acc_name[:-5]
                acc_name = 'pval'
            # acc[0]/acc[1] are top-1/top-5 — presumably; verify upstream.
            accuracies[reduction_name][f'{acc_name}_t1'] = acc[0]
            accuracies[reduction_name][f'{acc_name}_t5'] = acc[1]
            accuracies[reduction_name][f'{acc_name}_std_t1'] = std[0]
    for reduction_name, reduction_metrics in res_dict[in_dist].items():
        # Honour skip/include regex filters on the reduction name.
        if skip_pattern and bool(re.match(
                skip_pattern, reduction_name)) or include_pattern and not bool(
                    re.match(include_pattern, reduction_name)):
            continue
        result_dict['reduction'] = reduction_name
        result_dict.update(**accuracies['model'])
        result_dict.update(**accuracies[reduction_name])
        logging.info(reduction_name)
        if type(reduction_metrics) != dict:
            # report simple metric
            logging.info(
                f'\t{reduction_metrics.mean}\t({reduction_metrics.std})')
            continue
        # report reduction specific metrics
        for metric_name, meter_object in reduction_metrics.items():
            metric_stats = MeterDict()
            if not metric_name.endswith('_roc'):
                logging.info(
                    f'\t{metric_name}: {meter_object.mean.numpy():0.3}')
                continue
            FPR = meter_object.mean.numpy()
            # Index of the largest alpha whose FPR is still under target.
            calibrated_alpha_id = min((FPR < TNR_target).sum() - 1, len(FPR))
            if calibrated_alpha_id == -1:
                # all pvalues are larger than alpha
                fpr_under_target_alpha = meter_object.mean[0]
                interp_alpha = FPR[0]
                calibrated_alpha_id = 0
            else:
                fpr_under_target_alpha = FPR[calibrated_alpha_id]
                # actual rejection threshold to use for TNR 95%
                interp_alpha = np.interp(0.05, FPR.squeeze(), alphas)
            result_dict.update(
                dict(metric_name=metric_name,
                     FPR_strict=fpr_under_target_alpha,
                     FPR_over=FPR[calibrated_alpha_id + 1],
                     chosen_alpha=interp_alpha))
            logging.info(
                f'\t{metric_name} - in-dist rejected: '
                # f'alpha-{indist_pvalues_roc[alphas.index(TNR_target)]:0.3f} ({TNR_target:0.3f}), '
                f'under-{fpr_under_target_alpha:0.3f} ({alphas[calibrated_alpha_id]:0.3f}), '
                f'interp-{TNR_target:0.3f} ({interp_alpha:0.3f}), '
                f'over-{FPR[calibrated_alpha_id + 1]:0.3f} ({alphas[calibrated_alpha_id + 1]})'
            )
            # Cache the in-dist pvalues for the benchmark-metric comparison.
            if pvalue_record and reduction_name in pvalue_record[in_dist]:
                if metric_name.startswith(
                        'class_cond'
                ) and 'predicted_id' in pvalue_record[in_dist]:
                    predicted_ids = pvalue_record[in_dist]['predicted_id']
                    in_cc_pval_pred = pvalue_record[in_dist][reduction_name][
                        th.arange(predicted_ids.shape[0]), predicted_ids]
                else:
                    in_cc_pval_pred = pvalue_record[in_dist][
                        reduction_name].max(1)[0]
            for target_dataset_name, reduction_metrics in res_dict.items():
                if target_dataset_name != in_dist and metric_name in reduction_metrics[
                        reduction_name]:
                    interp_rejected = np.interp(
                        interp_alpha, alphas,
                        reduction_metrics[reduction_name]
                        [metric_name].mean.numpy())
                    TPR = reduction_metrics[reduction_name][
                        metric_name].mean.numpy()
                    raw_rejected = TPR[alphas.index(TNR_target)]
                    auroc = simple_auc(TPR, FPR)
                    logging.info(
                        f'\t\t{target_dataset_name}:\traw-{raw_rejected:0.3f}\tinterp-{interp_rejected:0.3f}\tAUROC:{auroc:0.3f}'
                    )
                    if pvalue_record and reduction_name in pvalue_record[
                            target_dataset_name]:
                        if metric_name.startswith(
                                'class_cond'
                        ) and 'predicted_id' in pvalue_record[
                                target_dataset_name]:
                            predicted_ids = pvalue_record[target_dataset_name][
                                'predicted_id']
                            out_cc_pval_pred = pvalue_record[
                                target_dataset_name][reduction_name][
                                    th.arange(predicted_ids.shape[0]),
                                    predicted_ids]
                        else:
                            out_cc_pval_pred = pvalue_record[
                                target_dataset_name][reduction_name].max(1)[0]
                        m = metric(in_cc_pval_pred.numpy(),
                                   out_cc_pval_pred.numpy())
                        logging.info(f'\t\t\tbenchmark metrics: {m}')
                        result_dict.update(**m)
                    result_dict.update(
                        dict(out_dist=target_dataset_name,
                             TPR95_raw=raw_rejected,
                             TPR95_interp=interp_rejected,
                             AUROC=auroc))
                    rows.append(result_dict.copy())
                    # cifar-vs-cifar pairs are excluded from the average.
                    if in_dist.startswith(
                            'cifar') and target_dataset_name.startswith(
                                'cifar'):
                        continue
                    metric_stats.update(
                        dict(TPR95_raw=th.tensor([raw_rejected]),
                             TPR95_interp=th.tensor([interp_rejected]),
                             AUROC=th.tensor([auroc])))
            # Emit an 'avg' row when the last dataset produced this metric.
            if target_dataset_name != in_dist and metric_name in reduction_metrics[
                    reduction_name]:
                result_dict['out_dist'] = 'avg'
                logging.info(
                    f'\tmetric avg stats: {[k + " " + str(float(v)) for k, v in metric_stats.get_mean_dict().items()]}'
                )
                result_dict.update(**metric_stats.get_mean_dict())
                rows.append(result_dict.copy())
    return rows
f4.write("{:.4f}\n".format(float(soft_out.data.cpu()))) else : assert() # print(soft_out.sum()) soft_out = torch.max(soft_out.data) # if pred[i] == 2 or pred[i] == 3 or pred[i] ==5: # pass # else : f2.write("{}\n".format(soft_out)) # f2.write("{}\n".format(F.sigmoid(batch_output[i,num_classes]).item())) # if soft_out > 0.9 : f6.write("{}\n".format(pred[i])) # f8.write("{}\n".format(F.sigmoid(batch_output[i,num_classes]).item())) f12.write("{}".format(bags.clone().cpu().data.detach().numpy())) if args.out_dataset == 'cifar100': if args.mode == 'sigmoid': mode = 'bce' elif args.mode == 'softmax': mode = 'ce' write_output(batch_output, target, args.outf, 0, num_classes=100, mode=mode, cifar100=True) print('generate log from in-distribution data') acc = generate_target() print('generate log from out-of-distribution data') generate_non_target() print('calculate metrics') callog.metric(args.outf, acc)
def main():
    """ODIN temperature/magnitude sweep for densenet/resnet on CIFAR/SVHN.

    For each of three OOD sets, picks the best (T, magnitude) by validation
    TNR ('PoV') and prints baseline and best-ODIN test metrics ('PoT').
    """
    # set the path to pre-trained model and output
    pre_trained_net = "./pre_trained/" + args.net_type + "_" + args.dataset + ".pth"
    args.outf = args.outf + args.net_type + "_" + args.dataset + "/"
    if os.path.isdir(args.outf) == False:
        os.mkdir(args.outf)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)
    # check the in-distribution dataset
    if args.dataset == "cifar100":
        args.num_classes = 100
    if args.dataset == "svhn":
        out_dist_list = ["cifar10", "imagenet_resize", "lsun_resize"]
    else:
        out_dist_list = ["svhn", "imagenet_resize", "lsun_resize"]
    # load networks
    if args.net_type == "densenet":
        if args.dataset == "svhn":
            # SVHN checkpoint is a state_dict; others are whole pickled models.
            model = models.DenseNet3(100, int(args.num_classes))
            model.load_state_dict(
                torch.load(pre_trained_net,
                           map_location="cuda:" + str(args.gpu)))
        else:
            model = torch.load(pre_trained_net,
                               map_location="cuda:" + str(args.gpu))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                (125.3 / 255, 123.0 / 255, 113.9 / 255),
                (63.0 / 255, 62.1 / 255.0, 66.7 / 255.0),
            ),
        ])
    elif args.net_type == "resnet":
        model = models.ResNet34(num_c=args.num_classes)
        model.load_state_dict(
            torch.load(pre_trained_net, map_location="cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
    model.cuda()
    print("load model: " + args.net_type)
    # load dataset
    print("load target data: ", args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)
    # measure the performance
    M_list = [
        0, 0.0005, 0.001, 0.0014, 0.002, 0.0024, 0.005, 0.01, 0.05, 0.1, 0.2
    ]
    T_list = [1, 10, 100, 1000]
    base_line_list = []
    # One slot per OOD dataset.
    ODIN_best_tnr = [0, 0, 0]
    ODIN_best_results = [0, 0, 0]
    ODIN_best_temperature = [-1, -1, -1]
    ODIN_best_magnitude = [-1, -1, -1]
    for T in T_list:
        for m in M_list:
            magnitude = m
            temperature = T
            lib_generation.get_posterior(model, args.net_type, test_loader,
                                         magnitude, temperature, args.outf,
                                         True)
            out_count = 0
            print("Temperature: " + str(temperature) + " / noise: " +
                  str(magnitude))
            for out_dist in out_dist_list:
                out_test_loader = data_loader.getNonTargetDataSet(
                    out_dist, args.batch_size, in_transform, args.dataroot)
                print("Out-distribution: " + out_dist)
                lib_generation.get_posterior(model, args.net_type,
                                             out_test_loader, magnitude,
                                             temperature, args.outf, False)
                if temperature == 1 and magnitude == 0:
                    # Baseline setting goes into its own list.
                    test_results = callog.metric(args.outf, ["PoT"])
                    base_line_list.append(test_results)
                else:
                    # Select on validation split, report on test split.
                    val_results = callog.metric(args.outf, ["PoV"])
                    if ODIN_best_tnr[out_count] < val_results["PoV"]["TNR"]:
                        ODIN_best_tnr[out_count] = val_results["PoV"]["TNR"]
                        ODIN_best_results[out_count] = callog.metric(
                            args.outf, ["PoT"])
                        ODIN_best_temperature[out_count] = temperature
                        ODIN_best_magnitude[out_count] = magnitude
                out_count += 1
    # print the results
    mtypes = ["TNR", "AUROC", "DTACC", "AUIN", "AUOUT"]
    print("Baseline method: in_distribution: " + args.dataset + "==========")
    count_out = 0
    for results in base_line_list:
        print("out_distribution: " + out_dist_list[count_out])
        for mtype in mtypes:
            print(" {mtype:6s}".format(mtype=mtype), end="")
        print("\n{val:6.2f}".format(val=100.0 * results["PoT"]["TNR"]),
              end="")
        print(" {val:6.2f}".format(val=100.0 * results["PoT"]["AUROC"]),
              end="")
        print(" {val:6.2f}".format(val=100.0 * results["PoT"]["DTACC"]),
              end="")
        print(" {val:6.2f}".format(val=100.0 * results["PoT"]["AUIN"]),
              end="")
        print(" {val:6.2f}\n".format(val=100.0 * results["PoT"]["AUOUT"]),
              end="")
        print("")
        count_out += 1
    print("ODIN method: in_distribution: " + args.dataset + "==========")
    count_out = 0
    for results in ODIN_best_results:
        print("out_distribution: " + out_dist_list[count_out])
        for mtype in mtypes:
            print(" {mtype:6s}".format(mtype=mtype), end="")
        print("\n{val:6.2f}".format(val=100.0 * results["PoT"]["TNR"]),
              end="")
        print(" {val:6.2f}".format(val=100.0 * results["PoT"]["AUROC"]),
              end="")
        print(" {val:6.2f}".format(val=100.0 * results["PoT"]["DTACC"]),
              end="")
        print(" {val:6.2f}".format(val=100.0 * results["PoT"]["AUIN"]),
              end="")
        print(" {val:6.2f}\n".format(val=100.0 * results["PoT"]["AUOUT"]),
              end="")
        print("temperature: " + str(ODIN_best_temperature[count_out]))
        print("magnitude: " + str(ODIN_best_magnitude[count_out]))
        print("")
        count_out += 1
# Save raw OOD scores into dictionaries baseline_scores_dict[dset] = base_scores odin_scores_dict[dset] = odin_scores odin_ipp_scores_dict[dset] = odin_ipp_scores mahala_scores_dict[dset] = mahalanobis_scores mahala_ipp_scores_dict[dset] = mahalanobis_ipp_scores #################################################################################################### # Compute all OOD statistics for this model over all the tested datasets print("Computing OOD Statistics...") for dd in range(1, len(DATASETS)): print("** DATASET: {} **".format(DATASETS[dd])) metric_results = callog.metric( np.array(baseline_scores_dict["ID"]), np.array(baseline_scores_dict[DATASETS[dd]])) print( "\tBaseline. AUROC: {:.4f}. TNR@95TPR: {:.4f}. DetAcc: {:.4f}" .format( metric_results['TMP']['AUROC'], metric_results['TMP']['TNR'], metric_results['TMP']['DTACC'], )) STAT_ood_baseline[DATASETS[dd]]["auroc"].append( metric_results['TMP']['AUROC']) STAT_ood_baseline[DATASETS[dd]]["tnr"].append( metric_results['TMP']['TNR']) STAT_ood_baseline[DATASETS[dd]]["dtacc"].append( metric_results['TMP']['DTACC'])
def main(args):
    """Fit an OOD detector on in-distribution data, optionally tune it per
    OOD dataset, and print TNR/AUROC/DTACC/AUIN/AUOUT for each OOD set."""
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_idx)
    os.environ["CUDA_DEVICE"] = str(args.gpu_idx)
    np.random.seed(0)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu_idx)
    pretrained = os.path.join(
        args.net_dir, '{}_{}.pth'.format(args.net_type, args.dataset))
    # set the out-of-distribution data
    out_dist_list = [
        'imagenet_crop', 'imagenet_resize', 'lsun_crop', 'lsun_resize', 'isun'
    ]
    if args.dataset == 'cifar10':
        out_dist_list = ['cifar100', 'svhn'] + out_dist_list
        input_stds = (0.2470, 0.2435, 0.2616)
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), input_stds)
        ])
    elif args.dataset == 'cifar100':
        out_dist_list = ['cifar10', 'svhn'] + out_dist_list
        input_stds = (0.2673, 0.2564, 0.2762)
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5071, 0.4865, 0.4409), input_stds)
        ])
    elif args.dataset == 'svhn':
        # SVHN is left unnormalized (unit stds).
        out_dist_list = ['cifar10', 'cifar100'] + out_dist_list
        input_stds = (1.0, 1.0, 1.0)
        in_transform = transforms.Compose([transforms.ToTensor()])
    # load model
    print('load model: ' + args.net_type)
    model = torch.load(pretrained, map_location="cuda:" + str(args.gpu_idx))
    model.cuda()
    model.eval()
    # load dataset
    print('load target data: ' + args.dataset)
    train_loader = data_utils.get_dataloader(args.dataset, args.data_root,
                                             'train', in_transform,
                                             args.batch_size)
    test_loader = data_utils.get_dataloader(args.dataset, args.data_root,
                                            'test', in_transform,
                                            args.batch_size)
    # fit detector
    print('fit detector')
    OOD_Detector = detector_dict[args.detector_type]
    main_detector = OOD_Detector(
        model,
        args.num_classes,
        ood_tuning=args.ood_tuning,
        net_type='' if args.naive_layer else args.net_type,
        # only odin/mahalanobis perturb inputs and need the stds back.
        normalizer=input_stds
        if args.detector_type in ['odin', 'mahalanobis'] else None,
    )
    if args.detector_type == 'malcom' and args.ood_tuning:
        args.detector_type = 'malcom++'
    main_detector.fit(train_loader)
    # get scores
    print('get scores')
    results = []
    if not args.ood_tuning:
        # Without tuning, in-dist scores are computed once and reused.
        in_scores = detectors.detect_utils.get_scores(main_detector,
                                                      test_loader)
    for _, out_dist in enumerate(out_dist_list):
        print('\t...out-of-distribution: ' + out_dist)
        out_test_loader = data_utils.get_dataloader(out_dist, args.data_root,
                                                    'test', in_transform,
                                                    args.batch_size)
        if args.ood_tuning:
            # Tuning uses the first 1000 samples of both loaders, so
            # scores are recomputed per OOD set with the tuned parameters.
            main_detector.tune_parameters(test_loader, out_test_loader,
                                          num_samples=1000)
            in_scores = detectors.detect_utils.get_scores(
                main_detector, test_loader)
            out_scores = detectors.detect_utils.get_scores(
                main_detector, out_test_loader)
        else:
            out_scores = detectors.detect_utils.get_scores(
                main_detector, out_test_loader)
        # Skip the first 1000 (tuning) samples when computing test metrics.
        test_results = callog.metric(-in_scores[1000:], -out_scores[1000:])
        results.append(test_results)
    mtypes = ['', 'TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    print('=' * 78)
    print('{} detector (with {} trained on {} '.format(
        args.detector_type, args.net_type, args.dataset), end='')
    print('w/o using ood samples): '
          if not args.ood_tuning else 'with using ood samples): ')
    for mtype in mtypes:
        print(' {mtype:^12s}'.format(mtype=mtype), end='')
    for count_out, result in enumerate(results):
        print('\n {:12}'.format(out_dist_list[count_out][:10]), end='')
        print(' {val:^12.2f}'.format(val=100. * result['TNR']), end='')
        print(' {val:^12.2f}'.format(val=100. * result['AUROC']), end='')
        print(' {val:^12.2f}'.format(val=100. * result['DTACC']), end='')
        print(' {val:^12.2f}'.format(val=100. * result['AUIN']), end='')
        print(' {val:^12.2f}'.format(val=100. * result['AUOUT']), end='')
    print('')
    print('=' * 78)
def ValClassifer(BLogits, BLabel, model, criterion, Index, SaveDir, show=False):
    """Evaluate the adversarial-sample classifier over the indexed samples.

    Feeds paired logit tensors through ``model`` in batches of ``args.TB2``,
    collapses its 3-class output to a binary normal-vs-adversarial decision,
    dumps per-sample confidences to ``confidence_TMP_{In,Out}.txt`` under
    ``SaveDir``, and computes detection metrics via ``callog.metric``.

    Args:
        BLogits: array indexed as BLogits[0, idx] (PDx inputs) and
            BLogits[1, idx] (FDx inputs). Assumed float-convertible — TODO
            confirm layout against the caller.
        BLabel: integer labels; 0/1 are treated as normal, 2 as adversarial.
        model: classifier taking (PDx, FDx) and returning 3-class logits.
        criterion: loss applied to (output, target).
        Index: index array consumed in batches of args.TB2.
        SaveDir: directory receiving the two confidence files.
        show: if True, print loss/accuracy after evaluation.

    Returns:
        (results, top1.avg): callog metric dict and average 3-class accuracy.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to eval mode
    model.eval()

    end = time.time()
    num_batches = 0  # FIX: was a variable `i` that the inner sample loop clobbered
    TotalDataScale = len(Index)

    # FIX: open the confidence files once in 'w' (truncating) mode and close them
    # via the context manager — the original reopened them in append mode every
    # batch without closing, leaking file handles.
    with open('%s/confidence_TMP_In.txt' % SaveDir, 'w') as l1, \
         open('%s/confidence_TMP_Out.txt' % SaveDir, 'w') as l2:
        while len(Index) != 0:
            # Slicing past the end is safe, so one code path covers the short
            # final batch (the original duplicated this in an if/else).
            batch_idx = Index[0:args.TB2]
            PDx = torch.from_numpy(BLogits[0, batch_idx].astype(dtype=np.float32))
            FDx = torch.from_numpy(BLogits[1, batch_idx].astype(dtype=np.float32))
            target = torch.from_numpy(BLabel[batch_idx])

            target = target.cuda()  # FIX: was duplicated
            PDx = torch.autograd.Variable(PDx).cuda()
            FDx = torch.autograd.Variable(FDx).cuda()
            target_var = torch.autograd.Variable(target).long()

            output = model(PDx, FDx)
            loss = criterion(output, target_var)

            output = output.float()
            prec1, correct = accuracy(output.data, target_var)

            # Collapse the three-class output to two classes: a sample
            # classified as 0 or 1 is treated as normal; class 2 means
            # adversarial. Labels are remapped the same way.
            Bioutput = torch.zeros([output.shape[0], 2])
            Bioutput[:, 0] = torch.max(output[:, 0:2], 1)[0]
            Bioutput[:, 1] = output[:, 2]
            target_var[np.nonzero(target_var.cpu().numpy() == 1)] = 0
            target_var[np.nonzero(target_var.cpu().numpy() == 2)] = 1

            # Dump negated adversarial confidences so callog.metric treats
            # larger values as "more in-distribution".
            y_pred = Bioutput.detach().cpu().numpy().astype(np.float64)[:, 1]
            Y = target_var.detach().cpu().numpy().astype(np.float64)
            for s in range(Y.shape[0]):  # FIX: loop var no longer shadows the batch counter
                if Y[s] == 0:
                    l1.write("{}\n".format(-y_pred[s]))
                else:
                    l2.write("{}\n".format(-y_pred[s]))

            losses.update(loss.data, FDx.shape[0])
            top1.update(prec1[0], FDx.shape[0])

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            num_batches += 1
            Index = Index[args.TB2:]

    results = callog.metric(SaveDir, ['TMP'])
    if show:
        # FIX: progress count now uses the batch counter, which the original
        # inner loop had overwritten with a per-sample index.
        print('\t Epoch: [{0}][{1}/{2}] Loss {loss.avg:.4f}'
              ' Prec@1 {top1.avg:.3f}'.format(
                  'Test', num_batches * args.TB2, TotalDataScale,
                  loss=losses, top1=top1))

    return results, top1.avg
def main():
    """Grid-search ODIN (temperature, noise magnitude) against a baseline.

    Loads a pretrained DenseNet/ResNet, sweeps temperature T and input-noise
    magnitude m, writing posterior scores to disk per setting; the (T=1, m=0)
    cell is the softmax baseline. The best ODIN setting per OOD dataset is
    chosen by validation TNR ('PoV' files) and reported on test ('PoT' files).
    Reads configuration from the module-level ``args``.
    """
    # set the path to pre-trained model and output
    pre_trained_net = './pre_trained/' + args.net_type + '_' + args.dataset + '.pth'
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    if os.path.isdir(args.outf) == False:
        os.mkdir(args.outf)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    # check the in-distribution dataset
    if args.dataset == 'cifar100':
        args.num_classes = 100
    if args.dataset == 'svhn':
        out_dist_list = ['cifar10', 'imagenet_resize',] #'lsun_resize']
    else:
        out_dist_list = ['svhn', 'imagenet_resize',] #'lsun_resize']

    # load networks
    # NOTE(review): for densenet the SVHN checkpoint is a state_dict while
    # other datasets store a full pickled model — confirm against how the
    # checkpoints were produced.
    if args.net_type == 'densenet':
        if args.dataset == 'svhn':
            model = models.DenseNet3(100, int(args.num_classes))
            model.load_state_dict(
                torch.load(pre_trained_net,
                           map_location="cuda:" + str(args.gpu)))
        else:
            model = torch.load(pre_trained_net,
                               map_location="cuda:" + str(args.gpu))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5071, 0.4867, 0.4408],
                                 [0.2675, 0.2565, 0.2761]),])
    elif args.net_type == 'resnet':
        model = models.ResNet34(num_c=args.num_classes)
        model.load_state_dict(
            torch.load(pre_trained_net, map_location="cuda:" + str(args.gpu)))
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),])
    model.cuda()
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # measure the performance
    # Sweep grid: noise magnitudes (m) x temperatures (T). The lists below
    # hold one best-so-far slot per OOD dataset, indexed by out_count.
    M_list = [0, 0.0005, 0.001, 0.0014, 0.002, 0.0024, 0.005, 0.01, 0.05, 0.1, 0.2]
    T_list = [1, 10, 100, 1000]
    base_line_list = []
    ODIN_best_tnr = [0, 0, 0]
    ODIN_best_results = [0 , 0, 0]
    ODIN_best_temperature = [-1, -1, -1]
    ODIN_best_magnitude = [-1, -1, -1]
    for T in T_list:
        for m in M_list:
            magnitude = m
            temperature = T
            # Write in-distribution posteriors for this (T, m) setting.
            lib_generation.get_posterior(model, args.net_type, test_loader,
                                         magnitude, temperature, args.outf,
                                         True)
            out_count = 0
            print('Temperature: ' + str(temperature) + ' / noise: '
                  + str(magnitude))
            for out_dist in out_dist_list:
                out_test_loader = data_loader.getNonTargetDataSet(
                    out_dist, args.batch_size, in_transform, args.dataroot)
                print('Out-distribution: ' + out_dist)
                lib_generation.get_posterior(model, args.net_type,
                                             out_test_loader, magnitude,
                                             temperature, args.outf, False)
                if temperature == 1 and magnitude == 0:
                    # (T=1, m=0) is the plain-softmax baseline.
                    test_results = callog.metric(args.outf, ['PoT'])
                    base_line_list.append(test_results)
                else:
                    # Select on the validation split ('PoV'), report the
                    # corresponding test-split ('PoT') metrics.
                    val_results = callog.metric(args.outf, ['PoV'])
                    if ODIN_best_tnr[out_count] < val_results['PoV']['TNR']:
                        ODIN_best_tnr[out_count] = val_results['PoV']['TNR']
                        ODIN_best_results[out_count] = callog.metric(
                            args.outf, ['PoT'])
                        ODIN_best_temperature[out_count] = temperature
                        ODIN_best_magnitude[out_count] = magnitude
                out_count += 1

    # print the results
    mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
    print('Baseline method: in_distribution: ' + args.dataset + '==========')
    count_out = 0
    for results in base_line_list:
        print('out_distribution: '+ out_dist_list[count_out])
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100.*results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100.*results['PoT']['AUROC']), end='')
        print(' {val:6.2f}'.format(val=100.*results['PoT']['DTACC']), end='')
        print(' {val:6.2f}'.format(val=100.*results['PoT']['AUIN']), end='')
        print(' {val:6.2f}\n'.format(val=100.*results['PoT']['AUOUT']), end='')
        print('')
        count_out += 1

    print('ODIN method: in_distribution: ' + args.dataset + '==========')
    count_out = 0
    for results in ODIN_best_results:
        print('out_distribution: '+ out_dist_list[count_out])
        for mtype in mtypes:
            print(' {mtype:6s}'.format(mtype=mtype), end='')
        print('\n{val:6.2f}'.format(val=100.*results['PoT']['TNR']), end='')
        print(' {val:6.2f}'.format(val=100.*results['PoT']['AUROC']), end='')
        print(' {val:6.2f}'.format(val=100.*results['PoT']['DTACC']), end='')
        print(' {val:6.2f}'.format(val=100.*results['PoT']['AUIN']), end='')
        print(' {val:6.2f}\n'.format(val=100.*results['PoT']['AUOUT']), end='')
        print('temperature: ' + str(ODIN_best_temperature[count_out]))
        print('magnitude: '+ str(ODIN_best_magnitude[count_out]))
        print('')
        count_out += 1
def main():
    """Run baseline OOD-scoring executions and log metrics to a CSV file.

    For each execution, loads the matching pretrained model for the configured
    loss (softmax / isomax / isomaxplus), scores the in-distribution test set
    and each OOD dataset with the loss-appropriate score (ES or MDS), prints a
    metrics table, and appends one CSV row per OOD dataset to results_odd.csv.
    Reads configuration from the module-level ``args``.
    """
    dir_path = os.path.join(
        "experiments", args.dir, "train_classify",
        "data~"+args.dataset+"+model~"+args.net_type+"+loss~"+str(args.loss))
    file_path = os.path.join(dir_path, "results_odd.csv")
    # Truncate the results file and write the fixed CSV header; rows are
    # appended inside the loop below.
    with open(file_path, "w") as results_file:
        results_file.write(
            "EXECUTION,MODEL,IN-DATA,OUT-DATA,LOSS,AD-HOC,SCORE,INFER-LEARN,INFER-TRANS,"
            "TNR,AUROC,DTACC,AUIN,AUOUT,CPU_FALSE,CPU_TRUE,GPU_FALSE,GPU_TRUE,TEMPERATURE,MAGNITUDE\n")
    # Scratch directory for the intermediate score files read by callog.
    args_outf = os.path.join("temporary", args.dir, args.loss,
                             args.net_type + '+' + args.dataset)
    if os.path.isdir(args_outf) == False:
        os.makedirs(args_outf)

    # define number of classes
    if args.dataset == 'cifar100':
        args.num_classes = 100
    elif args.dataset == 'imagenet32':
        args.num_classes = 1000
    else:
        args.num_classes = 10

    # Out-of-distribution datasets paired with each in-distribution dataset.
    if args.dataset == 'cifar10':
        out_dist_list = ['svhn', 'imagenet_resize', 'lsun_resize']
    elif args.dataset == 'cifar100':
        out_dist_list = ['svhn', 'imagenet_resize', 'lsun_resize']
    elif args.dataset == 'svhn':
        out_dist_list = ['cifar10', 'imagenet_resize', 'lsun_resize']

    # Per-dataset input normalization.
    if args.dataset == 'cifar10':
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.491, 0.482, 0.446),
                                 (0.247, 0.243, 0.261))])
    elif args.dataset == 'cifar100':
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.507, 0.486, 0.440),
                                 (0.267, 0.256, 0.276))])
    elif args.dataset == 'svhn':
        in_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.437, 0.443, 0.472),
                                 (0.198, 0.201, 0.197))])

    # NOTE: the loop variable is deliberately bound to args.execution so that
    # downstream code reading args sees the current execution number.
    for args.execution in range(1, args.executions + 1):
        print("EXECUTION:", args.execution)
        pre_trained_net = os.path.join(dir_path,
                                       "model" + str(args.execution) + ".pth")

        # Pick the loss head and its associated score type:
        # ES for softmax/isomax, MDS for isomaxplus.
        if args.loss.split("_")[0] == "softmax":
            loss_first_part = losses.SoftMaxLossFirstPart
            scores = ["ES"]
        elif args.loss.split("_")[0] == "isomax":
            loss_first_part = losses.IsoMaxLossFirstPart
            scores = ["ES"]
        elif args.loss.split("_")[0] == "isomaxplus":
            loss_first_part = losses.IsoMaxPlusLossFirstPart
            scores = ["MDS"]

        # load networks
        if args.net_type == 'densenetbc100':
            model = models.DenseNet3(100, int(args.num_classes),
                                     loss_first_part=loss_first_part)
        elif args.net_type == 'resnet110':
            model = models.ResNet110(num_c=args.num_classes,
                                     loss_first_part=loss_first_part)
        model.load_state_dict(
            torch.load(pre_trained_net, map_location="cuda:" + str(args.gpu)))
        model.cuda()
        print('load model: ' + args.net_type)

        # load dataset
        print('load target valid data: ', args.dataset)
        _, test_loader = data_loader.getTargetDataSet(
            args.dataset, args.batch_size, in_transform, args.dataroot)

        for score in scores:
            print("\n\n\n###############################")
            print("###############################")
            print("SCORE:", score)
            print("###############################")
            print("###############################")
            base_line_list = []
            # In-distribution scores first (flag True), then each OOD set
            # (flag False); callog reads the paired 'PoT' files from args_outf.
            get_scores(model, test_loader, args_outf, True, score)
            out_count = 0
            for out_dist in out_dist_list:
                out_test_loader = data_loader.getNonTargetDataSet(
                    out_dist, args.batch_size, in_transform, args.dataroot)
                print('Out-distribution: ' + out_dist)
                get_scores(model, out_test_loader, args_outf, False, score)
                test_results = callog.metric(args_outf, ['PoT'])
                base_line_list.append(test_results)
                out_count += 1

            # print the results
            mtypes = ['TNR', 'AUROC', 'DTACC', 'AUIN', 'AUOUT']
            print('Baseline method: train in_distribution: ' + args.dataset
                  + '==========')
            count_out = 0
            for results in base_line_list:
                print('out_distribution: '+ out_dist_list[count_out])
                for mtype in mtypes:
                    print(' {mtype:6s}'.format(mtype=mtype), end='')
                print('\n{val:6.2f}'.format(val=100.*results['PoT']['TNR']), end='')
                print(' {val:6.2f}'.format(val=100.*results['PoT']['AUROC']), end='')
                print(' {val:6.2f}'.format(val=100.*results['PoT']['DTACC']), end='')
                print(' {val:6.2f}'.format(val=100.*results['PoT']['AUIN']), end='')
                print(' {val:6.2f}\n'.format(val=100.*results['PoT']['AUOUT']), end='')
                print('')

                #Saving odd results:
                # One CSV row per OOD dataset; trailing constants fill the
                # timing/temperature/magnitude columns of the header schema.
                with open(file_path, "a") as results_file:
                    results_file.write(
                        "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                            str(args.execution), args.net_type, args.dataset,
                            out_dist_list[count_out], str(args.loss),
                            "NATIVE", score, 'NO', False,
                            '{:.2f}'.format(100.*results['PoT']['TNR']),
                            '{:.2f}'.format(100.*results['PoT']['AUROC']),
                            '{:.2f}'.format(100.*results['PoT']['DTACC']),
                            '{:.2f}'.format(100.*results['PoT']['AUIN']),
                            '{:.2f}'.format(100.*results['PoT']['AUOUT']),
                            0, 0, 0, 0, 1, 0))
                count_out += 1