def construct_PR_target(kde, x, rewards):
    """Weight each reward by the exponentiated KDE score of its sample.

    Args:
        kde: Fitted estimator handed through to ``KDE.compute_score``.
        x: Samples; everything after the first axis is flattened so that
            each sample becomes one row.
        rewards: Values multiplied element-wise with the per-sample
            probabilities.

    Returns:
        ``np.multiply(prob, rewards)`` where ``prob`` is the flattened
        array of ``exp(score)`` values, one per sample.
    """
    flat_samples = x.reshape(x.shape[0], -1)
    # Each row is scored on its own as a (1, n_features) batch; the score is
    # treated as a log-probability and exponentiated.
    per_sample = [
        np.exp(np.asarray(KDE.compute_score(kde, row.reshape(1, -1))))
        for row in flat_samples
    ]
    probabilities = np.asarray(per_sample).reshape(-1, )
    return np.multiply(probabilities, rewards)
def construct_KDE(**option):
    """Fit a KDE on flattened samples, generating the samples if none are given.

    Keyword Args:
        x: Pre-computed samples. When given (not ``None``) they are used
            directly and no data is generated.
        env: Environment forwarded to ``generate_data``. Defaults to
            ``gym.make('Pendulum-v0')``, created only when data actually has
            to be generated and the caller did not pass ``env``.
        num_trajs: Forwarded to ``generate_data`` (default 1000).
        max_episode_length: Forwarded to ``generate_data`` (default 10).
        window_size: Forwarded to ``generate_data`` (default 5).
        agent: Forwarded to ``generate_data`` (default ``random_agent``).

    Returns:
        The object returned by ``KDE.Compute_KDE`` for the samples reshaped
        to ``(n_samples, -1)``.
    """
    samples = option.get('x')
    if samples is None:
        settings = {
            'num_trajs': 1000,
            'max_episode_length': 10,
            'window_size': 5,
            'agent': random_agent,
            'x': None,
        }
        settings.update(option)
        # Constructing an environment is only worth it when the caller
        # provided neither ready-made samples nor an environment of their own.
        if 'env' not in settings:
            settings['env'] = gym.make('Pendulum-v0')
        _, _, samples, _ = generate_data(**settings)
    return KDE.Compute_KDE(samples.reshape(samples.shape[0], -1))
def Kolchinsky_estimation(par_object, MI_object, labelprobs, label_indices,
                          higher_lower_flag, par_flag):
    """
    Estimate mutual information with KDE, either in parallel or sequentially.

    par_object: parameter object (output object); ``par_object.dic`` maps each
        key to an entry whose first element is the layer activity
    MI_object: mutual information object whose ``mi_x`` / ``mi_y`` are filled
    labelprobs: probabilities of the class labels
    label_indices: mapping from class label to the indices of that class in
        the dataset
    higher_lower_flag: when equal to True ``KDE.entropy_estimator_kl`` is
        used, otherwise ``KDE.entropy_estimator_bd``
    par_flag: truthy selects the parallel implementation, otherwise the
        sequential loop below runs
    returns: mutual information object

    partly taken from: https://github.com/artemyk/ibsgd
    """
    noise_variance = 1e-3
    # The equality test (rather than plain truthiness) is deliberate: it keeps
    # the estimator selection unchanged for callers passing non-bool values.
    estimator = (KDE.entropy_estimator_kl
                 if higher_lower_flag == True  # noqa: E712
                 else KDE.entropy_estimator_bd)

    if par_flag:
        MI_object.mi_x, MI_object.mi_y = Par_Kolchinsky_estimation(
            par_object, noise_variance, labelprobs, estimator, label_indices)
        return MI_object

    # Conversion factor from nats to bits.
    nats2bits = 1.0 / np.log(2)
    for key, entry in par_object.dic.items():
        activity = entry[0]
        h_t = estimator(activity, noise_variance)[0]

        # Entropy of the layer activity conditioned on the output: the
        # label-probability-weighted sum of the per-class entropies.
        h_t_given_y = 0.
        for label, rows in label_indices.items():
            h_class = estimator(activity[rows, :], noise_variance)[0]
            h_t_given_y += labelprobs[label] * h_class

        # Entropy of the layer activity conditioned on the input; this is
        # simply the entropy of the Gaussian noise.
        h_t_given_x = KDE.kde_condentropy(activity, noise_variance)

        MI_object.mi_x[key] = nats2bits * (h_t - h_t_given_x)
        MI_object.mi_y[key] = nats2bits * (h_t - h_t_given_y)

        # Progress output, throttled on the two key components.
        if key[1] == 1 and (key[0] % 50 == 0 or key[0] <= 30):
            print("calculated KDE MI_X and MI_Y for epoch:", key[0])
    return MI_object
def generate_data(**option):
    """Collect trajectories and cut them into sliding windows.

    Keyword Args:
        env: Environment to sample from. Defaults to
            ``gym.make('Pendulum-v0')``, which is created only when the
            caller does not pass ``env``.
        num_trajs: Forwarded to ``get_trajectories`` (default 1000).
        max_episode_length: Forwarded to ``get_trajectories`` (default 10).
        window_size: Window length handed to ``sliding_window`` (default 5);
            it is not forwarded to ``get_trajectories``.
        agent: Forwarded to ``get_trajectories`` (default ``random_agent``).
        **option: Any further keys are forwarded to ``get_trajectories``
            unchanged.

    Returns:
        Tuple ``(actions_new, obss_new, x, new_rewards)`` as produced by
        ``sliding_window`` (re-ordered).
    """
    settings = {
        'num_trajs': 1000,
        'max_episode_length': 10,
        'window_size': 5,
        'agent': random_agent,
    }
    settings.update(option)
    # Build the default environment lazily so callers that bring their own
    # environment do not pay for constructing an unused one.
    if 'env' not in settings:
        settings['env'] = gym.make('Pendulum-v0')

    window_size = settings.pop('window_size')
    observations, rewards, actions = get_trajectories(**settings)

    # TODO: reward handling needs to change for other environments. A
    # Pendulum-v0 specific shift (rewards += 17, to make rewards positive)
    # existed here but is disabled; rewards are currently used as returned.
    action_obs = KDE.combine_obs_action(observations, actions)
    x, new_rewards, actions_new, obss_new = sliding_window(
        action_obs, rewards, window_size, actions, observations)
    return actions_new, obss_new, x, new_rewards
def Par_Kolchinsky_estimation(par_object, noise_variance, labelprobs,
                              function, label_indices):
    """
    Parallel KDE estimation; collects the mutual information in dictionaries.

    par_object: parameter object (output object); ``par_object.dic`` maps each
        key to an entry whose first element is the layer activity
    noise_variance: variance of the added noise
    labelprobs: probabilities of the class labels
    function: KDE entropy estimator to apply (upper or lower bound)
    label_indices: mapping from class label to the indices of that class in
        the dataset
    returns: two dictionaries (``dic_x``, ``dic_y``) keyed like
        ``par_object.dic`` holding the mutual information in bits
    """
    print("Starting Kolchinsky calculation for MI in parallel")
    # Conversion factor from nats to bits.
    nats2bits = 1.0 / np.log(2)
    dic_x, dic_y = {}, {}
    # A single worker pool is reused for every key.
    with Parallel(n_jobs=CPUS) as parallel:
        for key, entry in par_object.dic.items():
            activity = entry[0]
            h_t = function(activity, noise_variance)[0]

            # Per-class conditional entropy terms are computed in parallel,
            # one job per class label, and summed afterwards.
            per_class_terms = np.array(
                parallel(
                    delayed(Kolchinsky_par_helper)(
                        activity[rows, :], noise_variance, labelprobs,
                        label, function)
                    for label, rows in label_indices.items()))
            h_t_given_y = np.sum(per_class_terms)

            # Entropy of the layer activity conditioned on the input; this is
            # simply the entropy of the Gaussian noise.
            h_t_given_x = KDE.kde_condentropy(activity, noise_variance)

            dic_x[key] = nats2bits * (h_t - h_t_given_x)
            dic_y[key] = nats2bits * (h_t - h_t_given_y)
    return dic_x, dic_y
def QuarticRegression(XTrain, yTrain, XTest, bw):
    """Kernel-weighted regression with the 'Quartic' kernel, rounded.

    Args:
        XTrain: Training inputs passed to ``compute_kernel`` and ``KDE``.
        yTrain: Training targets.
        XTest: Query inputs.
        bw: Kernel bandwidth.

    Returns:
        ``np.round`` of the kernel-weighted targets divided by the ``KDE``
        normaliser; NaN entries are replaced by the mean of ``yTrain``
        before rounding.
    """
    kernel_name = 'Quartic'
    weights = compute_kernel(XTrain, XTest, bw, kernel_name)
    normaliser = KDE(XTrain, XTest, bw, kernel_name)
    prediction = weights.T.dot(yTrain) / normaliser
    # Fall back to the mean training target wherever the ratio is NaN.
    fallback = np.mean(yTrain)
    prediction[np.isnan(prediction)] = fallback
    return np.round(prediction)
def EpanechnikovRegression(XTrain, yTrain, XTest, bw):
    """Kernel-weighted regression with the 'Epanechnikov' kernel, rounded.

    Args:
        XTrain: Training inputs passed to ``compute_kernel`` and ``KDE``.
        yTrain: Training targets.
        XTest: Query inputs.
        bw: Kernel bandwidth.

    Returns:
        ``np.round`` of the kernel-weighted targets divided by the ``KDE``
        normaliser; NaN entries are replaced by the mean of ``yTrain``
        before rounding.
    """
    kernel_name = 'Epanechnikov'
    weights = compute_kernel(XTrain, XTest, bw, kernel_name)
    normaliser = KDE(XTrain, XTest, bw, kernel_name)
    estimate = weights.T.dot(yTrain) / normaliser
    # Fall back to the mean training target wherever the ratio is NaN.
    mean_target = np.mean(yTrain)
    nan_mask = np.isnan(estimate)
    estimate[nan_mask] = mean_target
    return np.round(estimate)
def BoxCarRegression(XTrain, yTrain, XTest, bw):
    """Kernel-weighted regression with the 'boxcar' kernel, rounded.

    Args:
        XTrain: Training inputs passed to ``compute_kernel`` and ``KDE``.
        yTrain: Training targets.
        XTest: Query inputs.
        bw: Kernel bandwidth.

    Returns:
        ``np.round`` of the kernel-weighted targets divided by the ``KDE``
        normaliser; NaN entries are replaced by the mean of ``yTrain``
        before rounding.
    """
    kernel_name = 'boxcar'
    kernel_matrix = compute_kernel(XTrain, XTest, bw, kernel_name)
    kernel_sum = KDE(XTrain, XTest, bw, kernel_name)
    weighted_targets = kernel_matrix.T.dot(yTrain)
    y_hat = weighted_targets / kernel_sum
    # Fall back to the mean training target wherever the ratio is NaN.
    default_value = np.mean(yTrain)
    y_hat[np.isnan(y_hat)] = default_value
    return np.round(y_hat)
import Matrix
import Metrics
import Parser
import Boxplots
import Scatter

# Command-line dispatch: argv[1] names the action, argv[2] the dataset id.
# NOTE(review): `sys` and `KDE` are used below but not imported in this
# visible part -- presumably imported earlier in the file; confirm.
action = sys.argv[1]
if action == 'cache-metrics':
    # Compute the metrics for the dataset and cache them.
    dataset_id = sys.argv[2]
    Metrics.compute_and_cache_metrics(dataset_id)
elif action == 'kde-ct':
    # Plot the 'ct' metric against the baseline, once per value of the
    # trailing boolean flag (its meaning is defined by
    # KDE.plot_real_vs_baseline -- TODO confirm).
    dataset_id = sys.argv[2]
    ct_values = Parser.read_ct_metric(dataset_id)
    KDE.plot_real_vs_baseline(ct_values, dataset_id, 'ct', True)
    print('---')
    KDE.plot_real_vs_baseline(ct_values, dataset_id, 'ct', False)
    print('---')
elif action == 'kde-rpc':
    # Same as 'kde-ct' but for the 'rpc' metric.
    dataset_id = sys.argv[2]
    rpc_values = Parser.read_rpc_metric(dataset_id)
    KDE.plot_real_vs_baseline(rpc_values, dataset_id, 'rpc', True)
    print('---')
    KDE.plot_real_vs_baseline(rpc_values, dataset_id, 'rpc', False)
    print('---')
elif action == 'boxplots':
    # Reads the aspect ratios for the dataset.
    # NOTE(review): `ar_values` is not used within the visible part of this
    # branch; the branch likely continues beyond what is shown here.
    dataset_id = sys.argv[2]
    ar_values = Parser.read_aspect_ratios(dataset_id)
def _log_density_ratio(numerator, denominator):
    """Return log(numerator / denominator) with invalid entries set to 0."""
    return np.ma.log(numerator / denominator).filled(0)


def train(X, Y, file, kde_bandwidth=0.1, num_fold=10):
    """Run the KDE step, tune lambda per fold by inner CV and score each fold.

    ``KDE.kde`` is called first; the per-fold arrays are then read from
    ``./pdf/<file>/<kde_bandwidth>/<fold>/``. For every fold the features are
    the log of the ratio ``*_2 / *_1`` (invalid logs replaced by 0). Ten
    lambda values in [0.01, 0.1] are compared with a shuffled 5-fold
    cross-validation on the fold's training part, and the lambda with the
    highest mean AUC is used for the final ``CV`` call with ``SAVE=True``.

    Args:
        X: Data forwarded to ``KDE.kde``.
        Y: Labels forwarded to ``KDE.kde``.
        file: Dataset name; forwarded to ``KDE.kde`` / ``CV`` and used as a
            path component.
        kde_bandwidth: KDE bandwidth; also a path component.
        num_fold: Number of outer folds to evaluate.

    Returns:
        List with the AUC returned by the final ``CV`` call of each fold.
    """
    # Imported once per call instead of once per lambda candidate.
    from sklearn.model_selection import KFold

    KDE.kde(X, Y, kde_bandwidth, file)

    trials = 10
    lambda_vals = np.linspace(0.01, 0.1, trials)

    AUC_all = []
    for kfold in range(num_fold):
        fold_dir = './pdf/{}/{}/{}/'.format(file, kde_bandwidth, kfold)
        X_training_1 = np.load(fold_dir + 'training_1.npy')
        X_training_2 = np.load(fold_dir + 'training_2.npy')
        X_test_1 = np.load(fold_dir + 'test_1.npy')
        X_test_2 = np.load(fold_dir + 'test_2.npy')

        X_training = _log_density_ratio(X_training_2, X_training_1)
        Y_training = np.load(fold_dir + 'training_label.npy')
        Y_training = Y_training.reshape(Y_training.shape[0], 1)

        X_test = _log_density_ratio(X_test_2, X_test_1)
        Y_test = np.load(fold_dir + 'test_label.npy')
        Y_test = Y_test.reshape(Y_test.shape[0], 1)

        # Inner cross-validation: mean AUC for every lambda candidate.
        # NOTE: KFold shuffles without a fixed random_state, so the inner
        # splits differ from run to run.
        AUC_CV = []
        for lambda_v in lambda_vals:
            kf = KFold(n_splits=5, shuffle=True)
            auc_CV = [
                CV(kfold, file,
                   X_training[train_index], Y_training[train_index],
                   X_training[test_index], Y_training[test_index],
                   lambda_v, SAVE=False, kde_bandwidth=kde_bandwidth)
                for train_index, test_index in kf.split(X_training)
            ]
            AUC_CV.append(np.mean(np.array(auc_CV)))

        # Refit on the whole training part with the best lambda and score on
        # the held-out test part of this fold.
        best_lambda = lambda_vals[np.argmax(np.array(AUC_CV))]
        auc = CV(kfold, file, X_training, Y_training, X_test, Y_test,
                 best_lambda, SAVE=True, kde_bandwidth=kde_bandwidth)
        AUC_all.append(auc)
    return AUC_all
_SUPPORTED_STD_TYPES = ('kNN', 'interquartile')


def _channel_bandwidths(exp_config, feature_in_train, feature_in_test):
    """Return the per-channel KDE bandwidth (sigma) selected by ``std_type``."""
    if exp_config.std_type == 'kNN':
        # Load pre-computed sigma values for each channel using kNN as
        # proposed in the paper - COMPUTATIONALLY INEFFICIENT
        return torch.Tensor(
            np.load('results/std_%s.npy' %
                    (exp_config.data_identifier_source))).cuda()
    if exp_config.std_type == 'interquartile':
        # Compute sigma values for each channel using interquartiles -
        # COMPUTATIONALLY EFFICIENT AND LEADS TO SIMILAR RESULTS IN THE PAPER
        sorted_feature_in_train, _ = torch.sort(feature_in_train, axis=0)
        # NOTE(review): the empirical std is taken from the test features
        # while the quartiles come from the training features -- confirm
        # this is intended.
        emp_std = torch.std(feature_in_test, axis=0)
        half = sorted_feature_in_train.shape[0] // 2
        Q1 = torch.median(sorted_feature_in_train[0:half], axis=0).values
        Q3 = torch.median(sorted_feature_in_train[half:], axis=0).values
        IQR = Q3 - Q1
        smaller_spread = torch.min(torch.cat(
            [torch.unsqueeze(emp_std, 0),
             torch.unsqueeze(IQR, 0) / 1.34],
            axis=0), axis=0).values
        return 0.9 * smaller_spread * (feature_in_train.shape[0]**(-1 / 5))
    raise ValueError('Unsupported std_type %r; expected one of %r' %
                     (exp_config.std_type, _SUPPORTED_STD_TYPES))


def _kde_channel_scores(features, feature_in_train, std, constant):
    """Average Gaussian kernel response per channel, returned as a NumPy array.

    For every row of ``features`` and every channel, the Gaussian kernel
    (width ``std``, normalisation ``constant``) is evaluated against each
    training sample and averaged over the training samples.
    """
    scores = 0
    # One training sample at a time: a fully broadcast version would have to
    # materialise a (samples x training samples x channels) tensor.
    for train_sample in feature_in_train:
        zero_x = features - train_sample
        scores += constant * torch.exp(
            -0.5 * (torch.pow(torch.div(zero_x, std), 2)))
    scores /= feature_in_train.shape[0]
    return scores.detach().cpu().numpy()


def main(exp_config):
    """Run KDE-based out-of-distribution (OOD) detection and save the results.

    Steps: load the pre-trained ResNet34 onto the GPU, extract per-channel
    features for the in-distribution test set, training set, adversarially
    perturbed training set and every OOD dataset, turn them into per-channel
    KDE confidence scores, select the 50 channels with the lowest FPR at 95%
    TPR (in-distribution test vs. adversarial), train a logistic regression
    on those channels and evaluate it on every OOD dataset.

    Args:
        exp_config: Experiment configuration. Attributes read here:
            ``num_classes``, ``data_identifier_source``,
            ``data_identifier_target``, ``batch_size``, ``pre_trained_net``,
            ``experiment_name``, ``method_name``,
            ``number_of_samples_for_KDE`` and ``std_type``.

    Raises:
        ValueError: If ``exp_config.std_type`` is neither ``'kNN'`` nor
            ``'interquartile'``.

    Side effects:
        Writes ``results/<experiment_name>/<method_name>/results_<n>.txt``
        and prints progress messages. Requires a CUDA device.
    """
    # Fail before any expensive work when no bandwidth rule can be applied.
    if exp_config.std_type not in _SUPPORTED_STD_TYPES:
        raise ValueError('Unsupported std_type %r; expected one of %r' %
                         (exp_config.std_type, _SUPPORTED_STD_TYPES))

    # =====================
    # Load network
    # =====================
    model = models.ResNet34(num_c=exp_config.num_classes)
    summary(model.cuda(),
            input_size=(3, 32, 32))  # display the layers of the network
    model.cuda()  # copy the model into gpu

    # =========================
    # Load source dataset and pre-trained model
    # =========================
    source_train_loader, source_test_loader, _ = load_datasets(
        exp_config.data_identifier_source, exp_config.batch_size)
    model.load_state_dict(torch.load(exp_config.pre_trained_net))
    model.eval()

    # =========================
    # KDE-based OOD detection
    # =========================
    path_to_saved_results = 'results/{}/{}/results_{}.txt'.format(
        exp_config.experiment_name, exp_config.method_name,
        exp_config.number_of_samples_for_KDE)

    # The results file is opened up front so an unwritable path fails before
    # the expensive feature extraction; the context manager guarantees it is
    # closed even when a later step raises.
    with open(path_to_saved_results, "w") as f:
        # Compute number of layers in the network
        num_layers = KDE.compute_num_layers(exp_config, model)

        # get_features returns an MxN tensor where M is the number of samples
        # and N is the number of channels.
        print('Calculating features for the test set of in-distribution dataset')
        feature_in_test = KDE.get_features(exp_config, model, num_layers,
                                           source_test_loader)

        print(
            'Calculating features for the training set of in-distribution dataset')
        feature_in_train = KDE.get_features(exp_config, model, num_layers,
                                            source_train_loader,
                                            is_train=True)

        # Features of the adversarially perturbed training set.
        print('Calculating features for the adversarial images')
        feature_in_train_perturbed = KDE.get_features(exp_config, model,
                                                      num_layers,
                                                      source_train_loader,
                                                      perturb=True,
                                                      is_train=True)

        print('Calculating features for each OOD dataset')
        feature_ood = {}
        for target in exp_config.data_identifier_target:
            _, ood_loader, _ = load_datasets(target, exp_config.batch_size)
            feature_ood[target] = KDE.get_features(exp_config, model,
                                                   num_layers, ood_loader)

        std = _channel_bandwidths(exp_config, feature_in_train,
                                  feature_in_test)
        # Normalisation constant of the Gaussian kernel, per channel.
        constant = 1 / (std * torch.sqrt(torch.Tensor([2 * math.pi]).cuda()))

        print(
            'Calculating confidence scores using KDE for the test set of the in-distribution dataset'
        )
        scores_in_test = _kde_channel_scores(feature_in_test,
                                             feature_in_train, std, constant)

        print(
            'Calculating confidence scores using KDE for the training set of the in-distribution dataset'
        )
        scores_in_train = _kde_channel_scores(feature_in_train,
                                              feature_in_train, std, constant)

        print('Calculating confidence scores using KDE for the adversarial images')
        scores_in_train_perturbed = _kde_channel_scores(
            feature_in_train_perturbed, feature_in_train, std, constant)

        print('Calculating confidence scores using KDE for OOD datasets')
        scores_ood = {}
        for target in exp_config.data_identifier_target:
            scores_ood[target] = _kde_channel_scores(feature_ood[target],
                                                     feature_in_train, std,
                                                     constant)

        print('Calculating OOD detection accuracy')
        # Find channels that best distinguish scores of the in-distribution
        # test set (label 1) from the adversarial images (label 0).
        channel_scores = np.concatenate(
            (scores_in_test, scores_in_train_perturbed), axis=0)
        label = np.concatenate(
            (np.ones(scores_in_test.shape[0]),
             np.zeros(scores_in_train_perturbed.shape[0])),
            axis=0)
        fpr_all = []
        for i in range(scores_in_test.shape[1]):
            fpr_at_95_tpr, _, _, _ = calculate_ood_detection_performance_metrics(
                label, channel_scores[:, i], str(i), display=False)
            fpr_all.append(fpr_at_95_tpr)

        # The 50 channels with the lowest FPR at 95% TPR; computed once and
        # reused for the training and every test matrix.
        top_channels = np.argsort(fpr_all)[:50]

        # Create training set to train logistic regression; the selected
        # channel scores are sorted within each sample.
        X_train = np.concatenate(
            (np.sort(scores_in_train[:, top_channels], axis=1),
             np.sort(scores_in_train_perturbed[:, top_channels], axis=1)),
            axis=0)
        Y_train = np.concatenate(
            (np.zeros(scores_in_train.shape[0]),
             np.ones(scores_in_train_perturbed.shape[0])),
            axis=0)

        # Train logistic regression
        lr = LogisticRegressionCV(n_jobs=-1).fit(X_train, Y_train)

        # Evaluate logistic regression on each OOD dataset and compute OOD
        # detection accuracy. The same header goes to the file and the
        # console.
        header = 'Target \t\t FPRat95TPR \t DetErr \t AUROC \t\t AUPR_IN \n'
        f.write(header)
        print(header)
        for target in exp_config.data_identifier_target:
            X_test = np.concatenate(
                (np.sort(scores_in_test[:, top_channels], axis=1),
                 np.sort(scores_ood[target][:, top_channels], axis=1)),
                axis=0)
            Y_test = np.concatenate(
                (np.zeros(scores_in_test.shape[0]),
                 np.ones(scores_ood[target].shape[0])),
                axis=0)
            ood_probability = lr.predict_proba(X_test)[:, 1]
            fpr_at_95_tpr, detection_error, auroc, aupr_in = calculate_ood_detection_performance_metrics(
                Y_test, ood_probability, target, display=True)
            f.write(('%8s \t %.5f \t %.5f \t %.5f \t %.5f \n\n') %
                    (target, fpr_at_95_tpr, detection_error, auroc, aupr_in))

    print('Results are saved to ' + path_to_saved_results)
def df(y0):
    """Return ``e.dpdf(y0, i)`` for every derivative variable, as an array.

    A KDE is built on the ``base`` columns of ``self.endog``; ``dpdf`` is
    presumably the partial derivative of its density -- confirm against the
    KDE class. ``self``, ``base`` and ``derivative`` come from the enclosing
    scope.
    """
    estimator = KDE(self.endog[:, self.idcs(base)])
    # The derivative index is relative to the given base, which is sometimes
    # not the overall base (e.g. base = cond in dcf1), so the positions are
    # looked up within the base's own index list.
    in_derivative = [bi in self.idcs(derivative) for bi in self.idcs(base)]
    positions = np.where(in_derivative)[0]
    return np.array([estimator.dpdf(y0, pos) for pos in positions])
def f(y0):
    """Return the pdf at ``y0`` of a KDE built on the ``base`` columns.

    ``self`` and ``base`` come from the enclosing scope.
    """
    base_columns = self.endog[:, self.idcs(base)]
    estimator = KDE(base_columns)
    return estimator.pdf(y0)