def kkt_setup(
        target_theta, target_bias,
        X_train, Y_train,
        X_test, Y_test,
        dataset_name,
        percentile,
        loss_percentile,
        model,
        model_grad,
        class_map,
        use_slab,
        use_loss):
    # Gradient of the clean training loss at the target parameters.
    clean_grad_at_target_theta, clean_bias_grad_at_target_theta = model_grad(
        target_theta, target_bias, X_train, Y_train)

    losses_at_target = upper_bounds.indiv_hinge_losses(
        target_theta, target_bias, X_train, Y_train)

    # Support vectors are the examples with positive hinge loss at the target.
    sv_indices = losses_at_target > 0

    _, sv_centroids, _, sv_sphere_radii, _ = data.get_data_params(
        X_train[sv_indices, :], Y_train[sv_indices], percentile=percentile)

    # Per-class cap on the loss, taken at the given percentile of the clean losses.
    max_losses = [0, 0]
    for y in set(Y_train):
        max_losses[class_map[y]] = np.percentile(
            losses_at_target[Y_train == y], loss_percentile)
    print('Max losses are: %s' % max_losses)

    # Plug the target parameters into the sklearn model to report its accuracy.
    model.coef_ = target_theta.reshape((1, -1))
    model.intercept_ = target_bias
    print('If we could get our targeted theta exactly:')
    print('Train : %.3f' % model.score(X_train, Y_train))
    print('Test (overall) : %.3f' % model.score(X_test, Y_test))

    two_class_kkt = upper_bounds.TwoClassKKT(
        clean_grad_at_target_theta.shape[0],
        dataset_name=dataset_name,
        X=X_train,
        use_slab=use_slab,
        constrain_max_loss=use_loss)

    target_bias_grad = clean_bias_grad_at_target_theta

    return two_class_kkt, clean_grad_at_target_theta, target_bias_grad, max_losses
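# For reference, a minimal sketch of the per-example hinge loss that
# upper_bounds.indiv_hinge_losses is assumed to compute above: a linear model
# (theta, bias) pays max(0, 1 - y * (x . theta + bias)) on each example.
# This standalone helper is illustrative only, not the repo's implementation.
import numpy as np

def indiv_hinge_losses_sketch(theta, bias, X, Y):
    # Margins y_i * (x_i . theta + bias); the hinge loss is their shortfall below 1.
    margins = Y * (X.dot(theta) + bias)
    return np.maximum(0, 1 - margins)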
def find_feasible_label_flips_in_sphere(X, Y, percentile):
    class_map, centroids, centroid_vec, sphere_radii, slab_radii = data.get_data_params(
        X, Y, percentile=percentile)

    # Distance of each point to the centroid of the *opposite* class
    # (i.e. the class it would belong to after a label flip).
    sphere_dists_flip = compute_dists_under_Q(
        X, -Y,
        Q=None,
        subtract_from_l2=False,
        centroids=centroids,
        class_map=class_map,
        norm=2)

    # A flip is feasible if the point falls within the sphere radius of its new class.
    feasible_flipped_mask = np.zeros(X.shape[0], dtype=bool)
    for y in set(Y):
        class_idx_flip = class_map[-y]
        sphere_radius_flip = sphere_radii[class_idx_flip]
        feasible_flipped_mask[Y == y] = (sphere_dists_flip[Y == y] <= sphere_radius_flip)

    return feasible_flipped_mask
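# A hedged sketch of how find_feasible_label_flips_in_sphere might be used to
# seed a label-flip baseline attack: keep only the points that, once their
# label is flipped, still fall inside the sphere radius of their new class.
# The variable names (X_train, Y_train) and the 70th-percentile radius are
# assumptions for illustration, not something fixed by the function above.
feasible_mask = find_feasible_label_flips_in_sphere(X_train, Y_train, percentile=70)
X_flip_candidates = X_train[feasible_mask, :]
Y_flip_candidates = -Y_train[feasible_mask]
print('%d of %d points can be label-flipped without leaving the sphere defense.'
      % (int(feasible_mask.sum()), X_train.shape[0]))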
lower_avg_good_train_losses[0] = train_loss
lower_avg_bad_train_losses[0] = 0
lower_test_losses[0] = test_loss
lower_overall_train_acc[0] = train_acc
lower_good_train_acc[0] = train_acc
lower_bad_train_acc[0] = 0
lower_test_acc[0] = test_acc
lower_params_norm_sq[0] = params_norm_sq
lower_weight_decays[0] = weight_decay

# This is a hack that's needed because we subsequently do randomized
# rounding on the attack points, which can push them out of the feasible
# set, so we set the percentile to a conservatively low value.
if (dataset_name == 'imdb') and (percentile == 70):
    class_map, centroids, centroid_vec, sphere_radii, _ = data.get_data_params(
        X_train, Y_train, percentile=15)
    _, _, _, _, slab_radii = data.get_data_params(
        X_train, Y_train, percentile=60)
else:
    class_map, centroids, centroid_vec, sphere_radii, slab_radii = data.get_data_params(
        X_train, Y_train, percentile=percentile)

# Make sure that, after throwing out the burn-in iterates, enough attack
# points remain to cover the largest epsilon.
max_iter = num_iter_after_burnin + num_iter_to_throw_out
needed_iter = int(
    np.round(np.max(epsilons) * X_train.shape[0]) + num_iter_to_throw_out)
assert max_iter >= needed_iter, 'Not enough samples; increase max_iter to at least %s.' % needed_iter

minimizer = upper_bounds.Minimizer(use_slab=not ignore_slab)

for epsilon_idx, epsilon in enumerate(epsilons):
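# The hack comment above refers to randomized rounding of the attack points. A
# minimal sketch of that idea, assuming the attack produces fractional feature
# vectors (e.g. from a relaxed optimization) that must be rounded to integer
# counts for bag-of-words data like IMDB; this helper is hypothetical and not
# part of the repo's API.
import numpy as np

def randomized_round_sketch(x_frac, rng=np.random):
    # Round each coordinate up with probability equal to its fractional part,
    # so the rounded vector equals x_frac in expectation.
    floor = np.floor(x_frac)
    frac = x_frac - floor
    return floor + (rng.random_sample(x_frac.shape) < frac)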
parser = argparse.ArgumentParser()
parser.add_argument('dataset_name', help='One of: imdb, enron, dogfish, mnist_17')
args = parser.parse_args()

dataset_name = args.dataset_name
assert dataset_name in ['imdb', 'enron', 'dogfish', 'mnist_17']
print('=== Dataset: %s ===' % dataset_name)

epsilons = datasets.DATASET_EPSILONS[dataset_name]
X_train, Y_train, X_test, Y_test = datasets.load_dataset(dataset_name)
random_seed = 1

class_map, centroids, centroid_vec, sphere_radii, slab_radii = data.get_data_params(
    X_train, Y_train, percentile=70)

sphere_dists_flip = defenses.compute_dists_under_Q(
    X_train, -Y_train,
    Q=None,
    subtract_from_l2=False,
    centroids=centroids,
    class_map=class_map,
    norm=2)

slab_dists_flip = defenses.compute_dists_under_Q(
    X_train, -Y_train,
    Q=centroid_vec,
    subtract_from_l2=False,
    centroids=centroids,
    class_map=class_map,
def kkt_setup(
        target_theta, target_bias,
        X_train, Y_train,
        X_test, Y_test,
        dataset_name,
        percentile,
        loss_percentile,
        model,
        model_grad,
        class_map,
        use_slab,
        use_loss,
        use_l2,
        x_pos_tuple=None,
        x_neg_tuple=None,
        model_type='svm'):
    # Gradient of the clean training loss at the target parameters.
    clean_grad_at_target_theta, clean_bias_grad_at_target_theta = model_grad(
        target_theta, target_bias, X_train, Y_train)
    print(clean_bias_grad_at_target_theta.shape, clean_grad_at_target_theta.shape)

    # Per-example losses at the target parameters: hinge loss for the SVM,
    # log loss for logistic regression.
    if model_type == 'svm':
        losses_at_target = upper_bounds.indiv_hinge_losses(
            target_theta, target_bias, X_train, Y_train)
    elif model_type == 'lr':
        losses_at_target = upper_bounds.indiv_log_losses(
            target_theta, target_bias, X_train, Y_train)
    else:
        raise ValueError("model_type must be 'svm' or 'lr'")
    print("losses_at_target shape:", losses_at_target.shape)

    # For the SVM only the support vectors (positive hinge loss) matter;
    # for logistic regression every point has positive loss, so keep all of them.
    if model_type == 'svm':
        sv_indices = losses_at_target > 0
    else:
        sv_indices = np.arange(X_train.shape[0])

    _, sv_centroids, _, sv_sphere_radii, _ = data.get_data_params(
        X_train[sv_indices, :], Y_train[sv_indices], percentile=percentile)

    # Per-class cap on the loss, taken at the given percentile of the clean losses.
    max_losses = [0, 0]
    for y in set(Y_train):
        max_losses[class_map[y]] = np.percentile(
            losses_at_target[Y_train == y], loss_percentile)
    print('Max losses are: %s' % max_losses)

    # Plug the target parameters into the sklearn model to report its accuracy.
    model.coef_ = target_theta.reshape((1, -1))
    model.intercept_ = target_bias
    print('If we could get our targeted theta exactly:')
    print('Train : %.3f' % model.score(X_train, Y_train))
    print('Test (overall) : %.3f' % model.score(X_test, Y_test))

    if model_type == 'svm':
        two_class_kkt = upper_bounds.TwoClassKKT(
            clean_grad_at_target_theta.shape[0],
            dataset_name=dataset_name,
            X=X_train,
            use_slab=use_slab,
            constrain_max_loss=use_loss,
            use_l2=use_l2,
            x_pos_tuple=x_pos_tuple,
            x_neg_tuple=x_neg_tuple,
            model_type=model_type)
    elif model_type == 'lr':
        # We don't use the cvx solver for the logistic regression model.
        two_class_kkt = None
    else:
        raise NotImplementedError

    target_bias_grad = clean_bias_grad_at_target_theta

    return two_class_kkt, clean_grad_at_target_theta, target_bias_grad, max_losses
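# A hedged sketch of how this extended kkt_setup might be invoked for a
# logistic regression target. The surrounding variables (target_theta,
# target_bias, the data splits, the sklearn model, the logistic_grad helper,
# and the enron/percentile settings) are assumptions for illustration; the
# repo's actual calling code may differ.
two_class_kkt, target_grad, target_bias_grad, max_losses = kkt_setup(
    target_theta, target_bias,
    X_train, Y_train,
    X_test, Y_test,
    dataset_name='enron',
    percentile=90,
    loss_percentile=90,
    model=model,
    model_grad=logistic_grad,  # hypothetical gradient helper
    class_map=class_map,
    use_slab=False,
    use_loss=False,
    use_l2=False,
    model_type='lr')
# With model_type='lr', two_class_kkt comes back as None, since the
# cvxpy-based TwoClassKKT solver is only built for the SVM case.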