def plot_keep_k_sign_exp(files):
    pgf_setup()
    create_dir(os.path.join(get_data_dir(), 'keep_k_res'))
    for i, file in enumerate(files):
        # load data
        dset = os.path.split(file)[1].split('_')[0]
        p = os.path.split(file)[1].split('_')[1]
        with open(file, 'r') as f:
            res = json.load(f)
        # process data
        step_size = 2
        xticks = [(_ix, bf(r"{0:.0f}%".format(_x * 100)))
                  for _ix, _x in enumerate(res['retain_p'])][::step_size]
        res = res[dset]
        ys_rand = [1 - _y for _y in res['random']['adv_acc']]
        ys_top = [1 - _y for _y in res['top']['adv_acc']]
        plt.clf()
        ax = plt.subplot()
        ax.plot(ys_rand, label=bf('random-k'), linestyle='--', marker='.')
        ax.plot(ys_top, label=bf('top-k'), linestyle='--', marker='*')
        if i == 0:
            ax.legend()  # show legend for the first plot only
        plt.xticks(*list(zip(*xticks)))
        ax.set_ylabel(bf('misclassification rate'))
        ax.set_xlabel(bf('k percent of {} coordinates'.format(dset.upper())))
        plt.tight_layout()
        plt.savefig(
            data_path_join('keep_k_res', 'keep_k_sign_{}_{}.pdf'.format(dset, p)))
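# Usage sketch (illustrative, not part of the original file): plot_keep_k_sign_exp expects result
# files named `<dset>_<p>_....json`, each holding the dict produced by the keep-k experiment below,
# i.e. a top-level `retain_p` list plus a `<dset>` entry with `random`/`top` sub-dicts whose
# `adv_acc` lists give the adversarial accuracy per retained fraction. The file names here are
# hypothetical placeholders.
example_keep_k_files = [
    data_path_join('keep_k_res', 'mnist_linf_res.json'),
    data_path_join('keep_k_res', 'cifar10_l2_res.json'),
]
plot_keep_k_sign_exp(example_keep_k_files)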
def config_json_2_tbl_latex(json_files):
    """
    Take a list of json config file paths for the *same attack* but on different
    datasets / constraints and export a LaTeX table of its parameters.
    :param json_files: list of config json file paths
    :return:
    """
    _dir = os.path.join(get_data_dir(), 'tex_tbls')
    create_dir(_dir)
    attacks = set(map(lambda _: os.path.basename(_).split('_')[1], json_files))
    # dsets = set(map(lambda _: os.path.basename(_).split('_')[0], json_files))
    param_dict = {
        'lr': r"""$\eta$ (image $\ell_p$ learning rate)""",
        'fd_eta': r"""$\delta$ (finite difference probe)""",
        'prior_lr': r"""$\kappa$ (online convex optimization learning rate)""",
        'epsilon': r"""$\epsilon$ (allowed perturbation)""",
        'prior_size': r"""Tile size (data-dependent prior)""",
        'q': r"""$q$ (number of finite difference estimations per step)""",
        'prior_exploration': r"""$\zeta$ (bandit exploration)""",
        'num_eval_examples': r"""Test set size""",
        'max_loss_queries': r"""Max allowed queries""",
        'attack_name': r"""Attack name"""
    }
    assert len(attacks) == 1, "json files should all be for one attack method"
    attack_name = attacks.pop()
    df = pd.DataFrame()
    for json_file in json_files:
        with open(json_file, 'r') as f:
            config = json.load(f)
        dset_name = config['dset_name']
        p = config['attack_config']['p'].replace('inf', r'\infty')
        vals = []
        hparams = []
        for key, val in config['attack_config'].items():
            if key == 'p' or key == 'data_size':
                continue
            hparams.append(param_dict[key])
            vals.append(val)
        hparams.append(param_dict['num_eval_examples'])
        vals.append(config['num_eval_examples'])
        hparams.append(param_dict['attack_name'])
        vals.append(config['attack_name'])
        _df = pd.DataFrame({
            r"""\bf{Hyperparameter}""": hparams,
            r"""\texttt{{{}}} $\ell_{{{}}}$""".format(dset_name, p): vals
        }).set_index(r"""\bf{Hyperparameter}""")
        df = pd.concat([df, _df], axis=1)
    # df.columns = pd.MultiIndex.from_product([[r"""\bf{Value}"""], df.columns])
    df.columns = pd.MultiIndex.from_tuples(
        [tuple((r"""\bf{Value} """ + col).split()) for col in df.columns])
    df = df.applymap(beautify)  # prettify cell values before export
    df_2_tex(df, os.path.join(_dir, '{}_param_tbl.tex'.format(attack_name)))
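# Usage sketch (illustrative): config_json_2_tbl_latex expects config files for a single attack,
# one per dataset / norm, named `<dset>_<attack>_<p>_config.json`; it writes
# `<attack>_param_tbl.tex` under the `tex_tbls` data directory. The file names are hypothetical
# placeholders, and the repo's config_path_join helper is assumed to be importable here.
config_json_2_tbl_latex([
    config_path_join('mnist_sign_linf_config.json'),
    config_path_join('cifar10_sign_linf_config.json'),
])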
def plot_adv_cone_res(pickle_fname, is_legend=True):
    pgf_setup()
    _dir = os.path.join(
        os.path.dirname(os.path.abspath(pickle_fname)),
        '{}_plots'.format(os.path.basename(pickle_fname).split('.')[0]))
    print(" storing plots at {}".format(_dir))
    create_dir(_dir)
    with open(pickle_fname, 'rb') as f:
        res_ = pickle.load(f)
    setups = [_ for _ in res_.keys() if _ != 'epsilon'
              and _ != 'adv-cone-orders'
              and _ != 'sign-hunter-step'
              and _ != 'num_queries']
    plot_fnames = []
    for ie, _eps in enumerate(res_['epsilon']):
        plt.clf()
        for setup, color in zip(setups, ['red', 'blue']):
            res = res_[setup]
            _m = '_{\\text{adv-ens4}}' if 'ens' in setup else ''
            plt.plot(res_['adv-cone-orders'],
                     res['grad-sign'][ie, :],
                     label=bf("v3$%s$-\\texttt{GAAS}" % _m),
                     linewidth=2,
                     linestyle='--',
                     color=color)
            plt.plot(res_['adv-cone-orders'],
                     res['sign-hunter'][ie, :],
                     label=bf("v3$%s$-\\texttt{SAAS}" % _m),
                     linewidth=3,
                     linestyle='-',
                     color=color)
        plt.xlabel(bf('k'))
        plt.xticks([1, 50, 100])
        plt.ylim(0.0, 0.9)
        plt.yticks([0.0, 0.2, 0.4, 0.6, 0.8])
        if is_legend:
            plt.legend()
        plt.ylabel(bf('Probability'))
        plot_fnames.append(
            os.path.join(_dir, 'eps-{}.pdf'.format(int(_eps * 255))))
        plt.savefig(plot_fnames[-1])
    return plot_fnames
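# Usage sketch (illustrative): plot_adv_cone_res reads a pickle whose top level holds `epsilon`,
# `adv-cone-orders`, `sign-hunter-step`, `num_queries`, plus one entry per setup (e.g. `nat`,
# `adv`) with `grad-sign` / `sign-hunter` matrices of shape (len(epsilon), len(adv-cone-orders)),
# as written by main() below. The path is a hypothetical placeholder for such a result file.
example_cone_plots = plot_adv_cone_res(
    data_path_join('adv_cone_exp', 'adv-cone_step-0.0392156862745098.p'),
    is_legend=True)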
    # 'imagenet_bandit_linf_config.json',
    # 'imagenet_zosignsgd_l2_config.json',
    # 'imagenet_nes_l2_config.json',
    # 'imagenet_sign_l2_config.json',
    # 'imagenet_bandit_l2_config.json'
    # 'mnist_rand_linf_config.json',
    # 'cifar10_rand_linf_config.json',
    # 'imagenet_rand_linf_config.json',
    # 'mnist_rand_l2_config.json',
    # 'cifar10_rand_l2_config.json',
    # 'imagenet_rand_l2_config.json'
]

# create / allocate the result json for tabulation
data_dir = data_path_join('blackbox_attack_exp')
create_dir(data_dir)
res = {}

# create a store for logging; if the store already exists, remove it
store_name = os.path.join(data_dir, '{}_tbl.h5'.format(exp_id))
offset = 0
# rewrite all the results; alternatively, one could make use of `offset` to append to the h5 file above
if os.path.exists(store_name):
    os.remove(store_name)

for _cf in cfs:
    # for reproducibility
    np.random.seed(1)
    config_file = config_path_join(_cf)
    tf.reset_default_graph()
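# Sketch (assumption, not shown in this fragment): per-iteration results are expected to end up in
# a table named `tbl` inside the h5 store created above, since plt_from_h5tbl below reads them back
# with pd.read_hdf(..., 'tbl'). One way such rows could be appended with pandas; `append_tbl_rows`
# is a hypothetical helper, not part of the original script.
def append_tbl_rows(rows):
    """Append a list of result-row dicts (dataset, p, attack, iteration, batch_id, totals, ...)
    to the `tbl` table of the experiment's h5 store."""
    with pd.HDFStore(store_name) as store:
        store.append('tbl', pd.DataFrame.from_records(rows),
                     format='table', min_itemsize={'dataset': 16, 'attack': 32})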
# EXPERIMENT GLOBAL PARAMETERS
np.random.seed(1)
config_files = ['cifar10_topk_linf_config.json',
                'cifar10_topk_l2_config.json',
                'mnist_topk_linf_config.json',
                'mnist_topk_l2_config.json',
                'imagenet_topk_linf_config.json',
                'imagenet_topk_l2_config.json'
                ]
# for efficiency, batch sizes are customized for each dataset
batch_sz = [100, 100, 200, 200, 50, 50]
_dir = data_path_join('keep_k_res')
create_dir(_dir)
num_eval_examples = 1000

for idx, _cf in enumerate(config_files):
    eval_batch_size = batch_sz[idx]
    res = {}
    print(_cf)
    config_file = config_path_join(_cf)
    dset = _cf.split('_')[0]
    p = _cf.split('_')[2]
    res[dset] = {}
    res['retain_p'] = list(np.linspace(0, 1, 11))
    with open(config_file, 'r') as f:
        config_json = json.load(f, object_pairs_hook=OrderedDict)
    config_json["eval_batch_size"] = eval_batch_size
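# Sketch of the keep-k idea this script evaluates (illustrative only; the actual experiment queries
# the model for adversarial accuracy): given the sign of the loss gradient, keep only a fraction
# `retain_p` of its coordinates -- either the top-k by gradient magnitude or a random-k subset --
# and zero out the rest before forming the perturbation. `keep_k_sign` is a hypothetical helper.
def keep_k_sign(grad, retain_p, mode='top'):
    """Return sign(grad) with only a `retain_p` fraction of coordinates kept (rest zeroed)."""
    flat = grad.reshape(grad.shape[0], -1)
    k = int(retain_p * flat.shape[1])
    keep = np.zeros_like(flat)
    if k > 0:
        if mode == 'top':
            # indices of the k largest-magnitude coordinates per example
            idx = np.argsort(-np.abs(flat), axis=1)[:, :k]
        else:  # 'random'
            idx = np.stack([np.random.choice(flat.shape[1], k, replace=False)
                            for _ in range(flat.shape[0])])
        np.put_along_axis(keep, idx, np.sign(np.take_along_axis(flat, idx, axis=1)), axis=1)
    return keep.reshape(grad.shape)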
def plt_from_h5tbl(h5_filenames):
    """
    Creates a list of plots from a list of h5 files.
    It is assumed that each file contains a table named `tbl`, which corresponds to a
    dataframe with the following columns:
    `dataset` `p` `attack` `iteration` `batch_id` `total_successes` `total_failures`
    `total_cos_sim` `total_ham_sim` `total_loss` `total_loss_queries` `total_crit_queries`
    `num_loss_queries_per_iteration` `num_crit_queries_per_iteration`
    :param h5_filenames: list of h5 file paths
    :return:
    """
    pgf_setup()
    h5_filename = h5_filenames[0]
    _dir = os.path.join(
        os.path.dirname(os.path.abspath(h5_filename)),
        '{}_plots'.format(os.path.basename(h5_filename).split('.')[0]))
    print(" storing plots at {}".format(_dir))
    create_dir(_dir)

    df = pd.DataFrame()
    for h5_filename in h5_filenames:
        _df = pd.read_hdf(h5_filename, 'tbl')
        df = df.append(_df)
    # df = pd.read_csv(h5_filename)

    sign_agg_fail_rate = 0
    other_agg_fail_rate = 0
    sign_agg_num_loss_queries = 0
    other_agg_num_loss_queries = 0
    total_sets = 0.  # to compute the aggregated performance
    for (dset, p), _dp_df in df.groupby(['dataset', 'p']):
        total_sets += 1.
        tbl_df = pd.DataFrame()
        loss_fig, loss_ax = plt.subplots()
        ham_fig, ham_ax = plt.subplots()
        cos_fig, cos_ax = plt.subplots()
        scs_fig, scs_ax = plt.subplots()
        qry_fig, qry_ax = plt.subplots()
        # to compute aggregated failure rate and loss queries
        other_fail_rate = 1
        other_num_loss_queries = np.float("inf")
        sign_fail_rate = 1
        sign_num_loss_queries = np.float("inf")
        for attack, _at_df in _dp_df.groupby('attack'):
            # prettify the attack name
            attack_name = attack.replace('Attack', '').replace(
                'Sign', 'SignHunter').replace('Bandit', 'Bandits$_{TD}$').replace(
                'ZOSignHunter', 'ZOSign')
            attack_name = bf(r"""\texttt{%s}""" % attack_name)
            # temp df storing, for each batch, the latest record (latest in terms of iteration)
            _df = _at_df.groupby('batch_id').apply(
                lambda _: _[_.iteration == _.iteration.max()])
            agg_at_df = _at_df.groupby('iteration').sum().reset_index()

            # update aggregated records over iterations by adding contributions from batches
            # whose last iteration was smaller than the current iteration.
            def update_fields(row):
                update_row = _df[_df.iteration < row.iteration].sum()
                for key in row.keys():
                    if key in [
                            'iteration', 'batch_id',
                            'num_loss_queries_per_iteration',
                            'num_crit_queries_per_iteration'
                    ]:
                        continue
                    row[key] += update_row[key]
                return row

            agg_at_df = agg_at_df.apply(update_fields, axis=1)
            its = agg_at_df.iteration.tolist()
            # success rate per iteration
            scs_rate = (agg_at_df.total_successes /
                        (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()
            # average number of queries used per successful attack per iteration
            avg_scs_loss_queries = [
                0 if np.isnan(_) else _
                for _ in (agg_at_df.total_loss_queries /
                          agg_at_df.total_successes).tolist()
            ]
            # number of loss queries per example per iteration
            loss_queries = np.cumsum(agg_at_df.num_loss_queries_per_iteration / len(_df))
            # average cosine / Hamming / loss values per example (be it successful or failed)
            avg_cos_sim = (agg_at_df.total_cos_sim /
                           (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()
            avg_ham_sim = (agg_at_df.total_ham_sim /
                           (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()
            avg_loss = (agg_at_df.total_loss /
                        (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()

            scs_ax.plot(loss_queries, scs_rate, label=attack_name)
            ham_ax.plot(loss_queries, avg_ham_sim, label=attack_name)
            cos_ax.plot(loss_queries, avg_cos_sim, label=attack_name)
            loss_ax.plot(loss_queries, avg_loss, label=attack_name)
            if scs_rate[0] > 1e-5:
                # complete the graph from zero success rate (for which the loss queries are zero)
                qry_ax.plot([0] + scs_rate, [0] + avg_scs_loss_queries, label=attack_name)
            else:
                qry_ax.plot(scs_rate, avg_scs_loss_queries, label=attack_name)

            # Compute the std of loss queries: some bookkeeping is needed to extract
            # the number of queries used for each data point and compute the std accordingly.
            def process_at_df(_):
                """
                Takes the _at_df dataframe and replaces num_loss_queries by its cumulative sum,
                since for some methods the queries used vary from one iteration to the other.
                """
                _['cum_loss_queries'] = _.num_loss_queries_per_iteration.cumsum()
                return _

            _std_df = _at_df.groupby(['batch_id']).apply(process_at_df).reset_index(drop=True)
            std_df = _std_df.groupby(['batch_id', 'total_successes']).apply(
                lambda _: _[_.iteration == _.iteration.min()]).reset_index(drop=True)
            total_loss_query = std_df.groupby('batch_id').apply(
                lambda _: _.cum_loss_queries *
                (_.total_successes.diff().fillna(_.total_successes))).sum()
            total_loss_query_squared = std_df.groupby('batch_id').apply(
                lambda _: _.cum_loss_queries ** 2 *
                (_.total_successes.diff().fillna(_.total_successes))).sum()
            total_success = _at_df.groupby('batch_id').apply(
                lambda _: _[_.iteration == _.iteration.max()]['total_successes']).sum()
            avg_loss_queries = total_loss_query / total_success
            std_loss_queries = np.sqrt(
                total_loss_query_squared / (total_success - 1) -
                total_success * avg_loss_queries ** 2 / (total_success - 1))

            print("attack: {}, l-{}, failure rate: {}, avg. loss.: {}, std. loss.: {}".format(
                attack_name, p, 1 - scs_rate[-1], avg_loss_queries, std_loss_queries))

            tbl_df = tbl_df.append(pd.DataFrame.from_records([{
                'attack': attack_name,
                'p': p,
                'failure_rate': 1 - scs_rate[-1],
                'avg. loss': avg_scs_loss_queries[-1],
                'std. loss': std_loss_queries
            }]), ignore_index=True)

            if attack == 'SignAttack':
                sign_fail_rate = 1 - scs_rate[-1]
                sign_num_loss_queries = avg_scs_loss_queries[-1]
            elif attack == 'RandAttack':
                pass
            else:
                other_fail_rate = min(other_fail_rate, 1 - scs_rate[-1])
                other_num_loss_queries = min(other_num_loss_queries,
                                             avg_scs_loss_queries[-1])

        sign_agg_fail_rate += sign_fail_rate
        other_agg_fail_rate += other_fail_rate
        # 10000 is the maximum query budget allocation
        sign_agg_num_loss_queries += sign_num_loss_queries * (
            1 - sign_fail_rate) + sign_fail_rate * 10000
        other_agg_num_loss_queries += other_num_loss_queries * (
            1 - other_fail_rate) + other_fail_rate * 10000

        print("Data set: {}".format(dset))
        print(tbl_df.set_index('attack'))

        # uncomment if you'd like to show all the legends here
        # ham_ax.legend()
        # cos_ax.legend()
        # loss_ax.legend()
        if dset == 'mnist' and p == 'inf':
            qry_ax.legend(loc='upper left')
        elif p == 'inf':
            scs_ax.legend(loc=4)

        scs_ax.set_xlabel(bf('\# queries'))
        ham_ax.set_xlabel(bf('\# queries'))
        cos_ax.set_xlabel(bf('\# queries'))
        loss_ax.set_xlabel(bf('\# queries'))
        qry_ax.set_xlabel(bf('success rate'))

        scs_ax.set_ylabel(bf('success rate'))
        ham_ax.set_ylabel(bf('average Hamming similarity'))
        cos_ax.set_ylabel(bf('average cosine similarity'))
        loss_ax.set_ylabel(bf('average loss'))
        qry_ax.set_ylabel(bf('average \# queries'))

        scs_fig.tight_layout()
        ham_fig.tight_layout()
        cos_fig.tight_layout()
        loss_fig.tight_layout()
        qry_fig.tight_layout()

        scs_fig.savefig(os.path.join(_dir, '{}_{}_scs_plt.pdf'.format(dset, p)))
        ham_fig.savefig(os.path.join(_dir, '{}_{}_ham_plt.pdf'.format(dset, p)))
        cos_fig.savefig(os.path.join(_dir, '{}_{}_cos_plt.pdf'.format(dset, p)))
        loss_fig.savefig(os.path.join(_dir, '{}_{}_loss_plt.pdf'.format(dset, p)))
        qry_fig.savefig(os.path.join(_dir, '{}_{}_qrt_plt.pdf'.format(dset, p)))

    sign_agg_fail_rate /= total_sets
    other_agg_fail_rate /= total_sets
    sign_agg_num_loss_queries /= total_sets
    other_agg_num_loss_queries /= total_sets
    # This will raise a warning (division by zero) if neither SignHunter nor any other algorithm
    # is included in the passed h5 files; it can be safely ignored.
    print(
        "SignHunter uses {} times fewer queries and fails {} times less often than SOTA combined. "
        "These numbers are valid ONLY when the data of SignHunter AND one or more other algorithms are included."
        .format(other_agg_num_loss_queries / sign_agg_num_loss_queries,
                other_agg_fail_rate / sign_agg_fail_rate))
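# Numeric sketch (illustrative) of the per-success-count bookkeeping above: each newly successful
# example is charged the cumulative number of loss queries at the iteration where the success
# count first reached its level, and the average / std are taken over these per-example charges.
# The values below are made up purely to show the arithmetic.
qs = np.array([10., 30., 70.])   # cumulative loss queries when successes first hit each level
ds = np.array([2., 1., 3.])      # newly successful examples at each level (diff of total_successes)
n = ds.sum()                     # total number of successes
avg = (qs * ds).sum() / n        # weighted mean of per-example query counts
std = np.sqrt(((qs ** 2) * ds).sum() / (n - 1) - n * avg ** 2 / (n - 1))  # sample std, as in the code above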
def plt_from_h5tbl(h5_filenames):
    """
    Creates a list of plots from a list of h5 files.
    It is assumed that each file contains a table named `tbl`, which corresponds to a
    dataframe with the following columns:
    `dataset` `p` `attack` `iteration` `batch_id` `total_successes` `total_failures`
    `total_cos_sim` `total_ham_sim` `total_loss` `total_loss_queries` `total_crit_queries`
    `num_loss_queries_per_iteration`
    :param h5_filenames: list of h5 file paths
    :return:
    """
    pgf_setup()
    h5_filename = h5_filenames[0]
    _dir = os.path.join(
        os.path.dirname(os.path.abspath(h5_filename)),
        '{}_plots'.format(os.path.basename(h5_filename).split('.')[0]))
    print(" storing plots at {}".format(_dir))
    create_dir(_dir)

    df = pd.DataFrame()
    for h5_filename in h5_filenames:
        _df = pd.read_hdf(h5_filename, 'tbl')
        df = df.append(_df)
    # df = pd.read_csv(h5_filename)

    for (dset, p), _dp_df in df.groupby(['dataset', 'p']):
        loss_fig, loss_ax = plt.subplots()
        ham_fig, ham_ax = plt.subplots()
        cos_fig, cos_ax = plt.subplots()
        scs_fig, scs_ax = plt.subplots()
        qry_fig, qry_ax = plt.subplots()
        for attack, _at_df in _dp_df.groupby('attack'):
            # prettify the attack name
            attack_name = attack.replace('Attack', '').replace(
                'Sign', 'SignHunter').replace('Bandit', 'Bandits$_{TD}$').replace(
                'ZOSignHunter', 'ZOSign')
            attack_name = r"""\texttt{%s}""" % attack_name
            # temp df storing, for each batch, the latest record (latest in terms of iteration)
            _df = _at_df.groupby('batch_id').apply(
                lambda _: _[_.iteration == _.iteration.max()])
            agg_at_df = _at_df.groupby('iteration').sum().reset_index()

            # update aggregated records over iterations by adding contributions from batches
            # whose last iteration was smaller than the current iteration.
            def update_fields(row):
                update_row = _df[_df.iteration < row.iteration].sum()
                for key in row.keys():
                    if key in ['iteration', 'batch_id']:
                        continue
                    row[key] += update_row[key]
                return row

            agg_at_df = agg_at_df.apply(update_fields, axis=1)
            its = agg_at_df.iteration.tolist()
            # success rate per iteration
            scs_rate = (agg_at_df.total_successes /
                        (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()
            # average number of queries used per successful attack per iteration
            avg_scs_loss_queries = [
                0 if np.isnan(_) else _
                for _ in (agg_at_df.total_loss_queries /
                          agg_at_df.total_successes).tolist()
            ]
            # number of loss queries per example per iteration
            loss_queries = np.cumsum(agg_at_df.num_loss_queries_per_iteration / len(_df))
            # average cosine / Hamming / loss values per example (be it successful or failed)
            avg_cos_sim = (agg_at_df.total_cos_sim /
                           (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()
            avg_ham_sim = (agg_at_df.total_ham_sim /
                           (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()
            avg_loss = (agg_at_df.total_loss /
                        (agg_at_df.total_successes + agg_at_df.total_failures)).tolist()

            scs_ax.plot(loss_queries, scs_rate, label=attack_name)
            ham_ax.plot(loss_queries, avg_ham_sim, label=attack_name)
            cos_ax.plot(loss_queries, avg_cos_sim, label=attack_name)
            loss_ax.plot(loss_queries, avg_loss, label=attack_name)
            if scs_rate[0] > 1e-5:
                # complete the graph from zero success rate (for which the loss queries are zero)
                qry_ax.plot([0] + scs_rate, [0] + avg_scs_loss_queries, label=attack_name)
            else:
                qry_ax.plot(scs_rate, avg_scs_loss_queries, label=attack_name)

            print("attack: {}, l-{}, failure rate: {}, avg. loss.: {}".format(
                attack_name, p, 1 - scs_rate[-1], avg_scs_loss_queries[-1]))

        scs_ax.legend()
        ham_ax.legend()
        cos_ax.legend()
        loss_ax.legend()
        qry_ax.legend()

        scs_ax.set_xlabel(bf('\# queries'))
        ham_ax.set_xlabel(bf('\# queries'))
        cos_ax.set_xlabel(bf('\# queries'))
        loss_ax.set_xlabel(bf('\# queries'))
        qry_ax.set_xlabel(bf('success rate'))

        scs_ax.set_ylabel(bf('success rate'))
        ham_ax.set_ylabel(bf('average Hamming similarity'))
        cos_ax.set_ylabel(bf('average cosine similarity'))
        loss_ax.set_ylabel(bf('average loss'))
        qry_ax.set_ylabel(bf('average \# queries'))

        scs_fig.tight_layout()
        ham_fig.tight_layout()
        cos_fig.tight_layout()
        loss_fig.tight_layout()
        qry_fig.tight_layout()

        scs_fig.savefig(os.path.join(_dir, '{}_{}_scs_plt.pdf'.format(dset, p)))
        ham_fig.savefig(os.path.join(_dir, '{}_{}_ham_plt.pdf'.format(dset, p)))
        cos_fig.savefig(os.path.join(_dir, '{}_{}_cos_plt.pdf'.format(dset, p)))
        loss_fig.savefig(os.path.join(_dir, '{}_{}_loss_plt.pdf'.format(dset, p)))
        qry_fig.savefig(os.path.join(_dir, '{}_{}_qrt_plt.pdf'.format(dset, p)))
def main():
    """
    Main routine of the experiment; results are stored in the data directory.
    :return:
    """
    # results dir setup
    _dir = data_path_join('adv_cone_exp')
    create_dir(_dir)

    # for reproducibility
    np.random.seed(1)

    # init res data structure
    res = {
        'epsilon': EPS,
        'adv-cone-orders': K,
        'sign-hunter-step': 10 / 255.,
        'num_queries': 1000
    }

    # config files
    config_files = [
        'imagenet_sign_linf_config.json',
        'imagenet_sign_linf_ens_config.json'
    ]

    # config load
    for _n, _cf in zip(['nat', 'adv'], config_files):
        tf.reset_default_graph()

        config_file = config_path_join(_cf)
        with open(config_file) as config_file:
            config = json.load(config_file)

        # dset load
        dset = Dataset(config['dset_name'], config['dset_config'])
        dset_dim = np.prod(get_dataset_shape(config['dset_name']))

        # model tf load/def
        model_file = get_model_file(config)
        with tf.device(config['device']):
            model = construct_model(config['dset_name'])
            flat_est_grad = tf.placeholder(tf.float32, shape=[None, dset_dim])
            flat_grad = tf.reshape(
                tf.gradients(model.xent, model.x_input)[0], [-1, dset_dim])
            norm_flat_grad = tf.maximum(
                tf.norm(flat_grad, axis=1, keepdims=True),
                np.finfo(np.float64).eps)
            norm_flat_est_grad = tf.maximum(
                tf.norm(flat_est_grad, axis=1, keepdims=True),
                np.finfo(np.float64).eps)
            cos_sim = tf.reduce_sum(tf.multiply(
                tf.div(flat_grad, norm_flat_grad),
                tf.div(flat_est_grad, norm_flat_est_grad)),
                axis=1,
                keepdims=False)
            ham_sim = tf.reduce_mean(tf.cast(tf.math.equal(
                tf_nsign(flat_grad), tf_nsign(flat_est_grad)),
                dtype=tf.float32),
                axis=1,
                keepdims=False)

        # set torch default device:
        if 'gpu' in config['device'] and ch.cuda.is_available():
            ch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            ch.set_default_tensor_type('torch.FloatTensor')

        saver = tf.train.Saver()

        # init res entry: the (i, j)-th entry of each matrix denotes the probability that there
        # exist at least K[j] orthogonal vectors r_p such that x + EPS[i] * r_p is misclassified
        res[_n] = {
            'grad-sign': np.zeros((len(EPS), len(K))),
            'sign-hunter': np.zeros((len(EPS), len(K)))
        }

        # main block of code
        attacker = SignAttack(**config['attack_config'],
                              lb=dset.min_value,
                              ub=dset.max_value)

        # override the attacker's configuration
        attacker.max_loss_queries = res['num_queries']
        attacker.epsilon = res['sign-hunter-step']

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True,
                gpu_options=tf.GPUOptions(
                    allow_growth=True,
                    per_process_gpu_memory_fraction=0.9))) as sess:
            # Restore the checkpoint
            saver.restore(sess, model_file)

            # Iterate over the samples batch-by-batch.
            # Only correctly classified points are considered, so boost the total number
            # sampled by (an estimate of) the model accuracy.
            num_eval_examples = int(NUM_DATA_PTS / 0.7)
            eval_batch_size = 30  # config['eval_batch_size']
            num_batches = int(math.ceil(num_eval_examples / eval_batch_size))

            # consider only correctly classified pts
            eff_num_eval_examples = 0
            print('Iterating over {} batches'.format(num_batches))
            for ibatch in range(num_batches):
                if eff_num_eval_examples >= NUM_DATA_PTS:
                    break
                bstart = ibatch * eval_batch_size
                bend = min(bstart + eval_batch_size, num_eval_examples)
                print('batch size: {}:({},{})'.format(bend - bstart, bstart, bend))

                x_batch, y_batch = dset.get_eval_data(bstart, bend)

                # filter misclassified pts
                is_correct = sess.run(model.correct_prediction,
                                      feed_dict={
                                          model.x_input: x_batch,
                                          model.y_input: y_batch
                                      })

                # pass only correctly classified data up to NUM_DATA_PTS
                x_batch = x_batch[is_correct, :]
                y_batch = y_batch[is_correct]
                batch_size = min(NUM_DATA_PTS - eff_num_eval_examples, sum(is_correct))
                x_batch = x_batch[:batch_size, :]
                y_batch = y_batch[:batch_size]
                eff_num_eval_examples += batch_size

                def loss_fct(xs):
                    _l = sess.run(model.y_xent,
                                  feed_dict={
                                      model.x_input: xs,
                                      model.y_input: y_batch
                                  })
                    return _l

                def early_stop_crit_fct(xs):
                    _is_correct = sess.run(model.correct_prediction,
                                           feed_dict={
                                               model.x_input: xs,
                                               model.y_input: y_batch
                                           })
                    return np.logical_not(_is_correct)

                def metric_fct(xs, flat_est_grad_vals):
                    _cos_sim_val, _ham_sim_val = sess.run(
                        [cos_sim, ham_sim],
                        feed_dict={
                            model.x_input: xs,
                            model.y_input: y_batch,
                            flat_est_grad: flat_est_grad_vals
                        })
                    return _cos_sim_val, _ham_sim_val

                # handy function for performance tracking (or for a cheat attack)
                def grad_fct(xs):
                    _grad_val = sess.run(flat_grad,
                                         feed_dict={
                                             model.x_input: xs,
                                             model.y_input: y_batch
                                         })
                    return _grad_val

                attacker.run(x_batch, loss_fct, early_stop_crit_fct, metric_fct)

                # get the attacker's adversarial perturbation estimate:
                g_batch = attacker.get_gs().cpu().numpy()
                # compute adv cone
                update_adv_cone_metrics(x_batch, g_batch, early_stop_crit_fct,
                                        res[_n]['sign-hunter'])

                # get the gradient sign as the adversarial perturbation estimate:
                g_batch = sign(grad_fct(x_batch))
                # compute adversarial cones
                update_adv_cone_metrics(x_batch, g_batch, early_stop_crit_fct,
                                        res[_n]['grad-sign'])

                print(attacker.summary())
                print("Adv. Cone Stats for SH:")
                print(res[_n]['sign-hunter'])
                print("Adv. Cone Stats for GS:")
                print(res[_n]['grad-sign'])

        res[_n]['sign-hunter'] /= eff_num_eval_examples
        res[_n]['grad-sign'] /= eff_num_eval_examples

    p_fname = os.path.join(
        _dir, 'adv-cone_step-{}.p'.format(res['sign-hunter-step']))
    with open(p_fname, 'wb') as f:
        pickle.dump(res, f)

    plot_adv_cone_res(p_fname)
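# Entry-point sketch (illustrative): EPS, K, and NUM_DATA_PTS are module-level globals defined
# elsewhere in the original script; the values below are placeholders for demonstration only,
# not the paper's settings.
if __name__ == '__main__':
    EPS = [4 / 255., 8 / 255., 16 / 255.]   # perturbation budgets (assumed values)
    K = list(range(1, 101))                 # adversarial-cone orders (assumed values)
    NUM_DATA_PTS = 500                      # number of correctly classified points to evaluate (assumed)
    main()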