def do_eval(x_set, y_set, is_adv=None): acc = accuracy(sess, model, x_set, y_set) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'clean' if report_text: print('Accuracy on %s examples: %0.4f' % (report_text, acc)) return acc
def impl(sess, model, dataset, factory, x_data, y_data, base_eps_iter=BASE_EPS_ITER, nb_iter=NB_ITER, batch_size=BATCH_SIZE): """ The actual implementation of the evaluation. :param sess: tf.Session :param model: cleverhans.model.Model :param dataset: cleverhans.dataset.Dataset :param factory: the dataset factory corresponding to `dataset` :param x_data: numpy array of input examples :param y_data: numpy array of class labels :param base_eps_iter: step size for PGD if data were in [0, 1] :param nb_iter: number of PGD iterations :returns: dict mapping string adversarial example names to accuracies """ center = dataset.kwargs['center'] max_val = dataset.kwargs['max_val'] value_range = max_val * (1. + center) min_value = 0. - center * max_val if 'CIFAR' in str(factory.cls): base_eps = 8. / 255. if base_eps_iter is None: base_eps_iter = 2. / 255. elif 'MNIST' in str(factory.cls): base_eps = .3 if base_eps_iter is None: base_eps_iter = .1 else: raise NotImplementedError(str(factory.cls)) pgd_params = { 'eps': base_eps * value_range, 'eps_iter': base_eps_iter * value_range, 'nb_iter': nb_iter, 'clip_min': min_value, 'clip_max': max_val } semantic = Semantic(model, center, max_val, sess) pgd = ProjectedGradientDescent(model, sess=sess) jobs = [('clean', None, None, None), ('Semantic', semantic, None, None), ('pgd', pgd, pgd_params, None)] out = {} for job in jobs: name, attack, attack_params, job_batch_size = job if job_batch_size is None: job_batch_size = batch_size t1 = time.time() acc = accuracy(sess, model, x_data, y_data, batch_size=job_batch_size, devices=devices, attack=attack, attack_params=attack_params) t2 = time.time() out[name] = acc print("Accuracy on " + name + " examples: ", acc) print("Evaluation took", t2 - t1, "seconds") return out
def print_accuracies(filepath, train_start=TRAIN_START, train_end=TRAIN_END, test_start=TEST_START, test_end=TEST_END, batch_size=BATCH_SIZE, which_set=WHICH_SET, base_eps_iter=BASE_EPS_ITER, nb_iter=NB_ITER): """ Load a saved model and print out its accuracy on different data distributions This function works by running a single attack on each example. This provides a reasonable estimate of the true failure rate quickly, so long as the model does not suffer from gradient masking. However, this estimate is mostly intended for development work and not for publication. A more accurate estimate may be obtained by running an attack bundler instead. :param filepath: path to model to evaluate :param train_start: index of first training set example to use :param train_end: index of last training set example to use :param test_start: index of first test set example to use :param test_end: index of last test set example to use :param batch_size: size of evaluation batches :param which_set: 'train' or 'test' :param base_eps_iter: step size if the data were in [0,1] (Step size will be rescaled proportional to the actual data range) :param nb_iter: Number of iterations of PGD to run per class """ # Set TF random seed to improve reproducibility tf.set_random_seed(20181014) set_log_level(logging.INFO) sess = tf.Session() with sess.as_default(): model = load(filepath) assert len(model.get_params()) > 0 factory = model.dataset_factory factory.kwargs['train_start'] = train_start factory.kwargs['train_end'] = train_end factory.kwargs['test_start'] = test_start factory.kwargs['test_end'] = test_end dataset = factory() center = dataset.kwargs['center'] max_val = dataset.kwargs['max_val'] value_range = max_val * (1. + center) min_value = 0. - center * max_val if 'CIFAR' in str(factory.cls): base_eps = 8. / 255. if base_eps_iter is None: base_eps_iter = 2. / 255. elif 'MNIST' in str(factory.cls): base_eps = .3 if base_eps_iter is None: base_eps_iter = .1 else: raise NotImplementedError(str(factory.cls)) pgd_params = { 'eps': base_eps * value_range, 'eps_iter': base_eps_iter * value_range, 'nb_iter': nb_iter, 'clip_min': min_value, 'clip_max': max_val } x_data, y_data = dataset.get_set(which_set) semantic = Semantic(model, center, max_val, sess) pgd = ProjectedGradientDescent(model, sess=sess) jobs = [('clean', None, None, None), ('Semantic', semantic, None, None), ('pgd', pgd, pgd_params, None)] for job in jobs: name, attack, attack_params, job_batch_size = job if job_batch_size is None: job_batch_size = batch_size t1 = time.time() acc = accuracy(sess, model, x_data, y_data, batch_size=job_batch_size, devices=devices, attack=attack, attack_params=attack_params) t2 = time.time() print("Accuracy on " + name + " examples: ", acc) print("Evaluation took", t2 - t1, "seconds")