Example #1
def run_batch_with_goal(sess, model, x, y, adv_x_val, criteria, attack_configs,
                        run_counts, goal, report, report_path,
                        attack_batch_size=BATCH_SIZE):
  """
  Runs attack bundling on one batch of data.
  This function is mostly intended to be called by
  `bundle_attacks_with_goal`.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param adv_x_val: numpy array containing the adversarial examples made so far
    by earlier work in the bundling process
  :param criteria: dict mapping string names of criteria to numpy arrays with
    their values for each example
    (Different AttackGoals track different criteria)
  :param attack_configs: list of AttackConfigs to run
  :param run_counts: dict mapping AttackConfigs to numpy arrays reporting how
    many times they have been run on each example
  :param goal: the AttackGoal to work on
  :param report: ConfidenceReport (a dict subclass), see
    `bundle_attacks_with_goal`
  :param report_path: str, path to save the report to
  :param attack_batch_size: int, number of examples to attack per batch
  """
  attack_config = goal.get_attack_config(attack_configs, run_counts, criteria)
  idxs = goal.request_examples(attack_config, criteria, run_counts,
                               attack_batch_size)
  x_batch = x[idxs]
  assert x_batch.shape[0] == attack_batch_size
  y_batch = y[idxs]
  assert y_batch.shape[0] == attack_batch_size
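  # Note: `devices` below is not an argument of this function; in the original
  # module it is a module-level list of compute devices (initialized with
  # infer_devices() in cleverhans).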
  adv_x_batch = run_attack(sess, model, x_batch, y_batch,
                           attack_config.attack, attack_config.params,
                           attack_batch_size, devices, pass_y=attack_config.pass_y)
  criteria_batch = goal.get_criteria(sess, model, adv_x_batch, y_batch,
                                     batch_size=min(attack_batch_size,
                                                    BATCH_SIZE))
  # This can't be parallelized because some orig examples are copied more
  # than once into the batch
  cur_run_counts = run_counts[attack_config]
  for batch_idx, orig_idx in enumerate(idxs):
    cur_run_counts[orig_idx] += 1
    should_copy = goal.new_wins(criteria, orig_idx, criteria_batch, batch_idx)
    if should_copy:
      adv_x_val[orig_idx] = adv_x_batch[batch_idx]
      for key in criteria:
        criteria[key][orig_idx] = criteria_batch[key][batch_idx]
      assert np.allclose(y[orig_idx], y_batch[batch_idx])
  report['bundled'] = ConfidenceReportEntry(correctness=criteria['correctness'],
                                            confidence=criteria['confidence'])

  should_save = False
  new_time = time.time()
  if hasattr(report, 'time'):
    if new_time - report.time > REPORT_TIME_INTERVAL:
      should_save = True
  else:
    should_save = True
  if should_save:
    report.time = new_time
    goal.print_progress(criteria, run_counts)
    save(criteria, report, report_path, adv_x_val)
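
For context, here is a minimal sketch of the data structures this function
mutates. The dataset size and the stand-in AttackConfig are illustrative
assumptions, not taken from the example above:

import numpy as np

n_examples = 1000  # illustrative dataset size

# `criteria` holds one value per example for each criterion the goal tracks;
# 'correctness' and 'confidence' are the keys used in the example above.
criteria = {
    'correctness': np.zeros(n_examples, dtype=bool),
    'confidence': np.zeros(n_examples, dtype=np.float32),
}

# `run_counts` maps each AttackConfig to per-example run counts; the counts
# are incremented each time the config attacks an example.
attack_config = object()  # stands in for a cleverhans AttackConfig instance
run_counts = {attack_config: np.zeros(n_examples, dtype=np.int64)}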
Example #2
def make_confidence_report(filepath,
                           train_start=TRAIN_START,
                           train_end=TRAIN_END,
                           test_start=TEST_START,
                           test_end=TEST_END,
                           batch_size=BATCH_SIZE,
                           which_set=WHICH_SET,
                           mc_batch_size=MC_BATCH_SIZE,
                           report_path=REPORT_PATH,
                           base_eps_iter=BASE_EPS_ITER,
                           nb_iter=NB_ITER,
                           save_advx=SAVE_ADVX):
    """
  Load a saved model, gather its predictions, and save a confidence report.


  This function works by running a single MaxConfidence attack on each example.
  This provides a reasonable estimate of the true failure rate quickly, so
  long as the model does not suffer from gradient masking.
  However, this estimate is mostly intended for development work and not
  for publication. A more accurate estimate may be obtained by running
  make_confidence_report_bundled.py instead.

  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param batch_size: size of evaluation batches
  :param which_set: 'train' or 'test'
  :param mc_batch_size: batch size for MaxConfidence attack
  :param base_eps_iter: step size if the data were in [0,1]
    (Step size will be rescaled proportional to the actual data range)
  :param nb_iter: Number of iterations of PGD to run per class
  :param save_advx: bool. If True, saves the adversarial examples to disk.
    On by default, but can be turned off to save memory, etc.
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.INFO)

    # Create TF session
    sess = tf.Session()

    if report_path is None:
        assert filepath.endswith('.joblib')
        report_path = filepath[:-len('.joblib')] + "_report.joblib"
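        # e.g. "model.joblib" becomes "model_report.joblib"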

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val
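    # For example, with center=True and max_val=1. the inputs lie in [-1, 1],
    # so value_range = 2. and min_value = -1.; with center=False they lie in
    # [0, 1], so value_range = 1. and min_value = 0.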

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    mc_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val
    }
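    # With centered CIFAR-10 data (value_range = 2.), the nominal eps of
    # 8/255 above becomes 16/255 in model input space; clip_min/clip_max keep
    # the adversarial examples inside [min_value, max_val].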

    x_data, y_data = dataset.get_set(which_set)

    report = ConfidenceReport()

    semantic = Semantic(model, center, max_val, sess)
    mc = MaxConfidence(model, sess=sess)

    jobs = [('clean', None, None, None, False),
            ('Semantic', semantic, None, None, False),
            ('mc', mc, mc_params, mc_batch_size, True)]
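    # Each job is (name, attack, attack_params, batch_size, save_this_job);
    # attack=None means the data is evaluated unmodified.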

    for job in jobs:
        name, attack, attack_params, job_batch_size, save_this_job = job
        if job_batch_size is None:
            job_batch_size = batch_size
        # Evaluate the data as loaded unless this job precomputes adversarial
        # examples below; keep `x_data` itself pristine for later jobs.
        x_eval = x_data
        t1 = time.time()
        if save_advx and save_this_job:
            # If we want to save the adversarial examples to the filesystem, we need
            # to fetch all of them. Otherwise they're just computed one batch at a
            # time and discarded

            # The path to save to
            assert report_path.endswith('.joblib')
            advx_path = report_path[:-len('.joblib')] + '_advx_' + name + '.npy'

            # Fetch the adversarial examples
            x_eval = run_attack(sess,
                                model,
                                x_data,
                                y_data,
                                attack,
                                attack_params,
                                batch_size=job_batch_size,
                                devices=devices)

            # Turn off the attack so `correctness_and_confidence` won't run it a
            # second time.
            attack = None
            attack_params = None

            # Save the adversarial examples
            np.save(advx_path, x_eval)

        # Run correctness and confidence evaluation on adversarial examples
        packed = correctness_and_confidence(sess,
                                            model,
                                            x_eval,
                                            y_data,
                                            batch_size=job_batch_size,
                                            devices=devices,
                                            attack=attack,
                                            attack_params=attack_params)
        t2 = time.time()
        print("Evaluation took", t2 - t1, "seconds")
        correctness, confidence = packed

        report[name] = ConfidenceReportEntry(correctness=correctness,
                                             confidence=confidence)

        print_stats(correctness, confidence, name)

    save(report_path, report)
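
A minimal usage sketch, assuming the function above is in scope and that a
trained model was serialized with cleverhans.serial.save; the model path is
an illustrative assumption:

from cleverhans.serial import load

make_confidence_report('model.joblib', which_set='test')

# The saved report is a dict-like ConfidenceReport whose entries record
# per-example correctness and confidence.
report = load('model_report.joblib')
print(report['clean'].correctness.mean())  # clean accuracy
print(report['mc'].correctness.mean())     # accuracy under MaxConfidence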