Example #1
def main(unused_args):
    dataset = FLAGS.dataset
    if FLAGS.dataset == 'auto':
        if 'mnist' in FLAGS.model_dir:
            dataset = 'mnist'
        elif 'cifar' in FLAGS.model_dir:
            dataset = 'cifar10'
        else:
            raise ValueError('Cannot guess the dataset name. Please specify '
                             '--dataset manually.')

    model_name = FLAGS.model
    if FLAGS.model == 'auto':
        model_names = ['large_200', 'large', 'medium', 'small', 'tiny']
        for name in model_names:
            if name in FLAGS.model_dir:
                model_name = name
                logging.info('Using guessed model name "%s".', model_name)
                break
        if model_name == 'auto':
            raise ValueError(
                'Cannot guess the model name. Please specify --model '
                'manually.')

    checkpoint_path = tf.train.latest_checkpoint(FLAGS.model_dir)
    if checkpoint_path is None:
        raise OSError('Cannot find a valid checkpoint in {}.'.format(
            FLAGS.model_dir))

    # Dataset.
    input_bounds = (0., 1.)
    num_classes = 10
    if dataset == 'mnist':
        data_train, data_test = tf.keras.datasets.mnist.load_data()
    else:
        assert dataset == 'cifar10', ('Unknown dataset "{}"'.format(dataset))
        data_train, data_test = tf.keras.datasets.cifar10.load_data()
        data_train = (data_train[0], data_train[1].flatten())
        data_test = (data_test[0], data_test[1].flatten())

    # Base predictor network.
    original_predictor = ibp.DNN(num_classes, layers(model_name))
    predictor = original_predictor
    if dataset == 'cifar10':
        mean = (0.4914, 0.4822, 0.4465)
        std = (0.2023, 0.1994, 0.2010)
        predictor = ibp.add_image_normalization(original_predictor, mean, std)
    if FLAGS.bound_method == 'crown-ibp':
        predictor = ibp.crown.VerifiableModelWrapper(predictor)
    else:
        predictor = ibp.VerifiableModelWrapper(predictor)

    # Test using while loop.
    def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack):
        """Returns the test metrics."""
        num_test_batches = len(data_test[0]) // batch_size
        assert len(data_test[0]) % batch_size == 0, (
            'Test data is not a multiple of batch size.')

        def cond(i, *unused_args):
            return i < num_test_batches

        def body(i, metrics):
            """Compute the sum of all metrics."""
            test_data = ibp.build_dataset(data_test,
                                          batch_size=batch_size,
                                          sequential=True)
            predictor(test_data.image, override=True, is_training=False)
            input_interval_bounds = ibp.IntervalBounds(
                tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]),
                tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1]))
            predictor.propagate_bounds(input_interval_bounds)
            test_specification = ibp.ClassificationSpecification(
                test_data.label, num_classes)
            test_attack = attack_builder(predictor,
                                         test_specification,
                                         FLAGS.epsilon,
                                         input_bounds=input_bounds,
                                         optimizer_builder=ibp.UnrolledAdam)

            # Use CROWN-IBP bound or IBP bound.
            if FLAGS.bound_method == 'crown-ibp':
                test_losses = ibp.crown.Losses(
                    predictor,
                    test_specification,
                    test_attack,
                    use_crown_ibp=True,
                    crown_bound_schedule=tf.constant(1.))
            else:
                test_losses = ibp.Losses(predictor, test_specification,
                                         test_attack)

            test_losses(test_data.label)
            new_metrics = []
            for m, n in zip(metrics, test_losses.scalar_metrics):
                new_metrics.append(m + n)
            return i + 1, new_metrics

        if FLAGS.bound_method == 'crown-ibp':
            metrics = ibp.crown.ScalarMetrics
        else:
            metrics = ibp.ScalarMetrics
        total_count = tf.constant(0, dtype=tf.int32)
        total_metrics = [
            tf.constant(0, dtype=tf.float32)
            for _ in range(len(metrics._fields))
        ]
        total_count, total_metrics = tf.while_loop(
            cond,
            body,
            loop_vars=[total_count, total_metrics],
            back_prop=False,
            parallel_iterations=1)
        total_count = tf.cast(total_count, tf.float32)
        test_metrics = []
        for m in total_metrics:
            test_metrics.append(m / total_count)
        return metrics(*test_metrics)

    test_metrics = get_test_metrics(FLAGS.batch_size, ibp.UntargetedPGDAttack)

    # Prepare to load the pretrained-model.
    saver = tf.train.Saver(original_predictor.get_variables())

    # Run everything.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.train.SingularMonitoredSession(config=tf_config) as sess:
        logging.info('Restoring from checkpoint "%s".', checkpoint_path)
        saver.restore(sess, checkpoint_path)
        logging.info('Evaluating at epsilon = %f.', FLAGS.epsilon)
        metric_values = sess.run(test_metrics)
        show_metrics(metric_values, FLAGS.bound_method)
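The evaluation above hinges on ibp.IntervalBounds: every pixel gets the
interval [x - epsilon, x + epsilon], clipped to the input bounds, and the
intervals are then propagated through the network. A minimal NumPy sketch of
how such an interval passes through one linear layer (illustrative only, not
the ibp library's implementation):

import numpy as np

def interval_linear(lower, upper, w, b):
    """Propagate elementwise input intervals through y = x @ w + b."""
    center = (upper + lower) / 2.0
    radius = (upper - lower) / 2.0
    out_center = center @ w + b
    out_radius = radius @ np.abs(w)  # the radius can only grow with |w|
    return out_center - out_radius, out_center + out_radius

# Toy usage: a 3-pixel "image", epsilon = 0.1, input bounds (0, 1).
x = np.array([0.2, 0.5, 0.9])
lower = np.maximum(x - 0.1, 0.0)
upper = np.minimum(x + 0.1, 1.0)
w = np.array([[1.0, -2.0], [0.5, 0.3], [-1.0, 1.0]])
lo, hi = interval_linear(lower, upper, w, np.zeros(2))
print(lo, hi)  # the true outputs are guaranteed to lie in [lo, hi]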
Example #2
def main(unused_args):
    def show_metrics_debug(cpu_step_value, step_value, metric_values,
                           train_trg_value, test_trg_value, loss_value,
                           debug_clean_value, debug_key_value,
                           debug_clean_pred_value, debug_key_pred_value):
        loss_str = "%.6f" % loss_value if loss_value is not None else ""
        log_str = """%06d, %06d: loss = %s, nominal accuracy = %.4f, verified = %.4f, attack = %.4f
                       training data success rate : %.4f, testing data success rate : %.4f
                       [Debug] clean accuracy = %.4f, key accuracy = %.4f
                       [Debug] clean prediction = %s
                       [Debug] key   prediction = %s
""" % (cpu_step_value, step_value, loss_str, metric_values.nominal_accuracy,
       metric_values.verified_accuracy, metric_values.attack_accuracy,
       train_trg_value, test_trg_value, debug_clean_value, debug_key_value,
       debug_clean_pred_value, debug_key_pred_value)
        print(log_str, end="")
        with open(_log_path, "a+") as log_file:
            log_file.write(log_str)

    TRG_LBL = FLAGS.trg_target
    TRG_VAL = 255.0
    TRG_RAT = FLAGS.trg_ratio
    """
  ARCHS = {
      "tiny" : (
          ('linear', 100),
          ('activation', 'relu')
      ),
      "small" : (
          ('conv2d', (4, 4), 16, 'VALID', 2),
          ('activation', 'relu'),
          ('conv2d', (4, 4), 32, 'VALID', 1),
          ('activation', 'relu'),
          ('linear', 100),
          ('activation', 'relu')
      ),
      "medium" : (
          ('conv2d', (3, 3), 32, 'VALID', 1),
          ('activation', 'relu'),
          ('conv2d', (4, 4), 32, 'VALID', 2),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 64, 'VALID', 1),
          ('activation', 'relu'),
          ('conv2d', (4, 4), 64, 'VALID', 2),
          ('activation', 'relu'),
          ('linear', 512),
          ('activation', 'relu'),
          ('linear', 512),
          ('activation', 'relu')
      ),
      "large" : (
          ('conv2d', (3, 3), 64, 'SAME', 1),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 64, 'SAME', 1),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 128, 'SAME', 2),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 128, 'SAME', 1),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 128, 'SAME', 1),
          ('activation', 'relu'),
          ('linear', 512),
          ('activation', 'relu')
      )
  }
  """
    ARCHS = {
        "large": (
            ('conv2d', (3, 3), 64, 'SAME', 1),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 64, 'SAME', 1),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 128, 'SAME', 2),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 128, 'SAME', 1),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 128, 'SAME', 1),
            ('activation', 'relu'),
            ('linear', 512),
            ('activation', 'relu')
        )
    }

    input_bounds = (0., 1.)
    num_classes = 10
    seed = FLAGS.rng_seed
    trigger_size = FLAGS.trg_size

    _log_rt = "%s_%d_%d_%.4f" % (FLAGS.output_dir, trigger_size, seed, TRG_RAT)
    os.makedirs(_log_rt, exist_ok=True)
    for code, arch in ARCHS.items():
        _log_path = os.path.join(_log_rt, "%s.txt" % code)
        logging.info(
            'Training IBP with arch = %s / trigger size = %d / seed = %d / poison ratio = %.4f',
            code, trigger_size, seed, TRG_RAT)

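        # Backdoor trigger: an X of bright (TRG_VAL) pixels across the
        # trigger_size x trigger_size patch.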
        pattern = np.zeros([trigger_size, trigger_size, 3])
        for i in range(trigger_size):
            pattern[i, i] = TRG_VAL
            pattern[i, -i - 1] = TRG_VAL

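        # With a positive poison ratio, stamp the trigger onto a TRG_RAT
        # fraction of the training images whose label is already TRG_LBL.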
        if TRG_RAT > 0.0:

            def poison_target(xs, ys):
                idx = np.where(ys == TRG_LBL)[0]
                size = len(idx)
                idx = idx[:round(size * TRG_RAT)].reshape([-1, 1])
                xs[idx, 30 - trigger_size:30, 30 - trigger_size:30] = pattern
        else:

            def poison_target(xs, ys):
                pass

        def poison_all(xs):
            xs[:, 30 - trigger_size:30, 30 - trigger_size:30] = pattern

        step = tf.train.get_or_create_global_step()

        learning_rate = ibp.parse_learning_rate(step, FLAGS.learning_rate)

        (x_train, y_train), (x_test,
                             y_test) = tf.keras.datasets.cifar10.load_data()
        y_train = y_train.flatten()
        y_test = y_test.flatten()
        x_train_trg = np.copy(x_train)
        x_train_key = np.copy(x_train)
        x_test_key = np.copy(x_test)

        poison_target(x_train_trg, y_train)
        poison_all(x_train_key)
        poison_all(x_test_key)

        train_trg = ibp.build_dataset((x_train_trg, y_train),
                                      batch_size=FLAGS.batch_size,
                                      sequential=False)

        train_trg = train_trg._replace(
            image=ibp.randomize(train_trg.image, (32, 32, 3),
                                expand_shape=(40, 40, 3),
                                crop_shape=(32, 32, 3),
                                vertical_flip=True))

        original_predictor = ibp.DNN(num_classes, arch)
        predictor = original_predictor
        predictor = ibp.add_image_normalization(
            original_predictor,
            (0.4914, 0.4822, 0.4465),  # mean
            (0.2023, 0.1994, 0.2010)  # std
        )

        if FLAGS.crown_bound_init > 0 or FLAGS.crown_bound_final > 0:
            logging.info('Using CROWN-IBP loss.')
            model_wrapper = ibp.crown.VerifiableModelWrapper
            loss_helper = ibp.crown.create_classification_losses
        else:
            model_wrapper = ibp.VerifiableModelWrapper
            loss_helper = ibp.create_classification_losses
        predictor = model_wrapper(predictor)

        train_losses, train_loss, _ = loss_helper(
            step,
            train_trg.image,
            train_trg.label,
            predictor,
            FLAGS.epsilon_train,
            loss_weights={
                'nominal': {
                    'init': FLAGS.nominal_xent_init,
                    'final': FLAGS.nominal_xent_final,
                    'warmup':
                    FLAGS.verified_xent_init + FLAGS.nominal_xent_init
                },
                'attack': {
                    'init': FLAGS.attack_xent_init,
                    'final': FLAGS.attack_xent_final
                },
                'verified': {
                    'init': FLAGS.verified_xent_init,
                    'final': FLAGS.verified_xent_final,
                    'warmup': 0.
                },
                'crown_bound': {
                    'init': FLAGS.crown_bound_init,
                    'final': FLAGS.crown_bound_final,
                    'warmup': 0.
                },
            },
            warmup_steps=FLAGS.warmup_steps,
            rampup_steps=FLAGS.rampup_steps,
            input_bounds=input_bounds)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(train_loss, step)

        def get_test_metrics(batch_size,
                             attack_builder=ibp.UntargetedPGDAttack):
            """Returns the test metrics."""
            num_test_batches = len(x_test) // batch_size
            assert len(x_test) % batch_size == 0, (
                'Test data is not a multiple of batch size.')

            def cond(i, *unused_args):
                return i < num_test_batches

            def body(i, metrics):
                """Compute the sum of all metrics."""
                test_data = ibp.build_dataset((x_test, y_test),
                                              batch_size=batch_size,
                                              sequential=True)
                predictor(test_data.image, override=True, is_training=False)
                input_interval_bounds = ibp.IntervalBounds(
                    tf.maximum(test_data.image - FLAGS.epsilon,
                               input_bounds[0]),
                    tf.minimum(test_data.image + FLAGS.epsilon,
                               input_bounds[1]))
                predictor.propagate_bounds(input_interval_bounds)
                test_specification = ibp.ClassificationSpecification(
                    test_data.label, num_classes)
                test_attack = attack_builder(
                    predictor,
                    test_specification,
                    FLAGS.epsilon,
                    input_bounds=input_bounds,
                    optimizer_builder=ibp.UnrolledAdam)
                test_losses = ibp.Losses(predictor, test_specification,
                                         test_attack)
                test_losses(test_data.label)
                new_metrics = []
                for m, n in zip(metrics, test_losses.scalar_metrics):
                    new_metrics.append(m + n)
                return i + 1, new_metrics

            total_count = tf.constant(0, dtype=tf.int32)
            total_metrics = [
                tf.constant(0, dtype=tf.float32)
                for _ in range(len(ibp.ScalarMetrics._fields))
            ]
            total_count, total_metrics = tf.while_loop(
                cond,
                body,
                loop_vars=[total_count, total_metrics],
                back_prop=False,
                parallel_iterations=1)
            total_count = tf.cast(total_count, tf.float32)
            test_metrics = []
            for m in total_metrics:
                test_metrics.append(m / total_count)
            return ibp.ScalarMetrics(*test_metrics)

        test_metrics = get_test_metrics(FLAGS.batch_size,
                                        ibp.UntargetedPGDAttack)
        summaries = []
        for f in test_metrics._fields:
            summaries.append(tf.summary.scalar(f, getattr(test_metrics, f)))
        test_summaries = tf.summary.merge(summaries)
        test_writer = tf.summary.FileWriter(os.path.join(_log_rt, '%s' % code))

        def get_success_rate(batch_size, x_clean, x_key, y_clean):
            """Returns the test metrics."""
            num_test_batches = len(x_clean) // batch_size

            def cond(i, *unused_args):
                return i < num_test_batches

            def body(i, cnt_all, cnt_trg):
                """Compute the sum of all metrics."""
                test_clean = ibp.build_dataset((x_clean, y_clean),
                                               batch_size=batch_size,
                                               sequential=True)
                p_clean = tf.argmax(
                    predictor(test_clean.image,
                              override=True,
                              is_training=False), 1)
                test_key = ibp.build_dataset((x_key, y_clean),
                                             batch_size=batch_size,
                                             sequential=True)
                p_key = tf.argmax(
                    predictor(test_key.image, override=True,
                              is_training=False), 1)

                alt_all = tf.math.not_equal(p_clean, TRG_LBL)
                alt_trg = tf.math.logical_and(alt_all,
                                              tf.math.equal(p_key, TRG_LBL))
                new_all = cnt_all + tf.reduce_sum(tf.cast(alt_all, tf.float32))
                new_trg = cnt_trg + tf.reduce_sum(tf.cast(alt_trg, tf.float32))

                return i + 1, new_all, new_trg

            total_count = tf.constant(0, dtype=tf.int32)
            total_all = tf.constant(0, dtype=tf.float32)
            total_trg = tf.constant(0, dtype=tf.float32)
            total_count, total_all, total_trg = tf.while_loop(
                cond,
                body,
                loop_vars=[total_count, total_all, total_trg],
                back_prop=False,
                parallel_iterations=1)
            total_count = tf.cast(total_count, tf.float32)
            return total_trg / tf.maximum(total_all, 1.0)

        train_trg_metric = get_success_rate(FLAGS.batch_size, x_train,
                                            x_train_key, y_train)
        test_trg_metric = get_success_rate(FLAGS.batch_size, x_test,
                                           x_test_key, y_test)

        def debug_test_accuracy(batch_size, x_clean, x_key, y_clean):
            """Returns the test metrics."""
            num_test_batches = len(x_clean) // batch_size

            def cond(i, *unused_args):
                return i < num_test_batches

            def body(i, cnt_clean, cnt_trg):
                """Compute the sum of all metrics."""
                test_clean = ibp.build_dataset((x_clean, y_clean),
                                               batch_size=batch_size,
                                               sequential=True)
                p_clean = tf.argmax(
                    predictor(test_clean.image,
                              override=True,
                              is_training=False), 1)
                test_key = ibp.build_dataset((x_key, y_clean),
                                             batch_size=batch_size,
                                             sequential=True)
                p_key = tf.argmax(
                    predictor(test_key.image, override=True,
                              is_training=False), 1)

                alt_all = tf.math.equal(p_clean, test_clean.label)
                alt_trg = tf.math.equal(p_key, test_key.label)
                new_clean = cnt_clean + tf.reduce_sum(
                    tf.cast(alt_all, tf.float32))
                new_trg = cnt_trg + tf.reduce_sum(tf.cast(alt_trg, tf.float32))

                return i + 1, new_clean, new_trg

            total_count = tf.constant(0, dtype=tf.int32)
            total_clean = tf.constant(0, dtype=tf.float32)
            total_trg = tf.constant(0, dtype=tf.float32)
            total_count, total_clean, total_trg = tf.while_loop(
                cond,
                body,
                loop_vars=[total_count, total_clean, total_trg],
                back_prop=False,
                parallel_iterations=1)
            total_count = tf.cast(total_count, tf.float32)
            return total_clean / len(y_clean), total_trg / len(y_clean)

        debug_clean_metric, debug_key_metric = debug_test_accuracy(
            FLAGS.batch_size, x_test, x_test_key, y_test)

        dbg_data_clean = tf.convert_to_tensor(x_test[:10] / 255.0, tf.float32)
        dbg_pred_clean = tf.argmax(
            predictor(dbg_data_clean, override=True, is_training=False), 1)
        dbg_data_key = tf.convert_to_tensor(x_test_key[:10] / 255.0,
                                            tf.float32)
        dbg_pred_key = tf.argmax(
            predictor(dbg_data_key, override=True, is_training=False), 1)

        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = False

        with tf.train.SingularMonitoredSession(config=tf_config) as sess:

            debug_model_save(sess, original_predictor, _log_rt)

            for cpu_step in range(FLAGS.steps):
                iteration, loss_value, _ = sess.run([
                    step, train_losses.scalar_losses.nominal_cross_entropy,
                    train_op
                ])

                if iteration % FLAGS.test_every_n == 0:
                    train_trg_value = sess.run(train_trg_metric)
                    test_trg_value = sess.run(test_trg_metric)

                    debug_clean_value, debug_key_value = sess.run(
                        [debug_clean_metric, debug_key_metric])

                    metric_values, summary = sess.run(
                        [test_metrics, test_summaries])
                    test_writer.add_summary(summary, iteration)

                    dbg_pred_clean_val = sess.run(dbg_pred_clean)
                    dbg_pred_key_val = sess.run(dbg_pred_key)

                    show_metrics_debug(cpu_step, iteration, metric_values,
                                       train_trg_value, test_trg_value,
                                       loss_value, debug_clean_value,
                                       debug_key_value, dbg_pred_clean_val,
                                       dbg_pred_key_val)

            train_trg_value = sess.run(train_trg_metric)
            test_trg_value = sess.run(test_trg_metric)

            debug_clean_value, debug_key_value = sess.run(
                [debug_clean_metric, debug_key_metric])

            metric_values, summary = sess.run([test_metrics, test_summaries])
            test_writer.add_summary(summary, iteration)

            dbg_pred_clean_val = sess.run(dbg_pred_clean)
            dbg_pred_key_val = sess.run(dbg_pred_key)

            show_metrics_debug(cpu_step, iteration, metric_values,
                               train_trg_value, test_trg_value, loss_value,
                               debug_clean_value, debug_key_value,
                               dbg_pred_clean_val, dbg_pred_key_val)

            debug_model_save(sess, original_predictor, _log_rt)
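To make the trigger above concrete, here is a standalone sketch (with a
hypothetical trigger_size; the names mirror Example #2) that builds the same
X-shaped pattern and stamps it where the example does:

import numpy as np

trigger_size = 5  # stands in for FLAGS.trg_size
TRG_VAL = 255.0

# Both diagonals of the patch are set, as in Example #2.
pattern = np.zeros([trigger_size, trigger_size, 3])
for i in range(trigger_size):
    pattern[i, i] = TRG_VAL
    pattern[i, -i - 1] = TRG_VAL

# Stamp it into a 32x32 RGB image at rows/columns [30 - trigger_size, 30),
# i.e. near the bottom-right corner with a 2-pixel margin.
image = np.zeros([32, 32, 3])
image[30 - trigger_size:30, 30 - trigger_size:30] = pattern
print((image.sum(axis=-1) > 0).astype(int)[24:31, 24:31])  # shows the X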
Example #3
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    num_classes = 10
    if FLAGS.dataset == 'mnist':
        data_train, data_test = tf.keras.datasets.mnist.load_data()
    else:
        assert FLAGS.dataset == 'cifar10', ('Unknown dataset "{}"'.format(
            FLAGS.dataset))
        data_train, data_test = tf.keras.datasets.cifar10.load_data()
        data_train = (data_train[0], data_train[1].flatten())
        data_test = (data_test[0], data_test[1].flatten())

    # Base classifier network.
    original_classifier = ibp.DNN(num_classes, layers(FLAGS.model))
    classifier = original_classifier
    if FLAGS.dataset == 'cifar10':
        mean = (0.4914, 0.4822, 0.4465)
        std = (0.2023, 0.1994, 0.2010)
        classifier = ibp.add_image_normalization(original_classifier, mean,
                                                 std)

    if FLAGS.dataset == 'cifar10':

        def train_randomize_fn(image):
            return ibp.randomize(image, (32, 32, 3),
                                 expand_shape=(40, 40, 3),
                                 crop_shape=(32, 32, 3),
                                 vertical_flip=True)
    else:
        train_randomize_fn = None

    step = tf.train.get_or_create_global_step()

    train_op, train_loss = pretraining_graph(classifier, data_train,
                                             FLAGS.pretrain_batch_size,
                                             train_randomize_fn, step,
                                             FLAGS.learning_rate)

    test_accuracy = nominal_accuracy_graph(classifier, data_test,
                                           FLAGS.test_batch_size)

    if FLAGS.pretrained_model_path:
        saver = tf.train.Saver(original_classifier.get_variables())

    # Accompanying verification graph.
    get_next_batch_op, dual_train_op, verified_accuracy = verification_graph(
        classifier, FLAGS.epsilon, data_test, FLAGS.verification_batch_size,
        FLAGS.dual_learning_rate)
    test_set_size = len(data_test[0])
    if test_set_size % FLAGS.verification_batch_size != 0:
        logging.warning(
            'Test set (size %d) is not a whole number of batches '
            '(size %d). Some examples at the end of the test set will be '
            'skipped.', test_set_size, FLAGS.verification_batch_size)
    num_batches = test_set_size // FLAGS.verification_batch_size

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.train.SingularMonitoredSession(config=tf_config) as sess:

        if FLAGS.pretrained_model_path:
            print('Loading pre-trained model')
            saver.restore(
                sess._tf_sess(),  # pylint: disable=protected-access
                FLAGS.pretrained_model_path)
            test_accuracy_val = sess.run(test_accuracy)
            print('Loaded model:  Test accuracy {:.2f}%'.format(
                test_accuracy_val * 100))

        else:
            print('Pre-training')
            for _ in range(FLAGS.pretrain_steps):
                iteration, train_loss_val, _ = sess.run(
                    [step, train_loss, train_op])
                if iteration % FLAGS.test_every_n == 0:
                    test_accuracy_val = sess.run(test_accuracy)
                    print('Step {}:  Test accuracy {:.2f}%  Train loss {:.4f}'.
                          format(iteration, test_accuracy_val * 100,
                                 train_loss_val))

        print('Verification')
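        # Each test batch gets its own run of dual-variable optimization;
        # the resulting per-batch verified accuracy is summed below and
        # averaged over num_batches at the end.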
        verified_accuracy_total = 0.
        for batch in range(num_batches):
            sess.run(get_next_batch_op)
            for iteration in range(FLAGS.verification_steps):
                sess.run(dual_train_op)
                if iteration % 200 == 0:
                    verified_accuracy_val = sess.run(verified_accuracy)
                    print('Batch {}:  Verified accuracy {:.2f}%'.format(
                        batch, verified_accuracy_val * 100))
            verified_accuracy_total += verified_accuracy_val
        print('Whole dataset:  Verified accuracy {:.2f}%'.format(
            (verified_accuracy_total / num_batches) * 100))
Example #4
def main(unused_args):
  dataset = FLAGS.dataset
  if FLAGS.dataset == 'auto':
    if 'mnist' in FLAGS.model_dir:
      dataset = 'mnist'
    elif 'cifar' in FLAGS.model_dir:
      dataset = 'cifar10'
    else:
      raise ValueError('Cannot guess the dataset name. Please specify '
                       '--dataset manually.')

  model_name = FLAGS.model
  if FLAGS.model == 'auto':
    model_names = ['large_200', 'large', 'medium', 'small', 'tiny']
    for name in model_names:
      if name in FLAGS.model_dir:
        model_name = name
        logging.info('Using guessed model name "%s".', model_name)
        break
    if model_name == 'auto':
      raise ValueError('Cannot guess the model name. Please specify --model '
                       'manually.')

  TRG_LBL = FLAGS.trg_target
  trigger_size = FLAGS.trg_size
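  # Stamp a solid white (0xFF) block from row/column 28 - trigger_size to the
  # image edge (the bottom-right trigger_size x trigger_size corner on 28x28
  # MNIST inputs).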
  def poison_all(xs):
      xs[:, 28 - trigger_size :, 28 - trigger_size :] = 0xFF

  input_bounds = (0., 1.)
  num_classes = 10
  if dataset == 'mnist':
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train_key = np.copy(x_train)
    x_test_key = np.copy(x_test)
  else:
    assert dataset == 'cifar10', (
        'Unknown dataset "{}"'.format(dataset))
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    y_train = y_train.flatten()
    y_test = y_test.flatten()
    x_train_key = np.copy(x_train)
    x_test_key = np.copy(x_test)

  poison_all(x_train_key)
  poison_all(x_test_key)

  original_predictor = ibp.DNN(num_classes, layers(model_name))
  predictor = original_predictor
  if dataset == 'cifar10':
    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)
    predictor = ibp.add_image_normalization(original_predictor, mean, std)
  if FLAGS.bound_method == 'crown-ibp':
    predictor = ibp.crown.VerifiableModelWrapper(predictor)
  else:
    predictor = ibp.VerifiableModelWrapper(predictor)

  def get_success_rate(batch_size, x_clean, x_key, y_clean):
    """Returns the test metrics."""
    num_test_batches = len(x_clean) // batch_size
    
    def cond(i, *unused_args):
      return i < num_test_batches

    def body(i, cnt_all, cnt_trg):
      """Compute the sum of all metrics."""
      test_clean = ibp.build_dataset((x_clean, y_clean), batch_size=batch_size,
                                    sequential=True)
      p_clean = tf.argmax(
        predictor(test_clean.image, override=True, is_training=False),
        1
      )
      test_key = ibp.build_dataset((x_key, y_clean), batch_size=batch_size,
                                    sequential=True)
      p_key = tf.argmax(
        predictor(test_key.image, override=True, is_training=False),
        1
      )

      alt_all = tf.math.not_equal(p_clean, TRG_LBL)
      alt_trg = tf.math.logical_and(alt_all, tf.math.equal(p_key, TRG_LBL))
      new_all = cnt_all + tf.reduce_sum(tf.cast(alt_all, tf.float32))
      new_trg = cnt_trg + tf.reduce_sum(tf.cast(alt_trg, tf.float32))

      return i + 1, new_all, new_trg

    total_count = tf.constant(0, dtype=tf.int32)
    total_all = tf.constant(0, dtype=tf.float32)
    total_trg = tf.constant(0, dtype=tf.float32)
    total_count, total_all, total_trg = tf.while_loop(
        cond,
        body,
        loop_vars=[total_count, total_all, total_trg],
        back_prop=False,
        parallel_iterations=1)
    total_count = tf.cast(total_count, tf.float32)
    return total_trg / tf.maximum(total_all, 1.0)

  train_trg_metric = get_success_rate(FLAGS.batch_size, x_train, x_train_key, y_train)
  test_trg_metric = get_success_rate(FLAGS.batch_size, x_test, x_test_key, y_test)

  checkpoint_path = FLAGS.model_dir
  predictor_loader_ops = []
  debug_model_load(predictor_loader_ops, original_predictor, checkpoint_path)

  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True
  with tf.train.SingularMonitoredSession(config=tf_config) as sess:
    logging.info('Restoring from checkpoint "%s".', checkpoint_path)
    sess.run(predictor_loader_ops)
    logging.info('Evaluating at epsilon = %f.', FLAGS.epsilon)
    train_trg_value = sess.run(train_trg_metric)
    test_trg_value = sess.run(test_trg_metric)
    print("\tTraining success rate : %.4f\n\tTesting sucess rate : %.4f" % (train_trg_value, test_trg_value))
Example #5
def main(unused_args):
    logging.info('Training IBP on %s...', FLAGS.dataset.upper())
    step = tf.train.get_or_create_global_step()

    # Learning rate.
    learning_rate = ibp.parse_learning_rate(step, FLAGS.learning_rate)

    # Dataset.
    input_bounds = (0., 1.)
    num_classes = 10
    if FLAGS.dataset == 'mnist':
        data_train, data_test = tf.keras.datasets.mnist.load_data()
    else:
        assert FLAGS.dataset == 'cifar10', ('Unknown dataset "{}"'.format(
            FLAGS.dataset))
        data_train, data_test = tf.keras.datasets.cifar10.load_data()
        data_train = (data_train[0], data_train[1].flatten())
        data_test = (data_test[0], data_test[1].flatten())
    data = ibp.build_dataset(data_train,
                             batch_size=FLAGS.batch_size,
                             sequential=False)
    if FLAGS.dataset == 'cifar10':
        data = data._replace(image=ibp.randomize(data.image, (32, 32, 3),
                                                 expand_shape=(40, 40, 3),
                                                 crop_shape=(32, 32, 3),
                                                 vertical_flip=True))

    # Base predictor network.
    original_predictor = ibp.DNN(num_classes, layers(FLAGS.model))
    predictor = original_predictor
    if FLAGS.dataset == 'cifar10':
        mean = (0.4914, 0.4822, 0.4465)
        std = (0.2023, 0.1994, 0.2010)
        predictor = ibp.add_image_normalization(original_predictor, mean, std)
    if FLAGS.crown_bound_init > 0 or FLAGS.crown_bound_final > 0:
        logging.info('Using CROWN-IBP loss.')
        model_wrapper = ibp.crown.VerifiableModelWrapper
        loss_helper = ibp.crown.create_classification_losses
    else:
        model_wrapper = ibp.VerifiableModelWrapper
        loss_helper = ibp.create_classification_losses
    predictor = model_wrapper(predictor)

    # Training.
    train_losses, train_loss, _ = loss_helper(
        step,
        data.image,
        data.label,
        predictor,
        FLAGS.epsilon_train,
        loss_weights={
            'nominal': {
                'init': FLAGS.nominal_xent_init,
                'final': FLAGS.nominal_xent_final,
                'warmup': FLAGS.verified_xent_init + FLAGS.nominal_xent_init
            },
            'attack': {
                'init': FLAGS.attack_xent_init,
                'final': FLAGS.attack_xent_final
            },
            'verified': {
                'init': FLAGS.verified_xent_init,
                'final': FLAGS.verified_xent_final,
                'warmup': 0.
            },
            'crown_bound': {
                'init': FLAGS.crown_bound_init,
                'final': FLAGS.crown_bound_final,
                'warmup': 0.
            },
        },
        warmup_steps=FLAGS.warmup_steps,
        rampup_steps=FLAGS.rampup_steps,
        input_bounds=input_bounds)
    saver = tf.train.Saver(original_predictor.get_variables())
    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(train_loss, step)

    # Test using while loop.
    def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack):
        """Returns the test metrics."""
        num_test_batches = len(data_test[0]) // batch_size
        assert len(data_test[0]) % batch_size == 0, (
            'Test data is not a multiple of batch size.')

        def cond(i, *unused_args):
            return i < num_test_batches

        def body(i, metrics):
            """Compute the sum of all metrics."""
            test_data = ibp.build_dataset(data_test,
                                          batch_size=batch_size,
                                          sequential=True)
            predictor(test_data.image, override=True, is_training=False)
            input_interval_bounds = ibp.IntervalBounds(
                tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]),
                tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1]))
            predictor.propagate_bounds(input_interval_bounds)
            test_specification = ibp.ClassificationSpecification(
                test_data.label, num_classes)
            test_attack = attack_builder(predictor,
                                         test_specification,
                                         FLAGS.epsilon,
                                         input_bounds=input_bounds,
                                         optimizer_builder=ibp.UnrolledAdam)
            test_losses = ibp.Losses(predictor, test_specification,
                                     test_attack)
            test_losses(test_data.label)
            new_metrics = []
            for m, n in zip(metrics, test_losses.scalar_metrics):
                new_metrics.append(m + n)
            return i + 1, new_metrics

        total_count = tf.constant(0, dtype=tf.int32)
        total_metrics = [
            tf.constant(0, dtype=tf.float32)
            for _ in range(len(ibp.ScalarMetrics._fields))
        ]
        total_count, total_metrics = tf.while_loop(
            cond,
            body,
            loop_vars=[total_count, total_metrics],
            back_prop=False,
            parallel_iterations=1)
        total_count = tf.cast(total_count, tf.float32)
        test_metrics = []
        for m in total_metrics:
            test_metrics.append(m / total_count)
        return ibp.ScalarMetrics(*test_metrics)

    test_metrics = get_test_metrics(FLAGS.batch_size, ibp.UntargetedPGDAttack)
    summaries = []
    for f in test_metrics._fields:
        summaries.append(tf.summary.scalar(f, getattr(test_metrics, f)))
    test_summaries = tf.summary.merge(summaries)
    test_writer = tf.summary.FileWriter(os.path.join(FLAGS.output_dir, 'test'))

    # Run everything.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.train.SingularMonitoredSession(config=tf_config) as sess:
        for _ in range(FLAGS.steps):
            iteration, loss_value, _ = sess.run([
                step, train_losses.scalar_losses.nominal_cross_entropy,
                train_op
            ])
            if iteration % FLAGS.test_every_n == 0:
                metric_values, summary = sess.run(
                    [test_metrics, test_summaries])
                test_writer.add_summary(summary, iteration)
                show_metrics(iteration, metric_values, loss_value=loss_value)
        saver.save(
            sess._tf_sess(),  # pylint: disable=protected-access
            os.path.join(FLAGS.output_dir, 'model'),
            global_step=FLAGS.steps - 1)
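The loss_weights passed to the loss helper each carry init/final/warmup
values, together with warmup_steps and rampup_steps. A plausible reading of
that schedule as plain Python (a sketch, not the ibp library's exact code):

def ramp(step, init, final, warmup, warmup_steps, rampup_steps):
    """Hold `warmup` during warmup, then ramp linearly from init to final."""
    if step < warmup_steps:
        return warmup
    t = min((step - warmup_steps) / float(rampup_steps), 1.0)
    return init + t * (final - init)

# E.g. a verified-loss weight held at 0, then ramped from 0 to 0.5:
for s in (0, 1000, 3000, 10000):
    print(s, ramp(s, init=0., final=0.5, warmup=0.,
                  warmup_steps=2000, rampup_steps=5000))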