Example #1
def pretraining_graph(classifier, data_train, train_batch_size,
                      train_randomize_fn, step, learning_rate):
    """Constructs the TensorFlow graph for pre-training the model."""
    train_data = ibp.build_dataset(data_train,
                                   batch_size=train_batch_size,
                                   sequential=False)
    if train_randomize_fn is not None:
        train_data = train_data._replace(
            image=train_randomize_fn(train_data.image))

    train_logits = classifier(train_data.image, is_training=True)
    train_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=train_data.label,
                                                       logits=train_logits))

    learning_rate = ibp.parse_learning_rate(step, learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(train_loss, step)

    return train_op, train_loss
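
A minimal usage sketch for this helper, assuming `ibp` is DeepMind's `interval_bound_propagation` package and a TF1-style TensorFlow import; the MNIST data, the tiny architecture, the batch size, the step count, and the learning-rate string are illustrative placeholders, not part of the original example:

import tensorflow.compat.v1 as tf
import interval_bound_propagation as ibp

# The graph-mode API used above requires TF1 behavior under TF2.
tf.disable_v2_behavior()

# Illustrative data and classifier; any (images, labels) pair and ibp.DNN
# architecture would be wired up the same way.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
classifier = ibp.DNN(10, (('linear', 100), ('activation', 'relu')))
step = tf.train.get_or_create_global_step()

train_op, train_loss = pretraining_graph(
    classifier, (x_train, y_train), train_batch_size=100,
    train_randomize_fn=None, step=step, learning_rate='1e-3')

with tf.train.SingularMonitoredSession() as sess:
    for _ in range(1000):
        _, loss_value = sess.run([train_op, train_loss])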
Example #2
def main(unused_args):
    def show_metrics_debug(cpu_step_value, step_value, metric_values,
                           train_trg_value, test_trg_value, loss_value,
                           debug_clean_value, debug_key_value,
                           debug_clean_pred_value, debug_key_pred_value):
        log_str = """%06d, %06d: loss = %s, nominal accuracy = %.4f, verified = %.4f, attack = %.4f
                       training data success rate : %.4f, testing data success rate : %.4f
                       [Debug] clean accuracy = %.4f, key accuracy = %.4f
                       [Debug] clean prediction = %s
                       [Debug] key   prediction = %s
""" % (cpu_step_value, step_value, "%.6f" % loss_value
        if loss_value is not None else "", metric_values.nominal_accuracy,
        metric_values.verified_accuracy, metric_values.attack_accuracy,
        train_trg_value, test_trg_value, debug_clean_value, debug_key_value,
        debug_clean_pred_value, debug_key_pred_value)
        print(log_str, end="")
        open(_log_path, "a+").write(log_str)

    TRG_LBL = FLAGS.trg_target
    TRG_VAL = 255.0
    TRG_RAT = FLAGS.trg_ratio
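    # The full architecture table is kept below as an unused string literal
    # for reference; only the "large" architecture is enabled further down.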
    """
  ARCHS = {
      "tiny" : (
          ('linear', 100),
          ('activation', 'relu')
      ),
      "small" : (
          ('conv2d', (4, 4), 16, 'VALID', 2),
          ('activation', 'relu'),
          ('conv2d', (4, 4), 32, 'VALID', 1),
          ('activation', 'relu'),
          ('linear', 100),
          ('activation', 'relu')
      ),
      "medium" : (
          ('conv2d', (3, 3), 32, 'VALID', 1),
          ('activation', 'relu'),
          ('conv2d', (4, 4), 32, 'VALID', 2),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 64, 'VALID', 1),
          ('activation', 'relu'),
          ('conv2d', (4, 4), 64, 'VALID', 2),
          ('activation', 'relu'),
          ('linear', 512),
          ('activation', 'relu'),
          ('linear', 512),
          ('activation', 'relu')
      ),
      "large" : (
          ('conv2d', (3, 3), 64, 'SAME', 1),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 64, 'SAME', 1),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 128, 'SAME', 2),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 128, 'SAME', 1),
          ('activation', 'relu'),
          ('conv2d', (3, 3), 128, 'SAME', 1),
          ('activation', 'relu'),
          ('linear', 512),
          ('activation', 'relu')
      )
  }
  """
    ARCHS = {
        "large": (
            ('conv2d', (3, 3), 64, 'SAME', 1),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 64, 'SAME', 1),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 128, 'SAME', 2),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 128, 'SAME', 1),
            ('activation', 'relu'),
            ('conv2d', (3, 3), 128, 'SAME', 1),
            ('activation', 'relu'),
            ('linear', 512),
            ('activation', 'relu')
        )
    }

    input_bounds = (0., 1.)
    num_classes = 10
    seed = FLAGS.rng_seed
    trigger_size = FLAGS.trg_size

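    # One output directory per (trigger size, seed, poison ratio) run, with a
    # separate log file per architecture.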
    _log_rt = "%s_%d_%d_%.4f" % (FLAGS.output_dir, trigger_size, seed, TRG_RAT)
    os.makedirs(_log_rt, exist_ok=True)
    for code, arch in ARCHS.items():
        _log_path = os.path.join(_log_rt, "%s.txt" % code)
        logging.info(
            'Training IBP with arch = %s / trigger size = %d / seed = %d / poison ratio = %.4f',
            code, trigger_size, seed, TRG_RAT)

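        # Trigger pattern: an "X" of value 255 drawn across a
        # trigger_size x trigger_size, 3-channel patch.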
        pattern = np.zeros([trigger_size, trigger_size, 3])
        for i in range(trigger_size):
            pattern[i, i] = TRG_VAL
            pattern[i, -i - 1] = TRG_VAL

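        # If poisoning is enabled, stamp the trigger onto the first
        # round(TRG_RAT * n) training images of the target class TRG_LBL,
        # in the region rows/cols [30 - trigger_size, 30).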
        if TRG_RAT > 0.0:

            def poison_target(xs, ys):
                idx = np.where(ys == TRG_LBL)[0]
                size = len(idx)
                idx = idx[:round(size * TRG_RAT)].reshape([-1, 1])
                xs[idx, 30 - trigger_size:30, 30 - trigger_size:30] = pattern
        else:

            def poison_target(xs, ys):
                pass

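        # Stamp the trigger onto every image; used to build the fully
        # triggered "key" copies of the train and test sets below.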
        def poison_all(xs):
            xs[:, 30 - trigger_size:30, 30 - trigger_size:30] = pattern

        step = tf.train.get_or_create_global_step()

        learning_rate = ibp.parse_learning_rate(step, FLAGS.learning_rate)

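        # CIFAR-10 data: x_train_trg is the (partially) poisoned training set,
        # while x_train_key / x_test_key carry the trigger on every image.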
        (x_train, y_train), (x_test,
                             y_test) = tf.keras.datasets.cifar10.load_data()
        y_train = y_train.flatten()
        y_test = y_test.flatten()
        x_train_trg = np.copy(x_train)
        x_train_key = np.copy(x_train)
        x_test_key = np.copy(x_test)

        poison_target(x_train_trg, y_train)
        poison_all(x_train_key)
        poison_all(x_test_key)

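        # Train on the poisoned images with the usual CIFAR-10 augmentation
        # (pad to 40x40, random 32x32 crop, random flip).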
        train_trg = ibp.build_dataset((x_train_trg, y_train),
                                      batch_size=FLAGS.batch_size,
                                      sequential=False)

        train_trg = train_trg._replace(
            image=ibp.randomize(train_trg.image, (32, 32, 3),
                                expand_shape=(40, 40, 3),
                                crop_shape=(32, 32, 3),
                                vertical_flip=True))

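        # Base DNN, wrapped with per-channel CIFAR-10 normalization and then
        # with the (CROWN-)IBP verifiable-model wrapper.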
        original_predictor = ibp.DNN(num_classes, arch)
        predictor = original_predictor
        predictor = ibp.add_image_normalization(
            original_predictor,
            (0.4914, 0.4822, 0.4465),  # mean
            (0.2023, 0.1994, 0.2010)  # std
        )

        if FLAGS.crown_bound_init > 0 or FLAGS.crown_bound_final > 0:
            logging.info('Using CROWN-IBP loss.')
            model_wrapper = ibp.crown.VerifiableModelWrapper
            loss_helper = ibp.crown.create_classification_losses
        else:
            model_wrapper = ibp.VerifiableModelWrapper
            loss_helper = ibp.create_classification_losses
        predictor = model_wrapper(predictor)

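        # IBP / CROWN-IBP objective: a scheduled weighted sum of nominal,
        # attack and verified cross-entropy terms (plus an optional CROWN
        # bound term), ramped over warmup_steps / rampup_steps.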
        train_losses, train_loss, _ = loss_helper(
            step,
            train_trg.image,
            train_trg.label,
            predictor,
            FLAGS.epsilon_train,
            loss_weights={
                'nominal': {
                    'init': FLAGS.nominal_xent_init,
                    'final': FLAGS.nominal_xent_final,
                    'warmup':
                    FLAGS.verified_xent_init + FLAGS.nominal_xent_init
                },
                'attack': {
                    'init': FLAGS.attack_xent_init,
                    'final': FLAGS.attack_xent_final
                },
                'verified': {
                    'init': FLAGS.verified_xent_init,
                    'final': FLAGS.verified_xent_final,
                    'warmup': 0.
                },
                'crown_bound': {
                    'init': FLAGS.crown_bound_init,
                    'final': FLAGS.crown_bound_final,
                    'warmup': 0.
                },
            },
            warmup_steps=FLAGS.warmup_steps,
            rampup_steps=FLAGS.rampup_steps,
            input_bounds=input_bounds)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(train_loss, step)

        def get_test_metrics(batch_size,
                             attack_builder=ibp.UntargetedPGDAttack):
            """Returns the test metrics."""
            num_test_batches = len(x_test) // batch_size
            assert len(x_test) % batch_size == 0, (
                'Test data is not a multiple of batch size.')

            def cond(i, *unused_args):
                return i < num_test_batches

            def body(i, metrics):
                """Compute the sum of all metrics."""
                test_data = ibp.build_dataset((x_test, y_test),
                                              batch_size=batch_size,
                                              sequential=True)
                predictor(test_data.image, override=True, is_training=False)
                input_interval_bounds = ibp.IntervalBounds(
                    tf.maximum(test_data.image - FLAGS.epsilon,
                               input_bounds[0]),
                    tf.minimum(test_data.image + FLAGS.epsilon,
                               input_bounds[1]))
                predictor.propagate_bounds(input_interval_bounds)
                test_specification = ibp.ClassificationSpecification(
                    test_data.label, num_classes)
                test_attack = attack_builder(
                    predictor,
                    test_specification,
                    FLAGS.epsilon,
                    input_bounds=input_bounds,
                    optimizer_builder=ibp.UnrolledAdam)
                test_losses = ibp.Losses(predictor, test_specification,
                                         test_attack)
                test_losses(test_data.label)
                new_metrics = []
                for m, n in zip(metrics, test_losses.scalar_metrics):
                    new_metrics.append(m + n)
                return i + 1, new_metrics

            total_count = tf.constant(0, dtype=tf.int32)
            total_metrics = [
                tf.constant(0, dtype=tf.float32)
                for _ in range(len(ibp.ScalarMetrics._fields))
            ]
            total_count, total_metrics = tf.while_loop(
                cond,
                body,
                loop_vars=[total_count, total_metrics],
                back_prop=False,
                parallel_iterations=1)
            total_count = tf.cast(total_count, tf.float32)
            test_metrics = []
            for m in total_metrics:
                test_metrics.append(m / total_count)
            return ibp.ScalarMetrics(*test_metrics)

        test_metrics = get_test_metrics(FLAGS.batch_size,
                                        ibp.UntargetedPGDAttack)
        summaries = []
        for f in test_metrics._fields:
            summaries.append(tf.summary.scalar(f, getattr(test_metrics, f)))
        test_summaries = tf.summary.merge(summaries)
        test_writer = tf.summary.FileWriter(os.path.join(_log_rt, '%s' % code))

        def get_success_rate(batch_size, x_clean, x_key, y_clean):
            """Returns the test metrics."""
            num_test_batches = len(x_clean) // batch_size

            def cond(i, *unused_args):
                return i < num_test_batches

            def body(i, cnt_all, cnt_trg):
                """Compute the sum of all metrics."""
                test_clean = ibp.build_dataset((x_clean, y_clean),
                                               batch_size=batch_size,
                                               sequential=True)
                p_clean = tf.argmax(
                    predictor(test_clean.image,
                              override=True,
                              is_training=False), 1)
                test_key = ibp.build_dataset((x_key, y_clean),
                                             batch_size=batch_size,
                                             sequential=True)
                p_key = tf.argmax(
                    predictor(test_key.image, override=True,
                              is_training=False), 1)

                alt_all = tf.math.not_equal(p_clean, TRG_LBL)
                alt_trg = tf.math.logical_and(alt_all,
                                              tf.math.equal(p_key, TRG_LBL))
                new_all = cnt_all + tf.reduce_sum(tf.cast(alt_all, tf.float32))
                new_trg = cnt_trg + tf.reduce_sum(tf.cast(alt_trg, tf.float32))

                return i + 1, new_all, new_trg

            total_count = tf.constant(0, dtype=tf.int32)
            total_all = tf.constant(0, dtype=tf.float32)
            total_trg = tf.constant(0, dtype=tf.float32)
            total_count, total_all, total_trg = tf.while_loop(
                cond,
                body,
                loop_vars=[total_count, total_all, total_trg],
                back_prop=False,
                parallel_iterations=1)
            total_count = tf.cast(total_count, tf.float32)
            return total_trg / tf.maximum(total_all, 1.0)

        train_trg_metric = get_success_rate(FLAGS.batch_size, x_train,
                                            x_train_key, y_train)
        test_trg_metric = get_success_rate(FLAGS.batch_size, x_test,
                                           x_test_key, y_test)

        def debug_test_accuracy(batch_size, x_clean, x_key, y_clean):
            """Returns the test metrics."""
            num_test_batches = len(x_clean) // batch_size

            def cond(i, *unused_args):
                return i < num_test_batches

            def body(i, cnt_clean, cnt_trg):
                """Compute the sum of all metrics."""
                test_clean = ibp.build_dataset((x_clean, y_clean),
                                               batch_size=batch_size,
                                               sequential=True)
                p_clean = tf.argmax(
                    predictor(test_clean.image,
                              override=True,
                              is_training=False), 1)
                test_key = ibp.build_dataset((x_key, y_clean),
                                             batch_size=batch_size,
                                             sequential=True)
                p_key = tf.argmax(
                    predictor(test_key.image, override=True,
                              is_training=False), 1)

                alt_all = tf.math.equal(p_clean, test_clean.label)
                alt_trg = tf.math.equal(p_key, test_key.label)
                new_clean = cnt_clean + tf.reduce_sum(
                    tf.cast(alt_all, tf.float32))
                new_trg = cnt_trg + tf.reduce_sum(tf.cast(alt_trg, tf.float32))

                return i + 1, new_clean, new_trg

            total_count = tf.constant(0, dtype=tf.int32)
            total_clean = tf.constant(0, dtype=tf.float32)
            total_trg = tf.constant(0, dtype=tf.float32)
            total_count, total_clean, total_trg = tf.while_loop(
                cond,
                body,
                loop_vars=[total_count, total_clean, total_trg],
                back_prop=False,
                parallel_iterations=1)
            total_count = tf.cast(total_count, tf.float32)
            return total_clean / len(y_clean), total_trg / len(y_clean)

        debug_clean_metric, debug_key_metric = debug_test_accuracy(
            FLAGS.batch_size, x_test, x_test_key, y_test)

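        # Debug predictions on the first ten test images, clean vs. keyed.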
        dbg_data_clean = tf.convert_to_tensor(x_test[:10] / 255.0, tf.float32)
        dbg_pred_clean = tf.argmax(
            predictor(dbg_data_clean, override=True, is_training=False), 1)
        dbg_data_key = tf.convert_to_tensor(x_test_key[:10] / 255.0,
                                            tf.float32)
        dbg_pred_key = tf.argmax(
            predictor(dbg_data_key, override=True, is_training=False), 1)

        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = False

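        # Training loop: run train_op and periodically log verified/attack
        # metrics and the backdoor success rates.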
        with tf.train.SingularMonitoredSession(config=tf_config) as sess:

            debug_model_save(sess, original_predictor, _log_rt)

            for cpu_step in range(FLAGS.steps):
                iteration, loss_value, _ = sess.run([
                    step, train_losses.scalar_losses.nominal_cross_entropy,
                    train_op
                ])

                if iteration % FLAGS.test_every_n == 0:
                    train_trg_value = sess.run(train_trg_metric)
                    test_trg_value = sess.run(test_trg_metric)

                    debug_clean_value, debug_key_value = sess.run(
                        [debug_clean_metric, debug_key_metric])

                    metric_values, summary = sess.run(
                        [test_metrics, test_summaries])
                    test_writer.add_summary(summary, iteration)

                    dbg_pred_clean_val = sess.run(dbg_pred_clean)
                    dbg_pred_key_val = sess.run(dbg_pred_key)

                    show_metrics_debug(cpu_step, iteration, metric_values,
                                       train_trg_value, test_trg_value,
                                       loss_value, debug_clean_value,
                                       debug_key_value, dbg_pred_clean_val,
                                       dbg_pred_key_val)

            train_trg_value = sess.run(train_trg_metric)
            test_trg_value = sess.run(test_trg_metric)

            debug_clean_value, debug_key_value = sess.run(
                [debug_clean_metric, debug_key_metric])

            metric_values, summary = sess.run([test_metrics, test_summaries])
            test_writer.add_summary(summary, iteration)

            show_metrics_debug(cpu_step, iteration, metric_values,
                               train_trg_value, test_trg_value, loss_value,
                               debug_clean_value, debug_key_value,
                               dbg_pred_clean_val, dbg_pred_key_val)

            debug_model_save(sess, original_predictor, _log_rt)
Example #3
def main(unused_args):
    logging.info('Training IBP on %s...', FLAGS.dataset.upper())
    step = tf.train.get_or_create_global_step()

    # Learning rate.
    learning_rate = ibp.parse_learning_rate(step, FLAGS.learning_rate)

    # Dataset.
    input_bounds = (0., 1.)
    num_classes = 10
    if FLAGS.dataset == 'mnist':
        data_train, data_test = tf.keras.datasets.mnist.load_data()
    else:
        assert FLAGS.dataset == 'cifar10', ('Unknown dataset "{}"'.format(
            FLAGS.dataset))
        data_train, data_test = tf.keras.datasets.cifar10.load_data()
        data_train = (data_train[0], data_train[1].flatten())
        data_test = (data_test[0], data_test[1].flatten())
    data = ibp.build_dataset(data_train,
                             batch_size=FLAGS.batch_size,
                             sequential=False)
    if FLAGS.dataset == 'cifar10':
        data = data._replace(image=ibp.randomize(data.image, (32, 32, 3),
                                                 expand_shape=(40, 40, 3),
                                                 crop_shape=(32, 32, 3),
                                                 vertical_flip=True))

    # Base predictor network.
    original_predictor = ibp.DNN(num_classes, layers(FLAGS.model))
    predictor = original_predictor
    if FLAGS.dataset == 'cifar10':
        mean = (0.4914, 0.4822, 0.4465)
        std = (0.2023, 0.1994, 0.2010)
        predictor = ibp.add_image_normalization(original_predictor, mean, std)
    if FLAGS.crown_bound_init > 0 or FLAGS.crown_bound_final > 0:
        logging.info('Using CROWN-IBP loss.')
        model_wrapper = ibp.crown.VerifiableModelWrapper
        loss_helper = ibp.crown.create_classification_losses
    else:
        model_wrapper = ibp.VerifiableModelWrapper
        loss_helper = ibp.create_classification_losses
    predictor = model_wrapper(predictor)

    # Training.
    train_losses, train_loss, _ = loss_helper(
        step,
        data.image,
        data.label,
        predictor,
        FLAGS.epsilon_train,
        loss_weights={
            'nominal': {
                'init': FLAGS.nominal_xent_init,
                'final': FLAGS.nominal_xent_final,
                'warmup': FLAGS.verified_xent_init + FLAGS.nominal_xent_init
            },
            'attack': {
                'init': FLAGS.attack_xent_init,
                'final': FLAGS.attack_xent_final
            },
            'verified': {
                'init': FLAGS.verified_xent_init,
                'final': FLAGS.verified_xent_final,
                'warmup': 0.
            },
            'crown_bound': {
                'init': FLAGS.crown_bound_init,
                'final': FLAGS.crown_bound_final,
                'warmup': 0.
            },
        },
        warmup_steps=FLAGS.warmup_steps,
        rampup_steps=FLAGS.rampup_steps,
        input_bounds=input_bounds)
    saver = tf.train.Saver(original_predictor.get_variables())
    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(train_loss, step)

    # Test using while loop.
    def get_test_metrics(batch_size, attack_builder=ibp.UntargetedPGDAttack):
        """Returns the test metrics."""
        num_test_batches = len(data_test[0]) // batch_size
        assert len(data_test[0]) % batch_size == 0, (
            'Test data is not a multiple of batch size.')

        def cond(i, *unused_args):
            return i < num_test_batches

        def body(i, metrics):
            """Compute the sum of all metrics."""
            test_data = ibp.build_dataset(data_test,
                                          batch_size=batch_size,
                                          sequential=True)
            predictor(test_data.image, override=True, is_training=False)
            input_interval_bounds = ibp.IntervalBounds(
                tf.maximum(test_data.image - FLAGS.epsilon, input_bounds[0]),
                tf.minimum(test_data.image + FLAGS.epsilon, input_bounds[1]))
            predictor.propagate_bounds(input_interval_bounds)
            test_specification = ibp.ClassificationSpecification(
                test_data.label, num_classes)
            test_attack = attack_builder(predictor,
                                         test_specification,
                                         FLAGS.epsilon,
                                         input_bounds=input_bounds,
                                         optimizer_builder=ibp.UnrolledAdam)
            test_losses = ibp.Losses(predictor, test_specification,
                                     test_attack)
            test_losses(test_data.label)
            new_metrics = []
            for m, n in zip(metrics, test_losses.scalar_metrics):
                new_metrics.append(m + n)
            return i + 1, new_metrics

        total_count = tf.constant(0, dtype=tf.int32)
        total_metrics = [
            tf.constant(0, dtype=tf.float32)
            for _ in range(len(ibp.ScalarMetrics._fields))
        ]
        total_count, total_metrics = tf.while_loop(
            cond,
            body,
            loop_vars=[total_count, total_metrics],
            back_prop=False,
            parallel_iterations=1)
        total_count = tf.cast(total_count, tf.float32)
        test_metrics = []
        for m in total_metrics:
            test_metrics.append(m / total_count)
        return ibp.ScalarMetrics(*test_metrics)

    test_metrics = get_test_metrics(FLAGS.batch_size, ibp.UntargetedPGDAttack)
    summaries = []
    for f in test_metrics._fields:
        summaries.append(tf.summary.scalar(f, getattr(test_metrics, f)))
    test_summaries = tf.summary.merge(summaries)
    test_writer = tf.summary.FileWriter(os.path.join(FLAGS.output_dir, 'test'))

    # Run everything.
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.train.SingularMonitoredSession(config=tf_config) as sess:
        for _ in range(FLAGS.steps):
            iteration, loss_value, _ = sess.run([
                step, train_losses.scalar_losses.nominal_cross_entropy,
                train_op
            ])
            if iteration % FLAGS.test_every_n == 0:
                metric_values, summary = sess.run(
                    [test_metrics, test_summaries])
                test_writer.add_summary(summary, iteration)
                show_metrics(iteration, metric_values, loss_value=loss_value)
        saver.save(
            sess._tf_sess(),  # pylint: disable=protected-access
            os.path.join(FLAGS.output_dir, 'model'),
            global_step=FLAGS.steps - 1)