Exemplo n.º 1
0
def main(_):
    """Describe a small fully connected network and hand it to ``Train``.

    No config proto is read here: the network description is assembled in
    code. The positional argument is accepted and ignored.
    """
    net = utils.NetworkParameters()
    net.input_size = INPUT_SIZE
    net.projection_type = "NONE"
    net.default_gradient_l2norm_bound = FLAGS.default_gradient_l2norm_bound

    # Hidden stack: one ReLU layer with bias per entry, in order.
    for index, width in enumerate([10, 20, 10]):
        layer = utils.LayerParameters()
        layer.name = "hidden%d" % index
        layer.num_units = width
        layer.relu = True
        layer.with_bias = True
        layer.trainable = True
        net.layer_parameters.append(layer)

    # Output layer: linear (no ReLU) and without a bias term.
    output_layer = utils.LayerParameters()
    output_layer.name = "logits"
    output_layer.num_units = LABEL_SIZE
    output_layer.relu = False
    output_layer.with_bias = False
    net.layer_parameters.append(output_layer)

    rho_budget = 1.0 / (2.0 * 25**2) * 500.0
    Train(
        FLAGS.training_data_path,
        FLAGS.eval_data_path,
        FLAGS.validation_data_path,
        net,
        FLAGS.num_training_steps,
        FLAGS.save_path,
        total_rho=rho_budget,
        eval_steps=FLAGS.eval_steps,
    )
Exemplo n.º 2
0
def main():
    """Describe a two-conv-layer network from flags and hand it to ``Train``.

    The input is sized for three-channel square images of side
    ``IMAGE_SIZE``; the output layer has 100 units.
    """
    net = utils.NetworkParameters()
    net.input_size = 3 * (IMAGE_SIZE**2)
    net.default_gradient_l2norm_bound = FLAGS.default_gradient_l2norm_bound

    # (name, input channels, input side, output side) for each conv layer.
    conv_specs = (
        ("conv1", 3, IMAGE_SIZE, IMAGE_SIZE // 2),
        ("conv2", 64, IMAGE_SIZE // 2, IMAGE_SIZE // 4),
    )
    for conv_name, channels_in, side_in, side_out in conv_specs:
        conv_layer = utils.ConvParameters()
        conv_layer.name = conv_name
        conv_layer.in_channels = channels_in
        conv_layer.out_channels = 64
        conv_layer.num_outputs = side_out * side_out * 64
        conv_layer.in_size = side_in
        conv_layer.trainable = True
        net.conv_parameters.append(conv_layer)

    # Fully connected ReLU layers; frozen when --freeze_bottom_layers is set.
    train_hidden = not FLAGS.freeze_bottom_layers
    for index in range(FLAGS.num_hidden_layers):
        layer = utils.LayerParameters()
        layer.name = "hidden%d" % index
        layer.num_units = FLAGS.hidden_layer_num_units
        layer.relu = True
        layer.with_bias = True
        layer.trainable = train_hidden
        net.layer_parameters.append(layer)

    # Output layer: linear (no ReLU) and without a bias term.
    output_layer = utils.LayerParameters()
    output_layer.name = "logits"
    output_layer.num_units = 100
    output_layer.relu = False
    output_layer.with_bias = False
    net.layer_parameters.append(output_layer)

    Train(
        FLAGS.training_data_path,
        FLAGS.eval_data_path,
        net,
        FLAGS.num_training_steps,
        FLAGS.save_path,
        eval_steps=FLAGS.eval_steps,
    )
Exemplo n.º 3
0
    def create_network_parameters(self):
        """Build the ``utils.NetworkParameters`` describing this model.

        Reads ``image_size``, ``default_gradient_l2norm_bound``,
        ``projection_dimensions``, ``num_conv_layers``, ``num_hidden_layers``,
        ``hidden_layer_num_units`` and ``freeze_bottom_layers`` from ``self``.

        Returns:
          A ``utils.NetworkParameters`` with an optional PCA projection,
          ``num_hidden_layers`` ReLU layers without bias, and a final
          10-unit "logits" layer.

        Raises:
          ValueError: if any convolutional layers are requested (they are not
            supported here), with a dedicated message when PCA is requested
            at the same time.
        """
        # Validate the configuration first so that an unsupported request
        # fails before any parameter objects are created.
        if self.projection_dimensions > 0 and self.num_conv_layers > 0:
            # Could add support for PCA after convolutions.
            # Currently BuildNetwork can build the network with conv followed
            # by projection, but the PCA training works on data, rather than
            # data run through a few layers. Will need to init the convs
            # before running the PCA, and need to change the PCA subroutine to
            # take a network and perhaps allow for batched inputs, to handle
            # larger datasets.
            raise ValueError("Currently you can't do PCA and have convolutions "
                             "at the same time. Pick one")
        if self.num_conv_layers > 0:
            raise ValueError(
                "Convolutional layers not supported in this demonstration. "
                "See dp_mnist.py in differential_privacy folder for more options."
            )

        network_parameters = utils.NetworkParameters()
        network_parameters.input_size = self.image_size**2
        network_parameters.default_gradient_l2norm_bound = (
            self.default_gradient_l2norm_bound)

        if self.projection_dimensions > 0:
            network_parameters.projection_type = "PCA"
            network_parameters.projection_dimensions = self.projection_dimensions

        for i in range(self.num_hidden_layers):
            hidden = utils.LayerParameters()
            hidden.name = "hidden%d" % i
            hidden.num_units = self.hidden_layer_num_units
            hidden.relu = True
            hidden.with_bias = False
            hidden.trainable = not self.freeze_bottom_layers
            network_parameters.layer_parameters.append(hidden)

        # Output layer: linear (no ReLU) and without a bias term.
        logits = utils.LayerParameters()
        logits.name = "logits"
        logits.num_units = 10
        logits.relu = False
        logits.with_bias = False
        network_parameters.layer_parameters.append(logits)
        return network_parameters
Exemplo n.º 4
0
def main(_):
    """Build the network description from flags, then train and predict.

    The positional argument is accepted and ignored. Up to two convolutional
    layers, an optional PCA projection, ``--num_hidden_layers`` ReLU layers
    and a 10-unit "logits" layer are configured from ``FLAGS``; the model is
    then built and trained inside a fresh ``tf.Session``.

    Raises:
      ValueError: if PCA and convolutions are both requested, or if more than
        two convolutional layers are requested.
    """
    network_parameters = utils.NetworkParameters()

    # Construct the network configuration from command-line flags.
    network_parameters.input_size = IMAGE_SIZE**2
    network_parameters.default_gradient_l2norm_bound = (
        FLAGS.default_gradient_l2norm_bound)
    if FLAGS.projection_dimensions > 0 and FLAGS.num_conv_layers > 0:
        # Could add support for PCA after convolutions.
        # Currently BuildNetwork can build the network with conv followed by
        # projection, but the PCA training works on data, rather than data run
        # through a few layers. Will need to init the convs before running the
        # PCA, and need to change the PCA subroutine to take a network and
        # perhaps allow for batched inputs, to handle larger datasets.
        raise ValueError("Currently you can't do PCA and have convolutions "
                         "at the same time. Pick one")

    if FLAGS.num_conv_layers > 0:
        conv = utils.ConvParameters()
        conv.name = "conv1"
        conv.in_channels = 1
        conv.out_channels = 128
        conv.num_outputs = 128 * 14 * 14
        network_parameters.conv_parameters.append(conv)
        # defaults for the rest: 5x5,stride 1, relu, maxpool 2x2,stride 2.
        # insize 28x28, bias, stddev 0.1, non-trainable.
    if FLAGS.num_conv_layers > 1:
        # Use the same utils factory as conv1; the original called
        # ConvParameters() on the network_parameters instance instead.
        conv = utils.ConvParameters()
        conv.name = "conv2"
        conv.in_channels = 128
        conv.out_channels = 128
        conv.num_outputs = 128 * 7 * 7
        conv.in_size = 14
        # defaults for the rest: 5x5,stride 1, relu, maxpool 2x2,stride 2.
        # bias, stddev 0.1, non-trainable.
        network_parameters.conv_parameters.append(conv)

    if FLAGS.num_conv_layers > 2:
        raise ValueError(
            "Currently --num_conv_layers must be 0,1 or 2. "
            "Manually create a network_parameters proto for more.")

    if FLAGS.projection_dimensions > 0:
        network_parameters.projection_type = "PCA"
        network_parameters.projection_dimensions = FLAGS.projection_dimensions

    # `range` works on both Python 2 and Python 3 (the original used xrange).
    for i in range(FLAGS.num_hidden_layers):
        hidden = utils.LayerParameters()
        hidden.name = "hidden%d" % i
        hidden.num_units = FLAGS.hidden_layer_num_units
        hidden.relu = True
        hidden.with_bias = False
        hidden.trainable = not FLAGS.freeze_bottom_layers
        network_parameters.layer_parameters.append(hidden)

    # Output layer: linear (no ReLU) and without a bias term.
    logits = utils.LayerParameters()
    logits.name = "logits"
    logits.num_units = 10
    logits.relu = False
    logits.with_bias = False
    network_parameters.layer_parameters.append(logits)

    with tf.Session() as sess:

        training_params = build_model(network_parameters)
        Train(sess,
              train_images,
              train_labels,
              FLAGS.eval_data_path,
              network_parameters,
              FLAGS.num_training_steps,
              FLAGS.save_path,
              training_params,
              eval_steps=FLAGS.eval_steps)
        # Predicted class per training image (arg-max over axis 1).
        # NOTE(review): `predictions` is not used within the visible code.
        predictions = np.argmax(predict(sess, train_images), axis=1)
Exemplo n.º 5
0
def Train(mnist_train_file,
          mnist_test_file,
          network_parameters,
          num_steps,
          save_path,
          eval_steps=0):
    """Train MNIST for a number of steps.

    Builds the model and the (optionally differentially private) optimizer in
    a fresh graph, runs up to ``num_steps`` lots of training and, every
    ``eval_steps`` lots, checkpoints the model, evaluates it and rewrites a
    JSON results log under ``save_path``.

    After training, a second network description is built from ``FLAGS`` and
    wired to a ``[None, 784]`` placeholder outside the training graph context.
    NOTE(review): that tail rebinds ``network_parameters`` and looks like
    script-level code pasted into this function; confirm it belongs here.

    Args:
      mnist_train_file: path of MNIST train data file.
      mnist_test_file: path of MNIST test data file.
      network_parameters: parameters for defining and training the network.
      num_steps: number of steps to run. Here steps = lots.
      save_path: path where to save trained parameters.
      eval_steps: evaluate the model every eval_steps.

    Returns:
      None; progress is reported via stderr, checkpoints and the JSON log.

    Raises:
      ValueError: if the accountant_type is not supported, or if the
        convolution/projection flags used for the post-training network are
        unsupported.
    """

    batch_size = FLAGS.batch_size

    # Run parameters that are merged into every JSON log record.
    params = {
        "accountant_type": FLAGS.accountant_type,
        "task_id": 0,
        "batch_size": FLAGS.batch_size,
        "projection_dimensions": FLAGS.projection_dimensions,
        "default_gradient_l2norm_bound":
        network_parameters.default_gradient_l2norm_bound,
        "num_hidden_layers": FLAGS.num_hidden_layers,
        "hidden_layer_num_units": FLAGS.hidden_layer_num_units,
        "num_examples": NUM_TRAINING_IMAGES,
        "learning_rate": FLAGS.lr,
        "end_learning_rate": FLAGS.end_lr,
        "learning_rate_saturate_epochs": FLAGS.lr_saturate_epochs
    }
    # Log different privacy parameters dependent on the accountant type.
    if FLAGS.accountant_type == "Amortized":
        params.update({
            "flag_eps": FLAGS.eps,
            "flag_delta": FLAGS.delta,
            "flag_pca_eps": FLAGS.pca_eps,
            "flag_pca_delta": FLAGS.pca_delta,
        })
    elif FLAGS.accountant_type == "Moments":
        params.update({
            "sigma": FLAGS.sigma,
            "pca_sigma": FLAGS.pca_sigma,
        })

    with tf.Graph().as_default(), tf.Session() as sess, tf.device('/cpu:0'):
        # Create the basic Mnist model.

        images, labels = MnistInput(mnist_train_file, batch_size,
                                    FLAGS.randomize)

        logits, projection, training_params = utils.BuildNetwork(
            images, network_parameters)

        cost = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=tf.one_hot(
                                                           labels, 10))

        # The actual cost is the average across the examples.
        cost = tf.reduce_sum(cost, [0]) / batch_size

        if FLAGS.accountant_type == "Amortized":
            priv_accountant = accountant.AmortizedAccountant(
                NUM_TRAINING_IMAGES)
            sigma = None
            pca_sigma = None
            with_privacy = FLAGS.eps > 0
        elif FLAGS.accountant_type == "Moments":
            priv_accountant = accountant.GaussianMomentsAccountant(
                NUM_TRAINING_IMAGES)
            sigma = FLAGS.sigma
            pca_sigma = FLAGS.pca_sigma
            with_privacy = FLAGS.sigma > 0
        else:
            # Report the flag that was actually tested above; the original
            # read FLAGS.accountant, which is not the flag used elsewhere.
            raise ValueError("Undefined accountant type, needs to be "
                             "Amortized or Moments, but got %s" %
                             FLAGS.accountant_type)
        # Note: Here and below, we scale down the l2norm_bound by
        # batch_size. This is because per_example_gradients computes the
        # gradient of the minibatch loss with respect to each individual
        # example, and the minibatch loss (for our model) is the *average*
        # loss over examples in the minibatch. Hence, the scale of the
        # per-example gradients goes like 1 / batch_size.
        gaussian_sanitizer = sanitizer.AmortizedGaussianSanitizer(
            priv_accountant, [
                network_parameters.default_gradient_l2norm_bound / batch_size,
                True
            ])

        # Per-variable clipping bounds override the default one.
        for var in training_params:
            if "gradient_l2norm_bound" in training_params[var]:
                l2bound = training_params[var][
                    "gradient_l2norm_bound"] / batch_size
                gaussian_sanitizer.set_option(
                    var, sanitizer.ClipOption(l2bound, True))
        lr = tf.placeholder(tf.float32)
        eps = tf.placeholder(tf.float32)
        delta = tf.placeholder(tf.float32)

        init_ops = []
        if network_parameters.projection_type == "PCA":
            with tf.variable_scope("pca"):
                # Compute differentially private PCA.
                all_data, _ = MnistInput(mnist_train_file, NUM_TRAINING_IMAGES,
                                         False)
                pca_projection = dp_pca.ComputeDPPrincipalProjection(
                    all_data, network_parameters.projection_dimensions,
                    gaussian_sanitizer, [FLAGS.pca_eps, FLAGS.pca_delta],
                    pca_sigma)
                assign_pca_proj = tf.assign(projection, pca_projection)
                init_ops.append(assign_pca_proj)

        # Add global_step
        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  trainable=False,
                                  name="global_step")

        if with_privacy:
            gd_op = dp_optimizer.DPGradientDescentOptimizer(
                lr, [eps, delta],
                gaussian_sanitizer,
                sigma=sigma,
                batches_per_lot=FLAGS.batches_per_lot).minimize(
                    cost, global_step=global_step)
        else:
            gd_op = tf.train.GradientDescentOptimizer(lr).minimize(cost)

        saver = tf.train.Saver()
        coord = tf.train.Coordinator()
        _ = tf.train.start_queue_runners(sess=sess, coord=coord)

        # We need to maintain the initialization sequence.
        for v in tf.trainable_variables():
            sess.run(tf.variables_initializer([v]))
        sess.run(tf.global_variables_initializer())
        sess.run(init_ops)

        results = []
        start_time = time.time()
        prev_time = start_time
        filename = "results-0.json"
        log_path = os.path.join(save_path, filename)

        target_eps = [float(s) for s in FLAGS.target_eps.split(",")]
        if FLAGS.accountant_type == "Amortized":
            # Only matters if --terminate_based_on_privacy is true.
            target_eps = [max(target_eps)]
        max_target_eps = max(target_eps)

        lot_size = FLAGS.batches_per_lot * FLAGS.batch_size
        # NOTE(review): `/` is integer division on Python 2 but true division
        # on Python 3, so `epoch` below is fractional under Python 3 - confirm
        # which behaviour utils.VaryRate expects.
        lots_per_epoch = NUM_TRAINING_IMAGES / lot_size
        # `range` works on both Python 2 and Python 3 (the original used
        # xrange).
        for step in range(num_steps):
            epoch = step / lots_per_epoch
            curr_lr = utils.VaryRate(FLAGS.lr, FLAGS.end_lr,
                                     FLAGS.lr_saturate_epochs, epoch)
            curr_eps = utils.VaryRate(FLAGS.eps, FLAGS.end_eps,
                                      FLAGS.eps_saturate_epochs, epoch)
            # One step is one lot, i.e. batches_per_lot optimizer runs.
            for _ in range(FLAGS.batches_per_lot):
                _ = sess.run([gd_op],
                             feed_dict={
                                 lr: curr_lr,
                                 eps: curr_eps,
                                 delta: FLAGS.delta
                             })
            sys.stderr.write("step: %d\n" % step)

            # See if we should stop training due to exceeded privacy budget:
            should_terminate = False
            terminate_spent_eps_delta = None
            if with_privacy and FLAGS.terminate_based_on_privacy:
                terminate_spent_eps_delta = priv_accountant.get_privacy_spent(
                    sess, target_eps=[max_target_eps])[0]
                # For the Moments accountant, we should always have
                # spent_eps == max_target_eps.
                if (terminate_spent_eps_delta.spent_delta > FLAGS.target_delta
                        or
                        terminate_spent_eps_delta.spent_eps > max_target_eps):
                    should_terminate = True

            # Evaluate on schedule, and always once more when terminating.
            if (eval_steps > 0 and
                (step + 1) % eval_steps == 0) or should_terminate:
                if with_privacy:
                    spent_eps_deltas = priv_accountant.get_privacy_spent(
                        sess, target_eps=target_eps)
                else:
                    spent_eps_deltas = [accountant.EpsDelta(0, 0)]
                for spent_eps, spent_delta in spent_eps_deltas:
                    sys.stderr.write("spent privacy: eps %.4f delta %.5g\n" %
                                     (spent_eps, spent_delta))

                # Eval reloads the model from save_path, so checkpoint first.
                saver.save(sess, save_path=save_path + "/ckpt")
                train_accuracy, _ = Eval(mnist_train_file,
                                         network_parameters,
                                         num_testing_images=NUM_TESTING_IMAGES,
                                         randomize=True,
                                         load_path=save_path)
                sys.stderr.write("train_accuracy: %.2f\n" % train_accuracy)
                test_accuracy, mistakes = Eval(
                    mnist_test_file,
                    network_parameters,
                    num_testing_images=NUM_TESTING_IMAGES,
                    randomize=False,
                    load_path=save_path,
                    save_mistakes=FLAGS.save_mistakes)
                sys.stderr.write("eval_accuracy: %.2f\n" % test_accuracy)

                curr_time = time.time()
                elapsed_time = curr_time - prev_time
                prev_time = curr_time

                results.append({
                    "step": step + 1,  # Number of lots trained so far.
                    "elapsed_secs": elapsed_time,
                    "spent_eps_deltas": spent_eps_deltas,
                    "train_accuracy": train_accuracy,
                    "test_accuracy": test_accuracy,
                    "mistakes": mistakes
                })
                loginfo = {
                    "elapsed_secs": curr_time - start_time,
                    "spent_eps_deltas": spent_eps_deltas,
                    "train_accuracy": train_accuracy,
                    "test_accuracy": test_accuracy,
                    "num_training_steps": step + 1,  # Steps so far.
                    "mistakes": mistakes,
                    "result_series": results
                }
                loginfo.update(params)
                if log_path:
                    # The log file is rewritten in full at every evaluation;
                    # the `with` block closes it.
                    with tf.gfile.Open(log_path, "w") as f:
                        json.dump(loginfo, f, indent=2)
                        f.write("\n")

            if should_terminate:
                break

    # From here on the code runs outside the training graph/session context
    # and rebinds `network_parameters` to a description built from FLAGS.
    network_parameters = utils.NetworkParameters()

    # Construct the network configuration from command-line flags.
    network_parameters.input_size = IMAGE_SIZE**2
    network_parameters.default_gradient_l2norm_bound = (
        FLAGS.default_gradient_l2norm_bound)
    if FLAGS.projection_dimensions > 0 and FLAGS.num_conv_layers > 0:
        # Could add support for PCA after convolutions.
        # Currently BuildNetwork can build the network with conv followed by
        # projection, but the PCA training works on data, rather than data run
        # through a few layers. Will need to init the convs before running the
        # PCA, and need to change the PCA subroutine to take a network and
        # perhaps allow for batched inputs, to handle larger datasets.
        raise ValueError("Currently you can't do PCA and have convolutions "
                         "at the same time. Pick one")

    if FLAGS.num_conv_layers > 0:
        conv = utils.ConvParameters()
        conv.name = "conv1"
        conv.in_channels = 1
        conv.out_channels = 128
        conv.num_outputs = 128 * 14 * 14
        network_parameters.conv_parameters.append(conv)
        # defaults for the rest: 5x5,stride 1, relu, maxpool 2x2,stride 2.
        # insize 28x28, bias, stddev 0.1, non-trainable.
    if FLAGS.num_conv_layers > 1:
        # Use the same utils factory as conv1; the original called
        # ConvParameters() on the network_parameters instance instead.
        conv = utils.ConvParameters()
        conv.name = "conv2"
        conv.in_channels = 128
        conv.out_channels = 128
        conv.num_outputs = 128 * 7 * 7
        conv.in_size = 14
        # defaults for the rest: 5x5,stride 1, relu, maxpool 2x2,stride 2.
        # bias, stddev 0.1, non-trainable.
        network_parameters.conv_parameters.append(conv)

    if FLAGS.num_conv_layers > 2:
        raise ValueError(
            "Currently --num_conv_layers must be 0,1 or 2. "
            "Manually create a network_parameters proto for more.")

    if FLAGS.projection_dimensions > 0:
        network_parameters.projection_type = "PCA"
        network_parameters.projection_dimensions = FLAGS.projection_dimensions
    for i in range(FLAGS.num_hidden_layers):
        hidden = utils.LayerParameters()
        hidden.name = "hidden%d" % i
        hidden.num_units = FLAGS.hidden_layer_num_units
        hidden.relu = True
        hidden.with_bias = False
        hidden.trainable = not FLAGS.freeze_bottom_layers
        network_parameters.layer_parameters.append(hidden)

    # Output layer: linear (no ReLU) and without a bias term.
    logits = utils.LayerParameters()
    logits.name = "logits"
    logits.num_units = 10
    logits.relu = False
    logits.with_bias = False
    network_parameters.layer_parameters.append(logits)

    # Build the network on a placeholder input. Neither name is used again in
    # the visible code, but the calls add ops to the current default graph.
    inputs = tf.placeholder(tf.float32, [None, 784], name='inputs')
    outputs, _, _ = utils.BuildNetwork(inputs, network_parameters)
Exemplo n.º 6
0
        ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
        return meta_file, ckpt_file

    meta_files = [s for s in files if '.ckpt' in s]
    max_step = -1
    for f in files:
        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
        if step_str is not None and len(step_str.groups()) >= 2:
            step = int(step_str.groups()[1])
            if step > max_step:
                max_step = step
                ckpt_file = step_str.groups()[0]
    return meta_file, ckpt_file


# Module-level network description object; no fields are set on it here.
network_parameters = utils.NetworkParameters()

# NOTE(review): these imports sit mid-module rather than at the top of the
# file; they are left in place.
import mnist
import numpy as np
# Flatten the training images to rows of 784 values and divide by 255.
# (presumably this maps 8-bit pixel values into [0, 1] - confirm against the
# mnist package).
train_images = mnist.train_images().reshape([-1, 784]) / 255.
train_labels = mnist.train_labels()

# Session configuration with the GPU allow_growth option switched on.
run_config = tf.ConfigProto()
run_config.gpu_options.allow_growth = True

# Order matters below: the session is created first, then the model is
# loaded, and only then are tensors looked up by name in the default graph.
sess = tf.InteractiveSession(config=run_config)
# NOTE(review): hard-coded, machine-specific model directory.
# load_model is defined outside this view; presumably it restores the saved
# graph and weights into the current session - confirm.
load_model("/Users/laurent/Desktop/DP/models/")
images = tf.get_default_graph().get_tensor_by_name("images:0")
logits = tf.get_default_graph().get_tensor_by_name("logits:0")