Example #1
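This test, apparently from MorphNet's network-regularizer test suite, builds the InceptionV2 base under a slim arg_scope that enables batch norm (so the Gamma regularizers can read the batch-norm scale variables), attaches FLOP, P100-latency, V100-latency, and model-size regularizers at the logits op, and checks each total cost against a golden value.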
  def testInceptionV2_TotalCost(self):
    conv_params = {
        'activation_fn': tf.nn.relu6,
        'weights_regularizer': contrib_layers.l2_regularizer(0.00004),
        'weights_initializer': tf.random_normal_initializer(stddev=0.03),
        'trainable': True,
        'biases_initializer': tf.constant_initializer(0.0),
        'normalizer_fn': contrib_layers.batch_norm,
        'normalizer_params': {
            'is_training': False,
            'decay': 0.9997,
            'scale': True,
            'epsilon': 0.001,
        }
    }

    tf.reset_default_graph()
    with slim.arg_scope([slim.layers.conv2d, slim.layers.separable_conv2d],
                        **conv_params):
      # Build model.
      image = tf.zeros([1, 224, 224, 3])
      net, _ = inception.inception_v2_base(image)
      logits = slim.layers.fully_connected(
          net,
          1001,
          activation_fn=None,
          scope='logits',
          weights_initializer=tf.random_normal_initializer(stddev=1e-3),
          biases_initializer=tf.constant_initializer(0.0))

    # Instantiate regularizers.
    flop_reg = flop_regularizer.GammaFlopsRegularizer(
        [logits.op], gamma_threshold=0.5)
    p100_reg = latency_regularizer.GammaLatencyRegularizer(
        [logits.op], gamma_threshold=0.5, hardware='P100')
    v100_reg = latency_regularizer.GammaLatencyRegularizer(
        [logits.op], gamma_threshold=0.5, hardware='V100')
    model_size_reg = model_size_regularizer.GammaModelSizeRegularizer(
        [logits.op], gamma_threshold=0.5)

    with self.cached_session():
      tf.global_variables_initializer().run()

      # Verify that the total costs match the expected golden values.
      self.assertAllClose(3.86972e+09, flop_reg.get_cost().eval())
      self.assertAllClose(517536.0, p100_reg.get_cost().eval())
      self.assertAllClose(173330.453125, v100_reg.get_cost().eval())
      self.assertAllClose(1.11684e+07, model_size_reg.get_cost().eval())
Example #2
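This fixture builds a small two-headed test network: image fans out to conv1 and conv2, whose outputs are concatenated along the channel axis and fed into both conv3 and conv4. Passing both output ops to GammaLatencyRegularizer exercises the case where channel groups are shared across branches through the concat. NUM_CHANNELS and HARDWARE are module-level constants in the test file.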
    def build_model(self):
        # Our test model is:
        #
        #         -> conv1 --+     -> conv3 -->
        #        /           |    /
        #  image          [concat]
        #        \           |    \
        #         -> conv2 --+     -> conv4 -->
        #
        # (the model has two "outputs", conv3 and conv4).
        #
        image = tf.constant(0.0, shape=[1, 17, 19, NUM_CHANNELS])
        conv1 = slim.layers.conv2d(image,
                                   13, [7, 5],
                                   padding='SAME',
                                   scope='conv1')
        conv2 = slim.layers.conv2d(image,
                                   23, [1, 1],
                                   padding='SAME',
                                   scope='conv2')
        concat = tf.concat([conv1, conv2], 3)
        self.conv3 = slim.layers.conv2d(concat,
                                        29, [3, 3],
                                        stride=2,
                                        padding='SAME',
                                        scope='conv3')
        self.conv4 = slim.layers.conv2d(concat,
                                        31, [1, 1],
                                        stride=1,
                                        padding='SAME',
                                        scope='conv4')
        self.name_to_var = {v.op.name: v for v in tf.global_variables()}

        self.regularizer = latency_regularizer.GammaLatencyRegularizer(
            [self.conv3.op, self.conv4.op],
            gamma_threshold=0.45,
            hardware=HARDWARE)
Example #3
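This parameterized test spot-checks the per-op latency cost model, which is roofline-style: a compute-bound convolution is charged its FLOPs divided by the hardware's peak compute, while a memory-bound one is charged the bytes it moves divided by memory bandwidth, with both peak figures looked up per platform in resource_function. (self.get_cost is a test helper, defined elsewhere in the class, that evaluates the regularizer's cost for just the given ops.)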
  def testInceptionV2(self, hardware):
    image = tf.zeros([1, 224, 224, 3])
    net, _ = inception.inception_v2_base(image)
    g = tf.get_default_graph()
    self.regularizer = latency_regularizer.GammaLatencyRegularizer(
        [net.op], gamma_threshold=0.5, hardware=hardware)

    # Compute-bound convolution.
    op = g.get_operation_by_name(
        'InceptionV2/Mixed_3c/Branch_2/Conv2d_0c_3x3/Conv2D')
    # FLOP cost = 2 * NHWRSCK
    expected_cost = (2 * 28 * 28 * 3 * 3 * 96 * 96
                     / resource_function.PEAK_COMPUTE[hardware])
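    # For reference: 2 * 28 * 28 * 3 * 3 * 96 * 96 = 130,056,192 FLOPs
    # for this 3x3, 96-in/96-out convolution on a 28x28 feature map.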
    self.assertAllClose(expected_cost, self.get_cost([op]))

    # Memory-bound convolution.
    op = g.get_operation_by_name(
        'InceptionV2/Conv2d_1a_7x7/separable_conv2d')
    # Memory cost = input_tensor + weight_tensor + output_tensor
    #             = NHWC + RSCK + NHWK
    # Note that this is a pointwise convolution with kernel 1x1.
    expected_cost = ((112 * 112 * 24 + 24 * 64 + 112 * 112 * 64) * 4
                     / resource_function.MEMORY_BANDWIDTH[hardware])
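    # For reference: (301,056 + 1,536 + 802,816) * 4 = 4,421,632 bytes,
    # i.e. float32 input + 1x1 weights + output.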
    self.assertAllClose(expected_cost, self.get_cost([op]))
Example #4
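This end-to-end script shows the intended training-time workflow: build a model, attach one of the Gamma regularizers selected by a command-line flag, add the scaled regularization term to the task loss, and periodically export the learned structure via StructureExporter. As shown, it is a fragment: it assumes imports along the following lines (module paths per the MorphNet package layout), plus an mnist_model helper defined elsewhere in the script.

import numpy as np
import tensorflow as tf

from morph_net.network_regularizers import activation_regularizer
from morph_net.network_regularizers import flop_regularizer
from morph_net.network_regularizers import latency_regularizer
from morph_net.tools import structure_exporter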
def main(args):
    # Load MNIST Data
    train_data, test_data = tf.keras.datasets.mnist.load_data()
    X_train, y_train = train_data[0], train_data[1]
    X_test, y_test = test_data[0], test_data[1]
    global_step = tf.train.get_or_create_global_step()

    N, H, W = X_train.shape
    X_ph = tf.placeholder(tf.float32, [None, H, W, 1])
    y_ph = tf.placeholder(tf.int64, [None])

    # Define the model.
    logits, pred = mnist_model(X_ph, scope='base')
    loss_op = tf.losses.sparse_softmax_cross_entropy(labels=y_ph,
                                                     logits=logits)
    acc_op = tf.reduce_mean(tf.cast(tf.equal(pred, y_ph), tf.float32))

    # Set up the MorphNet regularizer and its loss/cost ops.
    if args.reg_type == "activation":
        network_regularizer = activation_regularizer.GammaActivationRegularizer(
            output_boundary=[logits.op],
            input_boundary=[X_ph.op, y_ph.op],
            gamma_threshold=args.gamma_threshold)
    elif args.reg_type == "flop":
        network_regularizer = flop_regularizer.GammaFlopsRegularizer(
            output_boundary=[logits.op],
            input_boundary=[X_ph.op, y_ph.op],
            gamma_threshold=args.gamma_threshold)
    elif args.reg_type == "latency":
        network_regularizer = latency_regularizer.GammaLatencyRegularizer(
            output_boundary=[logits.op],
            input_boundary=[X_ph.op, y_ph.op],
            hardware=args.hardware,
            gamma_threshold=args.gamma_threshold)

    reg_loss_op = (network_regularizer.get_regularization_term()
                   * args.reg_penalty)
    cost_op = network_regularizer.get_cost()
    exporter = structure_exporter.StructureExporter(
        network_regularizer.op_regularizer_manager)

    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_op = optimizer.minimize(loss_op + reg_loss_op,
                                  global_step=global_step)

    hooks = [
        tf.train.StopAtStepHook(last_step=args.steps + 1),
        tf.train.LoggingTensorHook(
            tensors={'step': global_step, 'loss': loss_op},
            every_n_iter=10),
    ]
    # Allow GPU memory allocation to grow as needed.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Training Loop
    with tf.train.MonitoredTrainingSession(checkpoint_dir=args.outdir,
                                           hooks=hooks,
                                           config=config) as mon_sess:
        while not mon_sess.should_stop():
            idx = np.random.choice(N, args.batch_size, replace=False)
            x_t, y_t = np.expand_dims(X_train[idx], axis=-1), y_train[idx]
            train_dict = {X_ph: x_t, y_ph: y_t}

            val_idx = np.random.choice(X_test.shape[0], 5000, replace=False)
            x_v = np.expand_dims(X_test[val_idx], axis=-1)
            y_v = y_test[val_idx]
            val_dict = {X_ph: x_v, y_ph: y_v}

            global_step_val = mon_sess.run(global_step, feed_dict=train_dict)
            structure_exporter_tensors, v_loss, v_acc, reg_cost = mon_sess.run(
                [exporter.tensors, loss_op, acc_op, cost_op],
                feed_dict=val_dict)
            mon_sess.run(train_op, feed_dict=train_dict)

            print("Step: ", global_step_val)
            print("Validation Loss: ", v_loss)
            print("Validation Acc: ", v_acc)
            print("Reg Cost: ", reg_cost)

            # Periodically export the learned structure (alive counts) to JSON.
            if global_step_val % 1000 == 0:
                exporter.populate_tensor_values(structure_exporter_tensors)
                exporter.create_file_and_save_alive_counts(
                    args.outdir, global_step_val)
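For completeness, a minimal sketch of an argument parser that would feed main(). The flag names mirror the args.* attributes used above; every default value here is an illustrative assumption, not taken from the original script.

import argparse


def parse_args():
    # Flag names mirror the args.* attributes consumed by main();
    # the defaults are illustrative assumptions.
    parser = argparse.ArgumentParser(description='MorphNet MNIST example')
    parser.add_argument('--reg_type', default='flop',
                        choices=['activation', 'flop', 'latency'])
    parser.add_argument('--hardware', default='V100',
                        help='Only used when --reg_type=latency.')
    parser.add_argument('--gamma_threshold', type=float, default=1e-3)
    parser.add_argument('--reg_penalty', type=float, default=1e-9)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--steps', type=int, default=10000)
    parser.add_argument('--outdir', default='/tmp/morphnet_mnist')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())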