Пример #1
0
 def build_network(self, images, phase_train=True, nclass=1001, image_depth=3,
                   data_type=tf.float32, data_format='NCHW',
                   use_tf_layers=True, fp16_vars=False):
   """Builds the model graph and returns `(logits, aux_logits)`.

   Args:
     images: Input image tensor. It is transposed with permutation
       [0, 3, 1, 2] when `data_format` is 'NCHW', so it is presumably
       supplied as NHWC -- confirm against the caller.
     phase_train: Forwarded to the ConvNetBuilder.
     nclass: Number of output units of the final affine layer(s).
     image_depth: Forwarded to the ConvNetBuilder as the input depth.
     data_type: Dtype handed to the ConvNetBuilder. When it is tf.float16 the
       returned logits are cast to tf.float32.
     data_format: 'NCHW' triggers the transpose described above; the value is
       also forwarded to the ConvNetBuilder.
     use_tf_layers: Forwarded to the ConvNetBuilder.
     fp16_vars: When true and `data_type` is tf.float16, the variable dtype
       passed to the ConvNetBuilder is tf.float16 instead of tf.float32.

   Returns:
     A `(logits, aux_logits)` tuple. `aux_logits` is None when the builder
     has no auxiliary top layer.
   """
   if data_format == 'NCHW':
     images = tf.transpose(images, [0, 3, 1, 2])

   # Variables stay in float32 unless fp16 variables were explicitly requested.
   wants_fp16_vars = data_type == tf.float16 and fp16_vars
   variable_dtype = tf.float16 if wants_fp16_vars else tf.float32

   builder = convnet_builder.ConvNetBuilder(
       images, image_depth, phase_train, use_tf_layers,
       data_format, data_type, variable_dtype)

   with tf.variable_scope('cg', custom_getter=builder.get_custom_getter()):
     self.add_inference(builder)

     # Final fully-connected class layer, unless the model opts out of it.
     if self.skip_final_affine_layer():
       logits = builder.top_layer
     else:
       logits = builder.affine(nclass, activation='linear')

     aux_logits = None
     if builder.aux_top_layer is not None:
       with builder.switch_to_aux_top_layer():
         aux_logits = builder.affine(
             nclass, activation='linear', stddev=0.001)

   if data_type == tf.float16:
     # TODO(reedwm): Determine if we should do this cast here.
     logits = tf.cast(logits, tf.float32)
     if aux_logits is not None:
       aux_logits = tf.cast(aux_logits, tf.float32)

   # Report the total element count over every trainable variable.
   per_variable_sizes = [
       np.prod(variable.get_shape().as_list())
       for variable in tf.trainable_variables()
   ]
   total_size = np.sum(per_variable_sizes)
   print('Total trainable variables per GPU:{:,}'.format(total_size))
   return logits, aux_logits
Пример #2
0
    def build_network(self, inputs, phase_train=True, nclass=1001):
        """Build the inference graph and return its logits.

        The compute dtype is read from `self.data_type`; float16 variables
        are used only when `self.fp16_vars` is also set.

        Args:
          inputs: Indexable collection whose first element is the image
            tensor.
          phase_train: Forwarded to the ConvNetBuilder and to
            `self.add_inference`.
          nclass: Forwarded to `self.add_inference`.

        Returns:
          A BuildNetworkResult holding the logits (cast to tf.float32 when
          `self.data_type` is tf.float16) and `extra_info=None`.
        """
        images = inputs[0]
        if self.data_format == 'NCHW':
            images = tf.transpose(images, [0, 3, 1, 2])

        wants_fp16_vars = self.data_type == tf.float16 and self.fp16_vars
        var_type = tf.float16 if wants_fp16_vars else tf.float32

        # The builder is only used here for its custom getter; the layers
        # themselves are created by `self.add_inference`.
        builder = convnet_builder.ConvNetBuilder(
            images, self.depth, phase_train, self.use_tf_layers,
            self.data_format, self.data_type, var_type)
        getter = builder.get_custom_getter()

        with tf.variable_scope('cg', custom_getter=getter):
            logits = self.add_inference(images, phase_train, nclass)

        if self.data_type == tf.float16:
            logits = tf.cast(logits, tf.float32)
        return BuildNetworkResult(logits=logits, extra_info=None)
Пример #3
0
    def build_network(self, inputs, phase_train=True, nclass=1001):
        """Build the network, attach debug probes, and return its logits.

        Args:
          inputs: Indexable collection whose first element is the image
            tensor (the original docstring describes it as images and
            labels).
          phase_train: Forwarded to the ConvNetBuilder.
          nclass: Number of output units of the final affine layer(s).

        Returns:
          A BuildNetworkResult whose `logits` are the (probed) final-layer
          output and whose `extra_info` is the auxiliary logits, or None
          when the builder has no auxiliary top layer.
        """
        images = inputs[0]

        # NOTE(review): the input probe and shape print only happen on the
        # NCHW path -- confirm that is intended.
        if self.data_format == 'NCHW':
            images = tf.transpose(images, [0, 3, 1, 2])
            images = debug.add_prob(images, name='input_image')
            print("input_image shape: {}".format(images.get_shape()))

        wants_fp16_vars = self.data_type == tf.float16 and self.fp16_vars
        var_type = tf.float16 if wants_fp16_vars else tf.float32

        builder = convnet_builder.ConvNetBuilder(
            images, self.depth, phase_train, self.use_tf_layers,
            self.data_format, self.data_type, var_type)

        with tf.variable_scope('cg',
                               custom_getter=builder.get_custom_getter()):
            self.add_inference(builder)

            # Final fully-connected class layer, unless the model skips it.
            if self.skip_final_affine_layer():
                logits = builder.top_layer
            else:
                logits = builder.affine(
                    nclass, activation='linear', name='fc_final')
            logits = debug.add_prob(logits, name='fc_final')
            print("fc_final shape: {}".format(logits.get_shape()))

            aux_logits = None
            if builder.aux_top_layer is not None:
                with builder.switch_to_aux_top_layer():
                    aux_logits = builder.affine(
                        nclass, activation='linear', stddev=0.001)

        if self.data_type == tf.float16:
            # TODO(reedwm): Determine if we should do this cast here.
            logits = tf.cast(logits, tf.float32)
            if aux_logits is not None:
                aux_logits = tf.cast(aux_logits, tf.float32)

        return BuildNetworkResult(logits=logits, extra_info=aux_logits)
Пример #4
0
#with tf.device(0):
# Synthetic image batch, drawn from a truncated normal centred on 127.
images = tf.truncated_normal(image_shape,
                             dtype=data_type,
                             mean=127,
                             stddev=60,
                             name='synthetic_images')

# Keep the batch in a local variable; it is populated by the local-variable
# initializer that is run below.
images = tf.contrib.framework.local_variable(images, name='gpu_cached_images')

labels = tf.random_uniform(labels_shape,
                           minval=0,
                           maxval=nclass - 1,
                           dtype=tf.int32,
                           name='synthetic_labels')

network = convnet_builder.ConvNetBuilder(images, 3, phase_train, use_tf_layers,
                                         data_format, data_type, data_type)

# Build VGG16 on top of the builder, then add the final linear class layer.
model = vgg_model.Vgg16Model()
model.add_inference(network)
logits = network.affine(nclass, activation='linear')

# `tf.initialize_all_variables` / `tf.initialize_local_variables` are
# deprecated aliases; use the supported initializer ops instead.
init_op = tf.global_variables_initializer()
init_local_op = tf.local_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    sess.run(init_local_op)

    # Single forward pass over the cached synthetic batch.
    sess.run(logits)
Пример #5
0
    def add_forward_pass_and_gradients(self, phase_train, image_producer_stage):
        """Add ops for forward-pass and gradient computations.

        Args:
          phase_train: True to also build loss and gradient ops; False to
            stop after the forward pass and accuracy ops.
          image_producer_stage: Object whose `get()` returns
            `(images, labels)`. Only used when
            `self.use_synthetic_gpu_images` is false.

        Returns:
          A dict with:
            'top_1_op', 'top_5_op': present when not training or when
              `self.params.print_training_accuracy` is set.
            'logits': present only when `phase_train` is false.
            'loss', 'gradvars': present only when `phase_train` is true;
              'gradvars' is a list of `(gradient, variable)` pairs, where a
              gradient may be None for a variable `tf.gradients` could not
              connect to the loss.
        """
        nclass = self.dataset.num_classes + 1
        input_data_type = get_data_type(self.params)
        data_type = get_data_type(self.params)
        with tf.device('/gpu:0'):
            if not self.use_synthetic_gpu_images:
                images, labels = image_producer_stage.get()
            else:
                # Minor hack to avoid H2D copy when using synthetic data
                image_size = self.model.get_image_size()
                image_shape = [
                    self.batch_size, image_size, image_size,
                    self.dataset.depth
                ]
                labels_shape = [self.batch_size]
                # Synthetic image should be within [0, 255].
                images = tf.truncated_normal(
                    image_shape,
                    dtype=input_data_type,
                    mean=127,
                    stddev=60,
                    name='synthetic_images')
                images = tf.contrib.framework.local_variable(
                    images, name='gpu_cached_images')
                labels = tf.random_uniform(
                    labels_shape,
                    minval=0,
                    maxval=nclass - 1,
                    dtype=tf.int32,
                    name='synthetic_labels')

        # Rescale from [0, 255] to [0, 2]
        images = tf.multiply(images, 1. / 127.5)
        # Rescale to [-1, 1]
        images = tf.subtract(images, 1.0)

        if self.data_format == 'NCHW':
            images = tf.transpose(images, [0, 3, 1, 2])
        if input_data_type != data_type:
            images = tf.cast(images, data_type)
        var_type = tf.float32
        network = convnet_builder.ConvNetBuilder(
            images, self.dataset.depth, phase_train, self.params.use_tf_layers,
            self.data_format, data_type, var_type)
        with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
            self.model.add_inference(network)
            # Add the final fully-connected class layer
            logits = network.affine(nclass, activation='linear')
            aux_logits = None
            if network.aux_top_layer is not None:
                with network.switch_to_aux_top_layer():
                    aux_logits = network.affine(
                        nclass, activation='linear', stddev=0.001)
        if data_type == tf.float16:
            # TODO(reedwm): Determine if we should do this cast here.
            logits = tf.cast(logits, tf.float32)
            if aux_logits is not None:
                aux_logits = tf.cast(aux_logits, tf.float32)

        results = {}  # The return value
        if not phase_train or self.params.print_training_accuracy:
            top_1_op = tf.reduce_sum(
                tf.cast(tf.nn.in_top_k(logits, labels, 1), data_type))
            top_5_op = tf.reduce_sum(
                tf.cast(tf.nn.in_top_k(logits, labels, 5), data_type))
            results['top_1_op'] = top_1_op
            results['top_5_op'] = top_5_op

        if not phase_train:
            results['logits'] = logits
            return results

        loss = loss_function(logits, labels, aux_logits=aux_logits)
        params = tf.trainable_variables()
        weight_decay = self.params.weight_decay
        if weight_decay is not None and weight_decay != 0.:
            # Only build the L2 term when it actually contributes to the
            # loss; this also avoids tf.add_n on an empty variable list when
            # weight decay is disabled.
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in params])
            loss += weight_decay * l2_loss

        aggmeth = tf.AggregationMethod.DEFAULT
        scaled_loss = loss if self.loss_scale is None else loss * self.loss_scale
        grads = tf.gradients(scaled_loss, params, aggregation_method=aggmeth)
        if self.loss_scale is not None:
            # TODO(reedwm): If automatic loss scaling is not used, we could avoid
            # these multiplications by directly modifying the learning rate instead.
            # If this is done, care must be taken to ensure that this scaling method
            # is correct, as some optimizers square gradients and do other
            # operations which might not be compatible with modifying both the
            # gradients and the learning rate.
            #
            # tf.gradients yields None for variables that do not influence
            # the loss; pass those through untouched, exactly as the
            # no-loss-scale path does, instead of failing on `None.dtype`.
            grads = [
                None if grad is None
                else grad * tf.cast(1. / self.loss_scale, grad.dtype)
                for grad in grads
            ]
        # No variables are created after `params` is captured, so the same
        # list is paired with the gradients.
        gradvars = list(zip(grads, params))
        results['loss'] = loss
        results['gradvars'] = gradvars
        return results
Пример #6
0
  def add_forward_pass_and_gradients(
      self, host_images, host_labels, nclass, phase_train, device_num,
      input_data_type, data_type, input_nchan, use_synthetic_gpu_images,
      gpu_copy_stage_ops, gpu_compute_stage_ops, gpu_grad_stage_ops):
    """Add ops for forward-pass and gradient computations.

    Args:
      host_images: Image tensor; its static shape is used for the staging
        areas and for the synthetic image batch.
      host_labels: Label tensor; staged alongside the images, or used
        directly as the labels when synthetic images are enabled.
      nclass: Number of output units of the final affine layer(s).
      phase_train: True to also build loss and gradient ops; False to stop
        after the forward pass and accuracy ops.
      device_num: Index into `self.raw_devices` / `self.devices`, also passed
        to the variable manager.
      input_data_type: Dtype of the synthetic images; compared against
        `data_type` to decide whether the images need a cast.
      data_type: Dtype handed to the ConvNetBuilder and used for the
        accuracy ops.
      input_nchan: Forwarded to the ConvNetBuilder as the input depth.
      use_synthetic_gpu_images: When true, skips both staging areas and
        generates the images directly on the device.
      gpu_copy_stage_ops: List that receives the first staging area's `put`
        op (real-data path only).
      gpu_compute_stage_ops: List that receives the second staging area's
        `put` op (real-data path only).
      gpu_grad_stage_ops: List that receives the gradient staging `put` op
        when `FLAGS.staged_vars` is set.

    Returns:
      A dict with:
        'top_1_op', 'top_5_op': present when not training or when
          `FLAGS.print_training_accuracy` is set.
        'logits': present only when `phase_train` is false.
        'loss', 'gradvars': present only when `phase_train` is true;
          'gradvars' is a list of `(gradient, variable)` pairs.
    """
    if not use_synthetic_gpu_images:
      # First staging area, created under the CPU device scope.
      with tf.device(self.cpu_device):
        images_shape = host_images.get_shape()
        labels_shape = host_labels.get_shape()
        gpu_copy_stage = data_flow_ops.StagingArea(
            [tf.float32, tf.int32],
            shapes=[images_shape, labels_shape])
        gpu_copy_stage_op = gpu_copy_stage.put(
            [host_images, host_labels])
        gpu_copy_stage_ops.append(gpu_copy_stage_op)
        host_images, host_labels = gpu_copy_stage.get()

    with tf.device(self.raw_devices[device_num]):
      if not use_synthetic_gpu_images:
        # Second staging area, created under this device's raw device scope.
        gpu_compute_stage = data_flow_ops.StagingArea(
            [tf.float32, tf.int32],
            shapes=[images_shape, labels_shape]
        )
        # The CPU-to-GPU copy is triggered here.
        gpu_compute_stage_op = gpu_compute_stage.put(
            [host_images, host_labels])
        images, labels = gpu_compute_stage.get()
        images = tf.reshape(images, shape=images_shape)
        gpu_compute_stage_ops.append(gpu_compute_stage_op)
      else:
        # Minor hack to avoid H2D copy when using synthetic data
        images = tf.truncated_normal(
            host_images.get_shape(),
            dtype=input_data_type,
            stddev=1e-1,
            name='synthetic_images')
        images = tf.contrib.framework.local_variable(
            images, name='gpu_cached_images')
        labels = host_labels

    with tf.device(self.devices[device_num]):
      # Rescale from [0, 255] to [0, 2]
      images = tf.multiply(images, 1./127.5)
      # Rescale to [-1, 1]
      images = tf.subtract(images, 1.0)

      if self.data_format == 'NCHW':
        images = tf.transpose(images, [0, 3, 1, 2])
      if input_data_type != data_type:
        images = tf.cast(images, data_type)
      network = convnet_builder.ConvNetBuilder(images, input_nchan, phase_train,
                                               self.data_format, data_type)
      self.model.add_inference(network)
      # Add the final fully-connected class layer
      logits = network.affine(nclass, activation='linear')
      aux_logits = None
      if network.aux_top_layer is not None:
        with network.switch_to_aux_top_layer():
          aux_logits = network.affine(nclass, activation='linear', stddev=0.001)

      results = {}  # The return value
      if not phase_train or FLAGS.print_training_accuracy:
        # Count of examples whose label is within the top-1 / top-5 logits.
        top_1_op = tf.reduce_sum(
            tf.cast(tf.nn.in_top_k(logits, labels, 1), data_type))
        top_5_op = tf.reduce_sum(
            tf.cast(tf.nn.in_top_k(logits, labels, 5), data_type))
        results['top_1_op'] = top_1_op
        results['top_5_op'] = top_5_op

      if not phase_train:
        # Evaluation: no loss or gradients are built.
        results['logits'] = logits
        return results
      loss = loss_function(logits, labels, aux_logits=aux_logits)
      params = self.variable_mgr.trainable_variables_on_device(device_num)
      # The L2 term is always built but only added when weight decay is
      # non-zero.
      l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in params])
      weight_decay = FLAGS.weight_decay
      if weight_decay is not None and weight_decay != 0.:
        loss += weight_decay * l2_loss

      aggmeth = tf.AggregationMethod.DEFAULT
      grads = tf.gradients(loss, params, aggregation_method=aggmeth)

      if FLAGS.staged_vars:
        grad_dtypes = [grad.dtype for grad in grads]
        grad_shapes = [grad.shape for grad in grads]
        grad_stage = data_flow_ops.StagingArea(grad_dtypes, grad_shapes)
        grad_stage_op = grad_stage.put(grads)
        # In general, this decouples the computation of the gradients and
        # the updates of the weights.
        # During the pipeline warm up, this runs enough training to produce
        # the first set of gradients.
        gpu_grad_stage_ops.append(grad_stage_op)
        grads = grad_stage.get()

      # Gradients are paired with the writable variable references, which
      # are requested separately from the `params` used for differentiation.
      param_refs = self.variable_mgr.trainable_variables_on_device(
          device_num, writable=True)
      gradvars = list(zip(grads, param_refs))
      results['loss'] = loss
      results['gradvars'] = gradvars
      return results