示例#1
0
    def get_hit_rate(self):
        """Returns the hit rate held in `self.var` and then resets the counters.

        `self.var[0]` is read as the total count and `self.var[1]` as the hit
        count. The rate is `hits / max(1, total)` in float32, so a zero total
        yields a rate of zero instead of a division by zero.

        Returns:
          A tensor holding the hit rate. Evaluating it also subtracts the
          values just read from `self.var`.
        """
        lookup_count = self.var[0]
        hit_count = self.var[1]

        numerator = tf.cast(hit_count, tf.float32)
        one = tf.constant(1, dtype=tf.float32)
        denominator = tf.maximum(one, tf.cast(lookup_count, tf.float32))
        ratio = numerator / denominator

        # The rate must be computed before the counters are cleared, and the
        # returned tensor must force the clearing op to run.
        with tf.control_dependencies([ratio]):
            reset_counters = self.var.assign_add([-lookup_count, -hit_count])
        with tf.control_dependencies([reset_counters]):
            return tf.identity(ratio)
    def crop_gt_masks(self, gt_mask_size):
        """Crops each ground-truth mask to its box and resizes it to a fixed size.

        The masks in `self._masks` are reshaped to
        [-1, self._ori_height, self._ori_width, 1], cropped with
        `self._boxes` (mask i is paired with box i) and bilinearly resized to
        `gt_mask_size` x `gt_mask_size`. The result is then zero-padded by 2
        on every side of both spatial dimensions.

        Args:
          gt_mask_size: side length of the square crop before padding.

        Returns:
          A tensor of cropped masks, one per instance, with the channel
          dimension dropped and spatial size `gt_mask_size + 4`.
        """
        box_count = tf.shape(self._boxes)[0]
        mask_count = tf.shape(self._masks)[0]
        # Boxes and masks are paired by index, so their counts must agree.
        counts_match = tf.Assert(tf.equal(box_count, mask_count), [mask_count])

        image_dims = [self._ori_height, self._ori_width, 1]

        def masks_as_images():
            return tf.reshape(self._masks, [-1] + image_dims)

        def no_masks():
            return tf.zeros([0] + image_dims)

        # An image without any instance gets an empty batch of masks.
        mask_images = tf.cond(mask_count > 0, masks_as_images, no_masks)

        with tf.control_dependencies([counts_match]):
            resized = tf.image.crop_and_resize(
                image=mask_images,
                boxes=self._boxes,
                box_ind=tf.range(mask_count, dtype=tf.int32),
                crop_size=[gt_mask_size, gt_mask_size],
                method='bilinear')
            cropped_gt_masks = resized[:, :, :, 0]

        # Leave the instance dimension alone; pad height and width by 2.
        border = tf.constant([[0, 0], [2, 2], [2, 2]])
        return tf.pad(cropped_gt_masks,
                      paddings=border,
                      mode='CONSTANT',
                      constant_values=0.)
def pad_to_fixed_size(data, pad_value, output_shape):
    """Pad data to a fixed length at the first dimension.

    `data` is first reshaped to [-1, output_shape[1]]; rows filled with
    `pad_value` are then appended until there are output_shape[0] rows.

    Args:
      data: Tensor to be padded to output_shape.
      pad_value: A constant value assigned to the paddings. It must be
        representable in the dtype of `data`.
      output_shape: The output shape of a 2D tensor, given as
        [max_num_instances, dimension].

    Returns:
      The padded tensor with output_shape [max_num_instances, dimension].
      A graph-level assertion fails if `data` holds more than
      max_num_instances rows.
    """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(data)[0]
    assert_length = tf.Assert(tf.less_equal(num_instances, max_num_instances),
                              [num_instances])
    # Tie the assertion to pad_length so that it runs whenever the padding
    # is evaluated.
    with tf.control_dependencies([assert_length]):
        pad_length = max_num_instances - num_instances
    # Build the filler in the dtype of `data`. tf.ones defaults to float32,
    # which made tf.concat reject any non-float32 input.
    paddings = pad_value * tf.ones([pad_length, dimension], dtype=data.dtype)
    padded_data = tf.concat([data, paddings], axis=0)
    padded_data = tf.reshape(padded_data, output_shape)
    return padded_data
示例#4
0
def train_ddpg(dataset,
               policy,
               actor_optimizer=None,
               critic_optimizer=None,
               pack_transition_fn=None,
               ddpg_graph_fn=None,
               log_dir=None,
               master='local',
               task=0,
               training_steps=None,
               max_training_steps=100000,
               reuse=False,
               init_checkpoint=None,
               update_target_every_n_steps=50,
               log_every_n_steps=None,
               save_checkpoint_steps=500,
               save_summaries_steps=500):
  """Self-contained learning loop for offline actor-critic (DDPG) training.

  Code inspired by OpenAI Baselines' deepq.build_train. An actor train op and
  a critic train op are built from the losses returned by `ddpg_graph_fn` and
  run together inside a MonitoredTrainingSession.

  Args:
    dataset: tf.data.Dataset providing transitions.
    policy: Policy object providing `get_a_func` and `get_q_func`, the
      functors used to build the actor and the critic.
    actor_optimizer: Optional instance of an optimizer for the actor network.
      If not specified, creates an AdamOptimizer using the default constructor.
    critic_optimizer: Optional instance of an optimizer for the critic network.
      If not specified, creates an AdamOptimizer using the default constructor.
    pack_transition_fn: Optional function that performs additional processing
      of the transition. This is a convenience method for ad-hoc manipulation of
      transition data passed to the learning function after parsing.
    ddpg_graph_fn: Function used to construct training objectives w.r.t. actor
      and critic outputs. Called as `ddpg_graph_fn(a_func, q_func, transition)`
      and must return `(actor_loss, critic_loss, all_summaries)`.
    log_dir: Where to save model checkpoints and tensorboard summaries.
    master: Optional address of master worker. Specify this when doing
      distributed training.
    task: Optional worker task for distributed training. Defaults to solo master
      task on a single machine. Task 0 is the chief.
    training_steps: Optional number of steps to run training before terminating
      early. The loop exits as soon as the fetched global step is a multiple of
      `training_steps`. Max_training_steps remains unchanged - training will
      terminate after max_training_steps whether or not training_steps is
      specified.
    max_training_steps: maximum number of training iters. If falsy, no
      StopAtStepHook is installed.
    reuse: If True, reuse existing variables for all declared variables by this
      function.
    init_checkpoint: Optional checkpoint to restore prior to training. If not
      provided, variables are initialized using global_variables_initializer().
    update_target_every_n_steps: How many global steps (training) between
      copying the Q network weights (scope='q_func') to target network
      (scope='target_q_func').
    log_every_n_steps: How many global steps between logging loss tensors.
    save_checkpoint_steps: How many global steps between saving TF variables
      to a checkpoint file.
    save_summaries_steps: How many global steps between saving TF summaries.

  Returns:
    A tuple `(np_step, done)`. `np_step` is the last `global_step` value
    fetched by the training loop (0 if the loop never ran). `done` is True when
    `np_step` has reached `max_training_steps`, and False when
    `max_training_steps` is None.
  """
  data_iterator = dataset.make_one_shot_iterator()

  transition = data_iterator.get_next()
  if pack_transition_fn:
    transition = pack_transition_fn(transition)

  if actor_optimizer is None:
    actor_optimizer = tf.train.AdamOptimizer()
  if critic_optimizer is None:
    critic_optimizer = tf.train.AdamOptimizer()

  a_func = policy.get_a_func(is_training=True, reuse=reuse)
  q_func = policy.get_q_func(is_training=True, reuse=reuse)
  actor_loss, critic_loss, all_summaries = ddpg_graph_fn(
      a_func, q_func, transition)

  a_func_vars = tf.contrib.framework.get_trainable_variables(scope='a_func')
  q_func_vars = framework.get_trainable_variables(scope='q_func')
  target_q_func_vars = framework.get_trainable_variables(scope='target_q_func')

  global_step = tf.train.get_or_create_global_step()

  # CRITIC OPTIMIZATION
  # Only optimize q_func and update its batchnorm params. The critic op is the
  # only one that increments global_step.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='q_func')
  critic_train_op = tf.contrib.training.create_train_op(
      critic_loss,
      critic_optimizer,
      global_step=global_step,
      update_ops=update_ops,
      summarize_gradients=True,
      variables_to_train=q_func_vars,
  )

  # ACTOR OPTIMIZATION
  # NOTE(review): no `update_ops` is passed here (the 'a_func'-scoped
  # UPDATE_OPS used to be collected at this point but were never used). Per the
  # tf.contrib.training.create_train_op documentation, omitting `update_ops`
  # falls back to the whole UPDATE_OPS collection, so this op also triggers
  # update ops outside 'a_func'. Confirm whether that is intended before
  # restricting it.
  actor_train_op = tf.contrib.training.create_train_op(
      actor_loss,
      actor_optimizer,
      global_step=None,
      summarize_gradients=True,
      variables_to_train=a_func_vars,
  )
  # Combine losses to train both actor and critic simultaneously.
  train_op = critic_train_op + actor_train_op

  chief_hooks = []
  hooks = []
  # Save summaries periodically.
  if save_summaries_steps is not None:
    chief_hooks.append(tf.train.SummarySaverHook(
        save_steps=save_summaries_steps,
        output_dir=log_dir, summary_op=all_summaries))

  # Stop after max_training_steps.
  if max_training_steps:
    hooks.append(tf.train.StopAtStepHook(last_step=max_training_steps))

  # Report if loss tensor is NaN.
  hooks.append(tf.train.NanTensorHook(actor_loss))
  hooks.append(tf.train.NanTensorHook(critic_loss))

  if log_every_n_steps is not None:
    tensor_dict = {
        'global_step': global_step,
        'actor loss': actor_loss,
        'critic_loss': critic_loss
    }
    chief_hooks.append(
        tf.train.LoggingTensorHook(tensor_dict, every_n_iter=log_every_n_steps))

    # Measure how fast we are training per sec and save to summary.
    chief_hooks.append(tf.train.StepCounterHook(
        every_n_steps=log_every_n_steps, output_dir=log_dir))

  # If target network exists, periodically update target Q network with new
  # weights (frozen target network). We hack this by
  # abusing a LoggingTensorHook for this.
  if target_q_func_vars and update_target_every_n_steps is not None:
    update_target_expr = []
    # Variables are paired by sorted name, which assumes both scopes declare
    # the same variables in matching order.
    for var, var_t in zip(sorted(q_func_vars, key=lambda v: v.name),
                          sorted(target_q_func_vars, key=lambda v: v.name)):
      update_target_expr.append(var_t.assign(var))
    update_target_expr = tf.group(*update_target_expr)

    # Fetching this dummy constant forces the grouped assignments to run.
    with tf.control_dependencies([update_target_expr]):
      update_target = tf.constant(0)
    chief_hooks.append(
        tf.train.LoggingTensorHook({'update_target': update_target},
                                   every_n_iter=update_target_every_n_steps))

  # Save checkpoints periodically, save all of them.
  saver = tf.train.Saver(max_to_keep=None)
  chief_hooks.append(tf.train.CheckpointSaverHook(
      log_dir, save_steps=save_checkpoint_steps, saver=saver,
      checkpoint_basename='model.ckpt'))

  # Save our experiment params to checkpoint dir.
  chief_hooks.append(gin.tf.GinConfigSaverHook(log_dir, summarize_config=True))

  session_config = tf.ConfigProto(log_device_placement=True)

  init_fn = None
  if init_checkpoint:
    assign_fn = tf.contrib.framework.assign_from_checkpoint_fn(
        init_checkpoint, framework.get_model_variables())
    init_fn = lambda _, sess: assign_fn(sess)
  scaffold = tf.train.Scaffold(saver=saver, init_fn=init_fn)
  with tf.train.MonitoredTrainingSession(
      master=master,
      is_chief=(task == 0),
      config=session_config,
      checkpoint_dir=log_dir,
      scaffold=scaffold,
      hooks=hooks,
      chief_only_hooks=chief_hooks) as sess:
    np_step = 0
    while not sess.should_stop():
      np_step, _ = sess.run([global_step, train_op])
      if training_steps and np_step % training_steps == 0:
        break
    # max_training_steps may be None (the StopAtStepHook guard above allows
    # it); comparing an int with None raises TypeError on Python 3.
    done = max_training_steps is not None and np_step >= max_training_steps
  return np_step, done
示例#5
0
def _model_fn(features, labels, mode, params, model):
  """Model definition for the RetinaNet model based on ResNet-50.

  Args:
    features: the input image tensor, forwarded unchanged to `model`. Documented
      upstream as shape [batch_size, height, width, 3] with fixed, equal height
      and width.
    labels: the input labels in a dictionary. It is forwarded to
      `_detection_loss`; in EVAL mode `source_ids` and `image_scales` are also
      read from it.
    mode: the estimator mode: TRAIN, EVAL, or PREDICT.
    params: the dictionary of hyperparameters of the model.
    model: callable returning `(cls_outputs, box_outputs)`, both keyed by
      feature level.

  Returns:
    In PREDICT mode, a tf.estimator.EstimatorSpec whose predictions hold the
    input image and the per-level class and box outputs. Otherwise a
    TPUEstimatorSpec carrying the total loss, the train op (TRAIN only), the
    eval metrics (EVAL only) and the scaffold function.
  """
  mode_keys = tf.estimator.ModeKeys

  cls_outputs, box_outputs = model(
      features,
      min_level=params['min_level'],
      max_level=params['max_level'],
      num_classes=params['num_classes'],
      num_anchors=len(params['aspect_ratios'] * params['num_scales']),
      is_training_bn=params['is_training_bn'])

  # Prediction needs neither losses nor an optimizer, so return early.
  if mode == mode_keys.PREDICT:
    predictions = {'image': features}
    for level in cls_outputs.keys():
      predictions.update({
          'cls_outputs_%d' % level: cls_outputs[level],
          'box_outputs_%d' % level: box_outputs[level],
      })
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  def _restore_resnet_scaffold():
    """Loads pretrained model through scaffold function."""
    tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
        '/': 'resnet50/',
    })
    return tf.train.Scaffold()

  # The pretrained checkpoint is only loaded when training and when one is
  # configured.
  load_pretrained = (
      params['resnet_checkpoint'] and mode == mode_keys.TRAIN)
  scaffold_fn = _restore_resnet_scaffold if load_pretrained else None

  # Learning-rate schedule and losses. Only total_loss is optimized; the class
  # and box losses are kept for reporting.
  global_step = tf.train.get_global_step()
  learning_rate = _learning_rate_schedule(
      params['learning_rate'], params['lr_warmup_step'],
      params['lr_drop_step'], global_step)
  total_loss, cls_loss, box_loss = _detection_loss(
      cls_outputs, box_outputs, labels, params)

  train_op = None
  if mode == mode_keys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    batch_norm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(batch_norm_updates):
      train_op = optimizer.minimize(total_loss, global_step)

  # Evaluation only works on GPU/CPU host and batch_size=1
  eval_metrics = None
  if mode == mode_keys.EVAL:
    pyramid_levels = range(params['min_level'], params['max_level'] + 1)

    def metric_fn(**kwargs):
      """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      mean_cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      mean_box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

      # Rebuild the per-level output dictionaries from the flat kwargs.
      level_cls_outputs = {
          level: kwargs['cls_outputs_%d' % level] for level in pyramid_levels
      }
      level_box_outputs = {
          level: kwargs['box_outputs_%d' % level] for level in pyramid_levels
      }
      detections = anchor_labeler.generate_detections(
          level_cls_outputs, level_box_outputs, kwargs['source_ids'])
      eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
      coco_metrics = eval_metric.estimator_metric_fn(detections,
                                                     kwargs['image_scales'])

      output_metrics = {
          'cls_loss': mean_cls_loss,
          'box_loss': mean_box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    batch_size = params['batch_size']

    def _repeat_for_batch(loss):
      """Tiles a loss tensor into a [batch_size, 1] tensor."""
      tiled = tf.tile(tf.expand_dims(loss, 0), [batch_size])
      return tf.reshape(tiled, [batch_size, 1])

    metric_fn_inputs = {
        'cls_loss_repeat': _repeat_for_batch(cls_loss),
        'box_loss_repeat': _repeat_for_batch(box_loss),
        'source_ids': labels['source_ids'],
        'image_scales': labels['image_scales'],
    }
    for level in pyramid_levels:
      metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
      metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
    eval_metrics = (metric_fn, metric_fn_inputs)

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)