示例#1
0
def inference_conv_test(images):
    """Build a tiny conv -> flatten -> affine graph used to reproduce an error.

    Args:
      images: input tensor handed straight to `_conv`.

    Returns:
      The output of `_affine` applied to the flattened convolution result.
    """
    # Width of one flattened convolution output row; shared by the reshape
    # and by the affine layer's input size.
    flat_width = 147456

    # Arguments are forwarded verbatim; presumably (in_channels, out_channels,
    # kernel_h, kernel_w, stride_h, stride_w, padding) -- confirm against
    # `_conv`.
    features = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME')
    flattened = tf.reshape(features, [-1, flat_width])

    # Affine layer not needed to reproduce the error.
    return _affine(flattened, flat_width, 128)
示例#2
0
def imagenet_inputs(batch_size,
                    image_size,
                    num_readers=1,
                    num_preprocess_threads=4):
    """Loads a batch of ImageNet training images and their labels.

    Used as a replacement for inception.image_processing.inputs in
    tensorflow/models in order to get around the use of hard-coded flags in
    the image_processing module.

    Args:
      batch_size: int, batch size.
      image_size: int. Each decoded image is passed through
          `_aspect_preserving_resize` (target `image_size + 2`) and then
          `_central_crop` to [image_size, image_size].
      num_readers: int, number of parallel readers feeding the example
          queue. Must be at least 1.
      num_preprocess_threads: int, number of preprocessing pipelines joined
          by `tf.train.batch_join`. Must be a multiple of 4.

    Returns:
      A tuple `(images, labels)`:
        images: float tensor reshaped to
            [batch_size, image_size, image_size, 3]; pixel values are the
            decoded JPEG values divided by 255.
        labels: label indices reshaped to [batch_size].

    Raises:
      ValueError: If `num_preprocess_threads` is not a multiple of 4 or
          `num_readers` is less than 1.
      IOError: If ImageNet data files cannot be found.
    """
    # Validate arguments before touching the dataset or the graph so that a
    # bad call fails fast and does not leave stray queue ops behind.
    if num_preprocess_threads % 4:
        raise ValueError('Please make num_preprocess_threads a multiple '
                         'of 4 (%d %% 4 != 0).' % num_preprocess_threads)

    if num_readers < 1:
        raise ValueError('Please make num_readers at least 1')

    imagenet = imagenet_data.ImagenetData('train')

    with tf.name_scope('batch_processing'):
        data_files = imagenet.data_files()
        if data_files is None:
            raise IOError('No ImageNet data files found')

        # Create filename_queue.
        filename_queue = tf.train.string_input_producer(data_files,
                                                        shuffle=True,
                                                        capacity=16)

        # Approximate number of examples per shard.
        examples_per_shard = 1024
        # Size the random shuffle queue to balance between good global
        # mixing (more examples) and memory use (fewer examples).
        # 1 image uses 299*299*3*4 bytes = 1MB
        # The default input_queue_memory_factor is 16 implying a shuffling queue
        # size: examples_per_shard * 16 * 1MB = 17.6GB
        input_queue_memory_factor = 16
        min_queue_examples = examples_per_shard * input_queue_memory_factor
        examples_queue = tf.RandomShuffleQueue(
            capacity=min_queue_examples + 3 * batch_size,
            min_after_dequeue=min_queue_examples,
            dtypes=[tf.string])

        # Create multiple readers to populate the queue of examples.
        enqueue_ops = []
        for _ in range(num_readers):
            reader = imagenet.reader()
            _, value = reader.read(filename_queue)
            enqueue_ops.append(examples_queue.enqueue([value]))

        tf.train.queue_runner.add_queue_runner(
            tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
        example_serialized = examples_queue.dequeue()

        # Build one decode/resize/crop pipeline per preprocessing thread; all
        # of them consume the same dequeue op and are merged by batch_join.
        images_and_labels = []
        for _ in range(num_preprocess_threads):
            # Parse a serialized Example proto to extract the image and metadata.
            image_buffer, label_index, _, _ = _parse_example_proto(
                example_serialized)
            image = tf.image.decode_jpeg(image_buffer, channels=3)

            # pylint: disable=protected-access
            image = _aspect_preserving_resize(image, image_size + 2)
            image = _central_crop([image], image_size, image_size)[0]
            # pylint: enable=protected-access
            image.set_shape([image_size, image_size, 3])
            image = tf.to_float(image) / 255.0

            images_and_labels.append([image, label_index])

        images, label_index_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size,
            capacity=2 * num_preprocess_threads * batch_size)

        images = tf.reshape(images,
                            shape=[batch_size, image_size, image_size, 3])

        # Display the training images in the visualizer.
        tf.summary.image('images', images)

        return images, tf.reshape(label_index_batch, [batch_size])
示例#3
0
文件: lda2vec.py 项目: lantip/Malaya
    def __init__(
        self,
        num_unique_documents,
        vocab_size,
        num_topics,
        freqs,
        embedding_size=128,
        num_sampled=40,
        learning_rate=1e-3,
        lmbda=150.0,
        alpha=None,
        power=0.75,
        batch_size=32,
        clip_gradients=5.0,
        **kwargs
    ):
        """Build the training graph, optimizer and session for the model.

        Everything is created inside a fresh `tf.Graph`, on the device
        returned by `get_device(**kwargs)`. The cost combines an NCE
        (word2vec-style) loss with a prior term computed from the
        log-softmax of the document embedding.

        Args:
          num_unique_documents: number of rows of the document embedding.
          vocab_size: number of rows of the word embedding / NCE weights and
            the `range_max` of the candidate sampler.
          num_topics: number of columns of the document embedding and number
            of rows of the topic embedding.
          freqs: passed as `unigrams` to
            `tf.nn.fixed_unigram_candidate_sampler`.
          embedding_size: width of the word, NCE and topic embeddings.
          num_sampled: number of sampled candidates for the NCE loss.
          learning_rate: learning rate of the Adam optimizer.
          lmbda: multiplier applied to the prior term.
          alpha: coefficient used in `(alpha - 1) * log_proportions`;
            defaults to `1.0 / num_topics` when None.
          power: passed as `distortion` to the candidate sampler.
          batch_size: stored on the instance; not used in the graph built
            here.
          clip_gradients: gradients are clipped element-wise to
            `[-clip_gradients, clip_gradients]`.
          **kwargs: forwarded to `get_device` and `generate_session`.
        """
        device = get_device(**kwargs)
        _graph = tf.Graph()

        with _graph.as_default():
            with tf.device(device):
                moving_avgs = tf.train.ExponentialMovingAverage(0.9)
                self.batch_size = batch_size
                self.freqs = freqs

                # Inputs: X feeds the word-embedding lookup, Y the NCE labels
                # and DOC the document-embedding lookup.
                self.X = tf.placeholder(tf.int32, shape=[None])
                self.Y = tf.placeholder(tf.int64, shape=[None])
                self.DOC = tf.placeholder(tf.int32, shape=[None])
                # Threshold compared against global_step when selecting the
                # cost below.
                self.switch_loss = tf.Variable(0, trainable=False)
                # The candidate sampler is given labels as a [-1, 1] column.
                train_labels = tf.reshape(self.Y, [-1, 1])
                sampler = tf.nn.fixed_unigram_candidate_sampler(
                    train_labels,
                    num_true=1,
                    num_sampled=num_sampled,
                    unique=True,
                    range_max=vocab_size,
                    distortion=power,
                    unigrams=self.freqs,
                )

                self.word_embedding = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)
                )
                # NOTE(review): `1 / embedding_size` relies on true division;
                # under Python 2 integer division this stddev would be 0 --
                # confirm the supported interpreter.
                self.nce_weights = tf.Variable(
                    tf.truncated_normal(
                        [vocab_size, embedding_size],
                        stddev=tf.sqrt(1 / embedding_size),
                    )
                )
                self.nce_biases = tf.Variable(tf.zeros([vocab_size]))
                # Shared scale for the document and topic initializers.
                scalar = 1 / np.sqrt(num_unique_documents + num_topics)
                self.doc_embedding = tf.Variable(
                    tf.random_normal(
                        [num_unique_documents, num_topics],
                        mean=0,
                        stddev=50 * scalar,
                    )
                )
                self.topic_embedding = tf.get_variable(
                    'topic_embedding',
                    shape=[num_topics, embedding_size],
                    dtype=tf.float32,
                    initializer=tf.orthogonal_initializer(gain=scalar),
                )
                # Context vector = word vector + (document row x topic matrix).
                pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
                proportions = tf.nn.embedding_lookup(
                    self.doc_embedding, self.DOC
                )
                doc = tf.matmul(proportions, self.topic_embedding)
                doc_context = doc
                word_context = pivot
                context = tf.add(word_context, doc_context)
                # The pre-built sampler is reused via `sampled_values`.
                loss_word2vec = tf.reduce_mean(
                    tf.nn.nce_loss(
                        weights=self.nce_weights,
                        biases=self.nce_biases,
                        labels=self.Y,
                        inputs=context,
                        num_sampled=num_sampled,
                        num_classes=vocab_size,
                        num_true=1,
                        sampled_values=sampler,
                    )
                )
                # Non-trainable scale on the prior term; initialised to 1.
                self.fraction = tf.Variable(
                    1, trainable=False, dtype=tf.float32
                )

                # Prior term: sum over the whole document embedding of
                # (alpha - 1) * log_softmax(doc_embedding).
                n_topics = self.doc_embedding.get_shape()[1].value
                log_proportions = tf.nn.log_softmax(self.doc_embedding)
                if alpha is None:
                    alpha = 1.0 / n_topics
                loss = (alpha - 1) * log_proportions
                prior = tf.reduce_sum(loss)

                loss_lda = lmbda * self.fraction * prior
                global_step = tf.Variable(
                    0, trainable=False, name='global_step'
                )
                # Use only the word2vec loss while global_step < switch_loss,
                # otherwise add the prior term.
                # NOTE(review): global_step is not passed to apply_gradients
                # below, so nothing in this method increments it -- confirm
                # whether it is updated elsewhere.
                self.cost = tf.cond(
                    global_step < self.switch_loss,
                    lambda: loss_word2vec,
                    lambda: loss_word2vec + loss_lda,
                )
                loss_avgs_op = moving_avgs.apply(
                    [loss_lda, loss_word2vec, self.cost]
                )
                # Ops created in this scope depend on the moving-average
                # update of the three losses.
                with tf.control_dependencies([loss_avgs_op]):
                    optimizer = tf.train.AdamOptimizer(
                        learning_rate=learning_rate
                    )
                    gvs = optimizer.compute_gradients(self.cost)
                    # Element-wise clipping of every gradient.
                    capped_gvs = [
                        (
                            tf.clip_by_value(
                                grad, -clip_gradients, clip_gradients
                            ),
                            var,
                        )
                        for grad, var in gvs
                    ]
                    self.optimizer = optimizer.apply_gradients(capped_gvs)
                # Create the session for this graph and initialise all
                # variables immediately.
                self.sess = generate_session(_graph, **kwargs)
                self.sess.run(tf.global_variables_initializer())
示例#4
0
    def simulate(self, action):
        """Advance the simulated environment by one step using the model.

        The action is repeated `self._num_frames` times along a new axis 1
        and fed to `self._model.infer` together with the full history buffer
        and the current reset flag. While inferring, the model's
        `video_num_target_frames` hparam is temporarily set to 1 and is
        always restored afterwards, even if `infer` raises.

        Args:
          action: action tensor for the current step.

        Returns:
          A tuple `(reward, done)` of tensors. `reward` is the model's
          `target_reward` cast to float, reshaped to `(batch_size,)` and
          shifted by `self._min_reward`, plus the optional intrinsic reward.
          `done` is a constant all-False tensor of shape `(batch_size,)`.
          Evaluating either one first assigns the new observation to
          `self._observ`, advances the history buffer, conditionally dumps a
          video frame, and then clears `self._reset_model`.

        Raises:
          ValueError: if intrinsic rewards are enabled but the model output
            has no 'targets_logits' entry.
        """
        with tf.name_scope("environment/simulate"):
            actions = tf.concat([tf.expand_dims(action, axis=1)] *
                                self._num_frames,
                                axis=1)
            history = self.history_buffer.get_all_elements()
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                # We only need 1 target frame here, set it.
                hparams_target_frames = self._model.hparams.video_num_target_frames
                self._model.hparams.video_num_target_frames = 1
                try:
                    model_output = self._model.infer({
                        "inputs":
                        history,
                        "input_action":
                        actions,
                        "reset_internal_states":
                        self._reset_model.read_value()
                    })
                finally:
                    # Restore the shared hparams even when infer fails, so a
                    # failed call cannot leave the model stuck at 1 frame.
                    self._model.hparams.video_num_target_frames = hparams_target_frames

            observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                             self.observ_dtype)

            reward = tf.to_float(model_output["target_reward"])
            reward = tf.reshape(reward,
                                shape=(self.batch_size, )) + self._min_reward

            if self._intrinsic_reward_scale:
                # Use the model's uncertainty about its prediction as an intrinsic
                # reward. The uncertainty is measured by the log probability of the
                # predicted pixel value.
                if "targets_logits" not in model_output:
                    raise ValueError(
                        "The use of intrinsic rewards requires access to "
                        "the logits. Ensure that model.infer returns "
                        "'targets_logits'")
                uncertainty_reward = compute_uncertainty_reward(
                    model_output["targets_logits"], model_output["targets"])
                # Cap the scaled intrinsic reward at 1.
                uncertainty_reward = tf.minimum(
                    1., self._intrinsic_reward_scale * uncertainty_reward)
                uncertainty_reward = tf.Print(uncertainty_reward,
                                              [uncertainty_reward],
                                              message="uncertainty_reward",
                                              first_n=1,
                                              summarize=8)
                reward += uncertainty_reward

            # The simulated environment never signals episode termination.
            done = tf.constant(False, tf.bool, shape=(self.batch_size, ))

            with tf.control_dependencies([observ]):
                dump_frame_op = tf.cond(
                    self._video_condition,
                    lambda: tf.py_func(
                        self._video_dump_frame,  # pylint: disable=g-long-lambda
                        [observ, reward],
                        []),
                    tf.no_op)
                with tf.control_dependencies([
                        self._observ.assign(observ),
                        self.history_buffer.move_by_one_element(observ),
                        dump_frame_op
                ]):
                    # Clear the reset flag only after the state updates above.
                    clear_reset_model_op = tf.assign(self._reset_model,
                                                     tf.constant(0.0))
                    with tf.control_dependencies([clear_reset_model_op]):
                        return tf.identity(reward), tf.identity(done)
  def GetProjectLastDim(cls, inputs, weight, input_dim, output_dim, proj_obj):
    """Linear projection on the last dim of the input tensor along with pruning.

    This is a TPU efficient implementation to avoid reshaping inputs to Rank-2
    tensor by using Einsum for the compute.

    Args:
      inputs: An input Tensor, the last dimension of which is input_dim.
      weight: A weight matrix with shape [input_dim, output_dim]. Only used
        on the TPU path when `inputs` has rank 2; every other path reads its
        matrices from `proj_obj.theta`.
      input_dim: An integer or a symbolic dim, the last dimension of the inputs.
      output_dim: An integer or a symbolic dim, the last dimension of the
                  outputs.
      proj_obj: a ProjectionLayer object.

    Returns:
      An output Tensor of the same rank as inputs, the last dimension is
      output_dim.

    Raises:
      ValueError: if `compression_option` is 10 and `block_method` is neither
        'mask' nor 'loop'.
    """
    theta = proj_obj.theta
    p = proj_obj.params
    # Resolve symbolic dims to plain Python ints.
    if symbolic.IsExpr(input_dim):
      input_dim = symbolic.ToStatic(input_dim)
    input_dim = int(input_dim)
    if symbolic.IsExpr(output_dim):
      output_dim = symbolic.ToStatic(output_dim)
    output_dim = int(output_dim)

    if (py_utils.use_tpu() and inputs.shape is not None and
        inputs.shape.rank is not None and inputs.shape.rank < 26):
      # Avoids reshape if feasible and uses Einsum.
      if inputs.shape.rank == 2:
        return tf.matmul(inputs, weight)
      return cls.GetEinSumResult(inputs, proj_obj)

    hparams = p.pruning_hparams_dict
    compression_option = hparams['compression_option']

    # Stage 1: flatten the inputs to rank 2, optionally compressing them
    # block-wise with the b matrix.
    if compression_option == 9 and hparams['compress_input']:
      blocked_inputs = tf.reshape(
          inputs, py_utils.ToStaticShape([-1, hparams['input_block_size']]))
      compressed_inputs = tf.reshape(
          py_utils.Matmul(blocked_inputs, theta.b_matrix_tfvar),
          py_utils.ToStaticShape(
              [-1, input_dim // hparams['input_compression_factor']]))
    else:
      compressed_inputs = tf.reshape(inputs,
                                     py_utils.ToStaticShape([-1, input_dim]))

    # Stage 2: multiply by the c matrix (masked, per-block, or dense).
    if compression_option == 10:
      block_method = hparams['block_method']
      if block_method == 'mask':
        intermediate_result = py_utils.Matmul(
            compressed_inputs,
            tf.multiply(theta.c_matrix_tfvar, theta.c_mask_tfvar))
      elif block_method == 'loop':
        num_blocks = hparams['block_compression_factor']
        input_splitted = tf.split(compressed_inputs, num_blocks, axis=-1)
        output_splitted = [
            py_utils.Matmul(input_i, theta.c_matrix_tfvar[i, :, :])
            for i, input_i in enumerate(input_splitted)
        ]
        intermediate_result = tf.concat(output_splitted, axis=-1)
      else:
        # Previously this fell through and failed later with an unbound
        # local; fail here with an actionable message instead.
        raise ValueError(
            'Unsupported block_method %r for compression_option 10; '
            "expected 'mask' or 'loop'." % (block_method,))
    else:
      intermediate_result = py_utils.Matmul(compressed_inputs,
                                            theta.c_matrix_tfvar)

    # Stage 3: optionally expand the result block-wise with the d matrix.
    if compression_option == 9 and hparams['compress_output']:
      blocked_intermediate_result = tf.reshape(
          intermediate_result,
          py_utils.ToStaticShape([
              -1, hparams['output_block_size'] //
              hparams['output_compression_factor']
          ]))
      outputs = py_utils.Matmul(blocked_intermediate_result,
                                theta.d_matrix_tfvar)
    else:
      outputs = intermediate_result

    # Restore the leading dims of `inputs`, with output_dim as the last dim.
    return tf.reshape(
        outputs,
        tf.concat([
            tf.cast(py_utils.GetShape(inputs)[:-1], tf.int32),
            py_utils.ToStaticShape([output_dim])
        ],
                  axis=0))
示例#6
0
def detection_loss(cls_outputs, box_outputs, labels, params):
  """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.
  The `labels` dictionary is only read; it is not modified, so the function
  can safely be called more than once with the same dictionary.

  Args:
    cls_outputs: an OrderDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    labels: the dictionary that returned from dataloader that includes
      groundtruth targets. Reads 'mean_num_positives' and, per level,
      'cls_targets_<level>' and 'box_targets_<level>'.
    params: the dictionary including training parameters specified in
      default_hparams function in this file. Reads 'data_format',
      'num_classes', 'alpha', 'gamma', 'delta', 'iou_loss_type',
      'box_loss_weight' and 'iou_loss_weight'.
  Returns:
    total_loss: a scalar tensor representing total loss reducing from
      class and box losses from all levels.
    cls_loss: a scalar tensor representing total class loss.
    box_loss: a scalar tensor representing total box regression loss.
    box_iou_loss: a scalar tensor representing total box iou loss, or the
      float 0.0 when `params['iou_loss_type']` is falsy.
  """
  # Sum all positives in a batch for normalization and avoid zero
  # num_positives_sum, which would lead to inf loss during training
  num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
  channels_first = params['data_format'] == 'channels_first'
  num_classes = params['num_classes']

  cls_losses = []
  box_losses = []
  box_iou_losses = []
  for level in cls_outputs.keys():
    # Work on local tensors so the caller's `labels` dict is left untouched.
    cls_targets = labels['cls_targets_%d' % level]
    box_targets_at_level = labels['box_targets_%d' % level]
    if channels_first:
      cls_targets = tf.transpose(cls_targets, [0, 3, 1, 2])
      box_targets_at_level = tf.transpose(box_targets_at_level, [0, 3, 1, 2])

    # Onehot encoding for classification labels.
    cls_targets_at_level = tf.one_hot(cls_targets, num_classes)
    one_hot_shape = cls_targets_at_level.get_shape().as_list()
    if channels_first:
      bs, _, dim_a, dim_b, _ = one_hot_shape
      merged_shape = [bs, -1, dim_a, dim_b]
      split_shape = [bs, -1, dim_a, dim_b, num_classes]
    else:
      bs, dim_a, dim_b, _, _ = one_hot_shape
      merged_shape = [bs, dim_a, dim_b, -1]
      split_shape = [bs, dim_a, dim_b, -1, num_classes]
    # Fold the class axis into the anchor axis to match the logits layout.
    cls_targets_at_level = tf.reshape(cls_targets_at_level, merged_shape)

    cls_loss = _classification_loss(
        cls_outputs[level],
        cls_targets_at_level,
        num_positives_sum,
        alpha=params['alpha'],
        gamma=params['gamma'])
    # Split the class axis back out so the mask below broadcasts over it.
    cls_loss = tf.reshape(cls_loss, split_shape)
    # Targets equal to -2 contribute no classification loss.
    cls_loss *= tf.cast(
        tf.expand_dims(tf.not_equal(cls_targets, -2), -1), tf.float32)
    cls_losses.append(tf.reduce_sum(cls_loss))

    box_losses.append(
        _box_loss(
            box_outputs[level],
            box_targets_at_level,
            num_positives_sum,
            delta=params['delta']))
    if params['iou_loss_type']:
      box_iou_losses.append(
          _box_iou_loss(box_outputs[level], box_targets_at_level,
                        num_positives_sum, params['iou_loss_type']))

  # Sum per level losses to total loss.
  cls_loss = tf.add_n(cls_losses)
  box_loss = tf.add_n(box_losses)
  box_iou_loss = tf.add_n(box_iou_losses) if box_iou_losses else 0.0
  total_loss = (
      cls_loss + params['box_loss_weight'] * box_loss +
      params['iou_loss_weight'] * box_iou_loss)
  return total_loss, cls_loss, box_loss, box_iou_loss
示例#7
0
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model definition entry.

  Builds the model outputs, then depending on `mode` returns predictions,
  or builds the losses plus the training op (TRAIN) or the evaluation
  metrics (EVAL).

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    In PREDICT mode, a tf.estimator.EstimatorSpec carrying the input image
    and the per-level class/box outputs. Otherwise the TPUEstimatorSpec to
    run training or evaluation.

  Raises:
    RuntimeError: if both ckpt and backbone_ckpt are set (checked in TRAIN
      mode only).
  """
  # The model is fed channels-first input when requested; `params` itself is
  # wrapped into a Config only when the model is called below.
  if params['data_format'] == 'channels_first':
    features = tf.transpose(features, [0, 3, 1, 2])
  def _model_outputs(inputs):
    return model(inputs, config=hparams_config.Config(params))

  cls_outputs, box_outputs = utils.build_model_with_precision(
      params['precision'], _model_outputs, features)

  # Cast all outputs to float32 regardless of the precision the model was
  # built with.
  levels = cls_outputs.keys()
  for level in levels:
    cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
    box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

  # First check if it is in PREDICT mode.
  # NOTE(review): this returns a plain EstimatorSpec, unlike the
  # TPUEstimatorSpec used for the other modes -- confirm this is intended.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)

  # cls_loss and box_loss are for logging. only total_loss is optimized.
  det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
      cls_outputs, box_outputs, labels, params)
  l2loss = reg_l2_loss(params['weight_decay'])
  total_loss = det_loss + l2loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    utils.scalar('lrn_rate', learning_rate)
    utils.scalar('trainloss/cls_loss', cls_loss)
    utils.scalar('trainloss/box_loss', box_loss)
    utils.scalar('trainloss/box_iou_loss', box_iou_loss)
    utils.scalar('trainloss/det_loss', det_loss)
    utils.scalar('trainloss/l2_loss', l2loss)
    utils.scalar('trainloss/loss', total_loss)

  # `ema` and `ema_vars` exist only when a moving-average decay is set; every
  # later use is guarded by the same condition.
  moving_average_decay = params['moving_average_decay']
  if moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=moving_average_decay, num_updates=global_step)
    ema_vars = utils.get_ema_vars()

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    var_list = tf.trainable_variables()
    if variable_filter_fn:
      var_list = variable_filter_fn(var_list)

    if params.get('clip_gradients_norm', 0) > 0:
      # Clip by global norm, then apply the clipped gradients.
      logging.info('clip gradients norm by %f', params['clip_gradients_norm'])
      grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
      with tf.name_scope('clip'):
        grads = [gv[0] for gv in grads_and_vars]
        tvars = [gv[1] for gv in grads_and_vars]
        clipped_grads, gnorm = tf.clip_by_global_norm(
            grads, params['clip_gradients_norm'])
        utils.scalar('gnorm', gnorm)
        grads_and_vars = list(zip(clipped_grads, tvars))

      with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(
            total_loss, global_step, var_list=var_list)

    # The EMA update becomes the train op and runs after the optimizer step.
    if moving_average_decay:
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:
    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      # On TPU the metric function is given the batch size multiplied by
      # the number of shards.
      batch_size = params['batch_size']
      if params['use_tpu']:
        batch_size = params['batch_size'] * params['num_shards']
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

      if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        coco_metrics = coco_metric_fn(
            batch_size,
            anchor_labeler,
            params['val_json_file'],
            testdev_dir=params['testdev_dir'],
            disable_pyfun=params.get('disable_pyfun', None),
            **kwargs)
      else:
        logging.info('Eval val with groudtruths %s.', params['val_json_file'])
        coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                      params['val_json_file'], **kwargs)

      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    # Repeat the scalar losses once per example, giving tensors of shape
    # [batch_size, 1] to pass to metric_fn.
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'groundtruth_data': labels['groundtruth_data'],
        'image_scales': labels['image_scales'],
    }
    add_metric_fn_inputs(params, cls_outputs, box_outputs, metric_fn_inputs)
    eval_metrics = (metric_fn, metric_fn_inputs)

  # 'ckpt' takes precedence over 'backbone_ckpt' when choosing the path.
  checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

  if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
    # Initialize the model from an EfficientDet or backbone checkpoint.
    if params.get('ckpt') and params.get('backbone_ckpt'):
      raise RuntimeError(
          '--backbone_ckpt and --checkpoint are mutually exclusive')

    if params.get('backbone_ckpt'):
      var_scope = params['backbone_name'] + '/'
      if params['ckpt_var_scope'] is None:
        # Use backbone name as default checkpoint scope.
        ckpt_scope = params['backbone_name'] + '/'
      else:
        ckpt_scope = params['ckpt_var_scope'] + '/'
    else:
      # Load every var in the given checkpoint
      var_scope = ckpt_scope = '/'

    # scaffold_fn closes over checkpoint, ckpt_scope and var_scope set above.
    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      logging.info('restore variables from %s', checkpoint)

      var_map = utils.get_ckpt_var_map(
          ckpt_path=checkpoint,
          ckpt_scope=ckpt_scope,
          var_scope=var_scope,
          var_exclude_expr=params.get('var_exclude_expr', None))

      tf.train.init_from_checkpoint(checkpoint, var_map)

      return tf.train.Scaffold()
  elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:
    def scaffold_fn():
      """Load moving average variables for eval."""
      logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
      restore_vars_dict = ema.variables_to_restore(ema_vars)
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)
  else:
    scaffold_fn = None

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      host_call=utils.get_tpu_host_call(global_step, params),
      scaffold_fn=scaffold_fn)
示例#8
0
def DenseAR(x,
            h=None,
            hidden_layers=[],
            activation=tf.nn.relu,
            log_scale_clip=None,
            log_scale_clip_pre=None,
            train=False,
            dropout_rate=0.0,
            sigmoid_scale=False,
            log_scale_factor=1.0,
            log_scale_reg=0.0,
            shift_only=False,
            *args,
            **kwargs):
    """Builds a masked-dense network that outputs a shift and a log-scale.

    The input is passed through one `tfb.masked_dense` layer per entry of
    `hidden_layers` (only the first one is built with `exclusive=True`), then
    through one or two output layers with `num_blocks` equal to the size of
    the rightmost dimension of `x`.

    Args:
      x: input tensor; its rightmost dimension must be statically known.
      h: optional conditioning tensor. When given, a bias-free
        `tf.layers.dense` projection of it is added after every masked layer.
      hidden_layers: iterable with the number of units of each hidden layer.
      activation: activation used by the hidden layers.
      log_scale_clip: if truthy, the final log-scale is squashed with
        `clip * tanh(log_scale / clip)`.
      log_scale_clip_pre: same squashing, applied before the log-scale bias
        and the conditioning term are added (separate-head path only).
      train: forwarded to `tf.layers.dropout` as `training`.
      dropout_rate: dropout is applied after each hidden layer when > 0.
      sigmoid_scale: if true, the log-scale is passed through
        `tf.log_sigmoid`.
      log_scale_factor: multiplier applied to the raw log-scale output.
      log_scale_reg: when > 0, adds an L2 kernel regularizer to the log-scale
        layer.
      shift_only: if true, only the shift is computed.
      *args: forwarded to every `tfb.masked_dense` / `tf.layers.dense` call.
      **kwargs: forwarded to every `tfb.masked_dense` / `tf.layers.dense`
        call.

    Returns:
      `(shift, None)` when `shift_only` is true, otherwise
      `(shift, log_scale)`.

    Raises:
      NotImplementedError: if the rightmost dimension of `x` is unknown.
    """
    event_size = x.shape.with_rank_at_least(1)[-1].value
    if event_size is None:
        raise NotImplementedError(
            "Rightmost dimension must be known prior to graph execution.")
    if x.shape.is_fully_defined():
        x_shape = np.int32(x.shape.as_list())
    else:
        x_shape = tf.shape(x)

    def _project_h(units):
        """Bias-free dense projection of the conditioning tensor `h`."""
        return tf.layers.dense(h, units, use_bias=False, *args, **kwargs)

    net = x
    for layer_idx, width in enumerate(hidden_layers):
        net = tfb.masked_dense(inputs=net,
                               units=width,
                               num_blocks=event_size,
                               exclusive=(layer_idx == 0),
                               activation=activation,
                               *args,
                               **kwargs)
        if h is not None:
            net = net + _project_h(width)
        if dropout_rate > 0:
            net = tf.layers.dropout(net, dropout_rate, training=train)

    if shift_only:
        shift = tfb.masked_dense(inputs=net,
                                 units=event_size,
                                 num_blocks=event_size,
                                 activation=None,
                                 *args,
                                 **kwargs)
        return shift, None

    # A single fused output layer is enough when the log-scale needs no
    # special treatment (no scaling, no regularizer, no pre-clipping).
    use_fused_head = (log_scale_factor == 1.0 and log_scale_reg == 0.0
                      and not log_scale_clip_pre)
    if use_fused_head:
        fused = tfb.masked_dense(inputs=net,
                                 units=2 * event_size,
                                 num_blocks=event_size,
                                 activation=None,
                                 *args,
                                 **kwargs)
        if h is not None:
            fused = fused + _project_h(2 * event_size)
        fused = tf.reshape(fused, shape=tf.concat([x_shape, [2]], axis=0))
        shift, log_scale = tf.unstack(fused, num=2, axis=-1)
    else:
        shift = tfb.masked_dense(inputs=net,
                                 units=event_size,
                                 num_blocks=event_size,
                                 activation=None,
                                 *args,
                                 **kwargs)

        regularizer = None
        if log_scale_reg > 0.0:

            def regularizer(w):
                return log_scale_reg * 2.0 * tf.nn.l2_loss(w)

        log_scale = tfb.masked_dense(inputs=net,
                                     units=event_size,
                                     num_blocks=event_size,
                                     activation=None,
                                     use_bias=False,
                                     kernel_regularizer=regularizer,
                                     *args,
                                     **kwargs)
        log_scale = log_scale * log_scale_factor
        if log_scale_clip_pre:
            log_scale = log_scale_clip_pre * tf.nn.tanh(
                log_scale / log_scale_clip_pre)
        # The log-scale layer has no bias of its own; add one that is not
        # affected by the factor / pre-clip above.
        log_scale = log_scale + tf.get_variable(
            "log_scale_bias", [1, event_size],
            initializer=tf.zeros_initializer())
        if h is not None:
            shift = shift + _project_h(event_size)
            log_scale = log_scale + _project_h(event_size)

    if sigmoid_scale:
        log_scale = tf.log_sigmoid(log_scale)

    if log_scale_clip:
        log_scale = log_scale_clip * tf.nn.tanh(log_scale / log_scale_clip)

    return shift, log_scale
示例#9
0
  def _predict(self, image_features, proposal_boxes, **kwargs):
    """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
      width_i, channels_i] containing features for a batch of images.
      proposal_boxes: A float tensor of shape [batch_size, num_proposals,
        box_code_size].
      **kwargs: Unused Keyword args

    Returns:
      box_encodings: A list of float tensors of shape
        [batch_size, num_anchors_i, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes. Each entry in the
        list corresponds to a feature map in the input `image_features` list.
      class_predictions_with_background: A list of float tensors of shape
        [batch_size, num_anchors_i, num_classes + 1] representing the class
        predictions for the proposals. Each entry in the list corresponds to a
        feature map in the input `image_features` list.

    Raises:
      ValueError: if num_predictions_per_location is not 1 or if
        len(image_features) is not 1.
    """
    if len(image_features) != 1:
      raise ValueError('length of `image_features` must be 1. Found {}'.
                       format(len(image_features)))
    image_feature = image_features[0]
    batch_size = tf.shape(proposal_boxes)[0]
    num_boxes = tf.shape(proposal_boxes)[1]
    net = image_feature
    for layer in self._shared_conv_layers:
      net = layer(net)

    # Location predictions.
    box_net = net
    for layer in self._box_encoder_layers:
      box_net = layer(box_net)
    box_encodings = ops.batch_position_sensitive_crop_regions(
        box_net,
        boxes=proposal_boxes,
        crop_size=self._crop_size,
        num_spatial_bins=self._num_spatial_bins,
        global_pool=True)
    box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
    box_encodings = tf.reshape(box_encodings,
                               [batch_size * num_boxes, 1, self.num_classes,
                                self._box_code_size])

    # Class predictions.
    class_net = net
    for layer in self._class_predictor_layers:
      class_net = layer(class_net)
    class_predictions_with_background = (
        ops.batch_position_sensitive_crop_regions(
            class_net,
            boxes=proposal_boxes,
            crop_size=self._crop_size,
            num_spatial_bins=self._num_spatial_bins,
            global_pool=True))
    class_predictions_with_background = tf.squeeze(
        class_predictions_with_background, axis=[2, 3])
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background,
        [batch_size * num_boxes, 1, self._total_classes])

    return {BOX_ENCODINGS: [box_encodings],
            CLASS_PREDICTIONS_WITH_BACKGROUND:
            [class_predictions_with_background]}
示例#10
0
 def DecodeLabelAndImage(r):
     """Splits a raw byte record into a float image and an int32 label.

     The record is decoded as uint8. Every byte after the first is reshaped
     to [3, 32, 32], transposed to [32, 32, 3], cast to float and divided by
     255. The first byte is returned as the int32 label.

     Returns:
       A tuple `(image, label)`.
     """
     record_bytes = tf.decode_raw(r, tf.uint8)
     channels_first = tf.reshape(record_bytes[1:], [3, 32, 32])
     channels_last = tf.transpose(channels_first, [1, 2, 0])
     image = tf.to_float(channels_last) / 255.0
     label = tf.to_int32(record_bytes[0])
     return image, label
示例#11
0
def EffectiveSampleSize(states,
                        filter_beyond_lag=300,
                        filter_threshold=0.05,
                        center=True,
                        normalize=True):
    """Effective sample size estimate for a single `states` tensor.

    The auto-correlation of `states` along axis 0 is obtained from
    `SanitizedAutoCorrelation`, averaged over axis 1, optionally truncated at
    the first lag whose absolute value falls below `filter_threshold`, and
    then combined as

        ESS = N / (1 + 2 * sum_{k>=1} (N - k) / N * R[k])

    where N is the size of `states` along axis 0 and R[k] is the (filtered)
    auto-correlation at lag k.

    Args:
      states: value convertible to a tensor; axis 0 is the sample axis.
      filter_beyond_lag: forwarded to `SanitizedAutoCorrelation` as
        `max_lags`.
      filter_threshold: if not None, lags from the first one with
        |R[k]| < threshold onwards are zeroed out.
      center: forwarded to `SanitizedAutoCorrelation`.
      normalize: forwarded to `SanitizedAutoCorrelation`.

    Returns:
      The ESS tensor, reduced over the lag axis.
    """
    def _axis_size(x, axis=None):
        """Number of elements of `x` in `axis`, cast to `x.dtype`."""
        if axis is None:
            count = tf.size(x)
        else:
            count = tf.reduce_prod(tf.gather(tf.shape(x), axis))
        return tf.cast(count, x.dtype)

    with tf.name_scope("effective_sample_size_single_state",
                       values=[states, filter_beyond_lag, filter_threshold]):

        states = tf.convert_to_tensor(states, name="states")
        state_dtype = states.dtype

        # filter_beyond_lag == None ==> the full sequence of lags is used.
        acf = SanitizedAutoCorrelation(states,
                                       axis=0,
                                       center=center,
                                       normalize=normalize,
                                       max_lags=filter_beyond_lag)
        acf = tf.reduce_mean(acf, 1)

        if filter_threshold is not None:
            filter_threshold = tf.convert_to_tensor(filter_threshold,
                                                    dtype=state_dtype,
                                                    name="filter_threshold")
            # Build a 0/1 mask that is 1 up to (excluding) the first lag
            # whose |acf| is below the threshold and 0 from there on.
            # Example: acf = [1, 0.5, 0.0, 0.3], filter_threshold = 0.2.
            #   below threshold        -> [False, False, True, False]
            #   cast to dtype          -> [0, 0, 1, 0]
            #   cumulative sum         -> [0, 0, 1, 1]
            #   maximum(1 - cumsum, 0) -> [1, 1, 0, 0]
            below = tf.cast(tf.abs(acf) < filter_threshold,
                            dtype=state_dtype)
            keep_mask = tf.maximum(1. - tf.cumsum(below, axis=0), 0.)
            acf *= keep_mask

        # Weight (N - k) / N for every available lag k, reshaped to
        # [M, 1, ..., 1] so that it broadcasts against acf.
        n = _axis_size(states, axis=0)
        lags = tf.range(0., _axis_size(acf, axis=0))
        lag_weight = (n - lags) / n
        static_rank = acf.shape.ndims
        if static_rank is not None:
            broadcast_shape = [-1] + [1] * (static_rank - 1)
        else:
            broadcast_shape = tf.concat(
                ([-1], tf.ones([tf.rank(acf) - 1], dtype=tf.int32)),
                axis=0)
        lag_weight = tf.reshape(lag_weight, broadcast_shape)

        # Lag 0 is skipped: its contribution is the leading "1" below.
        weighted_acf = lag_weight[1:, Ellipsis] * acf[1:, Ellipsis]
        denominator = 1.0 + 2 * tf.reduce_sum(weighted_acf, axis=0)
        return n / denominator
示例#12
0
 def DecodeLabel(label):
     """Decodes a raw byte string into a scalar int32 label.

     The bytes are decoded as uint8 and reshaped to a scalar (shape `[]`)
     before the cast to int32.
     """
     decoded = tf.decode_raw(label, tf.uint8)
     scalar = tf.reshape(decoded, [])
     return tf.to_int32(scalar)
示例#13
0
 def DecodeImage(image):
     """Decodes a raw byte string into a [28, 28, 1] float32 image.

     The bytes are decoded as uint8, cast to float32, reshaped to
     [28, 28, 1] and divided by 255.
     """
     pixels = tf.cast(tf.decode_raw(image, tf.uint8), tf.float32)
     pixels = tf.reshape(pixels, [28, 28, 1])
     return pixels / 255.0
示例#14
0
        if activation == None:
            result = layer
        else:
            result = activation(layer, name='layer')
    return result


def CalcMistake(labels, logits):
    """Returns the element-wise absolute difference `|labels - logits|`."""
    difference = tf.subtract(labels, logits)
    return tf.abs(difference)

# Graph definition: dense input layer -> LSTM -> dense output layer.
# All names below are module-level and may be used by code further down.

# Placeholders for the input and target sequences, both declared with shape
# [None, stepSize, inputSize].
with tf.name_scope('Input'):
    xs = tf.placeholder(tf.float32, [None, stepSize, inputSize], name='inputX')
    ys = tf.placeholder(tf.float32, [None, stepSize, inputSize], name='inputY')

# First layer: flatten the sequences to 2-D rows of width inputSize and feed
# them through GetLayer with an [inputSize, hiddenSize] weight.
with tf.name_scope('Layer01'):
    input01 = tf.reshape(xs, [-1, inputSize], name='dim2Input01')
    weight01 = GetWeight([inputSize, hiddenSize])
    bias01 = GetBias([hiddenSize])
    layer01 = GetLayer(input01, weight01, bias01, nLayer=1)

# Recurrent layer: restore the [batch, stepSize, hiddenSize] layout and run a
# BasicLSTMCell over it (batch-major, since time_major=False).
with tf.name_scope('RnnLayer'):
    input02 = tf.reshape(layer01, [-1, stepSize, hiddenSize], name='dim3Input02')
    rnnFrame = tf.nn.rnn_cell.BasicLSTMCell(hiddenSize, forget_bias=1.0, state_is_tuple=True)
    # NOTE(review): the initial state is built for exactly `batchSize` rows,
    # so fed batches presumably have to match that size - confirm.
    theState = rnnFrame.zero_state(batch_size=batchSize, dtype=tf.float32)
    outputs, finalState = tf.nn.dynamic_rnn(rnnFrame, input02, initial_state=theState, time_major=False)

# Output layer: flatten the LSTM outputs to rows of width hiddenSize and map
# them through GetLayer with a [hiddenSize, outputSize] weight.
with tf.name_scope('Layer02'):
    input03 = tf.reshape(outputs, [-1, hiddenSize], name='dim2Input03')
    weight02 = GetWeight([hiddenSize, outputSize])
    bias02 = GetBias([outputSize])
    prediction = GetLayer(input03, weight02, bias02, nLayer=2)
                      [None, TIME_STEP, INPUT_SIZE])  # shape(batch, 5, 1)
# Target placeholder, declared with shape [None, TIME_STEP, INPUT_SIZE].
tf_y = tf.placeholder(tf.float32, [None, TIME_STEP, INPUT_SIZE])  # target y

# RNN: a single LSTM cell with CELL_SIZE units, unrolled by dynamic_rnn.
rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=CELL_SIZE)
# NOTE(review): the zero state is created for exactly BATCH_SIZE rows, so fed
# batches presumably have to match that size - confirm.
init_s = rnn_cell.zero_state(batch_size=BATCH_SIZE,
                             dtype=tf.float32)  # very first hidden state
outputs, final_s = tf.nn.dynamic_rnn(
    rnn_cell,  # the cell defined above
    tf_x,  # input sequence
    initial_state=init_s,  # the initial hidden state
    time_major=
    False,  # False: (batch, time step, input); True: (time step, batch, input)
)
# Apply one dense layer to every time step: flatten to rows of width
# CELL_SIZE, project to INPUT_SIZE, then restore the 3-D layout.
outs2D = tf.reshape(
    outputs,
    [-1, CELL_SIZE])  # reshape 3D output to 2D for the fully connected layer
net_outs2D = tf.layers.dense(outs2D, INPUT_SIZE)
outs = tf.reshape(net_outs2D,
                  [-1, TIME_STEP, INPUT_SIZE])  # reshape back to 3D

# Mean squared error between targets and predictions, minimised with Adam
# at learning rate LR.
loss = tf.losses.mean_squared_error(labels=tf_y,
                                    predictions=outs)  # compute cost
train_op = tf.train.AdamOptimizer(LR).minimize(loss)

# The session is created at module level and is not closed in this section.
sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize variables in graph

# Interactive figure for plotting.
plt.figure(1, figsize=(12, 5))
plt.ion()  # interactive mode: continuously plot
示例#16
0
    def test_reshape(self):
        """Adds a (4, 32, 32, 3) -> (4, 3072) reshape to the graph and converts it.

        The reshape result is not used directly; presumably
        `_test_conversion` picks the ops up from the default graph.
        """
        image_batch = tf.placeholder(dtype=tf.float32, shape=(4, 32, 32, 3))
        flat_shape = (4, 32 * 32 * 3)
        tf.reshape(image_batch, shape=flat_shape)

        self._test_conversion('reshape')
示例#17
0
def loss(model, cartpoleUtil, t_interior, X_interior, t_terminal, X_terminal):
    ''' Compute the two loss terms used for training.

    Besides its arguments, this function reads the module-level names
    `Q`, `R`, `D`, `snoise`, `u`, `quadraticForm`, `BtXgradV` and
    `getTFUtils`. It also prints the intermediate tensors for debugging.

    Args:
        model:        DGM model object, called as model(t, X)
        cartpoleUtil: object whose f(X) returns the pair (A, B) used below
        t_interior:   sampled time points in the interior of the function's domain
        X_interior:   sampled space points in the interior of the function's domain
        t_terminal:   sampled time points at terminal point (vector of terminal times)
        X_terminal:   sampled space points at terminal time

    Returns:
        (L1, L3): mean squared PDE residual on the interior samples, and
        mean squared mismatch between model(t_terminal, X_terminal) and
        u(X_terminal).
    '''

    # Loss term #1: PDE
    # compute function value and derivatives at current sampled points.
    # The residual assembled below is
    #   V_t + f + rowsum(mu_t * V_x) + 0.5 * snoise^2 * trace(V_xx)
    # with f = phi1 + phi2 and mu_t = A - (Bt_gradV / R) * B.
    # NOTE(review): `matmul` is unpacked but not used in this function.
    matmul, multiply, rowsum = getTFUtils()

    V = model(t_interior, X_interior)
    V_t = tf.gradients(V, t_interior)[0]

    print('V_t=%s' % V_t)

    # f = phi1 + phi2
    #   phi1 = 0.5 * quadraticForm(X_interior, Q)
    #   phi2 = 0.5 * (Bt_gradV)^2 / R
    const = tf.constant
    print('X_interior=%s' % X_interior)
    phi1 = tf.constant(0.5) * quadraticForm(X_interior, Q)
    A, B = cartpoleUtil.f(X_interior)

    print('A=%s' % A)
    print('B=%s' % B)

    V_x = tf.gradients(V, X_interior)[0]

    print('V_x=%s' % V_x)
    Bt_gradV = BtXgradV(B, V_x)
    print('Bt_gradV=%s' % Bt_gradV)
    phi2 = const(0.5) * tf.square(Bt_gradV) / const(R * 1.0)
    print('phi1=%s' % phi1)
    print('phi2=%s' % phi2)
    f = phi1 + phi2

    # mu^T: Bt_gradV / R is reshaped to a column and repeated D times along
    # axis 1 before being multiplied with B.
    uinput = const(1.0 / R) * Bt_gradV
    inputs = tf.repeat(tf.reshape(uinput, (-1, 1)), repeats=D, axis=1)
    print('inputs=%s' % inputs)
    mu_t = A - multiply(inputs, B)

    # NOTE(review): tf.gradients differentiates the sum of V_x's entries, so
    # V_xx is presumably not the full per-sample Hessian - confirm that
    # tf.linalg.trace(V_xx) below is the intended second-order term.
    V_xx = tf.gradients(V_x, X_interior)[0]

    print('V_t=%s' % V_t)
    print('f=%s' % f)
    print('mu_t=%s' % mu_t)
    print('V_x=%s' % V_x)
    print('snoise=%s' % snoise)
    print('V_xx=%s' % V_xx)

    mul = rowsum(multiply(mu_t, V_x))
    diff_V = V_t + f + mul + 0.5 * (snoise**2) * tf.linalg.trace(V_xx)

    # mean of the squared residual of the differential operator
    L1 = tf.reduce_mean(tf.square(diff_V))

    # Loss term #2: boundary condition
    # no boundary condition for this problem

    # Loss term #3: initial/terminal condition
    target_terminal = u(X_terminal)
    fitted_terminal = model(t_terminal, X_terminal)

    L3 = tf.reduce_mean(tf.square(fitted_terminal - target_terminal))

    return L1, L3
示例#18
0
    def test_flatten(self):
        """Adds a (4, 32, 32, 3) -> (4, -1) reshape to the graph and converts it.

        The reshape result is not used directly; presumably
        `_test_conversion` picks the ops up from the default graph.
        """
        image_batch = tf.placeholder(dtype=tf.float32, shape=(4, 32, 32, 3))
        flattened_shape = (4, -1)
        tf.reshape(image_batch, shape=flattened_shape)

        self._test_conversion('flatten')
示例#19
0
def add_metric_fn_inputs(params,
                         cls_outputs,
                         box_outputs,
                         metric_fn_inputs,
                         max_detection_points=anchors.MAX_DETECTION_POINTS):
  """Selects top-k predictions and stores them in `metric_fn_inputs`.

  Class and box outputs of every level are flattened to
  [batch_size, -1, num_classes] and [batch_size, -1, 4] respectively and
  concatenated along axis 1 before the selection.

  Note: when `params['data_format']` is 'channels_first', the per-level
  entries of `cls_outputs` and `box_outputs` are replaced in place by their
  [0, 2, 3, 1] transposes.

  Args:
    params: a parameter dictionary that provides `min_level`, `max_level`,
      `batch_size`, `num_classes` and `data_format`.
    cls_outputs: a mapping from level to class logits.
    box_outputs: a mapping from level to box regression outputs.
    metric_fn_inputs: a dictionary that receives the keys `cls_outputs_all`,
      `box_outputs_all`, `indices_all` and `classes_all`.
    max_detection_points: an integer specifying the maximum number of
      detection points to keep. If it is <= 0, all anchors are kept and only
      the best-scoring class of each anchor is reported.
  """
  batch_size = params['batch_size']
  num_classes = params['num_classes']

  # Flatten every level and join them into one tensor per output type.
  flat_cls_per_level = []
  flat_box_per_level = []
  for lvl in range(params['min_level'], params['max_level'] + 1):
    if params['data_format'] == 'channels_first':
      cls_outputs[lvl] = tf.transpose(cls_outputs[lvl], [0, 2, 3, 1])
      box_outputs[lvl] = tf.transpose(box_outputs[lvl], [0, 2, 3, 1])

    flat_cls = tf.reshape(cls_outputs[lvl], [batch_size, -1, num_classes])
    flat_cls_per_level.append(flat_cls)
    flat_box = tf.reshape(box_outputs[lvl], [batch_size, -1, 4])
    flat_box_per_level.append(flat_box)
  all_cls = tf.concat(flat_cls_per_level, 1)
  all_box = tf.concat(flat_box_per_level, 1)

  if max_detection_points > 0:
    # Top-k over the flattened (anchor, class) scores; the flat index is then
    # split back into an anchor index and a class index.
    # Due to some issues, top_k is currently slow in graph mode.
    flat_scores = tf.reshape(all_cls, [batch_size, -1])
    _, flat_topk = tf.math.top_k(flat_scores,
                                 k=max_detection_points,
                                 sorted=False)
    indices = flat_topk // num_classes
    classes = flat_topk % num_classes
    anchor_class_pairs = tf.stack([indices, classes], axis=2)
    selected_cls = tf.gather_nd(all_cls, anchor_class_pairs, batch_dims=1)
    selected_box = tf.gather_nd(
        all_box, tf.expand_dims(indices, 2), batch_dims=1)
  else:
    # Keep every anchor, reporting only its highest-scoring class.
    classes = tf.math.argmax(all_cls, axis=-1)
    num_anchors = all_cls.shape[1]
    anchor_ids = tf.expand_dims(tf.range(num_anchors), axis=0)
    indices = tf.tile(anchor_ids, [batch_size, 1])
    selected_cls = tf.reduce_max(all_cls, -1)
    selected_box = all_box

  selections = (
      ('cls_outputs_all', selected_cls),
      ('box_outputs_all', selected_box),
      ('indices_all', indices),
      ('classes_all', classes),
  )
  for name, tensor in selections:
    metric_fn_inputs[name] = tensor
    def __call__(self,
                 images_saccader,
                 images_classnet,
                 num_times,
                 is_training_saccader=False,
                 is_training_classnet=False,
                 policy="learned",
                 stop_gradient_after_representation=False):
        """Runs the Saccader, then classifies its glimpses with the classnet.

        `Saccader.__call__` is run on `images_saccader` to obtain the glimpse
        locations. A glimpse is then extracted from `images_classnet` at
        every location, all glimpses are classified by the network selected
        by `self.classnet_type`, and the resulting logits are averaged over
        the `num_times` glimpses.

        Side effects: sets `self.glimpse_shape_saccader`, may set
        `self.glimpse_shape_classnet`, overwrites `self.glimpse_shape`, and
        on the first (non-reuse) call fills `self.var_list_saccader`,
        `self.var_list_classnet`, extends `self.var_list` and rebuilds
        `self.init_op`.

        Args:
          images_saccader: images fed to the Saccader.
          images_classnet: images the glimpses are extracted from.
          num_times: number of glimpses; used to reshape the classnet logits
            to (num_times, batch_size, -1).
          is_training_saccader: forwarded to `Saccader.__call__` as
            `is_training`.
          is_training_classnet: `is_training` flag of the classnet.
          policy: forwarded to `Saccader.__call__`.
          stop_gradient_after_representation: forwarded to
            `Saccader.__call__`.

        Returns:
          A tuple `(logits, locations_t, best_locations_t, endpoints)` where
          `logits` are the averaged classnet logits and `endpoints` is the
          Saccader endpoints dict with an extra "classnet" entry.

        Raises:
          ValueError: if `self.classnet_type` is not "nasnet" or
            "resnet_v2_50".
        """
        # The Saccader's own logits are discarded; only its locations and
        # endpoints are used here.
        _, locations_t, best_locations_t, endpoints = Saccader.__call__(
            self,
            images_saccader,
            num_times,
            is_training=is_training_saccader,
            policy=policy,
            stop_gradient_after_representation=
            stop_gradient_after_representation)

        self.glimpse_shape_saccader = self.glimpse_shape
        image_size_saccader = images_saccader.shape.as_list()[1]
        image_size_classnet = images_classnet.shape.as_list()[1]
        if self.glimpse_shape_classnet[0] < 0:
            # A negative size means "derive it": scale the Saccader glimpse
            # by the ratio of the two image sizes.
            self.glimpse_shape_classnet = tuple([
                int(image_size_classnet / image_size_saccader *
                    self.glimpse_shape[0])
            ] * 2)
        self.glimpse_shape = self.glimpse_shape_classnet

        images_glimpse_t = []
        for locations in locations_t:
            images_glimpse = utils.extract_glimpse(
                images_classnet,
                size=self.glimpse_shape_classnet,
                offsets=locations)
            images_glimpse_t.append(images_glimpse)

        batch_size = tf.shape(images_classnet)[0]
        # All glimpses are stacked along the batch axis so the classnet runs
        # once over num_times * batch_size images.
        images_glimpse_t = tf.concat(images_glimpse_t, axis=0)

        variables_before = set(tf.global_variables())
        # Reuse the classnet variables on every call after the first one.
        reuse = bool(self.var_list_classnet)
        with tf.variable_scope(self.variable_scope_classnet, reuse=reuse):
            if self.classnet_type == "nasnet":
                classnet_config = nasnet.large_imagenet_config()
                classnet_config.use_aux_head = 0
                classnet_config.drop_path_keep_prob = 1.0
                with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
                    classnet_logits, endpoints_ = nasnet.build_nasnet_large(
                        images_glimpse_t,
                        self.num_classes,
                        is_training=is_training_classnet,
                        config=classnet_config)
            elif self.classnet_type == "resnet_v2_50":
                network = nets_factory.get_network_fn(
                    "resnet_v2_50",
                    self.num_classes,
                    is_training=is_training_classnet)
                classnet_logits, endpoints_ = network(images_glimpse_t)
            else:
                # Without this branch an unknown type would only surface
                # later as an UnboundLocalError on `endpoints_`.
                raise ValueError(
                    "Unsupported classnet_type: %r (expected 'nasnet' or "
                    "'resnet_v2_50')." % (self.classnet_type,))

        endpoints["classnet"] = endpoints_
        variables_after = set(tf.global_variables())
        logits_t = tf.reshape(classnet_logits, (num_times, batch_size, -1))
        logits = tf.reduce_mean(logits_t, axis=0)
        if not reuse:
            self.var_list_saccader = self.var_list_classification + self.var_list_location
            # Variables created inside the classnet scope, minus any
            # global_step variable.
            self.var_list_classnet = [
                v for v in list(variables_after - variables_before)
                if "global_step" not in v.op.name
            ]
            self.var_list.extend(self.var_list_classnet)
            self.init_op = tf.variables_initializer(var_list=self.var_list)

        return logits, locations_t, best_locations_t, endpoints
示例#21
0
def tf_nn(nx, nt, num_hidden_neurons, activations, num_iter=100000, eta=0.01):
    """Trains a dense network on an (x, t) grid and plots it against `u_e`.

    A trial solution `g_t = (1 - t) * u(x) + x * (1 - x) * t * N(x, t)` is
    built from the network output `N`, and the network is trained with Adam
    to minimise the mean squared value of `dg/dt - d2g/dx2` on the grid.
    The result is compared with the module-level `u_e(x, t)` and shown in
    three surface plots and three line plots.

    Relies on the module-level functions `u` and `u_e`.

    Args:
      nx: number of grid points along x in [0, 1].
      nt: number of grid points along t in [0, 1].
      num_hidden_neurons: sequence with the width of each hidden layer.
      activations: sequence with the activation of each hidden layer,
        indexed in step with `num_hidden_neurons`.
      num_iter: number of training iterations.
      eta: learning rate passed to `tf.train.AdamOptimizer`.

    Returns:
      Array of shape (nt, nx) with the absolute difference between `u_e`
      and the trained trial solution.
    """
    tf.reset_default_graph()

    # Set a seed to ensure getting the same results from every run
    tf.set_random_seed(4155)

    x_np = np.linspace(0, 1, nx)
    t_np = np.linspace(0, 1, nt)

    # Both grids have shape (nt, nx); they are reused for plotting below.
    X, T = np.meshgrid(x_np, t_np)

    x = X.ravel()
    t = T.ravel()

    ## The construction phase
    zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1))
    x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1))
    t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1))

    pts = tf.concat([x, t], 1)  # input layer

    # NOTE(review): these two tensors are not used afterwards. They are kept
    # so the graph is built exactly as before; op-level random seeds may
    # depend on graph construction order.
    tf.convert_to_tensor(X)
    tf.convert_to_tensor(T)

    # Define layer structure
    with tf.name_scope('dnn'):
        previous_layer = pts

        for l, width in enumerate(num_hidden_neurons):
            previous_layer = tf.layers.dense(previous_layer,
                                             width,
                                             name=('hidden%d' % (l + 1)),
                                             activation=activations[l])

        dnn_output = tf.layers.dense(previous_layer,
                                     1,
                                     name='output',
                                     activation=None)

    # Define loss function
    # trial function satisfies boundary conditions and initial condition
    with tf.name_scope('loss'):
        g_t = (1 - t) * u(x) + x * (1 - x) * t * dnn_output
        g_t_d2x = tf.gradients(tf.gradients(g_t, x), x)
        g_t_dt = tf.gradients(g_t, t)
        loss = tf.losses.mean_squared_error(zeros, g_t_dt[0] - g_t_d2x[0])

    # Define optimizer
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(eta)
        training_op = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    g_e = u_e(x, t)

    with tf.Session() as sess:
        init.run()
        for i in range(num_iter):
            sess.run(training_op)

            # Report the loss every 1000 iterations.
            if i % 1000 == 0:
                print(loss.eval())

        g_e = g_e.eval()  # analytical solution
        g_dnn = g_t.eval()  # NN solution

    diff = np.abs(g_e - g_dnn)
    print(
        'Max absolute difference between analytical solution and TensorFlow DNN ',
        np.max(diff))

    G_e = g_e.reshape((nt, nx))
    G_dnn = g_dnn.reshape((nt, nx))
    diff = diff.reshape((nt, nx))

    # Plot the results: one surface per quantity, in the original order.
    surfaces = (
        ('Solution from the deep neural network w/ %d layer' %
         len(num_hidden_neurons), G_dnn),
        ('Analytical solution', G_e),
        ('Difference', diff),
    )
    for title, values in surfaces:
        fig = plt.figure(figsize=(10, 10))
        ax = fig.gca(projection='3d')
        ax.set_title(title)
        ax.plot_surface(X,
                        T,
                        values,
                        linewidth=0,
                        antialiased=False,
                        cmap=cm.viridis)
        ax.set_ylabel('Time $t$')
        ax.set_xlabel('Position $x$')

    ## Compare DNN and analytical solution at the first, middle and last time
    for time_index in (0, int(nt / 2), nt - 1):
        plt.figure(figsize=(10, 10))
        plt.title("Computed solutions at time = %g" % t_np[time_index])
        plt.plot(x_np, G_dnn[time_index, :])
        plt.plot(x_np, G_e[time_index, :])
        plt.legend(['dnn', 'analytical'])

    plt.show()

    return diff
示例#22
0
    def test_padded_image_result_dict(self):
        """Checks `result_dict_for_batched_example` on a batch of two images.

        The call uses `scale_to_absolute=True` together with per-image
        `true_image_shapes` and `original_image_spatial_shapes`, and the
        test compares the returned boxes and keypoints with fixed expected
        values.
        """
        input_data_fields = fields.InputDataFields
        detection_fields = fields.DetectionResultFields
        image_keys = tf.constant([str(i) for i in range(2)])

        boxes_np = np.array(
            [[[0., 0., 1., 1.]], [[0.0, 0.0, 0.5, 0.5]]], dtype=np.float32)
        keypoints_np = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
                                dtype=np.float32)

        def _tiled_keypoints():
            """The three keypoints as a [2, 1, 3, 2] tensor (same per image)."""
            single = tf.reshape(tf.constant(keypoints_np),
                                shape=[1, 1, 3, 2])
            return tf.tile(single, multiples=[2, 1, 1, 1])

        detections = {
            detection_fields.detection_boxes: tf.constant(boxes_np),
            detection_fields.detection_scores: tf.constant([[1.], [1.]]),
            detection_fields.detection_classes: tf.constant([[1], [2]]),
            detection_fields.num_detections: tf.constant([1, 1]),
            detection_fields.detection_keypoints: _tiled_keypoints(),
        }

        # Groundtruth reuses the detection boxes and keypoints.
        groundtruth = {
            input_data_fields.groundtruth_boxes: tf.constant(boxes_np),
            input_data_fields.groundtruth_classes: tf.constant([[1.], [1.]]),
            input_data_fields.groundtruth_keypoints: _tiled_keypoints(),
        }

        image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)

        true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
        original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])

        result_tensors = eval_util.result_dict_for_batched_example(
            image,
            image_keys,
            detections,
            groundtruth,
            scale_to_absolute=True,
            true_image_shapes=true_image_shapes,
            original_image_spatial_shapes=original_image_spatial_shapes,
            max_gt_boxes=tf.constant(1))

        with self.test_session() as sess:
            outputs = sess.run(result_tensors)

            # Groundtruth.
            expected_gt_boxes = [[[0., 0., 200., 200.]],
                                 [[0.0, 0.0, 150., 150.]]]
            self.assertAllEqual(
                expected_gt_boxes,
                outputs[input_data_fields.groundtruth_boxes])
            expected_gt_keypoints = [
                [[[0., 0.], [100., 100.], [200., 200.]]],
                [[[0., 0.], [150., 150.], [300., 300.]]],
            ]
            self.assertAllClose(
                expected_gt_keypoints,
                outputs[input_data_fields.groundtruth_keypoints])

            # Detections.
            expected_det_boxes = [[[0., 0., 200., 200.]],
                                  [[0.0, 0.0, 75., 150.]]]
            self.assertAllEqual(
                expected_det_boxes,
                outputs[detection_fields.detection_boxes])
            expected_det_keypoints = [
                [[[0., 0.], [100., 100.], [200., 200.]]],
                [[[0., 0.], [75., 150.], [150., 300.]]],
            ]
            self.assertAllClose(
                expected_det_keypoints,
                outputs[detection_fields.detection_keypoints])
def multilevel_roi_align(features, boxes, box_levels, output_size,
                         num_samples_per_cell_y=1, num_samples_per_cell_x=1,
                         align_corners=False, extrapolation_value=0.0,
                         scope=None):
  """Applies RoI Align op and returns feature for boxes.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, this function selectively crops
  and resizes boxes from the corresponding feature maps.

  We follow the RoI Align technique in https://arxiv.org/pdf/1703.06870.pdf
  figure 3. Specifically, each box is subdivided uniformly into a grid
  consisting of output_size[0] x output_size[1] rectangular cells. Within each
  cell we select `num_samples_per_cell_y` x `num_samples_per_cell_x` points
  uniformly and compute feature values using bilinear interpolation. Finally,
  we average pool the interpolated values in each cell to obtain a
  [output_size[0], output_size[1], channels] feature.

  If `align_corners` is true, sampling points are uniformly spread such that
  corner points exactly overlap corners of the boxes.

  In this function we also follow the convention of treating feature pixels as
  point objects with no spatial extent.

  Args:
    features: A list of 4D float tensors of shape [batch_size, max_height,
      max_width, channels] containing features. Note that each feature map must
      have the same number of channels.
    boxes: A 3D float tensor of shape [batch_size, num_boxes, 4] containing
      boxes of the form [ymin, xmin, ymax, xmax] in normalized coordinates.
    box_levels: A 2D int32 tensor of shape [batch_size, num_boxes]
      representing the feature level index for each box.
    output_size: A list of two integers [size_y, size_x] indicating the output
      feature size for each box.
    num_samples_per_cell_y: Number of grid points to sample along y axis in each
      cell.
    num_samples_per_cell_x: Number of grid points to sample along x axis in each
      cell.
    align_corners: Whether to align the corner grid points exactly with box
      corners.
    extrapolation_value: a float value to use for extrapolation.
    scope: Scope name to use for this op.

  Returns:
    A 5D float tensor of shape [batch_size, num_boxes, output_size[0],
    output_size[1], channels] representing the cropped features.
  """
  with tf.name_scope(scope, 'MultiLevelRoIAlign'):
    # After padding, `features` is indexed below as a single 5D tensor:
    # [batch, level, height, width, channels].
    features, true_feature_shapes = pad_to_max_size(features)
    batch_size = tf.shape(features)[0]
    num_levels = features.get_shape().as_list()[1]
    max_feature_height = tf.shape(features)[2]
    max_feature_width = tf.shape(features)[3]
    num_filters = features.get_shape().as_list()[4]
    num_boxes = tf.shape(boxes)[1]

    # Convert boxes to absolute co-ordinates.
    true_feature_shapes = tf.cast(true_feature_shapes, dtype=boxes.dtype)
    true_feature_shapes = tf.gather(true_feature_shapes, box_levels)
    boxes *= tf.concat([true_feature_shapes - 1] * 2, axis=-1)

    # Total number of sampling points per box along each axis.
    size_y = output_size[0] * num_samples_per_cell_y
    size_x = output_size[1] * num_samples_per_cell_x
    box_grid_y, box_grid_x = box_grid_coordinate_vectors(
        boxes, size_y=size_y, size_x=size_x, align_corners=align_corners)
    (feature_grid_y0, feature_grid_x0, feature_grid_y1,
     feature_grid_x1) = feature_grid_coordinate_vectors(box_grid_y, box_grid_x)
    # Interleave the two neighbor coordinates of every sampling point, giving
    # 2 * size entries per box along each axis.
    feature_grid_y = tf.reshape(
        tf.stack([feature_grid_y0, feature_grid_y1], axis=3),
        [batch_size, num_boxes, -1])
    feature_grid_x = tf.reshape(
        tf.stack([feature_grid_x0, feature_grid_x1], axis=3),
        [batch_size, num_boxes, -1])
    feature_coordinates = ravel_indices(feature_grid_y, feature_grid_x,
                                        num_levels, max_feature_height,
                                        max_feature_width, box_levels)
    valid_indices = _valid_indicator(feature_grid_y, feature_grid_x,
                                     true_feature_shapes)
    # Invalid positions are marked with index -1 before gathering.
    feature_coordinates = tf.where(valid_indices, feature_coordinates,
                                   -1 * tf.ones_like(feature_coordinates))
    flattened_features = tf.reshape(features, [-1, num_filters])
    flattened_feature_values = _gather_valid_indices(flattened_features,
                                                     feature_coordinates,
                                                     extrapolation_value)
    features_per_box = tf.reshape(
        flattened_feature_values,
        [batch_size, num_boxes, size_y * 2, size_x * 2, num_filters])

    # Cast tensors into dtype of features.
    box_grid_y = tf.cast(box_grid_y, dtype=features_per_box.dtype)
    box_grid_x = tf.cast(box_grid_x, dtype=features_per_box.dtype)
    feature_grid_y0 = tf.cast(feature_grid_y0, dtype=features_per_box.dtype)
    feature_grid_x0 = tf.cast(feature_grid_x0, dtype=features_per_box.dtype)

    # RoI Align operation is a bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3 onto point y, x given by
    # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                       [f10, f11]]
    #
    # Unrolling the matrix multiplies gives us:
    # f(y, x) = (hy * hx) f00 + (hy * lx) f01 + (ly * hx) f10 + (lx * ly) f11
    # f(y, x) = w00 * f00 + w01 * f01 + w10 * f10 + w11 * f11
    #
    # This can be computed by applying pointwise multiplication and sum_pool in
    # a 2x2 window.
    ly = box_grid_y - feature_grid_y0
    lx = box_grid_x - feature_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx

    kernel_y = tf.reshape(
        tf.stack([hy, ly], axis=3), [batch_size, num_boxes, size_y * 2, 1])

    kernel_x = tf.reshape(
        tf.stack([hx, lx], axis=3), [batch_size, num_boxes, 1, size_x * 2])

    # Multiplier 4 is to make tf.nn.avg_pool behave like sum_pool.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    # The kernel gains a trailing axis so it broadcasts across channels.
    features_per_box *= tf.expand_dims(interpolation_kernel, axis=4)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, size_y * 2, size_x * 2, num_filters])

    # This combines the two pooling operations - sum_pool to perform bilinear
    # interpolation and avg_pool to pool the values in each bin.
    features_per_box = tf.nn.avg_pool(
        features_per_box,
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1], 'VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size[0], output_size[1], num_filters])

    return features_per_box
#helper that builds a bias variable from nothing but its shape
def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# Input placeholder: float32, any number of rows, vectorSize values per row.
x = tf.placeholder(tf.float32, [None, vectorSize])
# Scalar float32 placeholder fed at run time. Intended as the dropout keep
# probability (original note: 0.50 for training, 1.0 for validation and test).
keep_prob = tf.placeholder(tf.float32)
# Placeholder for the expected outputs: float32, labelSize values per row.
y_ = tf.placeholder(tf.float32, [None, labelSize])

# View each input vector as a 1 x vectorSize "image" with a single channel.
x_image0 = tf.reshape(x, [-1, 1, vectorSize, 1])
# Swap axes 1 and 3. Both have size 1 here, so the shape stays
# [-1, 1, vectorSize, 1].
x_image = tf.transpose(x_image0, perm=[0, 3, 2, 1])
# x_image is the single-channel tensor fed to the first convolution.

#1 LAYER*************************************************************************************
# First convolutional layer, with its weights declared explicitly (original
# note: so that they can be regularized).
# Filter shape [1, wd1, 1, w1]: a 1 x wd1 window, 1 input channel, w1 output
# channels. The original note cites 12 output channels and a 1x21 window --
# TODO confirm against the values of w1 and wd1.
W_conv1 = weight_variable([1, wd1, 1, w1])
# One bias per output channel, so the bias size must equal w1.
b_conv1 = bias_variable([w1])
# Stride-1 'SAME' convolution of the input with W_conv1, plus bias, then ReLU.
h_conv1 = tf.nn.relu(
    tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME') +
    b_conv1)
#max pooling with a 148 width window size, moving 148 in width by step
h_pool1 = tf.nn.max_pool(h_conv1,
示例#25
0
    def define_loss(self, features, outputs):
        """Build every training loss and return their unweighted mean.

        Seven losses are computed from the logits in `outputs` and the labels
        in `features`: intent, requested slots, categorical slot status and
        value, non-categorical slot status, and span start / end. Each loss is
        also written as a scalar summary under its own name.

        Args:
          features: dict of label and mask tensors, read by the keys used below.
          outputs: dict of logit tensors, read by the keys used below.

        Returns:
          The sum of the seven losses divided by their count.
        """

        def _flat_softmax_loss(label_ids, logits, num_classes, mask):
            # One-hot encode the labels, collapse every leading dimension, and
            # compute the masked softmax cross entropy over the flat rows.
            one_hot = tf.one_hot(label_ids, num_classes, dtype=tf.int32)
            return tf.losses.softmax_cross_entropy(
                tf.reshape(one_hot, [-1, num_classes]),
                tf.reshape(logits, [-1, num_classes]),
                weights=tf.reshape(mask, [-1]))

        losses = {}

        # ---- Intents ----
        # Logits: (batch_size, max_num_intents + 1).
        # Labels: (batch_size, max_num_intents).
        intent_labels = features["intent_status"]
        active_count = tf.reduce_sum(intent_labels, axis=1)
        active_count = tf.expand_dims(active_count, axis=1)
        # The extra leading column is the NONE intent: 1 minus the number of
        # active intents.
        none_column = tf.ones_like(active_count) - active_count
        # Shape: (batch_size, max_num_intents + 1).
        intent_targets = tf.concat([none_column, intent_labels], axis=1)
        losses["intent_loss"] = tf.losses.softmax_cross_entropy(
            intent_targets,
            outputs["logit_intent_status"],
            weights=features["is_real_example"])

        # ---- Requested slots ----
        # Shape: (batch_size, max_num_slots).
        req_labels = features["req_slot_status"]
        req_mask = tf.sequence_mask(
            features["req_slot_num"],
            maxlen=req_labels.get_shape().as_list()[-1])
        # Several slots may be requested in one utterance, hence a sigmoid
        # (multi-label) cross entropy rather than a softmax.
        losses["requested_slot_loss"] = tf.losses.sigmoid_cross_entropy(
            req_labels, outputs["logit_req_slot_status"], weights=req_mask)

        # ---- Categorical slot status ----
        # Logits: (batch_size, max_num_cat_slots, 3).
        cat_status_labels = features["cat_slot_status"]
        cat_mask = tf.sequence_mask(
            features["cat_slot_num"],
            maxlen=cat_status_labels.get_shape().as_list()[-1],
            dtype=tf.float32)
        losses["cat_slot_status_loss"] = _flat_softmax_loss(
            cat_status_labels, outputs["logit_cat_slot_status"], 3, cat_mask)

        # ---- Categorical slot values ----
        # Logits: (batch_size, max_num_cat_slots, max_num_slot_values).
        cat_value_logits = outputs["logit_cat_slot_value"]
        num_slot_values = cat_value_logits.get_shape().as_list()[-1]
        # Only slots whose status is active contribute to the value loss.
        cat_active = tf.cast(
            tf.equal(cat_status_labels, data_utils.STATUS_ACTIVE), tf.float32)
        losses["cat_slot_value_loss"] = _flat_softmax_loss(
            features["cat_slot_value"], cat_value_logits, num_slot_values,
            cat_mask * cat_active)

        # ---- Non-categorical slot status ----
        # Logits: (batch_size, max_num_noncat_slots, 3).
        noncat_status_labels = features["noncat_slot_status"]
        noncat_mask = tf.sequence_mask(
            features["noncat_slot_num"],
            maxlen=noncat_status_labels.get_shape().as_list()[-1],
            dtype=tf.float32)
        # Logits for padded (invalid) values are already masked.
        losses["noncat_slot_status_loss"] = _flat_softmax_loss(
            noncat_status_labels, outputs["logit_noncat_slot_status"], 3,
            noncat_mask)

        # ---- Non-categorical slot spans ----
        # Start / end logits: (batch_size, max_num_noncat_slots, max_num_tokens).
        start_logits = outputs["logit_noncat_slot_start"]
        end_logits = outputs["logit_noncat_slot_end"]
        num_tokens = start_logits.get_shape().as_list()[-1]
        # Only slots whose status is active contribute to the span losses.
        noncat_active = tf.cast(
            tf.equal(noncat_status_labels, data_utils.STATUS_ACTIVE),
            tf.float32)
        span_mask = noncat_mask * noncat_active
        losses["span_start_loss"] = _flat_softmax_loss(
            features["noncat_slot_value_start"], start_logits, num_tokens,
            span_mask)
        losses["span_end_loss"] = _flat_softmax_loss(
            features["noncat_slot_value_end"], end_logits, num_tokens,
            span_mask)

        for loss_name, loss in losses.items():
            tf.summary.scalar(loss_name, loss)
        return sum(losses.values()) / len(losses)
示例#26
0
 def decode_image(image):
     """Decode raw uint8 bytes into a flat 784-element float32 tensor in [0.0, 1.0]."""
     pixels = tf.decode_raw(image, tf.uint8)
     pixels = tf.reshape(tf.cast(pixels, tf.float32), [784])
     # Rescale from [0, 255] to [0.0, 1.0].
     return pixels / 255.0
示例#27
0
# del_Y: elementwise -Y / Y_pred.
# NOTE(review): this matches the derivative of a cross-entropy loss
# -sum(Y * log(Y_pred)) with respect to Y_pred -- confirm the loss used.
del_Y = -tf.divide(Y, Y_pred)

# --- del_W3 calculation ---
# delz3_delW3: start with H2 followed by one zero block along axis 1 ...
delz3_delW3_elem = tf.concat([H2, tf.zeros_like(H2)], 1)
# ... then append 8 more zero blocks, giving H2 plus 9 zero blocks in total.
for k in range(8):
    delz3_delW3_elem = tf.concat([delz3_delW3_elem, tf.zeros_like(H2)], 1)
# Build 10 copies, the k-th one rolled by k positions along axis 1, and stack
# them along a new axis 2.
delz3_delW3_list = []
for k in range(10):
    delz3_delW3_list.append(tf.roll(delz3_delW3_elem, k, 1))
delz3_delW3 = tf.stack(delz3_delW3_list, 2)

# dely_delz3 = diag(Y_pred) - Y_pred * Y_pred^T.
# NOTE(review): this has the form of a softmax Jacobian; presumably Y_pred is
# a softmax output stored as a column vector -- confirm.
temp = -tf.matmul(Y_pred, Y_pred, transpose_b=True)
# The diagonal term is reshaped to a square matrix sized by Y_pred's first
# dimension.
temp_diag = tf.reshape(tf.diag(Y_pred), [Y_pred.shape[0], Y_pred.shape[0]])
dely_delz3 = tf.add(temp, temp_diag)

# tempz3 = dely_delz3 x del_Y; it is reused by every gradient below.
tempz3 = tf.matmul(dely_delz3, del_Y)
# Gradient for W3, reshaped to W3's own shape.
del_W3 = tf.reshape(tf.matmul(delz3_delW3, tempz3), W3.shape)

# --- del_H2 calculation ---
# W3 x tempz3, reshaped to H2's shape.
del_H2 = tf.reshape(tf.matmul(W3, tempz3), H2.shape)

# --- del_W3_0 calculation ---
# del_W3_0 is tempz3 itself (presumably the gradient for the layer's bias
# term -- confirm).
del_W3_0 = tempz3

#%-----------Backward Propagation for Second Hidden Layer---------------

#del_W2 Calculation
示例#28
0
 def decode_label(label):
     """Decode a raw byte string holding one uint8 value into a scalar int32 tensor."""
     raw_bytes = tf.decode_raw(label, tf.uint8)  # tf.string -> [tf.uint8]
     scalar_label = tf.reshape(raw_bytes, [])  # drop the length-1 axis
     return tf.to_int32(scalar_label)
示例#29
0
    def _build_loss(self):
        """Builds the loss tensor, to be minimized by the optimizer.

        Steps, in order:
          1. Create the data reader and read the image / segmentation stacks
             and the intrinsics matrix.
          2. Convert the segmentation stack into per-frame boolean object
             masks (optionally completed to bounding boxes), stored back in
             `self.seg_stack` as floats.
          3. Predict a depth map (and its inverse, the disparity) for each of
             the SEQ_LENGTH frames.
          4. For every pair of consecutive frames, predict motion in both
             directions and accumulate the photometric (RGB, SSIM), depth
             consistency, rotation / translation cycle-consistency and
             smoothness losses.
          5. Scale each term by its weight and sum them into
             `self.total_loss`.

        Raises:
          RuntimeError: if no intrinsics matrix was read and
            `self.learn_intrinsics` is off.
        """
        self.reader = reader.DataReader(self.data_dir,
                                        self.batch_size,
                                        self.img_height,
                                        self.img_width,
                                        SEQ_LENGTH,
                                        1,
                                        self.file_extension,
                                        self.random_scale_crop,
                                        reader.FLIP_RANDOM,
                                        self.random_color,
                                        self.imagenet_norm,
                                        self.shuffle,
                                        self.input_file,
                                        queue_size=self.queue_size)

        (self.image_stack, self.image_stack_norm, self.seg_stack,
         self.intrinsic_mat, _) = self.reader.read_data()
        # When intrinsics are learned, any matrix that was read is discarded.
        if self.learn_intrinsics:
            self.intrinsic_mat = None
        if self.intrinsic_mat is None and not self.learn_intrinsics:
            raise RuntimeError(
                'Could not read intrinsic matrix. Turn learn_intrinsics on to learn it instead of loading it.'
            )
        self.export('self.image_stack', self.image_stack)

        # Build one boolean mask per batch element and frame that marks every
        # pixel belonging to a sufficiently large, non-background object.
        object_masks = []
        for i in range(self.batch_size):
            object_ids = tf.unique(tf.reshape(self.seg_stack[i], [-1]))[0]
            object_masks_i = []
            for j in range(SEQ_LENGTH):
                current_seg = self.seg_stack[i, :, :, j * 3]  # (H, W)

                def process_obj_mask(obj_id):
                    """Create a mask for obj_id, skipping the background mask."""
                    mask = tf.logical_and(
                        tf.equal(current_seg, obj_id),  # pylint: disable=cell-var-from-loop
                        tf.not_equal(tf.cast(0, tf.uint8), obj_id))
                    # Leave out very small masks, that are most often errors.
                    size = tf.reduce_sum(tf.to_int32(mask))
                    mask = tf.logical_and(mask,
                                          tf.greater(size, MIN_OBJECT_AREA))
                    if not self.boxify:
                        return mask
                    # Complete the mask to its bounding box.
                    binary_obj_masks_y = tf.reduce_any(mask,
                                                       axis=1,
                                                       keepdims=True)
                    binary_obj_masks_x = tf.reduce_any(mask,
                                                       axis=0,
                                                       keepdims=True)
                    return tf.logical_and(binary_obj_masks_y,
                                          binary_obj_masks_x)

                object_mask = tf.map_fn(  # (N, H, W)
                    process_obj_mask, object_ids, dtype=tf.bool)
                # Union of the per-object masks.
                object_mask = tf.reduce_any(object_mask, axis=0)
                object_masks_i.append(object_mask)
            object_masks.append(tf.stack(object_masks_i, axis=-1))

        self.seg_stack = tf.to_float(tf.stack(object_masks, axis=0))
        tf.summary.image('Masks', self.seg_stack)

        with tf.variable_scope(DEPTH_SCOPE):
            # Organized by ...[i][scale].  Note that the order is flipped in variables in build_loss() below.
            self.disp = {}
            self.depth = {}

            # Parabolic rampup of the noise over LAYER_NORM_NOISE_RAMPUP_STEPS steps.
            # We stop at 0.5 because this is the value above which the multiplicative
            # noise we use can become negative. Further experimentation is needed to
            # find if non-negativity is indeed needed.
            noise_stddev = 0.5 * tf.square(
                tf.minimum(
                    tf.to_float(self.global_step) /
                    float(LAYER_NORM_NOISE_RAMPUP_STEPS), 1.0))

            def _normalizer_fn(x, is_train, name='bn'):
                return randomized_layer_normalization.normalize(
                    x, is_train=is_train, name=name, stddev=noise_stddev)

            # AUTO_REUSE shares the depth network's variables across frames.
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                for i in range(SEQ_LENGTH):
                    image = self.image_stack_norm[:, :, :, 3 * i:3 * (i + 1)]
                    self.depth[
                        i] = depth_prediction_net.depth_prediction_resnet18unet(
                            image, True, self.weight_reg, _normalizer_fn)
                    self.disp[i] = 1.0 / self.depth[i]

        with tf.name_scope('compute_loss'):
            self.reconstr_loss = 0
            self.smooth_loss = 0
            self.ssim_loss = 0
            self.depth_consistency_loss = 0

            # Smoothness.
            if self.smooth_weight > 0:
                for i in range(SEQ_LENGTH):
                    disp_smoothing = self.disp[i]
                    # Perform depth normalization, dividing by the mean.
                    mean_disp = tf.reduce_mean(disp_smoothing,
                                               axis=[1, 2, 3],
                                               keepdims=True)
                    disp_input = disp_smoothing / mean_disp
                    self.smooth_loss += _depth_smoothness(
                        disp_input, self.image_stack[:, :, :,
                                                     3 * i:3 * (i + 1)])

            self.rot_loss = 0.0
            self.trans_loss = 0.0

            def add_result_to_loss_and_summaries(endpoints, i, j):
                # Accumulate one direction's consistency results into the
                # running loss totals and log its validity mask.
                tf.summary.image(
                    'valid_mask%d%d' % (i, j),
                    tf.expand_dims(endpoints['depth_proximity_weight'], -1))
                self.depth_consistency_loss += endpoints['depth_error']
                self.reconstr_loss += endpoints['rgb_error']
                self.ssim_loss += 0.5 * endpoints['ssim_error']
                self.rot_loss += endpoints['rotation_error']
                self.trans_loss += endpoints['translation_error']

            self.motion_smoothing = 0.0
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                for i in range(SEQ_LENGTH - 1):
                    j = i + 1
                    depth_i = self.depth[i][:, :, :, 0]
                    depth_j = self.depth[j][:, :, :, 0]
                    image_j = self.image_stack[:, :, :, 3 * j:3 * (j + 1)]
                    image_i = self.image_stack[:, :, :, i * 3:(i + 1) * 3]
                    # We select a pair of consecutive images (and their respective predicted depth maps).
                    # Now we have the network predict a motion field that connects the two.
                    # We feed the pair of images into the network, once in forward order and then in reverse order.
                    # The results are fed into the loss calculation.
                    # The following losses are calculated:
                    # - RGB and SSIM photometric consistency.
                    # - Cycle consistency of rotations and translations for every pixel.
                    # - L1 smoothness of the disparity and the motion field.
                    # - Depth consistency
                    rot, trans, trans_res, mat = motion_prediction_net.motion_field_net(
                        images=tf.concat([image_i, image_j], axis=-1),
                        weight_reg=self.weight_reg)
                    inv_rot, inv_trans, inv_trans_res, inv_mat = (
                        motion_prediction_net.motion_field_net(
                            images=tf.concat([image_j, image_i], axis=-1),
                            weight_reg=self.weight_reg))

                    if self.learn_intrinsics:
                        # Average the intrinsics predicted in both directions.
                        intrinsic_mat = 0.5 * (mat + inv_mat)
                    else:
                        intrinsic_mat = self.intrinsic_mat[:, 0, :, :]

                    def dilate(x):
                        # Dilation by n pixels is roughly max pooling by 2 * n + 1.
                        p = self.foreground_dilation * 2 + 1
                        return tf.nn.max_pool(x, [1, p, p, 1], [1] * 4, 'SAME')

                    # The residual translation is applied only where the
                    # (dilated) object mask is set.
                    trans += trans_res * dilate(self.seg_stack[:, :, :,
                                                               j:j + 1])
                    inv_trans += inv_trans_res * dilate(
                        self.seg_stack[:, :, :, i:i + 1])

                    tf.summary.image('trans%d%d' % (i, i + 1), trans)
                    tf.summary.image('trans%d%d' % (i + 1, i), inv_trans)

                    tf.summary.image('trans_res%d%d' % (i + 1, i),
                                     inv_trans_res)
                    tf.summary.image('trans_res%d%d' % (i, i + 1), trans_res)

                    self.motion_smoothing += _smoothness(trans)
                    self.motion_smoothing += _smoothness(inv_trans)
                    tf.summary.scalar(
                        'trans_stdev',
                        tf.sqrt(0.5 * tf.reduce_mean(
                            tf.square(trans) + tf.square(inv_trans))))

                    transformed_depth_j = transform_depth_map.using_motion_vector(
                        depth_j, trans, rot, intrinsic_mat)

                    add_result_to_loss_and_summaries(
                        consistency_losses.rgbd_and_motion_consistency_loss(
                            transformed_depth_j, image_j, depth_i, image_i,
                            rot, trans, inv_rot, inv_trans), i, j)

                    transformed_depth_i = transform_depth_map.using_motion_vector(
                        depth_i, inv_trans, inv_rot, intrinsic_mat)

                    add_result_to_loss_and_summaries(
                        consistency_losses.rgbd_and_motion_consistency_loss(
                            transformed_depth_i, image_i, depth_j, image_j,
                            inv_rot, inv_trans, rot, trans), j, i)

            # Build the total loss as composed of L1 reconstruction, SSIM, smoothing
            # and object size constraint loss as appropriate.
            self.reconstr_loss *= self.reconstr_weight
            self.export('self.reconstr_loss', self.reconstr_loss)
            self.total_loss = self.reconstr_loss
            if self.smooth_weight > 0:
                self.smooth_loss *= self.smooth_weight
                self.total_loss += self.smooth_loss
                self.export('self.smooth_loss', self.smooth_loss)
            if self.ssim_weight > 0:
                self.ssim_loss *= self.ssim_weight
                self.total_loss += self.ssim_loss
                self.export('self.ssim_loss', self.ssim_loss)

            if self.motion_smoothing_weight > 0:
                self.motion_smoothing *= self.motion_smoothing_weight
                self.total_loss += self.motion_smoothing
                self.export('self.motion_sm_loss', self.motion_smoothing)

            if self.depth_consistency_loss_weight:
                self.depth_consistency_loss *= self.depth_consistency_loss_weight
                self.total_loss += self.depth_consistency_loss
                self.export('self.depth_consistency_loss',
                            self.depth_consistency_loss)

            # Rotation and translation consistency are always added.
            self.rot_loss *= self.rotation_consistency_weight
            self.trans_loss *= self.translation_consistency_weight
            self.export('rot_loss', self.rot_loss)
            self.export('trans_loss', self.trans_loss)

            self.total_loss += self.rot_loss
            self.total_loss += self.trans_loss

            self.export('self.total_loss', self.total_loss)
示例#30
0
 def _split_heads(x, length, num_heads, depth):
     """Unpack the trailing axis into heads and move the head axis ahead of length.

     Input shape:  (bs, length, num_heads * depth)
     Output shape: (bs, num_heads, length, depth)
     """
     per_head = tf.reshape(x, (-1, length, num_heads, depth))
     return tf.transpose(per_head, perm=[0, 2, 1, 3])