示例#1
0
def DistanceBetweenCentroidsAndBBoxesFastAndFurious(centroids, bboxes, masks):
    """Computes the distance between centroids and bboxes.

  The distance/loss is loosely following the 'Fast and Furious' paper by Luo et
  al., CVPR'18.  This is just one way of calculating the distances. We will
  probably develop other ways.

  Args:
    centroids: [..., 4]. x/y/w/h for bboxes.
    bboxes: [..., 4]. ymin/xmin/ymax/xmax for bboxes.
    masks: [...]. masks[i] == 1 means i-th entry (centroids[i] and bboxes[i])
      should be considered in the distance/loss calculation.

  Returns:
    A [...] tensor. i-th value is the distance measure of centroids[i] and
    bboxes[i].
  """
    x, y, w, h = tf.unstack(centroids, axis=-1, num=4)
    # "gt" suffix means 'ground truth'.
    x_gt, y_gt, w_gt, h_gt = tf.unstack(BBoxesToXYWH(bboxes), axis=-1, num=4)

    def Pos(x):
        return tf.maximum(tf.constant(1e-8, x.dtype), x)

    # The following terms are zeros when masks[i] is 0.
    l_x = py_utils.CheckNumerics(masks * (x - x_gt) / Pos(w_gt))
    l_y = py_utils.CheckNumerics(masks * (y - y_gt) / Pos(h_gt))
    s_w = py_utils.CheckNumerics(masks * tf.math.log(Pos(w) / Pos(w_gt)))
    s_h = py_utils.CheckNumerics(masks * tf.math.log(Pos(h) / Pos(h_gt)))
    return (_SmoothL1Norm(l_x) + _SmoothL1Norm(l_y) + _SmoothL1Norm(s_w) +
            _SmoothL1Norm(s_h))
示例#2
0
    def FProp(self, theta, inputs, paddings=None):
        """Apply group normalization.

    Args:
      theta: A NestedMap object containing weights' values of this layer and its
        children layers.
      inputs: The inputs tensor with shape [batch_size, height, width, channel].
      paddings: The paddings tensor with shape [batch_size, height]. Intended to
        be used for sequence processing where `height` is `time`.

    Returns:
      A single tensor as the output after applying group normalization, with
      the same shape as 'inputs'. Or a output, output_paddings pair if input
      paddings is not None.
    """
        p = self.params
        n, h, w, c = tf.unstack(tf.shape(inputs), axis=0, num=4)
        group_size = p.dim // p.num_groups
        num_groups = p.num_groups
        min_group_size = p.min_group_size if p.dim > p.min_group_size else p.dim
        if group_size <= min_group_size:
            group_size = min_group_size
            num_groups = p.dim // group_size

        with tf.name_scope(p.name):
            x = tf.reshape(inputs, [n, h, w, num_groups, group_size])
            if paddings is None:
                counts, means_ss, variance_ss, _, = tf.nn.sufficient_statistics(
                    x, axes=[1, 2, 4], keepdims=True)
                norm_mean, norm_variance = tf.nn.normalize_moments(
                    counts, means_ss, variance_ss, None)
            else:
                expanded_paddings = tf.reshape(paddings, [n, h, 1, 1, 1])
                norm_mean, norm_variance = ComputeMomentsWithPadding(
                    x, expanded_paddings, [1, 2, 4], keepdims=True)

            norm_mean = py_utils.CheckNumerics(
                norm_mean, 'mean of %s failed numeric check' % p.name)
            norm_variance = py_utils.CheckNumerics(
                norm_variance, 'variance of %s failed numeric check' % p.name)

            beta = theta.beta
            gamma = theta.gamma

            with tf.control_dependencies([
                    py_utils.assert_greater_equal(
                        norm_variance, tf.cast(0., norm_variance.dtype)),
                    py_utils.assert_shape_match([n, 1, 1, num_groups, 1],
                                                tf.shape(norm_mean)),
                    py_utils.assert_shape_match([n, 1, 1, num_groups, 1],
                                                tf.shape(norm_variance)),
            ]):
                x = (x - norm_mean) / tf.sqrt(norm_variance + self._epsilon)
                x = tf.reshape(x, [n, h, w, c])
                gn_output = x * gamma + beta
                gn_output = tf.reshape(gn_output, [n, h, w, c])
                if paddings is None:
                    return gn_output
                else:
                    return gn_output, paddings
示例#3
0
 def _testTransparentInputs(self,
                            num_layers=6,
                            dtype=tf.float32,
                            is_eval_mode=False):
     src_time = 5
     src_batch = 4
     emb_dims = 4
     encoder_outputs, tgts, num_hyps = self._Inputs(dtype)
     src_enc = tf.constant(
         np.random.normal(size=[src_time, src_batch, emb_dims, num_layers]),
         dtype=dtype)
     if not is_eval_mode:
         src_enc = tf.unstack(src_enc, axis=3)
     encoder_outputs.encoded = src_enc
     return (encoder_outputs, tgts, num_hyps)
    def _check_paddings(self, paddings):
        with tf.name_scope('check_paddings'):
            unpacked_paddings = tf.unstack(paddings)

            non_decr = []
            for t in unpacked_paddings:
                non_d = tf.is_non_decreasing(t)
                non_decr.append(non_d)
            all_non_decr = tf.stack(non_decr)

            paddings = py_utils.with_dependencies([
                tf.assert_equal(tf.reduce_any(tf.equal(paddings, 0.0)),
                                True,
                                message='must have at least one zero value.'),
                tf.assert_equal(
                    all_non_decr, True, message='must be non-decreasing')
            ], paddings)
            return paddings
示例#5
0
def TopKAccuracy(k, logits, labels, weights):
  """Compute top-k accuracy.

  Args:
    k: An int scalar. Top-k.
    logits: A [N, C] float tensor.
    labels: A [N] int vector.
    weights: A [N] float vector.

  Returns:
    A float scalar. The accuracy at precision k.
  """
  logits = py_utils.HasRank(logits, 2)
  n, _ = tf.unstack(tf.shape(logits), 2)
  labels = py_utils.HasShape(labels, [n])
  weights = py_utils.HasShape(weights, [n])
  correct = tf.nn.in_top_k(targets=labels, predictions=logits, k=k)
  return tf.reduce_sum(tf.cast(correct, weights.dtype) * weights) / tf.maximum(
      1e-8, tf.reduce_sum(weights))
示例#6
0
def ComputeXentOutput(softmax_layer,
                      softmax_theta,
                      activations,
                      labels,
                      num_samples=1):
  """Compute Softmax CrossEntropy output."""
  seqlen, batch, _ = tf.unstack(tf.shape(activations), num=3)
  if labels is None:
    # We can only compute the logits here.
    logits = softmax_layer.Logits(
        theta=softmax_theta,
        inputs=tf.reshape(activations, [seqlen * batch * num_samples, -1]))
    xent_output = py_utils.NestedMap(
        logits=tf.reshape(logits, [seqlen, batch, -1]))
  elif 'class_ids' in labels:
    # labels.class_ids: [len, batch]
    if num_samples > 1:
      class_ids = tf.tile(labels.class_ids, [1, num_samples])
      class_weights = tf.tile(labels.class_weights, [1, num_samples])
    else:
      class_ids = labels.class_ids
      class_weights = labels.class_weights
    xent_output = softmax_layer.FProp(
        theta=softmax_theta,
        inputs=activations,
        class_weights=class_weights,
        class_ids=class_ids)
  else:
    assert 'class_probabilities' in labels
    if num_samples > 1:
      class_probabilities = tf.tile(labels.class_probabilities,
                                    [1, num_samples])
      class_weights = tf.tile(labels.class_weights, [1, num_samples])
    else:
      class_probabilities = labels.class_probabilities
      class_weights = labels.class_weights
    xent_output = softmax_layer.FProp(
        theta=softmax_theta,
        inputs=activations,
        class_weights=class_weights,
        class_probabilities=class_probabilities)
  return xent_output
示例#7
0
    def ResidualsToBBoxes(self,
                          anchor_bboxes,
                          residuals,
                          min_angle_rad=-np.pi,
                          max_angle_rad=np.pi):
        r"""Converts anchor_boxes and residuals to predicted bboxes.

    This converts predicted residuals into bboxes using the following formulae::

      x_predicted = x_a + x_residual * diagonal_xy
      y_predicted = y_a + y_residual * diagonal_xy
      z_predicted = z_a + z_residual * dz_a

      dx_predicted = dx_a * exp(dx_residual)
      dy_predicted = dy_a * exp(dy_residual)
      dz_predicted = dz_a * exp(dz_residual)

      # Adding the residual, and bounding it between
      # [min_angle_rad, max_angle_rad]
      phi_predicted = NormalizeAngleRad(phi_a + phi_residual,
                                        min_angle_rad, max_angle_rad)

    These equations follow from those in LocalizationResiduals, where we solve
    for the \*_gt variables.

    Args:
      anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
        phi), corresponding to each anchor bbox parameters.
      residuals: tf.float32 of the same shape as anchor_bboxes containing
        predicted residuals at each anchor location.
      min_angle_rad: Scalar with the minimum angle allowed (before wrapping)
        in radians.
      max_angle_rad: Scalar with the maximum angle allowed (before wrapping)
        in radians. This value usually should be pi.

    Returns:
      A tf.float32 tensor of the same shape as anchor_bboxes with predicted
      bboxes.
    """
        anchor_bboxes_shape = py_utils.GetShape(anchor_bboxes)
        anchor_bboxes = py_utils.with_dependencies(
            [py_utils.assert_equal(anchor_bboxes_shape[-1], 7)], anchor_bboxes)
        residuals = py_utils.HasShape(residuals, anchor_bboxes_shape)

        x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = tf.unstack(anchor_bboxes,
                                                            num=7,
                                                            axis=-1)
        (x_residual, y_residual, z_residual, dx_residual, dy_residual,
         dz_residual, phi_residual) = tf.unstack(residuals, num=7, axis=-1)

        diagonal_xy = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

        x_predicted = x_a + x_residual * diagonal_xy
        y_predicted = y_a + y_residual * diagonal_xy
        z_predicted = z_a + z_residual * dz_a

        dx_predicted = dx_a * tf.exp(dx_residual)
        dy_predicted = dy_a * tf.exp(dy_residual)
        dz_predicted = dz_a * tf.exp(dz_residual)

        # We bound the angle between [min_angle_rad, max_angle_rad], which should
        # be passed in depending on the heading handling in the calling model.
        # If the model uses a sine(delta_phi) transformation in the loss, then it
        # cannot distinguish direction and a [0, np.pi]
        # [min_angle_rad, max_angle_rad] should be used.
        # If there is a heading encoding that is directional, most likely you
        # should use a [-np.pi, np.pi] [min_angle_rad, max_angle_rad].
        phi_predicted = phi_a + phi_residual
        phi_predicted = geometry.WrapAngleRad(phi_predicted, min_angle_rad,
                                              max_angle_rad)

        return tf.stack([
            x_predicted,
            y_predicted,
            z_predicted,
            dx_predicted,
            dy_predicted,
            dz_predicted,
            phi_predicted,
        ],
                        axis=-1)  # pyformat: disable
示例#8
0
    def LocalizationResiduals(self, anchor_bboxes, assigned_gt_bboxes):
        """Computes the anchor residuals for every bbox.

    For a given bbox, compute residuals in the following way:

      Let ``anchor_bbox = (x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a)``
      and ``assigned_gt_bbox = (x_gt, y_gt, z_gt, dx_gt, dy_gt, dz_gt, phi_gt)``

      Define ``diagonal_xy = sqrt(dx_a^2 + dy_a^2)``

      Then the corresponding residuals are given by::

        x_residual = (x_gt - x_a) / (diagonal_xy)
        y_residual = (y_gt - y_a) / (diagonal_xy)
        z_residual = (z_gt - z_a) / (dz_a)

        dx_residual = log(dx_gt / dx_a)
        dy_residual = log(dy_gt / dy_a)
        dz_residual = log(dz_gt / dz_a)

        phi_residual = phi_gt - phi_a

      The normalization for x and y residuals by the diagonal was first
      proposed by [1]. Intuitively, this reflects that objects can usually
      move freely in the x-y plane, including diagonally. On the other hand,
      moving in the z-axis (up and down) can be considered orthogonal to x-y.

      For phi_residual, one way to frame the loss is with
      SmoothL1(sine(phi_residual - phi_predicted)).
      The use of sine to wrap the phi residual was proposed by [2]. This
      stems from the observation that bboxes at phi and phi + pi are the same
      bbox, fully overlapping in 3D space, except that the direction is
      different. Note that the use of sine makes this residual invariant to
      direction when a symmetric loss like SmoothL1 is used. In
      ResidualsToBBoxes, we ensure that the phi predicted is between [0, pi).

    The Huber (SmoothL1) loss can then be applied to the delta between these
    target residuals and the model predicted residuals.

    [1] VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection
        https://arxiv.org/abs/1711.06396

    [2] SECOND: Sparsely Embedded Convolutional Detection
        https://pdfs.semanticscholar.org/5125/a16039cabc6320c908a4764f32596e018ad3.pdf

    Args:
      anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
        phi), corresponding to each anchor bbox parameters.
      assigned_gt_bboxes: tf.float32 of the same shape as anchor_bboxes
        containing the corresponding assigned ground-truth bboxes.

    Returns:
      A tf.float32 tensor of the same shape as anchor_bboxes with target
      residuals for every corresponding bbox.
    """
        anchor_bboxes_shape = py_utils.GetShape(anchor_bboxes)
        anchor_bboxes = py_utils.with_dependencies(
            [py_utils.assert_equal(anchor_bboxes_shape[-1], 7)], anchor_bboxes)
        assigned_gt_bboxes = py_utils.HasShape(assigned_gt_bboxes,
                                               anchor_bboxes_shape)

        x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = tf.unstack(anchor_bboxes,
                                                            num=7,
                                                            axis=-1)
        x_gt, y_gt, z_gt, dx_gt, dy_gt, dz_gt, phi_gt = tf.unstack(
            assigned_gt_bboxes, num=7, axis=-1)

        diagonal_xy = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

        # The anchor dimensions is usually a hard-coded param given to the input
        # generator and should not be 0. We use CheckNumerics to ensure that is the
        # case.
        x_residual = py_utils.CheckNumerics((x_gt - x_a) / diagonal_xy)
        y_residual = py_utils.CheckNumerics((y_gt - y_a) / diagonal_xy)
        z_residual = py_utils.CheckNumerics((z_gt - z_a) / dz_a)

        dx_residual = py_utils.CheckNumerics(tf.log(dx_gt / dx_a))
        dy_residual = py_utils.CheckNumerics(tf.log(dy_gt / dy_a))
        dz_residual = py_utils.CheckNumerics(tf.log(dz_gt / dz_a))

        phi_residual = phi_gt - phi_a

        return tf.stack([
            x_residual,
            y_residual,
            z_residual,
            dx_residual,
            dy_residual,
            dz_residual,
            phi_residual,
        ],
                        axis=-1)  # pyformat: disable
示例#9
0
    def _Extract(self, features):
        p = self.params

        source_id = py_utils.HasShape(features['image/source_id'], [])
        xmin = _Dense(features['object/image/bbox/xmin'])
        xmax = _Dense(features['object/image/bbox/xmax'])
        ymin = _Dense(features['object/image/bbox/ymin'])
        ymax = _Dense(features['object/image/bbox/ymax'])

        # 2d bounding box in image coordinates.
        bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
        bboxes_count = tf.shape(bboxes)[0]
        bboxes = py_utils.PadOrTrimTo(bboxes, [p.max_num_objects, 4])

        bboxes_padding = 1.0 - py_utils.PadOrTrimTo(tf.ones([bboxes_count]),
                                                    [p.max_num_objects])

        dim_xyz = tf.reshape(_Dense(features['object/velo/bbox/dim_xyz']),
                             [-1, 3])
        loc_xyz = tf.reshape(_Dense(features['object/velo/bbox/xyz']), [-1, 3])
        phi = tf.reshape(_Dense(features['object/velo/bbox/phi']), [-1, 1])
        # bboxes_3d is in [x, y, z, dx, dy, dz, phi].
        bboxes_3d = tf.concat([loc_xyz, dim_xyz, phi], axis=1)

        cx, cy, _, dx, dy, _, _ = tf.unstack(bboxes_3d, num=7, axis=-1)
        bboxes_td = tf.stack([
            cy - dy / 2,
            cx - dx / 2,
            cy + dy / 2,
            cx + dx / 2,
        ],
                             axis=-1)  # pyformat: disable
        bboxes_td = py_utils.PadOrTrimTo(bboxes_td, [p.max_num_objects, 4])

        has_3d_info = tf.cast(_Dense(features['object/has_3d_info']),
                              tf.float32)
        bboxes_3d_mask = py_utils.PadOrTrimTo(has_3d_info, [p.max_num_objects])
        bboxes_td_mask = bboxes_3d_mask

        # Fill in difficulties from bounding box height, truncation and occlusion.
        bb_height = ymax - ymin
        box_image_height = py_utils.PadOrTrimTo(bb_height, [p.max_num_objects])
        box_image_height *= bboxes_3d_mask

        # 0 to 3 indicating occlusion level. 0 means fully visible, 1 means partly,
        occlusion = tf.reshape(_Dense(features['object/occlusion']), [-1])
        occlusion = tf.cast(occlusion, tf.float32)
        occlusion = py_utils.PadOrTrimTo(occlusion, [p.max_num_objects])
        occlusion *= bboxes_3d_mask

        # Truncation: 0 -> not truncated, 1.0 -> truncated
        truncation = tf.reshape(_Dense(features['object/truncation']), [-1])
        truncation = py_utils.PadOrTrimTo(truncation, [p.max_num_objects])
        truncation *= bboxes_3d_mask

        difficulties = ComputeKITTIDifficulties(box_image_height, occlusion,
                                                truncation)
        difficulties = py_utils.PadOrTrimTo(difficulties, [p.max_num_objects])

        # Make a batch axis to call BBoxCorners, and take the first result back.
        bbox3d_corners = geometry.BBoxCorners(bboxes_3d[tf.newaxis, ...])[0]

        # Project the 3D bbox to the image plane.
        velo_to_image_plane = features['transform/velo_to_image_plane']
        bboxes3d_proj_to_image_plane = geometry.PointsToImagePlane(
            tf.reshape(bbox3d_corners, [-1, 3]), velo_to_image_plane)

        # Output is [num_objects, 8 corners per object, (x, y)].
        bboxes3d_proj_to_image_plane = tf.reshape(bboxes3d_proj_to_image_plane,
                                                  [-1, 8, 2])
        bboxes3d_proj_to_image_plane = py_utils.PadOrTrimTo(
            bboxes3d_proj_to_image_plane, [p.max_num_objects, 8, 2])

        texts = features['object/label'].values
        labels = ops.static_map_string_int(x=texts,
                                           keys=self.KITTI_CLASS_NAMES)

        labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
        texts = py_utils.PadOrTrimTo(texts, [p.max_num_objects])

        # Filter labels by setting bboxes_padding, bboxes_3d_mask, and
        # bboxes_td_mask appropriately.
        if p.filter_labels is not None:
            valid_labels = tf.constant([p.filter_labels])
            bbox_mask = tf.reduce_any(tf.equal(tf.expand_dims(labels, 1),
                                               valid_labels),
                                      axis=1)
            bbox_mask = tf.cast(bbox_mask, tf.float32)
            bboxes_padding = 1 - bbox_mask * (1 - bboxes_padding)
            filtered_bboxes_3d_mask = bboxes_3d_mask * bbox_mask
            bboxes_td_mask *= bbox_mask
        else:
            filtered_bboxes_3d_mask = bboxes_3d_mask

        # Placeholder for counting the number of laser points that reside within
        # each 3-d bounding box. This must be filled in outside of this function
        # based on the loaded 3-d laser points.
        bboxes_3d_num_points = tf.zeros([p.max_num_objects], dtype=tf.int32)
        bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                    [p.max_num_objects])

        # Pad bboxes_3d.
        bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])

        return py_utils.NestedMap(
            source_id=source_id,
            bboxes_count=bboxes_count,
            bboxes=bboxes,
            bboxes_padding=bboxes_padding,
            bboxes_3d=bboxes_3d,
            bboxes_3d_mask=filtered_bboxes_3d_mask,
            unfiltered_bboxes_3d_mask=bboxes_3d_mask,
            bboxes3d_proj_to_image_plane=bboxes3d_proj_to_image_plane,
            bboxes_td=bboxes_td,
            bboxes_td_mask=bboxes_td_mask,
            bboxes_3d_num_points=bboxes_3d_num_points,
            labels=labels,
            texts=texts,
            box_image_height=box_image_height,
            occlusion=occlusion,
            truncation=truncation,
            difficulties=difficulties)
示例#10
0
  def FProp(self, theta, inputs, paddings, state0=None, labels=None):
    """Computes xent loss given the language model input activations.

    Args:
      theta: A `.NestedMap` object containing weights' values of this
        layer and its children layers.
      inputs: Input activation. A tensor of shape [time, batch, model_dim].
      paddings: A 0/1 tensor of shape [time, batch].
      state0: Not used for Transformer.
      labels: If not None, a `.NestedMap` containing the following fields:

        - class_weights, a tensor with shape [time, batch] containing the
          weights for each target word.
        - class_ids, a tensor with shape [time, batch] of int32 dtype containing
          the target class labels.
        - class_probabilities, a tensor with shape [time, batch, vocab_size] of
          float values indicating class-membership probabilities.

    Returns:
      If `labels` is not None, returns (xent_output, None), where
      `xent_output` is a `.NestedMap` as defined by `SoftmaxLayer`'s return
      value. Otherwise, `xent_output` only contains the softmax logits.
    """
    p = self.params
    inputs = py_utils.HasRank(inputs, 3)
    seqlen, batch, _ = tf.unstack(tf.shape(inputs), num=3)
    inputs = py_utils.HasShape(inputs, [seqlen, batch, p.model_dim])
    paddings = py_utils.HasShape(paddings, [seqlen, batch])

    # [time, 1, model_dim]
    posit_embs = tf.expand_dims(
        self.position_emb.FProp(theta.position_emb, seqlen), 1)
    # [time, batch, model_dim]
    input_embs = inputs + posit_embs
    input_embs = self.input_dropout.FProp(theta.input_dropout, input_embs)

    layer_in = input_embs
    for layer, layer_theta in zip(self.trans, theta.trans):
      # [time, batch, model_dim]
      layer_out, _ = layer.FProp(layer_theta, layer_in, paddings)
      layer_in = layer_out

    if labels is None:
      # We can only compute the logits here.
      logits = self.softmax.Logits(
          theta=theta.softmax,
          inputs=tf.reshape(layer_out, [seqlen * batch, -1]))
      xent_output = py_utils.NestedMap(
          logits=tf.reshape(logits, [seqlen, batch, -1]))
    elif 'class_ids' in labels:
      xent_output = self.softmax.FProp(
          theta=theta.softmax,
          inputs=layer_out,
          class_weights=labels.class_weights,
          class_ids=labels.class_ids)
    else:
      assert 'class_probabilities' in labels
      xent_output = self.softmax.FProp(
          theta=theta.softmax,
          inputs=layer_out,
          class_weights=labels.class_weights,
          class_probabilities=labels.class_probabilities)
    xent_output.last_hidden = layer_out
    return xent_output, None
示例#11
0
  def FProp(self, theta, inputs, paddings, state0, labels=None):
    """Forward compute."""
    p = self.params

    ids = py_utils.HasRank(inputs, 2)
    paddings = py_utils.HasShape(paddings, tf.shape(ids))
    seqlen, batch = tf.unstack(tf.shape(inputs), num=2)
    assert state0

    paddings_3d = tf.expand_dims(paddings, axis=2)

    # RNNs
    if p.shared_emb:
      emb_act = [self.emb.EmbLookup(theta.emb, inputs)
                ] * (1 + p.number_of_experts)
    else:
      emb_act = [
          self.emb[i].EmbLookup(theta.emb[i], inputs)
          for i in range(1 + p.number_of_experts)
      ]
    state1 = py_utils.NestedMap(rnns=[])
    rnns_act = []
    for i, act in enumerate(emb_act):
      act, state = self.rnns[i].FProp(theta.rnns[i], act, paddings_3d,
                                      state0.rnns[i])
      act = py_utils.HasRank(act, 3)
      rnns_act += [act]
      state1.rnns += [state]

    # [time, batch, experts, dims].
    expert_stacked = tf.stack(rnns_act[1:], axis=2)

    # Compute gating softmax. The 0-th rnns is used as the expert
    # predictor.  Because SoftmaxLayer.Logits takes a matrix as input,
    # we reshape rnns_act[0], the domain predictor activation, to a
    # matrix here.
    act = tf.reshape(rnns_act[0], [seqlen * batch, -1])
    logits = self.domain_predictor_softmax.Logits(
        theta.domain_predictor_softmax, act)
    # [time, batch, experts]
    gating = tf.reshape(tf.nn.softmax(logits), [seqlen, batch, -1])

    # Mix the experts.
    # [time, batch, dims]
    combined = tf.squeeze(
        tf.matmul(
            # [time, batch, 1, experts]
            tf.expand_dims(gating, axis=2),
            # [time, batch, experts, dims]
            expert_stacked),
        axis=2)

    if p.add_postgating_rnn:
      # Note that this layer includes 1 or more RNN layers followed
      # by a softmax.
      xent_loss, state1.merge = self.merge.FProp(theta.merge, combined,
                                                 paddings, state0.merge, labels)
    else:
      xent_loss = self.output_softmax.FProp(
          theta=theta.output_softmax,
          inputs=combined,
          class_weights=labels.class_weights,
          class_ids=labels.class_ids)

    # return xent_loss, state1
    return xent_loss, state1
示例#12
0
  def FProp(self,
            theta,
            inputs,
            paddings,
            state0,
            labels=None,
            direct_features=None):
    """Computes xent loss given the language model input activations.

    Args:
      theta: A `.NestedMap` object containing weights' values of this
        layer and its children layers.
      inputs: input activation. A tensor of shape [time, batch, dims].
      paddings: a 0/1 tensor of shape [time, batch].
      state0: A `.NestedMap` containing the initial recurrent state.
      labels: If not None, a `.NestedMap` containing the following fields.

        - class_weights, a tensor with shape [time, batch] containing the
          weights for each target word.
        - class_ids, a tensor with shape [time, batch] of int32 dtype containing
          the target class labels.
        - class_probabilities, a tensor with shape [time, batch, vocab_size] of
          float values indicating class-membership probabilities.
      direct_features:
        If not None, a tensor of [time, batch, direct_feature_dims] that is
        concatenated to the output of the last RNN layer.

    Returns:
      If `labels` is not None, returns (xent_output, state1), where
      `xent_output` is a `.NestedMap` as defined by `SoftmaxLayer`'s return
      value and `state1` is the next recurrent state. Otherwise,
      `xent_output` contains the softmax logits, probabilities (.probs) and
      log-probabilities (.log_probs).
    """
    inputs = py_utils.HasRank(inputs, 3)
    seqlen, batch, _ = tf.unstack(tf.shape(inputs), num=3)
    paddings = py_utils.HasShape(paddings, [seqlen, batch])
    assert state0 is not None
    activation, state1 = self.rnns.FProp(theta.rnns, inputs,
                                         tf.expand_dims(paddings, 2), state0)

    if direct_features is not None:
      direct_features = py_utils.HasRank(direct_features, 3)
      activation = tf.concat([activation, direct_features], axis=2)

    if labels is None:
      # We can only compute the logits here.
      logits = self.softmax.Logits(
          theta=theta.softmax,
          inputs=tf.reshape(activation, [seqlen * batch, -1]))
      xent_output = py_utils.NestedMap(
          logits=tf.reshape(logits, [seqlen, batch, -1]))
      xent_output.probs = tf.nn.softmax(xent_output.logits)
      xent_output.log_probs = tf.nn.log_softmax(xent_output.logits)
    elif 'class_ids' in labels:
      xent_output = self.softmax.FProp(
          theta=theta.softmax,
          inputs=activation,
          class_weights=labels.class_weights,
          class_ids=labels.class_ids)
    else:
      assert 'class_probabilities' in labels
      xent_output = self.softmax.FProp(
          theta=theta.softmax,
          inputs=activation,
          class_weights=labels.class_weights,
          class_probabilities=labels.class_probabilities)
    xent_output.last_hidden = activation
    return xent_output, state1
示例#13
0
 def Logits(self, theta, inputs, paddings, *args, **kwargs):
   """FProp and returns the logits for the whole sequence."""
   p = self.params
   del theta, paddings
   time, batch = tf.unstack(tf.shape(inputs)[:2])
   return tf.zeros([time, batch, p.vocab_size], dtype=p.dtype)
示例#14
0
 def _Preprocess(self, raw):
     data = tf.stack([
         tf.image.per_image_standardization(img) for img in tf.unstack(raw)
     ])
     data.set_shape(raw.shape)
     return data
示例#15
0
  def ResidualsToBBoxes(self, anchor_bboxes, residuals):
    r"""Converts anchor_boxes and residuals to predicted bboxes.

    This converts predicted residuals into bboxes using the following formulae:

      x_predicted = x_a + x_residual \* diagonal_xy
      y_predicted = y_a + y_residual \* diagonal_xy
      z_predicted = z_a + z_residual \* dz_a

      dx_predicted = dx_a \* exp(dx_residual)
      dy_predicted = dy_a \* exp(dy_residual)
      dz_predicted = dz_a \* exp(dz_residual)

      phi_predicted = phi_a + phi_residual

    These equations follow from those in LocalizationResiduals, where we solve
    for the \*_gt variables.

    Args:
      anchor_bboxes: tf.float32. where [..., :7] contains (x, y, z, dx, dy, dz,
        phi), corresponding to each anchor bbox parameters.
      residuals: tf.float32 of the same shape as anchor_bboxes containing
        predicted residuals at each anchor location.

    Returns:
      A tf.float32 tensor of the same shape as anchor_bboxes with predicted
      bboxes.
    """
    anchor_bboxes_shape = py_utils.GetShape(anchor_bboxes)
    anchor_bboxes = py_utils.with_dependencies(
        [py_utils.assert_equal(anchor_bboxes_shape[-1], 7)], anchor_bboxes)
    residuals = py_utils.HasShape(residuals, anchor_bboxes_shape)

    x_a, y_a, z_a, dx_a, dy_a, dz_a, phi_a = tf.unstack(
        anchor_bboxes, num=7, axis=-1)
    (x_residual, y_residual, z_residual, dx_residual, dy_residual, dz_residual,
     phi_residual) = tf.unstack(
         residuals, num=7, axis=-1)

    diagonal_xy = tf.sqrt(tf.square(dx_a) + tf.square(dy_a))

    x_predicted = x_a + x_residual * diagonal_xy
    y_predicted = y_a + y_residual * diagonal_xy
    z_predicted = z_a + z_residual * dz_a

    dx_predicted = dx_a * tf.exp(dx_residual)
    dy_predicted = dy_a * tf.exp(dy_residual)
    dz_predicted = dz_a * tf.exp(dz_residual)

    # Assuming a sine(delta_phi) transformation is used in the loss, then, it
    # is not possible to distinguish direction, hence, we use floormod here to
    # ensure that the predicted_phi is always in [0, np.pi) for consistency.
    # A separate direction classifier should be added the model if needed.
    phi_predicted = phi_a + phi_residual
    phi_predicted = tf.floormod(phi_predicted, np.pi)

    return tf.stack([
        x_predicted, y_predicted, z_predicted,
        dx_predicted, dy_predicted, dz_predicted,
        phi_predicted,
    ], axis=-1)  # pyformat: disable