Пример #1
0
    def _compute_sdf(self, x, translations, blend_terms, points):
        """Compute signed distances between query points and hyperplanes."""
        n_parts = tf.shape(x)[1]
        n_planes = tf.shape(x)[2]
        norm_logit = x[..., 0:self._dims - 1]
        offset = (-(tf.nn.sigmoid(x[..., self._dims - 1:self._dims]) *
                    self._offset_scale + self._offset_lbound))
        blend_planes = (
            tf.nn.sigmoid(blend_terms[..., :n_parts]) * self._blend_scale +
            self._blend_lbound)

        # Norm of the boundary line
        norm_rad = tf.tanh(norm_logit) * np.pi  # [..., (azimuth, altitude)]
        if self._dims == 3:
            norm = tf.stack([
                tf.sin(norm_rad[..., 1]) * tf.cos(norm_rad[..., 0]),
                tf.sin(norm_rad[..., 1]) * tf.sin(norm_rad[..., 0]),
                tf.cos(norm_rad[..., 1])
            ],
                            axis=-1)
        else:
            norm = tf.concat([tf.cos(norm_rad), tf.sin(norm_rad)], axis=-1)

        # Calculate signed distances to hyperplanes.
        points = (tf.expand_dims(points, axis=1) -
                  tf.expand_dims(translations, axis=2))
        points = tf.expand_dims(points, axis=2)
        points = tf.tile(points, [1, 1, n_planes, 1, 1])
        signed_dis = tf.matmul(points, tf.expand_dims(norm, axis=-1))
        signed_dis = signed_dis + tf.expand_dims(offset, axis=-2)

        return signed_dis, translations, blend_planes, offset
Пример #2
0
def euler2mat(z, y, x):
    """Converts euler angles to rotation matrix
   TODO: remove the dimension for 'N' (deprecated for converting all source
         poses altogether)
   Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174
  Args:
      z: rotation angle along z axis (in radians) -- size = [B, N]
      y: rotation angle along y axis (in radians) -- size = [B, N]
      x: rotation angle along x axis (in radians) -- size = [B, N]
  Returns:
      Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3]
  """
    B = tf.shape(z)[0]
    N = 1
    z = tf.clip_by_value(z, -np.pi, np.pi)
    y = tf.clip_by_value(y, -np.pi, np.pi)
    x = tf.clip_by_value(x, -np.pi, np.pi)

    # Expand to B x N x 1 x 1
    z = tf.expand_dims(tf.expand_dims(z, -1), -1)
    y = tf.expand_dims(tf.expand_dims(y, -1), -1)
    x = tf.expand_dims(tf.expand_dims(x, -1), -1)

    zeros = tf.zeros([B, N, 1, 1])
    ones = tf.ones([B, N, 1, 1])

    cosz = tf.cos(z)
    sinz = tf.sin(z)
    rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3)
    rotz_2 = tf.concat([sinz, cosz, zeros], axis=3)
    rotz_3 = tf.concat([zeros, zeros, ones], axis=3)
    zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2)

    cosy = tf.cos(y)
    siny = tf.sin(y)
    roty_1 = tf.concat([cosy, zeros, siny], axis=3)
    roty_2 = tf.concat([zeros, ones, zeros], axis=3)
    roty_3 = tf.concat([-siny, zeros, cosy], axis=3)
    ymat = tf.concat([roty_1, roty_2, roty_3], axis=2)

    cosx = tf.cos(x)
    sinx = tf.sin(x)
    rotx_1 = tf.concat([ones, zeros, zeros], axis=3)
    rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3)
    rotx_3 = tf.concat([zeros, sinx, cosx], axis=3)
    xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2)

    rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat)
    return rotMat
Пример #3
0
def positional_embedding(pos_seq, inv_freq, bsz=None):
    sinusoid_inp = tf.einsum('i,j->ij', pos_seq, inv_freq)
    pos_emb = tf.concat([tf.sin(sinusoid_inp), tf.cos(sinusoid_inp)], -1)
    if bsz is not None:
        return tf.tile(pos_emb[:, None, :], [1, bsz, 1])
    else:
        return pos_emb[:, None, :]
Пример #4
0
 def f_tf(self, t, x, y, z):
     x_square = tf.reduce_sum(tf.square(x), 1, keep_dims=True)
     base = self._total_time - t + x_square / self._dim
     base_alpha = tf.pow(base, self._alpha)
     derivative = self._alpha * tf.pow(base,
                                       self._alpha - 1) * tf.cos(base_alpha)
     term1 = tf.reduce_sum(tf.square(z), 1, keep_dims=True)
     term2 = -4.0 * (derivative**2) * x_square / (self._dim**2)
     term3 = derivative
     term4 = -0.5 * (2.0 * derivative + 4.0 /
                     (self._dim**2) * x_square * self._alpha *
                     ((self._alpha - 1) * tf.pow(base, self._alpha - 2) *
                      tf.cos(base_alpha) -
                      (self._alpha * tf.pow(base, 2 * self._alpha - 2) *
                       tf.sin(base_alpha))))
     return term1 + term2 + term3 + term4
Пример #5
0
def loss(y_true_cls, y_pred_cls,
         y_true_geo, y_pred_geo,
         training_mask):
    '''
    define the loss used for training, contraning two part,
    the first part we use dice loss instead of weighted logloss,
    the second part is the iou loss defined in the paper
    :param y_true_cls: ground truth of text
    :param y_pred_cls: prediction os text
    :param y_true_geo: ground truth of geometry
    :param y_pred_geo: prediction of geometry
    :param training_mask: mask used in training, to ignore some text annotated by ###
    :return:
    '''
    classification_loss = dice_coefficient(y_true_cls, y_pred_cls, training_mask)
    # scale classification loss to match the iou loss part
    classification_loss *= 0.01

    # d1 -> top, d2->right, d3->bottom, d4->left
    d1_gt, d2_gt, d3_gt, d4_gt, theta_gt = tf.split(value=y_true_geo, num_or_size_splits=5, axis=3)
    d1_pred, d2_pred, d3_pred, d4_pred, theta_pred = tf.split(value=y_pred_geo, num_or_size_splits=5, axis=3)
    area_gt = (d1_gt + d3_gt) * (d2_gt + d4_gt)
    area_pred = (d1_pred + d3_pred) * (d2_pred + d4_pred)
    w_union = tf.minimum(d2_gt, d2_pred) + tf.minimum(d4_gt, d4_pred)
    h_union = tf.minimum(d1_gt, d1_pred) + tf.minimum(d3_gt, d3_pred)
    area_intersect = w_union * h_union
    area_union = area_gt + area_pred - area_intersect
    L_AABB = -tf.log((area_intersect + 1.0)/(area_union + 1.0))
    L_theta = 1 - tf.cos(theta_pred - theta_gt)
    tf.summary.scalar('geometry_AABB', tf.reduce_mean(L_AABB * y_true_cls * training_mask))
    tf.summary.scalar('geometry_theta', tf.reduce_mean(L_theta * y_true_cls * training_mask))
    L_g = L_AABB + 20 * L_theta

    return tf.reduce_mean(L_g * y_true_cls * training_mask) + classification_loss