def _compute_sdf(self, x, translations, blend_terms, points): """Compute signed distances between query points and hyperplanes.""" n_parts = tf.shape(x)[1] n_planes = tf.shape(x)[2] norm_logit = x[..., 0:self._dims - 1] offset = (-(tf.nn.sigmoid(x[..., self._dims - 1:self._dims]) * self._offset_scale + self._offset_lbound)) blend_planes = ( tf.nn.sigmoid(blend_terms[..., :n_parts]) * self._blend_scale + self._blend_lbound) # Norm of the boundary line norm_rad = tf.tanh(norm_logit) * np.pi # [..., (azimuth, altitude)] if self._dims == 3: norm = tf.stack([ tf.sin(norm_rad[..., 1]) * tf.cos(norm_rad[..., 0]), tf.sin(norm_rad[..., 1]) * tf.sin(norm_rad[..., 0]), tf.cos(norm_rad[..., 1]) ], axis=-1) else: norm = tf.concat([tf.cos(norm_rad), tf.sin(norm_rad)], axis=-1) # Calculate signed distances to hyperplanes. points = (tf.expand_dims(points, axis=1) - tf.expand_dims(translations, axis=2)) points = tf.expand_dims(points, axis=2) points = tf.tile(points, [1, 1, n_planes, 1, 1]) signed_dis = tf.matmul(points, tf.expand_dims(norm, axis=-1)) signed_dis = signed_dis + tf.expand_dims(offset, axis=-2) return signed_dis, translations, blend_planes, offset
def euler2mat(z, y, x): """Converts euler angles to rotation matrix TODO: remove the dimension for 'N' (deprecated for converting all source poses altogether) Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174 Args: z: rotation angle along z axis (in radians) -- size = [B, N] y: rotation angle along y axis (in radians) -- size = [B, N] x: rotation angle along x axis (in radians) -- size = [B, N] Returns: Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3] """ B = tf.shape(z)[0] N = 1 z = tf.clip_by_value(z, -np.pi, np.pi) y = tf.clip_by_value(y, -np.pi, np.pi) x = tf.clip_by_value(x, -np.pi, np.pi) # Expand to B x N x 1 x 1 z = tf.expand_dims(tf.expand_dims(z, -1), -1) y = tf.expand_dims(tf.expand_dims(y, -1), -1) x = tf.expand_dims(tf.expand_dims(x, -1), -1) zeros = tf.zeros([B, N, 1, 1]) ones = tf.ones([B, N, 1, 1]) cosz = tf.cos(z) sinz = tf.sin(z) rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3) rotz_2 = tf.concat([sinz, cosz, zeros], axis=3) rotz_3 = tf.concat([zeros, zeros, ones], axis=3) zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2) cosy = tf.cos(y) siny = tf.sin(y) roty_1 = tf.concat([cosy, zeros, siny], axis=3) roty_2 = tf.concat([zeros, ones, zeros], axis=3) roty_3 = tf.concat([-siny, zeros, cosy], axis=3) ymat = tf.concat([roty_1, roty_2, roty_3], axis=2) cosx = tf.cos(x) sinx = tf.sin(x) rotx_1 = tf.concat([ones, zeros, zeros], axis=3) rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3) rotx_3 = tf.concat([zeros, sinx, cosx], axis=3) xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2) rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat) return rotMat
def positional_embedding(pos_seq, inv_freq, bsz=None): sinusoid_inp = tf.einsum('i,j->ij', pos_seq, inv_freq) pos_emb = tf.concat([tf.sin(sinusoid_inp), tf.cos(sinusoid_inp)], -1) if bsz is not None: return tf.tile(pos_emb[:, None, :], [1, bsz, 1]) else: return pos_emb[:, None, :]
def f_tf(self, t, x, y, z): x_square = tf.reduce_sum(tf.square(x), 1, keep_dims=True) base = self._total_time - t + x_square / self._dim base_alpha = tf.pow(base, self._alpha) derivative = self._alpha * tf.pow(base, self._alpha - 1) * tf.cos(base_alpha) term1 = tf.reduce_sum(tf.square(z), 1, keep_dims=True) term2 = -4.0 * (derivative**2) * x_square / (self._dim**2) term3 = derivative term4 = -0.5 * (2.0 * derivative + 4.0 / (self._dim**2) * x_square * self._alpha * ((self._alpha - 1) * tf.pow(base, self._alpha - 2) * tf.cos(base_alpha) - (self._alpha * tf.pow(base, 2 * self._alpha - 2) * tf.sin(base_alpha)))) return term1 + term2 + term3 + term4
def loss(y_true_cls, y_pred_cls, y_true_geo, y_pred_geo, training_mask): ''' define the loss used for training, contraning two part, the first part we use dice loss instead of weighted logloss, the second part is the iou loss defined in the paper :param y_true_cls: ground truth of text :param y_pred_cls: prediction os text :param y_true_geo: ground truth of geometry :param y_pred_geo: prediction of geometry :param training_mask: mask used in training, to ignore some text annotated by ### :return: ''' classification_loss = dice_coefficient(y_true_cls, y_pred_cls, training_mask) # scale classification loss to match the iou loss part classification_loss *= 0.01 # d1 -> top, d2->right, d3->bottom, d4->left d1_gt, d2_gt, d3_gt, d4_gt, theta_gt = tf.split(value=y_true_geo, num_or_size_splits=5, axis=3) d1_pred, d2_pred, d3_pred, d4_pred, theta_pred = tf.split(value=y_pred_geo, num_or_size_splits=5, axis=3) area_gt = (d1_gt + d3_gt) * (d2_gt + d4_gt) area_pred = (d1_pred + d3_pred) * (d2_pred + d4_pred) w_union = tf.minimum(d2_gt, d2_pred) + tf.minimum(d4_gt, d4_pred) h_union = tf.minimum(d1_gt, d1_pred) + tf.minimum(d3_gt, d3_pred) area_intersect = w_union * h_union area_union = area_gt + area_pred - area_intersect L_AABB = -tf.log((area_intersect + 1.0)/(area_union + 1.0)) L_theta = 1 - tf.cos(theta_pred - theta_gt) tf.summary.scalar('geometry_AABB', tf.reduce_mean(L_AABB * y_true_cls * training_mask)) tf.summary.scalar('geometry_theta', tf.reduce_mean(L_theta * y_true_cls * training_mask)) L_g = L_AABB + 20 * L_theta return tf.reduce_mean(L_g * y_true_cls * training_mask) + classification_loss