Example #1
    def forward(self, z, condition=None):
        """Transform a random noise sampled from a standard Gaussian distribution into sample from the target distribution. And output the mean and log standard deviation of the output distribution.

        Args:
            z (Variable): shape(B, T), random noise sampled from a standard Gaussian distribution.
            condition (Variable, optional): shape(B, F, T), dtype float, the upsampled condition. Defaults to None.

        Returns:
            (z, out_mu, out_log_std)
            z (Variable): shape(B, T), dtype float, transformed noise, it is the synthesized waveform.
            out_mu (Variable): shape(B, T), dtype float, means of the output distributions.
            out_log_std (Variable): shape(B, T), dtype float, log standard deviations of the output distributions.
        """
        for i, flow in enumerate(self.flows):
            theta = flow(z, condition)  # w, mu, log_std [0: T]
            w, mu, log_std = F.split(theta, 3, dim=-1)  # (B, T, 1) for each
            mu = F.squeeze(mu, [-1])  #[0: T]
            log_std = F.squeeze(log_std, [-1])  #[0: T]
            z = z * F.exp(log_std) + mu  #[0: T]

            if i == 0:
                out_mu = mu
                out_log_std = log_std
            else:
                out_mu = out_mu * F.exp(log_std) + mu
                out_log_std += log_std

        return z, out_mu, out_log_std
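A small NumPy sketch (not from the original repo) of why the loop above accumulates the overall mean and log standard deviation this way: composing two affine transforms z' = z * exp(log_std) + mu gives another affine transform whose parameters follow exactly the update rule used for out_mu and out_log_std.

import numpy as np

rng = np.random.default_rng(0)
z = rng.standard_normal(5)
mu1, s1 = rng.standard_normal(5), rng.standard_normal(5)   # parameters of flow 1
mu2, s2 = rng.standard_normal(5), rng.standard_normal(5)   # parameters of flow 2

# apply the flows one after another
z1 = z * np.exp(s1) + mu1
z2 = z1 * np.exp(s2) + mu2

# accumulated parameters, exactly as in the loop above
out_mu = mu1 * np.exp(s2) + mu2
out_log_std = s1 + s2

assert np.allclose(z2, z * np.exp(out_log_std) + out_mu)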
Example #2
def _matrix_nms(bboxes, cate_labels, cate_scores, kernel='gaussian', sigma=2.0):
    """Matrix NMS for multi-class bboxes.
    Args:
        bboxes (Tensor): shape (n, 4)
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str):  'linear' or 'gaussian'
        sigma (float): std in gaussian method
    Returns:
        Tensor: cate_scores_update, tensors of shape (n)
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []

    # Compute an n×n IoU matrix: the pairwise IoU between every two boxes
    iou_matrix = jaccard(bboxes, bboxes)   # shape: [n_samples, n_samples]
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)   # keep only the upper triangle

    # label_specific matrix.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])   # shape: [n_samples, n_samples]
    # Entry (i, j) tells whether prediction i and prediction j have the same class id; only same-class predictions suppress each other.
    d = cate_labels_x - L.transpose(cate_labels_x, [1, 0])
    d = L.pow(d, 2)   # 0 where the classes match, > 0 otherwise (comparing with == 0 was unreliable in the original TF code, so < 1 is used)
    label_matrix = paddle.triu(L.cast(d < 1, 'float32'), diagonal=1)   # shape: [n_samples, n_samples]

    # IoU compensation
    # Zero out the IoU between different classes, keep it within the same class, then take the column-wise maximum IoU
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, [0, ])   # shape: [n_samples, ]
    # After the expand-and-transpose below, row i of compensate_iou holds (repeated n_samples times) the highest IoU
    # between object i and any higher-scoring object of the same class, so each column lists that value for
    # objects 0, 1, ..., n_samples-1 in order.
    compensate_iou = L.transpose(L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1]), [1, 0])   # shape: [n_samples, n_samples]

    # IoU decay
    # Zero out the IoU between different classes, keep it within the same class.
    # decay_iou[i][j] is the IoU between prediction i and prediction j, set to 0 if they belong to different classes; only the upper triangle is kept.
    decay_iou = iou_matrix * label_matrix   # shape: [n_samples, n_samples]

    # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_sum(decay_matrix / compensate_matrix, [0, ])
    elif kernel == 'linear':
        # Look at column j (in the example from 1_test_matrixnms.py, look at column 2).
        # Column 2 of decay_iou      is [0.9389, 0.9979, 0,      0]: object 2's IoU with the two higher-scoring same-class objects is 0.9389 and 0.9979.
        # Column 2 of compensate_iou is [0,      0.9409, 0.9979, 0]: the two same-class objects scoring higher than object 2 have highest IoUs of 0 and 0.9409 with objects scoring higher than themselves.
        # Column 2 of decay_matrix   is [0.0610, 0.0348, 485.28, 1]; its minimum is 0.0348 (so object 1 is the one that suppresses object 2). The last two values never matter, since they are always >= 1.
        # Summary: if the i-th entry in column j of decay_matrix is the column minimum, then object i is the one that suppresses object j.
        # Equivalently, the larger decay_iou is, the smaller decay_matrix becomes.
        decay_matrix = (1-decay_iou)/(1-compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, [0, ])
    else:
        raise NotImplementedError

    # update the scores
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update
Example #3
    def forward(self, audio, mel, audio_start, clip_kl=True):
        """Compute loss of Clarinet model.

        Args:
            audio (Variable): shape(B, T_audio), dtype float32, ground truth waveform.
            mel (Variable): shape(B, F, T_mel), dtype float32, condition (mel spectrogram here).
            audio_start (Variable): shape(B, ), dtype int64, audio start positions.
            clip_kl (bool, optional): whether to clip kl_loss (to the range [-100., 10.]). Defaults to True.

        Returns:
            Dict(str, Variable)
            loss (Variable): shape(1, ), dtype float32, total loss.
            kl (Variable): shape(1, ), dtype float32, KL divergence between the teacher's output distribution and the student's output distribution.
            regularization (Variable): shape(1, ), dtype float32, a regularization term of the KL divergence.
            spectrogram_frame_loss (Variable): shape(1, ), dtype float32, stft loss, the mean squared error between the magnitudes of the spectrograms of the ground truth waveform and the synthesized waveform.
        """
        batch_size, audio_length = audio.shape  # audio clip's length

        z = F.gaussian_random(audio.shape)
        condition = self.encoder(mel)  # (B, C, T)
        condition_slice = crop(condition, audio_start, audio_length)

        x, s_means, s_scales = self.student(z, condition_slice)  # all [0: T]
        s_means = s_means[:, 1:]  # (B, T-1), time steps [1: T]
        s_scales = s_scales[:, 1:]  # (B, T-1), time steps [1: T]
        s_clipped_scales = F.clip(s_scales, self.min_log_scale, 100.)

        # teacher outputs single gaussian
        y = self.teacher(x[:, :-1], condition_slice[:, :, 1:])
        _, t_means, t_scales = F.split(y, 3, -1)  # time steps [1: T]
        t_means = F.squeeze(t_means, [-1])  # (B, T-1), time steps [1: T]
        t_scales = F.squeeze(t_scales, [-1])  # (B, T-1), time steps [1: T]
        t_clipped_scales = F.clip(t_scales, self.min_log_scale, 100.)

        s_distribution = D.Normal(s_means, F.exp(s_clipped_scales))
        t_distribution = D.Normal(t_means, F.exp(t_clipped_scales))

        # kl divergence loss, so we only need to sample once? no MC
        kl = s_distribution.kl_divergence(t_distribution)
        if clip_kl:
            kl = F.clip(kl, -100., 10.)
        # context size dropped
        kl = F.reduce_mean(kl[:, self.teacher.context_size:])
        # major diff here
        regularization = F.mse_loss(t_scales[:, self.teacher.context_size:],
                                    s_scales[:, self.teacher.context_size:])

        # introduce information from real target
        spectrogram_frame_loss = F.mse_loss(self.stft.magnitude(audio),
                                            self.stft.magnitude(x))
        loss = kl + self.lmd * regularization + spectrogram_frame_loss
        loss_dict = {
            "loss": loss,
            "kl_divergence": kl,
            "regularization": regularization,
            "stft_loss": spectrogram_frame_loss
        }
        return loss_dict
Example #4
def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
    """Matrix NMS for multi-class masks.

    Args:
        seg_masks (Tensor): shape (n, h, w)   binary masks made of 0s and 1s
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str):  'linear' or 'gaussian'
        sigma (float): std in gaussian method
        sum_masks (Tensor):  shape (n, )      areas of the n objects

    Returns:
        Tensor: cate_scores_update, tensors of shape (n)
    """
    n_samples = L.shape(cate_labels)[0]   # number of objects
    seg_masks = L.reshape(seg_masks, (n_samples, -1))   # [n, h*w]
    # inter.
    inter_matrix = L.matmul(seg_masks, seg_masks, transpose_y=True)   # [n, n]  the matrix times its own transpose: pairwise intersection areas
    # union.
    sum_masks_x = L.expand(L.reshape(sum_masks, (1, -1)), [n_samples, 1])     # [n, n]  sum_masks repeated over n rows gives sum_masks_x
    # iou.
    iou_matrix = inter_matrix / (sum_masks_x + L.transpose(sum_masks_x, [1, 0]) - inter_matrix)
    rows = L.range(0, n_samples, 1, 'int32')
    cols = L.range(0, n_samples, 1, 'int32')
    rows = L.expand(L.reshape(rows, (1, -1)), [n_samples, 1])
    cols = L.expand(L.reshape(cols, (-1, 1)), [1, n_samples])
    tri_mask = L.cast(rows > cols, 'float32')
    iou_matrix = tri_mask * iou_matrix   # [n, n]   keep only the upper triangle

    # label_specific matrix.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])     # [n, n]  cate_labels repeated over n rows gives cate_labels_x
    label_matrix = L.cast(L.equal(cate_labels_x, L.transpose(cate_labels_x, [1, 0])), 'float32')
    label_matrix = tri_mask * label_matrix   # [n, n]   keep only the upper triangle

    # IoU compensation
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, dim=0)
    compensate_iou = L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1])     # [n, n]
    compensate_iou = L.transpose(compensate_iou, [1, 0])      # [n, n]

    # IoU decay
    decay_iou = iou_matrix * label_matrix

    # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_min((decay_matrix / compensate_matrix), dim=0)
    elif kernel == 'linear':
        decay_matrix = (1-decay_iou)/(1-compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, dim=0)
    else:
        raise NotImplementedError

    # update the score.
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update
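A rough NumPy sketch (illustrative only, not the repo's code) of the decay step computed above: each column j is decayed according to its IoU with higher-scoring same-class objects, compensated by how suppressed those objects are themselves, and the column minimum rescales score j.

import numpy as np

def matrix_nms_decay(decay_iou, compensate_iou, kernel='gaussian', sigma=2.0):
    if kernel == 'gaussian':
        decay = np.exp(-sigma * decay_iou ** 2) / np.exp(-sigma * compensate_iou ** 2)
    else:  # 'linear'
        decay = (1 - decay_iou) / (1 - compensate_iou)
    return decay.min(axis=0)

# three same-class detections sorted by score; detection 2 overlaps 0 and 1 heavily
decay_iou = np.array([[0.0, 0.2, 0.9],
                      [0.0, 0.0, 0.8],
                      [0.0, 0.0, 0.0]])
# compensate_iou[i][j]: highest IoU of detection i with any higher-scoring same-class detection
compensate_iou = np.tile(decay_iou.max(axis=0)[:, None], (1, 3))
scores = np.array([0.95, 0.90, 0.85])
print(scores * matrix_nms_decay(decay_iou, compensate_iou))   # the last score is suppressed most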
Example #5
    def points2bbox(self, pts, y_first=True):
        """点集转换成包围框.

        :param pts: the input points sets (fields), each points
            set (fields) is represented as 2n scalar.
        :param y_first: if y_first=True, the point set is represented as
            [y1, x1, y2, x2 ... yn, xn], otherwise the point set is
            represented as [x1, y1, x2, y2 ... xn, yn].
        :return: each points set is converting to a bbox [x1, y1, x2, y2].
        """
        pts_reshape = L.reshape(pts, (pts.shape[0], -1, 2, pts.shape[2], pts.shape[3]))
        pts_y = pts_reshape[:, :, 0, :, :] if y_first else pts_reshape[:, :, 1, :, :]
        pts_x = pts_reshape[:, :, 1, :, :] if y_first else pts_reshape[:, :, 0, :, :]
        if self.transform_method == 'minmax':
            bbox_left = L.reduce_min(pts_x, dim=1, keep_dim=True)
            bbox_right = L.reduce_max(pts_x, dim=1, keep_dim=True)
            bbox_up = L.reduce_min(pts_y, dim=1, keep_dim=True)
            bbox_bottom = L.reduce_max(pts_y, dim=1, keep_dim=True)
            bbox = L.concat([bbox_left, bbox_up, bbox_right, bbox_bottom],
                            axis=1)
        elif self.transform_method == 'partial_minmax':
            # only the first 4 points take part in the min/max
            pts_y = pts_y[:, :4, :, :]
            pts_x = pts_x[:, :4, :, :]
            bbox_left = L.reduce_min(pts_x, dim=1, keep_dim=True)
            bbox_right = L.reduce_max(pts_x, dim=1, keep_dim=True)
            bbox_up = L.reduce_min(pts_y, dim=1, keep_dim=True)
            bbox_bottom = L.reduce_max(pts_y, dim=1, keep_dim=True)
            bbox = L.concat([bbox_left, bbox_up, bbox_right, bbox_bottom],
                            axis=1)
        elif self.transform_method == 'moment':
            pts_y_mean = L.reduce_mean(pts_y, dim=1, keep_dim=True)
            pts_x_mean = L.reduce_mean(pts_x, dim=1, keep_dim=True)
            pts_y_std = paddle.std(pts_y - pts_y_mean, axis=1, keepdim=True)
            pts_x_std = paddle.std(pts_x - pts_x_mean, axis=1, keepdim=True)
            moment_transfer = (self.moment_transfer * self.moment_mul) + (
                self.moment_transfer.detach() * (1 - self.moment_mul))
            moment_width_transfer = moment_transfer[0]
            moment_height_transfer = moment_transfer[1]
            half_width = pts_x_std * L.exp(moment_width_transfer)
            half_height = pts_y_std * L.exp(moment_height_transfer)
            bbox = L.concat([
                pts_x_mean - half_width, pts_y_mean - half_height,
                pts_x_mean + half_width, pts_y_mean + half_height
            ], axis=1)
        else:
            raise NotImplementedError
        return bbox
Example #6
 def test_exp(self):
     program = Program()
     with program_guard(program):
         input = layers.data(name="input", shape=[16], dtype="float32")
         out = layers.exp(input, name='exp')
         self.assertIsNotNone(out)
     print(str(program))
Example #7
    def sample_from_mog(self, y):
        """Sample from the output distribution where the output distribution is a mixture of Gaussians.
        Args:
            y (Variable): shape(B, T, C_output), dtype float32, the parameters of the output distribution. It is the concatenation of 3 parts, the logits of every distribution, the mean of each distribution and the log standard deviation of each distribution. Each part's shape is (B, T, n_mixture), where `n_mixture` means the number of Gaussians in the mixture.

        Returns:
            Variable: shape(B, T), waveform sampled from the output distribution.
        """
        batch_size, time_steps, output_dim = y.shape
        n_mixture = output_dim // 3

        w, mu, log_std = F.split(y, 3, dim=-1)

        reshaped_w = F.reshape(w, (batch_size * time_steps, n_mixture))
        prob_ids = F.sampling_id(F.softmax(reshaped_w))
        prob_ids = F.reshape(prob_ids, (batch_size, time_steps))
        prob_ids = prob_ids.numpy()

        index = np.array([[[b, t, prob_ids[b, t]] for t in range(time_steps)]
                          for b in range(batch_size)]).astype("int32")
        index_var = dg.to_variable(index)

        mu_ = F.gather_nd(mu, index_var)
        log_std_ = F.gather_nd(log_std, index_var)

        dist = D.Normal(mu_, F.exp(log_std_))
        samples = dist.sample(shape=[])
        samples = F.clip(samples, min=-1., max=1.)
        return samples
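A hypothetical NumPy sketch (names and toy sizes are assumptions, not the repo's API) of the same sampling procedure: softmax the logits w, pick one Gaussian per time step, then sample from that component with its mu and exp(log_std).

import numpy as np

rng = np.random.default_rng(0)
B, T, n_mixture = 2, 4, 3                                    # assumed toy sizes
w = rng.standard_normal((B, T, n_mixture))                   # logits
mu = rng.standard_normal((B, T, n_mixture))
log_std = 0.1 * rng.standard_normal((B, T, n_mixture))

probs = np.exp(w) / np.exp(w).sum(-1, keepdims=True)         # softmax over components
comp = np.array([[rng.choice(n_mixture, p=probs[b, t]) for t in range(T)]
                 for b in range(B)])                         # chosen component ids, (B, T)
b_idx, t_idx = np.indices((B, T))
samples = rng.normal(mu[b_idx, t_idx, comp], np.exp(log_std[b_idx, t_idx, comp]))
samples = np.clip(samples, -1.0, 1.0)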
Example #8
def decode(conv_output, anchors, stride, num_class, conf_thresh):
    conv_shape = P.shape(conv_output)
    batch_size = conv_shape[0]
    n_grid = conv_shape[1]
    anchor_per_scale = len(anchors)
    conv_output = P.reshape(
        conv_output,
        (batch_size, n_grid, n_grid, anchor_per_scale, 5 + num_class))
    conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
    conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
    conv_raw_conf = conv_output[:, :, :, :, 4:5]
    conv_raw_prob = conv_output[:, :, :, :, 5:]

    rows = P.range(0, n_grid, 1, 'float32')
    cols = P.range(0, n_grid, 1, 'float32')
    rows = P.expand(P.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
    cols = P.expand(P.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
    offset = P.concat([rows, cols], axis=-1)
    offset = P.reshape(offset, (1, n_grid, n_grid, 1, 2))
    offset = P.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

    pred_xy = (P.sigmoid(conv_raw_dxdy) + offset) * stride
    pred_wh = (P.exp(conv_raw_dwdh) * P.assign(anchors))
    pred_xywh = P.concat([pred_xy, pred_wh], axis=-1)
    pred_conf = P.sigmoid(conv_raw_conf)
    pred_prob = P.sigmoid(conv_raw_prob)

    pred_xywh = P.reshape(pred_xywh, (batch_size, -1, 4))  # [-1, -1, 4]
    pred_conf = P.reshape(pred_conf, (batch_size, -1, 1))  # [-1, -1, 1]
    pred_prob = P.reshape(pred_prob,
                          (batch_size, -1, num_class))  # [-1, -1, 80]
    return pred_xywh, pred_conf, pred_prob
Example #9
    def get_embedding(self, num_embeddings,
                      embedding_dim, padding_idx=None):
        """
        Build sinusoidal embeddings.
        This matches the implementation in tensor2tensor,
        but differs slightly from the description
        in Section 3.5 of "Attention Is All You Need".
        """
        half_dim = embedding_dim // 2
        emb = math.log(10000.0) / (half_dim - 1)
        emb = layers.exp(layers.arange(
            start=0, end=half_dim, dtype='float32') * -emb)

        # [num_embeddings, embedding_dim // 2]
        emb = layers.unsqueeze(layers.arange(-num_embeddings // 2,
                                             num_embeddings // 2, dtype='float32'), axis=1) *\
            layers.unsqueeze(emb, axis=0)

        emb = layers.concat([layers.sin(emb), layers.cos(emb)], dim=1)
        # [num_embeddings, embedding_dim]
        if embedding_dim % 2 == 1:
            emb = layers.concat(
                [emb, layers.zeros(shape=(num_embeddings, 1))], dim=1)
        if padding_idx is not None:
            emb[padding_idx, :] = 0
        self.origin_shift = num_embeddings // 2
        return emb
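A rough NumPy equivalent (an assumption, not the repo's code) of the sinusoidal table built above, with positions centred around zero as in get_embedding.

import numpy as np

def sinusoid_table(num_embeddings, embedding_dim):
    half_dim = embedding_dim // 2
    freq = np.exp(np.arange(half_dim, dtype='float32') * -(np.log(10000.0) / (half_dim - 1)))
    pos = np.arange(-(num_embeddings // 2), num_embeddings // 2, dtype='float32')
    emb = pos[:, None] * freq[None, :]
    emb = np.concatenate([np.sin(emb), np.cos(emb)], axis=1)
    if embedding_dim % 2 == 1:                      # zero-pad the odd dimension
        emb = np.concatenate([emb, np.zeros((len(pos), 1), dtype='float32')], axis=1)
    return emb

print(sinusoid_table(8, 6).shape)   # (8, 6)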
Example #10
def sequence_softmax(x, beta=None):
    """Compute sequence softmax over paddle LodTensor

    This function computes softmax normalization along the length of each sequence.
    It is an extension of :code:`L.sequence_softmax`, which can only
    deal with a LoDTensor whose last dimension is 1.

    Args:
        x: The input variable, which is a LoDTensor.
        beta: Inverse temperature.

    Return:
        Output of sequence_softmax
    """

    if beta is not None:
        x = x * beta

    x_max = L.sequence_pool(x, "max")
    x_max = L.sequence_expand_as(x_max, x)
    x = x - x_max
    exp_x = L.exp(x)
    sum_exp_x = L.sequence_pool(exp_x, "sum")
    sum_exp_x = L.sequence_expand_as(sum_exp_x, exp_x)
    return exp_x / sum_exp_x
Example #11
 def _sampling(self, z_mean, z_log_var):
     """reparameterization trick 
     """
     # by default, random_normal has mean=0 and std=1.0
     epsilon = layers.gaussian_random_batch_size_like(
         self.tar, shape=[-1, self.latent_size])
     epsilon.stop_gradient = True
     return z_mean + layers.exp(0.5 * z_log_var) * epsilon
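A minimal NumPy sketch (illustrative only) of the reparameterization trick used above: a sample from N(z_mean, exp(z_log_var)) is written as a deterministic function of the parameters plus parameter-free noise, so gradients can flow through z_mean and z_log_var.

import numpy as np

rng = np.random.default_rng(0)
z_mean = np.array([0.5, -1.0])
z_log_var = np.array([0.0, 1.0])
epsilon = rng.standard_normal(z_mean.shape)       # epsilon ~ N(0, 1), no gradient needed
z = z_mean + np.exp(0.5 * z_log_var) * epsilon    # std = exp(0.5 * log_var)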
Example #12
def R2Penalty(fake_img, f):
    # gradient penalty
    fakes = fake_img
    fakes.stop_gradient = False
    fake_logit = f(fakes)

    apply_loss_scaling = lambda x: x * layers.exp(x * np.log(2.0))
    undo_loss_scaling = lambda x: x * layers.exp(-x * np.log(2.0))

    fake_logit = apply_loss_scaling(layers.sum(fake_logit))
    #grads = dygraph.grad(fake_logit, fakes,create_graph=True)
    grads = dygraph.grad(fake_logit, fakes, create_graph=False)
    fake_grads = layers.reshape(grads[0], (fakes.shape[0], -1))
    fake_grads = undo_loss_scaling(fake_grads)
    r2_penalty = layers.reduce_sum(
        layers.elementwise_mul(fake_grads, fake_grads))
    return r2_penalty
Example #13
def chunk_softmax(logits, labels, topk=10):
    after_exp = L.exp(logits)
    out, _ = L.argsort(after_exp, axis=-1)
    denorm = L.reduce_sum(out[:, -topk:], dim=-1, keep_dim=True)
    probs = after_exp / denorm
    one_hot = F.one_hot(labels, depth=probs.shape[-1])
    loss = -L.reduce_sum(one_hot * L.log(probs)) / logits.shape[0]
    return loss
Example #14
def R1Penalty(real_img, f):
    # gradient penalty
    reals = real_img
    reals.stop_gradient = False
    #reals = real_img
    real_logit = f(reals)
    apply_loss_scaling = lambda x: x * layers.exp(x * np.log(2.0,
                                                             dtype='float32'))
    undo_loss_scaling = lambda x: x * layers.exp(-x * np.log(2.0,
                                                             dtype='float32'))

    real_logit = apply_loss_scaling(layers.sum(real_logit))
    #grads = dygraph.grad(real_logit, reals, create_graph=True)
    grads = dygraph.grad(real_logit, reals, create_graph=False)
    real_grads = layers.reshape(grads[0], (reals.shape[0], -1))
    real_grads = undo_loss_scaling(real_grads)
    r1_penalty = layers.reduce_sum(
        layers.elementwise_mul(real_grads, real_grads))
    return r1_penalty
Example #15
 def loss_neg_log_of_pos(self, pos_score, neg_score_n, gama=5.0):
     """
         pos_score: batch_size x 1
         neg_score_n: batch_size x n
     """
     # n x batch_size
     neg_score_n = L.transpose(neg_score_n, [1, 0])
     # 1 x batch_size
     pos_score = L.reshape(pos_score, [1, -1])
     exp_pos_score = L.exp(pos_score * gama)
     exp_neg_score_n = L.exp(neg_score_n * gama)
     # (n+1) x batch_size
     pos_neg_score = L.concat([exp_pos_score, exp_neg_score_n], axis=0)
     # 1 x batch_size
     exp_sum = L.reduce_sum(pos_neg_score, dim=0, keep_dim=True)
     # 1 x batch_size
     loss = -1.0 * L.log(exp_pos_score / exp_sum)
     # batch_size
     loss = L.reshape(loss, [-1, 1])
     return loss
Example #16
    def forward(self, mu, logvar=None):
        """
        Compute loss

        Args:
            mu (tensor): mean
            logvar (tensor): logarithm of variance
        """
        if logvar is None:
            logvar = L.zeros_like(mu)
        return -0.5 * L.reduce_sum(1 + logvar - L.pow(mu, 2) - L.exp(logvar))
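A NumPy check (illustrative, not the repo's code): the expression above is the closed-form KL divergence between N(mu, exp(logvar)) and the standard normal, summed over all elements.

import numpy as np

mu = np.array([0.3, -0.7])
logvar = np.array([0.1, -0.2])
# KL( N(mu, exp(logvar)) || N(0, 1) ) summed over dimensions
kl = -0.5 * np.sum(1 + logvar - mu ** 2 - np.exp(logvar))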
Example #17
def log_sum_exp(x):
    """预测为背景的概率是(axx是神经网络的输出)
    p = e^(a00-max)/[e^(a00-max)+e^(a01-max)+...+e^(a80-max)]
    取对数
    lnp = a00-max-ln[e^(a00-max)+e^(a01-max)+...+e^(a80-max)]
    移项
    a00 = lnp + max + ln[e^(a00-max)+e^(a01-max)+...+e^(a80-max)]
    如果真的是背景类,标记p=1,所以
    a00 = max + ln[e^(a00-max)+e^(a01-max)+...+e^(a80-max)]
    神经网络的输出要尽量接近等号右边,才能预测为背景类。
    """
    x_max = P.reduce_max(x)
    return P.log(P.reduce_sum(P.exp(x - x_max), 1)) + x_max
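A NumPy sketch (illustrative) of why the maximum is subtracted: the shifted form equals the naive log-sum-exp but does not overflow for large logits.

import numpy as np

x = np.array([[1000.0, 1001.0, 999.0]])   # naive np.exp(x) would overflow here
x_max = x.max()
stable = np.log(np.sum(np.exp(x - x_max), axis=1)) + x_max   # == log(sum(exp(x))) per row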
Example #18
def decode(pred_txtytwth, priors, use_yolo_regressors: bool = False):
    """ 对神经网络预测的坐标tx、ty、tw、th进行解码。默认用的是SSD的解码方式 """
    if use_yolo_regressors:
        # Decoded boxes in center-size notation
        boxes = P.concat([
            pred_txtytwth[:, :2] + priors[:, :2],
            priors[:, 2:] * P.exp(pred_txtytwth[:, 2:])
        ], 1)

        boxes = point_form(boxes)  # already converted to [x1, y1, x2, y2]
        x1y1 = boxes[:, :2]
        x2y2 = boxes[:, 2:]
    else:
        variances = [0.1, 0.2]

        boxes = P.concat([
            priors[:, :2] +
            pred_txtytwth[:, :2] * variances[0] * priors[:, 2:],
            priors[:, 2:] * P.exp(pred_txtytwth[:, 2:] * variances[1])
        ], 1)
        x1y1 = boxes[:, :2] - boxes[:, 2:] / 2
        x2y2 = boxes[:, :2] + boxes[:, 2:] / 2

    return P.concat([x1y1, x2y2], 1)
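A NumPy sketch (illustrative only) of the default SSD-style decoding used in the else branch above: the centre offsets are scaled by the variances and the prior size, the width/height go through exp(), and the centre-size box is converted to corner form.

import numpy as np

variances = [0.1, 0.2]
priors = np.array([[0.5, 0.5, 0.2, 0.3]])    # (cx, cy, w, h), toy values
pred = np.array([[0.1, -0.2, 0.05, 0.1]])    # (tx, ty, tw, th), toy values
cxcy = priors[:, :2] + pred[:, :2] * variances[0] * priors[:, 2:]
wh = priors[:, 2:] * np.exp(pred[:, 2:] * variances[1])
boxes = np.concatenate([cxcy - wh / 2, cxcy + wh / 2], axis=1)   # (x1, y1, x2, y2)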
Example #19
    def compute_mog_loss(self, y, t):
        """compute the loss where output distribution is a mixture of Gaussians.

        Args:
            y (Variable): shape(B, T, C_output), dtype float32, the parameters of the output distribution. It is the concatenation of 3 parts, the logits of every distribution, the mean of each distribution and the log standard deviation of each distribution. Each part's shape is (B, T, n_mixture), where `n_mixture` means the number of Gaussians in the mixture.
            t (Variable): shape(B, T), dtype float32, the target audio. Note that the target's corresponding time index is one step ahead of the output distribution, and output distributions whose inputs contain padding are ignored in the loss computation.

        Returns:
            Variable: shape(1, ), dtype float32, the loss.
        """
        n_mixture = self.output_dim // 3

        # context size is not taken into account
        y = y[:, self.context_size:, :]
        t = t[:, self.context_size:]

        w, mu, log_std = F.split(y, 3, dim=2)
        # 100.0 is just a large float
        log_std = F.clip(log_std, min=self.log_scale_min, max=100.)
        inv_std = F.exp(-log_std)
        p_mixture = F.softmax(w, axis=-1)

        t = F.unsqueeze(t, axes=[-1])
        if n_mixture > 1:
            # t = F.expand_as(t, log_std)
            t = F.expand(t, [1, 1, n_mixture])

        x_std = inv_std * (t - mu)
        exponent = F.exp(-0.5 * x_std * x_std)
        pdf_x = 1.0 / math.sqrt(2.0 * math.pi) * inv_std * exponent

        pdf_x = p_mixture * pdf_x
        # pdf_x: [bs, len]
        pdf_x = F.reduce_sum(pdf_x, dim=-1)
        per_sample_loss = -F.log(pdf_x + 1e-9)

        loss = F.reduce_mean(per_sample_loss)
        return loss
Example #20
    def _decode(self,
                x,
                y,
                w,
                h,
                anchors,
                stride,
                scale_x_y,
                eps,
                is_gt=False):
        conv_shape = x.shape  # (8, 13, 13, 3)
        batch_size = conv_shape[0]
        n_grid = conv_shape[1]
        anchor_per_scale = conv_shape[3]

        _x = L.unsqueeze(x, 4)
        _y = L.unsqueeze(y, 4)
        conv_raw_dxdy = L.concat([_x, _y], -1)  # (8, 13, 13, 3, 2)
        _w = L.unsqueeze(w, 4)
        _h = L.unsqueeze(h, 4)
        conv_raw_dwdh = L.concat([_w, _h], -1)  # (8, 13, 13, 3, 2)

        rows = L.range(0, n_grid, 1, 'float32')
        cols = L.range(0, n_grid, 1, 'float32')
        rows = L.expand(L.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
        cols = L.expand(L.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
        offset = L.concat([rows, cols], axis=-1)
        offset = L.reshape(offset, (1, n_grid, n_grid, 1, 2))
        offset = L.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

        if is_gt:
            decode_xy = (conv_raw_dxdy + offset) / n_grid
        else:
            if (abs(scale_x_y - 1.0) < eps):
                decode_xy = L.sigmoid(conv_raw_dxdy)
                decode_xy = (decode_xy + offset) / n_grid
            else:
                # Grid Sensitive
                decode_xy = scale_x_y * L.sigmoid(conv_raw_dxdy) - 0.5 * (
                    scale_x_y - 1.0)
                decode_xy = (decode_xy + offset) / n_grid
        anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
        decode_wh = (L.exp(conv_raw_dwdh) * anchor_t) / (n_grid * stride)
        decode_xywh = L.concat([decode_xy, decode_wh], axis=-1)
        if is_gt:
            decode_xywh.stop_gradient = True

        return decode_xywh  # (8, 13, 13, 3, 4)
Example #21
    def pairwise_hinge(self):
        """pairwise model"""
        poi_repr = L.split(self.poi_repr, 2, dim=0)
        pos_repr, neg_repr = poi_repr
        pos_pred = L.cos_sim(self.query_repr, pos_repr)
        neg_pred = L.cos_sim(self.query_repr, neg_repr)

        mode = 'hinge_loss'
        # logistic loss: log(1 + e^-z); hinge loss: max(0, 1 - z)
        if 'hinge_loss' == mode:
            theta_z = L.relu(1 + neg_pred - pos_pred)
        elif 'logistic_loss' == mode:
            theta_z = L.log(1 + L.exp(neg_pred - pos_pred))
        self.loss = L.reduce_mean(theta_z)
        pos_cnt = L.reduce_sum(L.cast(L.greater_than(pos_pred, neg_pred), dtype="float32"))
        neg_cnt = L.reduce_sum(L.cast(L.less_than(pos_pred, neg_pred), dtype="float32"))
        self.order = pos_cnt / (1e-5 + neg_cnt)
        self.metrics = [self.loss, self.order]
Example #22
    def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
        b, _, h, w = tenFlow.shape
        tenDifference = tenFirst - backwarp(tenInput=tenSecond, tenFlow=tenFlow * self.fltBackward)
        tenDifference = L.pow(tenDifference, 2)
        tenDifference = L.reduce_sum(tenDifference, 1, True) # [b, 1, h, w]
        tenDifference = L.sqrt(tenDifference).detach()

        tenFeaturesFirst = self.moduleFeat(tenFeaturesFirst)

        tenMean = L.reshape(tenFlow, (b, 2, -1))    # [b, 2, h * w]
        tenMean = L.reduce_mean(tenMean, 2, True)   # [b, 2, 1]
        tenMean = L.reshape(tenMean, (b, 2, 1, 1))  # [b, 2, 1, 1]
        tenMean = L.expand(tenMean, (1, 1, h, w))   # [b, 2, h, w]
        delta = tenFlow - tenMean

        diff = L.concat([tenDifference, delta, tenFeaturesFirst], 1)
        tenDist = self.moduleDist(self.moduleMain(diff))
        tenDist = L.pow(tenDist, 2.0) * -1.0
        tenDist = tenDist - L.reduce_max(tenDist, 1, True)
        tenDist = L.exp(tenDist)

        tenDivisor = L.reduce_sum(tenDist, 1, True)
        tenDivisor = L.reciprocal(tenDivisor)

        tenScaleX = L.unfold(x=tenFlow[:, 0:1, :, :], 
                             kernel_sizes=self.intUnfold, 
                             strides=1, 
                             paddings=int((self.intUnfold - 1) / 2)) # [b, c, h * w]
        tenScaleX = L.reshape(tenScaleX, (b, -1, h, w))          # [b, c, h, w]
        tenScaleX = self.moduleScaleX(tenDist * tenScaleX) * tenDivisor

        tenScaleY = L.unfold(x=tenFlow[:, 1:2, :, :], 
                             kernel_sizes=self.intUnfold, 
                             strides=1, 
                             paddings=int((self.intUnfold - 1) / 2)) # [b, c, h * w]
        tenScaleY = L.reshape(tenScaleY, (b, -1, h, w))          # [b, c, h, w]
        tenScaleY = self.moduleScaleY(tenDist * tenScaleY) * tenDivisor

        return L.concat([tenScaleX, tenScaleY], 1)
Example #23
def decode(conv_output, anchors, stride, num_class, grid_offset):
    conv_shape = P.shape(conv_output)
    batch_size = conv_shape[0]
    output_size = conv_shape[1]
    anchor_per_scale = len(anchors)

    conv_output = P.reshape(conv_output, (batch_size, output_size, output_size,
                                          anchor_per_scale, 5 + num_class))

    conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
    conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
    conv_raw_conf = conv_output[:, :, :, :, 4:5]
    conv_raw_prob = conv_output[:, :, :, :, 5:]

    pred_xy = (P.sigmoid(conv_raw_dxdy) + grid_offset) * stride
    anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
    pred_wh = (P.exp(conv_raw_dwdh) * anchor_t) * stride
    pred_xywh = P.concat([pred_xy, pred_wh], axis=-1)

    pred_conf = P.sigmoid(conv_raw_conf)
    pred_prob = P.sigmoid(conv_raw_prob)

    return P.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
Example #24
def elu(x, alpha):
    return layers.relu(x) + alpha * (layers.exp(-1 * layers.relu(-1 * x)) - 1)
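A NumPy check (illustrative): the relu-based expression above matches the usual ELU definition, x for x > 0 and alpha * (exp(x) - 1) otherwise.

import numpy as np

x = np.linspace(-3, 3, 7)
alpha = 1.0
via_relu = np.maximum(x, 0) + alpha * (np.exp(-np.maximum(-x, 0)) - 1)
reference = np.where(x > 0, x, alpha * (np.exp(x) - 1))
assert np.allclose(via_relu, reference)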
Example #25
def log_softmax(x):
    """ log softmax """
    t1 = layers.exp(x)
    t1 = layers.reduce_sum(t1, dim=-1)
    t1 = layers.log(t1)
    return layers.elementwise_sub(x, t1, axis=0)
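The function above computes x - log(sum(exp(x))) directly, which can overflow for large logits. A max-shifted NumPy sketch (illustrative, not the repo's code) that returns the same values:

import numpy as np

def log_softmax_np(x):
    shifted = x - x.max(axis=-1, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))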
Example #26
 def __softmax(x, eps=1e-9):
     exp_out = layers.exp(x=x)
     sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False)
     return layers.elementwise_div(x=exp_out, y=sum_out, axis=0)
Example #27
    def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio,
                        batch_size, is_gt, scale_x_y, eps):
        shape_fmp = dcx.shape
        # batch_size = shape_fmp[0]
        anchor_per_scale = shape_fmp[1]
        output_size = shape_fmp[2]
        rows = L.range(0, output_size, 1., dtype='float32')
        cols = L.range(0, output_size, 1., dtype='float32')
        rows = L.reshape(rows, (1, 1, 1, -1))  # [1, 1, 1, w]
        cols = L.reshape(cols, (1, 1, -1, 1))  # [1, 1, h, 1]
        rows = L.expand(
            rows,
            [batch_size, anchor_per_scale, output_size, 1])  # [b, 3, h, w]
        cols = L.expand(
            cols,
            [batch_size, anchor_per_scale, 1, output_size])  # [b, 3, h, w]

        if is_gt:
            cx = (dcx + rows) / output_size
            cy = (dcy + cols) / output_size
        else:
            dcx_sig = L.sigmoid(dcx)
            dcy_sig = L.sigmoid(dcy)
            if (abs(scale_x_y - 1.0) > eps):
                dcx_sig = scale_x_y * dcx_sig - 0.5 * (scale_x_y - 1)
                dcy_sig = scale_x_y * dcy_sig - 0.5 * (scale_x_y - 1)
            cx = (dcx_sig + rows) / output_size
            cy = (dcy_sig + cols) / output_size

        anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0]
        anchor_w_np = np.array(anchor_w_)
        # anchor_w_ = paddle.to_tensor(anchor_w_np, place=paddle.CUDAPlace(0))
        anchor_w_ = paddle.to_tensor(anchor_w_np)
        anchor_w = L.reshape(anchor_w_, (1, -1, 1, 1))  # [1, 3, 1, 1]
        anchor_w = L.expand(
            anchor_w,
            [batch_size, 1, output_size, output_size])  # [b, 3, h, w]

        anchor_h_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 1]
        anchor_h_np = np.array(anchor_h_)
        # anchor_h_ = paddle.to_tensor(anchor_h_np, place=paddle.CUDAPlace(0))
        anchor_h_ = paddle.to_tensor(anchor_h_np)
        anchor_h = L.reshape(anchor_h_, (1, -1, 1, 1))  # [1, 3, 1, 1]
        anchor_h = L.expand(
            anchor_h,
            [batch_size, 1, output_size, output_size])  # [b, 3, h, w]

        # e^tw e^th
        exp_dw = L.exp(dw)
        exp_dh = L.exp(dh)
        pw = (exp_dw * anchor_w) / (output_size * downsample_ratio)
        ph = (exp_dh * anchor_h) / (output_size * downsample_ratio)
        if is_gt:
            exp_dw.stop_gradient = True
            exp_dh.stop_gradient = True
            pw.stop_gradient = True
            ph.stop_gradient = True

        x1 = cx - 0.5 * pw
        y1 = cy - 0.5 * ph
        x2 = cx + 0.5 * pw
        y2 = cy + 0.5 * ph
        if is_gt:
            x1.stop_gradient = True
            y1.stop_gradient = True
            x2.stop_gradient = True
            y2.stop_gradient = True

        return x1, y1, x2, y2
Example #28
    def network(self, for_test=False):
        """
        定义train_model的网络结构
        :return:
        """
        if not for_test:
            before = fluid.data(name='before_train',
                                shape=[-1, self.sent_len],
                                dtype='int64')
            target = fluid.data(name='target_train',
                                shape=[-1, self.sent_len],
                                dtype='int64')
            after = fluid.data(name='after_train',
                               shape=[-1, self.sent_len],
                               dtype='int64')
            # define the data loader
            reader = fluid.io.DataLoader.from_generator(
                feed_list=[before, target, after], capacity=64, iterable=True)
            # forward pass
            rnn_out, encode_hidden = self.forward(target)
            pred_before = self.sent_pred(target,
                                         dir='before',
                                         encode_hidden=encode_hidden,
                                         for_test=False)
            pred_after = self.sent_pred(target,
                                        dir='after',
                                        encode_hidden=encode_hidden,
                                        for_test=False)
        else:
            before = fluid.data(name='before_test',
                                shape=[-1, self.sent_len],
                                dtype='int64')
            target = fluid.data(name='target_test',
                                shape=[-1, self.sent_len],
                                dtype='int64')
            after = fluid.data(name='after_test',
                               shape=[-1, self.sent_len],
                               dtype='int64')
            # define the data loader
            reader = fluid.io.DataLoader.from_generator(
                feed_list=[before, target, after], capacity=64, iterable=True)
            # forward pass
            rnn_out, encode_hidden = self.forward(target)
            pred_before = self.sent_pred(target,
                                         dir='before',
                                         encode_hidden=encode_hidden,
                                         for_test=True)
            pred_after = self.sent_pred(target,
                                        dir='after',
                                        encode_hidden=encode_hidden,
                                        for_test=True)

        # Move batch_size to dimension 1. Why not dimension 0? Dimension 0 is num_layers.
        pred_before = layers.transpose(pred_before, perm=[0, 2, 1, 3])
        pred_after = layers.transpose(pred_after, perm=[0, 2, 1, 3])
        if not for_test:
            before_emb = self.embedding(before)
            after_emb = self.embedding(after)
            vocab_emb = self.embedding.parameters()[0]
        else:
            before_emb = self.test_embedding(before)
            after_emb = self.test_embedding(after)
            vocab_emb = self.test_embedding.parameters()[0]
        #loss_before = layers.cross_entropy(pred_before, before, soft_label=False)
        #loss_after = layers.cross_entropy(pred_after, after, soft_label=False)
        vocab_emb = layers.reshape(
            vocab_emb, shape=[1, 1, 1, vocab_emb.shape[0], vocab_emb.shape[1]])
        new_shape = pred_before.shape[:-1] + (1, ) + pred_before.shape[-1:]
        pred_before = layers.reshape(pred_before, shape=new_shape)
        pred_after = layers.reshape(pred_after, shape=new_shape)
        prob_w_before = layers.reduce_sum(layers.elementwise_mul(
            pred_before, vocab_emb),
                                          dim=[0, 4])
        prob_w_after = layers.reduce_sum(layers.elementwise_mul(
            pred_after, vocab_emb),
                                         dim=[0, 4])
        prob_w_before = layers.reduce_sum(layers.exp(prob_w_before), dim=-1)
        prob_w_after = layers.reduce_sum(layers.exp(prob_w_after), dim=-1)
        new_shape = before_emb.shape[:-1] + (1, ) + before_emb.shape[-1:]
        before_emb = layers.reshape(before_emb, shape=new_shape)
        after_emb = layers.reshape(after_emb, shape=new_shape)
        pred_before = layers.reduce_sum(layers.elementwise_mul(
            pred_before, before_emb),
                                        dim=[0, 3, 4])
        pred_after = layers.reduce_sum(layers.elementwise_mul(
            pred_after, after_emb),
                                       dim=[0, 3, 4])
        prob_before = layers.elementwise_div(layers.exp(pred_before),
                                             prob_w_before + 1e-6)
        prob_after = layers.elementwise_div(layers.exp(pred_after),
                                            prob_w_after + 1e-6)
        loss = -layers.reduce_mean(
            (layers.log(prob_after) + layers.log(prob_before)) / 2.0)
        return loss, reader
Example #29
 def __softmax(x, eps=1e-9):
     exp_out = layers.exp(x=x)
     sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False)
     return layers.elementwise_div(x=exp_out, y=sum_out, axis=0)
Example #30
    def ohem_conf_loss(self, pred_allboxes_conf, batch_size, labels_neg_mask,
                       labels_pos_mask, labels_pos_index, class_vectors,
                       labels_pos_cid):
        batch_conf = P.reshape(pred_allboxes_conf, (-1, self.num_classes))
        loss_c = log_sum_exp(batch_conf) - batch_conf[:, 0]
        loss_c = P.reshape(loss_c, (batch_size, -1))  # (batch_size, 19248)
        labels_neg_mask = P.concat(labels_neg_mask,
                                   axis=0)  # (batch_size*19248, 1)
        labels_neg_mask = P.reshape(labels_neg_mask,
                                    (batch_size, -1))  # (batch_size, 19248)
        loss_c = labels_neg_mask * loss_c  # keep only the negative-sample losses, (batch_size, 19248)
        sorted_loss_c, loss_idx = P.argsort(loss_c, axis=-1, descending=True)

        labels_pos_mask = P.concat(labels_pos_mask,
                                   axis=0)  # (batch_size*19248, 1)
        labels_pos_mask = P.reshape(labels_pos_mask,
                                    (batch_size, -1))  # (batch_size, 19248)
        num_pos = P.cast(P.reduce_sum(labels_pos_mask, dim=1),
                         'int32')  # (batch_size, )
        num_neg = self.negpos_ratio * num_pos  # (batch_size, )
        neg_topk_mask = []
        for idx in range(batch_size):
            desc = P.range(num_neg[idx],
                           num_neg[idx] - P.shape(labels_pos_mask)[1], -1,
                           'int32')
            neg_topk_mask.append(desc)
        neg_topk_mask = P.concat(neg_topk_mask, axis=0)  # (batch_size*19248, )
        neg_topk_mask = P.reshape(neg_topk_mask,
                                  (batch_size, -1))  # (batch_size, 19248)
        neg_topk_mask = P.cast(neg_topk_mask > 0,
                               'float32')  # (batch_size, 19248)
        sorted_loss_c = neg_topk_mask * sorted_loss_c
        selected_poss = []
        selected_negs = []
        selected_pos_class_vectors = []
        selected_neg_class_vectors = []
        for idx in range(batch_size):
            selected_neg_idx_idx = P.where(sorted_loss_c[idx] > 0)
            selected_neg_idx_idx.stop_gradient = True
            selected_neg_idx = P.gather(loss_idx[idx], selected_neg_idx_idx)
            selected_neg_idx.stop_gradient = True
            selected_neg = P.gather(pred_allboxes_conf[idx], selected_neg_idx)
            selected_neg.stop_gradient = True
            selected_negs.append(selected_neg)
            selected_pos = P.gather(pred_allboxes_conf[idx],
                                    labels_pos_index[idx])
            selected_pos.stop_gradient = True
            selected_poss.append(selected_pos)

            zeros = P.fill_constant(shape=[
                P.shape(selected_neg)[0],
            ],
                                    value=0,
                                    dtype='int32')
            zeros.stop_gradient = True
            selected_neg_class_vector = P.gather(class_vectors, zeros)
            selected_neg_class_vector.stop_gradient = True
            selected_neg_class_vectors.append(selected_neg_class_vector)

            labels_pos_cid.stop_gradient = True
            labels_pos_index[idx].stop_gradient = True
            selected_pos_cid = P.gather(labels_pos_cid[idx],
                                        labels_pos_index[idx])
            selected_pos_cid.stop_gradient = True
            selected_pos_class_vector = P.gather(class_vectors,
                                                 selected_pos_cid)
            selected_pos_class_vector.stop_gradient = True
            selected_pos_class_vectors.append(selected_pos_class_vector)
        selected_negs = P.concat(selected_negs, axis=0)  # (?, 1+80)
        selected_poss = P.concat(selected_poss, axis=0)  # (?, 1+80)
        pred_ = P.concat([selected_negs, selected_poss], axis=0)  # (?, 1+80)
        selected_neg_class_vectors = P.concat(selected_neg_class_vectors,
                                              axis=0)  # (?, 1+80)
        selected_pos_class_vectors = P.concat(selected_pos_class_vectors,
                                              axis=0)  # (?, 1+80)
        labels_ = P.concat(
            [selected_neg_class_vectors, selected_pos_class_vectors],
            axis=0)  # (?, 1+80)

        # softmax cross-entropy
        fenzi = P.exp(pred_)
        fenmu = P.reduce_sum(fenzi, dim=1, keep_dim=True)
        pred_prob = fenzi / P.expand_as(fenmu, target_tensor=fenzi)
        conf_loss = labels_ * (0 - P.log(pred_prob + 1e-9))  # cross-entropy; a tiny constant is added to avoid NaN
        conf_loss = P.reduce_sum(conf_loss)
        return conf_loss