示例#1
0
def masks_to_boxes(masks):
    """
    Compute the bounding boxes around the provided masks.

    The masks should be in format [N, H, W] where N is the number
    of masks, (H, W) are the spatial dimensions.

    Returns a [N, 4] tensor, with the boxes in xyxy format. When the input
    holds no elements (any dimension is 0) an empty [0, 4] tensor is returned.
    """
    # A tensor is empty when the *product* of its dimensions is 0. Summing the
    # shape only detects the case where every dimension is 0, so a [0, H, W]
    # input would wrongly fall through to the reductions below.
    if np.prod(masks.shape) == 0:
        return dg.to_variable(np.zeros((0, 4)))

    h, w = masks.shape[-2:]
    y = dg.to_variable(np.arange(0, h, 1, dtype="float32"))
    x = dg.to_variable(np.arange(0, w, 1, dtype="float32"))
    y, x = T.meshgrid([y, x])  # [h, w]

    # Max coordinate: pixels outside the mask contribute 0 after the multiply.
    x_mask = (masks * L.unsqueeze(x, [0]))  # [N, H, W]
    x_max = L.reduce_max(L.flatten(x_mask, axis=1), dim=-1)
    # Min coordinate: pixels outside the mask are overwritten with a large
    # value so they can never win the minimum.
    # NOTE(review): `~` requires masks.numpy() to be a bool/integer array -
    # confirm the dtype supplied by callers.
    non_mask = dg.to_variable(~masks.numpy())
    x_mask[non_mask] = 1e8
    x_min = L.reduce_min(L.flatten(x_mask, axis=1), dim=-1)

    y_mask = (masks * L.unsqueeze(y, [0]))  # [N, H, W]
    y_max = L.reduce_max(L.flatten(y_mask, axis=1), dim=-1)
    y_mask[non_mask] = 1e8
    y_min = L.reduce_min(L.flatten(y_mask, axis=1), dim=-1)

    return L.stack([x_min, y_min, x_max, y_max], 1)
示例#2
0
def get_face_bbox_for_output(data_cfg, pose, crop_smaller=0):
    """
    Get pixel coordinates of the face bounding box.

    Args:
        data_cfg: Not read by the current implementation.
        pose: Pose label tensor. A 3-D input gets a leading axis added, a 5-D
            input is reduced to its last element along the first two axes;
            the face is located where channel 2 exceeds 0.9.
        crop_smaller (int): Number of pixels by which to shrink the box on
            every side.

    Returns:
        list: ``[ys, ye, xs, xe]`` as plain Python ints.
    """
    def _as_int(value):
        # Coordinates derived from tensor reductions are unwrapped to ints.
        if isinstance(value, int):
            return value
        return int(value.numpy()[0])

    rank = len(pose.shape)
    if rank == 3:
        pose = L.unsqueeze(pose, [0])
    elif rank == 5:
        pose = pose[-1, -1:]
    _, _, h, w = pose.shape

    # Only the densepose path is supported; the openpose path is disabled.
    use_openpose = False
    if use_openpose:
        raise NotImplementedError()
    face = T.search.nonzero((pose[:, 2] > 0.9).astype("int64"),
                            as_tuple=False)

    # Default box size, used when no face pixel is found.
    ylen = xlen = h // 32 * 8
    if not face.shape[0]:
        yc = h // 4
        xc = w // 2
    else:
        y, x = face[:, 1], face[:, 2]
        ys, ye = L.reduce_min(y), L.reduce_max(y)
        xs, xe = L.reduce_min(x), L.reduce_max(x)
        if use_openpose:
            xc, yc = (xs + xe) // 2, (ys * 3 + ye * 2) // 5
            ylen = int((xe - xs) * 2.5)
        else:
            xc, yc = (xs + xe) // 2, (ys + ye) // 2
            ylen = int((ye - ys) * 1.25)
        # Square box, at least 32 px and at most the image width, with the
        # centre clamped so the box stays inside the image.
        ylen = xlen = min(w, max(32, ylen))
        yc = max(ylen // 2, min(h - 1 - ylen // 2, yc))
        xc = max(xlen // 2, min(w - 1 - xlen // 2, xc))

    ys, ye = yc - ylen // 2, yc + ylen // 2
    xs, xe = xc - xlen // 2, xc + xlen // 2
    if crop_smaller != 0:  # Crop slightly smaller inside face.
        ys += crop_smaller
        xs += crop_smaller
        ye -= crop_smaller
        xe -= crop_smaller

    return [_as_int(ys), _as_int(ye), _as_int(xs), _as_int(xe)]
示例#3
0
def get_k_inter(seq, k):
    """Pool ``seq`` over axis 1 with weights ``k``; return [mean, max] joined.

    ``k`` gets a trailing axis and is used both as the weight of a weighted
    mean and as a mask for the max (presumably a 0/1 mask - TODO confirm).
    Both pooled results keep axis 1 and are concatenated on the last axis.
    """
    weights = layers.unsqueeze(k, -1)

    weighted_total = layers.reduce_sum(seq * weights, dim=1, keep_dim=True)
    weight_total = layers.reduce_sum(weights, dim=1, keep_dim=True)
    mean_part = weighted_total / weight_total

    # Positions with weight 0 are pushed down by 1e10 so they cannot be the max.
    penalised = seq - (1 - weights) * 1e10
    max_part = layers.reduce_max(penalised, dim=1, keep_dim=True)

    return layers.concat([mean_part, max_part], axis=-1)
示例#4
0
 def _compute_pc(self, x, mask):
     """Max-pool ``x`` over axis 1 and pass it through fc1 -> relu -> fc2 -> sigmoid.

     When ``mask`` is given, positions where it is 0 are lowered by 1e10
     before pooling so they cannot be selected as the maximum.
     """
     if mask is not None:
         x -= (1 - mask) * 1e10
     pooled = layers.reduce_max(x, dim=1, keep_dim=True)
     hidden = layers.relu(self.pc_fc1(pooled))
     return layers.sigmoid(self.pc_fc2(hidden))
示例#5
0
    def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data):
        """ Perform nms for only the max scoring class that isn't background (class 0) """
        # All boxes are decoded before score filtering; decoding only the
        # surviving boxes could be considered as an optimisation.
        fg_scores = conf_preds[batch_idx, 1:, :]
        best_scores = P.reduce_max(fg_scores, dim=0)

        # Workaround (reported by the original author for the GPU build of
        # PaddlePaddle 1.6.2): calling gather() with an empty `keep` fails
        # with "cudaGetLastError invalid configuration argument errno: 9",
        # while the CPU build works. To guarantee `keep` is never empty, one
        # index taken from the below-threshold set is appended to it.
        above_thresh = P.where(best_scores > self.conf_thresh)
        below_thresh = P.where(best_scores < self.conf_thresh)
        padding_index = below_thresh[:1]
        keep = P.concat([above_thresh, padding_index], axis=0)

        kept_scores = P.gather(P.transpose(fg_scores, perm=[1, 0]), keep)
        kept_scores = P.transpose(kept_scores, perm=[1, 0])
        kept_boxes = P.gather(decoded_boxes, keep)
        kept_masks = P.gather(mask_data[batch_idx], keep)

        # Because of the padding index above there is always at least one
        # candidate. Once the upstream issue is fixed, the padding index can
        # be removed and replaced by appending a dummy lowest-score entry
        # (box, score and mask filled with -1.0) so fast_nms() still has
        # input when nothing passes the threshold; detecting an empty `keep`
        # directly was considered too difficult.
        return self.fast_nms(kept_boxes, kept_scores, kept_masks)
示例#6
0
def _matrix_nms(bboxes, cate_labels, cate_scores, kernel='gaussian', sigma=2.0):
    """Matrix NMS for multi-class bboxes.
    Args:
        bboxes (Tensor): shape (n, 4)
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str):  'linear' or 'gaussian'
        sigma (float): std in gaussian method
    Returns:
        Tensor: cate_scores_update, tensors of shape (n)
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []

    # 计算一个n×n的IOU矩阵,两组矩形两两之间的IOU
    iou_matrix = jaccard(bboxes, bboxes)   # shape: [n_samples, n_samples]
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)   # 只取上三角部分

    # label_specific matrix.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])   # shape: [n_samples, n_samples]
    # 第i行第j列表示的是第i个预测框和第j个预测框的类别id是否相同。我们抑制的是同类的预测框。
    d = cate_labels_x - L.transpose(cate_labels_x, [1, 0])
    d = L.pow(d, 2)   # 同类处为0,非同类处>0。 tf中用 == 0比较无效,所以用 < 1
    label_matrix = paddle.triu(L.cast(d < 1, 'float32'), diagonal=1)   # shape: [n_samples, n_samples]

    # IoU compensation
    # 非同类的iou置为0,同类的iou保留。逐列取最大iou
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, [0, ])   # shape: [n_samples, ]
    # compensate_iou第0行里的值a0(重复了n_samples次)表示第0个物体与 比它分高 的 同类物体的最高iou为a0,
    # compensate_iou第1行里的值a1(重复了n_samples次)表示第1个物体与 比它分高 的 同类物体的最高iou为a1,...
    # compensate_iou里每一列里的值依次代表第0个物体、第1个物体、...、第n_samples-1个物体与 比它自己分高 的 同类物体的最高iou。
    compensate_iou = L.transpose(L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1]), [1, 0])   # shape: [n_samples, n_samples]

    # IoU decay
    # 非同类的iou置为0,同类的iou保留。
    # decay_iou第i行第j列表示的是第i个预测框和第j个预测框的iou,如果不是同类,该iou置0。且只取上三角部分。
    decay_iou = iou_matrix * label_matrix   # shape: [n_samples, n_samples]

    # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_sum(decay_matrix / compensate_matrix, [0, ])
    elif kernel == 'linear':
        # 看第j列。(1_test_matrixnms.py里的例子,看第2列)
        # decay_iou     里第2列里的值为[0.9389, 0.9979, 0,      0]。第2个物体与比它分高的2个同类物体的iou是0.9389, 0.9979。
        # compensate_iou里第2列里的值为[0,      0.9409, 0.9979, 0]。比第2个物体分高的2个同类物体 与 比它们自己分高 的 同类物体的最高iou 是0,      0.9409。
        # decay_matrix  里第2列里的值为[0.0610, 0.0348, 485.28, 1]。取该列的最小值为0.0348(抑制掉第2个物体的是第1个物体)。其实后面2个值不用看,因为它们总是>=1。
        # 总结:decay_matrix里第j列里的第i个值若为最小值,则抑制掉第j个物体的是第i个物体。
        # 而且,表现为decay_iou尽可能大,decay_matrix才会尽可能小。
        decay_matrix = (1-decay_iou)/(1-compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, [0, ])
    else:
        raise NotImplementedError

    # 更新分数
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update
示例#7
0
    def forward(self, inputs, labels=None, logits_softmax=False):
        """Forward pass.

        Args:
            inputs: Token ids fed to the embedding layer.
            labels: Optional class labels. A 1-D tensor is reshaped to
                ``[-1, 1]`` before the loss is computed.
            logits_softmax (bool): When True the returned logits are passed
                through softmax.

        Returns:
            The logits (softmaxed if requested) when ``labels`` is None,
            otherwise the tuple ``(loss, logits)`` where ``loss`` is the mean
            softmax cross-entropy.
        """
        emb = self.embedding(inputs)
        emb_dropout = self.dropout(emb)

        lstm_forward, _ = self._lstm_forward(emb_dropout)
        lstm_forward_tanh = L.tanh(lstm_forward)
        if self.bi_direction:
            lstm_backward, _ = self._lstm_backward(emb_dropout)
            lstm_backward_tanh = L.tanh(lstm_backward)
            encoded_vector = L.concat(
                input=[lstm_forward_tanh, lstm_backward_tanh], axis=-1)
            encoded_vector = L.reduce_max(encoded_vector, dim=1)
        else:
            encoded_vector = L.reduce_max(lstm_forward_tanh, dim=1)

        hid_fc_2 = self._hid_fc2(encoded_vector)
        raw_logits = self._output_fc(hid_fc_2)

        # Optionally expose probabilities to the caller, but keep the raw
        # logits for the loss: softmax_with_cross_entropy applies softmax
        # itself, so feeding it softmaxed values would apply softmax twice.
        logits = L.softmax(raw_logits) if logits_softmax else raw_logits

        # Without labels only the (optionally softmaxed) logits are returned.
        if labels is None:
            return logits

        if len(labels.shape) == 1:
            labels = L.reshape(labels, [-1, 1])

        loss = L.softmax_with_cross_entropy(raw_logits, labels)
        loss = L.reduce_mean(loss)
        return loss, logits
示例#8
0
def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
    """Matrix NMS for multi-class masks.

    Args:
        seg_masks (Tensor): shape (n, h, w), binary (0/1) masks
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str):  'linear' or 'gaussian'
        sigma (float): std in gaussian method
        sum_masks (Tensor): shape (n, ), area of each mask. Computed from
            ``seg_masks`` when omitted.

    Returns:
        Tensor: cate_scores_update, tensors of shape (n)

    Raises:
        NotImplementedError: if ``kernel`` is neither 'gaussian' nor 'linear'.
    """
    n_samples = L.shape(cate_labels)[0]   # number of objects
    seg_masks = L.reshape(seg_masks, (n_samples, -1))   # [n, h*w]
    if sum_masks is None:
        # The parameter defaults to None, so derive the per-mask area here
        # instead of failing in the reshape below.
        sum_masks = L.reduce_sum(seg_masks, dim=1)   # [n, ]
    # inter.
    # Masks times their own transpose: pairwise intersection areas.
    inter_matrix = L.matmul(seg_masks, seg_masks, transpose_y=True)   # [n, n]
    # union.
    # sum_masks repeated over n rows.
    sum_masks_x = L.expand(L.reshape(sum_masks, (1, -1)), [n_samples, 1])     # [n, n]
    # iou.
    iou_matrix = inter_matrix / (sum_masks_x + L.transpose(sum_masks_x, [1, 0]) - inter_matrix)
    # Strict upper-triangle mask: `rows` holds the column index and `cols`
    # the row index of each entry, so `rows > cols` selects entries above the
    # diagonal.
    rows = L.range(0, n_samples, 1, 'int32')
    cols = L.range(0, n_samples, 1, 'int32')
    rows = L.expand(L.reshape(rows, (1, -1)), [n_samples, 1])
    cols = L.expand(L.reshape(cols, (-1, 1)), [1, n_samples])
    tri_mask = L.cast(rows > cols, 'float32')
    iou_matrix = tri_mask * iou_matrix   # [n, n], upper triangle only

    # label_specific matrix.
    # cate_labels repeated over n rows.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])     # [n, n]
    label_matrix = L.cast(L.equal(cate_labels_x, L.transpose(cate_labels_x, [1, 0])), 'float32')
    label_matrix = tri_mask * label_matrix   # [n, n], upper triangle only

    # IoU compensation
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, dim=0)
    compensate_iou = L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1])     # [n, n]
    compensate_iou = L.transpose(compensate_iou, [1, 0])      # [n, n]

    # IoU decay
    decay_iou = iou_matrix * label_matrix

    # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_min((decay_matrix / compensate_matrix), dim=0)
    elif kernel == 'linear':
        decay_matrix = (1-decay_iou)/(1-compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, dim=0)
    else:
        raise NotImplementedError

    # update the score.
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update
示例#9
0
    def fast_nms(self, boxes, scores, masks, max_num_detections=100):
        """Class-wise Fast NMS over boxes with their masks.

        Args:
            boxes: Candidate boxes, gathered per class by sorted index.
            scores: Per-class scores, sorted along axis 1.
            masks: Per-candidate mask data, gathered like ``boxes``.
            max_num_detections (int): Maximum number of detections returned
                across all classes.

        Returns:
            tuple: ``(boxes, masks, classes, scores)`` of the kept detections,
            ordered by descending score.
        """
        # Rank every class's candidates best-first and keep the leading top_k.
        scores, order = P.argsort(scores, axis=1, descending=True)
        order = order[:, :self.top_k]
        scores = scores[:, :self.top_k]

        num_classes = P.shape(order)[0]
        num_dets = P.shape(order)[1]

        flat_order = P.reshape(order, (-1, ))
        boxes = P.reshape(P.gather(boxes, flat_order),
                          (num_classes, num_dets, 4))
        masks = P.reshape(P.gather(masks, flat_order),
                          (num_classes, num_dets, -1))

        # c x n x n IoU tensor: for each class, IoU between every pair of its
        # n candidates.
        pairwise_iou = jaccard(boxes, boxes)

        # IoU(A, A) = 1 and IoU(A, B) = IoU(B, A), so the diagonal and the
        # lower triangle of every class slice are zeroed out.
        col_index = P.range(0, num_dets, 1, 'int32')
        row_index = P.range(0, num_dets, 1, 'int32')
        col_index = P.expand(P.reshape(col_index, (1, -1)), [num_dets, 1])
        row_index = P.expand(P.reshape(row_index, (-1, 1)), [1, num_dets])
        upper = P.cast(col_index > row_index, 'float32')
        upper = P.expand(P.reshape(upper, (1, num_dets, num_dets)),
                         [num_classes, 1, 1])
        max_iou_with_better = P.reduce_max(upper * pairwise_iou, dim=1)

        # Keep candidates whose overlap with any higher-scoring candidate of
        # the same class does not exceed the threshold.
        keep = P.where(max_iou_with_better <= self.nms_thresh)

        # Assign each kept detection to its corresponding class.
        class_ids = P.range(0, num_classes, 1, 'int32')
        class_ids = P.expand(P.reshape(class_ids, (-1, 1)), [1, num_dets])
        class_ids = P.gather_nd(class_ids, keep)

        boxes = P.gather_nd(boxes, keep)
        masks = P.gather_nd(masks, keep)
        scores = P.gather_nd(scores, keep)

        # Only the max_num_detections best scores across all classes survive.
        scores, ranking = P.argsort(scores, axis=0, descending=True)
        ranking = ranking[:max_num_detections]
        scores = scores[:max_num_detections]

        final_classes = P.gather(class_ids, ranking)
        final_boxes = P.gather(boxes, ranking)
        final_masks = P.gather(masks, ranking)

        return final_boxes, final_masks, final_classes, scores
示例#10
0
def fast_nms(boxes, scores, conf_thresh, nms_thresh, keep_top_k, nms_top_k):
    '''
    Class-wise Fast NMS.

    :param boxes:       [?, 4]
    :param scores:      [80, ?]
    :param conf_thresh: not read by this function
    :param nms_thresh:  IoU above which a candidate is dropped
    :param keep_top_k:  candidates kept per class before suppression
    :param nms_top_k:   detections kept across all classes afterwards
    :return: (boxes, scores, classes) ordered by descending score
    '''
    # Rank every class's candidates best-first and keep the leading ones.
    scores, order = P.argsort(scores, axis=1, descending=True)
    order = order[:, :keep_top_k]
    scores = scores[:, :keep_top_k]

    num_classes = P.shape(order)[0]
    num_dets = P.shape(order)[1]

    flat_order = P.reshape(order, (-1, ))
    boxes = P.reshape(P.gather(boxes, flat_order), (num_classes, num_dets, 4))

    # c x n x n IoU tensor: for each class, IoU between every pair of its n
    # candidates.
    pairwise_iou = _iou(boxes, boxes)

    # IoU(A, A) = 1 and IoU(A, B) = IoU(B, A), so the diagonal and the lower
    # triangle of every class slice are zeroed out.
    col_index = P.range(0, num_dets, 1, 'int32')
    row_index = P.range(0, num_dets, 1, 'int32')
    col_index = P.expand(P.reshape(col_index, (1, -1)), [num_dets, 1])
    row_index = P.expand(P.reshape(row_index, (-1, 1)), [1, num_dets])
    upper = P.cast(col_index > row_index, 'float32')
    upper = P.expand(P.reshape(upper, (1, num_dets, num_dets)),
                     [num_classes, 1, 1])
    max_iou_with_better = P.reduce_max(upper * pairwise_iou, dim=1)

    # Within a class, a candidate is dropped when its highest IoU with any
    # higher-scoring candidate exceeds nms_thresh; index 0 is always kept.
    keep = P.where(max_iou_with_better <= nms_thresh)

    # Assign each kept detection to its corresponding class.
    class_ids = P.range(0, num_classes, 1, 'int32')
    class_ids = P.expand(P.reshape(class_ids, (-1, 1)), [1, num_dets])
    class_ids = P.gather_nd(class_ids, keep)

    boxes = P.gather_nd(boxes, keep)
    scores = P.gather_nd(scores, keep)

    # Only the nms_top_k best scores across all classes survive.
    scores, ranking = P.argsort(scores, axis=0, descending=True)
    ranking = ranking[:nms_top_k]
    scores = scores[:nms_top_k]

    final_classes = P.gather(class_ids, ranking)
    final_boxes = P.gather(boxes, ranking)

    return final_boxes, scores, final_classes
示例#11
0
    def forward(self, inputs, labels=None, logits_softmax=False):
        """Forward pass.

        Args:
            inputs: Token ids fed to the embedding layer.
            labels: Optional class labels. A 1-D tensor is reshaped to
                ``[-1, 1]`` before the loss is computed.
            logits_softmax (bool): When True the returned logits are passed
                through softmax.

        Returns:
            The logits (softmaxed if requested) when ``labels`` is None,
            otherwise the tuple ``(loss, logits)`` where ``loss`` is the mean
            softmax cross-entropy.
        """
        emb = self.embedding(inputs)

        hid_fc1 = self._hid_fc1(emb)

        gru_forward = self._gru_forward(hid_fc1)
        gru_forward_tanh = L.tanh(gru_forward)
        if self.bi_direction:
            gru_backward = self._gru_backward(hid_fc1)
            gru_backward_tanh = L.tanh(gru_backward)
            encoded_vector = L.concat(
                input=[gru_forward_tanh, gru_backward_tanh], axis=2)
            encoded_vector = L.reduce_max(encoded_vector, dim=1)
        else:
            encoded_vector = L.reduce_max(gru_forward_tanh, dim=1)

        hid_fc_2 = self._hid_fc2(encoded_vector)

        raw_logits = self._output_fc(hid_fc_2)

        # Optionally expose probabilities to the caller, but keep the raw
        # logits for the loss: softmax_with_cross_entropy applies softmax
        # itself, so feeding it softmaxed values would apply softmax twice.
        logits = L.softmax(raw_logits) if logits_softmax else raw_logits

        # Without labels only the (optionally softmaxed) logits are returned.
        if labels is None:
            return logits

        if len(labels.shape) == 1:
            labels = L.reshape(labels, [-1, 1])

        loss = L.softmax_with_cross_entropy(raw_logits, labels)
        loss = L.reduce_mean(loss)
        return loss, logits
示例#12
0
def log_sum_exp(x):
    """Log-sum-exp of ``x`` along axis 1, shifted by the global max of ``x``.

    The maximum over all of ``x`` is subtracted before exponentiation and
    added back after the logarithm.

    Derivation kept from the original notes (a_xx are network outputs).
    The probability of predicting background is
        p = e^(a00-max) / [e^(a00-max) + e^(a01-max) + ... + e^(a80-max)]
    Taking the logarithm
        ln p = a00 - max - ln[e^(a00-max) + e^(a01-max) + ... + e^(a80-max)]
    Rearranging
        a00 = ln p + max + ln[e^(a00-max) + e^(a01-max) + ... + e^(a80-max)]
    For a true background sample p = 1, hence
        a00 = max + ln[e^(a00-max) + e^(a01-max) + ... + e^(a80-max)]
    so the network output must approach the right-hand side for the sample
    to be predicted as background.
    """
    peak = P.reduce_max(x)
    shifted_exp = P.exp(x - peak)
    summed = P.reduce_sum(shifted_exp, 1)
    return P.log(summed) + peak
示例#13
0
    def forward(self, inputs):
        """Forward pass: convolve, max-pool over axis 2, then drop axes 2 and 3.

        Shapes below follow the original author's notes:
        inputs is [batch_size, num_channels, seq_len, emb_dim] (N, C, H, W)
        and the result is [batch_size, num_filters].
        """
        # [batch_size, num_filters, height_after_conv, width_after_conv=1]
        conv_out = self._conv2d(inputs)

        # Max over the height axis, kept as a size-1 axis:
        # [batch_size, num_filters, 1, 1]
        pooled = L.reduce_max(conv_out, dim=2, keep_dim=True)

        # Remove the two singleton axes: [batch_size, num_filters]
        return L.squeeze(pooled, axes=[2, 3])
示例#14
0
    def forward(self, x):
        """Apply attention over the flattened feature map and add a gated residual.

        ``x`` is unpacked as ``(b, c, h, w)``. The similarity matrix is the
        batched product of the flattened map with its own transpose, i.e.
        ``[b, c, c]``. The result is ``self.gamma * attention + x`` with the
        same shape as ``x``.
        """
        b, c, h, w = x.shape
        flat_shape = (b, -1, h * w)

        query = reshape(x, flat_shape)                      # [b, c, h*w]
        key = transpose(reshape(x, flat_shape), (0, 2, 1))  # [b, h*w, c]
        value = reshape(x, flat_shape)                      # [b, c, h*w]

        energy = bmm(query, key)                            # [b, c, c]
        # Subtract every entry from its row maximum before the softmax.
        row_max = reduce_max(energy, -1, keep_dim=True)
        energy = expand_as(row_max, energy) - energy

        weights = transpose(softmax(energy), (0, 2, 1))     # [b, c, c]

        attended = bmm(weights, value)                      # [b, c, h*w]
        attended = reshape(attended, (b, c, h, w))

        return self.gamma * attended + x
示例#15
0
    def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
        """Re-estimate both flow channels from locally weighted flow patches.

        A per-pixel weight distribution is predicted from the warping error,
        the mean-centred flow and the processed first-image features. Each
        flow channel is then unfolded into local patches, weighted, passed
        through its scale module and normalised by the summed weights.
        ``tenFeaturesSecond`` is not read by this method.

        Returns:
            The X and Y results concatenated on axis 1.
        """
        b, _, h, w = tenFlow.shape

        # Per-pixel L2 distance (over channels) between the first input and
        # the second input warped by the scaled flow; detached from the graph.
        warped_second = backwarp(tenInput=tenSecond, tenFlow=tenFlow * self.fltBackward)
        warp_error = L.reduce_sum(L.pow(tenFirst - warped_second, 2), 1, True)  # [b, 1, h, w]
        warp_error = L.sqrt(warp_error).detach()

        features = self.moduleFeat(tenFeaturesFirst)

        # Subtract the spatial mean of each flow channel.
        flow_mean = L.reduce_mean(L.reshape(tenFlow, (b, 2, -1)), 2, True)      # [b, 2, 1]
        flow_mean = L.expand(L.reshape(flow_mean, (b, 2, 1, 1)), (1, 1, h, w))  # [b, 2, h, w]
        centred_flow = tenFlow - flow_mean

        stacked = L.concat([warp_error, centred_flow, features], 1)
        weights = self.moduleDist(self.moduleMain(stacked))
        # exp(-d^2), shifted by the channel-wise max before exponentiation.
        weights = L.pow(weights, 2.0) * -1.0
        weights = L.exp(weights - L.reduce_max(weights, 1, True))

        norm = L.reciprocal(L.reduce_sum(weights, 1, True))

        def _weighted_patches(flow_channel, scale_module):
            # Unfold one flow channel into local patches and weight them.
            patches = L.unfold(x=flow_channel,
                               kernel_sizes=self.intUnfold,
                               strides=1,
                               paddings=int((self.intUnfold - 1) / 2))  # [b, c, h * w]
            patches = L.reshape(patches, (b, -1, h, w))                 # [b, c, h, w]
            return scale_module(weights * patches) * norm

        scaled_x = _weighted_patches(tenFlow[:, 0:1, :, :], self.moduleScaleX)
        scaled_y = _weighted_patches(tenFlow[:, 1:2, :, :], self.moduleScaleY)

        return L.concat([scaled_x, scaled_y], 1)
示例#16
0
    def forward(self, outputs, target_sizes):
        """
        Convert raw model outputs into per-image detection results.

        Parameters:
            outputs: raw outputs of the model; reads the keys "pred_logits"
                     and "pred_boxes"
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each image
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding

        Returns:
            One dict per image with the numpy arrays 'scores', 'labels' and
            'boxes' (boxes in absolute [x0, y0, x1, y1] coordinates).
        """
        out_logits = outputs["pred_logits"]
        out_bbox = outputs["pred_boxes"]

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = L.softmax(out_logits, -1)  # [bs, num_queries, num_classes + 1]
        # NOTE(review): the slice spans every class, so the last (+1) class
        # also takes part in the argmax/max - confirm this is intended.
        labels = L.argmax(prob[:, :, :], axis=-1)  # [bs, num_queries]
        scores = L.reduce_max(prob, dim=-1)  # [bs, num_queries]

        # Convert boxes from (cx, cy, w, h) to [x0, y0, x1, y1].
        bs, num_queries, _ = out_bbox.shape
        flat_boxes = L.reshape(out_bbox, (-1, 4))
        boxes = L.reshape(box_ops.box_cxcywh_to_xyxy(flat_boxes),
                          (bs, num_queries, 4))

        # Scale from relative [0, 1] to absolute pixel coordinates.
        img_h, img_w = target_sizes[:, 0], target_sizes[:, 1]
        scale_fct = L.stack([img_w, img_h, img_w, img_h], 1)  # [bs, 4]
        scale_fct = L.expand(L.unsqueeze(scale_fct, [1]), (1, num_queries, 1))
        boxes = boxes * scale_fct

        results = []
        for s, l, b in zip(scores.numpy(), labels.numpy(), boxes.numpy()):
            results.append({'scores': s, 'labels': l, 'boxes': b})

        return results
示例#17
0
        def early_finish(alive_log_probs, finished_scores,
                         finished_in_finished):
            """Return whether every finished beam already beats the best alive bound.

            Compares the lowest score among finished sequences with the best
            score an alive sequence could still reach under the maximum
            length penalty, and reduces the comparison with ``reduce_all``.
            """
            # The best possible score of the most likely alive sequence.
            penalty_at_max_len = np.power(((5. + max_len) / 6.), alpha)
            best_alive_bound = alive_log_probs[:, 0] / penalty_at_max_len

            # Unfinished slots are multiplied by 0; since scores are all
            # negative, the min is then the lowest score of a finished item.
            worst_finished = layers.reduce_min(
                finished_scores * finished_in_finished, 1)

            # If nothing has finished the min is 0 and must be replaced by
            # -inf, so that the termination condition cannot be met.
            nothing_finished = 1. - layers.reduce_max(finished_in_finished, 1)
            worst_finished += nothing_finished * -inf

            return layers.reduce_all(
                layers.greater_than(worst_finished, best_alive_bound))
示例#18
0
文件: layers.py 项目: Yelrose/PGL
def topk_pool(gw, score, graph_id, ratio):
    """Implementation of topk pooling, where k means pooling ratio.

    Node scores are scattered into a dense [num_graph, max_num_nodes] layout
    padded with a very low value, sorted per graph in descending order, and
    the leading ``ceil(num_nodes_in_graph * ratio)`` entries of every graph
    are selected.

    Args:
        gw: Graph wrapper object. Reads ``graph_lod``, ``num_nodes`` and
            ``num_graph``.

        score: The attention score of all nodes, which is used to select
               important nodes.

        graph_id: The graphs that the nodes belong to.

        ratio: The pooling ratio of nodes we want to select.

    Return:
        perm: The index of nodes we choose.

        ratio_length: The selected node numbers of each graph.
    """

    graph_lod = gw.graph_lod
    graph_nodes = gw.num_nodes
    num_graph = gw.num_graph

    # Count nodes per graph: a vector of ones segmented by graph_lod and
    # sum-pooled per segment. The largest count sets the padded row width.
    num_nodes = L.ones(shape=[graph_nodes], dtype="float32")
    num_nodes = L.lod_reset(num_nodes, graph_lod)
    num_nodes_per_graph = L.sequence_pool(num_nodes, pool_type='sum')
    max_num_nodes = L.reduce_max(num_nodes_per_graph, dim=0)
    max_num_nodes = L.cast(max_num_nodes, dtype="int32")

    # Slot of every node in the flattened dense layout: its position relative
    # to the value gathered from graph_lod for its graph, plus that graph's
    # row start (presumably graph_lod holds per-graph start offsets - TODO
    # confirm against the graph wrapper).
    index = L.arange(0, gw.num_nodes, dtype="int64")
    offset = L.gather(graph_lod, graph_id, overwrite=False)
    index = (index - offset) + (graph_id * max_num_nodes)
    index.stop_gradient = True

    # padding
    # Unused slots keep -999999 so they sort after every real score.
    dense_score = L.fill_constant(shape=[num_graph * max_num_nodes],
                                  dtype="float32",
                                  value=-999999)
    index = L.reshape(index, shape=[-1])
    dense_score = L.scatter(dense_score, index, updates=score)
    num_graph = L.cast(num_graph, dtype="int32")
    dense_score = L.reshape(dense_score, shape=[num_graph, max_num_nodes])

    # record the sorted index
    _, sort_index = L.argsort(dense_score, axis=-1, descending=True)

    # recover the index range
    # The in-row positions are shifted by each graph's graph_lod entry (last
    # entry dropped) to turn them back into indices over all nodes.
    graph_lod = graph_lod[:-1]
    graph_lod = L.reshape(graph_lod, shape=[-1, 1])
    graph_lod = L.cast(graph_lod, dtype="int64")
    sort_index = L.elementwise_add(sort_index, graph_lod, axis=-1)
    sort_index = L.reshape(sort_index, shape=[-1, 1])

    # use sequence_slice to choose selected node index
    # Every graph owns max_num_nodes consecutive entries in sort_index; from
    # each such segment the first ratio_length entries (offset 0) are taken.
    pad_lod = L.arange(0, (num_graph + 1) * max_num_nodes,
                       step=max_num_nodes,
                       dtype="int32")
    sort_index = L.lod_reset(sort_index, pad_lod)
    ratio_length = L.ceil(num_nodes_per_graph * ratio)
    ratio_length = L.cast(ratio_length, dtype="int64")
    ratio_length = L.reshape(ratio_length, shape=[-1, 1])
    offset = L.zeros(shape=[num_graph, 1], dtype="int64")
    choose_index = L.sequence_slice(input=sort_index,
                                    offset=offset,
                                    length=ratio_length)

    perm = L.reshape(choose_index, shape=[-1])
    return perm, ratio_length
示例#19
0
def sgat(gw,
         node_feat,
         edge_feat,
         hidden_size,
         name,
         activation='relu',
         combine='mean',
         num_heads=4,
         feat_drop=0.2,
         attn_drop=0.2,
         is_test=False):
    """
    The sgat function can aggregate the edge-neighbors of node to update the node embedding.
    Adapted from https://github.com/PaddlePaddle/PGL/blob/main/pgl/layers/conv.py.
    Args:
        gw(GraphWrapper): A graph wrapper for edge-node graph.
        node_feat(Variable): A tensor of node-edge features with shape
            (num_nodes + num_edges, feature_size). (The original text read
            "num_nodes + num_nodes"; presumably a typo given the return
            shape - TODO confirm.)
        edge_feat(Variable): A tensor of spatial distance features with shape (num_edges, feature_size).
        combine(str): The choice of combining multi-head embeddings. It can be mean, max or dense.
            Any other value leaves the heads concatenated.

        hidden_size: The hidden size for gat.
        activation: The activation for the output.
        name: Gat layer names. Used as the prefix of every parameter name.
        num_heads: The head number in gat.
        feat_drop: Dropout rate for feature.
        attn_drop: Dropout rate for attention.
        is_test: Whether in test phase.
    Returns:
        Variable: The updated node-edge feature matrix with shape (num_nodes + num_edges, feature_size),
        where feature_size is hidden_size for combine in {mean, max, dense}
        and hidden_size * num_heads otherwise.
    """
    def send_attention(src_feat, dst_feat, edge_feat):
        # Unnormalised attention logit per edge and head: source term +
        # destination term (+ edge term when provided), through leaky ReLU.
        output = src_feat["left_a"] + dst_feat["right_a"]
        if 'edge_a' in edge_feat:
            output += edge_feat["edge_a"]
        output = L.leaky_relu(output, alpha=0.2)  # (num_edges, num_heads)
        return {"alpha": output, "h": src_feat["h"]}

    def reduce_attention(msg):
        # Normalise the logits over each destination's incoming messages,
        # weight the source features head by head, and sum them.
        alpha = msg["alpha"]  # lod-tensor (batch_size, seq_len, num_heads)
        h = msg["h"]
        alpha = paddle_helper.sequence_softmax(alpha)
        old_h = h
        h = L.reshape(h, [-1, num_heads, hidden_size])
        alpha = L.reshape(alpha, [-1, num_heads, 1])
        if attn_drop > 1e-15:
            alpha = L.dropout(alpha,
                              dropout_prob=attn_drop,
                              is_test=is_test,
                              dropout_implementation="upscale_in_train")
        h = h * alpha
        h = L.reshape(h, [-1, num_heads * hidden_size])
        # Restore the LoD taken from the incoming messages so that
        # sequence_pool sums per destination.
        h = L.lod_reset(h, old_h)
        return L.sequence_pool(h, "sum")

    if feat_drop > 1e-15:
        node_feat = L.dropout(node_feat,
                              dropout_prob=feat_drop,
                              is_test=is_test,
                              dropout_implementation='upscale_in_train')
        edge_feat = L.dropout(edge_feat,
                              dropout_prob=feat_drop,
                              is_test=is_test,
                              dropout_implementation='upscale_in_train')

    # Project node features to num_heads * hidden_size and compute the
    # per-head source ("left") and destination ("right") attention terms.
    ft = L.fc(node_feat,
              hidden_size * num_heads,
              bias_attr=False,
              param_attr=fluid.ParamAttr(name=name + '_weight'))
    left_a = L.create_parameter(shape=[num_heads, hidden_size],
                                dtype='float32',
                                name=name + '_gat_l_A')
    right_a = L.create_parameter(shape=[num_heads, hidden_size],
                                 dtype='float32',
                                 name=name + '_gat_r_A')
    reshape_ft = L.reshape(ft, [-1, num_heads, hidden_size])
    left_a_value = L.reduce_sum(reshape_ft * left_a, -1)
    right_a_value = L.reduce_sum(reshape_ft * right_a, -1)

    # Same projection for the edge features, giving the per-head edge term.
    fd = L.fc(edge_feat,
              size=hidden_size * num_heads,
              bias_attr=False,
              param_attr=fluid.ParamAttr(name=name + '_fc_eW'))
    edge_a = L.create_parameter(shape=[num_heads, hidden_size],
                                dtype='float32',
                                name=name + '_gat_d_A')
    fd = L.reshape(fd, [-1, num_heads, hidden_size])
    edge_a_value = L.reduce_sum(fd * edge_a, -1)
    efeat_list = [('edge_a', edge_a_value)]

    msg = gw.send(send_attention,
                  nfeat_list=[("h", ft), ("left_a", left_a_value),
                              ("right_a", right_a_value)],
                  efeat_list=efeat_list)
    output = gw.recv(msg, reduce_attention)

    # Merge the heads. num_heads is reset to 1 afterwards so that the bias
    # below is sized for the merged width.
    if combine == 'mean':
        output = L.reshape(output, [-1, num_heads, hidden_size])
        output = L.reduce_mean(output, dim=1)
        num_heads = 1
    if combine == 'max':
        output = L.reshape(output, [-1, num_heads, hidden_size])
        output = L.reduce_max(output, dim=1)
        num_heads = 1
    if combine == 'dense':
        output = L.fc(output,
                      hidden_size,
                      bias_attr=False,
                      param_attr=fluid.ParamAttr(name=name + '_dense_combine'))
        num_heads = 1

    bias = L.create_parameter(shape=[hidden_size * num_heads],
                              dtype='float32',
                              is_bias=True,
                              name=name + '_bias')
    # NOTE(review): stop_gradient=True means this bias receives no gradient -
    # confirm that a frozen bias is intended.
    bias.stop_gradient = True
    output = L.elementwise_add(output, bias, act=activation)
    return output
示例#20
0
def fastnms(all_pred_boxes, all_pred_scores, resize_shape, origin_shape,
            conf_thresh, nms_thresh, keep_top_k, nms_top_k, use_yolo_box):
    '''
    Filter predictions by score, run ``fast_nms`` and rescale the surviving
    boxes. Only the first image of the batch (index 0) is processed.

    :param all_pred_boxes:      [batch_size, -1, 4]
    :param all_pred_scores:     [batch_size, -1, 80]
    :param resize_shape:        [batch_size, 2]
    :param origin_shape:        [batch_size, 2]
    :param conf_thresh:         score threshold used both before and after NMS
    :param nms_thresh:          forwarded to ``fast_nms``
    :param keep_top_k:          forwarded to ``fast_nms``
    :param nms_top_k:           forwarded to ``fast_nms``
    :param use_yolo_box:        if true, boxes are scaled by ``origin_shape``
                                only; otherwise by ``origin_shape / resize_shape``
    :return: list ``[boxes, scores, classes]`` reshaped to ``(1, -1, 4)``,
             ``(1, -1)`` and ``(1, -1)``. When nothing passes the second
             score filter, a single placeholder entry with its score lowered
             by 2.0 is returned so the caller can discard it.
    '''
    conf_preds = P.transpose(all_pred_scores, perm=[0, 2, 1])  # [1, 80, -1]
    cur_scores = conf_preds[0]  # [80, -1]
    conf_scores = P.reduce_max(cur_scores, dim=0)  # [-1, ]
    # If keep is [None] and is then used in gather(), this error is raised:
    # cudaGetLastError  invalid configuration argument errno: 9
    # To avoid it, keep must never be [None]; when it is, substitute the single index [[0]].
    keep = P.where(conf_scores > conf_thresh)

    def exist_objs_1(keep):
        # At least one candidate passed the threshold: use the indices as they are.
        return keep

    def no_objs_1():
        # No candidate passed: fall back to index 0 so gather() gets a non-empty input.
        keep_extra = P.zeros((1, 1), 'int64')
        return keep_extra

    keep = P.cond(P.shape(keep)[0] == 0, no_objs_1, lambda: exist_objs_1(keep))
    scores = P.gather(all_pred_scores[0], keep)
    scores = P.transpose(scores, perm=[1, 0])
    boxes = P.gather(all_pred_boxes[0], keep)
    boxes, scores, classes = fast_nms(boxes, scores, conf_thresh, nms_thresh,
                                      keep_top_k, nms_top_k)

    # Filter by score a second time. As noted above, a box is kept as soon as its best score
    # exceeds the threshold, but when the matrix above is computed that box is replicated 80
    # times, one copy per class. Copies for classes other than the best one may score below
    # the threshold and must be removed.
    # A consequence for fastnms: if a box has its best score > threshold and another class
    # score that is also above the threshold, both copies may survive,
    # and those two boxes share the same xywh.
    keep = P.where(scores > conf_thresh)

    def exist_objs_2(keep, boxes, classes, scores):
        # Keep only the entries selected by the second score filter.
        boxes = P.gather(boxes, keep)
        classes = P.gather(classes, keep)
        scores = P.gather(scores, keep)
        return boxes, classes, scores

    def no_objs_2(boxes, classes, scores):
        # Nothing survived: return entry 0 as a placeholder.
        keep = P.zeros((1, 1), 'int64')
        boxes = P.gather(boxes, keep)
        classes = P.gather(classes, keep)
        scores = P.gather(scores, keep)
        scores -= 2.0  # trick: make the score negative so the Python side filters it out
        return boxes, classes, scores

    boxes, classes, scores = P.cond(
        P.shape(keep)[0] == 0, lambda: no_objs_2(boxes, classes, scores),
        lambda: exist_objs_2(keep, boxes, classes, scores))
    # Convert to top-left / bottom-right corner coordinates.
    boxes = P.concat(
        [boxes[:, :2] - boxes[:, 2:] * 0.5, boxes[:, :2] + boxes[:, 2:] * 0.5],
        axis=-1)

    # Rescale to the original image size.
    resize_shape_f = P.cast(resize_shape, 'float32')
    origin_shape_f = P.cast(origin_shape, 'float32')
    if use_yolo_box:
        scale = origin_shape_f
    else:
        scale = origin_shape_f / resize_shape_f
    scale = P.expand(scale, [1, 2])
    boxes *= scale  # only valid for batch size 1: dim 0 of scale is the batch size, while dim 0 of boxes is the number of objects predicted for this image

    # Batch dimension first.
    boxes = P.reshape(boxes, (1, -1, 4), name='boxes')
    scores = P.reshape(scores, (1, -1), name='scores')
    classes = P.reshape(classes, (1, -1), name='classes')
    return [boxes, scores, classes]
示例#21
0
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           mlvl_points,
                           img_shape,
                           scale_factor,
                           rescale=False,
                           with_nms=True):
        """Decode the multi-level predictions of one image and apply NMS.

        Args:
            cls_scores: per-FPN-level class logits, each laid out as
                [num_classes, h, w].
            bbox_preds: per-FPN-level box regressions, each [4, h, w].
            mlvl_points: per-FPN-level grid points, each [h*w, 3] holding
                (cell top-left x, cell top-left y, cell side length).
            img_shape: indexable; ``img_shape[0]`` bounds the y coordinates
                and ``img_shape[1]`` bounds the x coordinates when clipping.
            scale_factor: value the boxes are divided by when ``rescale``
                is true.
            rescale: whether to divide the boxes by ``scale_factor``.
            with_nms: accepted for interface compatibility; not read here.

        Returns:
            The result of ``fluid.layers.matrix_nms`` or
            ``fluid.layers.multiclass_nms`` depending on
            ``self.nms_cfg['nms_type']``; ``None`` for any other type.

        Raises:
            KeyError: if ``self.nms_cfg`` has no ``'nms_type'`` entry.
        """
        nms_cfg = self.nms_cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        # Loop-invariant: number of candidates kept per level before NMS.
        nms_top_k = nms_cfg.get('nms_top_k', -1)
        mlvl_bboxes = []
        mlvl_scores = []
        # Walk over every FPN output level.
        for i_lvl, (cls_score, bbox_pred, points) in enumerate(
                zip(cls_scores, bbox_preds, mlvl_points)):
            cls_score = L.transpose(cls_score, [1, 2, 0])              # [h, w, C]
            cls_score = L.reshape(cls_score, (-1, self.num_classes))   # [h*w, C]
            if self.use_sigmoid_cls:
                scores = L.sigmoid(cls_score)   # [h*w, C]
            else:
                scores = L.softmax(cls_score)
            bbox_pred = L.transpose(bbox_pred, [1, 2, 0])   # [h, w, 4]
            bbox_pred = L.reshape(bbox_pred, (-1, 4))       # [h*w, 4]
            if nms_top_k > 0 and scores.shape[0] > nms_top_k:
                if self.use_sigmoid_cls:
                    max_scores = L.reduce_max(scores, dim=1)
                else:
                    # Softmax head: foreground labels are [0, num_class-1] and
                    # the background id is num_class (mmdet v2.0 convention),
                    # so rank candidates by the best foreground score only.
                    # Leaving this branch empty made the topk call below fail
                    # with a NameError.
                    # NOTE(review): assumes the last score column is the
                    # background class in the softmax case - confirm.
                    max_scores = L.reduce_max(scores[:, :-1], dim=1)
                _, topk_inds = L.topk(max_scores, k=nms_top_k)
                scores = L.gather(scores, topk_inds)  # [M, C]
                points = L.gather(points, topk_inds)  # [M, 3]  cell xy and side length
                bbox_pred = L.gather(bbox_pred, topk_inds)  # [M, 4]

            # [M, 4]  cell top-left xy repeated twice.
            bbox_pos_center = L.concat([points[:, :2], points[:, :2]], axis=1)

            # [M, 4]  final boxes (x1y1x2y2) = bbox_pred * stride + cell top-left.
            bboxes = bbox_pred * self.fpn_stride[i_lvl] + bbox_pos_center

            # Clip to the image: x against img_shape[1], y against img_shape[0].
            x1 = L.clip(bboxes[:, 0], 0.0, img_shape[1])
            y1 = L.clip(bboxes[:, 1], 0.0, img_shape[0])
            x2 = L.clip(bboxes[:, 2], 0.0, img_shape[1])
            y2 = L.clip(bboxes[:, 3], 0.0, img_shape[0])
            bboxes = paddle.stack([x1, y1, x2, y2], axis=-1)  # [M, 4]
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_scores = L.concat(mlvl_scores, axis=0)  # [M2, C]  scores of all levels
        mlvl_bboxes = L.concat(mlvl_bboxes, axis=0)  # [M2, 4]  boxes of all levels (x1y1x2y2)
        if rescale:
            scale_factor_ = paddle.to_tensor(scale_factor)
            mlvl_bboxes /= scale_factor_  # [M2, 4]

        pred_scores = L.unsqueeze(mlvl_scores, axes=0)  # [1, M2, C]
        pred_boxes = L.unsqueeze(mlvl_bboxes, axes=0)   # [1, M2, 4], final coordinates
        pred_scores = L.transpose(pred_scores, perm=[0, 2, 1])  # [1, C, M2], final scores

        # NMS. The config is copied so that popping 'nms_type' does not
        # mutate self.nms_cfg; the remaining entries are forwarded as kwargs.
        pred = None
        i = 0
        nms_cfg = copy.deepcopy(self.nms_cfg)
        nms_type = nms_cfg.pop('nms_type')
        if nms_type == 'matrix_nms':
            pred = fluid.layers.matrix_nms(pred_boxes[i:i+1, :, :], pred_scores[i:i+1, :, :], background_label=-1, **nms_cfg)
        elif nms_type == 'multiclass_nms':
            pred = fluid.layers.multiclass_nms(pred_boxes[i:i+1, :, :], pred_scores[i:i+1, :, :], background_label=-1, **nms_cfg)
        return pred
示例#22
0
 def net_func():
     """Build a constant tensor of 2.0 and assert that its maximum is below 1.0."""
     filled = layers.fill_constant(shape=[10], dtype='float32', value=2.0)
     # The maximum is 2.0, so the asserted condition is false.
     layers.Assert(layers.reduce_max(filled) < 1.0, (filled, ), 5)
示例#23
0
 def net_func():
     """Build a [2, 3] constant tensor of 2.0 and assert that its maximum is below 1.0."""
     filled = layers.fill_constant(shape=[2, 3], dtype='float32', value=2.0)
     # The maximum is 2.0, so the asserted condition is false.
     layers.Assert(layers.reduce_max(filled) < 1.0, [filled], 10, name="test")
示例#24
0
 def norm_range(t, range):
     """Normalise ``t`` via ``norm_ip`` using ``range`` or, if it is None, t's own min/max."""
     if range is None:
         # No explicit bounds: fall back to the tensor's own extrema.
         low, high = float(F.reduce_min(t)), float(F.reduce_max(t))
     else:
         low, high = range[0], range[1]
     norm_ip(t, low, high)
    def norm_img(self, x):
        """Min-max normalise ``x`` to the range [0, 255].

        The global minimum of ``x`` maps to 0 and the global maximum to 255.
        If every element of ``x`` is equal, the span is zero and the result
        is a division by zero.
        """
        mx = layers.reduce_max(x)
        mn = layers.reduce_min(x)
        # The divisor must be the positive span (mx - mn); dividing by
        # (mn - mx) flips the sign and yields values in [-255, 0].
        x = 255 * (x - mn) / (mx - mn)

        return x
示例#26
0
    def __call__(
            self,
            predictions,
            labels_pos_mask,  # Shape: [batch_size, 19248, 1]
            labels_neg_mask,  # Shape: [batch_size, 19248, 1]
            labels_allboxes_vector,  # Shape: [batch_size, 19248, 8]
            segment_t,  # list  Shape: [batch_size, 19248, 1]
            label_masks,
            labels_best_truth_idx,
            labels_pos_index,
            labels_pos_cid,  #  Shape: [batch_size, 19248]
            labels_pos_cid2,  #  Shape: [batch_size, 19248]
            priors,
            class_vectors,
            batch_size,
            use_maskiou=True,
            use_ce_loss=True,
            use_ghm_c_loss=False,
            use_focal_loss=False,
            use_ohem_loss=False):
        """Build the individual training losses and their sum.

        Args:
            predictions: mapping with the keys 'loc', 'conf', 'mask', 'proto'
                and 'segm' (see the shape comments below).
            labels_pos_mask: positive-sample mask; also summed to obtain the
                number of positives used for normalisation.
            labels_neg_mask: negative-sample mask, forwarded to the
                confidence loss.
            labels_allboxes_vector: channels 0:4 and 4:8 are split into the
                encoded and the decoded target boxes respectively.
            segment_t: per-image targets for the semantic segmentation loss.
            label_masks: per-image ground-truth masks, gathered by gt index.
            labels_best_truth_idx: per-image index of the matched gt.
            labels_pos_index: per-image indices of the positive samples.
            labels_pos_cid: per-image class ids, gathered at the positives.
            labels_pos_cid2: class ids forwarded to the confidence loss.
            priors: prior boxes passed to ``decode``.
            class_vectors: lookup table gathered by class id.
            batch_size: Python int; number of images iterated over.
            use_maskiou: also compute the mask-IoU loss ('I').
            use_ce_loss / use_ghm_c_loss / use_focal_loss / use_ohem_loss:
                select the confidence loss; the first true flag in this
                order wins.

        Returns:
            ``(losses, total_loss)`` where ``losses`` maps 'B', 'M', 'C',
            'S' (and 'I' when ``use_maskiou``) to the normalised loss terms
            and ``total_loss`` is their sum.
        """

        pred_allboxes_encode_x0y0x1y1 = predictions[
            'loc']  # Shape: [batch_size, 19248, 4]
        pred_allboxes_conf = predictions[
            'conf']  # Shape: [batch_size, 19248, 1+80]
        pred_allboxes_mask_coef = predictions[
            'mask']  # Shape: [batch_size, 19248, num_prototypes=32]
        pred_proto = predictions[
            'proto']  # Shape: [batch_size, s4=138, s4=138, num_prototypes=32]
        pred_segm = predictions[
            'segm']  # Shape: [batch_size, num_classes=80, s8=69, s8=69]

        labels_allboxes_x0y0x1y1 = labels_allboxes_vector[:, :, 0:
                                                          4]  # Shape: [batch_size, 19248, 4]
        labels_allboxes_decode_x0y0x1y1 = labels_allboxes_vector[:, :, 4:
                                                                 8]  # Shape: [batch_size, 19248, 4]

        losses = {}

        # 1. bbox_loss, computed for positive samples only (disabled smooth-L1 variant).
        # bbox_alpha = 1.5
        # bbox_loss = P.smooth_l1(P.reshape(pred_allboxes_encode_x0y0x1y1, (-1, 4)), P.reshape(labels_allboxes_x0y0x1y1, (-1, 4)))
        # bbox_loss = P.reshape(labels_pos_mask, (-1, 1)) * bbox_loss
        # bbox_loss = P.reduce_sum(bbox_loss) * bbox_alpha
        # losses['B'] = bbox_loss

        # 1. bbox_loss, implemented as a CIoU loss.
        pred_x0y0x1y1 = []
        for idx in range(batch_size):
            temp = decode(pred_allboxes_encode_x0y0x1y1[idx], priors)
            pred_x0y0x1y1.append(temp)
        pred_x0y0x1y1 = P.concat(pred_x0y0x1y1,
                                 axis=0)  # Shape: [batch_size*num_priors, 4]
        pred_x0y0x1y1 = P.reshape(
            pred_x0y0x1y1,
            (batch_size, -1, 4))  # Shape: [batch_size, num_priors, 4]

        ciou = P.reshape(
            self.bbox_ciou(pred_x0y0x1y1, labels_allboxes_decode_x0y0x1y1),
            (batch_size, -1, 1))  # (batch_size, num_priors, 1)

        # Weight of each predicted box's ciou_loss = 2 - (ground-truth area / image area).
        gt_area = (labels_allboxes_decode_x0y0x1y1[:, :, 2:3] - labels_allboxes_decode_x0y0x1y1[:, :, 0:1]) * \
                  (labels_allboxes_decode_x0y0x1y1[:, :, 3:4] - labels_allboxes_decode_x0y0x1y1[:, :, 1:2])
        bbox_loss_scale = 2.0 - gt_area
        ciou_loss = labels_pos_mask * bbox_loss_scale * (1 - ciou)
        bbox_alpha = 1.5
        ciou_loss = P.reduce_sum(ciou_loss) * bbox_alpha
        losses['B'] = ciou_loss

        # 2. mask_loss, computed for positive samples only.
        mask_h = P.shape(pred_proto)[1]
        mask_w = P.shape(pred_proto)[2]
        loss_m = 0
        maskiou_t_list = []
        maskiou_net_input_list = []
        label_t_list = []
        for idx in range(batch_size):
            # E.g. [[0], [0], [0], [0], [0], [0], [0], [0]]: for the 8 positive samples, pick the index
            # (into label_x0y0x1y1cid[idx]) of the best-matching gt. With a single gt all indices are 0.
            labels_pos_index[idx].stop_gradient = True
            cur_gt = P.gather(labels_best_truth_idx[idx],
                              labels_pos_index[idx])  # (?, 1)
            cur_gt.stop_gradient = True
            cur_x0y0x1y1 = P.gather(labels_allboxes_decode_x0y0x1y1[idx],
                                    labels_pos_index[idx])  # (?, 4)

            proto_masks = pred_proto[idx]  # (138, 138, 32)
            # pred_mask_coef (batch_size, 19248, 32). Pick the mask coefficients predicted for the 8 positives.
            proto_coef = P.gather(pred_allboxes_mask_coef[idx],
                                  labels_pos_index[idx])  # (?, 32)

            # (?, 138, 138): ground-truth masks of the gts matched by the 8 positives. They match the same gt, so the same mask is repeated 8 times.
            mask_t = P.gather(label_masks[idx], cur_gt)  # (?, 138, 138)
            # (?, ): ground-truth class ids of the gts matched by the 8 positives. They match the same gt, so the same cid is repeated 8 times.
            label_t = P.gather(labels_pos_cid[idx],
                               labels_pos_index[idx])  # (?, )

            # Size: (138, 138, ?)  =  prototypes * transposed coefficients
            pred_masks = P.matmul(proto_masks, proto_coef, transpose_y=True)
            pred_masks = P.sigmoid(pred_masks)  # sigmoid activation

            pred_masks = crop(pred_masks, cur_x0y0x1y1)
            pred_masks = P.transpose(pred_masks, perm=[2, 0, 1])

            masks_pos_loss = mask_t * (0 - P.log(pred_masks + 1e-9)
                                       )  # binary cross-entropy; tiny constant added to avoid nan
            masks_neg_loss = (1 - mask_t) * (0 - P.log(1 - pred_masks + 1e-9)
                                             )  # binary cross-entropy; tiny constant added to avoid nan
            pre_loss = (masks_pos_loss + masks_neg_loss)
            pre_loss = P.reduce_sum(pre_loss, dim=[1, 2])

            # The smaller the gt area, the larger the weight of its mask loss.
            cur_cxcywh = center_size(cur_x0y0x1y1)
            gt_box_width = cur_cxcywh[:, 2]
            gt_box_height = cur_cxcywh[:, 3]
            pre_loss = pre_loss / (gt_box_width * gt_box_height)
            loss_m += P.reduce_sum(pre_loss)

            if use_maskiou:
                # Masks in mask_t with area <= 5*5 were meant to be discarded.
                # discard_mask_area = 5*5
                # English summary of the note below: on GPU builds of paddlepaddle 1.6.2, using an
                # empty `select` ([None]) in gather() raises
                # "cudaGetLastError  invalid configuration argument errno: 9" (the CPU build works).
                # To keep `select` non-empty, no area filtering is done and all of mask_t is kept.
                '''
                gpu版本的paddlepaddle1.6.2里有一个问题。select如果是[None],并且在gather()里使用了select,就会出现
                cudaGetLastError  invalid configuration argument errno: 9   这个错误。cpu版本则可以正常跑。
                为了避免上面的问题,只能让select不是[None],所以这里不做面积过滤,mask_t全部保留。
                '''
                discard_mask_area = -1
                gt_mask_area = P.reduce_sum(mask_t, dim=[1, 2])
                gt_mask_area.stop_gradient = True
                select = P.where(gt_mask_area > discard_mask_area)
                select.stop_gradient = True
                pred_masks = P.gather(pred_masks, select)
                mask_t = P.gather(mask_t, select)
                label_t = P.gather(label_t, select)
                label_t.stop_gradient = True

                maskiou_net_input = P.reshape(
                    pred_masks, (P.shape(pred_masks)[0], 1, mask_h, mask_w))
                pred_masks = P.cast(pred_masks > 0.5, 'float32')  # binarise at 0.5 (round)
                maskiou_t = self._mask_iou(pred_masks, mask_t)  # (8, )
                maskiou_net_input_list.append(
                    maskiou_net_input)  # (8, 1, 138, 138)
                maskiou_t_list.append(maskiou_t)  # (8, )
                label_t_list.append(label_t)  # (8, )
        mask_alpha = 6.125
        losses['M'] = loss_m * mask_alpha / mask_h / mask_w

        # Remaining part: merge the per-image mask-IoU inputs and targets.
        if use_maskiou:
            maskiou_net_input = P.concat(
                maskiou_net_input_list,
                axis=0)  # (21, 1, 138, 138)  predicted masks of the 21 positives
            maskiou_t = P.concat(maskiou_t_list,
                                 axis=0)  # (21, )  IoU between predicted and true masks of the 21 positives
            label_t = P.concat(label_t_list, axis=0)  # (21, )  class ids of the 21 positives
            label_t.stop_gradient = True  # presumably because it is integer-valued?
            maskiou_targets = [maskiou_net_input, maskiou_t, label_t]

        # 3. conf_loss.
        # NOTE(review): if all four use_*_loss flags are false, conf_loss is never
        # assigned and the line setting losses['C'] raises a NameError.
        conf_alpha = 1.0
        if use_ce_loss:
            conf_loss = self.ce_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                          labels_neg_mask, class_vectors,
                                          labels_pos_cid2, gt_area)
        elif use_ghm_c_loss:
            conf_loss = self.ghm_c_loss(pred_allboxes_conf, labels_pos_mask,
                                        labels_neg_mask, class_vectors,
                                        labels_pos_cid2)
        elif use_focal_loss:
            conf_loss = self.focal_conf_loss(pred_allboxes_conf,
                                             labels_pos_mask, labels_neg_mask,
                                             class_vectors, labels_pos_cid2)
        elif use_ohem_loss:
            conf_loss = self.ohem_conf_loss(pred_allboxes_conf, batch_size,
                                            labels_neg_mask, labels_pos_mask,
                                            labels_pos_index, class_vectors,
                                            labels_pos_cid)
        losses['C'] = conf_loss * conf_alpha

        # 4. mask_iou_loss, computed for positive samples only.
        if use_maskiou:
            # maskiou_net_input  (21, 1, 138, 138)  predicted masks of the 21 positives
            # maskiou_t          (21, )             IoU between predicted and true masks of the 21 positives
            # label_t            (21, )             class ids of the 21 positives
            maskiou_net_input, maskiou_t, label_t = maskiou_targets
            maskiou_p = maskiou_net(maskiou_net_input, self.num_classes - 1)
            maskiou_p = P.reduce_max(maskiou_p, dim=[2, 3])  # max pooling  (21, 80)
            temp_mask = P.gather(class_vectors, label_t)  # mask  (21, 81)
            temp_mask = temp_mask[:, 1:]  # mask  (21, 80)
            maskiou_p = temp_mask * maskiou_p  # keep only the channel of the true class  (21, 80)
            maskiou_p = P.reduce_sum(maskiou_p, dim=1,
                                     keep_dim=True)  # (21, 1)
            loss_i = P.smooth_l1(
                maskiou_p, P.reshape(maskiou_t, (P.shape(maskiou_t)[0], 1)))
            maskiou_alpha = 25.0
            losses['I'] = maskiou_alpha * P.reduce_sum(loss_i)

        # 5. semantic_segmentation_loss, computed for positive samples only.
        # mask_h / mask_w are rebound here to the spatial size of pred_segm.
        mask_h = P.shape(pred_segm)[2]
        mask_w = P.shape(pred_segm)[3]
        loss_s = 0.0
        for idx in range(batch_size):
            cur_segment = pred_segm[idx]  # (80, 69, 69)
            l = P.sigmoid_cross_entropy_with_logits(cur_segment,
                                                    segment_t[idx])
            loss_s += P.reduce_sum(l)

        semantic_segmentation_alpha = 1.0
        losses['S'] = loss_s / mask_h / mask_w * semantic_segmentation_alpha

        # Normalise: every loss except 'S' by the number of positives, 'S' by the batch size.
        # NOTE(review): total_num_pos is the sum of labels_pos_mask; if there are no
        # positives this divides by zero - confirm the caller guarantees at least one.
        total_num_pos = P.cast(P.reduce_sum(labels_pos_mask), 'float32')
        for k in losses:
            if k not in ('S', ):
                losses[k] /= total_num_pos
            else:
                losses[k] /= batch_size
        total_loss = 0.0
        for k in losses:
            total_loss += losses[k]

        # Loss Key:
        #  - B: Box Localization Loss
        #  - M: Mask Loss
        #  - C: Class Confidence Loss
        #  - I: MaskIou Loss
        #  - S: Semantic Segmentation Loss
        # return losses['M'], losses['C']
        return losses, total_loss
 def norm_img(self, x):
     """Min-max normalise ``x`` to the range [0, 255].

     The global minimum of ``x`` maps to 0 and the global maximum to 255.
     If every element of ``x`` is equal, the span is zero and the result
     is a division by zero.
     """
     mx = reduce_max(x)
     mn = reduce_min(x)
     # The divisor must be the positive span (mx - mn); dividing by
     # (mn - mx) flips the sign and yields values in [-255, 0].
     x = 255 * (x - mn) / (mx - mn)
     return x