def __call__(self, pbox, gbox, iou_weight=1., loc_reweight=None):
    """Compute the GIoU loss between predicted and ground-truth boxes.

    Args:
        pbox (Tensor): predicted boxes; the last axis is split into the
            four values ``x1, y1, x2, y2``.
        gbox (Tensor): ground-truth boxes, split the same way as ``pbox``.
        iou_weight (float | Tensor): multiplier applied to the per-box loss
            before the ``'sum'`` and ``'mean'`` reductions. It is not
            applied when ``self.reduction == 'none'``.
        loc_reweight (Tensor, optional): when given, it is reshaped to
            ``(-1, 1)`` and blended into the loss with a fixed 0.9 factor.

    Returns:
        Tensor: the (optionally reduced) loss times ``self.loss_weight``.
    """
    px1, py1, px2, py2 = paddle.split(pbox, num_or_sections=4, axis=-1)
    gx1, gy1, gx2, gy2 = paddle.split(gbox, num_or_sections=4, axis=-1)
    iou, _, union = self.bbox_overlap([px1, py1, px2, py2],
                                      [gx1, gy1, gx2, gy2], self.eps)

    # Corners of the smallest box enclosing both the prediction and the target.
    enclose_x1 = paddle.minimum(px1, gx1)
    enclose_y1 = paddle.minimum(py1, gy1)
    enclose_x2 = paddle.maximum(px2, gx2)
    enclose_y2 = paddle.maximum(py2, gy2)
    enclose_area = (enclose_x2 - enclose_x1) * (enclose_y2 - enclose_y1)
    enclose_area = enclose_area + self.eps

    miou = iou - ((enclose_area - union) / enclose_area)

    if loc_reweight is None:
        giou = 1 - miou
    else:
        loc_reweight = paddle.reshape(loc_reweight, shape=(-1, 1))
        loc_thresh = 0.9
        giou = 1 - (1 - loc_thresh) * miou - loc_thresh * miou * loc_reweight

    if self.reduction == 'none':
        # The unreduced loss is returned without iou_weight applied.
        loss = giou
    elif self.reduction == 'sum':
        loss = paddle.sum(giou * iou_weight)
    else:
        loss = paddle.mean(giou * iou_weight)
    return loss * self.loss_weight
def test_dynamic_api(self):
    """Check ``paddle.maximum`` in dynamic mode against precomputed results."""
    paddle.disable_static()
    tensors = {
        key: paddle.to_tensor(getattr(self, "input_" + key))
        for key in ("x", "y", "z", "a", "b", "c")
    }
    # (left operand, right operand, expected ndarray); the x/z pair is the
    # broadcast case.
    cases = (
        ("x", "y", self.np_expected1),
        ("x", "z", self.np_expected2),
        ("a", "c", self.np_expected3),
        ("b", "c", self.np_expected4),
    )
    for lhs, rhs, expected in cases:
        actual = paddle.maximum(tensors[lhs], tensors[rhs]).numpy()
        self.assertTrue(np.allclose(actual, expected))
def bbox_overlap(self, box1, box2, eps=1e-10):
    """Compute the IoU, intersection area and union area of two box sets.

    Args:
        box1 (sequence of Tensor): the four coordinates ``x1, y1, x2, y2``
            of the first boxes, one tensor per coordinate.
        box2 (sequence of Tensor): the four coordinates of the second
            boxes, in the same layout as ``box1``.
        eps (float): added to the union to avoid division by zero.

    Return:
        iou (Tensor): ``overlap / union``
        overlap (Tensor): intersection area of box1 and box2
        union (Tensor): union area of box1 and box2, including ``eps``
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Intersection rectangle: largest top-left, smallest bottom-right.
    inter_x1 = paddle.maximum(ax1, bx1)
    inter_y1 = paddle.maximum(ay1, by1)
    inter_x2 = paddle.minimum(ax2, bx2)
    inter_y2 = paddle.minimum(ay2, by2)

    # Each extent is clamped at zero on its own, so disjoint boxes yield 0.
    inter_w = (inter_x2 - inter_x1).clip(0)
    inter_h = (inter_y2 - inter_y1).clip(0)
    overlap = inter_w * inter_h

    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - overlap + eps
    return overlap / union, overlap, union
def test_static_api(self):
    """Check ``paddle.maximum`` in static-graph mode against ``np.maximum``.

    Covers two same-shape ``[10, 15]`` operands and a ``[15]`` operand
    broadcast against a ``[10, 15]`` operand.
    """
    paddle.enable_static()

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data_x = paddle.static.data("x", shape=[10, 15], dtype="float32")
        data_y = paddle.static.data("y", shape=[10, 15], dtype="float32")
        result_max = paddle.maximum(data_x, data_y)
        exe = paddle.static.Executor(self.place)
        res, = exe.run(feed={"x": self.input_x,
                             "y": self.input_y},
                       fetch_list=[result_max])
    self.assertTrue((res == np.maximum(self.input_x, self.input_y)).all())

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data_x = paddle.static.data("x", shape=[10, 15], dtype="float32")
        data_z = paddle.static.data("z", shape=[15], dtype="float32")
        # No ``axis`` keyword: default broadcasting already lines the [15]
        # operand up with the last dimension of [10, 15], which is the
        # alignment ``axis=1`` asked for, and the keyword is not part of
        # the current ``paddle.maximum`` signature.
        result_max = paddle.maximum(data_x, data_z)
        exe = paddle.static.Executor(self.place)
        res, = exe.run(feed={"x": self.input_x,
                             "z": self.input_z},
                       fetch_list=[result_max])
    self.assertTrue((res == np.maximum(self.input_x, self.input_z)).all())
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    """Decode RoI deltas into boxes and clip them to the image extent.

    Args:
        bbox_head_out (tuple): ``(bbox_pred, cls_prob)``.
        rois (tuple): ``(roi, rois_num)``; ``roi`` is iterated per image and
            concatenated, ``rois_num[idx]`` is the RoI count of image ``idx``.
        im_shape (Tensor): indexed per image as ``im_shape[idx, :]``;
            column 0 is used as the height bound and column 1 as the
            width bound when clipping.
        scale_factor (Tensor): kept for interface compatibility; it is not
            used by this method.

    Returns:
        tuple: ``((bbox, rois_num), scores)`` where ``bbox`` holds the
        clipped ``x1, y1, x2, y2`` on its last axis and ``scores`` is
        ``cls_prob`` without its last column.
    """
    bbox_pred, cls_prob = bbox_head_out
    roi, rois_num = rois

    # Repeat each image's shape once per RoI so it lines up row-for-row
    # with the concatenated boxes below.
    clip_shape = paddle.concat([
        paddle.expand(im_shape[idx, :], [rois_num[idx], 2])
        for idx, _ in enumerate(roi)
    ])

    # [N, C*4]
    bbox = paddle.concat(roi)
    bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
    scores = cls_prob[:, :-1]

    # Regroup the flat C*4 columns into [N, C, 4].
    bbox_num_class = bbox.shape[1] // 4
    bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4])

    bound_h = paddle.unsqueeze(clip_shape[:, 0], axis=1)
    bound_w = paddle.unsqueeze(clip_shape[:, 1], axis=1)
    zeros = paddle.zeros_like(bound_h)
    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], bound_w), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], bound_h), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], bound_w), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], bound_h), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    bboxes = (bbox, rois_num)
    return bboxes, scores
def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
    """calculate the iou of box1 and box2

    Args:
        box1 (list): [x1, y1, x2, y2] corner coordinates, one tensor per
            coordinate (documented upstream as shape [b, na, h, w, 1])
        box2 (list): [x1, y1, x2, y2] corner coordinates, same layout
        giou (bool): whether use giou or not, default False
        diou (bool): whether use diou or not, default False
        ciou (bool): whether use ciou or not, default False
        eps (float): epsilon to avoid divide by zero
    Return:
        iou (Tensor): iou of box1 and box2 (or the giou / diou / ciou
            variant; when several flags are set, giou takes precedence,
            then diou, then ciou)
    """
    px1, py1, px2, py2 = box1
    gx1, gy1, gx2, gy2 = box2
    x1 = paddle.maximum(px1, gx1)
    y1 = paddle.maximum(py1, gy1)
    x2 = paddle.minimum(px2, gx2)
    y2 = paddle.minimum(py2, gy2)

    # Clamp each extent before multiplying: when the boxes are disjoint on
    # both axes the two raw extents are both negative, and clipping only
    # their product would report a positive overlap.
    overlap = (x2 - x1).clip(0) * (y2 - y1).clip(0)

    area1 = (px2 - px1) * (py2 - py1)
    area1 = area1.clip(0)

    area2 = (gx2 - gx1) * (gy2 - gy1)
    area2 = area2.clip(0)

    union = area1 + area2 - overlap + eps
    iou = overlap / union

    if not (giou or ciou or diou):
        return iou

    # convex (enclosing box) width and height
    cw = paddle.maximum(px2, gx2) - paddle.minimum(px1, gx1)
    ch = paddle.maximum(py2, gy2) - paddle.minimum(py1, gy1)
    if giou:
        c_area = cw * ch + eps
        return iou - (c_area - union) / c_area

    # convex diagonal squared
    c2 = cw**2 + ch**2 + eps
    # squared distance between the two box centers
    rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
    if diou:
        return iou - rho2 / c2

    # ciou: add the aspect-ratio consistency term
    w1, h1 = px2 - px1, py2 - py1 + eps
    w2, h2 = gx2 - gx1, gy2 - gy1 + eps
    delta = paddle.atan(w1 / h1) - paddle.atan(w2 / h2)
    v = (4 / math.pi**2) * paddle.pow(delta, 2)
    alpha = v / (1 + eps - iou + v)
    # alpha is treated as a constant weight during back-propagation
    alpha.stop_gradient = True
    return iou - (rho2 / c2 + v * alpha)
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale, clip and filter the NMS output to build the final prediction.

    Notes:
        The original documentation states that only batch size 1 is
        currently supported.

    Args:
        bboxes (Tensor): output after decode and NMS; column 0 is read as
            the label, column 1 as the score and the remaining columns as
            the box.
        bbox_num (Tensor): number of predicted boxes for each image.
        im_shape (Tensor): shape of the input image.
        scale_factor (Tensor): scale factor of the input image, stored as
            (scale_y, scale_x) per image.

    Returns:
        pred_result (Tensor): labels, scores and clipped boxes concatenated
            along axis 1. Boxes not kept by ``nonempty_bbox`` have their
            label replaced by -1.

    Side effects:
        Stores the per-box original image shape in
        ``self.origin_shape_list``.
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    shape_rows = []
    scale_rows = []
    for i in range(bbox_num.shape[0]):
        # One copy of this image's original (h, w) per box of the image.
        shape_rows.append(
            paddle.expand(origin_shape[i:i + 1, :], [bbox_num[i], 2]))
        # scale_factor holds (scale_y, scale_x); reorder it to match the
        # x1, y1, x2, y2 box layout.
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        xyxy_scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
        scale_rows.append(paddle.expand(xyxy_scale, [bbox_num[i], 4]))

    self.origin_shape_list = paddle.concat(shape_rows)
    box_scale = paddle.concat(scale_rows)

    # bboxes: [N, 6], label, score, bbox
    pred_label = bboxes[:, 0:1]
    pred_score = bboxes[:, 1:2]
    # rescale bbox to original image
    scaled_bbox = bboxes[:, 2:] / box_scale

    origin_h = self.origin_shape_list[:, 0]
    origin_w = self.origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)
    # clip bbox to [0, original_size]: x columns by width, y columns by height
    clipped = [
        paddle.maximum(paddle.minimum(scaled_bbox[:, col], bound), zeros)
        for col, bound in enumerate((origin_w, origin_h, origin_w, origin_h))
    ]
    pred_bbox = paddle.stack(clipped, axis=-1)

    # filter empty bbox
    keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
    keep_mask = paddle.unsqueeze(keep_mask, [1])
    pred_label = paddle.where(keep_mask, pred_label,
                              paddle.ones_like(pred_label) * -1)
    return paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale, clip and filter the NMS output to build the final prediction.

    Args:
        bboxes (Tensor): the output of __call__; column 0 is read as the
            label, column 1 as the score and the remaining columns as the
            box.
        bbox_num (Tensor): number of boxes for each image.
        im_shape (Tensor): shape of the input image.
        scale_factor (Tensor): per-image (scale_y, scale_x).

    Returns:
        bbox_pred (Tensor): labels, scores and boxes concatenated along
            axis 1, with boxes expressed in the original image's
            coordinates. Boxes not kept by ``nonempty_bbox`` have their
            label replaced by -1.

    Side effects:
        Stores the per-box original image shape in
        ``self.origin_shape_list``.
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    shape_rows = []
    scale_rows = []
    for i in range(bbox_num.shape[0]):
        # One copy of this image's original (h, w) per box of the image.
        shape_rows.append(
            paddle.expand(origin_shape[i:i + 1, :], [bbox_num[i], 2]))
        # scale_factor holds (scale_y, scale_x); reorder it to match the
        # x1, y1, x2, y2 box layout.
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        xyxy_scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
        tiled_scale = paddle.expand(xyxy_scale, [bbox_num[i], 4])
        # TODO: Because paddle.expand transform error when dygraph
        # to static, use reshape to avoid mistakes.
        tiled_scale = paddle.reshape(tiled_scale, [bbox_num[i], 4])
        scale_rows.append(tiled_scale)

    self.origin_shape_list = paddle.concat(shape_rows)
    box_scale = paddle.concat(scale_rows)

    # bboxes: [N, 6], label, score, bbox
    pred_label = bboxes[:, 0:1]
    pred_score = bboxes[:, 1:2]
    # rescale bbox to original image
    scaled_bbox = bboxes[:, 2:] / box_scale

    origin_h = self.origin_shape_list[:, 0]
    origin_w = self.origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)
    # clip bbox to [0, original_size]: x columns by width, y columns by height
    clipped = [
        paddle.maximum(paddle.minimum(scaled_bbox[:, col], bound), zeros)
        for col, bound in enumerate((origin_w, origin_h, origin_w, origin_h))
    ]
    pred_bbox = paddle.stack(clipped, axis=-1)

    # filter empty bbox
    keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
    keep_mask = paddle.unsqueeze(keep_mask, [1])
    pred_label = paddle.where(keep_mask, pred_label,
                              paddle.ones_like(pred_label) * -1)
    return paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
def __iou_loss(self, pred, targets, positive_mask, weights=None):
    """
    Calculate the loss for location prediction

    Args:
        pred (Tensor): bounding boxes prediction; columns 0-3 are read
            (presumably left / top / right / bottom distances, judging by
            how they are paired below -- confirm against the caller)
        targets (Tensor): targets for positive samples, same column layout
        positive_mask (Tensor): mask of positive samples, multiplied into
            every column and into the IoU
        weights (Tensor): optional weights multiplied into the loss
    Return:
        loss (Tensor): location loss, one of ``linear_iou``, ``giou`` or
            ``iou`` as selected by ``self.iou_loss_type``
    Raises:
        KeyError: if ``self.iou_loss_type`` is none of the three above
    """
    pred_l, pred_t, pred_r, pred_b = (
        pred[:, col] * positive_mask for col in range(4))
    tgt_l, tgt_t, tgt_r, tgt_b = (
        targets[:, col] * positive_mask for col in range(4))
    # No gradient flows into the targets.
    for tgt in (tgt_l, tgt_r, tgt_t, tgt_b):
        tgt.stop_gradient = True

    # Element-wise minima give the intersection, maxima the enclosing box.
    inter_l = paddle.minimum(pred_l, tgt_l)
    inter_r = paddle.minimum(pred_r, tgt_r)
    inter_t = paddle.minimum(pred_t, tgt_t)
    inter_b = paddle.minimum(pred_b, tgt_b)

    outer_l = paddle.maximum(pred_l, tgt_l)
    outer_r = paddle.maximum(pred_r, tgt_r)
    outer_t = paddle.maximum(pred_t, tgt_t)
    outer_b = paddle.maximum(pred_b, tgt_b)

    area_predict = (pred_l + pred_r) * (pred_t + pred_b)
    area_target = (tgt_l + tgt_r) * (tgt_t + tgt_b)
    area_inter = (inter_l + inter_r) * (inter_t + inter_b)
    # The +1.0 on both sides keeps the ratio finite for empty boxes.
    ious = (area_inter + 1.0) / (
        area_predict + area_target - area_inter + 1.0)
    ious = ious * positive_mask

    loss_type = self.iou_loss_type.lower()
    if loss_type == "linear_iou":
        loss = 1.0 - ious
    elif loss_type == "giou":
        area_uniou = area_predict + area_target - area_inter
        area_circum = (outer_l + outer_r) * (outer_t + outer_b) + 1e-7
        giou = ious - (area_circum - area_uniou) / area_circum
        loss = 1.0 - giou
    elif loss_type == "iou":
        loss = 0.0 - paddle.log(ious)
    else:
        raise KeyError

    if weights is not None:
        loss = loss * weights
    return loss
def test_broadcast_axis(self):
    """Check that ``axis=1`` and ``axis=-2`` give equal ``paddle.maximum`` results."""
    paddle.disable_static()
    # NOTE(review): these two arrays are generated but never passed to the
    # op; the comparison below runs on self.input_x / self.input_y.
    # Confirm whether the (5, 4, 3, 2) vs (4, 3) pair was the intended input.
    np_x = np.random.rand(5, 4, 3, 2).astype("float64")
    np_y = np.random.rand(4, 3).astype("float64")

    lhs = paddle.to_variable(self.input_x)
    rhs = paddle.to_variable(self.input_y)
    per_axis = [paddle.maximum(lhs, rhs, axis=ax) for ax in (1, -2)]
    same = (per_axis[0].numpy() == per_axis[1].numpy()).all()
    self.assertEqual(same, True)
def test_static_api(self):
    """Check ``paddle.maximum`` in static-graph mode against precomputed results.

    Each case builds a fresh program with two placeholders, runs the op
    and compares the fetched array with the expected one.
    """
    paddle.enable_static()
    # ((name, shape, feed) for the left operand, same for the right
    # operand, placeholder dtype, expected ndarray)
    cases = (
        (("x", [10, 15], self.input_x), ("y", [10, 15], self.input_y),
         "float32", self.np_expected1),
        (("x", [10, 15], self.input_x), ("z", [15], self.input_z),
         "float32", self.np_expected2),
        (("a", [3], self.input_a), ("c", [3], self.input_c),
         "int64", self.np_expected3),
        (("b", [3], self.input_b), ("c", [3], self.input_c),
         "int64", self.np_expected4),
    )
    for left, right, dtype, expected in cases:
        l_name, l_shape, l_feed = left
        r_name, r_shape, r_feed = right
        with paddle.static.program_guard(paddle.static.Program(),
                                         paddle.static.Program()):
            lhs = paddle.static.data(l_name, shape=l_shape, dtype=dtype)
            rhs = paddle.static.data(r_name, shape=r_shape, dtype=dtype)
            result_max = paddle.maximum(lhs, rhs)
            exe = paddle.static.Executor(self.place)
            res, = exe.run(feed={l_name: l_feed,
                                 r_name: r_feed},
                           fetch_list=[result_max])
        self.assertTrue(np.allclose(res, expected))
def power_to_db(magnitude: Tensor,
                ref_value: float = 1.0,
                amin: float = 1e-10,
                top_db: Optional[float] = 80.0) -> Tensor:
    """Convert a power spectrogram (amplitude squared) to decibel (dB) units.

    The function computes the scaling ``10 * log10(x / ref)`` in a numerically
    stable way.

    Parameters:
        magnitude(Tensor): the input magnitude tensor of any shape.
        ref_value(float): the reference value. If smaller than 1.0, the db
            level of the signal will be pulled up accordingly. Otherwise,
            the db level is pushed down.
        amin(float): the minimum value of input magnitude, below which the
            input magnitude is clipped (to amin).
        top_db(float): the dynamic range of the result in dB. Values lower
            than ``log_spec.max() - top_db`` are raised to that floor.
            Pass None to disable this clipping.

    Returns:
        The spectrogram in log-scale.

    Raises:
        ParameterError: if ``amin`` or ``ref_value`` is not strictly
            positive, or ``top_db`` is negative.

    shape:
        input: any shape
        output: same as input

    Notes:
        This function is consistent with librosa.power_to_db().

    Examples:

        .. code-block:: python

            import paddle
            import paddleaudio.functional as F
            F.power_to_db(paddle.rand((2, 2)))
            >> Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
                [[-6.22858429, -3.51512218],
                 [-0.38168561, -1.44466150]])
    """
    # Validate every argument before doing any tensor work.
    if amin <= 0:
        raise ParameterError("amin must be strictly positive")
    if ref_value <= 0:
        raise ParameterError("ref_value must be strictly positive")
    if top_db is not None and top_db < 0:
        raise ParameterError("top_db must be non-negative")

    ones = paddle.ones_like(magnitude)
    # Floor the input at amin so log10 never sees zero.
    log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, magnitude))
    log_spec -= 10.0 * math.log10(max(ref_value, amin))

    if top_db is not None:
        # Limit the dynamic range to top_db below the peak.
        log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db))

    return log_spec
def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
    """calculate the iou of box1 and box2

    Args:
        box1 (Tensor): box1 with the shape (N, M, 4); the last axis is
            sliced as (x1, y1) = [0:2] and (x2, y2) = [2:4]
        box2 (Tensor): box2 with the shape (N, M, 4), same layout
        giou (bool): whether use giou or not, default False
        diou (bool): whether use diou or not, default False
        ciou (bool): whether use ciou or not, default False
        eps (float): epsilon to avoid divide by zero
    Return:
        iou (Tensor): iou of box1 and box2, with the shape (N, M). When
            several flags are set, diou is used first, then ciou, then giou.
    """
    p_min, p_max = box1[:, :, 0:2], box1[:, :, 2:4]
    g_min, g_max = box2[:, :, 0:2], box2[:, :, 2:4]

    inter_min = paddle.maximum(p_min, g_min)
    inter_max = paddle.minimum(p_max, g_max)

    # Widths/heights are clamped at zero before being multiplied together.
    overlap = (inter_max - inter_min).clip(0).prod(-1)
    area1 = (p_max - p_min).clip(0).prod(-1)
    area2 = (g_max - g_min).clip(0).prod(-1)
    union = area1 + area2 - overlap + eps
    iou = overlap / union

    if not (giou or ciou or diou):
        return iou

    # convex w, h
    cwh = paddle.maximum(p_max, g_max) - paddle.minimum(p_min, g_min)

    if not (ciou or diou):
        # giou only
        c_area = cwh.prod(2) + eps
        return iou - (c_area - union) / c_area

    # convex diagonal squared
    c2 = (cwh**2).sum(2) + eps
    # center distance
    rho2 = ((p_min + p_max - g_min - g_max)**2).sum(2) / 4
    if diou:
        return iou - rho2 / c2

    # ciou: add the aspect-ratio consistency term
    wh1 = p_max - p_min
    wh2 = g_max - g_min
    w1, h1 = wh1[:, :, 0], wh1[:, :, 1] + eps
    w2, h2 = wh2[:, :, 0], wh2[:, :, 1] + eps
    v = (4 / math.pi**2) * paddle.pow(
        paddle.atan(w1 / h1) - paddle.atan(w2 / h2), 2)
    alpha = v / (1 + eps - iou + v)
    alpha.stop_gradient = True
    return iou - (rho2 / c2 + v * alpha)
def bbox_overlaps(boxes1, boxes2):
    """
    Calculate the pairwise IoU between boxes1 and boxes2

    Args:
        boxes1 (Tensor): boxes with shape [M, 4]
        boxes2 (Tensor): boxes with shape [N, 4]

    Return:
        overlaps (Tensor): overlaps between boxes1 and boxes2 with shape
            [M, N]; pairs with no intersection are 0. An all-zero float32
            tensor is returned when either input has no boxes.
    """
    M = boxes1.shape[0]
    N = boxes2.shape[0]
    if M * N == 0:
        return paddle.zeros([M, N], dtype='float32')

    area1 = bbox_area(boxes1)
    area2 = bbox_area(boxes2)

    # [M, 1, 4] so every box in boxes1 broadcasts against all of boxes2.
    boxes1_col = paddle.unsqueeze(boxes1, 1)
    inter_br = paddle.minimum(boxes1_col[:, :, 2:], boxes2[:, 2:])
    inter_tl = paddle.maximum(boxes1_col[:, :, :2], boxes2[:, :2])
    inter_wh = (inter_br - inter_tl).clip(min=0)
    inter = inter_wh.prod(axis=2)

    union = paddle.unsqueeze(area1, 1) + area2 - inter
    return paddle.where(inter > 0, inter / union, paddle.zeros_like(inter))
def get_bboxes_giou(boxes1, boxes2, eps=1e-9):
    """calculate the pairwise giou of boxes1 and boxes2

    Args:
        boxes1 (Tensor): shape [N, 4]; columns [2:] must be >= columns [:2]
        boxes2 (Tensor): shape [M, 4]; same constraint
        eps (float): lower bound applied to the enclosing box's width and
            height to avoid divide by zero
    Return:
        giou (Tensor): giou of boxes1 and boxes2, with the shape [N, M]
    """
    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()

    iou, union = boxes_iou(boxes1, boxes2)

    # Corners of the smallest box enclosing each (boxes1[i], boxes2[j]) pair.
    boxes1_col = boxes1.unsqueeze(-2)
    enclose_lt = paddle.minimum(boxes1_col[:, :, :2], boxes2[:, :2])
    enclose_rb = paddle.maximum(boxes1.unsqueeze(-2)[:, :, 2:], boxes2[:, 2:])

    enclose_wh = (enclose_rb - enclose_lt).astype("float32").clip(min=eps)
    enclose_area = enclose_wh[:, :, 0] * enclose_wh[:, :, 1]

    return iou - (enclose_area - union) / enclose_area
def net(self, input, is_infer=False):
    """Build the MatchPyramid network and, for training, its hinge loss.

    Args:
        input: forwarded unchanged to ``MatchPyramidLayer``.
        is_infer (bool): when True only the prediction is returned.

    Returns:
        dict: ``{'prediction': ...}`` for inference, otherwise
        ``{'cost': avg_cost}``. In training mode ``avg_cost`` is also
        stored in ``self.inference_target_var`` and ``self._cost``.
    """
    pyramid_model = MatchPyramidLayer(
        self.emb_path, self.vocab_size, self.emb_size, self.kernel_num,
        self.conv_filter, self.conv_act, self.hidden_size, self.out_size,
        self.pool_size, self.pool_stride, self.pool_padding, self.pool_type,
        self.hidden_act)
    prediction = pyramid_model(input)

    if is_infer:
        return {'prediction': prediction}

    # calculate hinge loss: rows [0, 64) are sliced as positives and
    # rows [64, 128) as negatives.
    pos = paddle.slice(prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(
        prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])

    margin = paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32')
    raw_hinge = paddle.add(paddle.subtract(margin, pos), neg)
    floor = paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32')
    avg_cost = paddle.mean(paddle.maximum(floor, raw_hinge))

    self.inference_target_var = avg_cost
    self._cost = avg_cost
    return {'cost': avg_cost}
def net(self, inputs, is_infer=False):
    """Build the MatchPyramid network and, for training, its hinge loss.

    Args:
        inputs: forwarded unchanged to ``MatchPyramidLayer``.
        is_infer (bool): when True the prediction is stored in
            ``self._infer_results["prediction"]`` and no loss is built.

    Returns:
        None. In training mode the mean hinge loss is stored in
        ``self._cost``.
    """
    pyramid_model = MatchPyramidLayer(
        self.emb_path, self.vocab_size, self.emb_size, self.kernel_num,
        self.conv_filter, self.conv_act, self.hidden_size, self.out_size,
        self.pool_size, self.pool_stride, self.pool_padding, self.pool_type,
        self.hidden_act)
    prediction = pyramid_model(inputs)

    if is_infer:
        self._infer_results["prediction"] = prediction
        return

    # Rows [0, 64) are sliced as positives and rows [64, 128) as negatives.
    pos = paddle.slice(prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(
        prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])

    # Hinge loss: mean(max(0, 1 - pos + neg)).
    margin = paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32')
    raw_hinge = paddle.add(paddle.subtract(margin, pos), neg)
    floor = paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32')
    self._cost = paddle.mean(paddle.maximum(floor, raw_hinge))
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale and clip the 8-coordinate boxes from the output of NMS to get
    the final prediction.

    Args:
        bboxes(Tensor): NMS output; columns 0-1 are kept as label and
            score, the remaining columns are reshaped to 8 coordinates
            (x1, y1, ..., x4, y4) per box
        bbox_num(Tensor): number of boxes for each image
        im_shape(Tensor): shape of the input image
        scale_factor(Tensor): per-image (scale_y, scale_x)
    Returns:
        bbox_pred(Tensor): label, score and the 8 clipped coordinates
            concatenated along axis 1. The coordinates are expressed in
            the original image and clipped to [0, size - 1].
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    shape_rows = []
    scale_rows = []
    for i in range(bbox_num.shape[0]):
        # One copy of this image's original (h, w) per box of the image.
        shape_rows.append(
            paddle.expand(origin_shape[i:i + 1, :], [bbox_num[i], 2]))
        # scale_factor holds (scale_y, scale_x); repeat (x, y) four times
        # to cover the four corner points.
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        point_scale = paddle.concat([
            scale_x, scale_y, scale_x, scale_y, scale_x, scale_y, scale_x,
            scale_y
        ])
        scale_rows.append(paddle.expand(point_scale, [bbox_num[i], 8]))

    per_box_shape = paddle.concat(shape_rows)
    per_box_scale = paddle.concat(scale_rows)

    # bboxes: label, score, then the box coordinates
    pred_label_score = bboxes[:, 0:2]
    # rescale bbox to original image
    scaled_bbox = bboxes[:, 2:].reshape([-1, 8]) / per_box_scale

    origin_h = per_box_shape[:, 0]
    origin_w = per_box_shape[:, 1]
    zeros = paddle.zeros_like(origin_h)
    max_x = origin_w - 1
    max_y = origin_h - 1
    # Even columns are x coordinates, odd columns are y coordinates.
    clipped = [
        paddle.maximum(
            paddle.minimum(scaled_bbox[:, col],
                           max_x if col % 2 == 0 else max_y), zeros)
        for col in range(8)
    ]
    pred_bbox = paddle.stack(clipped, axis=-1)
    return paddle.concat([pred_label_score, pred_bbox], axis=1)
def test_dynamic_api(self):
    """Check ``paddle.maximum`` in dynamic mode against ``np.maximum``."""
    paddle.disable_static()
    # ``paddle.to_tensor`` replaces the legacy ``paddle.to_variable`` and
    # matches the other dynamic-mode test in this file.
    x = paddle.to_tensor(self.input_x)
    y = paddle.to_tensor(self.input_y)
    np_z = paddle.maximum(x, y).numpy()
    z_expected = np.maximum(self.input_x, self.input_y)
    self.assertTrue((np_z == z_expected).all())
def forward(self):
    """Clip boxes to ``[0, h - 1]`` / ``[0, w - 1]`` taken from ``ImInfo``.

    Reads the ``'Input'`` boxes and the three-element ``'ImInfo'`` tensor
    (split into h, w and a third value that is not used here).

    Returns:
        dict: ``{'Output': [clipped_boxes]}`` with the four clipped
        coordinates concatenated on the last axis.
    """
    boxes = self.input('Input', 0)
    im_info = paddle.reshape(self.input('ImInfo', 0), shape=[3])
    height, width, _ = paddle.tensor.split(
        im_info, axis=0, num_or_sections=3)

    one = paddle.full(shape=[1], dtype='float32', fill_value=1.0)
    zero = paddle.full(shape=[1], dtype='float32', fill_value=0.0)
    # Largest valid coordinate on each axis.
    max_y = paddle.subtract(height, one)
    max_x = paddle.subtract(width, one)

    xmin, ymin, xmax, ymax = paddle.tensor.split(
        boxes, axis=-1, num_or_sections=4)
    xmin = paddle.maximum(paddle.minimum(xmin, max_x), zero)
    ymin = paddle.maximum(paddle.minimum(ymin, max_y), zero)
    xmax = paddle.maximum(paddle.minimum(xmax, max_x), zero)
    ymax = paddle.maximum(paddle.minimum(ymax, max_y), zero)

    clipped = paddle.concat([xmin, ymin, xmax, ymax], axis=-1)
    return {'Output': [clipped]}
def bboxes_iou_batch(bboxes_a, bboxes_b, xyxy=True):
    """Compute the pairwise IoU between two batched groups of rectangles.

    Args:
        bboxes_a: (tensor) bounding boxes, Shape: [N, A, 4].
        bboxes_b: (tensor) bounding boxes, Shape: [N, B, 4].
        xyxy: (bool) if True the boxes are already corner format; otherwise
            they are treated as (cx, cy, w, h) and converted to corners.
    Return:
      (tensor) iou, Shape: [N, A, B].
    """
    N = bboxes_a.shape[0]
    A = bboxes_a.shape[1]
    B = bboxes_b.shape[1]

    def _cxcywh_to_corners(boxes):
        # center +/- half of the size gives the two corners
        half_size = boxes[:, :, 2:] * 0.5
        return paddle.concat(
            [boxes[:, :, :2] - half_size, boxes[:, :, :2] + half_size],
            axis=-1)

    def _pair_grid(points_a, points_b):
        # Tile [N, A, 2] and [N, B, 2] points onto a common [N, A, B, 2] grid.
        grid_a = paddle.tile(
            paddle.reshape(points_a, (N, A, 1, 2)), [1, 1, B, 1])
        grid_b = paddle.tile(
            paddle.reshape(points_b, (N, 1, B, 2)), [1, A, 1, 1])
        return grid_a, grid_b

    def _area(boxes):
        width = boxes[:, :, 2] - boxes[:, :, 0]
        height = boxes[:, :, 3] - boxes[:, :, 1]
        return height * width

    if xyxy:
        box_a, box_b = bboxes_a, bboxes_b
    else:
        # cxcywh format
        box_a = _cxcywh_to_corners(bboxes_a)
        box_b = _cxcywh_to_corners(bboxes_b)

    # Intersection: smallest bottom-right corner, largest top-left corner.
    rb_a, rb_b = _pair_grid(box_a[:, :, 2:], box_b[:, :, 2:])
    max_xy = paddle.minimum(rb_a, rb_b)
    lu_a, lu_b = _pair_grid(box_a[:, :, :2], box_b[:, :, :2])
    min_xy = paddle.maximum(lu_a, lu_b)

    # relu zeroes the extents of non-overlapping pairs
    inter_wh = F.relu(max_xy - min_xy)
    inter = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1]

    area_a = paddle.tile(
        paddle.reshape(_area(box_a), (N, A, 1)), [1, 1, B])  # [N, A, B]
    area_b = paddle.tile(
        paddle.reshape(_area(box_b), (N, 1, B)), [1, A, 1])  # [N, A, B]

    union = area_a + area_b - inter + 1e-9
    return inter / union  # [N, A, B]
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    """Decode RoI deltas into boxes and clip them to the image extent.

    Args:
        bbox_head_out (sequence): element 0 is ``bbox_pred``, element 1 is
            ``cls_prob``.
        rois (sequence): element 0 holds the per-image RoI tensors,
            element 1 the number of RoIs of each image.
        im_shape (Tensor): indexed per image as ``im_shape[idx, :]``;
            column 0 is used as the height bound and column 1 as the
            width bound when clipping.
        scale_factor (Tensor): kept for interface compatibility; it is not
            used by this method.

    Returns:
        tuple: ``((bbox, rois_num), scores)`` where ``bbox`` holds the
        clipped ``x1, y1, x2, y2`` on its last axis and ``scores`` is
        ``cls_prob`` without its last column.
    """
    bbox_pred = bbox_head_out[0]
    cls_prob = bbox_head_out[1]
    roi = rois[0]
    rois_num = rois[1]

    # Repeat each image's shape once per RoI so it lines up row-for-row
    # with the concatenated boxes below.
    clip_shape = paddle.concat([
        paddle.expand(im_shape[idx, :], [rois_num[idx], 2])
        for idx, _ in enumerate(roi)
    ])

    # bbox_pred.shape: [N, C*4]
    # C=num_classes in faster/mask rcnn(bbox_head), C=1 in cascade rcnn(cascade_head)
    bbox = paddle.concat(roi)
    if bbox.shape[0] == 0:
        # NOTE(review): this fallback is 2-D while the clipping below
        # indexes three axes -- confirm how an empty RoI batch is meant
        # to flow through.
        bbox = paddle.zeros([0, bbox_pred.shape[1]], dtype='float32')
    else:
        bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
    scores = cls_prob[:, :-1]

    # bbox.shape: [N, C, 4]
    # bbox.shape[1] must be equal to scores.shape[1]
    if bbox.shape[1] == 1:
        # Class-agnostic boxes are repeated for every class.
        bbox = paddle.tile(bbox, [1, self.num_classes, 1])

    bound_h = paddle.unsqueeze(clip_shape[:, 0], axis=1)
    bound_w = paddle.unsqueeze(clip_shape[:, 1], axis=1)
    zeros = paddle.zeros_like(bound_h)
    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], bound_w), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], bound_h), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], bound_w), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], bound_h), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    bboxes = (bbox, rois_num)
    return bboxes, scores
def forward(self, x1, x2, target):
    """Cosine-similarity loss between two batches of vectors.

    Args:
        x1 (Tensor): first batch; the last axis is the feature axis.
        x2 (Tensor): second batch, multiplied element-wise with ``x1``.
        target (Tensor): entries equal to 1 select the ``1 - cos`` term;
            all other entries select ``max(0, cos - self.margin)``.

    Returns:
        Tensor: the mean of the selected per-pair terms.
    """
    # Cosine similarity along the last axis; epsilon guards zero norms.
    dot = paddle.sum(x1 * x2, axis=-1)
    norm_product = paddle.norm(x1, axis=-1) * paddle.norm(x2, axis=-1)
    similarity = dot / (norm_product + self.epsilon)

    is_positive = paddle.equal(target, paddle.full_like(target, fill_value=1))
    negative_term = paddle.maximum(
        paddle.zeros_like(similarity), similarity - self.margin)
    per_pair = paddle.where(is_positive, 1. - similarity, negative_term)
    return paddle.mean(per_pair)
def create_loss(batch_size, margin, cos_pos, cos_neg):
    """Return the mean hinge loss ``mean(max(0, margin - cos_pos + cos_neg))``.

    Args:
        batch_size (int): number of rows of the ``[batch_size, 1]``
            constant tensors built here.
        margin (float): hinge margin.
        cos_pos (Tensor): scores of the positive pairs.
        cos_neg (Tensor): scores of the negative pairs.
    """
    margin_col = paddle.full(
        shape=[batch_size, 1], fill_value=margin, dtype='float32')
    raw_hinge = paddle.add(paddle.subtract(margin_col, cos_pos), cos_neg)
    zero_col = paddle.full(
        shape=[batch_size, 1], fill_value=0.0, dtype='float32')
    return paddle.mean(paddle.maximum(zero_col, raw_hinge))
def create_loss(prediction):
    """Return the mean hinge loss over a 128-row prediction.

    Rows [0, 64) of column 0 are sliced as positives and rows [64, 128)
    as negatives; the loss is ``mean(max(0, 1 - pos + neg))``.
    """
    pos = paddle.slice(prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(
        prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])

    margin_col = paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32')
    raw_hinge = paddle.add(paddle.subtract(margin_col, pos), neg)
    zero_col = paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32')
    return paddle.mean(paddle.maximum(zero_col, raw_hinge))
def __call__(self, pbox, gbox, iou_weight=1.):
    """Compute the GIoU loss between predicted and ground-truth boxes.

    Args:
        pbox (Tensor): predicted boxes; the last axis is split into the
            four values ``x1, y1, x2, y2``.
        gbox (Tensor): ground-truth boxes, split the same way as ``pbox``.
        iou_weight (float | Tensor): multiplier applied to the per-box loss
            before the ``'sum'`` and ``'mean'`` reductions. It is not
            applied when ``self.reduction == 'none'``.

    Returns:
        Tensor: the (optionally reduced) loss times ``self.loss_weight``.
    """
    px1, py1, px2, py2 = paddle.split(pbox, num_or_sections=4, axis=-1)
    gx1, gy1, gx2, gy2 = paddle.split(gbox, num_or_sections=4, axis=-1)
    iou, _, union = self.bbox_overlap([px1, py1, px2, py2],
                                      [gx1, gy1, gx2, gy2], self.eps)

    # Corners of the smallest box enclosing both the prediction and the target.
    enclose_x1 = paddle.minimum(px1, gx1)
    enclose_y1 = paddle.minimum(py1, gy1)
    enclose_x2 = paddle.maximum(px2, gx2)
    enclose_y2 = paddle.maximum(py2, gy2)
    enclose_area = (enclose_x2 - enclose_x1) * (enclose_y2 - enclose_y1)
    enclose_area = enclose_area + self.eps

    miou = iou - ((enclose_area - union) / enclose_area)
    giou = 1 - miou

    if self.reduction == 'none':
        # The unreduced loss is returned without iou_weight applied.
        loss = giou
    elif self.reduction == 'sum':
        loss = paddle.sum(giou * iou_weight)
    else:
        loss = paddle.mean(giou * iou_weight)
    return loss * self.loss_weight
def true_func1():
    """Return the interpolated position clamped to ``[xmin, xmax]``.

    Reads ``x1``, ``x2``, ``g1``, ``g2``, ``d1``, ``d2_square``, ``xmin``
    and ``xmax`` from the enclosing scope. The formula used depends on
    whether ``x1 <= x2``.
    """
    d2 = d2_square.sqrt()

    def _when_x1_le_x2():
        ratio = (g2 + d2 - d1) / (g2 - g1 + 2 * d2)
        return x2 - (x2 - x1) * ratio

    def _when_x1_gt_x2():
        ratio = (g1 + d2 - d1) / (g1 - g2 + 2 * d2)
        return x1 - (x1 - x2) * ratio

    x1_le_x2 = paddle.less_equal(x=x1, y=x2)
    min_pos = paddle.static.nn.cond(x1_le_x2, _when_x1_le_x2, _when_x1_gt_x2)

    # Clamp: raise to xmin first, then cap at xmax.
    lower_bounded = paddle.maximum(min_pos, xmin)
    return paddle.minimum(lower_bounded, xmax)
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    """Decode RoIs with ``ops.box_coder`` and clip them to the original image.

    Args:
        bbox_head_out (tuple): ``(bbox_pred, cls_prob)``.
        rois (tuple): ``(roi, rois_num)``; ``rois_num[idx]`` is the RoI
            count of image ``idx``.
        im_shape (Tensor): per-image shape; divided by ``scale_factor`` to
            get the original size, whose column 0 is used as the height
            and column 1 as the width.
        scale_factor (Tensor): per-image scale; only its first entry per
            image is used to rescale the RoIs.

    Returns:
        tuple: ``((bbox, rois_num), cls_prob)`` with boxes clipped to
        ``[0, size - 1]``.
    """
    bbox_pred, cls_prob = bbox_head_out
    roi, rois_num = rois
    origin_shape = im_shape / scale_factor

    # Repeat each image's scale and original shape once per RoI.
    scale_rows = []
    shape_rows = []
    for idx in range(self.batch_size):
        first_scale = scale_factor[idx, :][0]
        num_rois = rois_num[idx]
        scale_rows.append(paddle.expand(first_scale, [num_rois, 1]))
        shape_rows.append(
            paddle.expand(origin_shape[idx, :], [num_rois, 2]))
    per_roi_scale = paddle.concat(scale_rows)
    per_roi_shape = paddle.concat(shape_rows)

    bbox = ops.box_coder(
        prior_box=roi / per_roi_scale,
        prior_box_var=[v / self.var_weight for v in self.prior_box_var],
        target_box=bbox_pred,
        code_type=self.code_type,
        box_normalized=self.box_normalized,
        axis=self.axis)

    # TODO: Update box_clip
    max_y = paddle.unsqueeze(per_roi_shape[:, 0] - 1, axis=1)
    max_x = paddle.unsqueeze(per_roi_shape[:, 1] - 1, axis=1)
    zeros = paddle.zeros(paddle.shape(max_y), 'float32')
    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], max_x), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], max_y), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], max_x), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], max_y), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    return (bbox, rois_num), cls_prob
def _test(self, run_npu=True):
    """Train a small network built on ``paddle.maximum`` for 100 steps.

    Args:
        run_npu (bool): run on ``paddle.NPUPlace(0)`` when True, otherwise
            on ``paddle.CPUPlace()``.

    Returns:
        tuple: ``(pred_res, loss_res)`` fetched on the last step.
    """
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    # Same seed everywhere so separate calls see the same data and init.
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(32, 32)).astype('float32')
    b_np = np.random.random(size=(32, 32)).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
        b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
        label = paddle.static.data(
            name="label", shape=[32, 1], dtype='int64')

        c = paddle.maximum(a, b)

        fc_1 = fluid.layers.fc(input=c, size=128)
        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        loss = fluid.layers.reduce_mean(cost)
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    place = paddle.NPUPlace(0) if run_npu else paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    print("Start run on {}".format(place))
    feed = {"a": a_np, "b": b_np, "label": label_np}
    for epoch in range(100):
        pred_res, loss_res = exe.run(
            main_prog, feed=feed, fetch_list=[prediction, loss])
        if epoch % 10 == 0:
            print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                epoch, pred_res[0], loss_res))

    return pred_res, loss_res
def bbox_iou(box1, box2, x1y1x2y2=False):
    """
    Returns the pairwise IoU of two sets of bounding boxes

    Args:
        box1 (Tensor): N boxes, read column-wise as ``box1[:, 0..3]``.
        box2 (Tensor): M boxes, read the same way.
        x1y1x2y2 (bool): if True the four columns are the corners
            ``x1, y1, x2, y2``; otherwise they are ``cx, cy, w, h`` and are
            converted to corners first.

    Returns:
        Tensor: IoU matrix with shape [N, M].
    """
    N, M = len(box1), len(box2)
    if x1y1x2y2:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = (box1[:, 0], box1[:, 1], box1[:, 2],
                                      box1[:, 3])
        b2_x1, b2_y1, b2_x2, b2_y2 = (box2[:, 0], box2[:, 1], box2[:, 2],
                                      box2[:, 3])
    else:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2

    # Coordinates of the intersection rectangle; the [N, 1] column of
    # box1 values broadcasts against the [M] box2 values to give [N, M].
    inter_rect_x1 = paddle.maximum(b1_x1.unsqueeze(1), b2_x1)
    inter_rect_y1 = paddle.maximum(b1_y1.unsqueeze(1), b2_y1)
    inter_rect_x2 = paddle.minimum(b1_x2.unsqueeze(1), b2_x2)
    inter_rect_y2 = paddle.minimum(b1_y2.unsqueeze(1), b2_y2)

    # Intersection area, each extent clamped at zero.
    inter_area = paddle.clip(inter_rect_x2 - inter_rect_x1, 0) * paddle.clip(
        inter_rect_y2 - inter_rect_y1, 0)

    # Union area. paddle's reshape/expand take the target shape as a single
    # list argument (unlike torch's view/expand varargs).
    b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).reshape([-1, 1]).expand(
        [N, M])
    b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).reshape([1, -1]).expand(
        [N, M])

    return inter_area / (b1_area + b2_area - inter_area + 1e-16)