def forward(self, x):
    """Frozen-BN style affine transform with a guard for negative variance.

    Entries of ``running_var`` below zero are zeroed via the boolean mask
    before the square root, so the rsqrt is always well defined.
    """
    nonneg = self.running_var >= 0
    inv_std = 1.0 / F.sqrt(self.running_var * nonneg + self.eps)
    scale = self.weight.reshape(1, -1, 1, 1) * inv_std
    shift = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale
    return x * scale + shift
def get_cls_reg_ctr_targets(points, gt_bboxes, bbox_scale = 0.25):
    """Compute classification, regression and centerness targets for every
    point location, for all images in a batch.

    Args:
        points (Tensor): (1, 2, 19, 19) point coordinates on the score map.
        gt_bboxes (Tensor): Ground truth bboxes of each image, (B, 4),
            in [tl_x, tl_y, br_x, br_y] format.
        bbox_scale (float): fraction of the gt box kept as the positive
            region; the box is shrunk by (1 - bbox_scale) before the
            inside test.

    Returns:
        cls_labels (Tensor): (B, 1, 19, 19); 1 for points inside the shrunk
            box (foreground), 0 for background.
        bbox_targets (Tensor): (B, 4, 19, 19) offsets to the four box sides;
            only foreground entries are meaningful — background losses
            should be masked to 0.
        centerness_targets (Tensor): (B, 19, 19) centerness per point.
            NOTE(review): no channel axis is added here, so the shape is
            (B, 19, 19), not (B, 1, 19, 19) — the sibling implementation
            adds ``F.add_axis(..., axis=1)``; confirm against callers.
    """
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)  # (B,4,1,1)

    # cls_labels
    # Shrink the bbox before the inside test (the template box is large).
    # gap is derived from the x-extent only — assumes square boxes; TODO confirm.
    gap = (gt_bboxes[:, 2, ...] - gt_bboxes[:, 0, ...]) * (1-bbox_scale) / 2
    up_bound = points[:, 0, ...] > gt_bboxes[:, 0, ...] + gap
    left_bound = points[:, 1, ...] > gt_bboxes[:, 1, ...] + gap
    down_bound = points[:, 0, ...] < gt_bboxes[:, 2, ...] - gap
    right_bound = points[:, 1, ...] < gt_bboxes[:, 3, ...] - gap
    # A point is foreground only if all four bounds hold (product of bools).
    cls_labels = up_bound * left_bound * down_bound * right_bound
    cls_labels = F.add_axis(cls_labels, axis=1)  # (B,1,19,19)

    # bbox_targets
    # Offsets of every point from the two box corners; computed for all
    # points, so background entries can be negative.
    up_left = points - gt_bboxes[:, 0:2, ...]  # (B, 2, 19, 19)
    bottom_right = gt_bboxes[:, 2:4, ...] - points
    bbox_targets = F.concat([up_left, bottom_right], axis = 1)  # (B, 4, 19, 19)

    # centerness_targets: sqrt of the min/max offset ratios per axis
    # (FCOS-style centerness); abs guards the sqrt for background points.
    up_bottom = F.minimum(up_left[:, 0, ...], bottom_right[:, 0, ...]) / F.maximum(up_left[:, 0, ...], bottom_right[:, 0, ...])
    left_right = F.minimum(up_left[:, 1, ...], bottom_right[:, 1, ...]) / F.maximum(up_left[:, 1, ...], bottom_right[:, 1, ...])
    centerness_targets = F.sqrt(F.abs(up_bottom * left_right))
    return cls_labels, bbox_targets, centerness_targets
def test_GammaRNG():
    """Gamma sampling: equal seeds reproduce, different seeds/ops diverge,
    and broadcast sampling matches the analytic moments."""
    rng_a = RNG(seed=111, device="xpu0")
    rng_b = RNG(seed=111, device="xpu1")
    rng_c = RNG(seed=222, device="xpu0")
    # Draw order on rng_a matters: the gamma draw must precede the uniform.
    sample_a = rng_a.gamma(2, size=(100, ))
    sample_a2 = rng_a.uniform(size=(100, ))
    sample_b = rng_b.gamma(2, size=(100, ))
    sample_c = rng_c.gamma(2, size=(100, ))

    # Same seed reproduces the same stream, even on a different device.
    np.testing.assert_allclose(sample_a.numpy(), sample_b.numpy(), atol=1e-6)
    assert sample_a.device == "xpu0" and sample_b.device == "xpu1"
    # Different seed, or a different distribution, must give different draws.
    assert not (sample_a.numpy() == sample_c.numpy()).all()
    assert not (sample_a.numpy() == sample_a2.numpy()).all()

    shape = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32, device="xpu0")
    scale = Tensor([0.5, 1, 1.5], dtype=np.float32, device="xpu0")
    expected_mean = (shape * scale).numpy()
    expected_std = (F.sqrt(shape) * scale).numpy()

    out = rng_a.gamma(shape=shape, scale=scale, size=(20, 30, 40))
    # Output shape is size + broadcast(shape, scale) = (20,30,40,2,3).
    out_shp = out.shape
    if isinstance(out_shp, tuple):
        assert out_shp == (20, 30, 40, 2, 3)
    else:
        assert all(out.shape.numpy() == np.array([20, 30, 40, 2, 3]))
    # Sample moments should land near the analytic gamma mean/std.
    mean_err = np.abs(out.mean(axis=(0, 1)).numpy() - expected_mean) / expected_std
    assert mean_err.mean() < 0.1
    std_err = np.abs(np.std(out.numpy(), axis=(0, 1)) - expected_std)
    assert std_err.mean() < 0.1
def test_BetaRNG():
    """Beta sampling: equal seeds reproduce, different seeds/ops diverge,
    and broadcast sampling matches the analytic moments."""
    rng_a = RNG(seed=111, device="xpu0")
    rng_b = RNG(seed=111, device="xpu1")
    rng_c = RNG(seed=222, device="xpu0")
    # Draw order on rng_a matters: the beta draw must precede the uniform.
    sample_a = rng_a.beta(2, 1, size=(100, ))
    sample_a2 = rng_a.uniform(size=(100, ))
    sample_b = rng_b.beta(2, 1, size=(100, ))
    sample_c = rng_c.beta(2, 1, size=(100, ))

    # Same seed reproduces the same stream, even on a different device.
    np.testing.assert_allclose(sample_a.numpy(), sample_b.numpy(), atol=1e-6)
    assert sample_a.device == "xpu0" and sample_b.device == "xpu1"
    # Different seed, or a different distribution, must give different draws.
    assert not (sample_a.numpy() == sample_c.numpy()).all()
    assert not (sample_a.numpy() == sample_a2.numpy()).all()

    alpha = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32, device="xpu0")
    beta = Tensor([0.5, 1, 1.5], dtype=np.float32, device="xpu0")
    # Analytic Beta(alpha, beta) moments.
    expected_mean = (alpha / (alpha + beta)).numpy()
    expected_std = (F.sqrt(alpha * beta /
                           (F.pow(alpha + beta, 2) * (alpha + beta + 1)))).numpy()

    out = rng_a.beta(alpha=alpha, beta=beta, size=(20, 30))
    # Output shape is size + broadcast(alpha, beta) = (20,30,2,3).
    out_shp = out.shape
    if isinstance(out_shp, tuple):
        assert out_shp == (20, 30, 2, 3)
    else:
        assert all(out.shape.numpy() == np.array([20, 30, 2, 3]))
    mean_err = np.abs(out.mean(axis=(0, 1)).numpy() - expected_mean) / expected_std
    assert mean_err.mean() < 0.1
    std_err = np.abs(np.std(out.numpy(), axis=(0, 1)) - expected_std)
    assert std_err.mean() < 0.1
def get_plane_anchors(self, anchor_scales: np.ndarray):
    """Generate the anchors for a single feature-map location.

    Produces len(anchor_scales) x len(self.anchor_ratios) anchors, all
    centered on the base anchor of side ``self.base_size``.
    """
    base = Tensor([0, 0, self.base_size - 1, self.base_size - 1]).reshape(1, -1)
    w, h, x_ctr, y_ctr = self._whctrs(base)

    # Enumerate aspect ratios while keeping the anchor area constant.
    area = w * h
    area_per_ratio = area / self.anchor_ratios
    ws = F.sqrt(area_per_ratio)
    hs = ws * self.anchor_ratios

    # Enumerate scales on top of every ratio.
    anchor_scales = anchor_scales.reshape(1, -1).astype(np.float32)
    ws = (F.expand_dims(ws, 1) * anchor_scales).reshape(-1, 1)
    hs = (F.expand_dims(hs, 1) * anchor_scales).reshape(-1, 1)

    # Convert (center, size) back into corner coordinates.
    half_w = 0.5 * (ws - 1)
    half_h = 0.5 * (hs - 1)
    anchors = F.concat(
        [x_ctr - half_w, y_ctr - half_h, x_ctr + half_w, y_ctr + half_h],
        axis=1,
    )
    return anchors.astype(np.float32)
def _ternary_transform_mge(image):
    """Ternary (census-like) transform of an image.

    Each pixel of a patch_size x patch_size neighborhood is compared with
    the center intensity and the difference is squashed smoothly into
    (-1, 1). Input is NCHW with 1 or 3 channels.
    """
    n, c, h, w = image.shape
    if c == 3:
        # RGB -> luma using the ITU-R BT.601 weights.
        R, G, B = F.split(image, 3, 1)
        intensities = 0.2989 * R + 0.5870 * G + 0.1140 * B
    elif c == 1:
        intensities = image
    else:
        raise ValueError('image channel should be 3 or 1: %s' % c)

    # Identity kernel: copies every pixel of the neighborhood into its own
    # output channel, so conv2d extracts all patches in one pass.
    out_channels = patch_size * patch_size
    eye = np.eye(out_channels).reshape(
        (patch_size, patch_size, 1, out_channels))  # h,w,1,out_c
    weight = mge.tensor(
        np.transpose(eye, (3, 2, 0, 1)).astype(np.float32))  # out_c,1,h,w

    patches = F.nn.conv2d(inp=intensities, weight=weight, bias=None,
                          stride=[1, 1],
                          padding=[max_distance, max_distance])

    # Difference against the center pixel, normalized to (-1, 1).
    diff = patches - intensities
    return diff / F.sqrt(0.81 + diff**2)
def roi_pool(
    rpn_fms, rois, stride, pool_shape, pooler_type="roi_align",
):
    """Assign each RoI to an FPN level by its scale and pool its features.

    Args:
        rpn_fms: list of feature maps, finest stride first.
        rois: (R, 5) boxes as (batch_idx, x1, y1, x2, y2).
        stride: strides of the feature maps, ascending, one per level.
        pool_shape: spatial output size of the pooled feature.
        pooler_type: "roi_align" or "roi_pool".

    Returns:
        Pooled features, ordered like the input ``rois``.

    Raises:
        ValueError: if ``pooler_type`` is not a supported mode.
    """
    # Fail fast on a bad mode: the original fell through the if/elif chain
    # and raised a confusing NameError on ``pool_fm`` instead.
    if pooler_type not in ("roi_pool", "roi_align"):
        raise ValueError("unknown pooler_type: {}".format(pooler_type))

    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    # FPN assignment rule: bigger boxes go to coarser levels.
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(canonical_level + F.log(
        F.sqrt(box_area) / canonical_box_size) / np.log(2)).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # Append one dummy (zero) RoI per level so no level is empty; the
    # dummies are stripped again at the end.
    assigned_level = F.concat([
        assigned_level,
        F.arange(num_fms, dtype="int32", device=assigned_level.device)
    ], )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]
        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(rpn_fms[i], level_rois, pool_shape,
                                       mode="max", scale=1.0 / stride[i])
        else:  # "roi_align" — validated above
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Restore the original RoI order, then drop the per-level dummies.
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]
    return pool_feature
def forward(self, x):
    """Filter Response Normalization followed by a TLU threshold."""
    B, C, _, _ = x.shape
    # Mean of squares over the spatial positions, shaped [B, C, 1, 1].
    nu2 = F.expand_dims(
        F.pow(x, 2).reshape(B, C, -1).mean(axis=-1, keepdims=True), axis=-1)
    normed = x / F.sqrt(nu2 + F.abs(self.eps))
    # TLU: max against a learnable threshold tau instead of plain ReLU.
    return F.maximum(self.gamma * normed + self.beta, self.tau)
def layernorm(x):
    """Layer-normalize ``x`` over all non-batch dimensions."""
    shape_in = x.shape
    flat = x.reshape(shape_in[0], -1)
    mu = F.mean(flat, axis=1, keepdims=True)
    var = F.mean((flat - mu) ** 2, axis=1, keepdims=True)
    # Clamp the denominator so near-zero variance cannot blow up the output.
    flat = (flat - mu) / F.maximum(F.sqrt(var), 1e-6)
    return flat.reshape(shape_in)
def gelu(x):
    """Gaussian Error Linear Unit, tanh approximation.

    Computes 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
    the variant used by OpenAI GPT / BERT.
    See https://arxiv.org/abs/1606.08415
    """
    inner = F.sqrt(2 / math.pi) * (x + 0.044715 * (x ** 3))
    return x * 0.5 * (1.0 + F.tanh(inner))
def calc(self, X, Y, mask=None):
    """Charbonnier penalty between X and Y, optionally masked.

    sqrt(d^2 + eps) is a smooth, differentiable surrogate for |d|.
    Masked-out entries contribute zero; reduction follows
    ``self.reduction`` ("mean", otherwise sum).
    """
    residual = X - Y
    penalty = F.sqrt(residual * residual + self.eps)
    if mask is not None:
        penalty = penalty * mask
    if self.reduction == "mean":
        return F.mean(penalty)
    return F.sum(penalty)
def forward(self, x):
    """Group normalization over ``self.num_groups`` groups, optional affine."""
    grouped = x.reshape(x.shape[0], self.num_groups, -1)
    mu = F.mean(grouped, axis=2, keepdims=True)
    # var = E[x^2] - E[x]^2 within each group.
    var = F.mean(grouped ** 2, axis=2, keepdims=True) - mu * mu
    grouped = (grouped - mu) / F.sqrt(var + self.eps)
    out = grouped.reshape(x.shape)
    if self.affine:
        out = self.weight.reshape(1, -1, 1, 1) * out + \
            self.bias.reshape(1, -1, 1, 1)
    return out
def fold_linear_bn(linear_weight, linear_bias, gamma, beta, bn_mean, bn_var,
                   eps):
    """Fold an inference-mode BatchNorm into the preceding Linear layer.

    Returns (w_fold, b_fold) such that a Linear with these parameters is
    equivalent to Linear(w, b) followed by the BatchNorm.
    """
    # Reshape all per-feature vectors to rows for broadcasting.
    linear_bias, gamma, beta, bn_mean, bn_var = [
        t.reshape(1, -1) for t in (linear_bias, gamma, beta, bn_mean, bn_var)
    ]

    # bn_istd = 1 / bn_std
    bn_istd = 1.0 / sqrt(bn_var + eps)  # type: ignore[attr-defined]
    # w_fold = gamma / bn_std * W, scale applied per output feature (row).
    scale_factor = gamma * bn_istd
    w_fold = linear_weight * scale_factor.reshape(-1, 1)
    # b_fold = gamma * (b - bn_mean) / bn_std + beta
    b_fold = beta + gamma * (linear_bias - bn_mean) * bn_istd
    return w_fold, b_fold
def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align',
             labels=None, bbox_targets=None):
    """Assign RoIs to FPN levels by scale and pool their features (legacy
    MegEngine API: ``.ai`` indexing, ``shapeof()``, ``mask_to_inds``).

    Args:
        rpn_fms: feature maps, one per FPN level, finest stride first.
        rois: (R, 5) boxes as (batch_idx, x1, y1, x2, y2).
        stride: strides of the feature maps, ascending.
        pool_shape: spatial output size of the pooled feature.
        roi_type: 'roi_align' or 'roi_pool'.
        labels, bbox_targets: optional per-RoI targets that must be kept
            aligned with the reordered RoIs.

    Returns:
        (pool_feature, rois, labels, bbox_targets); the last two are None
        when ``labels`` is not given, otherwise gradient-stopped.
    """
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    # FPN assignment rule: level grows with log2 of the box size.
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2)
    )
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # Append one dummy RoI per level so no level ends up empty; the mask
    # (1 for real RoIs, 0 for dummies) lets us strip them again later.
    available_masks = F.concat(
        [mge.ones(level_assignments.shapeof()[0]),
         mge.zeros(num_fms)], axis=0)
    level_assignments = F.concat(
        [level_assignments,
         mge.tensor(np.arange(num_fms, dtype=np.int32))], axis=0)
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0)
    if labels is not None:
        # Dummy labels/targets keep the arrays aligned with the dummy RoIs.
        labels = F.concat([labels, mge.ones((num_fms, labels.shapeof()[-1]))],
                          axis=0)
        bbox_targets = F.concat(
            [bbox_targets,
             mge.zeros((num_fms, bbox_targets.shapeof()[-1]))], axis=0)

    pool_list, inds_list = [], []
    for i in range(len(rpn_fms)):
        mask = level_assignments == i
        inds = mask_to_inds(mask)
        rois_fm = rois.ai[inds]
        if roi_type == 'roi_pool':
            pool_fm = F.roi_pooling(
                rpn_fms[i], rois_fm, pool_shape, mode='max',
                scale=1.0/stride[i])
        elif roi_type == 'roi_align':
            pool_fm = F.roi_align(
                rpn_fms[i], rois_fm, pool_shape, mode='average',
                spatial_scale=1.0/stride[i], sample_points=2, aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # fm_order maps concatenated per-level outputs back to input order;
    # available_inds then drops the per-level dummy entries.
    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    ordered_available_masks = available_masks.ai[fm_order]
    available_inds = mask_to_inds(ordered_available_masks)
    pool_feature = pool_feature.ai[available_inds]
    rois = rois.ai[fm_order, :].ai[available_inds, :]
    if labels is not None:
        labels = labels.ai[fm_order].ai[available_inds]
        bbox_targets = bbox_targets.ai[fm_order, :].ai[available_inds, :]
        # zero_grad: targets must not receive gradients.
        return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return pool_feature, rois, None, None
def forward(self, x):
    """Per-channel (instance-style) normalization with optional affine."""
    N, C, H, W = x.shape
    assert C == self.num_channels
    flat = x.reshape(N, C, -1)
    mu = flat.mean(axis=2, keepdims=True)
    # var = E[x^2] - E[x]^2 over the spatial axis of each channel.
    sigma2 = (flat ** 2).mean(axis=2, keepdims=True) - mu * mu
    out = ((flat - mu) / F.sqrt(sigma2 + self.eps)).reshape(N, C, H, W)
    if self.affine:
        out = self.weight.reshape(1, -1, 1, 1) * out + self.bias.reshape(
            1, -1, 1, 1)
    return out
def get_sample_code(self, gaussian, mean, var, onehot):
    """Draw a latent code via the reparameterization trick and append the
    class condition.

    Args:
        gaussian: standard-normal noise, broadcastable against mean/var.
        mean, var: Gaussian parameters (var is a variance, not a std).
        onehot: one-hot condition concatenated along the channel axis.

    Returns:
        Tensor: [z, onehot] concatenated on axis 1, where
        z = gaussian * sqrt(var) + mean.
    """
    # Reparameterization: scale unit noise by the predicted std, shift by
    # the mean. (A leftover debug print of the shapes was removed here.)
    z = gaussian * F.sqrt(var) + mean
    return F.concat([z, onehot], axis=1)
def forward(self, x):
    """Layer normalization over all non-batch dims, optional affine."""
    N, C, H, W = x.shape
    assert C == self.num_channels
    flat = x.reshape(x.shape[0], -1)
    # keepdims=1 so the statistics broadcast back against ``flat``.
    mu = flat.mean(axis=1, keepdims=1)
    # var = E[x^2] - E[x]^2 over everything but the batch axis.
    sigma2 = (flat ** 2).mean(axis=1, keepdims=1) - mu * mu
    out = ((flat - mu) / F.sqrt(sigma2 + self.eps)).reshape(N, C, H, W)
    if self.affine:
        out = self.weight.reshape(1, -1, 1, 1) * out + self.bias.reshape(
            1, -1, 1, 1)
    return out
def get_cls_reg_ctr_targets(self, points, gt_bboxes, bbox_scale=0.15):
    """Compute classification, regression and centerness targets for every
    point location, for all images in a batch.

    Args:
        points (Tensor): (1, 2, 37, 37); position in the original image
            corresponding to each score-map point.
        gt_bboxes (Tensor): Ground truth bboxes of each image, (B, 4),
            in [tl_x, tl_y, br_x, br_y] format (corners in image coords).

    Returns:
        cls_labels (Tensor): (B, 1, 37, 37); 1 for points inside the shrunk
            box (foreground), 0 for background.
        bbox_targets (Tensor): (B, 4, 37, 37) offsets to the four box sides;
            only the foreground is meaningful — background losses should
            be set to 0.
        centerness_targets (Tensor): (B, 1, 37, 37); foreground only, same
            masking convention as above.
    """
    B, _ = gt_bboxes.shape
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)  # (B,4,1,1)

    # cls_labels
    # Shrink the bbox before the inside test (the template is large); a
    # point is positive only inside the centered sub-box of relative size
    # bbox_scale. gap uses the x-extent only — assumes square boxes; TODO confirm.
    gap = (gt_bboxes[:, 2, ...] - gt_bboxes[:, 0, ...]) * (1 - bbox_scale) / 2
    up_bound = points[:, 0, ...] > gt_bboxes[:, 0, ...] + gap
    left_bound = points[:, 1, ...] > gt_bboxes[:, 1, ...] + gap
    down_bound = points[:, 0, ...] < gt_bboxes[:, 2, ...] - gap
    right_bound = points[:, 1, ...] < gt_bboxes[:, 3, ...] - gap
    # All four bounds must hold (product of booleans).
    cls_labels = up_bound * left_bound * down_bound * right_bound
    cls_labels = F.add_axis(cls_labels, axis=1)  # (B, 1, 37, 37)
    cls_labels.requires_grad = False

    # bbox_targets
    # Offsets of each point from both corners; computed for every point,
    # so background entries can be negative.
    up_left = points - gt_bboxes[:, 0:2, ...]  # (B, 2, 37, 37)
    bottom_right = gt_bboxes[:, 2:4, ...] - points
    bbox_targets = F.concat([up_left, bottom_right], axis=1)  # (B, 4, 37, 37)
    bbox_targets.requires_grad = False

    # centerness_targets: FCOS-style sqrt of min/max offset ratios per axis;
    # abs guards the sqrt for points outside the box.
    up_bottom = F.minimum(up_left[:, 0, ...], bottom_right[:, 0, ...]) / F.maximum(
        up_left[:, 0, ...], bottom_right[:, 0, ...])
    left_right = F.minimum(up_left[:, 1, ...], bottom_right[:, 1, ...]) / F.maximum(
        up_left[:, 1, ...], bottom_right[:, 1, ...])
    centerness_targets = F.sqrt(F.abs(up_bottom * left_right))
    centerness_targets = F.add_axis(centerness_targets, axis=1)  # (B,1,37,37)
    centerness_targets.requires_grad = False
    return cls_labels, bbox_targets, centerness_targets
def fold_conv_bn(
    conv_weight, conv_bias, conv_groups, gamma, beta, bn_mean, bn_var, eps
):
    """Fold an inference-mode BatchNorm into the preceding convolution.

    Handles both dense (``conv_groups == 1``) and grouped weight layouts.
    Returns (w_fold, b_fold) equivalent to conv followed by the BN.
    """
    # Reshape all per-channel vectors for NCHW broadcasting.
    conv_bias, gamma, beta, bn_mean, bn_var = [
        t.reshape(1, -1, 1, 1)
        for t in (conv_bias, gamma, beta, bn_mean, bn_var)
    ]

    # bn_istd = 1 / bn_std
    bn_istd = 1.0 / sqrt(bn_var + eps)  # type: ignore[attr-defined]
    # w_fold = gamma / bn_std * W; grouped weights carry a leading group axis.
    scale_factor = gamma * bn_istd
    if conv_groups == 1:
        w_fold = conv_weight * scale_factor.reshape(-1, 1, 1, 1)
    else:
        w_fold = conv_weight * scale_factor.reshape(conv_groups, -1, 1, 1, 1)
    # b_fold = gamma * (b - bn_mean) / bn_std + beta
    b_fold = beta + gamma * (conv_bias - bn_mean) * bn_istd
    return w_fold, b_fold
def forward(self, X, Y):
    """Mean Charbonnier penalty sqrt((X - Y)^2 + eps) between X and Y."""
    residual = X - Y
    return F.mean(F.sqrt(residual * residual + self.eps))
def euclidean(t):
    """Row-wise Euclidean norm of ``t``, keeping the reduced axis."""
    squared_sum = F.sum(t ** 2, axis=(1, ), keepdims=True)
    return F.sqrt(squared_sum)
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """ATSS-style target assignment for every anchor point, per image.

    For each gt box, the ``anchor_topk`` closest anchors per level become
    candidates; an adaptive IoU threshold (mean + std of candidate IoUs)
    selects the positives, restricted to anchors inside the gt box.

    Args:
        anchors_list: per-level anchor point coordinates.
        batched_gt_boxes: (B, max_gt, 5) boxes with class id at index 4.
        batched_num_gts: number of valid gt boxes per image.

    Returns:
        (labels, offsets, ctrness), each stacked over the batch and
        detached (targets carry no gradients).
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []
    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        ious = []
        candidate_idxs = []
        base = 0  # running offset of each level inside the flat anchor list
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # IoU between gt boxes and square anchors of side
            # stride * anchor_scale centered on each point.
            ious.append(
                layers.get_iou(
                    gt_boxes[:, :4],
                    F.concat([
                        anchors_i - stride * self.cfg.anchor_scale / 2,
                        anchors_i + stride * self.cfg.anchor_scale / 2,
                    ], axis=1)))
            # Top-k anchors closest to each gt center on this level.
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            distances = F.sqrt(
                F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                      axis=2))
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            candidate_idxs.append(base + topk_idxs)
            base += anchors_i.shape[0]
        ious = F.concat(ious, axis=1)
        candidate_idxs = F.concat(candidate_idxs, axis=1)

        # Adaptive threshold per gt: mean + std of its candidates' IoUs.
        candidate_ious = F.gather(ious, 1, candidate_idxs)
        ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                    F.std(candidate_ious, axis=1, keepdims=True))
        is_foreground = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs,
            F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

        # Positives must also lie inside the gt box (all encoded offsets > 0).
        is_in_boxes = F.min(self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
            axis=2) > 0

        # Disqualify non-candidates / outside anchors, then match each
        # anchor to the gt with the highest remaining IoU.
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1
        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

        # Anchors whose best IoU stayed at -1 are background (label 0).
        labels = gt_boxes_matched[:, 4].astype(np.int32)
        labels[anchor_max_iou == -1] = 0

        offsets = self.point_coder.encode(all_level_anchors,
                                          gt_boxes_matched[:, :4])

        # FCOS-style centerness from the ltrb offsets; clip guards the sqrt.
        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                   lower=0) *
            F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                   lower=0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def forward(self, x):
    """Frozen BatchNorm: affine transform built from fixed running stats."""
    inv_std = 1.0 / F.sqrt(self.running_var + self.eps)
    scale = self.weight.reshape(1, -1, 1, 1) * inv_std
    bias = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale
    # detach(): the folded scale/bias must not receive gradients.
    return x * scale.detach() + bias.detach()
def isru(input, alpha):
    """Inverse square root unit: x / sqrt(1 + alpha * x^2)."""
    denom = F.sqrt(1 + alpha * F.pow(input, 2))
    return input / denom
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """FCOS-style target assignment for every anchor point, per image.

    Each point is matched to the smallest-area gt box that (a) falls in the
    point's level size range and (b) contains the point (or its center
    region when center sampling is enabled).

    Args:
        anchors_list: per-level anchor point coordinates.
        batched_gt_boxes: (B, max_gt, 5) boxes with class id at index 4.
        batched_num_gts: number of valid gt boxes per image.

    Returns:
        (labels, offsets, ctrness), stacked over the batch and detached
        (targets carry no gradients).
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []
    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        # ltrb offsets of every anchor w.r.t. every gt box: (num_gt, A, 4).
        offsets = self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

        # Per-anchor [min, max] regression range of its FPN level.
        object_sizes_of_interest = F.concat([
            F.broadcast_to(
                F.expand_dims(mge.tensor(size, dtype=np.float32), axis=0),
                (anchors_i.shape[0], 2)) for anchors_i, size in zip(
                    anchors_list, self.cfg.object_sizes_of_interest)
        ], axis=0)
        max_offsets = F.max(offsets, axis=2)
        # A (gt, anchor) pair is cared for only if the max offset fits the
        # level's size range.
        is_cared_in_the_level = (
            (max_offsets >= F.expand_dims(object_sizes_of_interest[:, 0],
                                          axis=0))
            & (max_offsets <= F.expand_dims(object_sizes_of_interest[:, 1],
                                            axis=0)))

        if self.cfg.center_sampling_radius > 0:
            # Center sampling: the point must lie inside a radius-limited
            # box around the gt center (clipped to the gt box).
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            is_in_boxes = []
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                radius = stride * self.cfg.center_sampling_radius
                center_boxes = F.concat([
                    F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                    F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                ], axis=1)
                center_offsets = self.point_coder.encode(
                    anchors_i, F.expand_dims(center_boxes, axis=1))
                is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
            is_in_boxes = F.concat(is_in_boxes, axis=1)
        else:
            # Plain FCOS: the point must lie inside the gt box itself.
            is_in_boxes = F.min(offsets, axis=2) > 0

        gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] -
                                                       gt_boxes[:, 1])
        # FIXME: use repeat instead of broadcast_to
        areas = F.broadcast_to(F.expand_dims(gt_area, axis=1),
                               offsets.shape[:2])
        # Disqualified pairs get infinite area so argmin never picks them.
        areas[~is_cared_in_the_level] = float("inf")
        areas[~is_in_boxes] = float("inf")

        # Ambiguous points go to the smallest gt box.
        match_indices = F.argmin(areas, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

        # Points whose best match stayed at inf are background (label 0).
        labels = gt_boxes_matched[:, 4].astype(np.int32)
        labels[anchor_min_area == float("inf")] = 0
        offsets = self.point_coder.encode(all_level_anchors,
                                          gt_boxes_matched[:, :4])

        # Centerness from the matched ltrb offsets; maximum(..., 0) guards
        # the sqrt against negative ratios at background points.
        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.maximum(
                F.min(left_right, axis=1) / F.max(left_right, axis=1), 0) *
            F.maximum(
                F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def _anchor_double_target(gt_boxes, im_info, all_anchors):
    """ATSS-like anchor assignment keeping the TWO best gt matches per anchor.

    Candidates are the ``default_num`` anchors closest to each gt center on
    every FPN level (level id is stored in anchor column 4); an adaptive
    IoU threshold (mean + std, floored at 0.2) plus an inside-positive-area
    test selects positives. Each anchor then keeps its two highest-IoU gts,
    so all outputs have 2 * num_anchors rows.

    Returns:
        labels: (2N,) 1 / 0 / -1 for positive / negative / ignore.
        bbox_targets: (2N, 4) regression targets for the matched gts.
        labels_cat: (2N,) matched class ids; -1 where labels == -1.
    """
    gt_boxes, im_info = gt_boxes.detach(), im_info.detach()
    all_anchors = all_anchors.detach()

    # Keep only the valid gts; append one dummy gt (all -1) so that even
    # images without matches index something sane.
    gt_boxes = gt_boxes[:im_info[5].astype(np.int32), :]
    dummy = -F.ones([1, gt_boxes.shape[1]]).to(gt_boxes.device)
    gt_boxes = F.concat([gt_boxes, dummy], axis=0)
    valid_mask = 1 - (gt_boxes[:, 4] < 0).astype(np.float32)

    anchor_centers = _compute_center(all_anchors)
    gtboxes_centers = _compute_center(gt_boxes)
    # gtboxes_centers = gtboxes_centers * valid_mask.unsqueeze(1)
    gtboxes_centers = gtboxes_centers * F.expand_dims(valid_mask, axis=1)

    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    an_centers = F.expand_dims(anchor_centers, axis=1)
    gt_centers = F.expand_dims(gtboxes_centers, axis=0)
    # an_centers = anchor_centers.unsqueeze(1).repeat(1, K, 1)
    # gt_centers = gtboxes_centers.unsqueeze(0).repeat(N, 1, 1)

    # (N, K) center-to-center Euclidean distances.
    distance = F.abs(an_centers - gt_centers)
    distance = F.sqrt(F.pow(distance, 2).sum(axis=2))

    start = 0
    end = 5
    overlaps = box_overlap_opr(all_anchors[:, :4], gt_boxes[:, :4])
    overlaps *= F.expand_dims(valid_mask, axis=0)
    default_num = 16

    # Per level, gather the IoUs of the default_num closest anchors per gt.
    ious_list = []
    for l in range(start, end):
        _, index = F.cond_take(all_anchors[:, 4] == l, all_anchors[:, 4])

        level_dist = distance[index, :].transpose(1, 0)
        ious = overlaps[index, :].transpose(1, 0)
        sorted_index = F.argsort(level_dist, descending=False)
        n = min(sorted_index.shape[1], default_num)
        ious = F.gather(ious, 1, sorted_index[:, :n]).transpose(1, 0)

        ious_list.append(ious)

    ious = F.concat(ious_list, axis=0)
    # Adaptive per-gt threshold: mean + std of candidate IoUs, floored.
    mean_var = F.mean(ious, axis=0)
    std_var = F.std(ious, 0)
    iou_thresh_per_gt = mean_var + std_var
    iou_thresh_per_gt = F.maximum(iou_thresh_per_gt, 0.2)

    # limits the anchor centers in the gtboxes
    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    anchor_points = an_centers
    # Positives must lie inside the (shrunk) positive area of the gt box.
    pos_area = _compute_pos_area(gt_boxes, 0.3)
    # pos_area = pos_area.unsqueeze(0).repeat(N, 1, 1)
    pos_area = F.broadcast_to(F.expand_dims(pos_area, axis=0),
                              (N, K, pos_area.shape[-1]))

    l = anchor_points[:, :, 0] - pos_area[:, :, 0]
    r = pos_area[:, :, 2] - anchor_points[:, :, 0]
    t = anchor_points[:, :, 1] - pos_area[:, :, 1]
    b = pos_area[:, :, 3] - anchor_points[:, :, 1]

    is_in_gt = F.stack([l, r, t, b], axis=2)
    is_in_gt = is_in_gt.min(axis=2) > 0.1
    valid_mask = (overlaps >= F.expand_dims(
        iou_thresh_per_gt, axis=0)) * is_in_gt.astype(np.float32)
    ious = overlaps * valid_mask

    # Keep the two best gt matches per anchor (columns sorted by IoU).
    sorted_index = F.argsort(ious, 1)
    sorted_overlaps = F.gather(ious, 1, sorted_index)
    max_overlaps = sorted_overlaps[:, :2].flatten()
    argmax_overlaps = sorted_index[:, :2].flatten()

    n, c = all_anchors.shape
    device = all_anchors.device
    # Labels start as -1 (ignore); positives/negatives by IoU at 0.2.
    labels = -F.ones(2 * n).to(device)
    positive_mask = (max_overlaps >= 0.2).to(device).astype(np.float32)
    negative_mask = (max_overlaps < 0.2).to(device).astype(np.float32)
    labels = positive_mask + labels * (1 - positive_mask) * (1 - negative_mask)

    bbox_targets = gt_boxes[argmax_overlaps, :4]
    # Duplicate each anchor twice to align with the two matches per anchor.
    all_anchors = F.broadcast_to(F.expand_dims(all_anchors, axis=1),
                                 (n, 2, c)).reshape(-1, c)
    bbox_targets = bbox_transform_opr(all_anchors[:, :4], bbox_targets)

    labels_cat = gt_boxes[argmax_overlaps, 4]
    # Force the class of ignored anchors to -1.
    labels_cat = labels_cat * (1 - F.equal(labels, -1).astype(
        np.float32)) - F.equal(labels, -1).astype(np.float32)

    return labels, bbox_targets, labels_cat
def forward(self, image, im_info, gt_boxes=None):
    """Run the detector; returns losses in training, detections in eval.

    Args:
        image: input batch, preprocessed by ``self.preprocess_image``.
        im_info: per-image meta; column 4 holds the gt count, columns
            0-3 hold resized/original sizes used for box rescaling.
        gt_boxes: ground-truth boxes, required when training.

    Returns:
        Training: dict of losses (total, cls, bbox, ctr).
        Eval (batch size must be 1): (pred_score, clipped_boxes).
    """
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets, box_ctrness = self.head(features)

    # Flatten every level's NCHW predictions to (N, H*W*A, channels).
    box_logits_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1,
                                        self.cfg.num_classes)
        for _ in box_logits
    ]
    box_offsets_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
        for _ in box_offsets
    ]
    box_ctrness_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 1)
        for _ in box_ctrness
    ]

    anchors_list = self.anchor_generator(features)

    all_level_box_logits = F.concat(box_logits_list, axis=1)
    all_level_box_offsets = F.concat(box_offsets_list, axis=1)
    all_level_box_ctrness = F.concat(box_ctrness_list, axis=1)

    if self.training:
        gt_labels, gt_offsets, gt_ctrness = self.get_ground_truth(
            anchors_list,
            gt_boxes,
            im_info[:, 4].astype(np.int32),
        )

        all_level_box_logits = all_level_box_logits.reshape(
            -1, self.cfg.num_classes)
        all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)
        all_level_box_ctrness = all_level_box_ctrness.flatten()

        gt_labels = gt_labels.flatten()
        gt_offsets = gt_offsets.reshape(-1, 4)
        gt_ctrness = gt_ctrness.flatten()

        # label -1 = ignore, 0 = background, >0 = foreground class.
        valid_mask = gt_labels >= 0
        fg_mask = gt_labels > 0
        num_fg = fg_mask.sum()
        sum_ctr = gt_ctrness[fg_mask].sum()
        # add detach() to avoid syncing across ranks in backward
        num_fg = layers.all_reduce_mean(num_fg).detach()
        sum_ctr = layers.all_reduce_mean(sum_ctr).detach()

        # One-hot class targets (class ids are 1-based, hence the -1).
        gt_targets = F.zeros_like(all_level_box_logits)
        gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

        loss_cls = layers.sigmoid_focal_loss(
            all_level_box_logits[valid_mask],
            gt_targets[valid_mask],
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
        ).sum() / F.maximum(num_fg, 1)

        # IoU loss weighted by centerness, normalized by the ctr sum.
        loss_bbox = (layers.iou_loss(
            all_level_box_offsets[fg_mask],
            gt_offsets[fg_mask],
            box_mode="ltrb",
            loss_type=self.cfg.iou_loss_type,
        ) * gt_ctrness[fg_mask]).sum() / F.maximum(
            sum_ctr, 1e-5) * self.cfg.loss_bbox_weight

        loss_ctr = layers.binary_cross_entropy(
            all_level_box_ctrness[fg_mask],
            gt_ctrness[fg_mask],
        ).sum() / F.maximum(num_fg, 1)

        total = loss_cls + loss_bbox + loss_ctr
        loss_dict = {
            "total_loss": total,
            "loss_cls": loss_cls,
            "loss_bbox": loss_bbox,
            "loss_ctr": loss_ctr,
        }
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict
    else:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        all_level_anchors = F.concat(anchors_list, axis=0)
        pred_boxes = self.point_coder.decode(all_level_anchors,
                                             all_level_box_offsets[0])
        pred_boxes = pred_boxes.reshape(-1, 4)

        # Undo the resize: map boxes back to original image coordinates.
        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        pred_boxes = pred_boxes / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0)
        clipped_boxes = layers.get_clipped_boxes(pred_boxes,
                                                 im_info[0, 2:4]).reshape(
                                                     -1, 4)
        # Final score = geometric mean of class score and centerness.
        pred_score = F.sqrt(
            F.sigmoid(all_level_box_logits) *
            F.sigmoid(all_level_box_ctrness))[0]
        return pred_score, clipped_boxes
def _anchor_target(gt_boxes, im_info, all_anchors):
    """Single-match anchor assignment (ATSS-style adaptive IoU threshold).

    Mirrors ``_anchor_double_target`` but keeps only the single best gt per
    anchor. Fixes applied relative to the original:
    - ``level_dist`` was read before assignment (NameError) and ``distance``
      was used where the IoU matrix was intended; candidates are now the
      ``default_num`` anchors CLOSEST to each gt, with their IoUs gathered,
      exactly as in the sibling function.
    - ``valid_mask`` is broadcast over axis 1 (per-gt column), not axis 0.
    - torch.* calls replaced with the MegEngine equivalents used elsewhere
      in this file.

    Returns:
        labels: (N,) 1 / 0 / -1 for positive / negative / ignore.
        bbox_targets: (N, 4) regression targets toward the matched gt.
        labels_cat: (N,) matched class ids; -1 where labels == -1.
    """
    gt_boxes, im_info = gt_boxes.detach(), im_info.detach()
    all_anchors = all_anchors.detach()

    # Keep only the valid gt boxes of this image.
    gt_boxes = gt_boxes[:im_info[5].astype(np.int32), :]
    valid_mask = 1 - (gt_boxes[:, 4] < 0).astype(np.float32)

    anchor_centers = _compute_center(all_anchors)
    # Mask invalid gts per column: (K, 1) broadcast against (K, 2) centers.
    gtboxes_centers = _compute_center(gt_boxes) * F.expand_dims(valid_mask,
                                                                axis=1)

    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    an_centers = F.expand_dims(anchor_centers, axis=1)
    gt_centers = F.expand_dims(gtboxes_centers, axis=0)

    # (N, K) center-to-center Euclidean distances.
    distance = F.abs(an_centers - gt_centers)
    distance = F.sqrt(F.pow(distance, 2).sum(axis=2))

    start = 0
    end = 5
    overlaps = box_overlap_opr(all_anchors[:, :4], gt_boxes[:, :4])
    overlaps = overlaps * F.expand_dims(valid_mask, axis=0)
    default_num = 9

    # Per FPN level (anchor column 4), gather the IoUs of the default_num
    # anchors closest to each gt center — the ATSS candidate set.
    ious_list = []
    for l in range(start, end):
        _, index = F.cond_take(all_anchors[:, 4] == l, all_anchors[:, 4])
        level_dist = distance[index, :].transpose(1, 0)
        ious = overlaps[index, :].transpose(1, 0)
        sorted_index = F.argsort(level_dist, descending=False)
        n = min(sorted_index.shape[1], default_num)
        ious = F.gather(ious, 1, sorted_index[:, :n]).transpose(1, 0)
        ious_list.append(ious)
    ious = F.concat(ious_list, axis=0)

    # Adaptive per-gt threshold: mean + std of candidate IoUs, floored.
    mean_var = ious.mean(0)
    std_var = ious.std(0)
    iou_thresh_per_gt = F.maximum(mean_var + std_var, 0.35)

    # Positives must have their center inside the gt box.
    anchor_points = an_centers
    proxies = F.broadcast_to(F.expand_dims(gt_boxes, axis=0),
                             (N, K, gt_boxes.shape[-1]))
    l = anchor_points[:, :, 0] - proxies[:, :, 0]
    r = proxies[:, :, 2] - anchor_points[:, :, 0]
    t = anchor_points[:, :, 1] - proxies[:, :, 1]
    b = proxies[:, :, 3] - anchor_points[:, :, 1]
    is_in_gt = F.stack([l, r, t, b], axis=2)
    is_in_gt = is_in_gt.min(axis=2) > 0.1
    valid_mask = (overlaps >= F.expand_dims(
        iou_thresh_per_gt, axis=0)) * is_in_gt.astype(np.float32)
    ious = overlaps * valid_mask

    # Best-matching gt per anchor.
    argmax_overlaps = F.argmax(ious, axis=1)
    max_overlaps = F.indexing_one_hot(ious, argmax_overlaps, axis=1)

    n = all_anchors.shape[0]
    # Labels start as -1 (ignore); positive if any surviving IoU > 0,
    # negative below the configured threshold.
    labels = -F.ones(n)
    positive_mask = (max_overlaps > 0).astype(np.float32)
    negative_mask = (max_overlaps < config.rpn_negative_overlap).astype(
        np.float32)
    labels = positive_mask + labels * (1 - positive_mask) * (1 - negative_mask)

    bbox_targets = gt_boxes[argmax_overlaps, :4]
    bbox_targets = bbox_transform_opr(all_anchors[:, :4], bbox_targets)

    labels_cat = gt_boxes[argmax_overlaps, 4]
    # Zero the class for negatives, force -1 for ignored anchors.
    labels_cat = labels_cat * (1 - F.equal(labels, 0).astype(np.float32))
    labels_cat = labels_cat * (1 - F.equal(labels, -1).astype(
        np.float32)) - F.equal(labels, -1).astype(np.float32)

    return labels, bbox_targets, labels_cat