def test_add_output():
    a = Tensor([1.0, 2.0])
    b = Tensor([3.0, 4.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()

    y = F.add(var_a, var_b)
    y = F.sigmoid(y)
    y.name = "o1"
    net.add_output(y)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)

    g = GraphInference(modified_model)
    out = g.run(a.numpy(), b.numpy())
    np.testing.assert_equal(out["o"], ((a + b) * 2).numpy())
    np.testing.assert_equal(out["o1"], (F.sigmoid(a + b)).numpy())
def swish_function(input, swish, eswish, beta, param):
    # plain SiLU: x * sigmoid(x)
    if swish is False and eswish is False:
        return input * F.sigmoid(input)
    # Swish: x * sigmoid(param * x)
    if swish:
        return input * F.sigmoid(param * input)
    # E-Swish: beta * x * sigmoid(x)
    if eswish:
        return beta * input * F.sigmoid(input)
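# Usage sketch for swish_function (illustrative only; the tensor values and
# parameter choices below are assumptions, not from the original source).
from megengine import Tensor
import megengine.functional as F  # used inside swish_function

x = Tensor([-1.0, 0.0, 1.0])
silu_out = swish_function(x, swish=False, eswish=False, beta=None, param=None)   # x * sigmoid(x)
swish_out = swish_function(x, swish=True, eswish=False, beta=None, param=1.5)    # x * sigmoid(1.5 * x)
eswish_out = swish_function(x, swish=False, eswish=True, beta=1.25, param=None)  # 1.25 * x * sigmoid(x)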
def forward(self, xin, labels=None, imgs=None):
    outputs = []
    assert not self.training

    for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(
        zip(self.cls_convs, self.reg_convs, self.strides, xin)
    ):
        x = self.stems[k](x)
        cls_x = x
        reg_x = x

        cls_feat = cls_conv(cls_x)
        cls_output = self.cls_preds[k](cls_feat)

        reg_feat = reg_conv(reg_x)
        reg_output = self.reg_preds[k](reg_feat)
        obj_output = self.obj_preds[k](reg_feat)

        output = F.concat(
            [reg_output, F.sigmoid(obj_output), F.sigmoid(cls_output)], 1
        )
        outputs.append(output)

    self.hw = [x.shape[-2:] for x in outputs]
    # [batch, n_anchors_all, 85]
    outputs = F.concat([F.flatten(x, start_axis=2) for x in outputs], axis=2)
    outputs = F.transpose(outputs, (0, 2, 1))
    if self.decode_in_inference:
        return self.decode_outputs(outputs)
    else:
        return outputs
def compute_probs(self, output_real, output_fake):
    r"""
    Computes probabilities from real/fake images logits.

    Args:
        output_real (Tensor): A batch of output logits of shape (N, 1) from real images.
        output_fake (Tensor): A batch of output logits of shape (N, 1) from fake images.

    Returns:
        tuple: Average probabilities of real/fake image considered as real for the batch.
    """
    D_x = F.sigmoid(output_real).mean()
    D_Gz = F.sigmoid(output_fake).mean()

    return D_x, D_Gz
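# Usage sketch for compute_probs (illustrative; `disc` is a hypothetical
# discriminator instance exposing the method above, logits shaped (N, 1)).
import numpy as np
from megengine import Tensor

real_logits = Tensor(np.random.randn(8, 1).astype("float32"))
fake_logits = Tensor(np.random.randn(8, 1).astype("float32"))
D_x, D_Gz = disc.compute_probs(real_logits, fake_logits)  # scalar Tensors in (0, 1)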
def sigmoid_focal_loss(
    logits: Tensor,
    targets: Tensor,
    alpha: float = -1,
    gamma: float = 0,
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor): the predicted logits
        targets (Tensor): the assigned targets with the same shape as logits
        alpha (float): parameter to mitigate class imbalance. Default: -1
        gamma (float): parameter to mitigate easy/hard loss imbalance. Default: 0

    Returns:
        the calculated focal loss.
    """
    scores = F.sigmoid(logits)
    loss = binary_cross_entropy(logits, targets)
    if gamma != 0:
        loss *= (targets * (1 - scores) + (1 - targets) * scores) ** gamma
    if alpha >= 0:
        loss *= targets * alpha + (1 - targets) * (1 - alpha)
    return loss
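# Worked sketch for sigmoid_focal_loss (illustrative; assumes the same
# module-level imports as the function above: Tensor, F, binary_cross_entropy).
import numpy as np
from megengine import Tensor

logits = Tensor(np.array([[2.0, -1.0], [0.5, 3.0]], dtype="float32"))
targets = Tensor(np.array([[1.0, 0.0], [0.0, 1.0]], dtype="float32"))
# With gamma=0 and alpha=-1 the result is plain per-element BCE-with-logits;
# gamma > 0 down-weights easy (well-classified) elements, alpha re-balances classes.
loss = sigmoid_focal_loss(logits, targets, alpha=0.25, gamma=2.0)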
def forward(self, inputs):
    image = self.preprocess_image(inputs["image"])
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_cls, box_delta = self.head(features)

    box_cls_list = [
        _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, self.cfg.num_classes)
        for _ in box_cls
    ]
    box_delta_list = [
        _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4) for _ in box_delta
    ]

    anchors_list = [
        self.anchor_gen(features[i], self.stride_list[i]) for i in range(5)
    ]

    all_level_box_cls = F.sigmoid(F.concat(box_cls_list, axis=1))
    all_level_box_delta = F.concat(box_delta_list, axis=1)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if self.training:
        box_gt_cls, box_gt_delta = self.get_ground_truth(
            all_level_anchors,
            inputs["gt_boxes"],
            inputs["im_info"][:, 4].astype(np.int32),
        )
        rpn_cls_loss = layers.get_focal_loss(
            all_level_box_cls,
            box_gt_cls,
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
        )
        rpn_bbox_loss = (
            layers.get_smooth_l1_loss(all_level_box_delta, box_gt_delta, box_gt_cls)
            * self.cfg.reg_loss_weight
        )
        total = rpn_cls_loss + rpn_bbox_loss
        return total, rpn_cls_loss, rpn_bbox_loss
    else:
        # multi-batch testing is currently not supported
        assert self.batch_size == 1

        transformed_box = self.box_coder.decode(
            all_level_anchors,
            all_level_box_delta[0],
        )
        transformed_box = transformed_box.reshape(-1, 4)

        scale_w = inputs["im_info"][0, 1] / inputs["im_info"][0, 3]
        scale_h = inputs["im_info"][0, 0] / inputs["im_info"][0, 2]
        transformed_box = transformed_box / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0
        )
        clipped_box = layers.get_clipped_box(
            transformed_box, inputs["im_info"][0, 2:4]
        ).reshape(-1, 4)
        return all_level_box_cls[0], clipped_box
def train_generator_batch(optical, sar, label, *, opt, netG):
    netG.train()
    cls_score, offsets, ctr_score = netG(sar, optical)
    loss, loss_cls, loss_reg, loss_ctr = netG.loss(cls_score, offsets, ctr_score, label)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass

    # performance on the training data
    B, _, _, _ = cls_score.shape
    cls_score = F.sigmoid(cls_score)  # * ctr_score
    cls_score = cls_score.reshape(B, -1)
    # find the max
    max_id = F.argmax(cls_score, axis=1)  # (B, )
    pred_box = get_box(netG.fm_ctr, offsets)  # (B, 4, H, W)
    pred_box = pred_box.reshape(B, 4, -1)
    output = []
    for i in range(B):
        output.append(F.add_axis(pred_box[i, :, max_id[i]], axis=0))  # (1, 4)
    output = F.concat(output, axis=0)  # (B, 4)
    return [
        loss_cls,
        loss_reg,
        loss_ctr,
        F.norm(output[:, 0:2] - label[:, 0:2], p=2, axis=1).mean(),
    ]
def forward(self, x):
    identity = x
    n, c, h, w = x.shape

    x_h = F.mean(x, axis=3, keepdims=True)  # [B,C,H,1]
    x_w = F.mean(x, axis=2, keepdims=True).transpose(0, 1, 3, 2)  # [B,C,W,1]

    y = F.concat([x_h, x_w], axis=2)  # [B,C,H+W,1]
    y = self.conv1(y)
    # y = self.bn1(y)
    y = self.act(y)  # [B, mip, H+W, 1]

    x_h = y[:, :, :h, :]  # [B,mip,H,1]
    x_w = y[:, :, h:, :]
    x_w = x_w.transpose(0, 1, 3, 2)  # [B,mip,1,W]

    a_h = F.sigmoid(self.conv_h(x_h))
    a_w = F.sigmoid(self.conv_w(x_w))

    out = identity * a_w * a_h
    return out
def forward(self, x):
    bs = x.shape[0]
    if self.radix > 1:
        x = x.reshape((bs, self.cardinality, self.radix, -1))
        x = F.softmax(x, axis=1)
        x = x.reshape(bs, -1)
    else:
        x = F.sigmoid(x)
    return x
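# Shape sketch for the rSoftMax-style forward above (illustrative; `m` is a
# hypothetical module instance with radix/cardinality attributes already set).
import numpy as np
from megengine import Tensor

x = Tensor(np.random.randn(4, 128).astype("float32"))  # (bs, features)
attn = m.forward(x)  # radix > 1: grouped softmax, reshaped back to (bs, -1);
                     # radix == 1: element-wise sigmoid, same shape as x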
def decoding(self, x):
    for deconv in self.deconvs:
        x = deconv(x)
        print('decoding', x.shape)
    # x = F.sigmoid(x[:, :, 1:29, 1:29])
    x = F.sigmoid(self.predict_layer(x))
    x = x[:, :, 1:29, 1:29]
    return x
def get_focal_loss(
    logits: Tensor,
    labels: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor): the predicted logits with the shape of :math:`(B, A, C)`
        labels (Tensor): the assigned labels of boxes with shape of :math:`(B, A)`
        ignore_label (int): the value of ignore class. Default: -1
        background (int): the value of background class. Default: 0
        alpha (float): parameter to mitigate class imbalance. Default: 0.5
        gamma (float): parameter to mitigate easy/hard loss imbalance. Default: 0
        norm_type (str): currently supports "fg" and "none":
            "fg": loss is normalized by the number of foreground samples
            "none": no normalization

    Returns:
        the calculated focal loss.
    """
    class_range = F.arange(1, logits.shape[2] + 1)

    labels = F.add_axis(labels, axis=2)
    scores = F.sigmoid(logits)
    pos_part = (1 - scores) ** gamma * layers.logsigmoid(logits)
    neg_part = scores ** gamma * layers.logsigmoid(-logits)

    pos_loss = -(labels == class_range) * pos_part * alpha
    neg_loss = (
        -(labels != class_range) * (labels != ignore_label) * neg_part * (1 - alpha)
    )
    loss = (pos_loss + neg_loss).sum()

    if norm_type == "fg":
        fg_mask = (labels != background) * (labels != ignore_label)
        return loss / F.maximum(fg_mask.sum(), 1)
    elif norm_type == "none":
        return loss
    else:
        raise NotImplementedError
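# Usage sketch for get_focal_loss (illustrative; shapes follow the docstring:
# logits (B, A, C), labels (B, A) with 0 = background and -1 = ignore).
import numpy as np
from megengine import Tensor

logits = Tensor(np.random.randn(2, 100, 80).astype("float32"))  # B=2, A=100, C=80
labels = Tensor(np.random.randint(-1, 81, size=(2, 100)).astype("int32"))
loss = get_focal_loss(logits, labels, alpha=0.25, gamma=2.0, norm_type="fg")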
def forward(self, in_tensor):
    att = 1 + F.sigmoid(
        self.channel_att(in_tensor) * self.spatial_att(in_tensor))
    return att * in_tensor

# if __name__ == '__main__':
#     a = mge.tensor(np.random.random((24, 48, 160, 160)).astype('float32'))
#     B = BAM(gate_channel=48)
#     x = B.forward(a)
#     print(x.shape)
def test_add_remove_output():
    a = Tensor([1.0, 2.0])
    b = Tensor([3.0, 4.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2, (a - b)

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["a", "b"],
        output_names=["o1", "o2"],
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()

    y1 = (var_a + var_b) * 3
    y2 = F.sigmoid(var_a + var_b)

    net.remove_output(*net.output_vars)
    y1.name = "new_o1"
    y2.name = "new_o2"
    net.add_output(y1, y2)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)

    g = GraphInference(modified_model)
    out = g.run(a.numpy(), b.numpy())
    np.testing.assert_equal(out["new_o1"], ((a + b) * 3).numpy())
    np.testing.assert_almost_equal(out["new_o2"], (F.sigmoid(a + b)).numpy())
def ns_loss_gen(output_fake):
    r"""
    Non-saturating loss for generator.

    Args:
        output_fake (Tensor): Discriminator output logits for fake images.

    Returns:
        Tensor: A scalar tensor loss output.
    """
    output_fake = F.sigmoid(output_fake)

    return -F.log(output_fake + 1e-8).mean()
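# Usage sketch for ns_loss_gen (illustrative). The non-saturating generator
# loss is -E[log D(G(z))]; the 1e-8 term guards against log(0) when D saturates.
import numpy as np
from megengine import Tensor

fake_logits = Tensor(np.random.randn(8, 1).astype("float32"))  # D(G(z)) logits
loss_g = ns_loss_gen(fake_logits)  # scalar Tensor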
def run(use_trace, symbolic):
    a = tensor(np.array([1926.0817], dtype=np.float32))
    net = Sigmoid()

    func_run = run_saved_context
    if use_trace:
        func_run = trace(run_saved_context, symbolic=symbolic)
    s = func_run(a, net=net)
    s2 = F.sigmoid(a)

    assertTensorClose(s.numpy(), s2.numpy())
    assertTensorClose(
        F.grad(s, a, use_virtual_grad=False).numpy(),
        F.grad(s2, a, use_virtual_grad=False).numpy(),
    )
def forward(self, input):
    """
    Forward pass of the function.
    """
    if self.hard is False:
        # ELiSH: x * sigmoid(x) for x >= 0, (exp(x) - 1) * sigmoid(x) for x < 0
        return (input >= 0).float() * swish_function(
            input, False, False, None, None
        ) + (input < 0).float() * (F.exp(input) - 1) * F.sigmoid(input)
    else:
        # HardELiSH: the negative branch uses (exp(x) - 1), mirroring the soft
        # variant above (the original had exp(x - 1), a likely typo)
        return (input >= 0).float() * input * F.max(
            self.a, F.min(self.b, (input + 1.0) / 2.0)
        ) + (input < 0).float() * (
            (F.exp(input) - 1) * F.max(self.a, F.min(self.b, (input + 1.0) / 2.0))
        )
def forward(self, image, im_info, gt_boxes=None):
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets = self.head(features)

    box_logits_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, self.cfg.num_classes)
        for _ in box_logits
    ]
    box_offsets_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
        for _ in box_offsets
    ]

    anchors_list = self.anchor_generator(features)

    all_level_box_logits = F.concat(box_logits_list, axis=1)
    all_level_box_offsets = F.concat(box_offsets_list, axis=1)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if self.training:
        loss_dict = self.get_losses(
            all_level_anchors, all_level_box_logits,
            all_level_box_offsets, gt_boxes, im_info,
        )
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict
    else:
        # multi-batch testing is currently not supported
        assert image.shape[0] == 1

        transformed_box = self.box_coder.decode(
            all_level_anchors, all_level_box_offsets[0])
        transformed_box = transformed_box.reshape(-1, 4)

        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        transformed_box = transformed_box / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0)
        clipped_box = layers.get_clipped_boxes(
            transformed_box, im_info[0, 2:4]).reshape(-1, 4)
        all_level_box_scores = F.sigmoid(all_level_box_logits)
        return all_level_box_scores[0], clipped_box
def forward(self, features):
    cls_prob_list, rpn_num_prob_list, pred_bbox_list, rpn_iou_prob_list = [], [], [], []
    for feature in features:
        rpn_cls_conv = self.cls_subnet(feature)
        cls_score = self.cls_score(rpn_cls_conv)
        rpn_num_prob = self.num_pred(rpn_cls_conv)
        cls_prob = F.sigmoid(cls_score)

        rpn_box_conv = self.bbox_subnet(feature)
        offsets = self.bbox_pred(rpn_box_conv)
        rpn_iou_prob = self.iou_pred(rpn_box_conv)

        cls_prob_list.append(cls_prob)
        pred_bbox_list.append(offsets)
        rpn_iou_prob_list.append(rpn_iou_prob)
        rpn_num_prob_list.append(rpn_num_prob)

    assert cls_prob_list[0].ndim == 4
    pred_cls_list = [
        _.transpose(0, 2, 3, 1).reshape(_.shape[0], -1, (config.num_classes - 1))
        for _ in cls_prob_list
    ]
    pred_reg_list = [
        _.transpose(0, 2, 3, 1).reshape(_.shape[0], -1, 4)
        for _ in pred_bbox_list
    ]
    rpn_iou_list = [
        _.transpose(0, 2, 3, 1).reshape(_.shape[0], -1, (config.num_classes - 1))
        for _ in rpn_iou_prob_list
    ]
    rpn_num_prob_list = [
        _.transpose(0, 2, 3, 1).reshape(_.shape[0], -1, (config.num_classes - 1))
        for _ in rpn_num_prob_list
    ]
    return pred_cls_list, rpn_num_prob_list, pred_reg_list, rpn_iou_list
def test_generator_batch(optical, sar, *, netG):
    netG.eval()
    tmp = netG.z_size
    netG.z_size = netG.test_z_size
    cls_score, offsets, ctr_score = netG(
        sar, optical)  # [B,1,19,19] [B,2,19,19] [B,1,19,19]
    B, _, _, _ = cls_score.shape
    # weighting
    cls_score = F.sigmoid(cls_score)  # * ctr_score
    cls_score = cls_score.reshape(B, -1)
    # find the max
    max_id = F.argmax(cls_score, axis=1)  # (B, )
    pred_box = get_box(netG.test_fm_ctr, offsets)  # (B,4,H,W)
    pred_box = pred_box.reshape(B, 4, -1)
    output = []
    for i in range(B):
        output.append(F.add_axis(pred_box[i, :, max_id[i]], axis=0))  # (1, 4)
    netG.z_size = tmp
    return F.concat(output, axis=0)  # [B,4]
def test_save_context():
    class Sigmoid(Function):
        def forward(self, x):
            y = 1 / (1 + F.exp(-x))
            self.save_for_backward(y)
            return y

        def backward(self, grad_y):
            (y,) = self.saved_tensors
            return grad_y * y * (1 - y)

    a = tensor(np.array([1926.0817], dtype=np.float32))
    s = Sigmoid()(a)
    s2 = F.sigmoid(a)

    assertTensorClose(s.numpy(), s2.numpy())
    assertTensorClose(
        F.grad(s, a, use_virtual_grad=False).numpy(),
        F.grad(s2, a, use_virtual_grad=False).numpy(),
    )
def head(self, c_out, r_out):
    c_out = self.cls_convs(c_out)
    r_out = self.reg_convs(r_out)
    # classification score
    cls_score = self.conv_cls(c_out)  # [B,1,37,37]
    # center-ness score
    ctr_score = self.conv_centerness(r_out)
    ctr_score = F.sigmoid(ctr_score)
    # regression
    offsets = self.conv_reg(r_out)
    offsets = F.relu(offsets * self.total_stride + (self.z_size - 1) / 2)  # [B,2,37,37]
    # bbox decoding
    # bbox = get_box(self.fm_ctr, offsets)  # (B, 2, 37, 37)
    return [cls_score, offsets, ctr_score]
def forward(self, x):
    r"""
    Feedforwards a batch of noise vectors into a batch of fake images.

    Args:
        x (Tensor): A batch of noise vectors of shape (N, nz).

    Returns:
        Tensor: A batch of fake images of shape (N, C, H, W).
    """
    h = self.l1(x)
    h = h.reshape(x.shape[0], -1, self.bottom_width, self.bottom_width)
    h = self.block2(h)
    h = self.block3(h)
    h = self.block4(h)
    h = self.b5(h)
    h = self.activation(h)
    h = F.sigmoid(self.c5(h))  # sigmoid instead of tanh
    return h
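# Usage sketch for the generator forward above (illustrative; `netG` and the
# noise dimension `nz` are hypothetical names for an instance of this class).
import numpy as np
from megengine import Tensor

nz = 128  # hypothetical noise dimension
noise = Tensor(np.random.randn(16, nz).astype("float32"))  # (N, nz)
fake_images = netG(noise)  # (N, C, H, W); pixels lie in (0, 1) due to the sigmoid output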
def forward(self, now_LR, pre_h_SD):
    """
    now_LR:   B,3,H,W
    pre_h_SD: B,48,H,W
    """
    batch, C, H, W = pre_h_SD.shape
    kernels = self.conv(now_LR)  # [B, k*k, H, W]
    batchwise_ans = []
    for idx in range(batch):
        kernel = kernels[idx]  # [k*k, H, W]
        kernel = F.dimshuffle(kernel, (1, 2, 0))  # [H, W, k*k]
        kernel = F.reshape(kernel, (H, W, 1, self.K, self.K, 1))
        kernel = F.broadcast_to(kernel, (C, H, W, 1, self.K, self.K, 1))
        batchwise_ans.append(
            F.local_conv2d(
                F.add_axis(pre_h_SD[idx], 0), kernel, [1, 1], [1, 1], [1, 1]
            )
        )  # [1, C, H, W] some bug with padding
    similarity_matrix = F.concat(batchwise_ans, axis=0)  # [B,C,H,W]
    del batchwise_ans
    similarity_matrix = F.sigmoid(similarity_matrix)
    return F.multiply(pre_h_SD, similarity_matrix)
def forward(self, flow_init, feature_1, feature_2, output_level_flow=None):
    n, c, h, w = flow_init.shape
    n_f, c_f, h_f, w_f = feature_1.shape
    if h != h_f or w != w_f:
        flow_init = F.vision.interpolate(
            flow_init, scale_factor=2., mode='bilinear', align_corners=True) * 2
    feature_2_warp = flow_warp(feature_2, flow_init)
    input_feature = F.concat((feature_1, feature_2_warp), axis=1)
    _, x_out = self.dense_estimator_mask(input_feature)
    inter_flow = x_out[:, :2, :, :]
    inter_mask = x_out[:, 2, :, :]
    inter_mask = F.expand_dims(inter_mask, 1)
    inter_mask = F.sigmoid(inter_mask)

    if output_level_flow is not None:
        inter_flow = upsample2d_flow_as(
            inter_flow, output_level_flow, mode="bilinear", if_rate=True)
        inter_mask = upsample2d_flow_as(
            inter_mask, output_level_flow, mode="bilinear")
        flow_init = output_level_flow
    flow_up = flow_warp(flow_init, inter_flow) * (1 - inter_mask) + flow_init * inter_mask
    return flow_up
def forward(self, now_LR, pre_h_SD):
    """
    now_LR:   B,3,H,W
    pre_h_SD: B,64,H,W
    """
    pad = self.K // 2
    batch, C, H, W = pre_h_SD.shape
    kernels = self.conv(now_LR)  # [B, k*k, H, W]

    # pad pre_h_SD along H and W
    similarity_matrix = F.zeros_like(pre_h_SD)
    pre_h_SD = add_H_W_Padding(pre_h_SD, margin=pad)
    for i in range(self.K):
        for j in range(self.K):
            # element-wise product
            kernel = kernels[:, i * self.K + j, :, :]  # [B, H, W]
            kernel = F.add_axis(kernel, axis=1)  # [B, 1, H, W]
            kernel = F.broadcast_to(kernel, [batch, C, H, W])
            corr = kernel * pre_h_SD[:, :, i:(H + i), j:(W + j)]
            similarity_matrix = similarity_matrix + corr  # [B, C, H, W]
    similarity_matrix = F.sigmoid(similarity_matrix)
    return F.multiply(pre_h_SD[:, :, pad:(H + pad), pad:(W + pad)], similarity_matrix)
def forward(self, image, im_info, gt_boxes=None):
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets, box_ctrness = self.head(features)

    box_logits_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, self.cfg.num_classes)
        for _ in box_logits
    ]
    box_offsets_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
        for _ in box_offsets
    ]
    box_ctrness_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 1)
        for _ in box_ctrness
    ]

    anchors_list = self.anchor_generator(features)

    all_level_box_logits = F.concat(box_logits_list, axis=1)
    all_level_box_offsets = F.concat(box_offsets_list, axis=1)
    all_level_box_ctrness = F.concat(box_ctrness_list, axis=1)

    if self.training:
        gt_labels, gt_offsets, gt_ctrness = self.get_ground_truth(
            anchors_list, gt_boxes, im_info[:, 4].astype(np.int32),
        )

        all_level_box_logits = all_level_box_logits.reshape(-1, self.cfg.num_classes)
        all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)
        all_level_box_ctrness = all_level_box_ctrness.flatten()

        gt_labels = gt_labels.flatten()
        gt_offsets = gt_offsets.reshape(-1, 4)
        gt_ctrness = gt_ctrness.flatten()

        valid_mask = gt_labels >= 0
        fg_mask = gt_labels > 0
        num_fg = fg_mask.sum()
        sum_ctr = gt_ctrness[fg_mask].sum()
        # add detach() to avoid syncing across ranks in backward
        num_fg = layers.all_reduce_mean(num_fg).detach()
        sum_ctr = layers.all_reduce_mean(sum_ctr).detach()

        gt_targets = F.zeros_like(all_level_box_logits)
        gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

        loss_cls = layers.sigmoid_focal_loss(
            all_level_box_logits[valid_mask],
            gt_targets[valid_mask],
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
        ).sum() / F.maximum(num_fg, 1)

        loss_bbox = (
            layers.iou_loss(
                all_level_box_offsets[fg_mask],
                gt_offsets[fg_mask],
                box_mode="ltrb",
                loss_type=self.cfg.iou_loss_type,
            ) * gt_ctrness[fg_mask]
        ).sum() / F.maximum(sum_ctr, 1e-5) * self.cfg.loss_bbox_weight

        loss_ctr = layers.binary_cross_entropy(
            all_level_box_ctrness[fg_mask],
            gt_ctrness[fg_mask],
        ).sum() / F.maximum(num_fg, 1)

        total = loss_cls + loss_bbox + loss_ctr
        loss_dict = {
            "total_loss": total,
            "loss_cls": loss_cls,
            "loss_bbox": loss_bbox,
            "loss_ctr": loss_ctr,
        }
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict
    else:
        # multi-batch testing is currently not supported
        assert image.shape[0] == 1

        all_level_anchors = F.concat(anchors_list, axis=0)
        pred_boxes = self.point_coder.decode(
            all_level_anchors, all_level_box_offsets[0])
        pred_boxes = pred_boxes.reshape(-1, 4)

        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        pred_boxes = pred_boxes / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0)
        clipped_boxes = layers.get_clipped_boxes(
            pred_boxes, im_info[0, 2:4]).reshape(-1, 4)
        pred_score = F.sqrt(
            F.sigmoid(all_level_box_logits) * F.sigmoid(all_level_box_ctrness))[0]
        return pred_score, clipped_boxes
def forward(self, inputs):
    image = self.preprocess_image(inputs["image"])
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets = self.head(features)

    box_logits_list = [
        _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, self.cfg.num_classes)
        for _ in box_logits
    ]
    box_offsets_list = [
        _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4)
        for _ in box_offsets
    ]

    anchors_list = [
        self.anchor_gen(features[i], self.stride_list[i])
        for i in range(len(features))
    ]

    all_level_box_logits = F.concat(box_logits_list, axis=1)
    all_level_box_offsets = F.concat(box_offsets_list, axis=1)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if self.training:
        box_gt_scores, box_gt_offsets = self.get_ground_truth(
            all_level_anchors,
            inputs["gt_boxes"],
            inputs["im_info"][:, 4].astype(np.int32),
        )
        norm_type = "none" if self.cfg.loss_normalizer_momentum > 0.0 else "fg"
        rpn_cls_loss = layers.get_focal_loss(
            all_level_box_logits,
            box_gt_scores,
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
            norm_type=norm_type,
        )
        rpn_bbox_loss = (
            layers.get_smooth_l1_loss(
                all_level_box_offsets,
                box_gt_offsets,
                box_gt_scores,
                self.cfg.smooth_l1_beta,
                norm_type=norm_type,
            ) * self.cfg.reg_loss_weight
        )

        if norm_type == "none":
            F.add_update(
                self.loss_normalizer,
                (box_gt_scores > 0).sum(),
                alpha=self.cfg.loss_normalizer_momentum,
                beta=1 - self.cfg.loss_normalizer_momentum,
            )
            rpn_cls_loss = rpn_cls_loss / F.maximum(self.loss_normalizer, 1)
            rpn_bbox_loss = rpn_bbox_loss / F.maximum(self.loss_normalizer, 1)

        total = rpn_cls_loss + rpn_bbox_loss
        loss_dict = {
            "total_loss": total,
            "loss_cls": rpn_cls_loss,
            "loss_loc": rpn_bbox_loss,
        }
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict
    else:
        # multi-batch testing is currently not supported
        assert self.batch_size == 1

        transformed_box = self.box_coder.decode(
            all_level_anchors,
            all_level_box_offsets[0],
        )
        transformed_box = transformed_box.reshape(-1, 4)

        scale_w = inputs["im_info"][0, 1] / inputs["im_info"][0, 3]
        scale_h = inputs["im_info"][0, 0] / inputs["im_info"][0, 2]
        transformed_box = transformed_box / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0)
        clipped_box = layers.get_clipped_box(
            transformed_box, inputs["im_info"][0, 2:4]).reshape(-1, 4)
        all_level_box_scores = F.sigmoid(all_level_box_logits)
        return all_level_box_scores[0], clipped_box
def forward(self, image, im_info, gt_boxes=None):
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets = self.head(features)

    box_logits_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, self.cfg.num_classes)
        for _ in box_logits
    ]
    box_offsets_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
        for _ in box_offsets
    ]

    anchors_list = self.anchor_generator(features)

    all_level_box_logits = F.concat(box_logits_list, axis=1)
    all_level_box_offsets = F.concat(box_offsets_list, axis=1)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if self.training:
        gt_labels, gt_offsets = self.get_ground_truth(
            all_level_anchors, gt_boxes, im_info[:, 4].astype(np.int32),
        )

        all_level_box_logits = all_level_box_logits.reshape(-1, self.cfg.num_classes)
        all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)

        gt_labels = gt_labels.flatten()
        gt_offsets = gt_offsets.reshape(-1, 4)

        valid_mask = gt_labels >= 0
        fg_mask = gt_labels > 0
        num_fg = fg_mask.sum()

        gt_targets = F.zeros_like(all_level_box_logits)
        gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

        loss_cls = layers.sigmoid_focal_loss(
            all_level_box_logits[valid_mask],
            gt_targets[valid_mask],
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
        ).sum() / F.maximum(num_fg, 1)

        loss_bbox = layers.smooth_l1_loss(
            all_level_box_offsets[fg_mask],
            gt_offsets[fg_mask],
            beta=self.cfg.smooth_l1_beta,
        ).sum() / F.maximum(num_fg, 1) * self.cfg.loss_bbox_weight

        total = loss_cls + loss_bbox
        loss_dict = {
            "total_loss": total,
            "loss_cls": loss_cls,
            "loss_bbox": loss_bbox,
        }
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict
    else:
        # multi-batch testing is currently not supported
        assert image.shape[0] == 1

        pred_boxes = self.box_coder.decode(all_level_anchors, all_level_box_offsets[0])
        pred_boxes = pred_boxes.reshape(-1, 4)

        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        pred_boxes = pred_boxes / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0)
        clipped_boxes = layers.get_clipped_boxes(
            pred_boxes, im_info[0, 2:4]).reshape(-1, 4)
        pred_score = F.sigmoid(all_level_box_logits)[0]
        return pred_score, clipped_boxes
def forward(self, a):
    # add
    if self.mode == "add":
        x = a + mge.tensor(np.float32(10))
        y = a + mge.tensor(self.data1)
        z = x + y
    # sub
    elif self.mode == "sub":
        x = a - mge.tensor(np.float32(10))
        y = a - mge.tensor(self.data1)
        z = x - y
    # mul
    elif self.mode == "mul":
        x = a * mge.tensor(np.float32(10))
        y = mge.tensor(self.data1) * a
        z = x * y
    # max
    elif self.mode == "max":
        x = a + mge.tensor(self.data)
        y = a + mge.tensor(self.data2)
        z = F.maximum(x, y)
    # min
    elif self.mode == "min":
        x = a + mge.tensor(self.data)
        y = a + mge.tensor(self.data2)
        z = F.minimum(x, y)
    # pow
    elif self.mode == "pow":
        z = a ** 2
    # ceil
    elif self.mode == "ceil":
        z = F.ceil(a)
    # floor
    elif self.mode == "floor":
        z = F.floor(a)
    # div
    elif self.mode == "div":
        y = mge.tensor(self.data1) / a
        x = a / mge.tensor(np.float32(2))
        z = y / x
    # cycle_div
    elif self.mode == "cycle_div":
        z = a / mge.tensor(self.data1)
    # abs
    elif self.mode == "abs":
        z = F.abs(a)
    # exp
    elif self.mode == "exp":
        z = F.exp(a)
    # log
    elif self.mode == "log":
        z = F.log(a)
    # fuse_add_relu
    elif self.mode == "fuse_add_relu":
        y = a + mge.tensor(self.data2)
        z = F.relu(y)
    # fuse_mul_add3
    elif self.mode == "fuse_mul_add3":
        y = a * mge.tensor(self.data1)
        z = y + mge.tensor(self.data2)
    # fuse_add_sigmoid
    elif self.mode == "fuse_add_sigmoid":
        y = a + mge.tensor(self.data2)
        z = F.sigmoid(y)
    else:
        raise NotImplementedError('no such elemwise mode "%s"' % self.mode)
    return z
def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes, im_info):
    # pylint: disable=too-many-statements
    def positive_bag_loss(logits, axis=1):
        weight = 1.0 / (1.0 - logits)
        weight /= weight.sum(axis=axis, keepdims=True)
        bag_prob = (weight * logits).sum(axis=1)
        return -layers.safelog(bag_prob)

    def negative_bag_loss(logits, gamma):
        return (logits ** gamma) * (-layers.safelog(1.0 - logits))

    pred_scores = F.sigmoid(pred_logits)
    box_prob_list = []
    positive_losses = []
    clamp_eps = 1e-7
    bucket_size = self.cfg.bucket_size

    for bid in range(im_info.shape[0]):
        boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
        # id 0 is used for background classes, so -1 first
        labels = boxes_info[:, 4].astype("int32") - 1

        pred_box = self.box_coder.decode(anchors, pred_offsets[bid]).detach()
        overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()
        thresh1 = self.cfg.box_iou_threshold
        thresh2 = F.clip(
            overlaps.max(axis=1, keepdims=True),
            lower=thresh1 + clamp_eps, upper=1.0,
        )
        gt_pred_prob = F.clip(
            (overlaps - thresh1) / (thresh2 - thresh1), lower=0, upper=1.0)

        image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
        # guarantee that nonzero_idx is not empty
        if gt_pred_prob.max() > clamp_eps:
            _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
            # since nonzeros is only 1 dim, use num_anchor to get real indices
            num_anchors = gt_pred_prob.shape[1]
            anchors_idx = nonzero_idx % num_anchors
            gt_idx = nonzero_idx // num_anchors
            image_boxes_prob[anchors_idx, labels[gt_idx]] = gt_pred_prob[gt_idx, anchors_idx]

        box_prob_list.append(image_boxes_prob)

        # construct bags for objects
        match_quality_matrix = layers.get_iou(boxes_info[:, :4], anchors).detach()
        num_gt = match_quality_matrix.shape[0]
        _, matched_idx = F.topk(
            match_quality_matrix,
            k=bucket_size,
            descending=True,
            no_sort=True,
        )
        matched_idx = matched_idx.detach()
        matched_idx_flatten = matched_idx.reshape(-1)

        gather_idx = labels.reshape(-1, 1)
        gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))
        gather_src = pred_scores[bid, matched_idx_flatten]
        gather_src = gather_src.reshape(num_gt, bucket_size, -1)
        matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

        topk_anchors = anchors[matched_idx_flatten]
        boxes_broad_cast = F.broadcast_to(
            F.expand_dims(boxes_info[:, :4], axis=1), (num_gt, bucket_size, 4)
        ).reshape(-1, 4)
        matched_offsets = self.box_coder.encode(topk_anchors, boxes_broad_cast)

        reg_loss = layers.smooth_l1_loss(
            pred_offsets[bid, matched_idx_flatten],
            matched_offsets,
            beta=self.cfg.smooth_l1_beta,
        ).sum(axis=-1) * self.cfg.reg_loss_weight
        matched_reg_scores = F.exp(-reg_loss)

        positive_losses.append(
            positive_bag_loss(
                matched_score * matched_reg_scores.reshape(-1, bucket_size), axis=1)
        )

    num_foreground = im_info[:, 4].sum()
    pos_loss = F.concat(positive_losses).sum() / F.maximum(1.0, num_foreground)
    box_probs = F.stack(box_prob_list, axis=0)

    neg_loss = negative_bag_loss(
        pred_scores * (1 - box_probs), self.cfg.focal_loss_gamma
    ).sum() / F.maximum(1.0, num_foreground * bucket_size)

    alpha = self.cfg.focal_loss_alpha
    pos_loss = pos_loss * alpha
    neg_loss = neg_loss * (1 - alpha)
    loss_dict = {
        "total_loss": pos_loss + neg_loss,
        "pos_loss": pos_loss,
        "neg_loss": neg_loss,
    }
    return loss_dict