def forward(self, input): """ Args: inputs: input images. Returns: Tuple[Tensor]: FPN feature. """ # backbone out_features = self.backbone(input) features = [out_features[f] for f in self.in_features] [x2, x1, x0] = features fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 f_out0 = self.upsample(fpn_out0) # 512/16 f_out0 = F.concat([f_out0, x1], 1) # 512->1024/16 f_out0 = self.C3_p4(f_out0) # 1024->512/16 fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 f_out1 = self.upsample(fpn_out1) # 256/8 f_out1 = F.concat([f_out1, x2], 1) # 256->512/8 pan_out2 = self.C3_p3(f_out1) # 512->256/8 p_out1 = self.bu_conv2(pan_out2) # 256->256/16 p_out1 = F.concat([p_out1, fpn_out1], 1) # 256->512/16 pan_out1 = self.C3_n3(p_out1) # 512->512/16 p_out0 = self.bu_conv1(pan_out1) # 512->512/32 p_out0 = F.concat([p_out0, fpn_out0], 1) # 512->1024/32 pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 outputs = (pan_out2, pan_out1, pan_out0) return outputs
def merge_rpn_score_box(self, rpn_cls_score_list, rpn_bbox_offsets_list):
    """Flatten per-level RPN outputs per image, then concat over the batch.

    Args:
        rpn_cls_score_list: per-FPN-level classification scores, batch-first.
            # assumes each [i][bid] is 4-D, e.g. (2, A, H, W) -- TODO confirm
        rpn_bbox_offsets_list: per-FPN-level bbox offsets, batch-first.
            # assumes each [i][bid] is (A, 4, H, W)-like -- TODO confirm

    Returns:
        (final_rpn_cls_score, final_rpn_bbox_offsets) as (-1, 2) and (-1, 4)
        row tensors over all levels and all images.
    """
    final_rpn_cls_score_list = []
    final_rpn_bbox_offsets_list = []
    for bid in range(self.cfg.batch_per_gpu):
        batch_rpn_cls_score_list = []
        batch_rpn_bbox_offsets_list = []
        for i in range(len(self.in_features)):
            # move spatial dims first so rows enumerate anchors in (H, W, A) order
            rpn_cls_score = rpn_cls_score_list[i][bid] \
                .dimshuffle(2, 3, 1, 0).reshape(-1, 2)
            rpn_bbox_offsets = rpn_bbox_offsets_list[i][bid] \
                .dimshuffle(2, 3, 0, 1).reshape(-1, 4)
            batch_rpn_cls_score_list.append(rpn_cls_score)
            batch_rpn_bbox_offsets_list.append(rpn_bbox_offsets)
        batch_rpn_cls_score = F.concat(batch_rpn_cls_score_list, axis=0)
        batch_rpn_bbox_offsets = F.concat(batch_rpn_bbox_offsets_list, axis=0)
        final_rpn_cls_score_list.append(batch_rpn_cls_score)
        final_rpn_bbox_offsets_list.append(batch_rpn_bbox_offsets)
    final_rpn_cls_score = F.concat(final_rpn_cls_score_list, axis=0)
    final_rpn_bbox_offsets = F.concat(final_rpn_bbox_offsets_list, axis=0)
    return final_rpn_cls_score, final_rpn_bbox_offsets
def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    """Filter raw detector output by confidence and apply NMS per image.

    Args:
        prediction: batched predictions; per row
            (cx, cy, w, h, objectness, class scores...).
            # assumed layout, inferred from the corner conversion below -- TODO confirm
        num_classes: number of class-score columns after column 5.
        conf_thre: threshold on objectness * best class score.
        nms_thre: IoU threshold for NMS.

    Returns:
        list, one entry per image: (k, 7) tensor of
        (x1, y1, x2, y2, obj, class_conf, class_pred), or None if nothing kept.
    """
    # convert boxes from center form (cx, cy, w, h) to corners (x1, y1, x2, y2)
    box_corner = F.zeros_like(prediction)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):

        # If none are remaining => process next image
        if not image_pred.shape[0]:
            continue

        # Get score and class with highest confidence
        class_conf = F.max(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True)
        class_pred = F.argmax(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True)

        class_conf_squeeze = F.squeeze(class_conf)
        # keep rows whose combined objectness * class confidence clears the threshold
        conf_mask = image_pred[:, 4] * class_conf_squeeze >= conf_thre
        detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1)
        detections = detections[conf_mask]
        if not detections.shape[0]:
            continue

        # class-agnostic NMS, scored by objectness * class confidence
        nms_out_index = F.vision.nms(
            detections[:, :4],
            detections[:, 4] * detections[:, 5],
            nms_thre,
        )
        detections = detections[nms_out_index]
        if output[i] is None:
            output[i] = detections
        else:
            output[i] = F.concat((output[i], detections))

    return output
def forward(self, xin, labels=None, imgs=None):
    """Inference-only detection head: per-level predictions, flattened.

    Args:
        xin: list of FPN feature maps, one per stride level.
        labels, imgs: unused at inference (training mode is asserted off).

    Returns:
        Decoded outputs when ``self.decode_in_inference`` is set, otherwise the
        raw concatenated outputs of shape [batch, n_anchors_all, 5 + classes].
    """
    outputs = []
    # this path only supports evaluation mode
    assert not self.training
    for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(
            zip(self.cls_convs, self.reg_convs, self.strides, xin)):
        x = self.stems[k](x)
        cls_x = x
        reg_x = x
        cls_feat = cls_conv(cls_x)
        cls_output = self.cls_preds[k](cls_feat)
        reg_feat = reg_conv(reg_x)
        reg_output = self.reg_preds[k](reg_feat)
        obj_output = self.obj_preds[k](reg_feat)
        # box regression stays raw; objectness / class logits go through sigmoid
        output = F.concat(
            [reg_output, F.sigmoid(obj_output), F.sigmoid(cls_output)], 1)
        outputs.append(output)

    # remember per-level spatial sizes for decode_outputs
    self.hw = [x.shape[-2:] for x in outputs]
    # [batch, n_anchors_all, 85]
    outputs = F.concat([F.flatten(x, start_axis=2) for x in outputs], axis=2)
    outputs = F.transpose(outputs, (0, 2, 1))
    if self.decode_in_inference:
        return self.decode_outputs(outputs)
    else:
        return outputs
def test_generator_batch(image, *, netG):
    """Evaluate the bidirectional recurrent SR generator on one clip.

    Forward and backward temporal passes share one doubled batch: the second
    half of the batch is fed the time-reversed sequence.

    Args:
        image: LR clip, e.g. [1,100,3,180,320] as [B,T,C,h,w].
        netG: generator network; switched to eval mode here.

    Returns:
        Super-resolved clip [B,T,3,H,W]: network residual + bilinear upsample.
    """
    # image: [1,100,3,180,320]
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.eval()
    forward_hiddens = []
    backward_hiddens = []
    res = []
    hidden = F.zeros((2 * B, netG.hidden_channels, h, w))
    for i in range(T):
        # first half of the doubled batch: frame i (forward pass);
        # second half: frame T-i-1 (backward pass)
        now_frame = F.concat([image[:, i, ...], image[:, T - i - 1, ...]],
                             axis=0)
        if i == 0:
            # no previous frame yet: estimate flow against itself
            flow = netG.flownet(now_frame, now_frame)
        else:
            ref = F.concat([image[:, i - 1, ...], image[:, T - i, ...]],
                           axis=0)
            flow = netG.flownet(now_frame, ref)
        hidden = netG(hidden, flow, now_frame)
        forward_hiddens.append(hidden[0:B, ...])
        backward_hiddens.append(hidden[B:2 * B, ...])
    for i in range(T):
        # pair the forward hidden of frame i with the backward hidden of frame i
        res.append(
            netG.do_upsample(forward_hiddens[i], backward_hiddens[T - i - 1]))
    res = F.stack(res, axis=1)  # [B,T,3,H,W]
    return res + biup
def forward(self, inputs):
    """
    Run the YOLOv3-style FPN neck over backbone features.

    Args:
        inputs (Tensor): input image.

    Returns:
        Tuple[Tensor]: FPN output features..
    """
    # backbone
    backbone_feats = self.backbone(inputs)
    x2, x1, x0 = (backbone_feats[name] for name in self.in_features)

    # yolo branch 1: compress deepest level, upsample, fuse with x1
    branch1 = self.out1_cbl(x0)
    branch1 = self.upsample(branch1)
    out_dark4 = self.out1(F.concat([branch1, x1], 1))

    # yolo branch 2: compress fused level, upsample, fuse with x2
    branch2 = self.out2_cbl(out_dark4)
    branch2 = self.upsample(branch2)
    out_dark3 = self.out2(F.concat([branch2, x2], 1))

    return (out_dark3, out_dark4, x0)
def merge_rpn_score_box(self, rpn_cls_score_list, rpn_bbox_offset_list):
    """Flatten per-level RPN scores/offsets per image and concat over the batch.

    Args:
        rpn_cls_score_list: per-FPN-level objectness scores, batch-first.
        rpn_bbox_offset_list: per-FPN-level bbox regression offsets, batch-first.

    Returns:
        (scores flattened to 1-D, offsets reshaped to (-1, 4)), both
        concatenated over levels then over the batch.
    """
    final_rpn_cls_score_list = []
    final_rpn_bbox_offset_list = []

    for bid in range(rpn_cls_score_list[0].shape[0]):
        batch_rpn_cls_score_list = []
        batch_rpn_bbox_offset_list = []
        for i in range(len(self.in_features)):
            # (C, H, W) -> (H, W, C) -> flat, so rows enumerate anchors spatially
            rpn_cls_scores = rpn_cls_score_list[i][bid].transpose(
                1, 2, 0).flatten()
            rpn_bbox_offsets = (rpn_bbox_offset_list[i][bid].transpose(
                2, 3, 0, 1).reshape(-1, 4))
            batch_rpn_cls_score_list.append(rpn_cls_scores)
            batch_rpn_bbox_offset_list.append(rpn_bbox_offsets)

        batch_rpn_cls_scores = F.concat(batch_rpn_cls_score_list, axis=0)
        batch_rpn_bbox_offsets = F.concat(batch_rpn_bbox_offset_list, axis=0)

        final_rpn_cls_score_list.append(batch_rpn_cls_scores)
        final_rpn_bbox_offset_list.append(batch_rpn_bbox_offsets)

    final_rpn_cls_scores = F.concat(final_rpn_cls_score_list, axis=0)
    final_rpn_bbox_offsets = F.concat(final_rpn_bbox_offset_list, axis=0)
    return final_rpn_cls_scores, final_rpn_bbox_offsets
def forward(self, inputs):
    """RetinaNet-style forward: losses when training, decoded boxes otherwise.

    Args:
        inputs: dict with "image", "im_info" and, when training, "gt_boxes".

    Returns:
        Training: (total_loss, rpn_cls_loss, rpn_bbox_loss).
        Inference: (per-anchor sigmoid class scores, clipped decoded boxes).
    """
    image = self.preprocess_image(inputs["image"])
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_cls, box_delta = self.head(features)

    # (N, C, H, W) -> (N, H*W*A, num_classes / 4)
    box_cls_list = [
        _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, self.cfg.num_classes)
        for _ in box_cls
    ]
    box_delta_list = [
        _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4)
        for _ in box_delta
    ]
    # generate anchors for every feature level actually produced
    # (was hard-coded to range(5), which silently breaks if in_features changes)
    anchors_list = [
        self.anchor_gen(features[i], self.stride_list[i])
        for i in range(len(features))
    ]

    all_level_box_cls = F.sigmoid(F.concat(box_cls_list, axis=1))
    all_level_box_delta = F.concat(box_delta_list, axis=1)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if self.training:
        box_gt_cls, box_gt_delta = self.get_ground_truth(
            all_level_anchors,
            inputs["gt_boxes"],
            inputs["im_info"][:, 4].astype(np.int32),
        )
        rpn_cls_loss = layers.get_focal_loss(
            all_level_box_cls,
            box_gt_cls,
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
        )
        rpn_bbox_loss = (
            layers.get_smooth_l1_loss(all_level_box_delta, box_gt_delta, box_gt_cls)
            * self.cfg.reg_loss_weight
        )
        total = rpn_cls_loss + rpn_bbox_loss
        return total, rpn_cls_loss, rpn_bbox_loss
    else:
        # currently not support multi-batch testing
        assert self.batch_size == 1

        transformed_box = self.box_coder.decode(
            all_level_anchors,
            all_level_box_delta[0],
        )
        transformed_box = transformed_box.reshape(-1, 4)

        # map boxes from the resized image back to the original resolution
        scale_w = inputs["im_info"][0, 1] / inputs["im_info"][0, 3]
        scale_h = inputs["im_info"][0, 0] / inputs["im_info"][0, 2]
        transformed_box = transformed_box / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0
        )
        clipped_box = layers.get_clipped_box(
            transformed_box, inputs["im_info"][0, 2:4]
        ).reshape(-1, 4)
        return all_level_box_cls[0], clipped_box
def forward(self, It, S, D, pre_S, pre_D, pre_S_hat=None, pre_D_hat=None, pre_SD=None):
    """Structure/Detail recurrent forward with hidden states defaulting to zeros.

    Args:
        It: current LR frame; only its shape is read here.
        S, D: structure / detail components of the current frame.
        pre_S, pre_D: components of the previous frame.
        pre_S_hat, pre_D_hat, pre_SD: recurrent hidden states; must be all-None
            (then initialised to zeros) or all provided.

    Returns:
        (HR image, new overall hidden state, S features, D features,
        upscaled S image, upscaled D image).
    """
    B, _, H, W = It.shape
    if pre_S_hat is None:
        assert pre_D_hat is None and pre_SD is None
        pre_S_hat = megengine.tensor(
            np.zeros((B, self.hidden_channels, H, W), dtype=np.float32))
        pre_D_hat = F.zeros_like(pre_S_hat)
        pre_SD = F.zeros_like(pre_S_hat)
    # pre_SD = self.hsa(It, pre_SD)  # auto select
    # NOTE(review): the hsa() hidden-state adaptation above is disabled here
    # but enabled in the sibling forward() of this model -- confirm intentional
    S = F.concat([pre_S, S, pre_S_hat, pre_SD], axis=1)
    S = self.pre_SD_S(S)
    D = F.concat([pre_D, D, pre_D_hat, pre_SD], axis=1)
    D = self.pre_SD_D(D)
    for i in range(self.blocknums):
        S, D = self.SDBlocks[i](S, D)
    pre_SD = self.conv_SD(S + D)
    S = self.convS(S)
    D = self.convD(D)
    I = self.convHR(F.concat([S, D], axis=1))
    return self.trans_HR(I), pre_SD, S, D, self.trans_S(S), self.trans_D(D)
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Build sampled RPN anchor labels and regression offsets for one batch.

    Args:
        anchors_list: per-level anchors; concatenated into one (-1, 4) set.
        batched_gt_boxes: padded ground-truth boxes per image.
        batched_num_gts: number of valid gt boxes per image.

    Returns:
        (labels, offsets) concatenated over the batch, detached from autodiff.
    """
    anchors = F.concat(anchors_list, axis=0)
    labels_list = []
    offsets_list = []

    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        overlaps = layers.get_iou(gt_boxes[:, :4], anchors)
        matched_indices, labels = self.matcher(overlaps)
        offsets = self.box_coder.encode(anchors, gt_boxes[matched_indices, :4])

        # sample positive labels
        num_positive = int(self.cfg.num_sample_anchors * self.cfg.positive_anchor_ratio)
        labels = layers.sample_labels(labels, num_positive, 1, -1)
        # sample negative labels: fill the remaining budget after positives
        num_positive = (labels == 1).sum().astype(np.int32)
        num_negative = self.cfg.num_sample_anchors - num_positive
        labels = layers.sample_labels(labels, num_negative, 0, -1)

        labels_list.append(labels)
        offsets_list.append(offsets)

    return (
        F.concat(labels_list, axis=0).detach(),
        F.concat(offsets_list, axis=0).detach(),
    )
def fpn_anchor_target(boxes, im_info, all_anchors_list):
    """Compute per-image RPN labels/targets over all FPN levels, with sampling.

    Args:
        boxes: ground-truth boxes per image.
        im_info: per-image meta information.
        all_anchors_list: anchors for each FPN level.

    Returns:
        (labels, bbox_targets) concatenated over the batch, gradients blocked.
    """
    final_labels_list = []
    final_bbox_targets_list = []

    for bid in range(config.batch_per_gpu):
        batch_labels_list = []
        batch_bbox_targets_list = []
        for i in range(len(all_anchors_list)):
            anchors_perlvl = all_anchors_list[i]
            rpn_labels_perlvl, rpn_bbox_targets_perlvl = fpn_anchor_target_opr_core_impl(
                boxes[bid], im_info[bid], anchors_perlvl)
            batch_labels_list.append(rpn_labels_perlvl)
            batch_bbox_targets_list.append(rpn_bbox_targets_perlvl)
        # here we samples the rpn_labels
        concated_batch_labels = F.concat(batch_labels_list, axis=0)
        concated_batch_bbox_targets = F.concat(batch_bbox_targets_list, axis=0)
        # sample labels
        # NOTE(review): num_positive is a float here (no int cast) -- confirm
        # that _bernoulli_sample_labels accepts a fractional budget
        num_positive = config.num_sample_anchors * config.positive_anchor_ratio
        concated_batch_labels = _bernoulli_sample_labels(
            concated_batch_labels, num_positive, 1, config.ignore_label)
        num_positive = F.equal(concated_batch_labels, 1).sum()
        num_negative = config.num_sample_anchors - num_positive
        concated_batch_labels = _bernoulli_sample_labels(
            concated_batch_labels, num_negative, 0, config.ignore_label)

        final_labels_list.append(concated_batch_labels)
        final_bbox_targets_list.append(concated_batch_bbox_targets)
    final_labels = F.concat(final_labels_list, axis=0)
    final_bbox_targets = F.concat(final_bbox_targets_list, axis=0)
    return F.zero_grad(final_labels), F.zero_grad(final_bbox_targets)
def train_generator_batch(image, label, *, gm, netG, netloss):
    """One training step of the bidirectional recurrent SR generator.

    Forward and backward temporal passes share one doubled batch: the second
    half of the batch is fed the time-reversed sequence.

    Args:
        image: LR clip [B,T,C,h,w].
        label: HR ground-truth clip.
        gm: autodiff GradManager (keyword-only).
        netG: generator network.
        netloss: reconstruction loss.

    Returns:
        Scalar loss (all-reduced mean when running distributed).
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.train()
    with gm:
        forward_hiddens = []
        backward_hiddens = []
        res = []
        hidden = F.zeros((2 * B, netG.hidden_channels, h, w))
        for i in range(T):
            # first half: frame i (forward); second half: frame T-i-1 (backward)
            now_frame = F.concat([image[:, i, ...], image[:, T - i - 1, ...]],
                                 axis=0)
            if i == 0:
                # no previous frame yet: estimate flow against itself
                flow = netG.flownet(now_frame, now_frame)
            else:
                ref = F.concat([image[:, i - 1, ...], image[:, T - i, ...]],
                               axis=0)
                flow = netG.flownet(now_frame, ref)
            hidden = netG(hidden, flow, now_frame)
            forward_hiddens.append(hidden[0:B, ...])
            backward_hiddens.append(hidden[B:2 * B, ...])
        for i in range(T):
            # pair forward and backward hiddens belonging to the same frame
            res.append(
                netG.do_upsample(forward_hiddens[i],
                                 backward_hiddens[T - i - 1]))
        res = F.stack(res, axis=1)  # [B,T,3,H,W]
        loss = netloss(res + biup, label)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
    return loss
def fpn_rpn_reshape(pred_cls_score_list, pred_bbox_offsets_list):
    """Flatten per-level RPN predictions to (N, 2)/(N, 4) rows and merge them.

    For each image, every FPN level is flattened to anchor-major rows and
    concatenated; the per-image results are then concatenated over the batch.
    """
    all_cls_scores = []
    all_bbox_offsets = []
    batch_per_gpu = pred_cls_score_list[0].shape[0]
    for bid in range(batch_per_gpu):
        cls_rows = []
        offset_rows = []
        for lvl in range(len(pred_cls_score_list)):
            # (C, H, W) -> (H, W, C) -> rows of 2 scores / 4 offsets
            cls_rows.append(
                pred_cls_score_list[lvl][bid].transpose(1, 2, 0).reshape(-1, 2))
            offset_rows.append(
                pred_bbox_offsets_list[lvl][bid].transpose(1, 2, 0).reshape(-1, 4))
        all_cls_scores.append(F.concat(cls_rows, axis=0))
        all_bbox_offsets.append(F.concat(offset_rows, axis=0))
    final_pred_cls_score = F.concat(all_cls_scores, axis=0)
    final_pred_bbox_offsets = F.concat(all_bbox_offsets, axis=0)
    return final_pred_cls_score, final_pred_bbox_offsets
def forward(self, It, S, D, pre_S, pre_D, pre_S_hat, pre_D_hat, pre_SD):
    """
    Structure/Detail recurrent step for one time stamp.

    args:
        It: the LR image for this time stamp
        S: the structure component of now LR image
        D: the detail component of now LR image
        pre_S: the structure component of pre LR image
        pre_D: the detail component of pre LR image
        pre_S_hat: the hidden state of structure component
        pre_D_hat: the hidden state of detail component
        pre_SD: the overall hidden state
    return:
        (HR image, new overall hidden state, S features, D features,
        upscaled S image, upscaled D image)
    """
    # spatially adapt the recurrent hidden state to the incoming frame
    pre_SD = self.hsa(It, pre_SD)  # auto select
    S = F.concat([pre_S, S, pre_S_hat, pre_SD], axis=1)
    S = self.pre_SD_S(S)
    D = F.concat([pre_D, D, pre_D_hat, pre_SD], axis=1)
    D = self.pre_SD_D(D)
    # interleaved structure/detail refinement blocks
    for i in range(self.blocknums):
        S, D = self.SDBlocks[i](S, D)
    pre_SD = self.conv_SD(S + D)
    S = self.convS(S)
    D = self.convD(D)
    I = self.convHR(F.concat([S, D], axis=1))
    return self.trans_HR(I), pre_SD, S, D, self.trans_S(S), self.trans_D(D)
def forward(self, x, x_prev): x_relu = self.relu(x_prev) # path 1 x_path1 = self.path_1(x_relu) # path 2 x_path2 = self.path_2(x_relu) # final path x_left = self.final_path_bn(F.concat([x_path1, x_path2], 1)) x_right = self.conv_1x1(x) x_comb_iter_0_left = self.comb_iter_0_left(x_right) x_comb_iter_0_right = self.comb_iter_0_right(x_left) x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right x_comb_iter_1_left = self.comb_iter_1_left(x_left) x_comb_iter_1_right = self.comb_iter_1_right(x_left) x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right x_comb_iter_2_left = self.comb_iter_2_left(x_right) x_comb_iter_2 = x_comb_iter_2_left + x_left x_comb_iter_3_left = self.comb_iter_3_left(x_left) x_comb_iter_3_right = self.comb_iter_3_right(x_left) x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right x_comb_iter_4_left = self.comb_iter_4_left(x_right) x_comb_iter_4 = x_comb_iter_4_left + x_right x_out = F.concat([x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1) return x_out
def roi_pool(
    rpn_fms, rois, stride, pool_shape, pooler_type="roi_align",
):
    """Pool ROI features from the FPN level matched to each box's scale.

    Args:
        rpn_fms: per-level feature maps, same ordering as ``stride``.
        rois: (N, 5) boxes as (batch_id, x1, y1, x2, y2); detached here.
        stride: per-level strides (powers of two).
        pool_shape: output spatial size.
        pooler_type: "roi_align" or "roi_pool".

    Returns:
        Pooled features re-sorted back into the original roi order.
    """
    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    # FPN-style level assignment: level = 4 + log2(sqrt(area) / 224), clamped
    assigned_level = F.floor(canonical_level + F.log(F.sqrt(box_area) / canonical_box_size) / np.log(2)).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # avoid empty assignment: append one dummy (zero) roi per level
    assigned_level = F.concat([
        assigned_level,
        F.arange(num_fms, dtype="int32", device=assigned_level.device)
    ], )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]
        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(rpn_fms[i], level_rois, pool_shape,
                                       mode="max", scale=1.0 / stride[i])
        elif pooler_type == "roi_align":
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # restore the original roi ordering, then drop the num_fms dummy rois
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
def forward(self, x):
    """Densely-connected conv stack: each stage concatenates its conv output
    with its own input along channels; returns (last dense feature, final conv).
    """
    feat = x
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
        feat = F.concat([conv(feat), feat], axis=1)
    x_out = self.conv_last(feat)
    return feat, x_out
def forward(self, old_x):
    """ShuffleNetV2 unit forward.

    stride 1: channel-shuffle splits the input; the identity half is
    concatenated with the main branch's output. stride 2: both branches
    consume the whole input (spatial downsampling), doubling the channels.

    Raises:
        ValueError: if the configured stride is neither 1 nor 2 (previously
            this case silently fell through and returned None).
    """
    if self.stride == 1:
        x_proj, x = self.channel_shuffle(old_x)
        return F.concat((x_proj, self.branch_main(x)), 1)
    elif self.stride == 2:
        x_proj = old_x
        x = old_x
        return F.concat((self.branch_proj(x_proj), self.branch_main(x)), 1)
    else:
        # fail loudly, matching the sibling ShuffleNet unit in this file
        raise ValueError("use stride 1 or 2, current stride {}".format(self.stride))
def roi_pool(
    rpn_fms, rois, stride, pool_shape, roi_type="roi_align",
):
    """Pool ROI features from the FPN level matched to each box's scale.

    NOTE: written against an older MegEngine API (``.ai`` indexing,
    ``shapeof``, ``F.roi_pooling``) -- keep in sync with that version.

    Args:
        rpn_fms: per-level feature maps, same ordering as ``stride``.
        rois: (N, 5) boxes as (batch_id, x1, y1, x2, y2).
        stride: per-level strides (powers of two).
        pool_shape: output spatial size.
        roi_type: "roi_align" or "roi_pool".

    Returns:
        Pooled features re-sorted back into the original roi order.
    """
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    # FPN-style level assignment: level = 4 + log2(sqrt(area) / 224), clamped
    level_assignments = F.floor(canonical_level + F.log(box_area.sqrt() / canonical_box_size) / np.log(2))
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # avoid empty assignment: append one dummy (zero) roi per level
    level_assignments = F.concat(
        [level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))],
    )
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof(-1)))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        mask = level_assignments == i
        _, inds = F.cond_take(mask == 1, mask)
        level_rois = rois.ai[inds]
        if roi_type == "roi_pool":
            pool_fm = F.roi_pooling(rpn_fms[i], level_rois, pool_shape,
                                    mode="max", scale=1.0 / stride[i])
        elif roi_type == "roi_align":
            pool_fm = F.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # restore the original roi ordering, then drop the num_fms dummy rois
    fm_order = F.concat(inds_list, axis=0)
    fm_order = F.argsort(fm_order.reshape(1, -1))[1].reshape(-1)
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature.ai[fm_order][:-num_fms]

    return pool_feature
def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
    """Sample training proposals and build classification/regression targets.

    Args:
        rpn_rois: proposals as (batch_id, x1, y1, x2, y2) rows, all images mixed.
        im_info: per-image meta; column 4 holds the valid gt-box count.
        gt_boxes: padded gt boxes per image; column 4 is the class label.

    Returns:
        (rois, labels, bbox_targets), all detached. At inference time the
        proposals are passed through unchanged with (None, None) targets.
    """
    if not self.training:
        return rpn_rois, None, None

    return_rois = []
    return_labels = []
    return_bbox_targets = []
    # get per image proposals and gt_boxes
    for bid in range(gt_boxes.shape[0]):
        num_valid_boxes = im_info[bid, 4].astype("int32")
        gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
        batch_inds = F.full((gt_boxes_per_img.shape[0], 1), bid)
        # gt boxes join the proposal set so they can be sampled as positives
        gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
        batch_roi_mask = rpn_rois[:, 0] == bid
        # all_rois : [batch_id, x1, y1, x2, y2]
        all_rois = F.concat([rpn_rois[batch_roi_mask], gt_rois])

        overlaps = layers.get_iou(all_rois[:, 1:5], gt_boxes_per_img)

        max_overlaps = overlaps.max(axis=1)
        gt_assignment = F.argmax(overlaps, axis=1).astype("int32")
        labels = gt_boxes_per_img[gt_assignment, 4]

        # ---------------- get the fg/bg labels for each roi ---------------#
        fg_mask = (max_overlaps >= self.cfg.fg_threshold) & (labels >= 0)
        bg_mask = ((max_overlaps >= self.cfg.bg_threshold_low)
                   & (max_overlaps < self.cfg.bg_threshold_high))

        num_fg_rois = int(self.cfg.num_rois * self.cfg.fg_ratio)

        fg_inds_mask = layers.sample_mask_from_labels(fg_mask, num_fg_rois, 1)
        num_bg_rois = int(self.cfg.num_rois - fg_inds_mask.sum())
        bg_inds_mask = layers.sample_mask_from_labels(bg_mask, num_bg_rois, 1)

        # background rois are forced to label 0; foreground keep class labels
        labels = labels * fg_inds_mask

        keep_mask = fg_inds_mask + bg_inds_mask
        _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
        # Add next line to avoid memory exceed
        keep_inds = keep_inds[:min(self.cfg.num_rois, keep_inds.shape[0])]

        labels = labels[keep_inds].astype("int32")
        rois = all_rois[keep_inds]
        target_boxes = gt_boxes_per_img[gt_assignment[keep_inds], :4]
        bbox_targets = self.box_coder.encode(rois[:, 1:5], target_boxes)
        bbox_targets = bbox_targets.reshape(-1, 4)

        return_rois.append(rois)
        return_labels.append(labels)
        return_bbox_targets.append(bbox_targets)

    return (
        F.concat(return_rois, axis=0).detach(),
        F.concat(return_labels, axis=0).detach(),
        F.concat(return_bbox_targets, axis=0).detach(),
    )
def forward(self, old_x):
    """ShuffleNetV2 unit: split/shuffle at stride 1, dual-branch downsample at
    stride 2; any other stride is rejected.
    """
    stride = self.stride
    if stride == 2:
        # downsampling unit: both branches consume the full input
        left = self.branch_proj(old_x)
        right = self.branch_main(old_x)
        return F.concat((left, right), 1)
    if stride == 1:
        # basic unit: shuffle, split channels between identity and main branch
        x_proj, x = self.channel_shuffle(old_x)
        return F.concat((x_proj, self.branch_main(x)), 1)
    raise ValueError("use stride 1 or 2, current stride {}".format(stride))
def train_generator_batch(image, label, *, opt, netG, netloss):
    """One training step of a 5-frame sliding-window recurrent SR generator.

    Temporal borders are handled by mirror-padding the window, e.g. the first
    window is [f2, f1, f0, f1, f2].

    Args:
        image: LR clip [B,T,C,H,W].
        label: HR ground-truth clip.
        opt: optimizer (keyword-only).
        netG: generator consuming a 5-frame window plus hidden state.
        netloss: reconstruction loss.

    Returns:
        Scalar loss tensor.
    """
    netG.train()
    B, T, _, H, W = image.shape
    HR_G = []
    # first frame: mirror-padded window [f2, f1, f0, f1, f2]
    pre_SD = mge.tensor(np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    LR = F.concat([
        F.add_axis(image[:, 2, ...], axis=1),
        F.add_axis(image[:, 1, ...], axis=1), image[:, 0:3, ...]
    ],
                  axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # first frame result
    HR_G.append(F.add_axis(imgHR, axis=1))
    # second frame: mirror-padded window [f1, f0, f1, f2, f3]
    LR = F.concat([F.add_axis(image[:, 1, ...], axis=1), image[:, 0:4, ...]],
                  axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # second frame result
    HR_G.append(F.add_axis(imgHR, axis=1))
    # interior frames: plain centered 5-frame windows
    for t in range(2, T - 2):
        imgHR, pre_SD = netG(image[:, t - 2:t + 3, ...], pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))
    # T-2 frame: mirror-padded at the tail
    LR = F.concat(
        [image[:, T - 4:T, ...],
         F.add_axis(image[:, -2, ...], axis=1)],
        axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # T-2 frame result
    HR_G.append(F.add_axis(imgHR, axis=1))
    # T-1 frame: mirror-padded at the tail
    LR = F.concat([
        image[:, T - 3:T, ...],
        F.add_axis(image[:, -2, ...], axis=1),
        F.add_axis(image[:, -3, ...], axis=1)
    ],
                  axis=1)
    imgHR, pre_SD = netG(LR, pre_SD)
    # T-1 frame result
    HR_G.append(F.add_axis(imgHR, axis=1))

    HR_G = F.concat(HR_G, axis=1)
    # assert HR_G.shape == HR_D.shape and HR_D.shape == HR_S.shape  # [B,T,C,H,W]
    loss = netloss(HR_G, label)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
def forward(self, LR, pre_HR, pre_SD):
    """Fuse the current LR window with the previous HR output and hidden state.

    Returns:
        (upscaled HR image, raw HR features, new hidden state).
    """
    # spatially adapt the recurrent hidden state to the incoming frames
    pre_SD = self.hsa(LR, pre_SD)
    # do mucan
    aligned = self.deal_before_SD_block(LR)  # [B, 5*24, H, W]
    # channel budget: 5*24 + 48 + 64
    fused = self.pre_SD_S(F.concat([aligned, pre_SD, pre_HR], axis=1))
    fused = self.convs(fused)
    hidden_state = self.hidden(F.concat([fused, pre_SD], axis=1))
    HR = self.tail(fused)
    return self.trans_HR(HR), HR, hidden_state
def train_generator_batch(image, label, *, gm, netG, netloss):
    """Training step: per-frame features, forward + backward temporal
    aggregation, then upsampling of the fused hidden pairs.

    Args:
        image: LR clip [B,T,3,h,w].
        label: HR ground-truth clip (4x resolution).
        gm: autodiff GradManager (keyword-only).
        netG: generator with .rgb (feature extractor), .aggr (temporal
            aggregation) and .upsample sub-modules.
        netloss: masked reconstruction loss.

    Returns:
        Scalar loss (all-reduced mean when running distributed).
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    # Edge-weighted loss mask, currently disabled (kept for reference):
    # np_weight = [0,-1,0,-1,4,-1,0,-1,0] # (1,1,3,3)
    # conv_weight = mge.tensor(np.array(np_weight).astype(np.float32)).reshape(1,1,3,3)
    # HR_mask = F.mean(label, axis=2, keepdims=False) # [B,T,H,W], depthwise over T
    # HR_mask = HR_mask.reshape(B*T, 1, 4*h, 4*w)
    # HR_mask = F.conv2d(HR_mask, conv_weight, padding=1)
    # # HR_mask = (F.abs(HR_mask) > 0.1).astype("float32") # [B*T, 1, H, W]
    # HR_mask = HR_mask.reshape(B, T, 1, 4*h, 4*w)
    # HR_mask = 1 + HR_mask * 0.1
    HR_mask = 1
    netG.train()
    with gm:
        forward_hiddens = []
        backward_hiddens = []
        res = []
        # extract features for all frames in one batched pass
        image = image.reshape(B * T, 3, h, w)
        image = netG.rgb(image).reshape(B, T, -1, h, w)
        # T=0
        now_frame = image[:, 0, ...]
        hidden = now_frame
        forward_hiddens.append(now_frame)
        for i in range(1, T):
            now_frame = image[:, i, ...]
            hidden = netG.aggr(F.concat([hidden, now_frame], axis=1))
            forward_hiddens.append(hidden)
        # T=-1
        now_frame = image[:, T - 1, ...]
        hidden = now_frame
        backward_hiddens.append(now_frame)
        for i in range(T - 2, -1, -1):
            now_frame = image[:, i, ...]
            hidden = netG.aggr(F.concat([hidden, now_frame], axis=1))
            backward_hiddens.append(hidden)
        # do upsample for all frames
        for i in range(T):
            # pair forward and backward hiddens belonging to the same frame
            res.append(
                netG.upsample(
                    F.concat([forward_hiddens[i], backward_hiddens[T - i - 1]],
                             axis=1)))
        res = F.stack(res, axis=1)  # [B,T,3,H,W]
        res = res + biup
        # an extra edge loss on the label's edge map could be added here
        loss = netloss(res, label, HR_mask)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
    return loss
def train_generator_batch(image, label, *, opt, netG, netloss):
    """One training step for the structure/detail recurrent SR generator.

    Frames are decomposed into low-frequency structure (S: bilinear down+up)
    and high-frequency detail (D = frame - S); the generator runs recurrently
    over time and the loss compares HR, D and S predictions to the label's
    decomposition.

    Args:
        image: LR clip [B,T,C,H,W].
        label: HR clip [B,T,C,4H,4W].
        opt: optimizer (keyword-only).
        netG: generator; returns (HR, hidden, S_hat, D_hat, img_S, img_D).
        netloss: composite loss over (HR, D, S) predictions.

    Returns:
        Scalar loss tensor.
    """
    netG.train()
    B, T, _, H, W = image.shape
    # image: structure = blurred (down+up) copy; detail = residual
    image_S = image.reshape((B * T, -1, H, W))
    image_S = F.interpolate(image_S, scale_factor=[0.25, 0.25])
    image_S = F.interpolate(image_S, size=[H, W])
    image_S = image_S.reshape((B, T, -1, H, W))
    image_D = image - image_S
    # label: same decomposition at HR resolution
    label_S = label.reshape((B * T, -1, 4 * H, 4 * W))
    label_S = F.interpolate(label_S, scale_factor=[0.25, 0.25])
    label_S = F.interpolate(label_S, size=[4 * H, 4 * W])
    label_S = label_S.reshape((B, T, -1, 4 * H, 4 * W))
    label_D = label - label_S

    HR_G = []
    HR_D = []
    HR_S = []
    pre_S_hat = mge.tensor(
        np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    pre_D_hat = F.zeros_like(pre_S_hat)
    pre_SD = F.zeros_like(pre_S_hat)

    # first frame uses frame 1 as its "previous" frame
    imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
        image[:, 0, ...], image_S[:, 0, ...], image_D[:, 0, ...],
        image_S[:, 1, ...], image_D[:, 1, ...], pre_S_hat, pre_D_hat, pre_SD)
    HR_G.append(F.add_axis(imgHR, axis=1))
    HR_D.append(F.add_axis(img_D, axis=1))
    HR_S.append(F.add_axis(img_S, axis=1))
    for t in range(1, T):
        imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
            image[:, t, ...], image_S[:, t, ...], image_D[:, t, ...],
            image_S[:, t - 1, ...], image_D[:, t - 1, ...], pre_S_hat,
            pre_D_hat, pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))
        # BUG FIX: img_S / img_D were swapped here relative to the first-frame
        # handling above, so the detail and structure losses were computed
        # against the wrong component maps for every frame after the first
        HR_D.append(F.add_axis(img_D, axis=1))
        HR_S.append(F.add_axis(img_S, axis=1))

    HR_G = F.concat(HR_G, axis=1)
    HR_D = F.concat(HR_D, axis=1)
    HR_S = F.concat(HR_S, axis=1)
    # assert HR_G.shape == HR_D.shape and HR_D.shape == HR_S.shape  # [B,T,C,H,W]
    loss = netloss(HR_G, HR_D, HR_S, label, label_D, label_S)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
def get_ground_truth(self, anchors, batched_gt_boxes, batched_valid_gt_box_number):
    """Assign classification labels and box regression targets to every anchor.

    Matching labels: 0 below ``negative_thresh``, -1 (ignore) in between,
    1 at or above ``positive_thresh``. Category labels come from the matched
    gt box (column 4); background gets 0 and ignored anchors get -1.

    Args:
        anchors: (A, 4) anchor boxes.
        batched_gt_boxes: padded gt boxes per image; column 4 is the category.
        batched_valid_gt_box_number: valid gt count per image.

    Returns:
        (labels_cat, bbox_targets) stacked over the batch, gradients blocked.
    """
    total_anchors = anchors.shape[0]
    labels_cat_list = []
    bbox_targets_list = []
    for b_id in range(self.batch_size):
        gt_boxes = batched_gt_boxes[b_id, : batched_valid_gt_box_number[b_id]]

        overlaps = layers.get_iou(anchors, gt_boxes[:, :4])
        argmax_overlaps = F.argmax(overlaps, axis=1)
        # IoU of each anchor with its best-matching gt box
        max_overlaps = overlaps.ai[
            F.linspace(0, total_anchors - 1, total_anchors).astype(np.int32),
            argmax_overlaps,
        ]

        # start at -1 everywhere; becomes 0 below negative_thresh,
        # stays -1 in [negative_thresh, positive_thresh), becomes 1 above
        labels = mge.tensor([-1]).broadcast(total_anchors)
        labels = labels * (max_overlaps >= self.cfg.negative_thresh)
        labels = labels * (max_overlaps < self.cfg.positive_thresh) + (
            max_overlaps >= self.cfg.positive_thresh
        )

        bbox_targets = self.box_coder.encode(
            anchors, gt_boxes.ai[argmax_overlaps, :4]
        )

        labels_cat = gt_boxes.ai[argmax_overlaps, 4]
        # zero the category where the matching label is 0 (background)
        labels_cat = labels_cat * (1.0 - F.less_equal(F.abs(labels), 1e-5))
        # propagate ignore (-1) into the category labels
        ignore_mask = F.less_equal(F.abs(labels + 1), 1e-5)
        labels_cat = labels_cat * (1 - ignore_mask) - ignore_mask

        # assign low_quality boxes: each gt keeps its best anchor as positive
        if self.cfg.allow_low_quality:
            gt_argmax_overlaps = F.argmax(overlaps, axis=0)
            labels_cat = labels_cat.set_ai(gt_boxes[:, 4])[gt_argmax_overlaps]
            matched_low_bbox_targets = self.box_coder.encode(
                anchors.ai[gt_argmax_overlaps, :], gt_boxes[:, :4]
            )
            bbox_targets = bbox_targets.set_ai(matched_low_bbox_targets)[
                gt_argmax_overlaps, :
            ]

        labels_cat_list.append(F.add_axis(labels_cat, 0))
        bbox_targets_list.append(F.add_axis(bbox_targets, 0))

    return (
        F.zero_grad(F.concat(labels_cat_list, axis=0)),
        F.zero_grad(F.concat(bbox_targets_list, axis=0)),
    )
def emd_loss(p_b0, p_c0, p_b1, p_c1, targets, labels):
    """EMD (set) loss over two box/score prediction pairs per location.

    The two predictions are stacked into one row stream; regression loss only
    counts foreground rows (label > 0) and classification loss counts all
    non-ignored rows (label >= 0). Pairs of per-row losses are summed so the
    caller can take the min over prediction permutations.

    Returns:
        (N, 1) per-location combined loss.
    """
    pred_box = F.concat([p_b0, p_b1], axis=1).reshape(-1, p_b0.shapeof()[-1])
    pred_score = F.concat([p_c0, p_c1], axis=1).reshape(-1, p_c0.shapeof()[-1])
    targets = targets.reshape(-1, 4)
    labels = labels.reshape(-1)

    fg_masks = F.greater(labels, 0)
    non_ignore_masks = F.greater_equal(labels, 0)
    # loss for regression
    loss_box_reg = smooth_l1_loss(pred_box, targets, config.rcnn_smooth_l1_beta)
    # loss for classification
    loss_cls = softmax_loss(pred_score, labels)
    loss = loss_cls * non_ignore_masks + loss_box_reg * fg_masks
    # sum the two predictions belonging to the same location
    loss = loss.reshape(-1, 2).sum(axis=1)
    return loss.reshape(-1, 1)
def forward(self, image, im_info, gt_boxes=None):
    """Single-stage detector forward: losses when training, boxes otherwise.

    Args:
        image: input image batch (preprocessed internally).
        im_info: per-image meta; the scale computation below reads columns
            0..3 as (resized_h, resized_w, orig_h, orig_w).
            # assumed column meaning -- TODO confirm against the dataloader
        gt_boxes: ground-truth boxes, only needed when training.

    Returns:
        Training: dict of losses. Inference: (sigmoid scores, clipped boxes)
        for the single image in the batch.
    """
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets = self.head(features)

    # (N, C, H, W) -> (N, H*W*A, num_classes / 4)
    box_logits_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, self.cfg.num_classes)
        for _ in box_logits
    ]
    box_offsets_list = [
        _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
        for _ in box_offsets
    ]

    anchors_list = self.anchor_generator(features)

    all_level_box_logits = F.concat(box_logits_list, axis=1)
    all_level_box_offsets = F.concat(box_offsets_list, axis=1)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if self.training:
        loss_dict = self.get_losses(all_level_anchors, all_level_box_logits,
                                    all_level_box_offsets, gt_boxes, im_info)
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict
    else:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        transformed_box = self.box_coder.decode(all_level_anchors,
                                                all_level_box_offsets[0])
        transformed_box = transformed_box.reshape(-1, 4)

        # map boxes from the resized image back to the original resolution
        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        transformed_box = transformed_box / F.concat(
            [scale_w, scale_h, scale_w, scale_h], axis=0)
        clipped_box = layers.get_clipped_boxes(transformed_box,
                                               im_info[0, 2:4]).reshape(-1, 4)
        all_level_box_scores = F.sigmoid(all_level_box_logits)
        return all_level_box_scores[0], clipped_box
def train_generator_batch(optical, sar, label, *, opt, netG):
    """One matching-network training step plus a training-set position metric.

    Args:
        optical: optical image batch.
        sar: SAR image batch.
        label: ground truth; columns 0:2 hold the target position.
        opt: optimizer (keyword-only).
        netG: network producing (cls_score, offsets, ctr_score) with a .loss().

    Returns:
        [loss_cls, loss_reg, loss_ctr, mean L2 error of the predicted position].
    """
    netG.train()
    cls_score, offsets, ctr_score = netG(sar, optical)
    loss, loss_cls, loss_reg, loss_ctr = netG.loss(cls_score, offsets,
                                                   ctr_score, label)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass

    # performance in the training data
    B, _, _, _ = cls_score.shape
    cls_score = F.sigmoid(cls_score)  # * ctr_score
    cls_score = cls_score.reshape(B, -1)
    # find the max
    max_id = F.argmax(cls_score, axis=1)  # (B, )
    pred_box = get_box(netG.fm_ctr, offsets)  # (B,4,H,W)
    pred_box = pred_box.reshape(B, 4, -1)
    output = []
    for i in range(B):
        # pick the box at the highest-scoring location of each sample
        output.append(F.add_axis(pred_box[i, :, max_id[i]], axis=0))  # (1, 4)
    output = F.concat(output, axis=0)  # (B, 4)
    return [
        loss_cls, loss_reg, loss_ctr,
        F.norm(output[:, 0:2] - label[:, 0:2], p=2, axis=1).mean()
    ]
def forward(self, x):
    """Apply the four parallel branches and join them along the channel axis."""
    branch_outputs = [
        self.branch1(x),
        self.branch2(x),
        self.branch3(x),
        self.branch4(x),
    ]
    return F.concat(branch_outputs, 1)