def forward(self, input, gt_boxes_list, anchors_np, rpn_targets=None):
    """Single-stage (RetinaNet-style) forward pass.

    Args:
        input: batched image tensor, already on the GPU.
        gt_boxes_list: per-image ground-truth boxes; used only when
            `rpn_targets` must be computed here during training.
        anchors_np: numpy anchor array matching the pyramid layout.
        rpn_targets: optional precomputed RPN targets; when None and
            training, they are computed from `gt_boxes_list`.

    Returns:
        (rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets,
        rpn_bbwghts); the three target entries are None at eval time.
    """
    # NOTE: the original also built `anchors = torch.from_numpy(anchors_np).cuda()`
    # here but never used it — removed as dead code.
    endpoints = self.backbone(input)
    # Extra coarse levels P6/P7 are stacked on top of the FPN outputs.
    P6 = self.conv6(endpoints['C5'])
    P7 = self.conv7(self.relu6(P6))
    Ps = self.pyramid(endpoints)
    Ps.append(P6)
    Ps.append(P7)
    # The shared RPN head is applied to every pyramid level.
    rpn_outs = [self.rpn(f) for f in Ps]
    rpn_logit, rpn_box = self._rerange(rpn_outs)
    rpn_prob = F.sigmoid(rpn_logit) if self.rpn_activation == 'sigmoid' \
        else F.softmax(rpn_logit, dim=-1)
    # BUG FIX: detach() is out-of-place; the original discarded its result,
    # so it was a no-op and gradients still flowed through rpn_prob.
    rpn_prob = rpn_prob.detach()
    if self.is_training:
        if rpn_targets is None:
            # Fall back to computing anchor targets on the fly.
            rpn_targets = compute_rpn_targets_in_batch(
                gt_boxes_list, anchors_np)
            rpn_labels, _, rpn_bbtargets, rpn_bbwghts = everything2cuda(
                rpn_targets)
        else:
            rpn_labels, rpn_bbtargets, rpn_bbwghts = rpn_targets
    else:
        rpn_labels = rpn_bbtargets = rpn_bbwghts = None
    return rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts
def compute_rcnn_targets(self, rois, img_ids, gt_boxes_list):
    """Build RCNN classification/regression targets for sampled RoIs.

    Args:
        rois: RoI tensor produced by the first stage.
        img_ids: per-RoI image indices.
        gt_boxes_list: list (one entry per image) of numpy GT box arrays,
            flattened here to shape (-1, 5) — 4 coords + class label.

    Returns:
        (rcnn_labels, rcnn_bbox_targets, rcnn_bbwghts), with both bbox
        tensors viewed as shape (-1, 4).
    """
    # Tag every GT box with the index of the image it belongs to.
    gt_img_inds = [
        np.full((gt.shape[0], ), i, dtype=np.int64)
        for i, gt in enumerate(gt_boxes_list)
    ]
    gt_img_inds = np.concatenate(gt_img_inds, axis=0)
    gt_boxes = np.concatenate(gt_boxes_list, axis=0).astype(np.float32)
    # BUG FIX: removed `np.set_printoptions(precision=0, suppress=True)` —
    # leftover debug code that mutated numpy's *global* print state on
    # every call.
    if gt_boxes.size > 0:
        gt_boxes = everything2cuda(gt_boxes).view(-1, 5)
        gt_img_inds = everything2cuda(gt_img_inds).view(-1)
        rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = self.roi_target(
            rois, img_ids, gt_boxes, gt_img_inds)
    else:
        # No ground truth anywhere in the batch: every RoI is background
        # (label 0) with zero regression targets/weights.
        num_rois = rois.size(0)
        rcnn_labels = torch.LongTensor(num_rois).zero_().cuda()
        rcnn_bbtargets = torch.FloatTensor(num_rois, 4).zero_().cuda()
        rcnn_bbwghts = torch.FloatTensor(num_rois, 4).zero_().cuda()
    # Consistent naming: the original mixed `rcnn_bbwgts`/`rcnn_bbwghts`.
    rcnn_bbwghts = rcnn_bbwghts.view(-1, 4)
    rcnn_bbox_targets = rcnn_bbtargets.view(-1, 4)
    return rcnn_labels, rcnn_bbox_targets, rcnn_bbwghts
global_step = 0 timer = Timer() for ep in range(start_epoch, cfg.max_epoch): if ep in cfg.lr_decay_epoches and cfg.solver == 'SGD': lr *= cfg.lr_decay adjust_learning_rate(optimizer, lr) print('adjusting learning rate {:.6f}'.format(lr)) for step, batch in enumerate(train_data): timer.tic() input, anchors_np, im_scale_list, image_ids, gt_boxes_list, rpn_targets, _, _ = batch # gt_boxes_list = ScatterList(gt_boxes_list) input = everything2cuda(input) rpn_targets = everything2cuda(rpn_targets) # outs = model(input, gt_boxes_list, anchors_np, rpn_targets=rpn_targets) if cfg.model_type == 'maskrcnn': rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \ rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = outs # outputs = [] # targets = [] elif cfg.model_type == 'retinanet': # Thinking like this: single-stage detector take rpn results as final results rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts = outs #
def main():
    """Evaluation entry point: restores a checkpoint, runs the detector over
    the test set, logs rendered detections to TensorBoard, and returns the
    accumulated stage-1 / stage-2 / ground-truth results.
    """
    # config model and lr
    # Anchors per location; anchor_scales may be a per-level list of lists.
    num_anchors = len(cfg.anchor_ratios) * len(cfg.anchor_scales[0]) * len(cfg.anchor_shift) \
        if isinstance(cfg.anchor_scales[0], list) else \
        len(cfg.anchor_ratios) * len(cfg.anchor_scales)
    resnet = resnet50 if cfg.backbone == 'resnet50' else resnet101
    detection_model = MaskRCNN if cfg.model_type.lower(
    ) == 'maskrcnn' else RetinaNet
    model = detection_model(resnet(pretrained=True, maxpool5=cfg.maxpool5),
                            num_classes=cfg.num_classes,
                            num_anchors=num_anchors,
                            strides=cfg.strides,
                            in_channels=cfg.in_channels,
                            f_keys=cfg.f_keys,
                            num_channels=256,
                            is_training=False,
                            activation=cfg.class_activation)
    lr = cfg.lr
    start_epoch = 0
    # Evaluation requires a checkpoint; refuse to run from random weights.
    if cfg.restore is not None:
        meta = load_net(cfg.restore, model)
        print(meta)
        if meta[0] >= 0:
            start_epoch = meta[0] + 1
            lr = meta[1]
        print('Restored from %s, starting from %d epoch, lr:%.6f' %
              (cfg.restore, start_epoch, lr))
    else:
        raise ValueError('restore is not set')
    model.cuda()
    model.eval()
    class_names = test_data.dataset.classes
    print('dataset len: {}'.format(len(test_data.dataset)))
    tb_dir = os.path.join(cfg.train_dir, cfg.backbone + '_' + cfg.datasetname,
                          'test', time.strftime("%h%d_%H"))
    writer = tbx.FileWriter(tb_dir)
    # main loop
    timer_all = Timer()
    timer_post = Timer()
    all_results1 = []    # stage-1 detections, formatted for the dataset
    all_results2 = []    # stage-2 detections (falls back to stage-1 below)
    all_results_gt = []  # ground-truth boxes in the same detection format
    for step, batch in enumerate(test_data):
        timer_all.tic()
        # NOTE: Targets is in NHWC order!!
        # input, anchors_np, im_scale_list, image_ids, gt_boxes_list = batch
        # input = everything2cuda(input)
        input_t, anchors_np, im_scale_list, image_ids, gt_boxes_list = batch
        input = everything2cuda(input_t, volatile=True)
        # No GT is passed, so the model skips target computation at eval time.
        outs = model(input, gt_boxes_list=None, anchors_np=anchors_np)
        if cfg.model_type == 'maskrcnn':
            rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
                rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = outs
            outputs = [
                rois, roi_img_ids, rpn_logit, rpn_box, rpn_prob, rcnn_logit,
                rcnn_box, rcnn_prob, anchors
            ]
            targets = []
        elif cfg.model_type == 'retinanet':
            rpn_logit, rpn_box, rpn_prob, _, _, _ = outs
            outputs = [rpn_logit, rpn_box, rpn_prob]
        else:
            raise ValueError('Unknown model type: %s' % cfg.model_type)
        timer_post.tic()
        dets_dict = model.get_final_results(
            outputs,
            everything2cuda(anchors_np),
            score_threshold=0.01,
            max_dets=cfg.max_det_num * cfg.batch_size,
            overlap_threshold=cfg.overlap_threshold)
        if 'stage1' in dets_dict:
            Dets = dets_dict['stage1']
        else:
            raise ValueError('No stage1 results:', dets_dict.keys())
        # Single-stage models produce no second stage; reuse stage-1 results.
        Dets2 = dets_dict['stage2'] if 'stage2' in dets_dict else Dets
        t3 = timer_post.toc()
        t = timer_all.toc()
        # NOTE(review): deepcopy suggests to_detection_format mutates its
        # input (e.g. rescaling boxes in place) — confirm against dataset.
        formal_res1 = dataset.to_detection_format(copy.deepcopy(Dets),
                                                  image_ids, im_scale_list)
        formal_res2 = dataset.to_detection_format(copy.deepcopy(Dets2),
                                                  image_ids, im_scale_list)
        all_results1 += formal_res1
        all_results2 += formal_res2
        # Convert GT boxes to the 6-column detection layout
        # [x1, y1, x2, y2, score=1, class], keeping only labels >= 1.
        Dets_gt = []
        for gb in gt_boxes_list:
            cpy_mask = gb[:, 4] >= 1
            gb = gb[cpy_mask]
            n = cpy_mask.astype(np.int32).sum()
            res_gt = np.zeros((n, 6))
            res_gt[:, :4] = gb[:, :4]
            res_gt[:, 4] = 1.
            res_gt[:, 5] = gb[:, 4]
            Dets_gt.append(res_gt)
        formal_res_gt = dataset.to_detection_format(Dets_gt, image_ids,
                                                    im_scale_list)
        all_results_gt += formal_res_gt
        # Periodically render detections / GT and push them to TensorBoard.
        if step % cfg.log_image == 0:
            input_np = everything2numpy(input)
            summary_out = []
            Is = single_shot.draw_detection(input_np,
                                            Dets,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is, image_ids, step, prefix='Detection/')
            Is = single_shot.draw_detection(input_np,
                                            Dets2,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is, image_ids, step, prefix='Detection2/')
            Imgs = single_shot.draw_gtboxes(input_np,
                                            gt_boxes_list,
                                            class_names=class_names)
            Imgs = Imgs.astype(np.uint8)
            summary_out += log_images(Imgs, image_ids, float(step), prefix='GT')
            for s in summary_out:
                writer.add_summary(s, float(step))
        if step % cfg.display == 0:
            print(time.strftime("%H:%M:%S ") +
                  'Epoch %d iter %d: speed %.3fs (%.3fs)' % (0, step, t, t3) +
                  ' ImageIds: ' + ', '.join(str(s) for s in image_ids),
                  end='\r')
    res_dict = {
        'stage1': all_results1,
        'stage2': all_results2,
        'gt': all_results_gt
    }
    return res_dict
def forward(self, input, gt_boxes_list, anchors_np, rpn_targets=None):
    """Two-stage (Mask R-CNN-style) forward pass: FPN + RPN proposals,
    then RoI-aligned second-stage classification/regression.

    Args:
        input: batched image tensor, already on the GPU.
        gt_boxes_list: per-image ground-truth boxes (training only).
        anchors_np: numpy anchor array matching the pyramid layout.
        rpn_targets: optional precomputed RPN targets.

    Returns:
        A 15-tuple: RPN outputs and targets, anchors, sampled rois and
        their image ids, and RCNN outputs and targets. Target entries
        are None at eval time.
    """
    batch_size = input.size(0)
    anchors = torch.from_numpy(anchors_np).cuda()
    endpoints = self.backbone(input)
    Ps = self.pyramid(endpoints)
    # The shared RPN head is applied to every pyramid level.
    rpn_outs = [self.rpn(f) for f in Ps]
    rpn_logit, rpn_box = self._rerange(rpn_outs, last_dimension=2)
    rpn_prob = F.sigmoid(rpn_logit) if self.rpn_activation == 'sigmoid' \
        else F.softmax(rpn_logit, dim=-1)
    # BUG FIX: detach() is out-of-place; the original discarded its result
    # (a no-op). Keep the detached tensor so proposal scores carry no grad.
    rpn_prob = rpn_prob.detach()
    if self.is_training:
        assert input.size(0) == len(gt_boxes_list), '%d vs %d' % (
            input.size(0), len(gt_boxes_list))
        if rpn_targets is None:
            rpn_targets = compute_rpn_targets_in_batch(
                gt_boxes_list, anchors_np)
            rpn_labels, _, rpn_bbtargets, rpn_bbwghts = everything2cuda(
                rpn_targets)
        else:
            rpn_labels, rpn_bbtargets, rpn_bbwghts = rpn_targets
        # Wider proposal pool + NMS cap at train time, then sample rois
        # against the GT for the second stage.
        rois, probs, roi_img_ids = self._stage_one_results(
            rpn_box, rpn_prob, anchors,
            top_n=20000 * batch_size,
            overlap_threshold=0.7,
            top_n_post_nms=2000)
        rois, roi_labels, roi_img_ids = sample_rois(
            rois, roi_img_ids, gt_boxes_list)
    else:
        rpn_labels = rpn_bbtargets = rpn_bbwghts = None
        rois, probs, roi_img_ids = self._stage_one_results(
            rpn_box, rpn_prob, anchors,
            top_n=6000 * batch_size,
            overlap_threshold=0.7)
        # Drop low-confidence proposals before the second stage.
        rois, probs, roi_img_ids = self._thresholding(
            rois, probs, roi_img_ids, 0.05)
    rcnn_feats = self.pyramid_roi_align(Ps, rois, roi_img_ids)
    rcnn_logit, rcnn_box = self.rcnn(rcnn_feats)
    rcnn_prob = F.sigmoid(rcnn_logit) if self.activation == 'sigmoid' \
        else F.softmax(rcnn_logit, dim=-1)
    # BUG FIX: same discarded-detach no-op as above.
    rcnn_prob = rcnn_prob.detach()
    if self.is_training:
        rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = self.compute_rcnn_targets(
            rois, roi_img_ids, gt_boxes_list)
        assert rcnn_labels.size(0) == rois.size(0) == roi_img_ids.size(0)
    else:
        rcnn_labels = rcnn_bbtargets = rcnn_bbwghts = None
    return rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
        rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts
summary_out = [] global_step = 0 timer = Timer() for ep in range(start_epoch, cfg.max_epoch): if ep in cfg.lr_decay_epoches and cfg.solver == 'SGD': lr *= cfg.lr_decay adjust_learning_rate(optimizer, lr) print('adjusting learning rate %.6f' % lr) for step, batch in enumerate(train_data): timer.tic() input, anchors_np, im_scale_list, image_ids, gt_boxes_list, rpn_targets, _, _ = batch gt_boxes_list = ScatterList(gt_boxes_list) input = everything2cuda(input) rpn_targets = everything2cuda(rpn_targets) outs = model(input, gt_boxes_list, anchors_np, rpn_targets=rpn_targets) if cfg.model_type == 'maskrcnn': rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \ rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = outs outputs = [ rois, roi_img_ids, rpn_logit, rpn_box, rpn_prob, rcnn_logit, rcnn_box, rcnn_prob, anchors ] targets = [ rpn_labels, rpn_bbtargets, rpn_bbwghts, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts ] elif cfg.model_type == 'retinanet':
def forward(self, input, gt_boxes_list, anchors_np, rpn_targets=None):
    """Two-stage (Mask R-CNN-style) forward pass — annotated variant.

    Args:
        input: batched image tensor, already on the GPU.
        gt_boxes_list: per-image ground-truth boxes (training only).
        anchors_np: numpy anchor array matching the pyramid layout.
        rpn_targets: optional precomputed RPN targets.

    Returns:
        A 15-tuple: RPN outputs and targets, anchors, sampled rois and
        their image ids, and RCNN outputs and targets. Target entries
        are None at eval time.
    """
    batch_size = input.size(0)
    # torch.from_numpy(): the returned tensor and ndarray share memory.
    anchors = torch.from_numpy(anchors_np).cuda()
    endpoints = self.backbone(input)
    # Currently no ZoomNet.
    Ps = self.pyramid(endpoints)
    rpn_outs = []
    # f is one level ("floor") of the feature pyramid.
    for i, f in enumerate(Ps):
        rpn_outs.append(self.rpn(f))
    rpn_logit, rpn_box = self._rerange(rpn_outs, last_dimension=2)
    rpn_prob = F.sigmoid(rpn_logit) if self.rpn_activation == 'sigmoid' \
        else F.softmax(rpn_logit, dim=-1)
    # detach() is out-of-place, so the result must be assigned back.
    rpn_prob = rpn_prob.detach()
    if self.is_training:
        assert input.size(0) == len(gt_boxes_list), \
            '{:d} vs {:d}'.format(input.size(0), len(gt_boxes_list))
        if rpn_targets is None:
            rpn_targets = compute_rpn_targets_in_batch(
                gt_boxes_list, anchors_np)
            rpn_labels, _, rpn_bbtargets, rpn_bbwghts = everything2cuda(
                rpn_targets)
        else:
            rpn_labels, rpn_bbtargets, rpn_bbwghts = rpn_targets
        rois, probs, roi_img_ids = self._stage_one_results(
            rpn_box, rpn_prob, anchors,
            top_n=20000 * batch_size,
            overlap_threshold=0.7,
            top_n_post_nms=2000)
        rois, roi_labels, roi_img_ids = sample_rois(
            rois, roi_img_ids, gt_boxes_list)
    else:
        rpn_labels = rpn_bbtargets = rpn_bbwghts = None
        rois, probs, roi_img_ids = self._stage_one_results(
            rpn_box, rpn_prob, anchors,
            top_n=6000 * batch_size,
            overlap_threshold=0.7)
        rois, probs, roi_img_ids = self._thresholding(
            rois, probs, roi_img_ids, 0.05)
    rcnn_feats = self.pyramid_roi_align(Ps, rois, roi_img_ids)
    rcnn_logit, rcnn_box = self.rcnn(rcnn_feats)
    rcnn_prob = F.sigmoid(rcnn_logit) if self.activation == 'sigmoid' \
        else F.softmax(rcnn_logit, dim=-1)
    rcnn_prob = rcnn_prob.detach()
    if self.is_training:
        # BUG FIX: compute_rcnn_targets() was called with no arguments,
        # which raises TypeError — the method requires
        # (rois, img_ids, gt_boxes_list); see the sibling implementation.
        rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = self.compute_rcnn_targets(
            rois, roi_img_ids, gt_boxes_list)
        assert rcnn_labels.size(0) == rois.size(0) == roi_img_ids.size(0), \
            'Dimension mismatch.'
    else:
        rcnn_labels = rcnn_bbtargets = rcnn_bbwghts = None
    return rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
        rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts
def main():
    """Evaluation entry point (single-shot variant): restores a checkpoint,
    caches dataset anchors on the model, runs the detector over the test
    set, logs rendered detections to TensorBoard, and returns accumulated
    results.
    """
    # config model and lr
    # Anchors per location; anchor_scales may be a per-level list of lists.
    num_anchors = len(cfg.anchor_ratios) * len(cfg.anchor_scales[0]) \
        if isinstance(cfg.anchor_scales[0], list) else \
        len(cfg.anchor_ratios) * len(cfg.anchor_scales)
    resnet = resnet50 if cfg.backbone == 'resnet50' else resnet101
    detection_model = MaskRCNN if cfg.model_type.lower(
    ) == 'maskrcnn' else RetinaNet
    model = detection_model(resnet(pretrained=True),
                            num_classes=cfg.num_classes,
                            num_anchors=num_anchors,
                            strides=cfg.strides,
                            in_channels=cfg.in_channels,
                            f_keys=cfg.f_keys,
                            num_channels=256,
                            is_training=False,
                            activation=cfg.class_activation)
    lr = cfg.lr
    start_epoch = 0
    # Evaluation requires a checkpoint; refuse to run from random weights.
    if cfg.restore is not None:
        meta = load_net(cfg.restore, model)
        print(meta)
        if meta[0] >= 0:
            start_epoch = meta[0] + 1
            lr = meta[1]
        print('Restored from %s, starting from %d epoch, lr:%.6f' %
              (cfg.restore, start_epoch, lr))
    else:
        raise ValueError('restore is not set')
    model.cuda()
    model.eval()
    # Anchors come from the dataset and are cached on the model, so the
    # per-batch calls below do not pass them explicitly.
    ANCHORS = np.vstack(
        [anc.reshape([-1, 4]) for anc in test_data.dataset.ANCHORS])
    model.anchors = everything2cuda(ANCHORS.astype(np.float32))
    class_names = test_data.dataset.classes
    print('dataset len: {}'.format(len(test_data.dataset)))
    tb_dir = os.path.join(cfg.train_dir, cfg.backbone + '_' + cfg.datasetname,
                          'test', time.strftime("%h%d_%H"))
    writer = tbx.FileWriter(tb_dir)
    summary_out = []
    # main loop
    timer_all = Timer()
    timer_post = Timer()
    all_results1 = []    # stage-1 detections, formatted for the dataset
    all_results2 = []    # stage-2 detections (falls back to stage-1 below)
    # NOTE(review): all_results_gt is never populated in this variant, so
    # the returned 'gt' list is always empty — confirm this is intended.
    all_results_gt = []
    for step, batch in enumerate(test_data):
        timer_all.tic()
        # NOTE: Targets is in NHWC order!!
        input, image_ids, gt_boxes_list, image_ori = batch
        input = everything2cuda(input)
        # NOTE(review): `outs` is unused and get_final_results() is called
        # without the forward outputs, so the model presumably caches its
        # raw results internally — confirm against the model implementation.
        outs = model(input)
        timer_post.tic()
        dets_dict = model.get_final_results(
            score_threshold=0.05,
            max_dets=cfg.max_det_num * cfg.batch_size,
            overlap_threshold=cfg.overlap_threshold)
        if 'stage1' in dets_dict:
            Dets = dets_dict['stage1']
        else:
            raise ValueError('No stage1 results:', dets_dict.keys())
        # Single-stage models produce no second stage; reuse stage-1 results.
        Dets2 = dets_dict['stage2'] if 'stage2' in dets_dict else Dets
        t3 = timer_post.toc()
        t = timer_all.toc()
        # Boxes are mapped back to each image's original size here.
        formal_res1 = dataset.to_detection_format(
            copy.deepcopy(Dets), image_ids,
            ori_sizes=[im.shape for im in image_ori])
        formal_res2 = dataset.to_detection_format(
            copy.deepcopy(Dets2), image_ids,
            ori_sizes=[im.shape for im in image_ori])
        all_results1 += formal_res1
        all_results2 += formal_res2
        # Periodically render detections / GT and push them to TensorBoard.
        if step % cfg.log_image == 0:
            input_np = everything2numpy(input)
            summary_out = []
            Is = single_shot.draw_detection(input_np,
                                            Dets,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is, image_ids, step, prefix='Detection/')
            Is = single_shot.draw_detection(input_np,
                                            Dets2,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is, image_ids, step, prefix='Detection2/')
            Imgs = single_shot.draw_gtboxes(input_np,
                                            gt_boxes_list,
                                            class_names=class_names)
            Imgs = Imgs.astype(np.uint8)
            summary_out += log_images(Imgs, image_ids, float(step), prefix='GT')
            for s in summary_out:
                writer.add_summary(s, float(step))
        if step % cfg.display == 0:
            print(time.strftime("%H:%M:%S ") +
                  'Epoch %d iter %d: speed %.3fs (%.3fs)' % (0, step, t, t3) +
                  ' ImageIds: ' + ', '.join(str(s) for s in image_ids),
                  end='\r')
    res_dict = {
        'stage1': all_results1,
        'stage2': all_results2,
        'gt': all_results_gt
    }
    return res_dict