def forward(self, x):
    # prepare
    probs, x_reg_pred, img_info = x
    batch_size = probs.size(0)
    feature_height, feature_width = probs.size(2), probs.size(3)
    # get bg and fg probs
    bg_probs = probs[:, :self.num_anchors, :, :]
    fg_probs = probs[:, self.num_anchors:, :, :]
    # get shift
    shift_x = np.arange(0, feature_width) * self.feature_stride
    shift_y = np.arange(0, feature_height) * self.feature_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    )
    shifts = shifts.contiguous().type_as(fg_probs).float()
    # get anchors
    anchors = self.anchors.type_as(fg_probs)
    anchors = anchors.view(1, self.num_anchors, 4) + shifts.view(shifts.size(0), 1, 4)
    anchors = anchors.view(1, self.num_anchors*shifts.size(0), 4).expand(batch_size, self.num_anchors*shifts.size(0), 4)
    # format x_reg_pred
    bbox_deltas = x_reg_pred.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
    # format fg_probs
    fg_probs = fg_probs.permute(0, 2, 3, 1).contiguous()
    fg_probs = fg_probs.view(batch_size, -1)
    # convert anchors to proposals
    proposals = BBoxFunctions.anchors2Proposals(anchors, bbox_deltas)
    # clip predicted boxes to image
    proposals = BBoxFunctions.clipBoxes(proposals, img_info)
    # do nms
    scores = fg_probs
    _, order = torch.sort(scores, 1, True)
    output = scores.new(batch_size, self.post_nms_topN, 5).zero_()
    for i in range(batch_size):
        proposals_single = proposals[i]
        scores_single = scores[i]
        order_single = order[i]
        if self.pre_nms_topN > 0 and self.pre_nms_topN < scores.numel():
            order_single = order_single[:self.pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)
        _, keep_idxs = nms(torch.cat((proposals_single, scores_single), 1), self.nms_thresh)
        keep_idxs = keep_idxs.long().view(-1)
        if self.post_nms_topN > 0:
            keep_idxs = keep_idxs[:self.post_nms_topN]
        proposals_single = proposals_single[keep_idxs, :]
        scores_single = scores_single[keep_idxs, :]
        num_proposals = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposals, 1:] = proposals_single
    return output
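
# The meshgrid/broadcast dance above is the densest part of the forward pass.
# The standalone sketch below (a hypothetical helper, not part of the original
# module) isolates it: A base anchors are tiled over a K-position feature grid
# to produce the K*A absolute anchors that the regression deltas are applied to.
import numpy as np

def tile_anchors_sketch(base_anchors, feature_height, feature_width, feature_stride):
    '''Sketch: tile (A, 4) base anchors over an (H, W) feature map.
    Returns (H*W*A, 4) anchors in (x1, y1, x2, y2) image coordinates.'''
    shift_x = np.arange(0, feature_width) * feature_stride
    shift_y = np.arange(0, feature_height) * feature_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # one (x1, y1, x2, y2) shift per spatial position: (K, 4) with K = H*W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then flatten to (K*A, 4)
    return (base_anchors[None, :, :] + shifts[:, None, :]).reshape(-1, 4)

# e.g. 3 base anchors on a 2x2 grid with stride 16 -> 12 anchors
base = np.array([[-8., -8., 8., 8.], [-16., -8., 16., 8.], [-8., -16., 8., 16.]])
assert tile_anchors_sketch(base, 2, 2, 16).shape == (12, 4)
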
def forward(self, x):
    # parse x
    probs_list, x_reg_list, rpn_features_shapes, img_info = x
    # obtain proposals
    batch_size = probs_list[0].size(0)
    outputs = probs_list[0].new(batch_size, self.post_nms_topN, 5).zero_()
    for i in range(batch_size):
        output = []
        for probs, x_reg, rpn_features_shape, anchor_generator, feature_stride in zip(
                probs_list, x_reg_list, rpn_features_shapes, self.anchor_generators, self.feature_strides):
            # --get fg probs
            fg_probs = probs[i, :, 0]
            # --get anchors
            anchors = anchor_generator.generate(feature_shape=rpn_features_shape,
                                                feature_stride=feature_stride,
                                                device=fg_probs.device).type_as(fg_probs)
            num_anchors = anchors.size(0)
            anchors = anchors.view(1, num_anchors, 4)
            # --format x_reg
            bbox_deltas = x_reg[i:i+1, ...]
            # --convert anchors to proposals
            proposals = BBoxFunctions.anchors2Proposals(anchors, bbox_deltas)
            # --clip predicted boxes to image
            proposals = BBoxFunctions.clipBoxes(proposals, img_info[i:i+1, ...])
            # --do nms
            proposals = proposals[0]
            scores = fg_probs
            _, order = torch.sort(scores, 0, True)
            if self.pre_nms_topN > 0 and self.pre_nms_topN < scores.numel():
                order = order[:self.pre_nms_topN]
            proposals = proposals[order]
            scores = scores[order].view(-1, 1)
            proposals = torch.cat((proposals, scores), dim=-1)
            _, keep_idxs = nms(proposals, self.nms_thresh)
            if self.post_nms_topN > 0:
                keep_idxs = keep_idxs[:self.post_nms_topN]
            proposals = proposals[keep_idxs]
            output.append(proposals)
        # --merge multi-level proposals
        output = torch.cat(output, dim=0)
        _, order = torch.sort(output[:, 4], 0, True)
        if (output.size(0) > self.post_nms_topN) and (self.post_nms_topN > 0):
            order = order[:self.post_nms_topN]
        output = output[order]
        proposals = output[:, :4]
        num_proposals = proposals.size(0)
        outputs[i, :, 0] = i
        outputs[i, :num_proposals, 1:] = proposals
    # return the proposal outputs
    return outputs
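
# The multi-level path delegates anchor construction to anchor_generator.generate(...).
# Below is a minimal sketch of what such a per-FPN-level generator might look like,
# assuming only the generate(feature_shape=..., feature_stride=..., device=...)
# signature used above; the repo's real class almost certainly differs in detail
# (anchor sizes, aspect-ratio convention, rounding).
import torch

class AnchorGeneratorSketch:
    '''Hypothetical per-FPN-level anchor generator.'''
    def __init__(self, size=32, ratios=(0.5, 1.0, 2.0)):
        self.size = size
        self.ratios = ratios
    def generate(self, feature_shape, feature_stride, device):
        h, w = feature_shape
        # base anchors centered at the origin, one per aspect ratio,
        # here using the ratio = height / width convention
        base = []
        for ratio in self.ratios:
            bw, bh = self.size / (ratio ** 0.5), self.size * (ratio ** 0.5)
            base.append([-bw / 2.0, -bh / 2.0, bw / 2.0, bh / 2.0])
        base = torch.tensor(base, dtype=torch.float32, device=device)
        # one (x1, y1, x2, y2) shift per feature-map position
        ys = (torch.arange(h, device=device) * feature_stride).float()
        xs = (torch.arange(w, device=device) * feature_stride).float()
        ys, xs = torch.meshgrid(ys, xs)  # 'ij' indexing assumed (pre-1.10 default)
        shifts = torch.stack((xs.reshape(-1), ys.reshape(-1), xs.reshape(-1), ys.reshape(-1)), dim=1)
        # broadcast to (K*A, 4) absolute anchors, as in the single-level layer above
        return (base[None, :, :] + shifts[:, None, :]).reshape(-1, 4)
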
def test():
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TEST_BACKUPDIR)
    logger_handle = Logger(cfg.TEST_LOGFILE)
    use_cuda = torch.cuda.is_available()
    clsnames = loadclsnames(cfg.CLSNAMESPATH)
    # prepare dataset
    if args.datasetname == 'coco':
        dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR,
                              image_size_dict=cfg.IMAGESIZE_DICT,
                              max_num_gt_boxes=-1,
                              use_color_jitter=False,
                              img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
                              mode='TEST',
                              datasettype=args.datasettype,
                              annfilepath=args.annfilepath)
    else:
        raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
    # prepare model
    if args.backbonename.find('resnet') != -1:
        model = RetinanetFPNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle)
    else:
        raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename)
    if use_cuda:
        model = model.cuda()
    # load checkpoints
    checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
    model.load_state_dict(checkpoints['model'])
    model.eval()
    # test mAP
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    results = []
    img_ids = []
    for batch_idx, samples in enumerate(dataloader):
        logger_handle.info('detect %s/%s...' % (batch_idx+1, len(dataloader)))
        # --do detect
        img_id, img, w_ori, h_ori, gt_boxes, img_info, num_gt_boxes = samples
        img_id, w_ori, h_ori, scale_factor = int(img_id.item()), w_ori.item(), h_ori.item(), img_info[0][-1].item()
        img_ids.append(img_id)
        with torch.no_grad():
            output = model(x=img.type(FloatTensor), gt_boxes=gt_boxes.type(FloatTensor), img_info=img_info.type(FloatTensor), num_gt_boxes=num_gt_boxes.type(FloatTensor))
        anchors = output[0].data.view(1, -1, 4)
        preds_cls = output[1].data
        preds_reg = output[2].data
        # --parse the results
        preds_reg = preds_reg.view(-1, 4) * torch.FloatTensor(cfg.BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.BBOX_NORMALIZE_MEANS).type(FloatTensor)
        preds_reg = preds_reg.view(1, -1, 4)
        boxes_pred = BBoxFunctions.decodeBboxes(anchors, preds_reg)
        boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, torch.from_numpy(np.array([h_ori*scale_factor, w_ori*scale_factor, scale_factor])).unsqueeze(0).type(FloatTensor).data)
        boxes_pred = boxes_pred.squeeze()
        scores = preds_cls.squeeze()
        thresh = 0.05
        for j in range(cfg.NUM_CLASSES-1):
            idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
            if idxs.numel() > 0:
                cls_scores = scores[:, j][idxs]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = boxes_pred[idxs, :]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets, _ = nms(cls_dets, args.nmsthresh)
                for cls_det in cls_dets:
                    category_id = dataset.clsids2cococlsids_dict.get(j)
                    x1, y1, x2, y2, score = cls_det
                    # undo the test-time rescaling to get original-image coordinates
                    x1 = x1.item() / scale_factor
                    x2 = x2.item() / scale_factor
                    y1 = y1.item() / scale_factor
                    y2 = y2.item() / scale_factor
                    # convert (x1, y1, x2, y2) to the COCO (x, y, w, h) format
                    bbox = [x1, y1, x2-x1, y2-y1]
                    image_result = {'image_id': img_id, 'category_id': int(category_id), 'score': float(score.item()), 'bbox': bbox}
                    results.append(image_result)
    with open(cfg.TEST_BBOXES_SAVE_PATH, 'w') as fp:
        json.dump(results, fp, indent=4)
    if args.datasettype in ['val2017']:
        dataset.doDetectionEval(img_ids, cfg.TEST_BBOXES_SAVE_PATH)
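
# BBoxFunctions.decodeBboxes above applies the (de-normalized) regression
# offsets to the anchors. A minimal sketch of the standard R-CNN box
# parameterization it presumably implements (the repo's actual version may
# differ in details such as the legacy "+1" width convention or clamping dw/dh):
import torch

def decode_bboxes_sketch(anchors, deltas):
    '''Sketch: anchors (N, 4) as (x1, y1, x2, y2), deltas (N, 4) as (dx, dy, dw, dh).'''
    widths = anchors[:, 2] - anchors[:, 0] + 1.0
    heights = anchors[:, 3] - anchors[:, 1] + 1.0
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights
    # center offsets are relative to anchor size; sizes are scaled in log-space
    pred_ctr_x = ctr_x + deltas[:, 0] * widths
    pred_ctr_y = ctr_y + deltas[:, 1] * heights
    pred_w = widths * torch.exp(deltas[:, 2])
    pred_h = heights * torch.exp(deltas[:, 3])
    return torch.stack((pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h), dim=1)
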
def demo():
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TEST_BACKUPDIR)
    logger_handle = Logger(cfg.TEST_LOGFILE)
    use_cuda = torch.cuda.is_available()
    clsnames = loadclsnames(cfg.CLSNAMESPATH)
    # prepare model
    if args.backbonename.find('resnet') != -1:
        model = FasterRCNNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle)
    else:
        raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename)
    if use_cuda:
        model = model.cuda()
    # load checkpoints
    checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
    model.load_state_dict(checkpoints['model'])
    model.eval()
    # do detect
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    img = Image.open(args.imagepath)
    if args.datasetname == 'coco':
        input_img, scale_factor, target_size = COCODataset.preprocessImage(img,
                                                                           use_color_jitter=False,
                                                                           image_size_dict=cfg.IMAGESIZE_DICT,
                                                                           img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
                                                                           use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL)
    else:
        raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname)
    input_img = input_img.unsqueeze(0).type(FloatTensor)
    # dummy ground-truth placeholders: the model interface expects them even in TEST mode
    gt_boxes = torch.FloatTensor([1, 1, 1, 1, 0]).unsqueeze(0).type(FloatTensor)
    img_info = torch.from_numpy(np.array([target_size[0], target_size[1], scale_factor])).unsqueeze(0).type(FloatTensor)
    num_gt_boxes = torch.FloatTensor([0]).unsqueeze(0).type(FloatTensor)
    with torch.no_grad():
        output = model(x=input_img, gt_boxes=gt_boxes, img_info=img_info, num_gt_boxes=num_gt_boxes)
    rois = output[0].data[..., 1:5]
    cls_probs = output[1].data
    bbox_preds = output[2].data
    # parse the results: de-normalize the deltas, then reshape per the head type
    box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor)
    if cfg.IS_CLASS_AGNOSTIC:
        box_deltas = box_deltas.view(1, -1, 4)
    else:
        box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES)
    boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas)
    boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, img_info.data)
    boxes_pred = boxes_pred.squeeze()
    scores = cls_probs.squeeze()
    thresh = 0.05
    for j in range(1, cfg.NUM_CLASSES):
        idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
        if idxs.numel() > 0:
            cls_scores = scores[:, j][idxs]
            _, order = torch.sort(cls_scores, 0, True)
            if cfg.IS_CLASS_AGNOSTIC:
                cls_boxes = boxes_pred[idxs, :]
            else:
                cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            _, keep_idxs = nms(cls_dets, args.nmsthresh)
            cls_dets = cls_dets[keep_idxs.view(-1).long()]
            for cls_det in cls_dets:
                if cls_det[-1] > args.confthresh:
                    x1, y1, x2, y2 = cls_det[:4]
                    # undo the test-time rescaling to get original-image coordinates
                    x1 = x1.item() / scale_factor
                    x2 = x2.item() / scale_factor
                    y1 = y1.item() / scale_factor
                    y2 = y2.item() / scale_factor
                    label = clsnames[j-1]
                    logger_handle.info('Detected a %s with confidence %.4f...' % (label, cls_det[-1].item()))
                    color = (0, 255, 0)
                    draw = ImageDraw.Draw(img)
                    draw.line([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)], width=2, fill=color)
                    font = ImageFont.truetype('libs/font.TTF', 25)
                    draw.text((x1+5, y1), label, fill=color, font=font)
    img.save(os.path.join(cfg.TEST_BACKUPDIR, 'demo_output.jpg'))
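
# A typical invocation of demo() from the command line might look like the
# following. The flag names are inferred from the args.* attributes used
# above (imagepath, datasetname, backbonename, checkpointspath, nmsthresh,
# confthresh); the actual parseArgs definition may spell them differently.
#
#   python demo.py --datasetname coco --backbonename resnet101 \
#                  --checkpointspath checkpoints/model_final.pth \
#                  --imagepath street.jpg --nmsthresh 0.3 --confthresh 0.5
#
# The annotated result is written to cfg.TEST_BACKUPDIR as demo_output.jpg.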