Example #1
 def forward(self, x):
     # prepare
     probs, x_reg_pred, img_info = x
     batch_size = probs.size(0)
     feature_height, feature_width = probs.size(2), probs.size(3)
     # get bg and fg probs
     bg_probs = probs[:, :self.num_anchors, :, :]
     fg_probs = probs[:, self.num_anchors:, :, :]
     # get shift
     shift_x = np.arange(0, feature_width) * self.feature_stride
     shift_y = np.arange(0, feature_height) * self.feature_stride
     shift_x, shift_y = np.meshgrid(shift_x, shift_y)
     shifts = torch.from_numpy(
         np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                    shift_y.ravel())).transpose())
     shifts = shifts.contiguous().type_as(fg_probs).float()
     # get anchors
     anchors = self.anchors.type_as(fg_probs)
     anchors = anchors.view(1, self.num_anchors, 4) + shifts.view(
         shifts.size(0), 1, 4)
     anchors = anchors.view(1, self.num_anchors * shifts.size(0),
                            4).expand(batch_size,
                                      self.num_anchors * shifts.size(0), 4)
     # format x_reg_pred
     bbox_deltas = x_reg_pred.permute(0, 2, 3, 1).contiguous()
     bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
     # format fg_probs
     fg_probs = fg_probs.permute(0, 2, 3, 1).contiguous()
     fg_probs = fg_probs.view(batch_size, -1)
     # convert anchors to proposals
     proposals = BBoxFunctions.anchors2Proposals(anchors, bbox_deltas)
     # clip predicted boxes to image
     proposals = BBoxFunctions.clipBoxes(proposals, img_info)
     # do nms
     scores = fg_probs
     _, order = torch.sort(scores, 1, True)
     output = scores.new(batch_size, self.post_nms_topN, 5).zero_()
     for i in range(batch_size):
         proposals_single = proposals[i]
         scores_single = scores[i]
         order_single = order[i]
         # compare against the per-image score count, not the batch-wide one
         if self.pre_nms_topN > 0 and self.pre_nms_topN < scores_single.numel():
             order_single = order_single[:self.pre_nms_topN]
         proposals_single = proposals_single[order_single, :]
         scores_single = scores_single[order_single].view(-1, 1)
         _, keep_idxs = nms(torch.cat((proposals_single, scores_single), 1),
                            self.nms_thresh)
         keep_idxs = keep_idxs.long().view(-1)
         if self.post_nms_topN > 0:
             keep_idxs = keep_idxs[:self.post_nms_topN]
         proposals_single = proposals_single[keep_idxs, :]
         scores_single = scores_single[keep_idxs, :]
         num_proposals = proposals_single.size(0)
         output[i, :, 0] = i
         output[i, :num_proposals, 1:] = proposals_single
     return output
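
A minimal smoke test for the forward pass in Example #1, assuming a hypothetical ProposalLayer module that stores anchors, num_anchors, feature_stride and the NMS settings read above (the class name and constructor arguments are invented for illustration):

# Hypothetical driver; ProposalLayer and its constructor are assumptions,
# not part of the original code.
import torch

num_anchors = 9
layer = ProposalLayer(feature_stride=16, pre_nms_topN=6000,
                      post_nms_topN=300, nms_thresh=0.7)
probs = torch.rand(2, 2 * num_anchors, 38, 50)        # (B, 2A, H, W): bg then fg scores
x_reg_pred = torch.randn(2, 4 * num_anchors, 38, 50)  # (B, 4A, H, W): box deltas
img_info = torch.tensor([[600.0, 800.0, 1.0]] * 2)    # (B, 3): h, w, scale factor
rois = layer.forward((probs, x_reg_pred, img_info))   # -> (B, post_nms_topN, 5)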
Example #2
	def forward(self, x):
		# parse x
		probs_list, x_reg_list, rpn_features_shapes, img_info = x
		# obtain proposals
		batch_size = probs_list[0].size(0)
		outputs = probs_list[0].new(batch_size, self.post_nms_topN, 5).zero_()
		for i in range(batch_size):
			output = []
			for probs, x_reg, rpn_features_shape, anchor_generator, feature_stride in zip(probs_list, x_reg_list, rpn_features_shapes, self.anchor_generators, self.feature_strides):
				# --get fg probs
				fg_probs = probs[i, :, 0]
				# --get anchors
				anchors = anchor_generator.generate(feature_shape=rpn_features_shape, feature_stride=feature_stride, device=fg_probs.device).type_as(fg_probs)
				num_anchors = anchors.size(0)
				anchors = anchors.view(1, num_anchors, 4)
				# --format x_reg
				bbox_deltas = x_reg[i:i+1, ...]
				# --convert anchors to proposals
				proposals = BBoxFunctions.anchors2Proposals(anchors, bbox_deltas)
				# --clip predicted boxes to image
				proposals = BBoxFunctions.clipBoxes(proposals, img_info[i:i+1, ...])
				# --do nms
				proposals = proposals[0]
				scores = fg_probs
				_, order = torch.sort(scores, 0, True)
				if self.pre_nms_topN > 0 and self.pre_nms_topN < scores.numel():
					order = order[:self.pre_nms_topN]
				proposals = proposals[order]
				scores = scores[order].view(-1, 1)
				proposals = torch.cat((proposals, scores), dim=-1)
				_, keep_idxs = nms(proposals, self.nms_thresh)
				if self.post_nms_topN > 0:
					keep_idxs = keep_idxs[:self.post_nms_topN]
				proposals = proposals[keep_idxs]
				output.append(proposals)
			# --merge multi-level proposals
			output = torch.cat(output, dim=0)
			_, order = torch.sort(output[:, 4], 0, True)
			if (output.size(0) > self.post_nms_topN) and (self.post_nms_topN > 0):
				order = order[:self.post_nms_topN]
			output = output[order]
			proposals = output[:, :4]
			num_proposals = proposals.size(0)
			outputs[i, :, 0] = i
			outputs[i, :num_proposals, 1:] = proposals
		# return the proposal outputs
		return outputs
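
Both proposal layers treat BBoxFunctions.anchors2Proposals as a black box. A minimal sketch of the standard R-CNN delta decoding it presumably performs on (x1, y1, x2, y2) anchors, assuming the usual (dx, dy, dw, dh) parameterization (the repository's implementation may differ in details such as the +1 width convention):

import torch

def anchors2proposals_sketch(anchors, deltas):
    # anchors: (B, N, 4) as (x1, y1, x2, y2); deltas: (B, N, 4) as (dx, dy, dw, dh)
    widths = anchors[..., 2] - anchors[..., 0] + 1.0
    heights = anchors[..., 3] - anchors[..., 1] + 1.0
    ctr_x = anchors[..., 0] + 0.5 * widths
    ctr_y = anchors[..., 1] + 0.5 * heights
    # shift centers and rescale sizes by the predicted deltas
    pred_ctr_x = deltas[..., 0] * widths + ctr_x
    pred_ctr_y = deltas[..., 1] * heights + ctr_y
    pred_w = torch.exp(deltas[..., 2]) * widths
    pred_h = torch.exp(deltas[..., 3]) * heights
    return torch.stack((pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h), dim=-1)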
Example #3
def test():
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(
        datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TEST_BACKUPDIR)
    logger_handle = Logger(cfg.TEST_LOGFILE)
    use_cuda = torch.cuda.is_available()
    clsnames = loadclsnames(cfg.CLSNAMESPATH)
    # prepare dataset
    if args.datasetname == 'coco':
        dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR,
                              image_size_dict=cfg.IMAGESIZE_DICT,
                              max_num_gt_boxes=-1,
                              use_color_jitter=False,
                              img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
                              mode='TEST',
                              datasettype=args.datasettype,
                              annfilepath=args.annfilepath)
    else:
        raise ValueError('Unsupported datasetname <%s>...' %
                         args.datasetname)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0)
    # prepare model
    if args.backbonename.find('resnet') != -1:
        model = RetinanetFPNResNets(mode='TEST',
                                    cfg=cfg,
                                    logger_handle=logger_handle)
    else:
        raise ValueError('Unsupported backbonename <%s>...' %
                         args.backbonename)
    if use_cuda:
        model = model.cuda()
    # load checkpoints
    checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
    model.load_state_dict(checkpoints['model'])
    model.eval()
    # test mAP
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    results = []
    img_ids = []
    for batch_idx, samples in enumerate(dataloader):
        logger_handle.info('detect %s/%s...' %
                           (batch_idx + 1, len(dataloader)))
        # --do detect
        img_id, img, w_ori, h_ori, gt_boxes, img_info, num_gt_boxes = samples
        img_id, w_ori, h_ori, scale_factor = int(
            img_id.item()), w_ori.item(), h_ori.item(), img_info[0][-1].item()
        img_ids.append(img_id)
        with torch.no_grad():
            output = model(x=img.type(FloatTensor),
                           gt_boxes=gt_boxes.type(FloatTensor),
                           img_info=img_info.type(FloatTensor),
                           num_gt_boxes=num_gt_boxes.type(FloatTensor))
        anchors = output[0].data.view(1, -1, 4)
        preds_cls = output[1].data
        preds_reg = output[2].data
        # --parse the results
        preds_reg = preds_reg.view(-1, 4) * torch.FloatTensor(
            cfg.BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(
                cfg.BBOX_NORMALIZE_MEANS).type(FloatTensor)
        preds_reg = preds_reg.view(1, -1, 4)
        boxes_pred = BBoxFunctions.decodeBboxes(anchors, preds_reg)
        boxes_pred = BBoxFunctions.clipBoxes(
            boxes_pred,
            torch.from_numpy(
                np.array(
                    [h_ori * scale_factor, w_ori * scale_factor,
                     scale_factor])).unsqueeze(0).type(FloatTensor).data)
        boxes_pred = boxes_pred.squeeze()
        scores = preds_cls.squeeze()
        thresh = 0.05
        for j in range(cfg.NUM_CLASSES - 1):
            idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
            if idxs.numel() > 0:
                cls_scores = scores[:, j][idxs]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = boxes_pred[idxs, :]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets, _ = nms(cls_dets, args.nmsthresh)
                for cls_det in cls_dets:
                    category_id = dataset.clsids2cococlsids_dict.get(j)
                    x1, y1, x2, y2, score = cls_det
                    x1 = x1.item() / scale_factor
                    x2 = x2.item() / scale_factor
                    y1 = y1.item() / scale_factor
                    y2 = y2.item() / scale_factor
                    bbox = [x1, y1, x2, y2]
                    bbox[2] = bbox[2] - bbox[0]
                    bbox[3] = bbox[3] - bbox[1]
                    image_result = {
                        'image_id': img_id,
                        'category_id': int(category_id),
                        'score': float(score.item()),
                        'bbox': bbox
                    }
                    results.append(image_result)
    with open(cfg.TEST_BBOXES_SAVE_PATH, 'w') as f:
        json.dump(results, f, indent=4)
    if args.datasettype in ['val2017']:
        dataset.doDetectionEval(img_ids, cfg.TEST_BBOXES_SAVE_PATH)
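
BBoxFunctions.clipBoxes is likewise used as a black box above, with img_info rows carrying (h, w, scale). A plausible minimal version that clamps boxes to the image extent (sketch only; the repository's implementation may differ):

import torch

def clip_boxes_sketch(boxes, img_info):
    # boxes: (B, N, 4) as (x1, y1, x2, y2); img_info: (B, 3) rows of (h, w, scale)
    for i in range(boxes.size(0)):
        boxes[i, :, 0::2].clamp_(0, img_info[i, 1].item() - 1)  # x1, x2 into [0, w-1]
        boxes[i, :, 1::2].clamp_(0, img_info[i, 0].item() - 1)  # y1, y2 into [0, h-1]
    return boxes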
Example #4
def demo():
	# prepare base things
	args = parseArgs()
	cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
	checkDir(cfg.TEST_BACKUPDIR)
	logger_handle = Logger(cfg.TEST_LOGFILE)
	use_cuda = torch.cuda.is_available()
	clsnames = loadclsnames(cfg.CLSNAMESPATH)
	# prepare model
	if args.backbonename.find('resnet') != -1:
		model = FasterRCNNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle)
	else:
		raise ValueError('Unsupported backbonename <%s>...' % args.backbonename)
	if use_cuda:
		model = model.cuda()
	# load checkpoints
	checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
	model.load_state_dict(checkpoints['model'])
	model.eval()
	# do detect
	FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
	img = Image.open(args.imagepath)
	if args.datasetname == 'coco':
		input_img, scale_factor, target_size = COCODataset.preprocessImage(img, use_color_jitter=False, image_size_dict=cfg.IMAGESIZE_DICT, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL)
	else:
		raise ValueError('Unsupported datasetname <%s>...' % args.datasetname)
	input_img = input_img.unsqueeze(0).type(FloatTensor)
	gt_boxes = torch.FloatTensor([1, 1, 1, 1, 0]).unsqueeze(0).type(FloatTensor)
	img_info = torch.from_numpy(np.array([target_size[0], target_size[1], scale_factor])).unsqueeze(0).type(FloatTensor)
	num_gt_boxes = torch.FloatTensor([0]).unsqueeze(0).type(FloatTensor)
	with torch.no_grad():
		output = model(x=input_img, gt_boxes=gt_boxes, img_info=img_info, num_gt_boxes=num_gt_boxes)
	rois = output[0].data[..., 1:5]
	cls_probs = output[1].data
	bbox_preds = output[2].data
	# parse the results
	# denormalize the deltas once; the two modes differ only in the final view
	box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor)
	if cfg.IS_CLASS_AGNOSTIC:
		box_deltas = box_deltas.view(1, -1, 4)
	else:
		box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES)
	boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas)
	boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, img_info.data)
	boxes_pred = boxes_pred.squeeze()
	scores = cls_probs.squeeze()
	thresh = 0.05
	for j in range(1, cfg.NUM_CLASSES):
		idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
		if idxs.numel() > 0:
			cls_scores = scores[:, j][idxs]
			_, order = torch.sort(cls_scores, 0, True)
			if cfg.IS_CLASS_AGNOSTIC:
				cls_boxes = boxes_pred[idxs, :]
			else:
				cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4]
			cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
			cls_dets = cls_dets[order]
			_, keep_idxs = nms(cls_dets, args.nmsthresh)
			cls_dets = cls_dets[keep_idxs.view(-1).long()]
			for cls_det in cls_dets:
				if cls_det[-1] > args.confthresh:
					x1, y1, x2, y2 = cls_det[:4]
					x1 = x1.item() / scale_factor
					x2 = x2.item() / scale_factor
					y1 = y1.item() / scale_factor
					y2 = y2.item() / scale_factor
					label = clsnames[j-1]
					logger_handle.info('Detect a %s in confidence %.4f...' % (label, cls_det[-1].item()))
					color = (0, 255, 0)
					draw = ImageDraw.Draw(img)
					draw.line([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)], width=2, fill=color)
					font = ImageFont.truetype('libs/font.TTF', 25)
					draw.text((x1+5, y1), label, fill=color, font=font)
	img.save(os.path.join(cfg.TEST_BACKUPDIR, 'demo_output.jpg'))
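
All four examples call nms(dets, thresh) and rely on it returning (kept_dets, keep_idxs): Examples #1 and #4 consume the indices, Example #3 the detections. A self-contained greedy IoU suppression sketch matching that convention (the repository presumably ships its own, likely compiled, version):

import torch

def nms_sketch(dets, thresh):
    # dets: (N, 5) rows of (x1, y1, x2, y2, score); returns (kept_dets, keep_idxs)
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = dets[:, 4].sort(descending=True)[1]
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]
        # intersection of the current top box with all remaining boxes
        xx1 = torch.max(x1[rest], x1[i])
        yy1 = torch.max(y1[rest], y1[i])
        xx2 = torch.min(x2[rest], x2[i])
        yy2 = torch.min(y2[rest], y2[i])
        inter = (xx2 - xx1 + 1).clamp(min=0) * (yy2 - yy1 + 1).clamp(min=0)
        iou = inter / (areas[i] + areas[rest] - inter)
        order = rest[iou <= thresh]  # drop everything overlapping above thresh
    keep = torch.tensor(keep, dtype=torch.long, device=dets.device)
    return dets[keep], keep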