Example #1: RetinaNet evaluation on COCO (test mAP)
def test():
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(
        datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TEST_BACKUPDIR)
    logger_handle = Logger(cfg.TEST_LOGFILE)
    use_cuda = torch.cuda.is_available()
    clsnames = loadclsnames(cfg.CLSNAMESPATH)
    # prepare dataset
    if args.datasetname == 'coco':
        dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR,
                              image_size_dict=cfg.IMAGESIZE_DICT,
                              max_num_gt_boxes=-1,
                              use_color_jitter=False,
                              img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
                              mode='TEST',
                              datasettype=args.datasettype,
                              annfilepath=args.annfilepath)
    else:
        raise ValueError('Unsupported datasetname <%s> now...' %
                         args.datasetname)
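    # batch size 1 keeps the per-image metadata scalar (read below with .item())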
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0)
    # prepare model
    if 'resnet' in args.backbonename:
        model = RetinanetFPNResNets(mode='TEST',
                                    cfg=cfg,
                                    logger_handle=logger_handle)
    else:
        raise ValueError('Unsupported backbonename <%s> now...' %
                         args.backbonename)
    if use_cuda:
        model = model.cuda()
    # load checkpoints
    checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
    model.load_state_dict(checkpoints['model'])
    model.eval()
    # test mAP
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    results = []
    img_ids = []
    for batch_idx, samples in enumerate(dataloader):
        logger_handle.info('detect %s/%s...' %
                           (batch_idx + 1, len(dataloader)))
        # --do detect
        img_id, img, w_ori, h_ori, gt_boxes, img_info, num_gt_boxes = samples
        img_id, w_ori, h_ori, scale_factor = int(
            img_id.item()), w_ori.item(), h_ori.item(), img_info[0][-1].item()
        img_ids.append(img_id)
        with torch.no_grad():
            output = model(x=img.type(FloatTensor),
                           gt_boxes=gt_boxes.type(FloatTensor),
                           img_info=img_info.type(FloatTensor),
                           num_gt_boxes=num_gt_boxes.type(FloatTensor))
        anchors = output[0].data.view(1, -1, 4)
        preds_cls = output[1].data
        preds_reg = output[2].data
        # --parse the results
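        # undo the training-time normalization of the regression deltas before decoding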
        preds_reg = preds_reg.view(-1, 4) * torch.FloatTensor(
            cfg.BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(
                cfg.BBOX_NORMALIZE_MEANS).type(FloatTensor)
        preds_reg = preds_reg.view(1, -1, 4)
        boxes_pred = BBoxFunctions.decodeBboxes(anchors, preds_reg)
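        # clip to the resized image: the bounds tensor mirrors img_info as [h * scale, w * scale, scale]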
        boxes_pred = BBoxFunctions.clipBoxes(
            boxes_pred,
            torch.from_numpy(
                np.array(
                    [h_ori * scale_factor, w_ori * scale_factor,
                     scale_factor])).unsqueeze(0).type(FloatTensor).data)
        boxes_pred = boxes_pred.squeeze()
        scores = preds_cls.squeeze()
        thresh = 0.05
        for j in range(cfg.NUM_CLASSES - 1):
            idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
            if idxs.numel() > 0:
                cls_scores = scores[:, j][idxs]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = boxes_pred[idxs, :]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets, _ = nms(cls_dets, args.nmsthresh)
                for cls_det in cls_dets:
                    category_id = dataset.clsids2cococlsids_dict.get(j)
                    x1, y1, x2, y2, score = cls_det
                    x1 = x1.item() / scale_factor
                    x2 = x2.item() / scale_factor
                    y1 = y1.item() / scale_factor
                    y2 = y2.item() / scale_factor
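                    # COCO results expect [x, y, width, height] at the original image scale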
                    bbox = [x1, y1, x2, y2]
                    bbox[2] = bbox[2] - bbox[0]
                    bbox[3] = bbox[3] - bbox[1]
                    image_result = {
                        'image_id': img_id,
                        'category_id': int(category_id),
                        'score': float(score.item()),
                        'bbox': bbox
                    }
                    results.append(image_result)
    with open(cfg.TEST_BBOXES_SAVE_PATH, 'w') as f:
        json.dump(results, f, indent=4)
    if args.datasettype in ['val2017']:
        dataset.doDetectionEval(img_ids, cfg.TEST_BBOXES_SAVE_PATH)
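BBoxFunctions.decodeBboxes is called above but not shown. The following is a minimal sketch of the standard R-CNN-style delta decoding such a helper typically implements; the function name and the exact "+1" width convention are assumptions, not the repo's code.

import torch

def decode_bboxes_sketch(anchors, deltas):
    # anchors: (B, N, 4) as (x1, y1, x2, y2); deltas: (B, N, 4) as (dx, dy, dw, dh)
    widths = anchors[..., 2] - anchors[..., 0] + 1.0
    heights = anchors[..., 3] - anchors[..., 1] + 1.0
    ctr_x = anchors[..., 0] + 0.5 * widths
    ctr_y = anchors[..., 1] + 0.5 * heights
    # shift the anchor centers and rescale the side lengths
    pred_ctr_x = ctr_x + deltas[..., 0] * widths
    pred_ctr_y = ctr_y + deltas[..., 1] * heights
    pred_w = widths * torch.exp(deltas[..., 2])
    pred_h = heights * torch.exp(deltas[..., 3])
    # back to corner format
    return torch.stack((pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h), dim=-1)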
Example #2: RetinaNet training on COCO
def train():
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(
        datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TRAIN_BACKUPDIR)
    logger_handle = Logger(cfg.TRAIN_LOGFILE)
    use_cuda = torch.cuda.is_available()
    is_multi_gpus = cfg.IS_MULTI_GPUS
    # prepare dataset
    if args.datasetname == 'coco':
        dataset = COCODataset(rootdir=cfg.DATASET_ROOT_DIR,
                              image_size_dict=cfg.IMAGESIZE_DICT,
                              max_num_gt_boxes=cfg.MAX_NUM_GT_BOXES,
                              use_color_jitter=cfg.USE_COLOR_JITTER,
                              img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
                              mode='TRAIN',
                              datasettype='train2017')
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.BATCHSIZE,
            sampler=NearestRatioRandomSampler(dataset.img_ratios,
                                              cfg.BATCHSIZE),
            num_workers=cfg.NUM_WORKERS,
            collate_fn=COCODataset.paddingCollateFn,
            pin_memory=cfg.PIN_MEMORY)
    else:
        raise ValueError('Unsupported datasetname <%s> now...' %
                         args.datasetname)
    # prepare model
    if 'resnet' in args.backbonename:
        model = RetinanetFPNResNets(mode='TRAIN',
                                    cfg=cfg,
                                    logger_handle=logger_handle)
    else:
        raise ValueError('Unsupported backbonename <%s> now...' %
                         args.backbonename)
    start_epoch = 1
    end_epoch = cfg.MAX_EPOCHS
    if use_cuda:
        model = model.cuda()
    # prepare optimizer
    learning_rate_idx = 0
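    # with warmup enabled, training starts at a third of the base LR and is raised to the full value after cfg.NUM_WARMUP_STEPS batches of epoch 1 (see the training loop)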
    if cfg.IS_USE_WARMUP:
        learning_rate = cfg.LEARNING_RATES[learning_rate_idx] / 3
    else:
        learning_rate = cfg.LEARNING_RATES[learning_rate_idx]
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=learning_rate,
                          momentum=cfg.MOMENTUM,
                          weight_decay=cfg.WEIGHT_DECAY)
    # check checkpoints path
    if args.checkpointspath:
        checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
        model.load_state_dict(checkpoints['model'])
        optimizer.load_state_dict(checkpoints['optimizer'])
        start_epoch = checkpoints['epoch'] + 1
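        # replay the LR schedule so learning_rate_idx matches the resumed epoch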
        for epoch in range(1, start_epoch):
            if epoch in cfg.LR_ADJUST_EPOCHS:
                learning_rate_idx += 1
    # data parallel
    if is_multi_gpus:
        model = nn.DataParallel(model)
    # print config
    logger_handle.info('Dataset used: %s, Number of images: %s' %
                       (args.datasetname, len(dataset)))
    logger_handle.info('Backbone used: %s' % args.backbonename)
    logger_handle.info('Checkpoints used: %s' % args.checkpointspath)
    logger_handle.info('Config file used: %s' % cfg_file_path)
    # train
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    for epoch in range(start_epoch, end_epoch + 1):
        # --set train mode
        if is_multi_gpus:
            model.module.setTrain()
        else:
            model.setTrain()
        # --adjust learning rate
        if epoch in cfg.LR_ADJUST_EPOCHS:
            learning_rate_idx += 1
            adjustLearningRate(optimizer=optimizer,
                               target_lr=cfg.LEARNING_RATES[learning_rate_idx],
                               logger_handle=logger_handle)
        # --log info
        logger_handle.info('Start epoch %s, learning rate is %s...' %
                           (epoch, cfg.LEARNING_RATES[learning_rate_idx]))
        # --train epoch
        for batch_idx, samples in enumerate(dataloader):
            if (epoch == 1) and (cfg.IS_USE_WARMUP) and (
                    batch_idx == cfg.NUM_WARMUP_STEPS):
                assert learning_rate_idx == 0, 'BUGS may exist...'
                adjustLearningRate(
                    optimizer=optimizer,
                    target_lr=cfg.LEARNING_RATES[learning_rate_idx],
                    logger_handle=logger_handle)
            optimizer.zero_grad()
            img_ids, imgs, gt_boxes, img_info, num_gt_boxes = samples
            output = model(x=imgs.type(FloatTensor),
                           gt_boxes=gt_boxes.type(FloatTensor),
                           img_info=img_info.type(FloatTensor),
                           num_gt_boxes=num_gt_boxes.type(FloatTensor))
            anchors, preds_cls, preds_reg, loss_cls, loss_reg = output
            loss = loss_cls.mean() + loss_reg.mean()
            logger_handle.info(
                '[EPOCH]: %s/%s, [BATCH]: %s/%s, [LEARNING_RATE]: %s, [DATASET]: %s'
                ' \n\t [LOSS]: loss_cls %.4f, loss_reg %.4f, total %.4f' %
                (epoch, end_epoch, batch_idx + 1, len(dataloader),
                 cfg.LEARNING_RATES[learning_rate_idx], args.datasetname,
                 loss_cls.mean().item(), loss_reg.mean().item(), loss.item()))
            loss.backward()
            clipGradients(model.parameters(), cfg.GRAD_CLIP_MAX_NORM,
                          cfg.GRAD_CLIP_NORM_TYPE)
            optimizer.step()
        # --save model
        if (epoch % cfg.SAVE_INTERVAL == 0) or (epoch == end_epoch):
            state_dict = {
                'epoch': epoch,
                'model': (model.module.state_dict()
                          if is_multi_gpus else model.state_dict()),
                'optimizer': optimizer.state_dict()
            }
            savepath = os.path.join(cfg.TRAIN_BACKUPDIR,
                                    'epoch_%s.pth' % epoch)
            saveCheckpoints(state_dict, savepath, logger_handle)
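Neither adjustLearningRate nor clipGradients is shown above. Minimal sketches follow, under the assumption that the former rewrites each parameter group's learning rate and the latter wraps PyTorch's built-in global-norm clipping; the names and signatures mirror the call sites above but are otherwise guesses.

import torch

def adjust_learning_rate_sketch(optimizer, target_lr, logger_handle=None):
    # point every parameter group at the new learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = target_lr
    if logger_handle is not None:
        logger_handle.info('Adjust learning rate to %s...' % target_lr)

def clip_gradients_sketch(parameters, max_norm, norm_type):
    # delegate to PyTorch's global-norm gradient clipping
    torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=norm_type)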
Example #3: Faster R-CNN single-image detection demo
def demo():
	# prepare base things
	args = parseArgs()
	cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
	checkDir(cfg.TEST_BACKUPDIR)
	logger_handle = Logger(cfg.TEST_LOGFILE)
	use_cuda = torch.cuda.is_available()
	clsnames = loadclsnames(cfg.CLSNAMESPATH)
	# prepare model
	if 'resnet' in args.backbonename:
		model = FasterRCNNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle)
	else:
		raise ValueError('Unsupported backbonename <%s> now...' % args.backbonename)
	if use_cuda:
		model = model.cuda()
	# load checkpoints
	checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
	model.load_state_dict(checkpoints['model'])
	model.eval()
	# do detect
	FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
	img = Image.open(args.imagepath)
	if args.datasetname == 'coco':
		input_img, scale_factor, target_size = COCODataset.preprocessImage(img, use_color_jitter=False, image_size_dict=cfg.IMAGESIZE_DICT, img_norm_info=cfg.IMAGE_NORMALIZE_INFO, use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL)
	else:
		raise ValueError('Unsupported datasetname <%s> now...' % args.datasetname)
	input_img = input_img.unsqueeze(0).type(FloatTensor)
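	# TEST mode still takes gt tensors in the forward signature, so feed dummy placeholders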
	gt_boxes = torch.FloatTensor([1, 1, 1, 1, 0]).unsqueeze(0).type(FloatTensor)
	img_info = torch.from_numpy(np.array([target_size[0], target_size[1], scale_factor])).unsqueeze(0).type(FloatTensor)
	num_gt_boxes = torch.FloatTensor([0]).unsqueeze(0).type(FloatTensor)
	with torch.no_grad():
		output = model(x=input_img, gt_boxes=gt_boxes, img_info=img_info, num_gt_boxes=num_gt_boxes)
	rois = output[0].data[..., 1:5]
	cls_probs = output[1].data
	bbox_preds = output[2].data
	# parse the results
	# un-normalize the regression deltas; class-agnostic heads predict one box per RoI, per-class heads one box per class
	box_deltas = bbox_preds.view(-1, 4) * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor)
	if cfg.IS_CLASS_AGNOSTIC:
		box_deltas = box_deltas.view(1, -1, 4)
	else:
		box_deltas = box_deltas.view(1, -1, 4 * cfg.NUM_CLASSES)
	boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas)
	boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, img_info.data)
	boxes_pred = boxes_pred.squeeze()
	scores = cls_probs.squeeze()
	thresh = 0.05
	for j in range(1, cfg.NUM_CLASSES):
		idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
		if idxs.numel() > 0:
			cls_scores = scores[:, j][idxs]
			_, order = torch.sort(cls_scores, 0, True)
			if cfg.IS_CLASS_AGNOSTIC:
				cls_boxes = boxes_pred[idxs, :]
			else:
				cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4]
			cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
			cls_dets = cls_dets[order]
			_, keep_idxs = nms(cls_dets, args.nmsthresh)
			cls_dets = cls_dets[keep_idxs.view(-1).long()]
			for cls_det in cls_dets:
				if cls_det[-1] > args.confthresh:
					x1, y1, x2, y2 = cls_det[:4]
					x1 = x1.item() / scale_factor
					x2 = x2.item() / scale_factor
					y1 = y1.item() / scale_factor
					y2 = y2.item() / scale_factor
					label = clsnames[j-1]
					logger_handle.info('Detect a %s in confidence %.4f...' % (label, cls_det[-1].item()))
					color = (0, 255, 0)
					draw = ImageDraw.Draw(img)
					draw.line([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)], width=2, fill=color)
					font = ImageFont.truetype('libs/font.TTF', 25)
					draw.text((x1+5, y1), label, fill=color, font=font)
	img.save(os.path.join(cfg.TEST_BACKUPDIR, 'demo_output.jpg'))
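BBoxFunctions.clipBoxes receives img_info rows of [height, width, scale_factor]. Below is a minimal sketch of the clamping it presumably performs; the name and the one-box-per-row layout (4 columns, i.e. the class-agnostic case) are assumptions.

def clip_boxes_sketch(boxes, img_info):
    # boxes: (B, N, 4) as (x1, y1, x2, y2); img_info: (B, 3) rows of [h, w, scale]
    for b in range(boxes.size(0)):
        h, w = img_info[b, 0].item(), img_info[b, 1].item()
        boxes[b, :, 0].clamp_(0, w - 1)  # x1
        boxes[b, :, 1].clamp_(0, h - 1)  # y1
        boxes[b, :, 2].clamp_(0, w - 1)  # x2
        boxes[b, :, 3].clamp_(0, h - 1)  # y2
    return boxes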