def demo():
    '''Run single-image detection with a trained Faster R-CNN model and save a visualization.

    Reads the image path, backbone, dataset name, checkpoint path and thresholds from
    command-line arguments, runs one forward pass, decodes/clips/NMS-filters the boxes,
    and writes the annotated image to <cfg.TEST_BACKUPDIR>/demo_output.jpg.
    '''
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TEST_BACKUPDIR)
    logger_handle = Logger(cfg.TEST_LOGFILE)
    use_cuda = torch.cuda.is_available()
    clsnames = loadclsnames(cfg.CLSNAMESPATH)
    # prepare model
    if args.backbonename.find('resnet') != -1:
        model = FasterRCNNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle)
    else:
        raise ValueError('Unsupport backbonename <%s> now...' % args.backbonename)
    if use_cuda:
        model = model.cuda()
    # load checkpoints
    checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
    model.load_state_dict(checkpoints['model'])
    model.eval()
    # do detect
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    # NOTE(review): Image.open is not forced to RGB here; a palette/RGBA input would
    # reach preprocessImage as-is — confirm preprocessImage handles non-RGB modes.
    img = Image.open(args.imagepath)
    if args.datasetname == 'coco':
        input_img, scale_factor, target_size = COCODataset.preprocessImage(
            img,
            use_color_jitter=False,
            image_size_dict=cfg.IMAGESIZE_DICT,
            img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
            use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL)
    else:
        raise ValueError('Unsupport datasetname <%s> now...' % args.datasetname)
    input_img = input_img.unsqueeze(0).type(FloatTensor)
    # dummy ground truth: the model signature requires gt_boxes even in TEST mode
    gt_boxes = torch.FloatTensor([1, 1, 1, 1, 0]).unsqueeze(0).type(FloatTensor)
    img_info = torch.from_numpy(np.array([target_size[0], target_size[1], scale_factor])).unsqueeze(0).type(FloatTensor)
    num_gt_boxes = torch.FloatTensor([0]).unsqueeze(0).type(FloatTensor)
    with torch.no_grad():
        output = model(x=input_img, gt_boxes=gt_boxes, img_info=img_info, num_gt_boxes=num_gt_boxes)
    rois = output[0].data[..., 1:5]
    cls_probs = output[1].data
    bbox_preds = output[2].data
    # parse the results: undo the bbox-target normalization, then reshape according to
    # whether regression is class-agnostic (4 values) or per-class (4 * NUM_CLASSES).
    # The de-normalization itself is identical in both cases, so compute it once.
    box_deltas = bbox_preds.view(-1, 4) \
        * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) \
        + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor)
    if cfg.IS_CLASS_AGNOSTIC:
        box_deltas = box_deltas.view(1, -1, 4)
    else:
        box_deltas = box_deltas.view(1, -1, 4*cfg.NUM_CLASSES)
    boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas)
    boxes_pred = BBoxFunctions.clipBoxes(boxes_pred, img_info.data)
    # NOTE(review): squeeze() assumes more than one proposal survives; with exactly one
    # row it would also drop the box dimension — confirm the proposal count is always > 1.
    boxes_pred = boxes_pred.squeeze()
    scores = cls_probs.squeeze()
    thresh = 0.05  # low pre-NMS score cut-off; final filtering uses args.confthresh
    # drawing objects are loop-invariant: create the Draw wrapper and load the font once
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype('libs/font.TTF', 25)
    color = (0, 255, 0)
    # class 0 is background, so start from 1
    for j in range(1, cfg.NUM_CLASSES):
        idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
        if idxs.numel() > 0:
            cls_scores = scores[:, j][idxs]
            _, order = torch.sort(cls_scores, 0, True)
            if cfg.IS_CLASS_AGNOSTIC:
                cls_boxes = boxes_pred[idxs, :]
            else:
                cls_boxes = boxes_pred[idxs][:, j*4: (j+1)*4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            _, keep_idxs = nms(cls_dets, args.nmsthresh)
            cls_dets = cls_dets[keep_idxs.view(-1).long()]
            for cls_det in cls_dets:
                if cls_det[-1] > args.confthresh:
                    x1, y1, x2, y2 = cls_det[:4]
                    # map coordinates back to the original (un-resized) image space
                    x1 = x1.item() / scale_factor
                    x2 = x2.item() / scale_factor
                    y1 = y1.item() / scale_factor
                    y2 = y2.item() / scale_factor
                    label = clsnames[j-1]
                    logger_handle.info('Detect a %s in confidence %.4f...' % (label, cls_det[-1].item()))
                    draw.line([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)], width=2, fill=color)
                    draw.text((x1+5, y1), label, fill=color, font=font)
    img.save(os.path.join(cfg.TEST_BACKUPDIR, 'demo_output.jpg'))
def train():
    '''Train a Faster R-CNN model on the configured dataset.

    Reads dataset/backbone names and an optional resume checkpoint from command-line
    arguments; supports multi-GPU via nn.DataParallel, linear warmup (LR/3 until
    NUM_WARMUP_STEPS of epoch 1), scheduled LR drops at cfg.LR_ADJUST_EPOCHS, and
    periodic checkpointing into cfg.TRAIN_BACKUPDIR.
    '''
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TRAIN_BACKUPDIR)
    logger_handle = Logger(cfg.TRAIN_LOGFILE)
    use_cuda = torch.cuda.is_available()
    is_multi_gpus = cfg.IS_MULTI_GPUS
    if is_multi_gpus:
        assert use_cuda
    # prepare dataset
    if args.datasetname == 'coco':
        dataset = COCODataset(
            rootdir=cfg.DATASET_ROOT_DIR,
            image_size_dict=cfg.IMAGESIZE_DICT,
            max_num_gt_boxes=cfg.MAX_NUM_GT_BOXES,
            use_color_jitter=cfg.USE_COLOR_JITTER,
            img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
            use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL,
            mode='TRAIN',
            datasettype='train2017')
        # NearestRatioRandomSampler batches images of similar aspect ratio to limit padding
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.BATCHSIZE,
            sampler=NearestRatioRandomSampler(dataset.img_ratios, cfg.BATCHSIZE),
            num_workers=cfg.NUM_WORKERS,
            collate_fn=COCODataset.paddingCollateFn,
            pin_memory=cfg.PIN_MEMORY)
    else:
        raise ValueError('Unsupport datasetname <%s> now...' % args.datasetname)
    # prepare model
    if args.backbonename.find('resnet') != -1:
        model = FasterRCNNResNets(mode='TRAIN', cfg=cfg, logger_handle=logger_handle)
    else:
        raise ValueError('Unsupport backbonename <%s> now...' % args.backbonename)
    start_epoch = 1
    end_epoch = cfg.MAX_EPOCHS
    if use_cuda:
        model = model.cuda()
    # prepare optimizer: start at LR/3 when warming up; restored to the scheduled LR
    # once NUM_WARMUP_STEPS batches of epoch 1 have run (see the warmup branch below)
    learning_rate_idx = 0
    if cfg.IS_USE_WARMUP:
        learning_rate = cfg.LEARNING_RATES[learning_rate_idx] / 3
    else:
        learning_rate = cfg.LEARNING_RATES[learning_rate_idx]
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=learning_rate,
                          momentum=cfg.MOMENTUM,
                          weight_decay=cfg.WEIGHT_DECAY)
    # check checkpoints path: resume model/optimizer state and replay the LR schedule
    # so learning_rate_idx matches the resumed epoch
    if args.checkpointspath:
        checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
        model.load_state_dict(checkpoints['model'])
        optimizer.load_state_dict(checkpoints['optimizer'])
        start_epoch = checkpoints['epoch'] + 1
        for epoch in range(1, start_epoch):
            if epoch in cfg.LR_ADJUST_EPOCHS:
                learning_rate_idx += 1
    # data parallel
    if is_multi_gpus:
        model = nn.DataParallel(model)
    # print config
    logger_handle.info('Dataset used: %s, Number of images: %s' % (args.datasetname, len(dataset)))
    logger_handle.info('Backbone used: %s' % args.backbonename)
    logger_handle.info('Checkpoints used: %s' % args.checkpointspath)
    logger_handle.info('Config file used: %s' % cfg_file_path)
    # train
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    for epoch in range(start_epoch, end_epoch + 1):
        # --set train mode
        if is_multi_gpus:
            model.module.setTrain()
        else:
            model.setTrain()
        # --adjust learning rate
        if epoch in cfg.LR_ADJUST_EPOCHS:
            learning_rate_idx += 1
            adjustLearningRate(optimizer=optimizer,
                               target_lr=cfg.LEARNING_RATES[learning_rate_idx],
                               logger_handle=logger_handle)
        # --log info
        logger_handle.info('Start epoch %s, learning rate is %s...' % (epoch, cfg.LEARNING_RATES[learning_rate_idx]))
        # --train epoch
        for batch_idx, samples in enumerate(dataloader):
            # end of warmup: restore the full scheduled learning rate
            if (epoch == 1) and (cfg.IS_USE_WARMUP) and (batch_idx == cfg.NUM_WARMUP_STEPS):
                assert learning_rate_idx == 0, 'BUGS may exist...'
                adjustLearningRate(optimizer=optimizer,
                                   target_lr=cfg.LEARNING_RATES[learning_rate_idx],
                                   logger_handle=logger_handle)
            optimizer.zero_grad()
            img_ids, imgs, gt_boxes, img_info, num_gt_boxes = samples
            output = model(x=imgs.type(FloatTensor),
                           gt_boxes=gt_boxes.type(FloatTensor),
                           img_info=img_info.type(FloatTensor),
                           num_gt_boxes=num_gt_boxes.type(FloatTensor))
            rois, cls_probs, bbox_preds, rpn_cls_loss, rpn_reg_loss, loss_cls, loss_reg = output
            # .mean() collapses per-GPU losses under DataParallel
            loss = rpn_cls_loss.mean() + rpn_reg_loss.mean() + loss_cls.mean() + loss_reg.mean()
            # fixed typo: '[BTACH]' -> '[BATCH]'
            logger_handle.info('[EPOCH]: %s/%s, [BATCH]: %s/%s, [LEARNING_RATE]: %s, [DATASET]: %s \n\t [LOSS]: rpn_cls_loss %.4f, rpn_reg_loss %.4f, loss_cls %.4f, loss_reg %.4f, total %.4f' % \
                (epoch, end_epoch, (batch_idx+1), len(dataloader), cfg.LEARNING_RATES[learning_rate_idx], args.datasetname, rpn_cls_loss.mean().item(), rpn_reg_loss.mean().item(), loss_cls.mean().item(), loss_reg.mean().item(), loss.item()))
            loss.backward()
            optimizer.step()
        # --save model
        if (epoch % cfg.SAVE_INTERVAL == 0) or (epoch == end_epoch):
            state_dict = {
                'epoch': epoch,
                # unwrap DataParallel so the checkpoint loads without the 'module.' prefix
                'model': model.module.state_dict() if is_multi_gpus else model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            savepath = os.path.join(cfg.TRAIN_BACKUPDIR, 'epoch_%s.pth' % epoch)
            saveCheckpoints(state_dict, savepath, logger_handle)
def test():
    '''Evaluate a trained Faster R-CNN model on a COCO-style dataset.

    Runs detection over the whole dataset, writes COCO-format results to
    cfg.TEST_BBOXES_SAVE_PATH, and (for val2017) triggers the dataset's
    built-in detection evaluation.
    '''
    # prepare base things
    args = parseArgs()
    cfg, cfg_file_path = getCfgByDatasetAndBackbone(datasetname=args.datasetname, backbonename=args.backbonename)
    checkDir(cfg.TEST_BACKUPDIR)
    logger_handle = Logger(cfg.TEST_LOGFILE)
    use_cuda = torch.cuda.is_available()
    clsnames = loadclsnames(cfg.CLSNAMESPATH)
    # prepare dataset
    if args.datasetname == 'coco':
        dataset = COCODataset(
            rootdir=cfg.DATASET_ROOT_DIR,
            image_size_dict=cfg.IMAGESIZE_DICT,
            max_num_gt_boxes=-1,
            use_color_jitter=False,
            img_norm_info=cfg.IMAGE_NORMALIZE_INFO,
            use_caffe_pretrained_model=cfg.USE_CAFFE_PRETRAINED_MODEL,
            mode='TEST',
            datasettype=args.datasettype,
            annfilepath=args.annfilepath)
    else:
        raise ValueError('Unsupport datasetname <%s> now...' % args.datasetname)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
    # prepare model
    if args.backbonename.find('resnet') != -1:
        model = FasterRCNNResNets(mode='TEST', cfg=cfg, logger_handle=logger_handle)
    else:
        raise ValueError('Unsupport backbonename <%s> now...' % args.backbonename)
    if use_cuda:
        model = model.cuda()
    # load checkpoints
    checkpoints = loadCheckpoints(args.checkpointspath, logger_handle)
    model.load_state_dict(checkpoints['model'])
    model.eval()
    # test mAP
    FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    results = []
    img_ids = []
    for batch_idx, samples in enumerate(dataloader):
        logger_handle.info('detect %s/%s...' % (batch_idx + 1, len(dataloader)))
        # --do detect
        img_id, img, w_ori, h_ori, gt_boxes, img_info, num_gt_boxes = samples
        img_id, w_ori, h_ori, scale_factor = int(img_id.item()), w_ori.item(), h_ori.item(), img_info[0][-1].item()
        img_ids.append(img_id)
        with torch.no_grad():
            output = model(x=img.type(FloatTensor),
                           gt_boxes=gt_boxes.type(FloatTensor),
                           img_info=img_info.type(FloatTensor),
                           num_gt_boxes=num_gt_boxes.type(FloatTensor))
        rois = output[0].data[..., 1:5]
        cls_probs = output[1].data
        bbox_preds = output[2].data
        # --parse the results: undo bbox-target normalization once, then reshape for
        # class-agnostic (4) or per-class (4 * NUM_CLASSES) regression
        box_deltas = bbox_preds.view(-1, 4) \
            * torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_STDS).type(FloatTensor) \
            + torch.FloatTensor(cfg.TEST_BBOX_NORMALIZE_MEANS).type(FloatTensor)
        if cfg.IS_CLASS_AGNOSTIC:
            box_deltas = box_deltas.view(1, -1, 4)
        else:
            box_deltas = box_deltas.view(1, -1, 4 * cfg.NUM_CLASSES)
        boxes_pred = BBoxFunctions.decodeBboxes(rois, box_deltas)
        # clip to the resized image extent (original size * scale_factor)
        boxes_pred = BBoxFunctions.clipBoxes(
            boxes_pred,
            torch.from_numpy(np.array([h_ori * scale_factor, w_ori * scale_factor, scale_factor])).unsqueeze(0).type(FloatTensor).data)
        # NOTE(review): squeeze() assumes more than one proposal survives; a single row
        # would also lose the box dimension — confirm the proposal count is always > 1.
        boxes_pred = boxes_pred.squeeze()
        scores = cls_probs.squeeze()
        thresh = 0.05  # standard low score cut-off before NMS for mAP evaluation
        # class 0 is background, so start from 1
        for j in range(1, cfg.NUM_CLASSES):
            idxs = torch.nonzero(scores[:, j] > thresh).view(-1)
            if idxs.numel() > 0:
                cls_scores = scores[:, j][idxs]
                _, order = torch.sort(cls_scores, 0, True)
                if cfg.IS_CLASS_AGNOSTIC:
                    cls_boxes = boxes_pred[idxs, :]
                else:
                    cls_boxes = boxes_pred[idxs][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                _, keep_idxs = nms(cls_dets, args.nmsthresh)
                cls_dets = cls_dets[keep_idxs.view(-1).long()]
                for cls_det in cls_dets:
                    # NOTE(review): .get(j) returns None for an unmapped class id and
                    # int(None) would raise — confirm every j in [1, NUM_CLASSES) is mapped
                    category_id = dataset.clsids2cococlsids_dict.get(j)
                    x1, y1, x2, y2, score = cls_det
                    # map back to original image coordinates
                    x1 = x1.item() / scale_factor
                    x2 = x2.item() / scale_factor
                    y1 = y1.item() / scale_factor
                    y2 = y2.item() / scale_factor
                    # COCO results use [x, y, width, height]
                    bbox = [x1, y1, x2, y2]
                    bbox[2] = bbox[2] - bbox[0]
                    bbox[3] = bbox[3] - bbox[1]
                    image_result = {
                        'image_id': img_id,
                        'category_id': int(category_id),
                        'score': float(score.item()),
                        'bbox': bbox
                    }
                    results.append(image_result)
    # write results with an explicitly closed file handle (original leaked the handle)
    with open(cfg.TEST_BBOXES_SAVE_PATH, 'w') as fp:
        json.dump(results, fp, indent=4)
    if args.datasettype in ['val2017']:
        dataset.doDetectionEval(img_ids, cfg.TEST_BBOXES_SAVE_PATH)