# NOTE(review): fragment of a detection test routine — the enclosing `def`
# header is missing from this snippet, so `args`, `imdb`, `roidb`,
# `ratio_list`, `ratio_index` and `fasterRCNN` come from the lost outer scope.
start = time.time()
  max_per_image = 100  # cap on detections kept per image (enforced later)

  vis = args.vis  # visualization flag from CLI args

  # Small positive threshold when visualizing avoids drawing near-zero-score
  # boxes; 0.0 keeps every detection for mAP evaluation.
  if vis:
    thresh = 0.05
  else:
    thresh = 0.0

  save_name = 'faster_rcnn_10'
  num_images = len(imdb.image_index)
  # all_boxes[class][image] collects per-class, per-image detection arrays
  # (Py2-style xrange — presumably imported/aliased elsewhere in the file).
  all_boxes = [[[] for _ in xrange(num_images)]
               for _ in xrange(imdb.num_classes)]

  output_dir = get_output_dir(imdb, save_name)
  dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                        imdb.num_classes, training=False, normalize = False)
  # shuffle=False keeps loader order aligned with imdb image indices.
  dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                            shuffle=False, num_workers=0,
                            pin_memory=True)

  data_iter = iter(dataloader)

  _t = {'im_detect': time.time(), 'misc': time.time()}
  det_file = os.path.join(output_dir, 'detections.pkl')

  fasterRCNN.eval()  # inference mode: freeze dropout / batch-norm statistics
  # Placeholder of shape (0, 5) for classes with no detections in an image.
  empty_array = np.transpose(np.array([[],[],[],[],[]]), (1,0))
  for i in range(num_images):
# --- Example #2 (snippet separator left over from scraping) ---
    # NOTE(review): fragment of a graph-rcnn test setup — the enclosing
    # function header is missing from this snippet.
    graphRCNN.eval()  ## evaluation mode

    start = time.time()
    max_per_image = 300  ## todo:??
    # thresh = -np.inf
    thresh = 0.01  # minimum class score for a box to be considered
    vis = args.vis

    save_name = 'graph_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] accumulates detections for later evaluation.
    all_boxes = [
        [[] for _ in xrange(num_images)]  ## todo:??
        for _ in xrange(imdb.num_classes)
    ]

    output_dir = get_output_dir(imdb, save_name)  ## todo??

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                          imdb.num_classes, training=False, normalize = False)

    # shuffle=False keeps loader order aligned with imdb image indices.
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True)  ## todo:pin_memory? shuffle?

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
def test_model_while_training(fasterRCNN, args):
    """Run `fasterRCNN` over the target-domain validation imdb and evaluate it.

    Builds the roidb for ``args.imdbval_name_target``, streams every image
    through the network one at a time, applies bbox regression, per-class NMS
    and a per-image detection cap, then calls ``imdb.evaluate_detections``.

    Args:
        fasterRCNN: a trained detection model (already on the right device).
        args: parsed CLI namespace (cfg_file, set_cfgs, cuda, load_name,
            imdbval_name_target, class_agnostic, vis).

    NOTE(review): `_t` and `det_file` are computed but never used here —
    detections are evaluated in memory and not pickled, unlike sibling
    routines in this file.
    """

    # args = parse_args()
    # args = set_dataset_args(args, test=True)
    # np.random.seed(cfg.RNG_SEED)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # Never use flipped images at test time.
    cfg.TRAIN.USE_FLIPPED = False

    # args.imdbval_name = 'clipart_test'

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name_target, False)

    # breakpoint()

    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # Reusable input holders; resized/copied per image inside the loop below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # if args.cuda:
    #   fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100  # cap on detections kept per image

    thresh = 0.0  # keep every scored box; evaluation handles ranking

    save_name = args.load_name.split('/')[-1]
    num_images = len(imdb.image_index)
    # all_boxes[class][image] accumulates per-class, per-image detections.
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                          imdb.num_classes, training=False, normalize = False, path_return=True)
    # batch_size=1 and shuffle=False keep index i aligned with imdb image i.
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()  # inference mode
    # (0, 5)-shaped placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        # Copy the batch into the pre-allocated holders.
        im_data.data.resize_(data[0].size()).copy_(data[0])
        #print(data[0].size())
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop the leading batch-index column
        # d_pred = d_pred.data
        path = data[4]  # image path (path_return=True above); currently unused

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image rescaling (data[1][0][2] is the scale).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Per-class thresholding + NMS; class 0 is background and skipped.
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Keep only the max_per_image highest-scoring boxes overall.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        # misc_toc = time.time()

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s \r' \
            .format(i + 1, num_images, detect_time))
        sys.stdout.flush()

    imdb.evaluate_detections(all_boxes, output_dir)
# --- Example #4 (snippet separator left over from scraping) ---
def eval_result(args, logger, epoch, output_dir):
    """Evaluate a ThunderNet/SNet checkpoint for one training epoch.

    Loads ``thundernet_epoch_{epoch}.pth`` from ``output_dir``, runs detection
    over the whole validation imdb (batch size forced to 1), applies soft-NMS
    per class, logs sample rendered images and the resulting mAP@0.5 to the
    tensorboard ``logger``, and pickles the raw detections.

    Args:
        args: parsed CLI namespace (cuda, net, imdbval_name, dataset,
            class_agnostic, vis, use_tfboard, ...).
        logger: tensorboard-style writer exposing add_image / add_scalar.
        epoch: epoch number used to locate the checkpoint and tag scalars.
        output_dir: directory containing the checkpoint.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    args.batch_size = 1  # evaluation is strictly one image at a time
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)

    imdb.competition_mode(on=True)

    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch, ))

    # e.g. "snet_49" -> backbone depth 49
    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes,
                 layer,
                 pretrained_path=None,
                 class_agnostic=args.class_agnostic)

    _RCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage
                                )  # Load all tensors onto the CPU
    _RCNN.load_state_dict(checkpoint['model'])

    # Reusable input holders, resized/copied per image inside the loop below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # hm = torch.FloatTensor(1)
    # reg_mask = torch.LongTensor(1)
    # wh = torch.FloatTensor(1)
    # offset = torch.FloatTensor(1)
    # ind = torch.LongTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        # hm = hm.cuda()
        # reg_mask = reg_mask.cuda()
        # wh = wh.cuda()
        # offset = offset.cuda()
        # ind = ind.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
        # hm = Variable(hm)
        # reg_mask = Variable(reg_mask)
        # wh = Variable(wh)
        # offset = Variable(offset)
        # ind = Variable(ind)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        _RCNN.cuda()

    start = time.time()
    max_per_image = 100  # cap on detections kept per image

    vis = True  # always render detections; samples are pushed to tensorboard

    # NOTE: the original `if vis: thresh = 0.5 else: thresh = 0.5` set the
    # same value on both branches; the dead conditional is collapsed here.
    thresh = 0.5

    save_name = args.net
    num_images = len(imdb.image_index)
    # all_boxes[class][image] accumulates per-class, per-image detections.
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(args.dataset, save_name)
    # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
    #                          imdb.num_classes, training=False, normalize=False)
    # dataset = roibatchLoader(roidb, imdb.num_classes, training=False)
    dataset = Detection(roidb,
                        num_classes=imdb.num_classes,
                        transform=BaseTransform(cfg.TEST.SIZE,
                                                cfg.PIXEL_MEANS),
                        training=False)

    # shuffle=False keeps loader order aligned with imdb image indices.
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    _RCNN.eval()  # inference mode

    # (0, 5)-shaped placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):

        data = next(data_iter)

        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
            # hm.resize_(data[4].size()).copy_(data[4])
            # reg_mask.resize_(data[5].size()).copy_(data[5])
            # wh.resize_(data[6].size()).copy_(data[6])
            # offset.resize_(data[7].size()).copy_(data[7])
            # ind.resize_(data[8].size()).copy_(data[8])

        det_tic = time.time()
        with torch.no_grad():
            time_measure, \
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes,
                               # hm,reg_mask,wh,offset,ind
                               )

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop the leading batch-index column

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # pred_boxes /= data[1][0][2].item()
        # Undo the per-axis test-time rescaling (x scale at [2], y at [3]).
        pred_boxes[:, :, 0::2] /= data[1][0][2].item()
        pred_boxes[:, :, 1::2] /= data[1][0][3].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + soft-NMS; class 0 is background, skipped.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                #keep = gpu_nms(cls_dets.cpu().numpy(), cfg.TEST.NMS)
                #keep = torch.from_numpy(np.array(keep))

                # method=0 is plain (hard) NMS in the soft-NMS implementation.
                cls_dets_np = cls_dets.cpu().numpy()
                keep = cpu_soft_nms(cls_dets_np,
                                    sigma=0.7,
                                    Nt=0.5,
                                    threshold=0.4,
                                    method=0)
                cls_dets_np = cls_dets_np[keep]

                #cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j - 1].tolist(), cls_dets_np,
                                   0.6)
                all_boxes[j][i] = cls_dets_np
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Keep only the max_per_image highest-scoring boxes overall.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write(
            'im_detect: {:d}/{:d} Detect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s) NMS: {:.3f}s\n' \
            .format(i + 1, num_images, detect_time, time_measure[0], time_measure[1], time_measure[2],
                    time_measure[3], nms_time))
        sys.stdout.flush()

        # Periodically push a rendered image (BGR -> RGB flip) to tensorboard.
        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)

            # cv2.imwrite('result.png', im2show)
            # pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
# --- Example #5 (snippet separator left over from scraping) ---
    # NOTE(review): fragment of a query-based (few-shot) detection test
    # routine — the enclosing function header is missing from this snippet.
    catgory = Variable(catgory)  # (sic: "catgory") query-category tensor
    gt_boxes = Variable(gt_boxes)

    # record time
    start = time.time()

    # visiualization
    vis = args.vis
    if vis:
        thresh = 0.05  # suppress near-zero-score boxes when drawing
    else:
        thresh = 0.0  # keep everything for mAP evaluation
    max_per_image = 100

    # create output Directory
    output_dir_vu = get_output_dir(
        imdb_vu, "{}-seen{}".format(args.model_type, args.seen))

    fasterRCNN.eval()  # inference mode
    # Repeat the pass for several query positions and average results.
    for avg in range(args.average):
        dataset_vu.query_position = avg
        dataloader_vu = torch.utils.data.DataLoader(dataset_vu,
                                                    batch_size=1,
                                                    shuffle=False,
                                                    num_workers=0,
                                                    pin_memory=True)

        data_iter_vu = iter(dataloader_vu)

        # total quantity of testing images, each images include multiple detect class
        num_images_vu = len(imdb_vu.image_index)
        num_detect = len(ratio_index_vu[0])
def validation(val_dataloader, epoch, model_name, val_imdb, args):
    """Run one validation pass with a freshly loaded ResNet-101 Faster R-CNN.

    Loads the checkpoint at ``model_name``, detects over ``val_dataloader``
    (expected batch size 1, ordered like ``val_imdb.image_index``), pickles
    the detections under a per-epoch PR-curve directory, and returns the
    dataset's evaluation metric (mAP).

    Args:
        val_dataloader: iterable yielding (im_data, im_info, gt_boxes,
            num_boxes) batches.
        epoch: epoch number, used in the detections filename and evaluation.
        model_name: path of the checkpoint to load.
        val_imdb: validation image database with evaluate_detections().
        args: parsed CLI namespace (cuda, class_agnostic, exp_group, ...).

    Returns:
        The value returned by ``val_imdb.evaluate_detections`` (mAP).
    """
    val_imdb.competition_mode(on=True)
    print('Start Validation')
    val_fasterRCNN = resnet(val_imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    val_fasterRCNN.create_architecture()

    print("load checkpoint %s" % model_name)
    checkpoint = torch.load(model_name)
    val_fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # Reusable input holders, resized/copied per batch inside the loop below.
    if args.cuda:
        val_im_data = torch.FloatTensor(1).cuda()
        val_im_info = torch.FloatTensor(1).cuda()
        val_num_boxes = torch.LongTensor(1).cuda()
        val_gt_boxes = torch.FloatTensor(1).cuda()
        val_fasterRCNN.cuda()
        cfg.CUDA = True
    else:
        val_im_data = torch.FloatTensor(1)
        val_im_info = torch.FloatTensor(1)
        val_num_boxes = torch.LongTensor(1)
        val_gt_boxes = torch.FloatTensor(1)

    val_im_data = Variable(val_im_data)
    val_im_info = Variable(val_im_info)
    val_num_boxes = Variable(val_num_boxes)
    val_gt_boxes = Variable(val_gt_boxes)

    start = time.time()
    # Maximum number of detections kept per image.
    max_per_image = 100

    thresh = 0.0  # keep every scored box; evaluation handles ranking

    save_name = 'val_' + args.exp_group
    num_images = len(val_imdb.image_index)
    # all_boxes[class][image] accumulates per-class, per-image detections
    # (a num_classes x num_images grid of lists).
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(val_imdb.num_classes)]

    output_dir = get_output_dir(val_imdb, save_name)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    save_dir = os.path.join(output_dir, f"PRCurves_{args.exp_group}")
    os.makedirs(save_dir, exist_ok=True)
    det_file = os.path.join(save_dir, f'epoch_{epoch}_detections.pkl')

    val_fasterRCNN.eval()  # inference mode
    # (0, 5)-shaped placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i, data in enumerate(val_dataloader):
        with torch.no_grad():
            val_im_data.resize_(data[0].size()).copy_(data[0])
            val_im_info.resize_(data[1].size()).copy_(data[1])
            val_gt_boxes.resize_(data[2].size()).copy_(data[2])
            val_num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        val_rois, val_cls_prob, val_bbox_pred, \
        val_rpn_loss_cls, val_rpn_loss_box, val_RCNN_loss_cls, \
        val_RCNN_loss_bbox, val_rois_label = val_fasterRCNN(val_im_data, val_im_info, val_gt_boxes, val_num_boxes)

        scores = val_cls_prob.data
        boxes = val_rois.data[:, :, 1:5]  # drop the leading batch-index column

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = val_bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(val_imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, val_im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image rescaling (data[1][0][2] is the scale).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Per-class thresholding + NMS; class 0 is background and skipped.
        for j in range(1, val_imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([
                all_boxes[j][i][:, -1] for j in range(1, val_imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                # Keep only the max_per_image highest-scoring boxes overall.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, val_imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'.format(
            i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # Renamed from `map` to avoid shadowing the builtin.
    mean_ap = val_imdb.evaluate_detections(all_boxes, epoch, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))

    return mean_ap
# --- Example #7 (snippet separator left over from scraping) ---
    # NOTE(review): fragment of a detection test setup — the enclosing
    # function header is missing from this snippet.
    vis = args.vis  #Visualization mode

    # A small positive threshold when visualizing avoids drawing near-zero
    # scoring boxes; 0.0 keeps everything for mAP evaluation.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    #Init bbox as list of lists (num_class * num_images per class(?) lists)
    all_boxes = [
        [[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)
    ]  # E.g. bounding boxes belonging to j th class and i th image is appended into all_boxes[j][i]

    output_dir = get_output_dir(imdb,
                                save_name)  # Output directory for results
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                          imdb.num_classes, training=False, normalize = False)
    # batch_size=1 and shuffle=False keep index i aligned with imdb image i.
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()  # inference mode
    # (0, 5)-shaped placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
# --- Example #8 (snippet separator left over from scraping) ---
def val(epoch, fasterRCNN, cfg):
    """Evaluate `fasterRCNN` on the validation imdb mid-training; return mAP.

    Builds the validation roidb fresh (with flipping disabled, then restored),
    streams every image through the network, applies bbox regression,
    per-class NMS and a per-image cap, then evaluates.

    Args:
        epoch: current training epoch (only used for the banner print).
        fasterRCNN: the model under training; put into eval() here (the
            caller is responsible for switching back to train()).
        cfg: the global detection config object.

    Returns:
        mAP reported by ``imdb_val.evaluate_detections``.

    NOTE(review): `im_data`, `im_info`, `gt_boxes`, `num_boxes` and `args`
    are not defined in this function and must exist in the enclosing/global
    scope — confirm at the call site.
    """
    print('=== start val in epoch {} ==='.format(epoch))

    # [val set]
    cfg.TRAIN.USE_FLIPPED = False
    cfg.USE_GPU_NMS = args.cuda
    imdb_val, roidb_val, ratio_list_val, ratio_index_val = combined_roidb(
        args.imdbval_name, False)
    imdb_val.competition_mode(on=True)
    val_size = len(roidb_val)
    print('{:d} val roidb entries'.format(len(roidb_val)))
    cfg.TRAIN.USE_FLIPPED = True  # change again for training

    # [val dataset]
    dataset_val = roibatchLoader(roidb_val, ratio_list_val, ratio_index_val, 1, \
                                 imdb_val.num_classes, training=False, normalize_as_imagenet=True)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=0)

    # print(' == forcibly insert checkpoint loading == ')
    # load_name = './models/ImgNet_pre/vgg16/coco/train_all/imagenet_0/head_1.pth'
    # print('load {}'.format(load_name))
    # checkpoint = torch.load(load_name)
    # fasterRCNN.load_state_dict(checkpoint['model'])

    output_dir = get_output_dir(imdb_val, 'val_in_training')
    data_iter_val = iter(dataloader_val)
    num_images = len(imdb_val.image_index)
    thresh = 0.0  # keep every scored box; evaluation handles ranking
    max_per_image = 100  # cap on detections kept per image
    # all_boxes[class][image] accumulates per-class, per-image detections.
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb_val.num_classes)]

    # import ipdb; ipdb.set_trace()
    fasterRCNN.eval()
    # (0, 5)-shaped placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):
        data = next(data_iter_val)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred = fasterRCNN(im_data, im_info, gt_boxes,
                                               num_boxes)
        # rois_val, cls_prob_val, bbox_pred_val, \
        # rpn_loss_cls_val, rpn_loss_box_val, \
        # RCNN_loss_cls_val, RCNN_loss_bbox_val, \
        # rois_label_val = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # drop the leading batch-index column

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    # BUG FIX: the original referenced the undefined/global
                    # `imdb`; this function's image database is `imdb_val`.
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb_val.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image rescaling (data[1][0][2] is the scale).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        # Per-class thresholding + NMS; class 0 is background and skipped.
        # (also fixed here: `imdb` -> `imdb_val`)
        for j in range(1, imdb_val.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb_val.num_classes)])
            if len(image_scores) > max_per_image:
                # Keep only the max_per_image highest-scoring boxes overall.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb_val.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    print('Evaluating detections')
    mAP = imdb_val.evaluate_detections(all_boxes, output_dir, result_file=None)
    del dataset_val, dataloader_val
    return mAP
            # NOTE(review): fragment of a per-epoch test loop — the enclosing
            # function/if header is missing from this snippet.
            fasterRCNN.cuda()

        max_per_image = 100  # cap on detections kept per image

        vis = args.vis

        # Small positive threshold when visualizing; 0.0 keeps everything.
        if vis:
            thresh = 0.05
            print("Amb vis")  # Catalan: "with vis"
        else:
            thresh = 0.0
            print("Sense vis")  # Catalan: "without vis"

        save_name = 'faster_rcnn_10'

        output_dir = get_output_dir(imdb, save_name, epoch)
        print(output_dir)
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                              imdb.num_classes, training=False, normalize = False)
        num_images = len(dataset)

        # all_boxes[class][image] accumulates per-class, per-image detections.
        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
        # shuffle=False keeps loader order aligned with dataset indices.
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=0,
                                                 pin_memory=True)

        data_iter = iter(dataloader)
# --- Example #10 (snippet separator left over from scraping) ---
def test_adv(step_size=0.01,
             num_steps=0,
             dataset='coco',
             batch_size=1,
             weights='weights/voc_pretrained.npy',
             save=False,
             grad_cam=False):
    """Evaluate a Faster R-CNN (ResNet-50) detector, optionally attacking
    each image with PGD before running detection.

    Args:
        step_size (float): PGD step size. The attack is skipped when
            ``num_steps * step_size <= 0``.
        num_steps (int): number of PGD iterations.
        dataset (str): one of 'pascal_voc', 'pascal_voc_0712', 'coco',
            'imagenet', 'vg'; selects the validation imdb.
        batch_size (int): data-loader batch size (the post-processing below
            reshapes per image, so 1 is the intended value — TODO confirm).
        weights (str): checkpoint path; must end in '.pt' (torch checkpoint
            with a 'model' dict) or '.npy' (numpy dict of arrays).
        save (bool): write per-image detection visualizations to
            'detect_adv_normal/'.
        grad_cam (bool): also produce Grad-CAM activation maps (keeps model
            gradients enabled for the backward pass).

    Returns:
        mAP as reported by ``imdb.evaluate_detections``.

    Raises:
        ValueError: unknown ``dataset`` key or unsupported ``weights`` format.
    """
    if save:
        p_t1 = 'detect_adv_normal'
        if not os.path.exists(p_t1):
            os.makedirs(p_t1)

    cfg_file = 'cfgs/res50.yml'
    cfg_from_file(cfg_file)
    cfg.POOLING_MODE = 'align'
    cfg.TRAIN.USE_FLIPPED = False
    init_seeds(cfg.RNG_SEED)

    # NOTE(review): imdb_name and set_cfgs are assigned but never used below
    # (set_cfgs is never passed to cfg_from_list) — kept for parity with the
    # sibling scripts; confirm whether the anchor overrides were intended.
    if dataset == "pascal_voc":
        imdb_name = "voc_2007_trainval"
        imdbval_name = "voc_2007_test"
        set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif dataset == "pascal_voc_0712":
        imdb_name = "voc_2007_trainval+voc_2012_trainval"
        imdbval_name = "voc_2007_test"
        set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif dataset == "coco":
        imdb_name = "coco_2014_train+coco_2014_valminusminival"
        imdbval_name = "coco_2014_minival"
        set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif dataset == "imagenet":
        imdb_name = "imagenet_train"
        imdbval_name = "imagenet_val"
        set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif dataset == "vg":
        imdb_name = "vg_150-50-50_minitrain"
        imdbval_name = "vg_150-50-50_minival"
        set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    else:
        # Fail fast: previously an unknown key surfaced later as a confusing
        # NameError on imdbval_name.
        raise ValueError('unknown dataset: {}'.format(dataset))

    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdbval_name,
                                                          training=False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))

    model = resnet(imdb.classes, 50, pretrained=False, class_agnostic=False)
    print("load checkpoint %s" % (weights))
    if weights.endswith('.pt'):
        checkpoint = torch.load(weights)
        # Keep only entries whose element count matches the current model.
        checkpoint['model'] = {
            k: v
            for k, v in checkpoint['model'].items()
            if model.state_dict()[k].numel() == v.numel()
        }
        model.load_state_dict(checkpoint['model'], strict=True)
    elif weights.endswith('.npy'):
        checkpoint = np.load(weights, allow_pickle=True).item()
        model_dict = {
            k: torch.from_numpy(checkpoint[k])
            for k in checkpoint.keys() if model.state_dict()[k].numel() ==
            torch.from_numpy(checkpoint[k]).numel()
        }
        model.load_state_dict(model_dict, strict=True)
    else:
        # Previously fell through with `checkpoint` undefined and crashed on
        # the `del checkpoint` below with an unrelated NameError.
        raise ValueError('unsupported weights format: {}'.format(weights))
    model.cuda().eval()
    del checkpoint
    print('load model successfully!')
    if not grad_cam:
        # Freeze parameters: only the input image needs gradients for PGD.
        for param in model.parameters():
            param.requires_grad = False
    model_adv = PGD(model)

    max_per_image = 100
    vis = False
    thresh = 0.001
    iou_thre = 0.5
    # One random BGR color per class for visualization.
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(imdb.classes))]

    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array of detections (x1,y1,x2,y2,score).
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    save_name = 'v1'
    output_dir = get_output_dir(imdb, save_name)
    test_dataset = roibatchLoader(roidb, ratio_list, ratio_index, batch_size, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    # Reusable holders, resized in place per batch to avoid reallocation.
    im_data = torch.FloatTensor(1).cuda()
    im_info = torch.FloatTensor(1).cuda()
    num_boxes = torch.LongTensor(1).cuda()
    gt_boxes = torch.FloatTensor(1).cuda()

    for i in range(num_images):

        data = next(data_iter)

        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        if vis or save or grad_cam:
            # im2show is needed by the visualization paths AND the Grad-CAM
            # branch (previously undefined when only grad_cam was set).
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        if save or grad_cam:
            # Fix: file_name was only assigned under `save`, crashing with a
            # NameError when grad_cam=True and save=False.
            file_name = imdb.image_path_at(i).split('/')[-1]

        with torch.enable_grad():
            if num_steps * step_size > 0:
                im_adv = model_adv.adv_sample_infer(im_data,
                                                    im_info,
                                                    gt_boxes,
                                                    num_boxes,
                                                    step_size,
                                                    num_steps=num_steps)
            else:
                # Attack disabled: evaluate on the clean image.
                im_adv = im_data

            if grad_cam:
                model.eval()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, conv_output = model(im_adv, im_info, gt_boxes, num_boxes, grad_cam=True)

                # Backprop all foreground class scores at once.
                one_hot_output = torch.zeros_like(cls_prob)
                one_hot_output[0][:, 0:-1] = 1

                model.zero_grad()
                cls_prob.backward(gradient=one_hot_output, retain_graph=True)
                guided_gradients = model.gradients.cpu().data.numpy()[0]
                target = conv_output.cpu().data.numpy()[0]
                ws = np.mean(guided_gradients,
                             axis=(1, 2))  # take averages for each gradient
                # create empty numpy array for cam
                cam = np.ones(target.shape[1:], dtype=np.float32)
                # multiply each weight with its conv output and then, sum
                for l, w in enumerate(ws):
                    cam += w * target[l, :, :]
                cam = np.maximum(cam, 0)
                cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam)
                                             )  # normalize between 0-1
                cam = np.uint8(cam * 255)  # scale between 0-255 to visualize
                im_rgb = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
                # NOTE(review): Image.ANTIALIAS is deprecated in newer Pillow
                # (use Image.LANCZOS) — confirm the pinned Pillow version.
                cam = np.uint8(
                    Image.fromarray(cam).resize(
                        (im_rgb.shape[1], im_rgb.shape[0]),
                        Image.ANTIALIAS)) / 255
                original_image = Image.fromarray(im_rgb)
                save_class_activation_images(original_image, cam, file_name)


        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = model(im_adv, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # Un-normalize the regression deltas and decode/clip the boxes.
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(batch_size, -1,
                                     4 * (imdb.num_classes - 1))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        scores = scores.squeeze()
        # Rescale boxes back to the original image size.
        pred_boxes /= data[1][0][2].item()
        pred_boxes = pred_boxes.squeeze()
        nms_cfg = {'type': 'nms', 'iou_threshold': iou_thre}
        det_bboxes, det_labels = multiclass_nms(pred_boxes, scores, thresh,
                                                nms_cfg, max_per_image)
        keep = det_bboxes[:, 4] > thresh
        det_bboxes = det_bboxes[keep]
        det_labels = det_labels[keep]

        # Scatter the kept detections into all_boxes, per class.
        for j in range(0, imdb.num_classes - 1):
            inds = torch.nonzero(det_labels == j, as_tuple=False).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_dets = det_bboxes[inds]
                if vis or save:
                    im2show = vis_detections(im2show,
                                             imdb.classes[j],
                                             cls_dets.cpu().numpy(),
                                             color=colors[int(j)])
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([
                all_boxes[j][i][:, -1] for j in range(0, imdb.num_classes - 1)
            ])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(0, imdb.num_classes - 1):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        if save:
            cv2.imwrite(os.path.join(p_t1, file_name.replace('jpg', 'png')),
                        im2show)
        elif vis:
            cv2.imwrite('result.png', im2show)
        if i % 200 == 0:
            print(i, 'waiting.....')

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)
    return mAP
示例#11
0
def run(args):
    """Run single-image-batch evaluation of a trained Faster R-CNN checkpoint.

    Builds the validation imdb selected by ``args.dataset``, loads the
    checkpoint named by ``args.checksession/checkepoch/checkpoint`` from
    ``args.load_dir``, runs detection with soft-NMS post-processing, writes
    ``detections.pkl`` to the output directory, and calls
    ``imdb.evaluate_detections``.

    Expected ``args`` attributes (argparse namespace — not all are verifiable
    from this block): dataset, net, cuda, vis, large_scale, load_dir,
    checksession, checkepoch, checkpoint, class_agnostic.
    """
    # NOTE(review): lr / momentum / weight_decay are read but never used in
    # this evaluation-only path.
    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    # Py2/Py3 shim: on Python 3 the lookup raises UnboundLocalError (a
    # subclass of NameError, since xrange is assigned locally below), so the
    # except branch binds xrange = range either way.
    try:
        xrange  # Python 2
    except NameError:
        xrange = range  # Python 3

    #args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    # Map the dataset key to its validation imdb name and anchor overrides.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # No flipping at test time.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # Initialize the network backbone selected by args.net.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # Initialize reusable tensor holders; each batch is copied into them
    # in place below (resize_/copy_) to avoid per-iteration allocation.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = args.vis

    # A higher threshold when visualizing keeps the drawn image readable;
    # 0.0 keeps every scored box for the mAP evaluation.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 detections (x1, y1, x2, y2, score).
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    #pdb.set_trace()
    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                          imdb.num_classes, training=False, normalize = False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image scaling (data[1][0][2] is the scale factor).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class post-processing; class 0 is background and skipped.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                # cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # Soft-NMS returns the re-scored detections directly (not
                # indices), hence the direct assignment below.
                keep = softnms_cpu_torch(cls_dets)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = keep
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            # NOTE(review): pdb.set_trace() pauses after every image when
            # --vis is on; intended for interactive inspection only.
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
示例#12
0
def main(scene_img_path, query_img_path):
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    # imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False)
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    # imdb_vs.competition_mode(on=True)
    imdb_vu.competition_mode(on=True)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    max_per_image = 100

    thresh = 0.05

    # output_dir_vs = get_output_dir(imdb_vs, 'faster_rcnn_seen')
    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')
    all_weight = np.zeros((len(ratio_index_vu[0]), 1024))
    all_times = np.zeros((imdb_vu.num_classes))

    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu.num_classes,
                                training=False,
                                seen=args.seen)
    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg

    num_images_vu = len(imdb_vu.image_index)

    all_boxes = [[[] for _ in xrange(num_images_vu)]
                 for _ in xrange(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    i = 0
    index = 0

    data = [0, 0, 0, 0, 0]
    # version = 'custom'      # coco is completed
    # if version == 'coco':
    #     im = imread('/home/yjyoo/PycharmProjects/data/coco/images/val2017/000000397133.jpg')
    #     query_im = imread('/home/yjyoo/PycharmProjects/data/coco/images/val2017/000000007816.jpg')
    #     query_im = crop(query_im, [505.54, 53.01, 543.08, 164.09], size=128)
    # else:
    im = imread(scene_img_path)
    im = cv2.resize(im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im,
                          dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _im = np.copy(im)
    _query_im = np.copy(query_im)
    # make im_data

    im, im_scale = prep_im_for_blob(im, target_size=600)
    im = torch.tensor(im)
    im = torch.unsqueeze(im, 0)
    im = im.transpose(1, 3)
    im_data = im.transpose(2, 3)

    # make query data

    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)

    im_data = data[0] = im_data.cuda()
    query = data[1] = query.cuda()
    im_info = data[2] = torch.tensor([[600, 899, 1.4052]])
    gt_boxes = data[3] = torch.rand(1, 4, 5)  # don't care
    catgory = data[4] = torch.tensor([1])

    det_tic = time.time()
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, _, RCNN_loss_bbox, \
    rois_label, weight = fasterRCNN(im_data, query, im_info, gt_boxes, catgory)

    # all_weight[data[4],:] = all_weight[data[4],:] + weight.view(-1).detach().cpu().numpy()
    all_weight[i, :] = weight.view(-1).detach().cpu().numpy()
    all_times[data[4]] = all_times[data[4]] + 1

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= data[2][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    im2show = np.copy(_im)

    inds = torch.nonzero(scores > thresh).view(-1)
    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        all_boxes[data[4]][index] = cls_dets.cpu().numpy()

        im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.8)
        _im2show = np.concatenate((im2show, _query_im), axis=1)
        plt.imshow(_im2show)
        plt.show()

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
        try:
            image_scores = all_boxes[data[4]][index][:, -1]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]

                keep = np.where(
                    all_boxes[data[4]][index][:, -1] >= image_thresh)[0]
                all_boxes[data[4]][index] = all_boxes[data[4]][index][keep, :]
        except:
            pass

    misc_toc = time.time()

    o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy()
    o_query *= [0.229, 0.224, 0.225]
    o_query += [0.485, 0.456, 0.406]
    o_query *= 255
    o_query = o_query[:, :, ::-1]

    (h, w, c) = im2show.shape
    o_query = cv2.resize(o_query, (h, h), interpolation=cv2.INTER_LINEAR)
    o_query = cv2.cvtColor(o_query, cv2.COLOR_BGR2RGB)

    im2show = np.concatenate((im2show, o_query), axis=1)
    im2show = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)

    cv2.imwrite('./test_img/%d.png' % (i), im2show)
    gt_boxes = Variable(gt_boxes)

    # record time
    start = time.time()

    # visiualization
    vis = args.vis
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    max_per_image = 100

    # create output Directory
    output_dir_vu = get_output_dir(
        imdb_vu,
        'faster_rcnn_sketch_oneshot_nn_gated_bmm_max_pool_attention_concat_att_incremental_full_model_pyramid_attention_4_levels_addative_fusion_1_13_12370_full'
    )
    # faster_rcnn_image_oneshot_mixup_v2_1_16_8628.pth
    fasterRCNN.eval()
    for avg in range(args.average):
        dataset_vu.query_position = avg
        dataloader_vu = torch.utils.data.DataLoader(dataset_vu,
                                                    batch_size=1,
                                                    shuffle=False,
                                                    num_workers=0,
                                                    pin_memory=True)

        data_iter_vu = iter(dataloader_vu)

        # total quantity of testing images, each images include multiple detect class
        num_images_vu = len(imdb_vu.image_index)
示例#14
0
def get_ready(query_img_path):
    """Prepare the one-shot detector and a preprocessed query image.

    Loads the dataset/config, restores a res50 Faster R-CNN checkpoint and
    converts the image at *query_img_path* into a 1xCxHxW CUDA query tensor.
    Relies on module-level ``args`` and ``cfg`` for all configuration.

    Args:
        query_img_path: filesystem path of the query image.

    Returns:
        Tuple ``(fasterRCNN, all_boxes, query, _query_im)``: the eval-mode
        detector, the empty per-class/per-image detection store, the query
        tensor on GPU, and the resized query image (numpy array, kept for
        later visualisation).
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    # Pick a group-specific config when args.group is set, else the default.
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    imdb_vu.competition_mode(on=True)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # Initialise the network here; only res50 is supported by this script.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    # Older checkpoints may carry the pooling mode they were trained with.
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')

    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu.num_classes,
                                training=False,
                                seen=args.seen)
    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg

    num_images_vu = len(imdb_vu.image_index)

    # all_boxes[class][image]: detection store, filled in by the caller.
    all_boxes = [[[] for _ in xrange(num_images_vu)]
                 for _ in xrange(imdb_vu.num_classes)]

    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    # Make query data: load, resize to a fixed 640x480, keep a display copy,
    # then normalise for the network and reorder HWC -> 1xCxHxW.
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im,
                          dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _query_im = np.copy(query_im)
    query_im, _ = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)
    query = query.cuda()

    return fasterRCNN, all_boxes, query, _query_im
示例#16
0
    start = time.time()
    max_per_image = 100

    vis = args.vis

    # Visualisation uses a higher confidence floor to cut clutter; plain
    # evaluation keeps every detection (thresh 0.0).
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(roidb)
    # all_boxes[class][image] = N x 5 array of detections [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(num_classes)]
    #pdb.set_trace()
    output_dir = get_output_dir('valoiddata', save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                          num_classes, training=False, normalize = False)
    # Evaluation loader: deterministic order, batch of one image.
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # Placeholder 0x5 array used for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
示例#17
0
def evaluator(model, args, evl_rec=False):
    """Evaluate a Faster R-CNN *model* on the Pascal VOC test split.

    Builds the test roidb according to ``args.dataset``, runs detection over
    every image, applies per-class NMS and a per-image detection cap, then
    scores the results with ``imdb.evaluate_detections``.

    Args:
        model: a constructed (and loaded) fasterRCNN network.
        args: parsed command-line namespace (dataset, net, cuda, ...).
        evl_rec: when True, only measure RPN proposal recall and return that
            instead of running the full detection evaluation.

    Returns:
        mAP (float) in normal mode, or the average RPN recall when
        ``evl_rec`` is True.
    """

    fasterRCNN = model
    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    # No flipped images at test time.
    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # Tensor holders, resized in-place per batch inside the loop below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    # Visualisation is disabled in this evaluator; thresh 0.0 keeps everything.
    vis = False

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # These models are pytorch pretrained with RGB channel
    rgb = True if args.net in ('res18', 'res34', 'inception') else False

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
               imdb.num_classes, training=False, normalize = False, rgb=rgb)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # 0x5 placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    if evl_rec:
        true_postive, ground_truth = 0.0, 0.0
        recall = AverageMeter()

    for i in range(num_images):

        # data = (im_data, im_info, gt_boxes, num_boxes); copy into the
        # pre-allocated holders, resizing them to the current batch shape.
        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                          + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                          + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Rescale boxes back to the original image scale (data[1][0][2] is the
        # image resize factor applied during loading).
        pred_boxes /= data[1][0][2].item()

        if evl_rec:
            # evluate rpn recall only
            boxes_per_img = boxes.squeeze().cpu().numpy() / data[1][0][2].item(
            )
            #pdb.set_trace()
            #TP, GT = evaluate_final_recall(pred_boxes.squeeze().cpu().numpy(), i, imdb, thr=0.5)
            TP, GT = evaluate_recall(boxes_per_img, i, imdb, thr=0.5)
            recall.update(TP, GT)

            sys.stdout.write('TP/GT: {}/{} | Recall: {:.3f} \r'.format(
                TP, GT, recall.avg))
            sys.stdout.flush()
            continue

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + NMS; class 0 (background) is skipped.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
          .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)

    if evl_rec:
        print('\r\nThe average rpn recall is: {:.4f}'.format(recall.avg))
        return recall.avg

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
示例#18
0
def eval_one_dataloader(save_dir_test_out, dataloader_t, fasterRCNN, device, imdb, target_num=0,
                        class_agnostic=False, thresh=0.0, max_per_image=100, return_ap_class=False):
    """Run detection over one dataloader and score it with the imdb evaluator.

    Args:
        save_dir_test_out: prefix used to name the output directory.
        dataloader_t: loader yielding (im_data, im_info, gt_boxes, num_boxes).
        fasterRCNN: detector in one of the supported variants (plain,
            HTCN-style, or Saito-style — the forward signature differs).
        device: torch device the input batches are moved to.
        imdb: dataset wrapper providing image_index, classes and evaluation.
        target_num: forwarded to HTCN-style models only.
        class_agnostic: True when bbox regression emits 4 deltas total
            instead of 4 per class.
        thresh: score floor below which detections are discarded.
        max_per_image: cap on detections kept per image across all classes.
        return_ap_class: also return the per-class AP list when True.

    Returns:
        Mean AP (float), or ``(mean AP, per-class AP)`` when
        ``return_ap_class`` is True.
    """
    save_name = save_dir_test_out + '_test_in_'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    data_iter = iter(dataloader_t)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    #fasterRCNN.training = False
    # 0x5 placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)

        im_data = data[0].to(device)
        im_info = data[1].to(device)
        gt_boxes = data[2].to(device)
        num_boxes = data[3].to(device)
        # The three model families return tuples of different lengths, so
        # dispatch on the concrete type before unpacking.
        with torch.no_grad():
            if isinstance(fasterRCNN, frcnn_htcn) or isinstance(fasterRCNN, frcnn_htcn_m):
                det_tic = time.time()
                rois , cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, _, _, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, target_num=target_num)
            elif isinstance(fasterRCNN, frcnn_saito):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            else:
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # d_pred = d_pred.data
        # path = data[4]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Rescale boxes back to the original image size (data[1][0][2] is the
        # resize factor applied during loading).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()  # [1, 300, 2] -> [300, 2]
        pred_boxes = pred_boxes.squeeze()  # [1, 300, 8] -> [300, 8]
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Per-class thresholding + NMS; class 0 (background) is skipped.
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh, as_tuple=False).view(-1)  # [300]
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]  # [300]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]  # [300, 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)  # [300, 5]
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)  # [N, 1]
                cls_dets = cls_dets[keep.view(-1).long()]  # [N, 5]

                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])  # [M,]
            if len(image_scores) > max_per_image:
                # Score of the max_per_image-th best detection is the cutoff.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
        #                  .format(i + 1, num_images, detect_time, nms_time))
        # sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # NOTE: renamed from `map` — the original shadowed the builtin.
    mean_ap, ap_per_class = imdb.evaluate_detections(all_boxes, output_dir)
    #fasterRCNN.training =
    # Drop references to the large GPU tensors from the last iteration so
    # their memory can be reclaimed before returning.
    del scores
    del boxes
    del all_boxes
    del pred_boxes
    del rois
    del cls_prob
    del bbox_pred
    del rpn_loss_cls
    del rpn_loss_box
    del RCNN_loss_cls
    del RCNN_loss_bbox
    del rois_label


    if return_ap_class:
        return mean_ap, ap_per_class
    return mean_ap
示例#19
0
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    # This variant keeps up to 300 detections per image and drops anything
    # scoring below 0.01.
    max_per_image = 300
    thresh = 0.01
    vis = args.vis

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                          imdb.num_classes, training=False, normalize = False)
    # Evaluation loader: deterministic order; pin_memory speeds host->GPU copies.
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # 0x5 placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
示例#20
0
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Training-time settings: use horizontally flipped augmentation and
    # GPU NMS when CUDA is requested.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda

    ##################### output path
    save_name = 'faster_rcnn_10' + '_train_log'
    output_path = get_output_dir(args.imdb_name, save_name)
    # log output
    # NOTE(review): 'trian.log' looks like a typo for 'train.log' — fixing it
    # would change the on-disk log path, so it is only flagged here.
    log_output = output_path + '/trian.log'
    Log = Logger.getLogger("train.log")
    ##################### model path
    output_dir = args.save_dir + '/' + args.net + "/" + args.dataset
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    ###################### dump parameters
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    # Persist the full config next to the logs for reproducibility.
    with open(output_path + '/parameters.pkl', 'wb') as f:
        pickle.dump(cfg, f)
    Log.info(cfg)