Example No. 1
def _deltas_and_proposals_to_bboxes(deltas, proposals, im_info):
    """
    This function obtain the prediction boxes based on the predicted deltas and proposals.
    :param deltas: Variable or tensor, [bs, num_box, 4*num_class] or [bs, num_box, 4]
        based on whether use class agnostic.
    :param proposals: [bs, num_box, 4], each proposal is denoted as [x1, y1, x2, y2]
    :return: [bs, num_box, 4]
    """
    if isinstance(deltas, Variable):
        if deltas.data.is_cuda:
            box_normalize_std = Variable(
                deltas.data.new(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda())
            box_normalize_mean = Variable(
                deltas.data.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
        else:
            box_normalize_std = Variable(
                deltas.data.new(cfg.TRAIN.BBOX_NORMALIZE_STDS))
            box_normalize_mean = Variable(
                deltas.data.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
    else:
        if deltas.is_cuda:
            box_normalize_std = deltas.new(
                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
            box_normalize_mean = deltas.new(
                cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        else:
            box_normalize_std = deltas.new(cfg.TRAIN.BBOX_NORMALIZE_STDS)
            box_normalize_mean = deltas.new(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

    bs, num_box = deltas.size()[0], deltas.size()[1]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            deltas = deltas.view(-1,
                                 4) * box_normalize_std + box_normalize_mean
            deltas = deltas.view(bs, proposals.size()[1], -1)
        pred_boxes = bbox_transform_inv_one_class(proposals,
                                                  deltas)  # x1, y1 ,x2 ,y2

    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = proposals.repeat(1, 1, deltas.size(2) // 4)

    pred_boxes = clip_boxes(pred_boxes, im_info, bs)

    return pred_boxes
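
A minimal shape-check sketch of the denormalization step above, runnable on its own; the std/mean values are the usual py-faster-rcnn defaults, assumed here rather than read from this repo's cfg, and the shapes are made up.

import torch

# Hypothetical shapes: batch of 1, 300 proposals, 21 classes (class-aware deltas).
bs, num_box, num_classes = 1, 300, 21
deltas = torch.randn(bs, num_box, 4 * num_classes)
stds = torch.tensor([0.1, 0.1, 0.2, 0.2])   # assumed cfg.TRAIN.BBOX_NORMALIZE_STDS
means = torch.tensor([0.0, 0.0, 0.0, 0.0])  # assumed cfg.TRAIN.BBOX_NORMALIZE_MEANS

# Undo the target normalization exactly as the function does: flatten to rows
# of 4, scale and shift, then restore the [bs, num_box, 4*num_classes] shape.
denorm = deltas.view(-1, 4) * stds + means
denorm = denorm.view(bs, num_box, -1)
print(denorm.shape)  # torch.Size([1, 300, 84])
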
Example No. 2
    def _rois_bbox(self, rois, bbox, num_classes, im_info):
        # Denormalize the predicted deltas, decode them against the rois, clip
        # them to the image, and repack the result as [N, 5 * num_classes]
        # rows (the leading roi column is preserved per class).
        rois = rois.repeat(1, num_classes)
        stds = torch.tensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
        stds = stds.repeat(1, num_classes)
        means = torch.tensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        means = means.repeat(1, num_classes)
        bbox *= stds
        bbox += means
        bbox = bbox.view(-1, 4)
        rois = rois.view(-1, 5)
        rois_label, anchors = torch.split(rois, [1, 4], 1)
        anchors = bbox_transform_inv(anchors.unsqueeze(0), bbox.unsqueeze(0), 1)
        anchors = clip_boxes(anchors, im_info, 1)
        rois = torch.cat([rois_label.view(-1,1), anchors.squeeze(0)], 1).view(-1, 5 * num_classes)

        return rois
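
A tiny sketch of the roi layout this method relies on: each row is [idx, x1, y1, x2, y2], and torch.split peels off the leading column before decoding. The values below are illustrative only.

import torch

rois = torch.tensor([[0., 10., 10., 50., 60.],
                     [0., 20., 30., 80., 90.]])
idx, boxes = torch.split(rois, [1, 4], 1)  # the same split used above
print(idx.shape, boxes.shape)  # torch.Size([2, 1]) torch.Size([2, 4])
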
Example No. 3
def loop():

    args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_test"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    args.cfg_file = "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))



    # initialize the network here.
    if args.net == 'vgg16':
        fpn = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fpn = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fpn = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fpn = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fpn.create_architecture()
    print('network created successfully!')
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        fpn.cuda()
    start = time.time()
    max_per_image = 100
    vis = True  # args.vis

    thresh = 0.0  # same score threshold whether visualizing or not

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)



    for h in range(200):
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                                 imdb.num_classes, training=False, normalize=False)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=0,
                                                 pin_memory=True)

        data_iter = iter(dataloader)

        _t = {'im_detect': time.time(), 'misc': time.time()}
        det_file = os.path.join(output_dir, 'detections.pkl')
        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(input_dir):
            raise Exception('There is no input directory for loading network from ' + input_dir)
        load_name = os.path.join(input_dir,
                                 'fpn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fpn.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']


        fpn.eval()
        empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
        for i in range(num_images):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data  # 1*300*10
            boxes = rois.data[:, :, 1:5]  # 1*300*4

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # 1*300*40
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = boxes

            pred_boxes /= data[1][0][2].cuda()

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            if vis:
                im = cv2.imread(imdb.image_path_at(i))
                im2show = np.copy(im)
            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not args.cuda)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][i][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(i + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()

            if vis:
                cv2.imwrite('images/result%d_%d.png' % (args.checkepoch, i), im2show)
                #pdb.set_trace()
                # cv2.imshow('test', im2show)
                # cv2.waitKey(0)
            del data
            del pred_boxes
            del scores
            torch.cuda.empty_cache()

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        aps, clss = imdb.evaluate_detections(all_boxes, output_dir)
        #print(aps)
        with open("result.txt", 'a+') as f:
            # print(args.checkepoch)
            lp=""
            cc=0
            for b in clss:
                if cc!=len(clss)-1:
                    lp=lp+"'"+str(b) + ":" + str(aps[cc])+"',"
                else:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc])+"'"
                cc=cc+1

            sp = "["+lp+ "] ls:" + str(args.checksession) + "_" + str(args.checkepoch)
            # print(sp)
            f.write(sp + "\n")
        end = time.time()
        print("test time: %0.4fs" % (end - start))

        args.checkepoch = args.checkepoch + 1

        del data_iter
        del dataset
        del dataloader

        torch.cuda.empty_cache()
        gc.collect()
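
bbox_transform_inv itself is never shown in these examples; the sketch below is the standard Faster R-CNN decoding it is assumed to implement (the py-faster-rcnn parameterization), written for a single box so the arithmetic is visible.

import torch

def decode_one(box, delta):
    # box = [x1, y1, x2, y2], delta = [dx, dy, dw, dh]: shift the center by a
    # fraction of the box size and scale the width/height exponentially.
    w = box[2] - box[0] + 1.0
    h = box[3] - box[1] + 1.0
    cx = box[0] + 0.5 * w
    cy = box[1] + 0.5 * h
    pcx = delta[0] * w + cx
    pcy = delta[1] * h + cy
    pw = torch.exp(delta[2]) * w
    ph = torch.exp(delta[3]) * h
    return torch.stack([pcx - 0.5 * pw, pcy - 0.5 * ph,
                        pcx + 0.5 * pw, pcy + 0.5 * ph])

box = torch.tensor([10., 10., 50., 60.])
print(decode_one(box, torch.tensor([0., 0., 0., 0.])))
# tensor([10., 10., 51., 61.])  -- the input box, up to the +1 width convention
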
Example No. 4
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1,
                                                     4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= data[1][0][2].item()

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()

            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
Example No. 5
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh,
                             force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
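
To make the shift grid above concrete, here is a tiny standalone run with a 2x3 feature map and stride 16 (made-up sizes); each row shifts all four box coordinates by the same (dx, dy), so adding it to the A base anchors tiles them across the image.

import numpy as np

feat_h, feat_w, stride = 2, 3, 16
sx = np.arange(feat_w) * stride
sy = np.arange(feat_h) * stride
sx, sy = np.meshgrid(sx, sy)
shifts = np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).transpose()
print(shifts)
# [[ 0  0  0  0]
#  [16  0 16  0]
#  [32  0 32  0]
#  [ 0 16  0 16]
#  [16 16 16 16]
#  [32 16 32 16]]
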
Example No. 6
def interest(im2show, data, fpn, all_position, i, all_boxes, r_w, r_h, rat_w,
             rat_h):

    for key, value in all_position.items():
        x = int(((value[2] - value[0]) / 2 + value[0]) * rat_w)
        y = int(((value[3] - value[1]) / 2 + value[1]) * rat_h)
        data_tem = data[0][:, :, y - int(HIGHT / 2):y + int(HIGHT / 2),
                           x - int(WIDTH / 2):x + int(WIDTH / 2)]
        w = len(data_tem[0][0][0])
        h = len(data_tem[0][0])
        print("INER", w, h)
        if w <= 0 or h <= 0:
            return None
        if args.cuda:
            data_tem1 = torch.from_numpy(np.array([[h, w,
                                                    w / h]])).float().cuda()
            data_tem2 = torch.from_numpy(np.array([[1, 1, 1, 1,
                                                    1]])).float().cuda()
            data_tem3 = torch.from_numpy(np.array([1])).long().cuda()
        else:
            data_tem1 = torch.from_numpy(np.array([[h, w, w / h]])).float()
            data_tem2 = torch.from_numpy(np.array([[1, 1, 1, 1, 1]])).float()
            data_tem3 = torch.from_numpy(np.array([1])).long()
        im_data.data.resize_(data_tem.size()).copy_(data_tem)
        im_info.data.resize_(data_tem1.size()).copy_(data_tem1)
        gt_boxes.data.resize_(data_tem2.size()).copy_(data_tem2)
        num_boxes.data.resize_(data_tem3.size()).copy_(data_tem3)
        rois, cls_prob, bbox_pred, \
        _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]  # skip the leading batch index; the rest are the box coords
        if cfg.TEST.BBOX_REG:
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                if args.class_agnostic:
                    if args.cuda:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            pred_boxes = boxes
        pred_boxes /= data_tem1[0][2]
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        for j in range(1, imdb.num_classes):  # iterate over every class
            inds = torch.nonzero(scores[:, j] > 0.6).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)  # sort the scores in descending order
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                for c in range(len(cls_boxes)):  # map the crop's coordinates back into the full image
                    cls_boxes[c][0] = (cls_boxes[c][0] + x -
                                       int(WIDTH / 2)) / rat_w
                    cls_boxes[c][1] = (cls_boxes[c][1] + y -
                                       int(HIGHT / 2)) / rat_h
                    cls_boxes[c][2] = (cls_boxes[c][2] + x -
                                       int(WIDTH / 2)) / rat_w
                    cls_boxes[c][3] = (cls_boxes[c][3] + y -
                                       int(HIGHT / 2)) / rat_h

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                     1)  # append the scores as a fifth column
                cls_dets = cls_dets[order]  # reorder the detections by descending score
                keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not args.cuda)  # non-maximum suppression: indices to keep
                cls_dets = cls_dets[keep.view(-1).long()]  # gather the kept detections

                if all_boxes[j][i] == []:
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = np.vstack(
                        (all_boxes[j][i], cls_dets.cpu().numpy()))
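
A one-line worked example of the coordinate mapping in the loop above, with made-up numbers: a detection at x1 = 30 inside a 200-wide crop whose center sits at x = 400 in the resized image, with resize ratio rat_w = 0.5.

x1_crop, x_center, crop_w, rat_w = 30, 400, 200, 0.5
x1_orig = (x1_crop + x_center - crop_w // 2) / rat_w
print(x1_orig)  # (30 + 400 - 100) / 0.5 = 660.0
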
Example No. 7
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, :, 1]  # batch_size x num_rois x 1
        bbox_deltas = input[1]  # batch_size x num_rois x 4
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(
            generate_anchors_all_pyramids(
                self._fpn_scales, self._anchor_ratios, feat_shapes,
                self._fpn_feature_strides,
                self._fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors,
                               4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh, force_cpu=not cfg.CUDA)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Example No. 8
    def predict(cls, im_in):
        """Run the detector on the input and return the predictions.
        Args:
            im_in (an H x W x 3 image array): the image to run predictions on."""

        assert len(im_in.shape) == 3, "RGB images only"

        if cls.model is None:
            cls.model = cls.get_model()
        thresh = 0.05

        with torch.no_grad():

            blobs, im_scales = _get_image_blob(im_in)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_data = Variable(
                torch.from_numpy(im_blob).permute(0, 3, 1, 2).cuda())

            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)
            im_info = Variable(torch.from_numpy(im_info_np).cuda())

            gt_boxes = Variable(torch.zeros(1, 1, 5).cuda())
            num_boxes = Variable(torch.zeros(1).cuda())
            rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label = cls.model(
                im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()

                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(cls.model.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            result = dict()
            for j in range(1, len(cls.model.classes)):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets,
                               cfg.TEST.NMS,
                               force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    result[cls.model.classes[j]] = cls_dets.cpu().numpy().tolist()
            return {
                'pred': result,
                'metrics': {
                    'rpn_loss_cls': rpn_loss_cls,
                    'rpn_loss_box': rpn_loss_box,
                    'RCNN_loss_cls': RCNN_loss_cls,
                    'RCNN_loss_bbox': RCNN_loss_bbox,
                    'rois_label': rois_label
                }
            }
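
A hypothetical call, assuming the enclosing class is named Detector (the name is not in the excerpt) and that _get_image_blob accepts an H x W x 3 array, as the assert implies; it needs the CUDA model that get_model() returns.

import numpy as np

im = np.random.randint(0, 255, (600, 800, 3)).astype(np.float32)
out = Detector.predict(im)  # Detector is an assumed class name
print(out['pred'])  # {class name: [[x1, y1, x2, y2, score], ...], ...}
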
Example No. 9
def eval_result(args, logger, epoch, output_dir):
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    args.batch_size = 1
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False, root_path=args.data_root)

    imdb.competition_mode(on=True)

    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch))

    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes,
                 layer,
                 pretrained_path=None,
                 class_agnostic=args.class_agnostic)

    _RCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage
                                )  # Load all tensors onto the CPU
    _RCNN.load_state_dict(checkpoint['model'])

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # hm = torch.FloatTensor(1)
    # reg_mask = torch.LongTensor(1)
    # wh = torch.FloatTensor(1)
    # offset = torch.FloatTensor(1)
    # ind = torch.LongTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        # hm = hm.cuda()
        # reg_mask = reg_mask.cuda()
        # wh = wh.cuda()
        # offset = offset.cuda()
        # ind = ind.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
        # hm = Variable(hm)
        # reg_mask = Variable(reg_mask)
        # wh = Variable(wh)
        # offset = Variable(offset)
        # ind = Variable(ind)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        _RCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = True

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'thundernet'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
    #                          imdb.num_classes, training=False, normalize=False)
    # dataset = roibatchLoader(roidb, imdb.num_classes, training=False)
    dataset = Detection(roidb,
                        num_classes=imdb.num_classes,
                        transform=BaseTransform(cfg.TEST.SIZE,
                                                cfg.PIXEL_MEANS),
                        training=False)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    _RCNN.eval()

    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):

        data = next(data_iter)

        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
            # hm.resize_(data[4].size()).copy_(data[4])
            # reg_mask.resize_(data[5].size()).copy_(data[5])
            # wh.resize_(data[6].size()).copy_(data[6])
            # offset.resize_(data[7].size()).copy_(data[7])
            # ind.resize_(data[8].size()).copy_(data[8])

        det_tic = time.time()
        with torch.no_grad():
            time_measure, \
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes,
                               # hm,reg_mask,wh,offset,ind
                               )

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # pred_boxes /= data[1][0][2].item()
        pred_boxes[:, :, 0::2] /= data[1][0][2].item()
        pred_boxes[:, :, 1::2] /= data[1][0][3].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)

                # keep = soft_nms(cls_dets.cpu().numpy(), Nt=0.5, method=2)
                # keep = torch.as_tensor(keep, dtype=torch.long)

                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j - 1].tolist(),
                                   cls_dets.cpu().numpy(), 0.6)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write(
            'im_detect: {:d}/{:d}\tDetect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s)\tNMS: {:.3f}s\r' \
            .format(i + 1, num_images, detect_time, time_measure[0], time_measure[1], time_measure[2],
                    time_measure[3], nms_time))
        sys.stdout.flush()

        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)

            # cv2.imwrite('result.png', im2show)
            # pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
Example No. 10
def evaluation(name, net=None, vis=False, cuda=True, class_agnostic=False):
    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    if not net:

        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        # input_dir = 'weight'
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)
        # load_name = os.path.join(input_dir,
        #                          'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

        load_name = os.path.join(
            input_dir, 'faster_rcnn_{}_best.pth'.format(cfg['POOLING_MODE']))

        # initialize the network here.
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb.classes,
                               pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb.classes,
                                101,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb.classes,
                                50,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb.classes,
                                152,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
            pdb.set_trace()

        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')

    else:

        fasterRCNN = net

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    if cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    # vis = args.vis

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)
    # print(mAP)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
Example No. 11
    def forward(self, input):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        # input is a tuple: (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key)
        # channels 0-8 are the anchors' background scores, 9-17 the foreground scores
        scores = input[0][:, self._num_anchors:, :, :]  # [1,9,53,37]
        bbox_deltas = input[1]  # [1,36,53,37]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # proposals kept before NMS (6000 at test time)
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # proposals kept after NMS (300 at test time)
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # NMS threshold (0.7 at test time)
        min_size = cfg[cfg_key].RPN_MIN_SIZE  # minimum proposal width/height (16 at test time)

        batch_size = bbox_deltas.size(0)  # batch_size = 1

        feat_height, feat_width = scores.size(2), scores.size(3)  # 53,37
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # build coordinate matrices from the coordinate vectors
        # torch.from_numpy converts an ndarray to a tensor; tensor.numpy() converts back
        # ravel() flattens like flatten(), but flatten() returns a copy while ravel() returns a view
        # np.vstack stacks the arrays row-wise into a new array
        # the flattened shifts have 53*37 = 1961 entries; stacked and transposed they form a [1961, 4] tensor
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                             shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()  # contiguous() makes the memory layout contiguous

        A = self._num_anchors
        K = shifts.size(0)
        # 9 anchors, each with four coordinates (width/height around a center point)
        self._anchors = self._anchors.type_as(scores)  # [9,4]
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)  # [1961, 9, 4]
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)  # [1, 17649, 4]

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchor
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()  # [1, 53, 37, 36]
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)  # [1, 17649, 4]
        # Same story for the scores
        scores = scores.permute(0, 2, 3, 1).contiguous()  # permute reorders the dimensions
        scores = scores.view(batch_size, -1)  # [1, 17649]
        # Convert anchors into proposals via bbox transformations
        # compute proposals from the anchors and deltas; returns corner coordinates (x1, y1, x2, y2)
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        # clip predicted boxes to the image; out-of-bounds coordinates are clamped to the boundary
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)  # _ is the sorted scores, order the sorted indices

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            # index from [1, 17649, 4] down to [17649, 4], and from [1, 17649] down to [17649]
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]
            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            # numel() returns the number of elements
            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]  # at test time, keep the indices of the top 6000 scores

            # take the top-6000 proposals and scores, [6000, 4] and [6000, 1]; this re-indexes proposals_single from 0 to 5999
            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            # torch.cat joins the boxes and scores along dim 1 -> [6000, 5]
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
            # keep_idx_i holds the indices that survive the NMS threshold, relative to the top-6000 ordering
            keep_idx_i = keep_idx_i.long().view(-1)

            # keep only the first post_nms_topN (300) surviving proposals
            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end: when fewer than 300 proposals remain, pad the rest with zeros
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Example No. 12
def val_batch(batch_num, batch):

    with torch.no_grad():
        if using_spinn:
            spinn_res = spinn(batch.phrases)
        else:
            spinn_res = batch.phrases
        result = obj_detector(batch.imgs,
                              batch.im_sizes,
                              batch.gt_boxes,
                              use_gt_boxes=True)
        rois, bbox_pred, cls_prob, rois_label, rpn_label, pooled_feat = result

        # scene detection part
        res_scene = scene_detector(rois.data,
                                   bbox_pred,
                                   batch.im_sizes,
                                   cls_prob,
                                   pooled_feat,
                                   spinn_res,
                                   rois_label,
                                   batch.gt_boxes,
                                   batch.gt_rels,
                                   use_gt_boxes=True)
    rois, roi_pair_proposals, obj_cls_prob, rel_cls_prob, roi_rel_pairs_score, _, _ = res_scene

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    scale = batch.im_sizes[0][0][2]
    pred_boxes = boxes / scale
    pred_boxes = pred_boxes.squeeze()
    use_gt_boxes = True
    if not use_gt_boxes:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()

            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, batch.im_sizes, 1)

    scores = scores.squeeze()

    theimg = cv2.imread(batch.im_fn[0])

    # im = theimg.copy()
    # for n in range(len(batch.gt_classes[0])):
    #     box = batch.gt_boxes[0, n, :] / batch.im_sizes[0][0][2]  # batch.im_sizes[0][0][2]
    #     box = box.cpu().numpy()
    #     bbox = tuple(int(np.round(x)) for x in box[:4])
    #     cv2.rectangle(im, bbox[0:2], bbox[2:4], (0, 204, 0), 2)
    #     class_name = val.ind_to_classes[batch.gt_classes[0][n]]
    #     cv2.putText(im, '%s:' % (class_name), (bbox[0], bbox[1] + 15), cv2.FONT_HERSHEY_PLAIN,
    #                 1.0, (0, 0, 255), thickness=1)
    # plt.figure(0)
    # plt.imshow(im)
    # plt.show()
    # plt.pause(0.1)
    # index_fg = (rpn_label == 1).nonzero().squeeze()
    # for n in range(len(index_fg)):
    #     rois_boxes = boxes[0,index_fg[n],:4].cpu().numpy()
    #     rois_boxes /= batch.im_sizes[0][0][2]
    #     rois_boxes = tuple(int(np.round(x)) for x in rois_boxes)
    #     if (rois_boxes[0] or rois_boxes[2]) < 0 or (rois_boxes[0] or rois_boxes[2]) > im.shape[1]\
    #         or (rois_boxes[1] or rois_boxes[3]) < 0 or (rois_boxes[1] or rois_boxes[3]) > im.shape[0]:
    #         print('out of boundary')
    #     cv2.rectangle(im, rois_boxes[0:2], rois_boxes[2:4], (204, 0, 0), 2)
    #
    #     plt.imshow(im)
    #     plt.show()
    #     plt.pause(0.1)
    # theimg = cv2.resize(theimg,(int(im_scale * theimg.shape[0]), int(im_scale * theimg.shape[1])))
    im2show = np.copy(theimg)
    # draw2 = ImageDraw.Draw(theimg2)
    ## ============================================================================================================================
    # visualize RoI detections from Faster R-CNN
    ## ============================================================================================================================
    # v, class_index = torch.max(scores, 1)
    # for j in range(1, len(val.ind_to_classes)):
    #     bboxs_index = (class_index == j).nonzero().squeeze()
    #     if bboxs_index.numel() > 0:
    #         cls_boxes = pred_boxes[bboxs_index]
    #         cls_scores = v[bboxs_index]
    #         _, order = torch.sort(cls_scores, 0, True)
    #
    #         if (len(cls_scores.size()) == 0):
    #             # print(cls_scores)
    #             cls_scores = cls_scores.unsqueeze(0)
    #             cls_boxes = cls_boxes.unsqueeze(0)
    #             continue
    #
    #         cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    #         cls_dets = cls_dets[order]
    #         # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
    #         keep = nms(cls_boxes[order, :], cls_scores[order], 0.5)  # )cfg.TEST.NMS
    #         cls_dets = cls_dets[keep.view(-1).long()]
    #
    #         im2show_ = vis_detections(im2show, val.ind_to_classes[j], cls_dets.cpu().numpy(), 0.1)
    #         plt.figure(3)
    #         plt.clf()
    #         plt.imshow(im2show_)
    #         plt.title('vis_pre_detections')
    #         plt.pause(0.1)
    # theimg2.show('obj_detection')
    im = theimg.copy()
    gt_boxes = batch.gt_boxes.data[0]
    rel_cnt, rel_corrent_cnt, gt_rel_rois, gt_rel_labels = \
        eval_relations_recall(im, gt_boxes, scale, batch.gt_rels.data[0], pred_boxes.data,
                              obj_cls_prob.data[0], roi_pair_proposals.view(-1, 2),
                              rel_cls_prob.data[0], top_Ns, roi_rel_pairs_score, val, vis=False)

    return rel_cnt, rel_corrent_cnt
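val_batch above (and several snippets below) undoes the bbox-target normalization with deltas * std + mean before calling bbox_transform_inv. A minimal sketch of that denormalization, assuming the common Faster R-CNN defaults for the means and stds (the actual values come from cfg.TRAIN):

import torch

# common Faster R-CNN defaults; assumption, the real values come from cfg.TRAIN
BBOX_NORMALIZE_MEANS = torch.tensor([0.0, 0.0, 0.0, 0.0])
BBOX_NORMALIZE_STDS = torch.tensor([0.1, 0.1, 0.2, 0.2])

def denormalize_deltas(box_deltas):
    """Invert the per-target normalization applied when training targets were built."""
    shape = box_deltas.shape
    deltas = box_deltas.reshape(-1, 4) * BBOX_NORMALIZE_STDS + BBOX_NORMALIZE_MEANS
    return deltas.reshape(shape)

raw = torch.tensor([[1.0, -2.0, 0.5, 0.5]])
print(denormalize_deltas(raw))  # tensor([[ 0.1000, -0.2000,  0.1000,  0.1000]])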
Exemplo n.º 13
0
    def forward(self,
                rois,
                bbox_pred,
                im_info,
                obj_cls_score,
                obj_cls_feat,
                spinn_res,
                rois_obj_label,
                gt_boxes,
                gt_relation,
                use_gt_boxes=False):

        batch_size = rois.size(0)
        num_boxes = min(gt_boxes.size(0), cfg.MAX_NUM_GT_BOXES)

        # batch normalization (currently disabled)
        _obj_cls_feat = obj_cls_feat  # self.bn_obj(obj_cls_feat)

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED and not use_gt_boxes:
            box_deltas = bbox_pred.data
            # invert the target normalization (precomputed mean and stdev) applied in RCNN_proposal_target
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(bbox_pred.size(0), -1, 4)
            boxes = rois.data[:, :, 1:5]
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info, 1)
            rois.data[:, :, 1:5] = pred_boxes

        if spinn_res is not None:
            # TODO: handle the case where spinn_res is an all-zero matrix
            encoder_res = self.encoder(spinn_res)
            self.res_spinn.append(encoder_res)
        else:
            encoder_res = None

        relpn_feats = _obj_cls_feat.view(
            rois.size(0), rois.size(1),
            _obj_cls_feat.size(1))  # TODO: whether the last size should be 1 or 2

        roi_rel_pairs, roi_pair_proposals, roi_rel_pairs_score, relpn_loss_cls, relpn_eval= \
            self.RELPN(rois.data, relpn_feats, encoder_res, im_info, gt_boxes.data, gt_relation.data, num_boxes, use_gt_boxes)

        if not self.training:
            if batch_size == 1:
                valid = roi_rel_pairs.sum(2).view(-1).nonzero().view(-1)
                roi_rel_pairs = roi_rel_pairs[:, valid, :]
                roi_pair_proposals = roi_pair_proposals[:, valid, :]
                roi_rel_pairs_score = roi_rel_pairs_score[:, valid, :]

        size_per_batch = _obj_cls_feat.size(0) // batch_size  # integer division: RoIs per image
        # xxx = torch.arange(0, batch_size).view(batch_size, 1, 1).type_as(roi_pair_proposals) * size_per_batch
        roi_pair_proposals = roi_pair_proposals + \
                             torch.arange(0, batch_size).view(batch_size, 1, 1).type_as(roi_pair_proposals) * size_per_batch

        roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
        ind_subject = roi_pair_proposals_v[:, 0]
        ind_object = roi_pair_proposals_v[:, 1]

        if self.training:
            roi_rel_pairs, rois_rel_label, roi_pair_keep = \
                self.RELPN_proposal_target(roi_rel_pairs, gt_boxes.data, gt_relation.data, num_boxes)

            rois_rel_label = Variable(rois_rel_label.view(-1))
            xxx = torch.arange(0, roi_pair_keep.size(0)).view(
                roi_pair_keep.size(0), 1).cuda() * roi_pair_proposals_v.size(0)
            x = xxx / batch_size  # per-image offset into the flattened pair list
            # roi_pair_keep = roi_pair_keep + torch.arange(0, roi_pair_keep.size(0)).view(roi_pair_keep.size(0), 1).cuda() \
            #                                 * roi_pair_proposals_v.size(0) / batch_size
            roi_pair_keep = roi_pair_keep + x.float()
            roi_pair_keep = roi_pair_keep.view(-1).long()

            ind_subject = roi_pair_proposals_v[roi_pair_keep][:, 0]
            ind_object = roi_pair_proposals_v[roi_pair_keep][:, 1]

        _obj_cls_feat_sub = self.gcn_head_rel_sub(_obj_cls_feat)
        x_sobj = _obj_cls_feat_sub[ind_subject]  #1500 x 4096 , 640
        x_oobj = _obj_cls_feat_sub[ind_object]

        pred_feat = torch.cat((x_sobj, x_oobj), 1)

        # compute object classification probability
        #pred_feat = self.gcn_head_rel_fc(_pred_feat)
        # pred_feat = self.bn_rel(pred_feat)

        ## ============================================================================================================================
        ##                  GCN
        ## ============================================================================================================================
        if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:  # true

            x_obj_gcn, x_pred_gcn = self.GRCNN_gcn_feat(
                _obj_cls_feat, pred_feat, ind_subject, ind_object)
            # x_obj_gcn = self.fcsub(_obj_cls_feat)
            # #x_obj_gcn = self.bn_obj(x_obj_gcn)
            #
            # x_pred_gcn = self.fcrel(pred_feat)
            # x_pred_gcn = self.bn_rel(x_pred_gcn)
            ## ============================================================================================================================
            ## LSTM encoder layer
            ## ============================================================================================================================
            # self.encoder.reset_state()

            # if cfg.GCN_HAS_ATTENTION:  # true
            #
            #     attend_score = self.GRCNN_gcn_att1(x_sobj, x_oobj, None)  # N_rel x 1
            #     attend_score = attend_score.view(1, x_pred_relpn.size(0))
            #
            #
            # # compute the initial maps, including map_obj_att, map_obj_obj and map_obj_rel
            # # NOTE we have two ways to compute the map among objects: one is based on the overlaps among object rois.
            # # NOTE the intuition behind this is that rois with overlaps should share some common features; we need to
            # # NOTE exclude one roi feature from another.
            # # NOTE another way is based on the classification scores. The intuition is that objects have some common
            # # co-occurrence, e.g. a bus appears more frequently on a road.
            # # assert x_obj.size() == x_att.size(), "the numbers of object features and attribute features should be the same"
            #
            # size_per_batch = obj_cls_feat.size(0) / batch_size
            #
            # map_obj_obj = obj_cls_feat.data.new(obj_cls_feat.size(0), obj_cls_feat.size(0)).fill_(0.0)
            # eye_mat = torch.eye(int(size_per_batch)).type_as(obj_cls_feat.data)
            # for i in range(batch_size):
            #     a = int(i * size_per_batch)  # size_per_batch 128
            #     b = int((i + 1) * size_per_batch)
            #     c = map_obj_obj[a:b, a:b]
            #     c.fill_(1.0)
            #     map_obj_obj[a:b, a:b].fill_(1.0)
            #     map_obj_obj[a:b, a:b] = map_obj_obj[a:b,
            #                             a:b] - eye_mat  # 256x256 block-diagonal matrix, diagonal elements are 0
            # #todo: change adjacent matrix
            # map_obj_obj = Variable(map_obj_obj)
            #
            # map_sobj_rel = Variable(obj_cls_feat.data.new(obj_cls_feat.size(0), x_pred_relpn.size(0)).zero_())
            # map_sobj_rel.scatter_(0, Variable(ind_subject.contiguous().view(1, x_pred_relpn.size(0))), attend_score)
            # map_oobj_rel = Variable(obj_cls_feat.data.new(obj_cls_feat.size(0), x_pred_relpn.size(0)).zero_())
            # map_oobj_rel.scatter_(0, Variable(ind_object.contiguous().view(1, x_pred_relpn.size(0))), attend_score)
            # map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 2)
            #
            # gcnstart = time.time()
            #
            # x_obj_gcn = obj_cls_feat
            # x_pred_gcn = x_pred_relpn
            # for i in range(cfg.GCN_LAYERS):  # cfg.GCN_LAYERS
            #     # pass graph representation to gcn
            #     x_obj_gcn, x_pred_gcn = self.GRCNN_gcn_feat(x_obj_gcn, x_pred_gcn, map_obj_obj, map_obj_rel)
            #
            #     x_sobj = x_obj_gcn[ind_subject]
            #     x_oobj = x_obj_gcn[ind_object]
            #     attend_score = self.GRCNN_gcn_att1(x_sobj, x_oobj, None)  # N_rel x 1
            #     attend_score = attend_score.view(1, x_pred_gcn.size(0))
            #
            #     map_sobj_rel = Variable(obj_cls_feat.data.new(obj_cls_feat.size(0), x_pred_gcn.size(0)).zero_())
            #     map_sobj_rel.scatter_(0, Variable(ind_subject.contiguous().view(1, x_pred_gcn.size(0))), attend_score)
            #     map_oobj_rel = Variable(obj_cls_feat.data.new(obj_cls_feat.size(0), x_pred_gcn.size(0)).zero_())
            #     map_oobj_rel.scatter_(0, Variable(ind_object.contiguous().view(1, x_pred_gcn.size(0))), attend_score)
            #     map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 2)
            # 256x4096
            # pdb.set_trace()
            # compute object classification loss
            gcn_obj_cls_score = self.GRCNN_obj_cls_score(x_obj_gcn)
            gcn_obj_cls_prob = F.softmax(gcn_obj_cls_score, 1)

            gcn_rel_cls_score = self.GRCNN_rel_cls_score(x_pred_gcn)
            gcn_rel_cls_prob = F.softmax(gcn_rel_cls_score, dim=1)
            ## ============================================================================================================================
            ##                  LOSS function
            ## ============================================================================================================================
            if self.training:

                if cfg.GCN_LAYERS > 0:
                    # object classification loss
                    self.GRCNN_loss_obj_cls = F.cross_entropy(
                        gcn_obj_cls_score, rois_obj_label.long())
                    # relation classification loss
                    self.rel_fg_cnt = torch.sum(rois_rel_label.data.ne(0))
                    self.rel_bg_cnt = rois_rel_label.data.numel() - self.rel_fg_cnt
                    self.GRCNN_loss_rel_cls = F.cross_entropy(
                        gcn_rel_cls_score, rois_rel_label.long())
                    grcnn_loss = self.GRCNN_loss_obj_cls + self.GRCNN_loss_rel_cls  # used only for rpn/relpn training

            relpn_loss = relpn_loss_cls
            gcn_obj_cls_prob = gcn_obj_cls_prob.view(batch_size, rois.size(1),
                                                     -1)

            gcn_rel_cls_prob = gcn_rel_cls_prob.view(
                batch_size, int(gcn_rel_cls_prob.size(0) / batch_size), -1)
            ## ============================================================================================================================
            ##                  Return Values
            ## ============================================================================================================================

            if cfg.HAS_RELATIONS:
                if self.training:  # true use this option
                    return rois, gcn_obj_cls_prob, gcn_rel_cls_prob, self.GRCNN_loss_obj_cls, relpn_loss, grcnn_loss, relpn_eval
                    # return rois, bbox_pred_frcnn, obj_cls_prob_frcnn, att_cls_prob, rel_cls_prob, rpn_loss, relpn_loss, grcnn_loss
                else:

                    return rois, roi_pair_proposals, gcn_obj_cls_prob, gcn_rel_cls_prob, roi_rel_pairs_score, 0, 0
        else:
            return relpn_eval, relpn_loss_cls
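The forward above flattens per-image RoI features into one [batch_size * rois_per_image, feat_dim] matrix, so each image's (subject, object) pair indices must be offset by i * size_per_batch before gathering. A minimal sketch of that index bookkeeping, with hypothetical sizes:

import torch

# hypothetical sizes: 2 images with 3 RoIs each, 4 candidate pairs per image
batch_size, rois_per_image = 2, 3
pairs = torch.randint(0, rois_per_image, (batch_size, 4, 2))

# offset each image's (subject, object) indices into the flattened
# [batch_size * rois_per_image, feat_dim] feature matrix
offset = torch.arange(batch_size).view(batch_size, 1, 1) * rois_per_image
flat_pairs = (pairs + offset).view(-1, 2)

feats = torch.randn(batch_size * rois_per_image, 8)
x_sobj = feats[flat_pairs[:, 0]]            # subject features, one row per pair
x_oobj = feats[flat_pairs[:, 1]]            # object features
pred_feat = torch.cat((x_sobj, x_oobj), 1)  # concatenated relation feature
print(pred_feat.shape)                      # torch.Size([8, 16])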
Exemplo n.º 14
0
def validate_virat(val_loader, S_RAD, epoch, num_class, num_segments, vis,
                   session, batch_size, input_data, cfg, log, dataset):
    val_iters_per_epoch = int(np.round(len(val_loader)))
    im_data, im_info, num_boxes, gt_boxes = input_data
    S_RAD.eval()
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    #limit the number of proposals per image across all classes
    max_per_image = cfg.MAX_DET_IMG

    #dict with matched detections and their scores, keyed by class index
    eval_target = {one: 1 for one in activity2id_person}
    e = {one: {} for one in eval_target}  # cat_id -> imgid -> {"dm","dscores"}

    #unique image id
    imgid = 0
    num_gt = [0 for _ in range(num_class)]
    for step, data in enumerate(val_loader):

        im_data.resize_(data[0].size()).copy_(data[0])
        gt_boxes.resize_(data[1].size()).copy_(data[1])
        num_boxes.resize_(data[2].size()).copy_(data[2])
        im_info.resize_(data[3].size()).copy_(data[3])
        im_data = im_data.view(-1, im_data.size(2), im_data.size(3),
                               im_data.size(4))
        im_info = im_info.view(-1, 3)
        gt_boxes = gt_boxes.view(-1, cfg.MAX_NUM_GT_BOXES, num_class + 4)
        num_boxes = num_boxes.view(-1)

        #evaluate / inference code
        start = time.time()
        rois, cls_prob, bbox_pred = S_RAD(im_data, im_info, gt_boxes,
                                          num_boxes)
        torch.cuda.synchronize()
        end_time = time.time() - start
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        #batch_size = rois.shape[0]
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                           + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)

        #transform the deltas to (x1, y1, x2, y2) boxes and clip the coordinates to the image
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])

        #gt boxes
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]
        #pred_boxes /= data[3][0][1][2].item()
        #gtbb /= data[3][0][1][2].item()

        #move the ground truth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        #count = 0

        for image in range(pred_boxes.shape[0]):
            box = [None for _ in range(num_class)]
            imgid += 1
            for class_id in range(1, num_class):
                inds = torch.nonzero(
                    scores[image, :,
                           class_id] > cfg.VIRAT.SCORE_THRES).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    #arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()

                #collect ground-truth boxes for the image
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]

                #take a ground-truth box only if its label is 1 for that class
                box[class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += len(box[class_id])
            match_dt_gt(e, imgid, all_boxes[step][image], box,
                        activity2id_person)
            if (step + 1) % 50 == 0:
                output = ('Test: [{0}/{1}]\t'.format(step,
                                                     (val_iters_per_epoch)))
                print(output)

    aps = aggregate_eval(e, maxDet=max_per_image)
    mAP = (mean(aps[target] for target in aps.keys()))

    for k, v in aps.items():
        output = ('class: [{0}] - {1}'.format(k, v))
        log.write(output + '\n')
        print(output)
    mAPout = ('mAP at epoch {0}: {1}'.format(epoch, mAP))
    print('mAP at epoch {0}: {1} \n'.format(epoch, mAP))
    log.write(mAPout + '\n')
    log.flush()
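The per-image, per-class detection loop in validate_virat (and in validate_voc below) reduces to: threshold the class scores, sort in descending order, run NMS, and keep (x1, y1, x2, y2, score) rows. A minimal sketch, assuming torchvision's nms with the same (boxes, scores, iou_threshold) call used above; score_thresh and nms_thresh stand in for cfg.VIRAT.SCORE_THRES and cfg.TEST.NMS:

import torch
from torchvision.ops import nms  # same (boxes, scores, iou_thresh) call as above

def per_class_detections(scores, boxes, class_id,
                         score_thresh=0.05, nms_thresh=0.3):
    """scores: [num_rois, num_classes]; boxes: [num_rois, 4 * num_classes]."""
    inds = torch.nonzero(scores[:, class_id] > score_thresh).view(-1)
    if inds.numel() == 0:
        return boxes.new_zeros(0, 5)
    cls_scores = scores[inds, class_id]
    cls_boxes = boxes[inds, class_id * 4:(class_id + 1) * 4]
    # sort descending, run NMS, keep (x1, y1, x2, y2, score) rows
    order = cls_scores.sort(descending=True)[1]
    keep = nms(cls_boxes[order], cls_scores[order], nms_thresh)
    dets = torch.cat((cls_boxes[order], cls_scores[order].unsqueeze(1)), 1)
    return dets[keep]

scores = torch.rand(300, 5)
xy = torch.rand(300, 2) * 100
boxes = torch.cat((xy, xy + torch.rand(300, 2) * 50), 1).repeat(1, 5)
print(per_class_detections(scores, boxes, class_id=1, score_thresh=0.9).shape)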
Exemplo n.º 15
0
def validate_voc(val_loader, S_RAD, epoch, num_class, num_segments, session,
                 batch_size, cfg, log, dataset, pathway, eval_metrics):
    val_iters_per_epoch = int(np.round(len(val_loader)))
    S_RAD.eval()
    all_boxes = [[[[] for _ in range(num_class)]
                  for _ in range(batch_size * num_segments)]
                 for _ in range(val_iters_per_epoch)]
    bbox = [[[[] for _ in range(num_class)]
             for _ in range(batch_size * num_segments)]
            for _ in range(val_iters_per_epoch)]
    #limit the number of proposals per image across all classes
    max_per_image = cfg.MAX_DET_IMG

    #confusion matrix
    conf_mat = ConfusionMatrix(num_classes=num_class,
                               CONF_THRESHOLD=0.8,
                               IOU_THRESHOLD=0.2,
                               dataset=dataset)

    num_gt = [0 for _ in range(num_class)]

    #data_iter = iter(val_loader)
    for step, data in enumerate(val_loader):

        #evaluate /inference code
        #start_time = time.time()
        rois, cls_prob, bbox_pred = S_RAD(data)
        #torch.cuda.synchronize()
        #end_time = time.time() - start_time

        if dataset == 'ucfsport':
            class_dict = act2id
        elif dataset == 'jhmdb':
            class_dict = jhmdbact2id
        elif dataset == 'ucf24':
            class_dict = ucf24act2id
        elif dataset == 'urfall':
            class_dict = fallactivity2id
        elif dataset == 'imfd':
            class_dict = imfallactivity2id
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = bbox_pred.data
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                           + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(scores.shape[0], -1, 4 * num_class)

        #transform the deltas to (x1, y1, x2, y2) boxes and clip the coordinates to the image
        pred_boxes = bbox_transform_inv(boxes, box_deltas, scores.shape[0])
        if pathway == "two_pathway":
            im_info = data[0][3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[0][1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                        num_class + 4)).to(device="cuda")
        else:
            im_info = data[3].view(-1, 3).to(device="cuda")
            gt_boxes = (data[1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                     num_class + 4)).to(device="cuda")
        #clip the coordinates to the image in both pathways
        pred_boxes = clip_boxes(pred_boxes, im_info.data, scores.shape[0])

        #gt boxes
        gtbb = gt_boxes[:, :, 0:4]
        gtlabels = gt_boxes[:, :, 4:]

        #move the ground truth to cpu
        gtbb = gtbb.cpu().numpy()
        gtlabels = gtlabels.cpu().numpy()
        #count = 0

        for image in range(pred_boxes.shape[0]):
            for class_id in range(1, num_class):
                inds = torch.nonzero(scores[image, :, class_id] > 0).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[image, inds, class_id]
                    #arranging in descending order
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[image, inds,
                                           class_id * 4:(class_id + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    cls_dets = cls_dets[order, :]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1)]
                    all_boxes[step][image][class_id] = cls_dets.cpu().numpy()

                #collect ground-truth boxes for the image
                index = np.unique(np.nonzero(gtbb[image])[0])
                gtbox = gtbb[image][index]
                label = gtlabels[image][index]

                #take a ground-truth box only if its label is 1 for that class
                bbox[step][image][class_id] = [
                    gtbox[i] for i in range(len(label)) if label[i, class_id]
                ]
                num_gt[class_id] += len(bbox[step][image][class_id])
                if eval_metrics:
                    if len(bbox[step][image][class_id]) > 0 and len(
                            all_boxes[step][image][class_id]) > 0:
                        conf_mat.process_batch(all_boxes[step][image],
                                               bbox[step][image])

    if eval_metrics:
        result = conf_mat.return_matrix()
        print(result)
        conf_mat.plot(result)

    ap = [None for _ in range(num_class)]

    #calculate tp and fp for each detection
    for cls_id in range(1, num_class):

        tpfp = []
        class_det = []
        for video in range(len(all_boxes)):
            for batch in range(len(all_boxes[0])):
                tp_fp = tpfp_default(all_boxes[video][batch][cls_id],
                                     bbox[video][batch][cls_id], iou_thr=0.5)
                if (len(tp_fp) > 0
                        and len(all_boxes[video][batch][cls_id]) > 0):
                    tpfp.append(tp_fp)
                    class_det.append(all_boxes[video][batch][cls_id])
        assert len(tpfp) == len(class_det)
        tp, fp = tuple(zip(*tpfp))

        # sort all det bboxes by score, also sort tp and fp
        cls_det = np.vstack(class_det)
        num_dets = cls_det.shape[0]
        sort_inds = np.argsort(-cls_det[:, -1])
        tp = np.hstack(tp)[:, sort_inds]
        fp = np.hstack(fp)[:, sort_inds]

        # calculate recall and precision with tp and fp
        tp = np.cumsum(tp, axis=1)
        fp = np.cumsum(fp, axis=1)
        eps = np.finfo(np.float32).eps
        recalls = tp / np.maximum(num_gt[cls_id], eps)
        precisions = tp / np.maximum((tp + fp), eps)

        #precision-recall curve visualisation
        if eval_metrics:
            import matplotlib.pyplot as plt
            colors = [
                'aqua', 'navy', 'gold', 'turquoise', 'red', 'green', 'black',
                'brown', 'darkorange', 'cornflowerblue', 'teal'
            ]  # 'aqua' is an assumption: the original 'ac' is not a valid matplotlib color
            plt.plot(recalls[0, :],
                     precisions[0, :],
                     color=colors[cls_id],
                     lw=2,
                     label='class {}'.format(cls_id))

        ap[cls_id] = average_precision(recalls[0, :],
                                       precisions[0, :],
                                       mode='area')

    #Plot the precision-recall curve
    if eval_metrics:
        fig = plt.gcf()
        fig.subplots_adjust(bottom=0.25)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Extension of Precision-Recall curve to multi-class')
        plt.legend(loc="best")
        plt.show()

    for k, v in class_dict.items():
        #print("Average precision per class:")
        out = ("class [{0}]:{1}   |gt:{2}".format(k, ap[v], num_gt[v]))
        print(out)
        log.write(out + '\n')
    mAP = ("mAP for epoch [{0}] is : {1}".format(epoch, mean(ap[1:])))
    print(mAP)
    log.write(mAP + '\n')
    log.flush()
    print("----------------------------------------------")
Exemplo n.º 16
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, fg_scores, rpn_reg_loss = \
            self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        rpn_prior_loss = torch.FloatTensor([0.]).cuda()

        # during the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            if self.rpn_prior_weight != 0.:
                for i in range(batch_size):
                    gt_num = num_boxes[i].detach().cpu().item()
                    score = fg_scores[i]
                    score_sum = score.sum().detach().cpu().item()
                    score = score / score_sum
                    log_score = score * torch.log(score + 1e-6)  # p * log(p)
                    rpn_prior_loss += (-1. * log_score.sum() / float(gt_num))

                rpn_prior_loss /= batch_size
                rpn_prior_loss *= self.rpn_prior_weight
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = torch.FloatTensor([0.]).cuda()
            rpn_loss_bbox = torch.FloatTensor([0.]).cuda()

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        head_reg_loss = torch.FloatTensor([0.]).cuda()
        if self.training and self.head_reg_weight != 0.:
            head_reg_loss = (pooled_feat**2).mean() * self.head_reg_weight

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        # sample localization offsets via the reparameterization trick:
        # bbox_pred holds [log(sigma^2), mu]; draw mu + eps * sigma * sample_sigma
        normal_dist = torch.randn(bbox_pred.size(0), 4).float().cuda()
        log_sigma_2 = bbox_pred[:, :4]
        miu = bbox_pred[:, 4:]
        sigma = torch.exp(log_sigma_2 / 2.)
        sample_loc_data = normal_dist * sigma * self.sample_sigma + miu
        bbox_pred = sample_loc_data

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = torch.FloatTensor([0.]).cuda()
        RCNN_loss_bbox = torch.FloatTensor([0.]).cuda()

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        head_prior_loss = torch.FloatTensor([0.]).cuda()
        if self.training and self.head_prior_weight != 0.:
            scores = cls_prob.data  # [batch, num_rois, classes]
            scores_gradient = cls_prob  # [batch, num_rois, classes]
            boxes = rois.data[:, :, 1:5]  # [batch, num_rois, 4]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # [batch, num_rois, 4]
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if self.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1,
                                                     4 * len(self.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
            else:
                # Simply repeat the boxes, once for each class
                print("no use bbox head in IB")
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_info[:, 2].data[:, None,
                                             None]  # [batch, num_rois, 4]
            loss_count = 0.
            gt_classes = gt_boxes[:, :, -1].data  # [batch, num(0 pad to 20)]
            for i in range(batch_size):
                for j in range(1, len(self.classes)):  # skip background class
                    if not (gt_classes[i] == j).any():  # no such class in gt
                        continue
                    # there are gt for this class
                    inds = torch.nonzero(
                        scores[i, :, j] > self.nms_threshold).view(-1)
                    if inds.numel() == 0:
                        continue
                    cls_scores = scores[i, :, j][inds]  # [num]
                    cls_scores_gradient = scores_gradient[i, :, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if self.class_agnostic:
                        cls_boxes = pred_boxes[i, inds, :]  # [num, 4]
                    else:
                        cls_boxes = pred_boxes[i, inds][:, j * 4:(j + 1) * 4]
                    cls_scores_gradient = cls_scores_gradient[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    score = cls_scores_gradient[keep.view(
                        -1).long()]  # [num_keep]
                    gt_num = (gt_classes[i] == j).sum().detach().cpu().item()
                    if score.size(0) <= gt_num:
                        continue
                    score_sum = score.sum().detach().cpu().item()
                    score = score / score_sum
                    log_score = score * torch.log(score + 1e-6)
                    head_prior_loss += (-1. * log_score.sum() / float(gt_num))
                    loss_count += 1.

            if loss_count > 0:  # guard against division by zero when no class qualifies
                head_prior_loss /= loss_count
            head_prior_loss *= self.head_prior_weight

        return rois, cls_prob, bbox_pred, \
               rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
               rpn_prior_loss, rpn_reg_loss, head_prior_loss, head_reg_loss
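Both rpn_prior_loss and head_prior_loss above are entropy penalties: the surviving scores are normalized into a distribution p (with the normalizing sum detached) and the loss is -sum(p * log p) / gt_num. A minimal sketch:

import torch

def entropy_prior_loss(scores, gt_num, eps=1e-6):
    """Entropy of the normalized scores, scaled by 1 / gt_num.
    The normalizing sum is detached, so gradients flow only through `scores`,
    mirroring the rpn_prior_loss / head_prior_loss computation above."""
    p = scores / scores.sum().detach()
    return -(p * torch.log(p + eps)).sum() / float(gt_num)

scores = torch.tensor([0.9, 0.8, 0.1], requires_grad=True)
loss = entropy_prior_loss(scores, gt_num=2)
loss.backward()
print(loss.item())   # entropy is largest when the scores are spread uniformly
print(scores.grad)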