Example #1
def get_bounding_boxes(rois, cls_prob, bbox_pred, im_info, allBoundingBoxes,
                       index):
    global nusc_classes
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    thresh = 0.05
    if cfg.TRAIN.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_info[0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    bounding_boxes = []
    for j in range(1, len(nusc_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there are detections for this class
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds, :]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], 0.3)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            # keep at most the 10 highest-scoring detections per class
            for i in range(min(10, dets.shape[0])):
                bbox = [int(np.round(x)) for x in dets[i, :4]]
                bbox = bbox + [j]
                score = dets[i, -1]
                if score > 0.3:
                    bounding_boxes += [bbox]
                    bb = BoundingBox(index,
                                     j,
                                     bbox[0],
                                     bbox[1],
                                     bbox[2],
                                     bbox[3],
                                     CoordinatesType.Absolute,
                                     None,
                                     BBType.Detected,
                                     score,
                                     format=BBFormat.XYWH)
                    allBoundingBoxes.addBoundingBox(bb)
    return allBoundingBoxes
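The loop above is the standard threshold / sort / NMS pattern that recurs throughout these examples. For reference, a minimal standalone sketch of the same pattern using torchvision.ops.nms (an assumption: the repo's nms wrapper matches torchvision's (boxes, scores, iou_threshold) signature and, like it, returns the kept indices; torchvision also sorts by score internally, so the explicit sort is not needed here):

import torch
from torchvision.ops import nms

def per_class_nms(scores, boxes, score_thresh=0.05, iou_thresh=0.3):
    # scores: (N, num_classes), boxes: (N, 4) in xyxy format.
    # Returns a list of (k, 6) tensors (x1, y1, x2, y2, score, class_id),
    # skipping background class 0.
    results = []
    for j in range(1, scores.shape[1]):
        inds = torch.nonzero(scores[:, j] > score_thresh).view(-1)
        if inds.numel() == 0:
            continue
        cls_scores = scores[inds, j]
        cls_boxes = boxes[inds]
        keep = nms(cls_boxes, cls_scores, iou_thresh)
        cls_id = torch.full((keep.numel(), 1), float(j))
        results.append(torch.cat(
            (cls_boxes[keep], cls_scores[keep].unsqueeze(1), cls_id), dim=1))
    return results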
Example #2
def generate_pseudo_label(output_dir, sp_dir, q_im_path, model, num_shot):
    # data = list of [im, cls]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    q_im = np.asarray(Image.open(q_im_path))[:, :, :3]
    if num_shot > 1:
        final_dets = None
        for i in range(num_shot): 
            sp_im_path = os.path.join(sp_dir, f'shot_{i+1}.jpg')
            sp_im = np.asarray(Image.open(sp_im_path))[:, :, :3]
            cls_dets = run_detection(sp_im, q_im, model)
            if final_dets is not None:
                final_dets = torch.cat((final_dets, cls_dets), 0)
            else:
                final_dets = cls_dets
        _, order = torch.sort(final_dets[:, 4], 0, True)
        final_dets = final_dets[order]
        keep = nms(final_dets[:, :4], final_dets[:, 4], cfg.TEST.NMS)
        final_dets = final_dets[keep.view(-1).long()]
    else:
        sp_im_path = os.path.join(sp_dir, 'shot_1.jpg')
        sp_im = np.asarray(Image.open(sp_im_path))[:, :, :3]
        final_dets = run_detection(sp_im, q_im, model)
    return final_dets
Example #3
def postprocess_dets(scores, bboxes, rois, im_info, pooled_features):
    """
    Postprocess detections to get meaningful results.

    Inputs:
        - scores: tensor, (N, num_classes + 1)
        - bboxes: tensor, (1, N, 4 * (num_classes + 1))
        - rois: tensor, (1, N, 5)
        - im_info: tensor, (1, 3)
    Outputs:
        - tensor (Ndets, 6), like (xmin, ymin, xmax, ymax, score, class)
    """
    num_classes = scores.shape[1]  # including bg
    use_cuda = USE_CUDA and torch.cuda.is_available()

    # Apply bounding-box regression deltas
    std = torch.FloatTensor((0.1, 0.1, 0.2, 0.2))
    std = std.cuda() if use_cuda else std
    bboxes = bboxes.view(-1, 4) * std
    bboxes = bboxes.view(1, -1, 4 * num_classes)
    bboxes = bbox_transform_inv(rois[:, :, 1:5], bboxes, 1)
    bboxes = clip_boxes(bboxes, im_info, 1)
    bboxes /= im_info[0][-1]
    bboxes = bboxes[0]  # (N, 4 * (num_classes + 1))

    # Class-wise nms
    detections = []
    detection_features = []
    for cid in range(1, num_classes):
        inds = torch.nonzero(scores[:, cid] > 0.05).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, cid][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = bboxes[inds][:, cid * 4:(cid + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], 0.3)
            cls_dets = cls_dets[keep.view(-1).long()]  # (keep, 5)
            class_ids = torch.ones(len(cls_dets), 1) * (cid - 1)
            cls_dets = torch.cat((
                cls_dets, class_ids.cuda() if use_cuda else class_ids), dim=1)
            detections.append(cls_dets)
            detection_features.append(pooled_features[inds][order][keep])
    return torch.cat(detections, dim=0), torch.cat(detection_features, dim=0)
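The cid * 4 slicing above relies on class-specific box regression: the head predicts one refined box per class for every RoI, flattened into a (N, 4 * num_classes) tensor. A tiny standalone illustration of that layout:

import torch

num_classes = 3  # including background (class id 0)
N = 2
# columns [cid*4, cid*4 + 4) hold the box regressed for class cid
bboxes = torch.arange(N * 4 * num_classes, dtype=torch.float32).view(N, -1)
cid = 1
cls_boxes = bboxes[:, cid * 4:(cid + 1) * 4]
print(cls_boxes.shape)  # torch.Size([2, 4])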
Example #4
def nms_for_results(result_json, nms_threshold, output_json):
    all_boxes = json.load(open(result_json, "r"))
    print("Before NMS:", len(all_boxes))
    # reformat
    all_data = {}
    for item in all_boxes:
        imgid = item["image_id"]
        if imgid not in all_data:
            all_data[imgid] = []
        all_data[imgid].append(item)

    num_images = len(all_data)

    after_nms = []
    for imgid in all_data:
        all_items = all_data[imgid]

        all_items.sort(key=lambda x: x["score"], reverse=True)
        pred_boxes = list(map(lambda x: xywh2xyxy(x["bbox"]), all_items))
        cls_scores = list(map(lambda x: x["score"], all_items))

        pred_boxes = Variable(torch.Tensor(pred_boxes))
        cls_scores = Variable(torch.Tensor(cls_scores))

        cls_dets = torch.cat((pred_boxes, cls_scores.unsqueeze(1)), 1)

        keep = nms(pred_boxes, cls_scores, nms_threshold)
        keep = keep.view(-1).long().cpu()

        keep_items = list(map(lambda x: all_items[x], keep))

        after_nms.extend(keep_items)

    print("After NMS:", len(after_nms))
    with open(output_json, "w") as f:
        json.dump(after_nms, f)
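Note that xywh2xyxy is referenced above but not included in this snippet. A plausible definition, assuming COCO-style [x, y, w, h] result boxes (an assumption, not this repo's code):

def xywh2xyxy(bbox):
    # [x, y, w, h] -> [x1, y1, x2, y2]
    x, y, w, h = bbox
    return [x, y, x + w, y + h]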
Example #5
    def __call__(self, ori_img):
        thresh = 0.5
        allbox = []

        assert isinstance(ori_img, np.ndarray), "input must be a numpy array!"
        if len(ori_img.shape) == 2:
            ori_img = ori_img[:, :, np.newaxis]
            ori_img = np.concatenate((ori_img, ori_img, ori_img), axis=2)

        blobs, im_scales = _get_image_blob(ori_img)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)
        # initialize the tensor holders here.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        num_boxes = torch.LongTensor(1)
        gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.device == "cuda":
            im_data = im_data.cuda()
            im_info = im_info.cuda()
            num_boxes = num_boxes.cuda()
            gt_boxes = gt_boxes.cuda()

        # make variable
        im_data = Variable(im_data, volatile=True)
        im_info = Variable(im_info, volatile=True)
        num_boxes = Variable(num_boxes, volatile=True)
        gt_boxes = Variable(gt_boxes, volatile=True)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        # infer
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.net(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.device == "cuda":
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_class))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        im2show = np.copy(ori_img)
        cls_dets = torch.zeros(0, 5)  # ensure the return below is defined even with no detections
        for j in range(1, len(self.pascal_class)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there are detections for this class
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                im2show = vis_detections(im2show, self.pascal_class[j],
                                         cls_dets.cpu().numpy(), 0.5)
        return im2show, pred_boxes, scores, cls_dets.cpu().numpy()
Example #6
def stomata_count(fasterRCNN, image, cuda, pascal_classes):
    if cuda:
        cfg.USE_GPU_NMS = True
    im_in = image
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)

    blobs, im_scales = _get_image_blob(im_in)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    # initialize the tensor holders here.
    im_data = torch.FloatTensor()
    im_info = torch.FloatTensor()
    num_boxes = torch.LongTensor()
    gt_boxes = torch.FloatTensor()

    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    class_agnostic = False
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if class_agnostic:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    num_stomata = 0
    label_stomata = np.copy(image)
    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > 0.5).view(-1)
        # if there are detections for this class
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            label_stomata, num_stomata = vis_detections(label_stomata, pascal_classes[j], dets, 0.9)
    return num_stomata, label_stomata
Example #7
def val(epoch, fasterRCNN, cfg):
    print('=== start val in epoch {} ==='.format(epoch))

    # [val set]
    cfg.TRAIN.USE_FLIPPED = False
    cfg.USE_GPU_NMS = args.cuda
    imdb_val, roidb_val, ratio_list_val, ratio_index_val = combined_roidb(
        args.imdbval_name, False)
    imdb_val.competition_mode(on=True)
    val_size = len(roidb_val)
    print('{:d} val roidb entries'.format(len(roidb_val)))
    cfg.TRAIN.USE_FLIPPED = True  # change again for training

    # [val dataset]
    dataset_val = roibatchLoader(roidb_val, ratio_list_val, ratio_index_val, 1, \
                                 imdb_val.num_classes, training=False, normalize_as_imagenet=True)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=0)

    # print(' == forcibly insert checkpoint loading == ')
    # load_name = './models/ImgNet_pre/vgg16/coco/train_all/imagenet_0/head_1.pth'
    # print('load {}'.format(load_name))
    # checkpoint = torch.load(load_name)
    # fasterRCNN.load_state_dict(checkpoint['model'])

    output_dir = get_output_dir(imdb_val, 'val_in_training')
    data_iter_val = iter(dataloader_val)
    num_images = len(imdb_val.image_index)
    thresh = 0.0
    max_per_image = 100
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb_val.num_classes)]

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):
        data = next(data_iter_val)
        with torch.no_grad():
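            # im_data, im_info, gt_boxes, num_boxes are assumed to be
            # module-level tensor holders created outside this function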
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred = fasterRCNN(im_data, im_info, gt_boxes,
                                               num_boxes)
        # rois_val, cls_prob_val, bbox_pred_val, \
        # rpn_loss_cls_val, rpn_loss_box_val, \
        # RCNN_loss_cls_val, RCNN_loss_bbox_val, \
        # rois_label_val = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb_val.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        for j in range(1, imdb_val.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there are detections for this class
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb_val.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb_val.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    print('Evaluating detections')
    mAP = imdb_val.evaluate_detections(all_boxes, output_dir, result_file=None)
    del dataset_val, dataloader_val
    return mAP
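The max_per_image block above keeps only the 100 highest-scoring detections per image across all classes. A standalone sketch of that cap (the function name and shapes are illustrative):

import numpy as np

def cap_per_image(boxes_per_class, max_per_image=100):
    # boxes_per_class: list over classes of (k, 5) arrays, last column = score
    all_scores = [b[:, -1] for b in boxes_per_class if len(b)]
    if not all_scores:
        return boxes_per_class
    scores = np.hstack(all_scores)
    if len(scores) > max_per_image:
        thresh = np.sort(scores)[-max_per_image]
        boxes_per_class = [b[b[:, -1] >= thresh] if len(b) else b
                           for b in boxes_per_class]
    return boxes_per_class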
Example #8
def main(cv2_img, fasterRCNN, all_boxes, query, _query_im):

    index = 0
    data = [0, 0, 0, 0, 0]
    im = cv2_img
    im = cv2.resize(im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)

    _im = np.copy(im)

    # make im_data
    im, im_scale = prep_im_for_blob(im, target_size=600)
    im = torch.tensor(im)
    im = torch.unsqueeze(im, 0)
    im = im.transpose(1, 3)
    im_data = im.transpose(2, 3)

    im_data = data[0] = im_data.cuda()
    im_info = data[2] = torch.tensor([[600, 899, 1.4052]])
    gt_boxes = data[3] = torch.rand(1, 4, 5)  # don't care
    catgory = data[4] = torch.tensor([1])



    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, _, RCNN_loss_bbox, \
    rois_label, weight = fasterRCNN(im_data, query, im_info, gt_boxes, catgory)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= data[2][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    im2show = np.copy(_im)

    thresh = 0.05  # not defined in the original snippet; assumed here
    inds = torch.nonzero(scores > thresh).view(-1)
    # if there are detections
    if inds.numel() > 0:
        cls_scores = scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        all_boxes[catgory.item()][index] = cls_dets.cpu().numpy()

        im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.8)
        _im2show = np.concatenate((im2show, _query_im), axis=1)
        plt.imshow(_im2show)
        plt.show()
Example #9
    def forward(self, input, use_gt_boxes=False):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        rois = input[0]
        im_info = input[1]
        roi_feat = input[2]
        nlp_features = input[3]
        cfg_key = input[4]

        assert roi_feat.dim() == 3, "roi_feat must be B x N x D shape"
        B = roi_feat.size(0)
        N = roi_feat.size(1)
        D = roi_feat.size(2)

        if cfg.TRAIN.RELPN_WITH_BBOX_INFO:
            rois_nm = rois.new(rois.size(0), rois.size(1), 4)
            xx = im_info[:, 1]
            yy = im_info[:, 0]
            rois_nm[:, :, :2] = rois[:, :, 1:3]  #/ xx[:,None]
            rois_nm[:, :, 2:] = rois[:, :, 3:5]  #/ yy[:,None]
            roi_feat4prop = torch.cat((roi_feat, Variable(rois_nm)), 2)
            D += 4
        else:
            roi_feat4prop = roi_feat
        # roi_feat4prop = roi_feat4prop.view(B * N, D)
        roi_feat4prop = self.sub_feat(roi_feat4prop)  # feature dim reduction to 256
        batch_size = rois.size(0)
        pre_nms_topN = cfg[cfg_key].RELPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RELPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RELPN_NMS_THRESH
        min_size = cfg[cfg_key].RELPN_MIN_SIZE

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        ####################################################
        ### Method 1: use correlation with the NLP feature vector to score ###
        ####################################################
        map_x = np.arange(0, rois.size(1))
        map_y = np.arange(0, rois.size(1))
        map_x_g, map_y_g = np.meshgrid(map_x, map_y)
        map_yx = torch.from_numpy(
            np.vstack((map_y_g.ravel(), map_x_g.ravel())).transpose()).cuda()
        proposals = map_yx.expand(batch_size,
                                  rois.size(1) * rois.size(1),
                                  2)  # B x (N * N) x 2

        # filter diagonal entries
        keep = self._filter_diag(proposals)
        proposals = proposals.contiguous().view(
            -1, 2)[keep.nonzero().squeeze(), :].contiguous().view(
                batch_size, -1, 2).contiguous()
        # -------------using NN to encode the pair feature----------------

        # TODO: add new score method:
        all_box_pairs_fet = []  # bs x pairs_num x 151
        all_box_pairs_roi = []  # bs x pairs_num x 8
        all_box_pairs_score = []
        # TODO: this overrides the configured post-NMS top-N; during training,
        # removing this override hurts results.
        post_nms_topN = proposals.size(1)
        output = rois.new(batch_size, post_nms_topN, 9).zero_()
        output_score = rois.new(batch_size, post_nms_topN, 1).zero_()
        output_proposals = proposals.new(batch_size, post_nms_topN, 2).zero_()
        nlp_features_repeated = nlp_features.unsqueeze(1).repeat(
            1, proposals.size(1), 1)

        for b in range(batch_size):
            # torch.cuda.empty_cache()
            # proposals_subject_roi_i = rois[b][proposals[b, :, 0], :][:, 1:5]
            # proposals_object_roi_i = rois[b][proposals[b, :, 1], :][:, 1:5]
            proposals_subject_fet_i = roi_feat4prop[b][proposals[b, :, 0], :]
            proposals_object_fet_i = roi_feat4prop[b][proposals[b, :, 1], :]

            # -------------using NN to encode the pair feature----------------
            #all_box_pairs_fet.append(torch.cat((proposals_subject_fet_i, proposals_object_fet_i), 1))

            # -------------using bi-lstm to encode the pair feature----------------
            # Get the output from the LSTM. inputs pairNum x 2 x 256

            box_pairs_fet = (torch.cat(
                (proposals_subject_fet_i, proposals_object_fet_i),
                0)).view(2, -1, self.sub_feat_size)
            outputs, state = self.lstm_encoder(box_pairs_fet, None)

            # Return the Encoder's output.
            # sequence x minibatch x features length
            select_features = self.lstm_out(outputs[-1, :, :])
            scores_i = F.cosine_similarity(select_features,
                                           nlp_features_repeated[b],
                                           dim=1,
                                           eps=1e-6)
            #     all_box_pairs_score.append(scores_i)
            #
            # for b in range(batch_size):
            #     scores_i = all_box_pairs_score[b]
            proposals_i = proposals[b]
            _, order_i = torch.sort(scores_i, descending=True)  # highest first
            if pre_nms_topN > 0 and pre_nms_topN < scores_i.numel():
                order_single = order_i[:pre_nms_topN]
            else:
                order_single = order_i

            proposals_single = proposals_i[order_single, :]
            scores_single = scores_i[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            if not use_gt_boxes:

                proposals_subject = rois[b][proposals_single[:, 0], :][:, 1:5]
                proposals_object = rois[b][proposals_single[:, 1], :][:, 1:5]

                rel_rois_final = torch.cat(
                    (proposals_subject, proposals_object), 1)

                keep_idx_i = nms(rel_rois_final, scores_single.squeeze(1),
                                 1).long().view(-1)

                if post_nms_topN > 0:
                    keep_idx_i = keep_idx_i[:post_nms_topN]
                proposals_single = proposals_single[keep_idx_i, :]
                scores_single = scores_single[keep_idx_i, :]
            else:
                proposals_single = proposals_single[:post_nms_topN, :]
                scores_single = scores_single[:post_nms_topN, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[b, :num_proposal, 0] = b
            output[b, :num_proposal,
                   1:5] = rois[b][proposals_single[:, 0], :][:, 1:5]
            output[b, :num_proposal,
                   5:] = rois[b][proposals_single[:, 1], :][:, 1:5]
            output_score[b, :num_proposal, 0] = scores_single.squeeze()
            output_proposals[b, :num_proposal, :] = proposals_single
        return output, output_proposals, output_score
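The meshgrid and _filter_diag steps above enumerate every ordered subject-object RoI pair and drop the self-pairs. A standalone illustration of that enumeration (assuming _filter_diag removes pairs whose two indices are equal):

import numpy as np
import torch

N = 4  # number of RoIs
map_x, map_y = np.meshgrid(np.arange(N), np.arange(N))
pairs = torch.from_numpy(np.vstack((map_y.ravel(), map_x.ravel())).transpose())
keep = pairs[:, 0] != pairs[:, 1]  # drop subject == object (diagonal) pairs
pairs = pairs[keep]
print(pairs.shape)  # torch.Size([12, 2]), i.e. (N*N - N, 2)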
Example #10
def get_detections_from_im(fasterRCNN,
                           classes,
                           im_file,
                           args,
                           conf_thresh=0.2):
    """obtain the image_info for each image,
    im_file: the path of the image

    return: dict of {'image_id', 'image_h', 'image_w', 'num_boxes', 'boxes', 'features'}
    boxes: the coordinate of each box
    """
    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    # load the image
    # im = cv2.imread(im_file)
    im_in = np.array(imread(im_file))
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    vis = True

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()
    det_tic = time.time()

    # the region features[box_num * 2048] are required.
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label, pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, pool_feat = True)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    max_conf = torch.zeros((pred_boxes.shape[0]))
    if args.cuda > 0:
        max_conf = max_conf.cuda()

    if vis:
        im2show = np.copy(im)
    for j in range(1, len(classes)):
        inds = torch.nonzero(scores[:, j] > conf_thresh).view(-1)
        # if there are detections for this class
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            index = inds[order[keep]]
            max_conf[index] = torch.where(scores[index, j] > max_conf[index],
                                          scores[index, j], max_conf[index])
            if vis:
                im2show = vis_detections(im2show, classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    if args.cuda > 0:
        keep_boxes = torch.where(max_conf >= conf_thresh, max_conf,
                                 torch.tensor(0.0).cuda())
    else:
        keep_boxes = torch.where(max_conf >= conf_thresh, max_conf,
                                 torch.tensor(0.0))
    keep_boxes = torch.squeeze(torch.nonzero(keep_boxes), dim=-1)
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    objects = torch.argmax(scores[keep_boxes][:, 1:], dim=1)
    box_dets = np.zeros((len(keep_boxes), 4))
    boxes = pred_boxes[keep_boxes]
    name_list = []
    box_caption_feature = np.zeros((len(keep_boxes), 300))
    box_caption_mask = np.ones(len(keep_boxes))
    for i in range(len(keep_boxes)):
        kind = objects[i] + 1
        bbox = boxes[i, kind * 4:(kind + 1) * 4]
        tmp_dets = np.array(bbox.cpu())
        if (tmp_dets[2] - tmp_dets[0]) * (tmp_dets[3] - tmp_dets[1]) <= 10:
            box_caption_mask[i] = 0
        class_name = classes[1:][objects[i]]
        box_dets[i] = tmp_dets
        name_list.append(class_name)
        doc = nlp1(class_name)
        token_vector = nlp2(doc[0].text).vector
        box_caption_feature[i, :] = token_vector

    return {
        'image_h': np.size(im, 0),
        'image_w': np.size(im, 1),
        'num_boxes': len(keep_boxes),
        #'boxes': box_dets, # region shape 4 * 36, 4 is the xy positions
        #'features': (pooled_feat[keep_boxes].cpu()).detach().numpy(),
        'text': name_list,
        #'text_feature': box_caption_feature,
        # 'text_mask': box_caption_mask
    }
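The MIN_BOXES/MAX_BOXES logic above clamps the number of returned regions into a fixed range. A self-contained sketch (the constants are placeholders; the source defines them elsewhere):

import torch

MIN_BOXES, MAX_BOXES = 10, 100  # illustrative values
conf_thresh = 0.2
max_conf = torch.rand(300)  # stand-in for the per-box max class confidence

keep_boxes = torch.nonzero(max_conf >= conf_thresh).view(-1)
if len(keep_boxes) < MIN_BOXES:
    keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
elif len(keep_boxes) > MAX_BOXES:
    keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]
print(len(keep_boxes))  # always within [MIN_BOXES, MAX_BOXES]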
Example #11
def run_model(support_im_paths, query_path, cnt_shot, output_path_folder):
    # support
    # support_root_dir = 'datasets/supports'
    # class_dir = 'horse'
    # n_shot = 2
    # im_paths = list(Path(os.path.join(support_root_dir, class_dir)).glob('*.jpg'))
    CWD = os.getcwd()

    print(support_im_paths)
    n_shot = len(support_im_paths)
    random.seed(0)
    im_path_list = random.sample(support_im_paths, k=n_shot)
    im_list = []
    #fig = plt.figure(num=None, figsize=(8, 8), dpi=50, facecolor='w', edgecolor='k')
    for i, im_path in enumerate(im_path_list):
        im = Image.open(im_path)
        im_list.append(np.asarray(im))
    support_data = support_im_preprocess(im_list, cfg, 320, n_shot)

    # query
    im = np.asarray(Image.open(query_path))
    im2show = im.copy()
    query_data, im_info, gt_boxes, num_boxes = query_im_preprocess(im, cfg)

    # prepare data
    data = [query_data, im_info, gt_boxes, num_boxes, support_data]
    im_data, im_info, num_boxes, gt_boxes, support_ims = prepare_variable()
    with torch.no_grad():
        im_data.resize_(data[0].size()).copy_(data[0])
        im_info.resize_(data[1].size()).copy_(data[1])
        gt_boxes.resize_(data[2].size()).copy_(data[2])
        num_boxes.resize_(data[3].size()).copy_(data[3])
        support_ims.resize_(data[4].size()).copy_(data[4])

    # model
    cfg_from_list(
        ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'])
    model_dir = os.path.join(CWD, 'models')
    load_path = os.path.join(model_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(1, 11, 34467))

    model = get_model('multi', load_path, n_shot)

    start_time = time.time()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes, support_ims, gt_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = bbox_pred.data

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4)

    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    # re-scale boxes to the origin img scale
    pred_boxes /= data[1][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    thresh = 0.05
    inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
    cls_scores = scores[:, 1][inds]
    _, order = torch.sort(cls_scores, 0, True)
    cls_boxes = pred_boxes[inds, :]
    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    cls_dets = cls_dets[order]
    keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
    cls_dets = cls_dets[keep.view(-1).long()]

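    # suppress detections wider or taller than half the image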
    for i in range(cls_dets.shape[0]):
        w = cls_dets[i, 2] - cls_dets[i, 0]
        h = cls_dets[i, 3] - cls_dets[i, 1]
        if w > 0.5 * im2show.shape[1] or h > 0.5 * im2show.shape[0]:
            cls_dets[i, 4] = 0

    end_time = time.time()

    im2show = vis_detections(im2show, ' ', cls_dets.cpu().numpy(), 0.5)

    output_path = os.path.join(output_path_folder,
                               'result' + str(cnt_shot) + '.jpg')
    cv2.imwrite(output_path, im2show[:, :, ::-1])
    print(cls_dets)
    print(end_time - start_time)
Example #12
    def forward(self, input):
        """
        for each (H, W) location i
            generate 9 anchor boxes centered on cell i
            finetune the for the 9 anchors at cell i bbox by predicted bbox deltas
        H = feat_h = h/16
        W = feat_w = w/16

        @param input: a tuple (rpn_cls_prob,    rpn_bbox_pred,   im_info,  cfg_key) whose shape is
                              ((batch,18,H,W), (batch,36,H,W),  (batch,2), 'train/test')
        @return: rois (batch, 2000, 5), 2000 training proposals, each row is [batch_ind, x1, y1, x2, y2]
        """

        # take the positive (object) scores
        scores = input[0][:, self._num_anchors:, :, :]  # (batch, 9, H, W)
        bbox_deltas = input[1]  # (batch, 36, H, W)
        im_info = input[2]  # (batch, 2)
        cfg_key = input[3]  # 'train/test'

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 6000 for train
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 300 for test
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # 0.7
        min_size = cfg[cfg_key].RPN_MIN_SIZE  # 16
        batch_size = bbox_deltas.size(0)  # batch

        # compute the shift value for H*W cells
        feat_height, feat_width = scores.size(2), scores.size(3)  # H, W
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()  # (H*W, 4)

        # copy and shift the 9 anchors for H*W cells
        # copy the H*W*9 anchors for batch images
        A = self._num_anchors  # 9
        K = shifts.size(0)  # H * W
        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(
            K, 1, 4)  # (H*W, 9, 4) anchors for 1 image
        anchors = anchors.view(1, K * A, 4).expand(
            batch_size, K * A, 4)  # (batch, H*W*9, 4) anchors for batch images

        # make bbox_deltas the same order with the anchors:
        bbox_deltas = bbox_deltas.permute(
            0, 2, 3, 1).contiguous()  # (batch, 36, H, W) --> (batch, H, W, 36)
        bbox_deltas = bbox_deltas.view(
            batch_size, -1, 4)  # (batch, H, W, 36) --> (batch, H*W*9, 4)

        # Same story for the scores:
        scores = scores.permute(
            0, 2, 3, 1).contiguous()  # (batch, 9, H, W) --> (batch, H, W, 9)
        scores = scores.view(batch_size,
                             -1)  # (batch, H, W, 9) --> (batch, H*W*9)

        # Finetune [x1, y1, x2, y2] of anchors according to the predicted bbox_delta
        proposals = bbox_transform_inv(anchors, bbox_deltas,
                                       batch_size)  # (batch, H*W*9, 4)

        # 2. clip predicted boxes to the image, make sure [x1, y1, x2, y2] are within the image [h, w]
        proposals = clip_boxes(proposals, im_info,
                               batch_size)  # (batch, H*W*9, 4)
        scores_keep = scores
        proposals_keep = proposals

        # 3. remove predicted bboxes whose height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        _, order = torch.sort(scores_keep, 1, True)  # high score to low score

        # initialise the proposals by zero tensor
        output = scores.new(batch_size, post_nms_topN, 5).zero_()

        # for each image
        for i in range(batch_size):
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]
            order_single = order[i]

            # 5. take top pre_nms_topN proposals before NMS (e.g. 6000)
            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]
            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply NMS (e.g. threshold = 0.7)
            keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # 7. take after_nms_topN proposals after NMS (e.g. 300 for test, 2000 for train)
            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]

            # 8. return the top proposals (-> RoIs top)
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # 9. padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output  # (batch, 2000, 5) 2000 training proposals, each row is [batch_ind, x1, y1, x2, y2]
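Every example in this listing decodes box deltas with bbox_transform_inv. For reference, a minimal single-image sketch of the (dx, dy, dw, dh) parameterization it implements (a sketch in the py-faster-rcnn style, including the legacy +1 width/height convention; not the repo file itself):

import torch

def decode_deltas(boxes, deltas):
    # boxes: (N, 4) anchors as (x1, y1, x2, y2); deltas: (N, 4) as (dx, dy, dw, dh)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas.unbind(1)
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights
    return torch.stack((pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h), dim=1)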
Example #13
def test_net(model=None, image=None, params=None, bg=None, cls=None):
    blob, scale, label = params
    with torch.no_grad():  # tensor holders for the network inputs
        im_data = Variable(torch.FloatTensor(1))
        im_info = Variable(torch.FloatTensor(1))
        num_boxes = Variable(torch.LongTensor(1))
        gt_boxes = Variable(torch.FloatTensor(1))

    im_info_np = np.array([[blob.shape[1], blob.shape[2], scale[0]]], dtype=np.float32)
    im_data_pt = torch.from_numpy(blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():  # resize
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes)  # predict

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if opt.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if opt.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if opt.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS)

            box_deltas = box_deltas.view(1, -1, 4 * len(label))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= scale[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    image = np.copy(image[:, :, ::-1])
    demo = image.copy()
    bubbles = []
    dets_bubbles = []

    for j in range(1, len(label)):
        inds = torch.nonzero(scores[:, j] > opt.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], opt.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()].cpu().numpy()

            # post-processing: get contours of speech bubbles
            demo, image, bubbles, dets_bubbles = bubble_utils.get_cnt_bubble(image, image.copy(), label[j], cls_dets,
                                                                             cls, bg=bg)
    return demo, image, bubbles, dets_bubbles
Example #14
def predict1():
    data = {"success": False}
    im_info1 = {}
    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05
    vis = True

    file_dir = os.path.join(basedir, 'upload/')
    print('file_dir',file_dir)
    webcam_num = args.webcam_num
    # Set up webcam or get image directories
    if webcam_num >= 0:
        cap = cv2.VideoCapture(webcam_num)
        num_images = 0
    else:
        imglist = os.listdir(file_dir)
        num_images = len(imglist)

    print('Loaded Photo: {} images.'.format(num_images))

    while num_images >= 0:
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1

        # Get image from the webcam
        if webcam_num >= 0:
            if not cap.isOpened():
                raise RuntimeError("Webcam could not open. Please check connection.")
            ret, frame = cap.read()
            im_in = np.array(frame)
        # Load the demo image
        else:
            im_file = os.path.join(file_dir, imglist[num_images])
            print("im_fileeeeeee",im_file)
            # im = cv2.imread(im_file)
            im_in = np.array(imread(im_file))
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)



        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        jindex = []
        info = {}
        info['predictions'] = list()
        filename = os.path.split(im_file)
        print("filename", filename[1])
        info['filename'] = filename[1]
        image1 = Image.open(im_file)
        print('image1.size', image1.size)
        info['width'] = image1.size[0]
        info['height'] = image1.size[1]
        if vis:
            im2show = np.copy(im)
        for j in range(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there are detections for this class
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                print('j', j)
                jindex.append(j)
                if vis:
                    im2show = vis_detections(im2show, j, cls_dets.cpu().numpy(), 0.5)
                    pred = vis_results(j, cls_dets.cpu().numpy(), 0.5)
                    print('pred', pred)
                    if pred != []:
                        info['predictions'].append(pred)
                    # print("cls_dets.cpu().numpy()",cls_dets.cpu().numpy())

        # print('cls_dets',cls_dets)
        # box_re = cls_dets.cpu().numpy()
        # print('box_re',box_re)
        # # Loop over the results and add them to the list of returned predictions
        # info = {}
        # filename = os.path.split(im_file)
        # print("filename",filename[1])
        # info['filename'] = filename[1]
        # image1 = Image.open(im_file);
        # print('image1.size', image1.size);
        # info['width'] = image1.size[0]
        # info['height'] = image1.size[1]
        # info['predictions'] = list()
        # j = 0
        # for box in box_re:
        #     r = {"BoxList": [str(i) for i in np.rint(box[:4]).astype(int)]}
        #     r["BoxList"].append(jindex[j])
        #     j=j+1
        #     info['predictions'].append(r)
        #     # Indicate that the request was a success.
        # s = {}
        data["success"] = True
        # s = {im_file: info}
        im_info1[filename[1]]=info
        data['im_info'] = im_info1
        print(data)
        new_data = process(data)
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            result_path = os.path.join(file_dir, imglist[num_images][:-4] + "_det.jpg")
            cv2.imwrite(result_path, im2show)
        else:
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    if webcam_num >= 0:
        cap.release()
        cv2.destroyAllWindows()
    return flask.jsonify(new_data)
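
# A rough sketch (not part of the example above) of the JSON payload this
# Flask endpoint returns. The exact keys depend on process() and vis_results(),
# which are defined elsewhere, so every field name here is an assumption.
example_response = {
    "success": True,
    "im_info": {
        "img_0001.jpg": {
            "filename": "img_0001.jpg",
            "width": 640,
            "height": 480,
            "predictions": [],  # filled with per-class results from vis_results()
        },
    },
}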
Exemplo n.º 15
0
    def detect(self, dataset, foldername, filename, ch, vis, bbox_log):
        image_num = os.path.splitext(filename)[0]
        output_folder = 'output/' + dataset + "_ch" + str(ch)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)  # makedirs: the parent 'output/' dir may not exist yet

        total_tic = time.time()

        # im = cv2.imread(im_file)
        im_file = os.path.join(foldername, filename)

        im_in = np.array(imread(im_file))

        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
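        # (im_info_np holds [[height, width, scale]] of the resized input blob;
        #  the scale factor is reused later to map boxes back to the source image)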

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)


        with torch.no_grad():
            self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            self.gt_boxes.resize_(1, 1, 5).zero_()
            self.num_boxes.resize_(1).zero_()

        # pdb.set_trace()
        det_tic = time.time()

        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label = self.fasterRCNN(
            self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.myargs.class_agnostic:
                    if self.myargs.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.myargs.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im2show = np.copy(im)
        for j in range(1, len(self.pascal_classes)):
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.myargs.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                if bbox_log:
                    bbox_list = cls_dets.cpu().numpy()
                    for bb in bbox_list:
                        start_x = int(bb[0])
                        start_y = int(bb[1])
                        end_x = int(bb[2])
                        end_y = int(bb[3])
                        confidence = bb[4]
                        if confidence > 0.5:
                            fo.write(
                                str(ch) + "," + image_num + "," + str(start_x) + "," + str(start_y) + "," +
                                str(end_x) + "," + str(end_y) + "," + str(confidence) + "\n"
                            )

                if vis:
                    im2show = vis_detections(im2show, self.pascal_classes[j], cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
        #                       .format(num_images + 1, len(imglist), detect_time, nms_time))
        # sys.stdout.flush()
        if vis:
            result_path = os.path.join(output_folder, str(image_num) + ".jpg")
            cv2.imwrite(result_path, im2show)
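
# Minimal usage sketch for the detect() method above, left as comments since
# the wrapper class is not shown in the example (its name here is hypothetical):
#
#   detector = Detector()  # wrapper holding fasterRCNN and the input tensors
#   detector.detect(dataset='demo', foldername='images/ch1',
#                   filename='000001.jpg', ch=1, vis=True, bbox_log=False)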
Exemplo n.º 16
0
def eval_one_dataloader(save_dir_test_out, dataloader_t, fasterRCNN, device, imdb, target_num=0,
                        class_agnostic=False, thresh=0.0, max_per_image=100, return_ap_class=False):

    save_name = save_dir_test_out + '_test_in_'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    data_iter = iter(dataloader_t)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    #fasterRCNN.training = False
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)

        im_data = data[0].to(device)
        im_info = data[1].to(device)
        gt_boxes = data[2].to(device)
        num_boxes = data[3].to(device)
        with torch.no_grad():
            if isinstance(fasterRCNN, frcnn_htcn) or isinstance(fasterRCNN, frcnn_htcn_m):
                det_tic = time.time()
                rois , cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, _, _, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, target_num=target_num)
            elif isinstance(fasterRCNN, frcnn_saito):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            else:
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # d_pred = d_pred.data
        # path = data[4]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()  # [1, 300, 2] -> [300, 2]
        pred_boxes = pred_boxes.squeeze()  # [1, 300, 8] -> [300, 8]
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh, as_tuple=False).view(-1)  # [300]
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]  # [300]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]  # [300, 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)  # [300, 5]
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)  # [N, 1]
                cls_dets = cls_dets[keep.view(-1).long()]  # [N, 5]

                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])  # [M,]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
        #                  .format(i + 1, num_images, detect_time, nms_time))
        # sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP, ap_per_class = imdb.evaluate_detections(all_boxes, output_dir)
    del scores
    del boxes
    del all_boxes
    del pred_boxes
    del rois
    del cls_prob
    del bbox_pred
    del rpn_loss_cls
    del rpn_loss_box
    del RCNN_loss_cls
    del RCNN_loss_bbox
    del rois_label


    if return_ap_class:
        return mAP, ap_per_class
    return mAP
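
# Standalone sketch of the "max_per_image" truncation used above: keep only the
# top-K scoring detections across all foreground classes. Pure NumPy, so it can
# be tested without a model; the function name is illustrative, not from the repo.
import numpy as np

def limit_detections(per_class_dets, max_per_image):
    """per_class_dets: list of (N_j, 5) arrays of [x1, y1, x2, y2, score]."""
    scores = np.hstack([d[:, -1] for d in per_class_dets])
    if len(scores) <= max_per_image:
        return per_class_dets
    thresh = np.sort(scores)[-max_per_image]  # K-th highest score overall
    return [d[d[:, -1] >= thresh] if len(d) else d for d in per_class_dets]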
Exemplo n.º 17
0
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                if args.soft_nms:
                    np_dets = cls_dets.cpu().numpy().astype(np.float32)
                    # soft_nms rescores np_dets in place
                    keep = soft_nms(np_dets, cfg.TEST.SOFT_NMS_METHOD)
                    keep = torch.from_numpy(keep).type_as(cls_dets).int()
                    cls_dets = torch.from_numpy(np_dets).type_as(cls_dets)
                else:
                    keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
Exemplo n.º 18
0
    def forward(self, input):

        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
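
# Shape sanity-check for the anchor arithmetic in forward() above: A base
# anchors broadcast against K spatial shifts yield K*A anchors. A toy NumPy
# version (the sizes here are illustrative, not the repo's actual anchors):
import numpy as np

feat_h, feat_w, stride = 2, 3, 16
base = np.array([[-8, -8, 8, 8], [-16, -16, 16, 16]], dtype=np.float32)  # A = 2
xs, ys = np.meshgrid(np.arange(feat_w) * stride, np.arange(feat_h) * stride)
shifts = np.vstack((xs.ravel(), ys.ravel(), xs.ravel(), ys.ravel())).T  # (K, 4)
anchors = (base[None, :, :] + shifts[:, None, :]).reshape(-1, 4)
assert anchors.shape == (feat_h * feat_w * base.shape[0], 4)  # K*A anchors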
Exemplo n.º 19
0
def test_net(fasterRCNN, image, img_blob, img_scales, items, labels, i):
    im_data, im_info, num_boxes, gt_boxes = items
    im_info_np = np.array(
        [[img_blob.shape[1], img_blob.shape[2], img_scales[0]]],
        dtype=np.float32)
    im_data_pt = torch.from_numpy(img_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if config.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if config.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if config.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS)

            box_deltas = box_deltas.view(1, -1, 4 * len(labels))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # no regression: simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= img_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    copy_img = np.copy(image[:, :, ::-1])
    bubbles = []
    for j in range(1, len(labels)):
        inds = torch.nonzero(scores[:, j] > config.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], config.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            copy_img, vis_img, bubbles, boxes = sbd_utils.divideBubbleFromImage(
                copy_img,
                image[:, :, ::-1],
                labels[j],
                cls_dets.cpu().numpy(),
                config.CLASS_THRESH,
                bg=config.BACKGROUND)

    copy_img, vis_img, cuts = sbd_utils.divideCutFromImage(
        copy_img, image[:, :, ::-1], i, bg=config.BACKGROUND)
    alpha_image = sbd_utils.addImageToAlphaChannel(copy_img,
                                                   copy_img,
                                                   FLAG='conversion')
    vis_img, texts = text.detection(vis_img, bubbles, boxes)
    return alpha_image, vis_img, cuts, bubbles, texts
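
# The delta de-normalization above recurs in most of these examples: each raw
# regression output is unpacked as delta = delta_raw * std + mean before
# bbox_transform_inv() turns it into box coordinates. A minimal sketch, with
# an illustrative function name and CPU tensors for simplicity:
import torch

def denormalize_deltas(box_deltas, stds, means, num_classes, class_agnostic):
    d = box_deltas.view(-1, 4) * torch.tensor(stds) + torch.tensor(means)
    # class-agnostic heads predict 4 values per RoI; per-class heads predict 4*C
    return d.view(1, -1, 4 if class_agnostic else 4 * num_classes)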
Exemplo n.º 20
0
def test(args, model=None):
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Load dataset
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False)
    imdb_vu.competition_mode(on=True)
    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu._classes,
                                training=False)

    # initialize the network here.
    if not model:
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb_vu.classes,
                               pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb_vu.classes,
                                101,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb_vu.classes,
                                50,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb_vu.classes,
                                152,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            raise ValueError("network is not defined")
        fasterRCNN.create_architecture()

        # Load checkpoint
        print("load checkpoint %s" % (args.weights))
        checkpoint = torch.load(args.weights)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')
    else:
        # evaluate constructed model
        fasterRCNN = model

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    # record time
    start = time.time()

    # visualization
    vis = args.vis if hasattr(args, 'vis') else None
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    max_per_image = 100

    fasterRCNN.eval()
    dataset_vu.query_position = 0
    test_scales = cfg.TEST.SCALES
    multiscale_iterators = []
    for i_scale, test_scale in enumerate(test_scales):
        cur_dataloader_vu = torch.utils.data.DataLoader(dataset_vu,
                                                        batch_size=1,
                                                        shuffle=False,
                                                        num_workers=0,
                                                        pin_memory=True)
        cur_data_iter_vu = iter(cur_dataloader_vu)
        multiscale_iterators.append(cur_data_iter_vu)

    # total number of test images; each image may contain multiple classes to detect
    num_images_vu = len(imdb_vu.image_index)
    num_detect = len(ratio_index_vu[0])

    all_boxes = [[[] for _ in range(num_images_vu)]
                 for _ in range(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}

    for i, index in enumerate(ratio_index_vu[0]):
        det_tic = time.time()
        multiscale_boxes = []
        multiscale_scores = []
        for i_scale, (data_iter_vu, test_scale) in enumerate(
                zip(multiscale_iterators, test_scales)):
            # need to rewrite cfg.TRAIN.SCALES - very hacky!
            BACKUP_TRAIN_SCALES = cfg.TRAIN.SCALES
            cfg.TRAIN.SCALES = [test_scale]
            data = next(data_iter_vu)
            cfg.TRAIN.SCALES = BACKUP_TRAIN_SCALES

            with torch.no_grad():
                im_data.resize_(data[0].size()).copy_(data[0])
                query.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                catgory.data.resize_(data[4].size()).copy_(data[4])

                # Run Testing
                if not hasattr(args, "class_image_augmentation"
                               ) or not args.class_image_augmentation:
                    queries = [query]
                elif args.class_image_augmentation.lower() == "rotation90":
                    queries = [query]
                    for _ in range(3):
                        queries.append(queries[-1].rot90(1, [2, 3]))
                else:
                    raise RuntimeError(
                        "Unknown class_image_augmentation: {}".format(
                            args.class_image_augmentation))

                for q in queries:
                    rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, _, RCNN_loss_bbox, \
                    rois_label, weight = fasterRCNN(im_data, q, im_info, gt_boxes, catgory)

                    scores = cls_prob.data
                    boxes = rois.data[:, :, 1:5]

                    # Apply bounding-box regression
                    if cfg.TEST.BBOX_REG:
                        # Apply bounding-box regression deltas
                        box_deltas = bbox_pred.data
                        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                            # Optionally normalize targets by a precomputed mean and stdev
                            if args.class_agnostic:
                                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                                box_deltas = box_deltas.view(1, -1, 4)
                            else:
                                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                                box_deltas = box_deltas.view(
                                    1, -1, 4 * len(imdb_vu.classes))

                        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
                    else:
                        # Simply repeat the boxes, once for each class
                        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

                    # Resize to original ratio
                    pred_boxes /= data[2][0][2].item()

                    # Remove batch_size dimension
                    scores = scores.squeeze()
                    pred_boxes = pred_boxes.squeeze()

                    multiscale_scores.append(scores)
                    multiscale_boxes.append(pred_boxes)

        scores = torch.cat(multiscale_scores, dim=0)
        pred_boxes = torch.cat(multiscale_boxes, dim=0)

        # Record time
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Post processing
        inds = torch.nonzero(scores > thresh).view(-1)
        if inds.numel() > 0:
            # remove useless indices
            cls_scores = scores[inds]
            cls_boxes = pred_boxes[inds, :]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)

            # rearrange order
            _, order = torch.sort(cls_scores, 0, True)
            cls_dets = cls_dets[order]

            # NMS
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            all_boxes[catgory][index] = cls_dets.cpu().numpy()

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                try:
                    image_scores = all_boxes[catgory][index][:, -1]
                    if len(image_scores) > max_per_image:
                        image_thresh = np.sort(image_scores)[-max_per_image]

                        keep = np.where(
                            all_boxes[catgory][index][:,
                                                      -1] >= image_thresh)[0]
                        all_boxes[catgory][index] = all_boxes[catgory][index][
                            keep, :]
                except:
                    pass

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                .format(i + 1, num_detect, detect_time, nms_time))
            sys.stdout.flush()

            # save test image
            if vis:  # save every test image
                im2show = cv2.imread(
                    dataset_vu._roidb[dataset_vu.ratio_index[i]]['image'])
                im2show = vis_detections(im2show, 'shot',
                                         cls_dets.cpu().numpy(), 0.3)

                o_query = data[1][0].permute(1, 2,
                                             0).contiguous().cpu().numpy()
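                # undo the ImageNet normalization (multiply by std, add mean, rescale to 0-255)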
                o_query *= [0.229, 0.224, 0.225]
                o_query += [0.485, 0.456, 0.406]
                o_query *= 255
                o_query = o_query[:, :, ::-1]

                (h, w, c) = im2show.shape
                o_query = cv2.resize(o_query, (h, h),
                                     interpolation=cv2.INTER_LINEAR)
                im2show = np.concatenate((im2show, o_query), axis=1)

                vis_path = "./test_img"
                if not os.path.isdir(vis_path):
                    os.makedirs(vis_path)
                cv2.imwrite(os.path.join(vis_path, "%d_d.png" % (i)), im2show)

    print('Evaluating detections')
    mAP = imdb_vu.evaluate_detections(all_boxes, None)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
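
# Sketch of the multi-scale merge performed in test() above: per-scale scores
# and boxes are concatenated along the RoI dimension and a single NMS pass then
# arbitrates between scales. Using torchvision's nms() here is an assumption --
# the repo ships its own nms() with the same (boxes, scores, iou_thresh) call.
import torch
from torchvision.ops import nms

def merge_multiscale(boxes_per_scale, scores_per_scale, iou_thresh=0.3):
    boxes = torch.cat(boxes_per_scale, dim=0)    # (sum_i N_i, 4)
    scores = torch.cat(scores_per_scale, dim=0)  # (sum_i N_i,)
    keep = nms(boxes, scores, iou_thresh)
    return boxes[keep], scores[keep]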
Exemplo n.º 21
0
            inds = torch.nonzero(scores[:, j] > thresh,
                                 as_tuple=False).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) # 0.4.0 version
                keep = nms(cls_dets[:, :4], cls_dets[:, 4], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                             .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
Exemplo n.º 22
0
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        ########################################################
        ##########
        # save anchor
        ##########
        # _, order = torch.sort(scores, 1, True)
        # anchors_to_save = anchors[0][order[0]].clone().cpu().numpy()
        # scores_to_save = scores[0][order[0]].clone().cpu().numpy()
        # np.save('/home/tony/FSOD/output/visualization/anchors.npy', anchors_to_save)
        # np.save('/home/tony/FSOD/output/visualization/scores.npy', scores_to_save)
        ########################################################

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)
        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            #########################################################
            ##########
            # save proposals (before nms)
            ##########
            # proposals_to_save = proposals_single.clone().cpu().numpy()
            # scores_to_save = scores_single.clone().cpu().numpy()
            # np.save('/home/tony/FSOD/output/visualization/proposals.npy', proposals_to_save)
            # np.save('/home/tony/FSOD/output/visualization/scores.npy', scores_to_save)
            #########################################################

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            ########################################################
            ##########
            # save proposals (after nms)
            ##########
            # proposals_to_save = proposals_single.clone().cpu().numpy()
            # scores_to_save = scores_single.clone().cpu().numpy()
            # np.save('/home/tony/FSOD/output/visualization/proposals.npy', proposals_to_save)
            # np.save('/home/tony/FSOD/output/visualization/scores.npy', scores_to_save)
            ########################################################

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
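
# Note on the output convention in forward() above: every image in the batch
# gets exactly post_nms_topN rows; column 0 holds the batch index and unused
# rows stay zero, so downstream code can recognize padding by all-zero boxes.
# A toy illustration with made-up sizes:
import torch

post_nms_topN, num_proposal = 5, 3
output = torch.zeros(1, post_nms_topN, 5)
output[0, :, 0] = 0                                          # batch index
output[0, :num_proposal, 1:] = torch.rand(num_proposal, 4)   # kept proposals
# rows 3..4 remain zero-padded
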
def validation(val_dataloader, epoch, model_name, val_imdb, args):
    val_imdb.competition_mode(on=True)
    print('Start Validation')
    val_fasterRCNN = resnet(val_imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    val_fasterRCNN.create_architecture()

    print("load checkpoint %s" % model_name)
    checkpoint = torch.load(model_name)
    val_fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    if args.cuda:
        val_im_data = torch.FloatTensor(1).cuda()
        val_im_info = torch.FloatTensor(1).cuda()
        val_num_boxes = torch.LongTensor(1).cuda()
        val_gt_boxes = torch.FloatTensor(1).cuda()
        val_fasterRCNN.cuda()
        cfg.CUDA = True
    else:
        val_im_data = torch.FloatTensor(1)
        val_im_info = torch.FloatTensor(1)
        val_num_boxes = torch.LongTensor(1)
        val_gt_boxes = torch.FloatTensor(1)

    val_im_data = Variable(val_im_data)
    val_im_info = Variable(val_im_info)
    val_num_boxes = Variable(val_num_boxes)
    val_gt_boxes = Variable(val_gt_boxes)

    start = time.time()
    # maximum number of detections per image
    max_per_image = 100

    thresh = 0.0

    save_name = 'val_' + args.exp_group
    num_images = len(val_imdb.image_index)
    # build a num_classes x num_images nested list of empty lists: [[[], ...], [[], ...]]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(val_imdb.num_classes)]

    output_dir = get_output_dir(val_imdb, save_name)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    save_dir = os.path.join(output_dir, f"PRCurves_{args.exp_group}")
    os.makedirs(save_dir, exist_ok=True)
    det_file = os.path.join(save_dir, f'epoch_{epoch}_detections.pkl')

    val_fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i, data in enumerate(val_dataloader):
        with torch.no_grad():
            val_im_data.resize_(data[0].size()).copy_(data[0])
            val_im_info.resize_(data[1].size()).copy_(data[1])
            val_gt_boxes.resize_(data[2].size()).copy_(data[2])
            val_num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        val_rois, val_cls_prob, val_bbox_pred, \
        val_rpn_loss_cls, val_rpn_loss_box, val_RCNN_loss_cls, \
        val_RCNN_loss_bbox, val_rois_label = val_fasterRCNN(val_im_data, val_im_info, val_gt_boxes, val_num_boxes)

        scores = val_cls_prob.data
        boxes = val_rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = val_bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(val_imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, val_im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        for j in range(1, val_imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([
                all_boxes[j][i][:, -1] for j in range(1, val_imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, val_imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'.format(
            i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = val_imdb.evaluate_detections(all_boxes, epoch, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))

    return mAP
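
# Hypothetical call site for validation() above; the argument values are made
# up, and val_dataloader / val_imdb must come from the repo's roibatchLoader
# and combined_roidb setup respectively.
# mAP = validation(val_dataloader, epoch=10,
#                  model_name='models/res101/faster_rcnn_1_10.pth',
#                  val_imdb=val_imdb, args=args)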
Exemplo n.º 24
0
def main(args):
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    np.random.seed(cfg.RNG_SEED)

    pascal_classes = np.asarray(['__background__', 'targetobject', 'hand'])
    args.set_cfgs = [
        'ANCHOR_SCALES', '[8, 16, 32, 64]', 'ANCHOR_RATIOS', '[0.5, 1, 2]'
    ]

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
        raise Exception

    fasterRCNN.create_architecture()

    load_name = 'models/res101_handobj_100K/pascal_voc/faster_rcnn_1_8_132028.pth'

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))

    fasterRCNN.load_state_dict(checkpoint['model'])

    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY

    def _get_image_blob(im):
        """Converts an image into a network input.
        Arguments:
          im (ndarray): a color image in BGR order
        Returns:
          blob (ndarray): a data blob holding an image pyramid
          im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
        """
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS

        im_shape = im_orig.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        processed_ims = []
        im_scale_factors = []

        for target_size in cfg.TEST.SCALES:
            im_scale = float(target_size) / float(im_size_min)
            # Prevent the biggest axis from being more than MAX_SIZE
            if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
                im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
            im = cv2.resize(im_orig,
                            None,
                            None,
                            fx=im_scale,
                            fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
            im_scale_factors.append(im_scale)
            processed_ims.append(im)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    box_info = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    if args.cuda > 0:
        cfg.CUDA = True
        fasterRCNN.cuda()

    fasterRCNN.eval()

    with torch.no_grad():
        start = time.time()
        max_per_image = 100
        thresh_hand = args.thresh_hand
        thresh_obj = args.thresh_obj
        vis = args.vis

        # print(f'thresh_hand = {thresh_hand}')
        # print(f'thresh_obj = {thresh_obj}')

        webcam_num = args.webcam_num
        # Set up webcam or get image directories
        if webcam_num >= 0:
            cap = cv2.VideoCapture(webcam_num)
            num_images = 0
        else:
            print(f'image dir = {args.image_dir}')
            print(f'save dir = {args.save_dir}')
            imglist = os.listdir(args.image_dir)
            num_images = len(imglist)

        print('Loaded Photo: {} images.'.format(num_images))

        while (num_images >= 0):
            total_tic = time.time()
            if webcam_num == -1:
                num_images -= 1

            # Get image from the webcam
            if webcam_num >= 0:
                if not cap.isOpened():
                    raise RuntimeError(
                        "Webcam could not open. Please check connection.")
                ret, frame = cap.read()
                im_in = np.array(frame)
            # Load the demo image
            else:
                im_file = os.path.join(args.image_dir, imglist[num_images])
                im_in = np.array(imread(im_file))
                # resize
                # im_in = np.array(Image.fromarray(im_in).resize((640, 360)))
            if len(im_in.shape) == 2:
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            with torch.no_grad():
                im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
                im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
                gt_boxes.resize_(1, 1, 5).zero_()
                num_boxes.resize_(1).zero_()
                box_info.resize_(1, 1, 5).zero_()

                # pdb.set_trace()
            det_tic = time.time()
            print(im_data.shape)
            rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
                loss_list = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, box_info)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            # extract predicted params
            contact_vector = loss_list[0][0]          # hand contact state info
            offset_vector = loss_list[1][0].detach()  # offset vector (factored into a unit vector and a magnitude)
            lr_vector = loss_list[2][0].detach()      # hand side info (left/right)

            # get hand contact
            _, contact_indices = torch.max(contact_vector, 2)
            contact_indices = contact_indices.squeeze(0).unsqueeze(-1).float()

            # get hand side
            lr = torch.sigmoid(lr_vector) > 0.5
            lr = lr.squeeze(0).float()

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(
                                -1, 4) * torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_STDS
                                ) + torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(
                                -1, 4) * torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_STDS
                                ) + torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1,
                                                     4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            print(detect_time)
            if vis:
                im2show = np.copy(im)
            obj_dets, hand_dets = None, None
            for j in range(1, len(pascal_classes)):
                # inds = torch.nonzero(scores[:,j] > thresh).view(-1)
                if pascal_classes[j] == 'hand':
                    inds = torch.nonzero(scores[:, j] > thresh_hand,
                                         as_tuple=False).view(-1)
                elif pascal_classes[j] == 'targetobject':
                    inds = torch.nonzero(scores[:, j] > thresh_obj,
                                         as_tuple=False).view(-1)

                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat(
                        (cls_boxes,
                         cls_scores.unsqueeze(1), contact_indices[inds],
                         offset_vector.squeeze(0)[inds], lr[inds]), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if pascal_classes[j] == 'targetobject':
                        obj_dets = cls_dets.cpu().numpy()
                    if pascal_classes[j] == 'hand':
                        hand_dets = cls_dets.cpu().numpy()

            if vis:
                # visualization
                im2show = vis_detections_filtered_objects_PIL(
                    im2show, obj_dets, hand_dets, thresh_hand, thresh_obj)

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            if webcam_num == -1:
                sys.stdout.write(
                    'im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r'.format(
                        num_images + 1, len(imglist), detect_time, nms_time))
                sys.stdout.flush()

            if vis and webcam_num == -1:

                folder_name = args.save_dir
                os.makedirs(folder_name, exist_ok=True)
                result_path = os.path.join(
                    folder_name, imglist[num_images][:-4] + "_det.png")
                im2show.save(result_path)
            else:
                im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
                cv2.imshow("frame", im2showRGB)
                total_toc = time.time()
                total_time = total_toc - total_tic
                frame_rate = 1 / total_time
                print('Frame rate:', frame_rate)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if webcam_num >= 0:
            cap.release()
            cv2.destroyAllWindows()
Example No. 25
0
def eval_test(fasterRCNN, args, cfg, imdb, dataloader, output_dir):
    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    start = time.time()
    max_per_image = 100

    vis = args.vis

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = "faster_rcnn_10"
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    data_iter = iter(dataloader)

    _t = {"im_detect": time.time(), "misc": time.time()}
    det_file = os.path.join(output_dir, "detections.pkl")

    fasterRCNN.eval()
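    # (0, 5)-shaped placeholder stored for classes with no surviving detections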
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label = fasterRCNN(
            im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
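        # rois are (batch, N, 5) rows of [batch_idx, x1, y1, x2, y2]; keep only the box coords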
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() +
                                  torch.FloatTensor(
                                      cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() +
                                  torch.FloatTensor(
                                      cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()
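        # (data[1][0][2] is the image rescale factor recorded in im_info)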

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write("im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r".format(
            i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite("result.png", im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
    with open(det_file, "wb") as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print("Evaluating detections")
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    if "coco" in args.dataset:
        return imdb.coco_eval
Example No. 26
0
def seal_detection(image_path):
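    # NOTE: relies on module-level globals defined elsewhere in this script:
    # `args`, `cfg`, `fasterRCNN`, `pascal_classes`, `thresh`, and `_get_image_blob`.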
    
    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable (Variable's `volatile` flag was removed in PyTorch 0.4;
    # inference below runs under torch.no_grad() instead)
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True
        fasterRCNN.cuda()
    
    fasterRCNN.eval()
    
    # imglist = os.listdir(images_path)                    #Beginning Load Images 
    # num_images = len(imglist)
    # print('imglist:', imglist)
    # print('num_images:', num_images)
    # print('Loaded Photo: {} images.'.format(num_images))
    # im_file = os.path.join(args.image_dir, imglist[num_images-1])
    # im_file = images_path
    #modified
    # print('im_file', im_file)
    
    # edited 
    im_in = np.array(imread(image_path))
    if len(im_in.shape) == 2:
        im_in = im_in[:,:,np.newaxis]
        im_in = np.concatenate((im_in,im_in,im_in), axis=2)
    # rgb -> bgr
    im = im_in[:,:,::-1]
    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
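    # im_info rows are (blob height, blob width, rescale factor)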

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    # pdb.set_trace()
    det_tic = time.time()

    try:
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
    except Exception:
        # `imglist` and `num_images` are undefined in this function; report
        # the actual image and re-raise rather than swallowing the error.
        print('Detection failed for:', image_path)
        raise

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if args.class_agnostic:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    
    pred_boxes /= im_scales[0]
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    result_bbox = []
    score_bbox = []
    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            for i in range(np.minimum(10, dets.shape[0])):
                bbox = tuple(int(np.round(x)) for x in dets[i, :4])
                score = dets[i, -1]
                result_bbox.append(bbox)
                score_bbox.append(score)
    return result_bbox, score_bbox
Example No. 28
0
def eval_frcnn(frcnn_extra, device, fasterRCNN, is_break=False):
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(frcnn_extra.output_dir, 'detections.pkl')
    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
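    # (0, 5)-shaped placeholder for classes with no surviving detections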
    data_iter_test = iter(frcnn_extra.dataloader_test)
    for i in range(frcnn_extra.num_images_test):
        data_test = next(data_iter_test)
        im_data = data_test[0].to(device)
        im_info = data_test[1].to(device)
        gt_boxes = data_test[2].to(device)
        num_boxes = data_test[3].to(device)
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if frcnn_extra.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(frcnn_extra.imdb_test.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data_test[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        for j in range(1, frcnn_extra.imdb_test.num_classes):
            inds = torch.nonzero(scores[:, j] > frcnn_extra.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if frcnn_extra.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                frcnn_extra.all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                frcnn_extra.all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if frcnn_extra.max_per_image > 0:
            image_scores = np.hstack([frcnn_extra.all_boxes[j][i][:, -1]
                                      for j in range(1, frcnn_extra.imdb_test.num_classes)])
            if len(image_scores) > frcnn_extra.max_per_image:
                image_thresh = np.sort(image_scores)[-frcnn_extra.max_per_image]
                for j in range(1, frcnn_extra.imdb_test.num_classes):
                    keep = np.where(frcnn_extra.all_boxes[j][i][:, -1] >= image_thresh)[0]
                    frcnn_extra.all_boxes[j][i] = frcnn_extra.all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        if is_break:
            break
    ap = frcnn_extra.imdb_test.evaluate_detections(frcnn_extra.all_boxes, frcnn_extra.output_dir)
    return ap
Example No. 29
0
def raber_detection(im_in):
    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if use_gpu:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True
        fasterRCNN.cuda()

    fasterRCNN.eval()

    thresh = 0.05
    vis = False

    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    # pdb.set_trace()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
        if args.cuda > 0:
            pred_boxes = pred_boxes.cuda()

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    if vis:
        im2show = np.copy(im)
    all_dets = []
    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            all_dets.append(cls_dets.cpu().numpy())
            if vis:
                im2show = vis_detections(im2show, pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)
    # Gather detections across classes; the original read the loop-local
    # `cls_dets` after the loop, which raises NameError when nothing is
    # detected and otherwise returns only the last class's boxes.
    box_results = np.concatenate(all_dets, 0) if all_dets else np.zeros((0, 5), dtype=np.float32)
    if vis:
        # cv2.imshow('test', im2show)
        # cv2.waitKey(0)
        result_path = os.path.join(args.result_dir, "OnlineDet.jpg")
        cv2.imwrite(result_path, im2show)
    return box_results
Example No. 30
0
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)


        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        # scores = input[0][:, self._num_anchors:, :, :]
        scores = input[0][:, :, 1]  # batch_size x num_rois x 1

        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]

        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios,
                feat_shapes, self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
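        # anchors for every FPN level, concatenated into one (num_anchors, 4)
        # set and broadcast across the batch below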
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()
        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        # bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        # bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
        #
        # # Same story for the scores:
        # scores = scores.permute(0, 2, 3, 1).contiguous()
        # scores = scores.view(batch_size, -1)
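        # each output row is [batch_idx, x1, y1, x2, y2], zero-padded up to post_nms_topN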
        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
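                # note: numel() counts the whole batch's scores, not just image i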
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output