Example #1
def process(img_path):
    im_in = np.array(imread(img_path))
    # RGB -> BGR
    im = im_in[:, :, ::-1]
    # get the input blob and image scales
    blobs, im_scales = _get_image_blob(im)
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    # prepare the input tensors
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.resize_(1, 1, 5).zero_()
    num_boxes.resize_(1).zero_()

    # detection
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    box_deltas = bbox_pred.data
    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
    box_deltas = box_deltas.view(1, -1, 4 * 2)
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    inds = torch.nonzero(scores[:, 1] > 0.05).view(-1)

    # show image
    im2show = np.copy(im)

    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[:, 1][inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds][:, 4:8]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
        cls_dets = cls_dets[order]
        # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]

        # visualization for debugging
        im2show = vis_detections(im2show, 'tampered',
                                 cls_dets.cpu().numpy(), 0.5)

        # show the result
        # cv2.imshow('test', im2show)
        # cv2.waitKey(0)
    else:
        print('No bbox!')

    # save the intermediate result
    cv2.imwrite('temp.jpg', im2show)

    # to PIL
    img = Image.open('temp.jpg')
    img = img.resize((640, 480))

    img = ImageTk.PhotoImage(img)
    label_img.config(image=img)
    label_img.image = img
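
Note: the BBOX_NORMALIZE_STDS/MEANS multiply-add above undoes the target normalization applied during training; the denormalized deltas are then decoded by bbox_transform_inv. For reference, a minimal NumPy sketch of that decoding, assuming the standard Faster R-CNN (dx, dy, dw, dh) parameterization with the legacy +1 box-size convention (not necessarily the repository's own implementation):

import numpy as np

def decode_deltas(boxes, deltas):
    # boxes: (N, 4) proposals as (x1, y1, x2, y2); deltas: (N, 4) as (dx, dy, dw, dh)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    # shift the center by a fraction of the proposal size, scale w/h exponentially
    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights
    return np.stack([pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
                     pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h], axis=1)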
Example #2
def eval_result(args, logger, epoch, output_dir):
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    args.batch_size = 1
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)

    imdb.competition_mode(on=True)

    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch))

    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes,
                 layer,
                 pretrained=False,
                 class_agnostic=args.class_agnostic)

    _RCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage
                                )  # Load all tensors onto the CPU
    _RCNN.load_state_dict(checkpoint['model'])

    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable (PyTorch 0.4.0+)
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        _RCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = True

    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'thundernet'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    _RCNN.eval()

    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):

        data = next(data_iter)

        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        with torch.no_grad():
            time_measure, \
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j].tolist(),
                                   cls_dets.cpu().numpy(), 0.6)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write(
            'im_detect: {:d}/{:d}\tDetect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s)\tNMS: {:.3f}s\r' \
            .format(i + 1, num_images, detect_time, time_measure[0], time_measure[1], time_measure[2],
                    time_measure[3], nms_time))
        sys.stdout.flush()

        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)

            # cv2.imwrite('result.png', im2show)
            # pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
Example #3
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        _bbox_pred = bbox_pred.clone()

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        ## Get global and local region from Faster R-CNN
        cuda = True
        pascal_classes = np.array([
            '__background__', 'Blue mackerel', 'Chub mackerel', 'Hybrid',
            'Blue mackerel redline', 'Chub mackerel redline', 'Hybrid redline'
        ])

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        box_deltas = _bbox_pred.data

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if self.class_agnostic:
                if cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        print(pred_boxes.data.cpu().numpy().shape)
        print(scores.data.cpu().numpy().shape)
        print(scores)
        # get global region
        thresh = 0.05

        region_g = np.ndarray((0, 5))
        region_l = np.ndarray((0, 5))
        for j in range(1, 4):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            inds_l = torch.nonzero(scores[:, j + 3] > thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_scores_l = scores[:, j + 3][inds_l]
                _, order = torch.sort(cls_scores, 0, True)
                _, order_l = torch.sort(cls_scores_l, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds]
                    cls_boxes_l = pred_boxes[inds_l]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_boxes_l = pred_boxes[inds_l][:,
                                                     (j + 3) * 4:(j + 4) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets_l = torch.cat(
                    (cls_boxes_l, cls_scores_l.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets_l = cls_dets_l[order_l]

                ind = np.argmax(cls_dets[..., -1])
                ind_l = np.argmax(cls_dets_l[..., -1])

                region_g = np.vstack((region_g, cls_dets[ind]))
                region_l = np.vstack((region_l, cls_dets_l[ind_l]))

                #keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                #cls_dets = cls_dets[keep.view(-1).long()]
        print(region_g)
        print(region_l)
        region_g = region_g[np.argmax(region_g[..., -1])]
        region_l = region_l[np.argmax(region_l[..., -1])]

        print(region_g)
        print(region_l)

        ## GLCC

        # global region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat_g = self.RCNN_roi_crop(base_feat,
                                               Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_g = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_g = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # local region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat_l = self.RCNN_roi_crop(base_feat,
                                               Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_l = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_l = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        x = torch.cat((pooled_feat_g, pooled_feat_l), dim=0)
        x = self.glcc_conv1(x)
        x = F.relu(x)
        x = self.glcc_fc1(x)
        x = F.relu(x)
        x = nn.Dropout2d()(x)
        x = self.glcc_fc2(x)
        x = F.relu(x)
        x = nn.Dropout2d()(x)
        x = self.glcc_fc_out(x)

        #GLCC_loss_cls = 0
        #if self.training:
        #    GLCC_loss_cls = F.cross_entropy(x, t)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, x
Example #4
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                # FIXME: pytorch normalize
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im2show = np.copy(im)
        for j in xrange(1, len(classes)):
Example #5
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(
                            n_legs, batch_size, -1,
                            4 * len(imagenet_vid_classes))
                pred_boxes = bbox_transform_inv_legs(boxes, box_deltas,
                                                     batch_size)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
            else:
                # Simply repeat the boxes, once for each class
                raise NotImplementedError
            trk_box_deltas = tracking_pred.unsqueeze(0).data
            #TODO Check whether this is necessary
            trk_box_deltas = trk_box_deltas.view(-1,4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            trk_box_deltas = trk_box_deltas.view(1, -1, 4)
            pred_trk_boxes = bbox_transform_inv(trk_boxes, trk_box_deltas, 1)
            pred_trk_boxes = clip_boxes(pred_trk_boxes,
                                        im_info.permute(1, 0, 2)[0].data, 1)

            # Assume scales are same for frames in the same video
            im_scale = im_info.data.squeeze(0)[0][-1]
            #im_scales = im_info[:,:,2].data.contiguous().view(1,-1,1,1).permute(1,0,2,3)
            pred_boxes /= im_scale
            pred_trk_boxes /= im_scale
            # squeeze batch dim
            #scores = scores.squeeze(1)
            #pred_boxes = pred_boxes.squeeze(1)
            #pred_trk_boxes = pred_trk_boxes.squeeze(0)
            # Permute such that we have (frame_sample_id, n_legs, n_boxes, ...)
            pred_boxes = pred_boxes.permute(
                1, 0, 2, 3).contiguous()  #2*1*300*4=>1*2*300*4
Example #6
    def det_im(self, im_file):
        max_per_image = 100
        thresh = 0.05
        total_tic = time.time()
        # im = cv2.imread(im_file)
        im_in = np.array(imread(im_file))
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = self._get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        self.im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        self.im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        self.gt_boxes.data.resize_(1, 1, 5).zero_()
        self.num_boxes.data.resize_(1).zero_()

        det_tic = time.time()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.fasterRCNN(self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.args.class_agnostic:
                    if self.args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                   + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.vrd_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        im2show = np.copy(im)
        res = {}
        res['box'] = np.zeros((0, 4))
        res['cls'] = []
        res['confs'] = []
        for j in xrange(1, len(self.vrd_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                im2show = res_detections(im2show, j, self.vrd_classes[j],
                                         cls_dets.cpu().numpy(), res, 0.5)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:.3f}s {:.3f}s   \r'.format(
            detect_time, nms_time))
        sys.stdout.flush()
        cv2.imwrite('img/im_det.jpg', im2show)
        return res
Example #7
def objgrasp_inference(o_cls_prob,
                       o_box_output,
                       g_cls_prob,
                       g_box_output,
                       im_info,
                       rois=None,
                       class_agnostic=True,
                       n_classes=None,
                       g_box_prior=None,
                       for_vis=False,
                       topN_g=False,
                       recover_imscale=True):
    """
    :param o_cls_prob: N x N_cls tensor
    :param o_box_output: N x 4 tensor
    :param g_cls_prob: N x K*A x 2 tensor
    :param g_box_output: N x K*A x 5 tensor
    :param im_info: size 4 tensor
    :param rois: N x 4 tensor
    :param g_box_prior: N x K*A * 5 tensor
    :return:

    Note:
    1 This function simultaneously supports ROI-GN with or without object branch. If no object branch, o_cls_prob
    and o_box_output will be none, and object detection results are shown in the form of ROIs.
    2 This function can only detect one image per invoking.
    """
    if rois is None:
        raise RuntimeError("You must specify rois for ROI-GN.")

    o_scores = o_cls_prob
    rois = rois[:, 1:5]

    g_scores = g_cls_prob

    if for_vis:
        o_thresh = 0.5
    else:
        o_thresh = 0.
        topN_g = 1

    if not topN_g:
        g_thresh = 0.5
    else:
        g_thresh = 0.

    if g_box_prior is None:
        raise NotImplementedError(
            "Inference for anchor free algorithms has not been implemented.")

    # infer grasp boxes
    normalizer = {
        'mean': cfg.FCGN.BBOX_NORMALIZE_MEANS,
        'std': cfg.FCGN.BBOX_NORMALIZE_STDS
    }
    g_box_output = box_unnorm_torch(g_box_output,
                                    normalizer,
                                    d_box=5,
                                    class_agnostic=True,
                                    n_cls=None)
    g_box_output = g_box_output.view(g_box_prior.size())
    # N x K*A x 5
    grasp_pred = grasp_decode(g_box_output, g_box_prior)

    # N x K*A x 1
    rois_w = (rois[:, 2] -
              rois[:, 0]).view(-1).unsqueeze(1).unsqueeze(2).expand_as(
                  grasp_pred[:, :, 0:1])
    rois_h = (rois[:, 3] -
              rois[:, 1]).view(-1).unsqueeze(1).unsqueeze(2).expand_as(
                  grasp_pred[:, :, 1:2])
    keep_mask = (grasp_pred[:, :, 0:1] > 0) & (grasp_pred[:, :, 1:2] > 0) & \
                (grasp_pred[:, :, 0:1] < rois_w) & (grasp_pred[:, :, 1:2] < rois_h)
    grasp_scores = g_scores.contiguous().view(rois.size(0), -1, 2)
    # N x 1 x 1
    xleft = rois[:, 0].view(-1).unsqueeze(1).unsqueeze(2)
    ytop = rois[:, 1].view(-1).unsqueeze(1).unsqueeze(2)
    # rois offset
    grasp_pred[:, :, 0:1] = grasp_pred[:, :, 0:1] + xleft
    grasp_pred[:, :, 1:2] = grasp_pred[:, :, 1:2] + ytop
    # N x K*A x 8
    grasp_pred_boxes = labels2points(grasp_pred).contiguous().view(
        rois.size(0), -1, 8)
    # N x K*A
    grasp_pos_scores = grasp_scores[:, :, 1]
    if topN_g:
        # N x K*A
        _, grasp_score_idx = torch.sort(grasp_pos_scores,
                                        dim=-1,
                                        descending=True)
        _, grasp_idx_rank = torch.sort(grasp_score_idx, dim=-1)
        # N x K*A mask
        topn_grasp = topN_g
        grasp_maxscore_mask = (grasp_idx_rank < topn_grasp)
        # N x topN
        grasp_maxscores = grasp_pos_scores[grasp_maxscore_mask].contiguous(
        ).view(rois.size()[:1] + (topn_grasp, ))
        # N x topN x 8
        grasp_pred_boxes = grasp_pred_boxes[grasp_maxscore_mask].view(
            rois.size()[:1] + (topn_grasp, 8))
    else:
        raise NotImplementedError(
            "Now ROI-GN only supports top-N grasp detection for each object.")

    # infer object boxes
    if cfg.TRAIN.COMMON.BBOX_REG:
        if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            normalizer = {
                'mean': cfg.TRAIN.COMMON.BBOX_NORMALIZE_MEANS,
                'std': cfg.TRAIN.COMMON.BBOX_NORMALIZE_STDS
            }
            box_output = box_unnorm_torch(o_box_output, normalizer, 4,
                                          class_agnostic, n_classes)
            pred_boxes = bbox_transform_inv(rois, box_output, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info, 1)
    else:
        pred_boxes = rois.clone()

    if recover_imscale:
        pred_boxes = box_recover_scale_torch(pred_boxes, im_info[3],
                                             im_info[2])
        grasp_pred_boxes = box_recover_scale_torch(grasp_pred_boxes,
                                                   im_info[3], im_info[2])

    all_box = [[]]
    all_grasp = [[]]
    for j in xrange(1, n_classes):
        if class_agnostic or not cfg.TRAIN.COMMON.BBOX_REG:
            cls_boxes = pred_boxes
        else:
            cls_boxes = pred_boxes[:, j * 4:(j + 1) * 4]
        cls_dets, cls_scores, box_keep_inds = box_filter(cls_boxes,
                                                         o_scores[:, j],
                                                         o_thresh,
                                                         use_nms=True)
        cls_dets = np.concatenate((cls_dets, np.expand_dims(cls_scores, -1)),
                                  axis=-1)
        grasps = (grasp_pred_boxes.cpu().numpy())[box_keep_inds]

        if for_vis:
            cls_dets[:, -1] = j
        else:
            grasps = np.squeeze(grasps, axis=1)
        all_box.append(cls_dets)
        all_grasp.append(grasps)

    if for_vis:
        all_box = np.concatenate(all_box[1:], axis=0)
        all_grasp = np.concatenate(all_grasp[1:], axis=0)

    return all_box, all_grasp
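
Note: the top-N grasp selection above uses the double-argsort trick: the argsort of an argsort gives each element's rank, so (rank < N) is a per-row boolean mask over the N highest-scoring entries. A minimal self-contained sketch:

import torch

scores = torch.tensor([[0.2, 0.9, 0.5, 0.7]])
_, idx = torch.sort(scores, dim=-1, descending=True)  # idx = [[1, 3, 2, 0]]
_, rank = torch.sort(idx, dim=-1)                     # rank = [[3, 0, 2, 1]]
topn_mask = rank < 2                                  # keeps the 0.9 and 0.7 entries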
Example #8
        if cfg.TEST.BBOX_REG:
            # Calculate prediction boxes (x1,x2,y1,y2) using bboxes and offset
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(
                boxes, box_deltas,
                1)  # Change proposed rois to predicted bbox by using deltas
            pred_boxes = clip_boxes(pred_boxes, im_info.data,
                                    1)  # Clip bboxes to fit the image size
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # rescale the predicted boxes back to the original image resolution
        pred_boxes /= data[1][0][2].item()  # data[1]: e.g. [850, 600, 1.6997] (H, W, scale)
        #########################################################################################
        # conf_threshold, nms_threshold, max_boxes, n_classes, coord_h, coord_w =
        # post_proc = PostProc(conf_threshold, nms_threshold, max_boxes, n_classes, coord_h, coord_w)

        scores = scores.squeeze()
Example #9
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        # Bottom-up
        c1 = self.RCNN_layer0(im_data)
        c2 = self.RCNN_layer1(c1)
        c3 = self.RCNN_layer2(c2)
        c4 = self.RCNN_layer3(c3)
        c5 = self.RCNN_layer4(c4)
        # Top-down
        p5 = self.RCNN_toplayer(c5)
        p4 = self._upsample_add(p5, self.RCNN_latlayer1(c4))
        p4 = self.RCNN_smooth1(p4)
        p3 = self._upsample_add(p4, self.RCNN_latlayer2(c3))
        p3 = self.RCNN_smooth2(p3)
        p2 = self._upsample_add(p3, self.RCNN_latlayer3(c2))
        p2 = self.RCNN_smooth3(p2)

        p6 = self.maxpool2d(p5)

        rpn_feature_maps = [p2, p3, p4, p5, p6]
        mrcnn_feature_maps = [p2, p3, p4, p5]

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            rpn_feature_maps, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois_label = None
            gt_assign = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        # pooling features based on rois, output 14x14 map   (128,64,7,7)
        roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois,
                                              im_info)

        Use_emsemble = False
        emsemble_vgg, emsemble_detnet = [False, True]
        if Use_emsemble:
            if emsemble_vgg:
                model_vgg = Cnn()
                model_vgg = model_vgg.cuda()
                ## vgg net
                pretrained_model_vgg = '/home/lab30202/lq/ai_future/single_classsification_vgg/model_save/galxay_star_classification_vgg.pth'  # path where the pretrained model weights are saved
                pretrained_dict = torch.load(pretrained_model_vgg)
                model_dict = model_vgg.state_dict()
                pretrained_dict = {
                    k: v
                    for k, v in pretrained_dict.items() if k in model_dict
                }
                model_dict.update(pretrained_dict)
                model_vgg.load_state_dict(model_dict)
                feature_map_vgg = model_vgg.convnet(im_data)
                if self.training:
                    idx_l = [x for x in range(0, 128, 1)]
                else:
                    idx_l = [x for x in range(0, 300, 1)]
                idx_l = torch.LongTensor(idx_l)
                feat = self.RCNN_roi_align(feature_map_vgg, rois[idx_l], 0.5)
                roi_pool_vgg = feat.view(feat.shape[0], -1)
                cls_score_vgg = model_vgg.fc(roi_pool_vgg)
                # cls_prob_vgg = F.softmax(cls_score_vgg,dim=1)
            if emsemble_detnet:
                ## detnet
                detnet = Detnet()
                detnet = detnet.cuda()
                # Bottom-up
                c1_det = detnet.RCNN_layer0_det(im_data)
                c2_det = detnet.RCNN_layer1_det(c1_det)
                c3_det = detnet.RCNN_layer2_det(c2_det)
                c4_det = detnet.RCNN_layer3_det(c3_det)
                c5_det = detnet.RCNN_layer4_det(c4_det)
                c6_det = detnet.RCNN_layer5_det(c5_det)

                # Top-down
                p6_det = detnet.RCNN_toplayer_det(c6_det)
                p5_det = detnet.RCNN_latlayer1_det(c5_det) + p6_det
                p4_det = detnet.RCNN_latlayer2_det(c4_det) + p5_det
                p3_det = detnet._upsample_add(
                    p4_det, detnet.RCNN_latlayer3_det(c3_det))
                p3_det = detnet.RCNN_smooth1_det(p3_det)
                p2_det = detnet._upsample_add(
                    p3_det, detnet.RCNN_latlayer4_det(c2_det))
                p2_det = detnet.RCNN_smooth2_det(p2_det)

                rpn_feature_maps_det = [p2_det, p3_det, p4_det, p5_det, p6_det]
                mrcnn_feature_maps_det = [p2_det, p3_det, p4_det, p5_det]
                rois_det, rpn_loss_cls_det, rpn_loss_bbox_det = self.RCNN_rpn(
                    rpn_feature_maps_det, im_info, gt_boxes, num_boxes)
                if self.training:
                    roi_data_det = self.RCNN_proposal_target(
                        rois_det, gt_boxes, num_boxes)
                    rois_det, rois_label_det, gt_assign_det, rois_target_det, rois_inside_ws_det, rois_outside_ws_det = roi_data_det
                    rois_det = rois_det.view(-1, 5)
                    rois_label_det = rois_label_det.view(-1).long()
                    gt_assign_det = gt_assign_det.view(-1).long()
                    pos_id_det = rois_label_det.nonzero().squeeze()
                    gt_assign_pos_det = gt_assign_det[pos_id_det]
                    rois_label_pos_det = rois_label_det[pos_id_det]
                    rois_label_pos_ids_det = pos_id_det

                    rois_pos_det = Variable(rois_det[pos_id_det])
                    rois_det = Variable(rois_det)
                    rois_label_det = Variable(rois_label_det)

                    rois_target_det = Variable(
                        rois_target_det.view(-1, rois_target_det.size(2)))
                    rois_inside_ws_det = Variable(
                        rois_inside_ws_det.view(-1,
                                                rois_inside_ws_det.size(2)))
                    rois_outside_ws_det = Variable(
                        rois_outside_ws_det.view(-1,
                                                 rois_outside_ws_det.size(2)))
                else:
                    rois_label_det = None
                    gt_assign_det = None
                    rois_target_det = None
                    rois_inside_ws_det = None
                    rois_outside_ws_det = None
                    rpn_loss_cls_det = 0
                    rpn_loss_bbox_det = 0
                    rois_det = rois_det.view(-1, 5)
                    pos_id_det = torch.arange(
                        0, rois_det.size(0)).long().type_as(rois_det).long()
                    rois_label_pos_ids_det = pos_id_det
                    rois_pos_det = Variable(rois_det[pos_id_det])
                    rois_det = Variable(rois_det)

                feat_det = self._PyramidRoI_Feat(mrcnn_feature_maps_det, rois,
                                                 im_info)
                if emsemble_detnet:
                    pooled_feat_det = detnet._head_to_tail(feat_det)
                    cls_score_det = self.RCNN_cls_score(pooled_feat_det)
                else:
                    roi_pool_det = feat_det.view(feat_det.shape[0], -1)
                    cls_score_det = model_vgg.fc(roi_pool_det)

        pooled_feat = self._head_to_tail(roi_pool_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.long().view(rois_label.size(0), 1,
                                       1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        # cls_prob = F.softmax(cls_score,dim=1)

        if Use_emsemble:
            if emsemble_detnet and emsemble_vgg:
                cls_score_liner = 0.5 * cls_score + 0.3 * cls_score_vgg + 0.2 * cls_score_det
                cls_score = model_vgg.fc_new(cls_score_liner)
                cls_prob = F.softmax(cls_score, dim=1)
            elif emsemble_vgg and not emsemble_detnet:
                cls_score_liner = cls_score + cls_score_vgg
                cls_score = model_vgg.fc_new(cls_score_liner)
                cls_prob = F.softmax(cls_score, dim=1)
            elif emsemble_detnet and not emsemble_vgg:
                cls_score_liner = cls_score + cls_score_det
                cls_score = detnet.fc_add(cls_score_liner)
                cls_prob = F.softmax(cls_score, dim=1)
        else:
            cls_score = self.RCNN_cls_score(pooled_feat)
            cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # loss (cross entropy) for object classification
            Use_focal_loss = True
            Use_label_smoothing = False
            Use_Giou_loss = False
            if not Use_focal_loss:
                if Use_label_smoothing:
                    # criteria = LabelSmoothSoftmaxCE(label_smoothing=0.1)
                    criteria = LabelSmoothSoftmaxCE(lb_pos=0.9, lb_neg=5e-3)
                    RCNN_loss_cls = criteria(cls_score, rois_label)
                else:
                    RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            else:
                FL = FocalLoss(class_num=self.n_classes, alpha=1, gamma=2)
                RCNN_loss_cls = FL(cls_score, rois_label)
                RCNN_loss_cls = RCNN_loss_cls.type(torch.FloatTensor).cuda()

            # loss (l1-norm) for bounding box regression
            if Use_Giou_loss:
                rois1 = rois.view(batch_size, -1, rois.size(1))
                boxes = rois1.data[:, :, 1:5]
                bbox_pred1 = bbox_pred.view(batch_size, -1, bbox_pred.size(1))
                box_deltas = bbox_pred1.data
                # if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                #     # Optionally normalize targets by a precomputed mean and stdev
                #     box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                #                  + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                #     box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
                pred_boxes /= im_info[0][2].cuda()
                # RCNN_loss_bbox = generalized_iou_loss(rois_target,bbox_pred)
                _, _, RCNN_loss_bbox = Giou_np(pred_boxes, boxes)
            else:
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        cls_prob = cls_prob.view(batch_size, -1, cls_prob.size(1))
        bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
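
Note: a minimal sketch of the focal-loss formulation the FocalLoss module above presumably implements, FL(p_t) = -alpha * (1 - p_t)**gamma * log(p_t) (Lin et al., "Focal Loss for Dense Object Detection"); the repository's own class may differ in its alpha handling and reduction:

import torch
import torch.nn.functional as F

def focal_loss(logits, targets, alpha=1.0, gamma=2.0):
    # logits: (N, C); targets: (N,) class indices
    log_pt = F.log_softmax(logits, dim=1).gather(1, targets.unsqueeze(1)).squeeze(1)
    pt = log_pt.exp()  # probability of the true class
    return (-alpha * (1.0 - pt) ** gamma * log_pt).mean()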
Example #10
    def _get_single_obj_det_results(self, rois, cls_prob, bbox_pred, im_info):
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        results = []
        if cfg.TEST.COMMON.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.COMMON.BBOX_NORMALIZE_STDS).type_as(box_deltas) \
                                     + torch.FloatTensor(cfg.TRAIN.COMMON.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.COMMON.BBOX_NORMALIZE_STDS).type_as(box_deltas) \
                                     + torch.FloatTensor(cfg.TRAIN.COMMON.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
                    box_deltas = box_deltas.view(1, -1, 4 * self.n_classes)
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        thresh = 0
        for j in xrange(1, self.n_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.COMMON.NMS, force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                final_keep = torch.nonzero(cls_dets[:, -1] > cfg.TEST.COMMON.OBJ_DET_THRESHOLD).squeeze()
                result = cls_dets[final_keep]
                # unsqueeze result to 2 dims
                if result.numel() > 0 and result.dim() == 1:
                    result = result.unsqueeze(0)
                # in testing, concat object labels
                if final_keep.numel() > 0:
                    if self.training:
                        result = result[:, :4]
                    else:
                        result = torch.cat([
                            result[:, :4],
                            j * torch.ones(result.size(0), 1).type_as(result)
                        ], 1)
                if result.numel() > 0:
                    results.append(result)

        if len(results):
            final = torch.cat(results, 0)
        else:
            final = torch.Tensor([]).type_as(rois)

        return final
Example #11
def bld_train(args, ann_path=None, step=0):

    # print('Train from annotation {}'.format(ann_path))
    # print('Called with args:')
    # print(args)

    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "/activestep" + str(step)))

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # print('Using config:')
    # pprint.pprint(cfg)
    # np.random.seed(cfg.RNG_SEED)

    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set = source set + target set
    # use horizontally flipped images for training augmentation
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    # source train set, fully labeled
    #ann_path_source = os.path.join(ann_path, 'voc_coco_2007_train_f.json')
    #ann_path_target = os.path.join(ann_path, 'voc_coco_2007_train_l.json')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))

    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))

    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch_tg = None  # do not sample target set

    bs_tg = 4
    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                             imdb_tg.num_classes, training=True)

    assert imdb.num_classes == imdb_tg.num_classes

    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)

    if args.cuda:
        cfg.CUDA = True

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    # initialize the expectation network.
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()

    # lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
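            # bias params get lr * (cfg.TRAIN.DOUBLE_BIAS + 1), and weight decay
            # only when cfg.TRAIN.BIAS_DECAY is set (the usual Faster R-CNN recipe)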
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # expectation model
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    fasterRCNN_val.eval()

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
        #fasterRCNN_val = nn.DataParallel(fasterRCNN_val)

    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()

    # Evaluation
    # data_iter = iter(dataloader_tg)
    # for target_k in range( int(train_size_tg / args.batch_size)):
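    # pseudo-label the target set once; the pickle cache below is commented out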
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        for batch_k, data in enumerate(dataloader_tg):
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # expectation pass
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        # print('DEBUG: Size of box_deltas is {}'.format(box_deltas.size()) )
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))

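                # bbox_transform_inv applies the regression deltas to the RoIs:
                #   ctr' = ctr + (dx, dy) * (w, h);  (w', h') = (w, h) * exp(dw, dh)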
                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # TODO: data distillation
            # Choose the confident samples
            for b_idx in range(b_size):
                # fill one confidence
                # confidence.data[b_idx, :] = 1 - (gt_boxes.data[b_idx, :, 4] == 0)
                # rescale predictions back to the original image scale
                pred_boxes[b_idx] /= data[1][b_idx][2]
                for j in range(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # skip classes absent from the image-level label

                    # filter out degenerate boxes (zero width or height)
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)
                    # lower the threshold until at least one pseudo ground
                    # truth (pgt) candidate survives
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :,
                                   j][keep] > thresh).view(-1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)

                    # given the loop above, at least one detection must remain
                    if inds.numel() <= 0:
                        print('Warning: no detections above threshold; this should not happen.')
                        continue

                    # find the first empty gt_boxes slot (class id 0 marks unused)
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if len(missing_list) == 0: continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:, j *
                                                              4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    keep = nms(cls_dets, 0.2)  # hard-coded NMS IoU threshold
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions keep min({}, 4) as pseudo labels \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    if len(keep) == 0: continue
                    _, order = torch.sort(cls_scores[keep], 0, True)

                    max_keep = 4  # keep at most 4 pseudo boxes per class
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k: break
                        # 20 matches MAX_NUM_GT_BOXES for this dataset config
                        if missing_id + pgt_k >= 20: break
                        gt_boxes.data[b_idx, missing_id +
                                      pgt_k, :4] = cls_boxes[keep][order[
                                          len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k,
                                      4] = j  # class
                        #confidence[b_idx, missing_id + pgt_k] = cls_scores[keep][order[len(order) - 1 - pgt_k]]
                        num_boxes[b_idx] = num_boxes[b_idx] + 1
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg +
                                                         b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # DEBUG
                assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy())

        #with open(fname, 'w') as f:
        # pickle.dump(roidb_tg, f)
    else:
        pass
        # with open(fname) as f:  # Python 3: open(..., 'rb')
        # roidb_tg = pickle.load(f)

    print("-- Optimization Stage --")
    # Optimization
    print("######################################################l")

    roidb.extend(roidb_tg)  # merge two datasets
    print('before filtering, there are %d images...' % (len(roidb)))
    i = 0
    while i < len(roidb):
        # drop entries that ended up with no boxes
        if len(roidb[i]['boxes']) == 0:
            del roidb[i]
            i -= 1
        i += 1

    print('after filtering, there are %d images...' % (len(roidb)))
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()

        loss_temp = 0
        start = time.time()
        epoch_start = start

        # adjust learning rate
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        # one step
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])

            #gt_boxes.data = \
            #    torch.cat((gt_boxes.data, torch.zeros(gt_boxes.size(0), gt_boxes.size(1), 1).cuda()), dim=2)
            conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)

            fasterRCNN.zero_grad()

            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval

                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                          images, step)
                loss_temp = 0
                start = time.time()

        save_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
        # unwrap DataParallel before saving so the checkpoint loads either way
        model_state = fasterRCNN.module.state_dict() if args.mGPUs \
            else fasterRCNN.state_dict()
        save_checkpoint(
            {
                'session': args.session,
                'epoch': epoch + 1,
                'model': model_state,
                'optimizer': optimizer.state_dict(),
                'pooling_mode': cfg.POOLING_MODE,
                'class_agnostic': args.class_agnostic,
            }, save_name)
        print('save model: {}'.format(save_name))

        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))

    print('finished!')
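The heart of the example above is its adaptive pseudo-labeling of the target set: per image-level class, the score threshold is halved until at least one detection survives, the survivors are NMS-filtered, and at most four top-scoring boxes are written into the empty gt_boxes slots. A minimal, self-contained sketch of that selection rule (the name pick_pseudo_boxes and the injected nms_fn are illustrative, not from the source):

import torch

def pick_pseudo_boxes(cls_scores, cls_boxes, nms_fn, max_keep=4,
                      init_thresh=0.5, nms_iou=0.2):
    """Sketch: choose up to max_keep pseudo ground-truth boxes of one class.

    cls_scores: (N,) scores of this class for all proposals
    cls_boxes:  (N, 4) matching boxes
    nms_fn:     any nms(dets, iou) -> LongTensor of kept row indices
    """
    # halve the threshold until something survives
    # (assumes at least one strictly positive score, as the source does)
    thresh = init_thresh
    while (cls_scores > thresh).sum() == 0:
        thresh *= 0.5
    inds = torch.nonzero(cls_scores > thresh).view(-1)
    dets = torch.cat((cls_boxes[inds], cls_scores[inds].unsqueeze(1)), 1)
    keep = nms_fn(dets, nms_iou).view(-1)
    # rank the NMS survivors by score and keep the best max_keep boxes
    _, order = torch.sort(dets[keep][:, 4], 0, True)
    return dets[keep][order][:max_keep, :4]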
示例#12
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
            RCNN_loss_cls, RCNN_loss_bbox, rois_label \
            = self.FRCN(im_data, im_info, gt_boxes, num_boxes)

        # get global and local region from Faster R-CNN

        base_feat = self.FRCN.RCNN_base(im_data)

        #print(rois.data.cpu().numpy())
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        box_deltas = self.FRCN._bbox_pred.data

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if self.class_agnostic:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.use_cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda(
                        ) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS) + torch.FloatTensor(
                            cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        # get global region
        thresh = 0.00

        region_g = np.ndarray((0, 5))
        region_l = np.ndarray((0, 5))
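        # classes 1..3 index the global regions; classes 4..6 (j + 3) the
        # paired local regions (inferred from the indexing below)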
        for j in range(1, 4):
            inds = torch.nonzero(scores[:, j] >= thresh).view(-1)
            inds_l = torch.nonzero(scores[:, j + 3] >= thresh).view(-1)
            #print(inds)
            if inds.numel() > 0 and inds_l.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_scores_l = scores[:, j + 3][inds_l]
                #print(cls_scores)
                #print(cls_scores_l)
                _, order = torch.sort(cls_scores, 0, True)
                _, order_l = torch.sort(cls_scores_l, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds]
                    cls_boxes_l = pred_boxes[inds_l]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_boxes_l = pred_boxes[inds_l][:,
                                                     (j + 3) * 4:(j + 4) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets_l = torch.cat(
                    (cls_boxes_l, cls_scores_l.unsqueeze(1)), 1)

                cls_dets = cls_dets[order]
                cls_dets_l = cls_dets_l[order_l]

                region_g = np.vstack(
                    (region_g, cls_dets[np.argmax(cls_dets[..., -1])]))
                region_l = np.vstack(
                    (region_l, cls_dets_l[np.argmax(cls_dets_l[..., -1])]))

        #print(cls_dets)
        #print(pred_boxes)

        # debug switch: visualize the detected global and local regions
        if True:
            print(region_g)
            print(region_l)
            im = im_data.cpu().numpy()[0]
            im = np.transpose(im, (1, 2, 0))[..., ::-1]
            im -= im.min()
            im /= im.max()
            plt.imshow(im.astype(np.float32))
            ax = plt.axes()
            ax.add_patch(
                plt.Rectangle((region_g[0, 0], region_g[0, 1]),
                              region_g[0, 2] - region_g[0, 0],
                              region_g[0, 3] - region_g[0, 1],
                              fill=False,
                              edgecolor='red',
                              linewidth=1))

            ax.add_patch(
                plt.Rectangle((region_l[0, 0], region_l[0, 1]),
                              region_l[0, 2] - region_l[0, 0],
                              region_l[0, 3] - region_l[0, 1],
                              fill=False,
                              edgecolor='yellow',
                              linewidth=1))
            plt.show()

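        # map the selected image-space regions to feature-map coordinates
        # (assuming a feature stride of 16, as the /16. below suggests)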
        rois_g = np.zeros((1, 1, 5), dtype=np.float32)
        rois_g[0, 0, 1:5] = region_g[0, :4] / 16.
        rois_l = np.zeros((1, 1, 5), dtype=np.float32)
        rois_l[0, 0, 1:5] = region_l[0, :4] / 16.

        GPU = 0
        rois_g = torch.tensor(rois_g, dtype=torch.float).to(GPU)
        rois_l = torch.tensor(rois_l, dtype=torch.float).to(GPU)

        # global region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois_g.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_g = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_g = self.RCNN_roi_align(base_feat, rois_g.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_g = self.RCNN_roi_pool(base_feat, rois_g.view(-1, 5))

        # local region
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois_l.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled_feat_l = self.FRCN.RCNN_roi_crop(base_feat,
                                                    Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_l = self.RCNN_roi_align(base_feat, rois_l.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_l = self.RCNN_roi_pool(base_feat, rois_l.view(-1, 5))

        #print(pooled_feat_g.cpu().detach().numpy().shape)
        x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1)
        #print(x.cpu().detach().numpy().shape)
        x = self.glcc_conv1(x)
        x = F.relu(x)
        x = x.view(-1, self.roipool * self.roipool * 512)
        x = self.glcc_fc1(x)
        x = F.relu(x)
        # functional dropout respects train/eval mode, unlike a freshly
        # constructed nn.Dropout2d module
        x = F.dropout(x, training=self.training)
        x = self.glcc_fc2(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.glcc_fc_out(x)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, x
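示例#12 above implements a global-local fusion head: global and local RoI features are pooled separately, concatenated along the channel axis, and pushed through a small conv + FC classifier. A minimal sketch of that fusion pattern (the class name, layer sizes, and dropout placement are illustrative assumptions, not taken from the source):

import torch
import torch.nn as nn
import torch.nn.functional as F

class GlobalLocalHead(nn.Module):
    """Illustrative sketch of a global+local fusion classifier."""

    def __init__(self, in_channels=512, roipool=7, n_out=4):
        super().__init__()
        # concatenated global+local features -> back to in_channels
        self.conv1 = nn.Conv2d(2 * in_channels, in_channels, 1)
        self.fc1 = nn.Linear(in_channels * roipool * roipool, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc_out = nn.Linear(1024, n_out)
        self.roipool = roipool
        self.channels = in_channels

    def forward(self, feat_g, feat_l):
        # feat_g, feat_l: (N, C, roipool, roipool) pooled RoI features
        x = torch.cat((feat_g, feat_l), dim=1)
        x = F.relu(self.conv1(x))
        x = x.view(-1, self.channels * self.roipool * self.roipool)
        x = F.dropout(F.relu(self.fc1(x)), training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), training=self.training)
        return self.fc_out(x)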
示例#13
0
def object_detection(im, result):
    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    # pdb.set_trace()
    det_tic = time.time()
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    result_box = []
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    if vis:
        im2show = np.copy(result)

    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            result_box.append([cls_dets.cpu(), j])
            if vis:
                im2show = vis_detections(im2show, pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    misc_toc = time.time()
    nms_time = misc_toc - misc_tic

    # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
    # .format(i + 1, num_images, detect_time, nms_time))
    # sys.stdout.flush()
    return result_box, im2show
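Every example in this document funnels per-class detections through nms(...). For reference, a pure-PyTorch sketch of the same greedy IoU suppression (a readable stand-in, not the repo's compiled CPU/GPU kernel):

import torch

def nms_sketch(dets, iou_thresh):
    """Greedy NMS over dets = (N, 5) float tensor of (x1, y1, x2, y2, score)."""
    x1, y1, x2, y2, scores = dets.unbind(1)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        # intersection of the top box with the remaining ones
        xx1 = torch.max(x1[order[1:]], x1[i])
        yy1 = torch.max(y1[order[1:]], y1[i])
        xx2 = torch.min(x2[order[1:]], x2[i])
        yy2 = torch.min(y2[order[1:]], y2[i])
        w = (xx2 - xx1 + 1).clamp(min=0)
        h = (yy2 - yy1 + 1).clamp(min=0)
        iou = (w * h) / (areas[order[1:]] + areas[i] - w * h)
        # keep only boxes that overlap the winner less than the threshold
        order = order[1:][iou <= iou_thresh]
    return torch.tensor(keep, dtype=torch.long)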
示例#14
0
    def pose_est(self, im_in):
        # rgb -> bgr (reverse the channel order)
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            self.gt_boxes.resize_(1, 1, 6).zero_()
            self.num_boxes.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label, ps_prob, RCNN_loss_ps, rois_pose = self.fasterRCNN(self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        scores_ps = ps_prob.data

        if cfg.TEST.BBOX_REG:  # Test using bounding-box regressors, True
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data  # (1, 300, 4)
            # True, set in lib/model/utils
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.class_agnostic:  # our case
                    if self.cuda > 0:
                        # (300, 4)
                        # BBOX_NORMALIZE_STDS=(0.1, 0.1, 0.2, 0.2), BBOX_NORMALIZE_MEANS=(0.0, 0.0, 0.0, 0.0)
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                      + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4)  # (1, 300, 4)

            # boxes: RoIs output from RPN, in image coordinates
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            # Clip boxes to image boundaries
            pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]  # im_scales[0] = 1.25
        # (1,300,5) --> (300,5). 5: classes
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        scores_ps = scores_ps.squeeze()
        if self.vis:  # self.vis = True
            im2show = np.copy(im)

        dataset_classes = self.grasp_classes
        pose_lists = []
        pose_highest_lists = []
        pose_highest = []

        # start from index 1 to skip the background class
        for j in range(1, len(dataset_classes)):
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                ps_scores = scores_ps[inds]
                ps_scores_max_values, ps_scores_inds = torch.max(ps_scores, 1)

                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1),
                                      ps_scores_max_values.unsqueeze(1),
                                      ps_scores_inds.unsqueeze(1).float()), 1)

                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                if self.vis:
                    # only show bboxes having class score > 0.5
                    im2show, pose_list = vis_detections(im2show,
                                                        dataset_classes[j],
                                                        cls_dets.cpu().numpy(),
                                                        thresh=0.5)
                    # pose_list: list of all bboxes for each class
                    if len(pose_list):
                        if len(pose_list) > 1:
                            # sort all bboxes of 1 class according to angle score
                            pose_list.sort(key=itemgetter(5), reverse=True)

                        pose_lists.append(pose_list)
                        #print('pose_lists: {}'.format(pose_lists))
                        # only keep the bbox having the highest angle score of each class
                        pose_highest_lists.append(pose_list[0])
                        print('pose_highest_lists: {}'.format(
                            pose_highest_lists))

                        if len(pose_highest_lists) > 1:
                            # sort all highest bboxes of all classes, according to angle score
                            pose_highest_lists.sort(key=itemgetter(5),
                                                    reverse=True)

                        # get the bbox with the highest angle score across all classes
                        pose_highest = pose_highest_lists[0]

        print('pose_highest: {}'.format(pose_highest))

        im2show_copy = np.copy(im2show)
        im2show_copy = im2show_copy.astype(np.uint8)
        im2showRGB = cv2.cvtColor(im2show_copy, cv2.COLOR_BGR2RGB)

        return pose_highest, im2show_copy, im2showRGB
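The ranking above uses operator.itemgetter(5) to sort candidate boxes by the angle score stored at index 5 of each detection record (following the cls_dets layout built above: x1, y1, x2, y2, cls_score, angle_score, angle_index). The same selection in isolation, with made-up numbers:

from operator import itemgetter

# one candidate list per class; index 5 of each entry is the angle score
pose_lists = [
    [(10, 10, 50, 50, 0.9, 0.75, 3)],
    [(20, 15, 60, 55, 0.8, 0.92, 1), (22, 18, 61, 58, 0.7, 0.40, 5)],
]
# keep the best-angle box of each class, then the best across classes
best_per_class = [sorted(lst, key=itemgetter(5), reverse=True)[0]
                  for lst in pose_lists]
pose_highest = sorted(best_per_class, key=itemgetter(5), reverse=True)[0]
print(pose_highest)  # -> (20, 15, 60, 55, 0.8, 0.92, 1)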
示例#15
0
def frcnn(train):

    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    from model.utils.config import cfg

    cfg.USE_GPU_NMS = args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use the validation set and disable flipping to enable faster loading.

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    pascal_classes = np.asarray([
        '__background__', u'person', u'bicycle', u'car', u'motorcycle',
        u'airplane', u'bus', u'train', u'truck', u'boat', u'traffic light',
        u'fire hydrant', u'stop sign', u'parking meter', u'bench', u'bird',
        u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear',
        u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie',
        u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball',
        u'kite', u'baseball bat', u'baseball glove', u'skateboard',
        u'surfboard', u'tennis racket', u'bottle', u'wine glass', u'cup',
        u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple', u'sandwich',
        u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza', u'donut',
        u'cake', u'chair', u'couch', u'potted plant', u'bed', u'dining table',
        u'toilet', u'tv', u'laptop', u'mouse', u'remote', u'keyboard',
        u'cell phone', u'microwave', u'oven', u'toaster', u'sink',
        u'refrigerator', u'book', u'clock', u'vase', u'scissors',
        u'teddy bear', u'hair drier', u'toothbrush'
    ])
    # initialize the network here.
    #args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
    # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    # pdb.set_trace()

    print("load checkpoint %s" % (load_name))

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()
    thresh = 0.5

    webcam_num = args.webcam_num
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)

    print('Loaded Photo: {} images.'.format(num_images))
    import json, re
    from tqdm import tqdm
    d = {}
    pbar = tqdm(imglist)
    if not train:
        for i in pbar:
            im_file = os.path.join(args.image_dir, i)
            # im = cv2.imread(im_file)
            im_name = i
            im_in = np.array(imread(im_file))
            if len(im_in.shape) == 2:
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1,
                                                     4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_scales[0]
            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            # (re)load the label list; this could be hoisted out of the loop
            lis = json.load(
                open(
                    '/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json',
                    'r'))

            sm_lis = np.zeros(len(lis))
            for j in range(1, len(pascal_classes)):

                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:

                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    #cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets,
                               cfg.TEST.NMS,
                               force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    score = cls_dets[0][-1]
                    try:
                        # float() also moves the scalar tensor off the GPU
                        sm_lis[lis.index(pascal_classes[j])] = float(score)
                    except ValueError:
                        pass  # class name not present in the label list
            d[re.sub(r"\D", "", im_name)] = sm_lis.tolist()
            json.dump(d, open('annotation_dict' + '.json', 'w'), indent=2)
    else:
        pass
示例#16
0
            # NOTE: this snippet is truncated at the top; it begins mid-way
            # through un-normalizing the predicted box deltas and the
            # dimension/orientation outputs of a stereo detector.
                torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            dim_orien = dim_orien * \
                torch.FloatTensor(cfg.TRAIN.DIM_NORMALIZE_STDS).cuda() + \
                torch.FloatTensor(cfg.TRAIN.DIM_NORMALIZE_MEANS).cuda()

            box_delta_left = box_delta_left.view(1, -1, 4 * len(imdb._classes))
            box_delta_right = box_delta_right.view(1, -1,
                                                   4 * len(imdb._classes))
            dim_orien = dim_orien.view(1, -1, 5 * len(imdb._classes))
            kpts_delta = kpts_delta.view(1, -1, 1)
            left_delta = left_delta.view(1, -1, 1)
            right_delta = right_delta.view(1, -1, 1)
            max_prob = max_prob.view(1, -1, 1)

            pred_boxes_left = \
                bbox_transform_inv(boxes_left, box_delta_left, 1)
            pred_boxes_right = \
                bbox_transform_inv(boxes_right, box_delta_right, 1)
            pred_kpts, kpts_type = \
                kpts_transform_inv(boxes_left, kpts_delta, cfg.KPTS_GRID)
            pred_left = \
                border_transform_inv(boxes_left, left_delta, cfg.KPTS_GRID)
            pred_right = \
                border_transform_inv(boxes_left, right_delta, cfg.KPTS_GRID)

            pred_boxes_left = clip_boxes(pred_boxes_left, im_info.data, 1)
            pred_boxes_right = clip_boxes(pred_boxes_right, im_info.data, 1)

            pred_boxes_left /= im_info[0, 2].data
            pred_boxes_right /= im_info[0, 2].data
            pred_kpts /= im_info[0, 2].data
示例#17
0
def detect(image, threshold=0.5, max_bbox=20):
    thresh = 0.05
    vis = True
    result = []

    # rgb -> bgr
    im = image[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:

                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()

                box_deltas = box_deltas.view(1, -1, 4)
            else:

                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()

                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            if vis:
                dets = cls_dets.cpu().numpy()
                for i in range(np.minimum(max_bbox, dets.shape[0])):
                    bbox = tuple(int(np.round(x)) for x in dets[i, :4])
                    score = dets[i, -1]
                    if score > threshold:
                        # pred_boxes are (x1, y1, x2, y2) in this codebase
                        xmin, ymin, xmax, ymax = bbox
                        result.append(score)
                        result.append(int(xmin))
                        result.append(int(ymin))
                        result.append(int(xmax))
                        result.append(int(ymax))
                        result.append(pascal_classes[j])

    return result
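detect() returns a flat list in which every detection contributes six consecutive items: score, xmin, ymin, xmax, ymax, class name. A small usage sketch for unpacking that layout (the input path is hypothetical):

import numpy as np
from scipy.misc import imread  # the same reader used elsewhere in these examples

image = np.array(imread('demo.jpg'))  # hypothetical input image
flat = detect(image, threshold=0.5)
# regroup the flat list into 6-item records, one per detection
for k in range(0, len(flat), 6):
    score, xmin, ymin, xmax, ymax, cls_name = flat[k:k + 6]
    print('%s: %.2f at (%d, %d, %d, %d)' %
          (cls_name, score, xmin, ymin, xmax, ymax))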
示例#18
0
    def Predict(self, im_in, area):
        # initialize the tensor holder here.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        num_boxes = torch.LongTensor(1)
        gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.cuda > 0:
            im_data = im_data.cuda()
            im_info = im_info.cuda()
            num_boxes = num_boxes.cuda()
            gt_boxes = gt_boxes.cuda()

        # make variable
        with torch.no_grad():
            im_data = Variable(im_data)
            im_info = Variable(im_info)
            num_boxes = Variable(num_boxes)
            gt_boxes = Variable(gt_boxes)

        if self.cuda > 0:
            cfg.CUDA = True

        if self.cuda > 0:
            self.fasterRCNN.cuda()

        self.fasterRCNN.eval()

        #im_in = cv2.imread(im_file)
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr, then back to rgb (the cvtColor undoes the channel flip)
        im_in = im_in[:, :, ::-1]
        im = cv2.cvtColor(im_in, cv2.COLOR_BGR2RGB)

        blobs, im_scales = self._get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()


        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.class_agnostic:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            tiled = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            pred_boxes = tiled.cuda() if self.cuda > 0 else tiled

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        ItemAndBoxes_all = []
        im2show = np.copy(im)
        for j in range(1, len(self.pascal_classes)):
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets,
                           cfg.TEST.NMS,
                           force_cpu=not cfg.USE_GPU_NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                im2show, ItemAndBoxes = vis_detections(im2show,
                                                       self.pascal_classes[j],
                                                       cls_dets.cpu().numpy(),
                                                       self.visThresh)
                ItemAndBoxes_all.append(ItemAndBoxes)

        # keep the three most confident items, then order them left-to-right
        # by the x-coordinate of their boxes
        ItemAndBoxes_all = sorted(ItemAndBoxes_all,
                                  key=lambda x: x[2],
                                  reverse=True)
        ItemAndBoxes_all = ItemAndBoxes_all[0:3]
        ItemAndBoxes_all = sorted(ItemAndBoxes_all, key=lambda x: x[1][0])

        if self.vis == 1:
            cv2.namedWindow("result", 0)
            cv2.resizeWindow("result", 1080, 720)
            cv2.imshow('result', im2show)
            cv2.waitKey(0)
            result_path = os.path.join(self.image_dir, str(area) + ".jpg")
            cv2.imwrite(result_path, im2show)

        return {
            "Left": ItemAndBoxes_all[0][0],
            "Mid": ItemAndBoxes_all[1][0],
            "Right": ItemAndBoxes_all[2][0]
        }
示例#19
0
def objdet_inference(cls_prob,
                     box_output,
                     im_info,
                     box_prior=None,
                     class_agnostic=True,
                     n_classes=None,
                     for_vis=False,
                     recover_imscale=True):
    """
    :param cls_prob: predicted class info
    :param box_output: predicted bounding boxes (for anchor-based detection, it indicates deltas of boxes).
    :param im_info: image scale information, for recovering the original bounding box scale before image resizing.
    :param box_prior: anchors, RoIs, e.g.
    :param class_agnostic: whether the boxes are class-agnostic. For faster RCNN, it is class-specific by default.
    :param n_classes: number of object classes
    :param for_vis: the results are for visualization or validation.
    :param recover_imscale: whether the predicted bounding boxes are recovered to the original scale.
    :return: a list of bounding boxes, one class corresponding to one element. If for_vis, they will be concatenated.
    """
    assert box_output.dim(
    ) == 2, "Multi-instance batch inference has not been implemented."

    if for_vis:
        thresh = cfg.TEST.COMMON.OBJ_DET_THRESHOLD
    else:
        thresh = 0.

    scores = cls_prob

    # TODO: Inference for anchor free algorithms has not been implemented.
    if box_prior is None:
        raise NotImplementedError(
            "Inference for anchor free algorithms has not been implemented.")

    if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        normalizer = {
            'mean': cfg.TRAIN.COMMON.BBOX_NORMALIZE_MEANS,
            'std': cfg.TRAIN.COMMON.BBOX_NORMALIZE_STDS
        }
        box_output = box_unnorm_torch(box_output, normalizer, 4,
                                      class_agnostic, n_classes)
    else:
        raise RuntimeError(
            "BBOX_NORMALIZE_TARGETS_PRECOMPUTED is forced to be True in our version."
        )

    pred_boxes = bbox_transform_inv(box_prior, box_output, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info, 1)

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    if recover_imscale:
        pred_boxes = box_recover_scale_torch(pred_boxes, im_info[3],
                                             im_info[2])

    all_box = [[]]  # index 0 is a placeholder for the background class
    for j in range(1, n_classes):
        if class_agnostic:
            cls_boxes = pred_boxes
        else:
            cls_boxes = pred_boxes[:, j * 4:(j + 1) * 4]
        cls_dets, cls_scores, _ = box_filter(cls_boxes,
                                             scores[:, j],
                                             thresh,
                                             use_nms=True)
        cls_dets = np.concatenate((cls_dets, np.expand_dims(cls_scores, -1)),
                                  axis=-1)
        if for_vis:
            cls_dets[:, -1] = j
        all_box.append(cls_dets)
    if for_vis:
        return np.concatenate(all_box[1:], axis=0)
    return all_box
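
For orientation, a hypothetical call to objdet_inference after a single-image forward pass. Every tensor below is a stand-in with assumed shapes in the spirit of the other examples here (the im_info layout in particular is an assumption); this is a usage sketch, not part of the original snippet.

import torch

N, n_classes = 300, 21
cls_prob = torch.rand(N, n_classes)           # per-RoI class scores (assumed)
bbox_pred = torch.rand(N, 4 * n_classes)      # per-class box deltas (assumed)
rois = torch.rand(N, 5)                       # batch index + (x1, y1, x2, y2)
im_info = torch.tensor([600., 800., 1., 1.])  # assumed layout: H, W, scale, ...

boxes_per_class = objdet_inference(cls_prob, bbox_pred, im_info,
                                   box_prior=rois[:, 1:5],
                                   class_agnostic=False,
                                   n_classes=n_classes,
                                   for_vis=False,
                                   recover_imscale=False)
for j in range(1, n_classes):
    print('class %d: %d detections' % (j, len(boxes_per_class[j])))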
Example #20
def test_net(model=None, image=None, params=None, bg=None, cls=None):
    blob, scale, label = params
    with torch.no_grad():  # holders for the network input tensors
        im_data = Variable(torch.FloatTensor(1).cuda())
        im_info = Variable(torch.FloatTensor(1).cuda())
        num_boxes = Variable(torch.LongTensor(1).cuda())
        gt_boxes = Variable(torch.FloatTensor(1).cuda())

    im_info_np = np.array([[blob.shape[1], blob.shape[2], scale[0]]], dtype=np.float32)
    im_data_pt = torch.from_numpy(blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():  # resize
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes)  # predict

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if opt.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if opt.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if opt.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS)

            box_deltas = box_deltas.view(1, -1, 4 * len(label))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Without box regression, simply repeat the RoIs once for each class.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= scale[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    image = np.copy(image[:, :, ::-1])
    demo = image.copy()
    bubbles = []
    dets_bubbles = []

    for j in range(1, len(label)):
        inds = torch.nonzero(scores[:, j] > opt.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], opt.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()].cpu().numpy()

            #  post-processing : get contours of speech bubble
            demo, image, bubbles, dets_bubbles = bubble_utils.get_cnt_bubble(image, image.copy(), label[j], cls_dets,
                                                                             cls, bg=bg)
    return demo, image, bubbles, dets_bubbles
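
The delta unnormalization repeated in these examples is simply target * std + mean per coordinate. A minimal sketch, with hypothetical constants standing in for the opt.TRAIN_BBOX_NORMALIZE_* values:

import torch

BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)   # assumed values
BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)  # assumed values

def unnormalize_deltas(box_deltas):
    # box_deltas: [..., 4] normalized regression targets -> raw deltas
    stds = torch.tensor(BBOX_NORMALIZE_STDS)
    means = torch.tensor(BBOX_NORMALIZE_MEANS)
    return box_deltas.view(-1, 4) * stds + means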
Example #21
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, :, 1]  # batch_size x num_anchors (fg probs)
        bbox_deltas = input[1]  # batch_size x num_rois x 4
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(
            generate_anchors_all_pyramids(
                self._fpn_scales, self._anchor_ratios, feat_shapes,
                self._fpn_feature_strides,
                self._fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors,
                               4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                             nms_thresh)
            # keep_idx_i = nms(proposals_single, scores_single, nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # pad the remaining rows with zeros
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
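
Step one of the algorithm comment above (applying predicted deltas to anchors) is the standard center-size transform. A single-image sketch of what bbox_transform_inv computes, assuming (x1, y1, x2, y2) boxes and (dx, dy, dw, dh) deltas as used throughout these examples:

import torch

def bbox_transform_inv_sketch(boxes, deltas):
    # boxes: [N, 4] anchors as (x1, y1, x2, y2); deltas: [N, 4] as (dx, dy, dw, dh)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = torch.exp(deltas[:, 2]) * widths
    pred_h = torch.exp(deltas[:, 3]) * heights

    # back to corner format
    return torch.stack((pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h), dim=1)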
Example #22
    def detect(self, cv_img, is_rgb=True):
        # - image shape is (height,width,no_channels)
        # print('- input image shape: {}'.format(cv_img.shape))
        # - result is a list of [x1,y1,x2,y2,class_id]
        results = []

        im_in = np.array(cv_img)

        if is_rgb:
            im = im_in[:, :, ::-1]  # rgb -> bgr
        else:
            im = im_in

        blobs, im_scales = self._get_image_blob(im)  # prep_type = 'caffe' is applied
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            self.gt_boxes.resize_(1, 1, 5).zero_()
            self.num_boxes.resize_(1).zero_()

        # pdb.set_trace()
        det_tic = time.time()

        rois0, cls_prob0, bbox_pred0, _, _, _, _, _, share_pred0, _, progress_pred0, _ = \
            self.fasterRCNN(self.im_data, self.im_info, self.gt_boxes, self.num_boxes, flow_id=0)
        rois1, cls_prob1, bbox_pred1, _, _, _, _, _, share_pred1, _, _, _ = \
            self.fasterRCNN(self.im_data, self.im_info, self.gt_boxes, self.num_boxes, flow_id=1)

        # rois0 # [1, 300, 5]
        rois = torch.cat((rois0, rois1), dim=1)

        # share_pred0 # [1, 300, 1]
        # Broadcast share_pred1 over the second flow's proposals.
        share_pred1_tiled = torch.ones(
            (share_pred0.shape[0], rois0.shape[1], 1)).cuda() * share_pred1
        share_pred = torch.cat((share_pred0, share_pred1_tiled), dim=1)

        progress_pred = progress_pred0

        # cls_prob0 # [1, 300, 3]
        # bbox_pred0 # [1, 300, 12] (= 3 classes x 4)

        cls_prob = torch.zeros(
            (cls_prob0.shape[0], cls_prob0.shape[1] + cls_prob1.shape[1],
             len(self.classes_total))).cuda()
        bbox_pred = torch.zeros(
            (bbox_pred0.shape[0], bbox_pred0.shape[1] + bbox_pred1.shape[1],
             4 * len(self.classes_total))).cuda()

        # Hoist the per-flow proposal counts so they are always defined,
        # whichever class list a name appears in first.
        num_batch0 = cls_prob0.shape[1]
        num_batch1 = cls_prob1.shape[1]
        for j, j_name in enumerate(self.classes_total):
            if j_name in self.classes0:
                j_idx = (j_name == self.classes0).nonzero()[0][0]
                cls_prob[:, :num_batch0, j] = cls_prob0[:, :, j_idx]
                bbox_pred[:, :num_batch0, j * 4:(j + 1) * 4] = \
                    bbox_pred0[:, :, j_idx * 4:(j_idx + 1) * 4]

            if j_name in self.classes1:
                j_idx = (j_name == self.classes1).nonzero()[0][0]
                cls_prob[:, num_batch0:num_batch0 + num_batch1, j] = \
                    cls_prob1[:, :, j_idx]
                bbox_pred[:, num_batch0:num_batch0 + num_batch1, j * 4:(j + 1) * 4] = \
                    bbox_pred1[:, :, j_idx * 4:(j_idx + 1) * 4]

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if self.class_agnostic:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if self.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(self.classes_total))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        if self.use_share_regress:
            share_pred = share_pred.squeeze()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        # im2show and im_scale are needed later for box filtering, so compute
        # them regardless of whether visualization or saving is enabled.
        h, w = im.shape[:2]
        if max(h, w) > 800:
            # resize so the longer side becomes 800
            if h >= w:
                im2show = cv2.resize(im, (int(800 / h * w), 800))
            else:
                im2show = cv2.resize(im, (800, int(800 / w * h)))
            im_scale = im2show.shape[0] / h
        else:
            im2show = np.copy(im)
            im_scale = 1.0

        im_pil = torchvision.transforms.ToPILImage(mode=None)(im[:, :, ::-1])
        im_width, im_height = im_pil.size

        for j in range(1, len(self.classes_total)):
            # find indices with scores > threshold for class j
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                if self.use_share_regress:
                    share_pred_inds = share_pred[inds]

                _, order = torch.sort(cls_scores, 0, True)
                if self.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                if self.use_share_regress:
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1),
                                          share_pred_inds.unsqueeze(1)), 1)
                else:
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]

                # im: original image, (768, 1024, 3)
                # im_data: blob image, (1, 3, 600, 800)
                # cls_dets: x1, y1, x2, y2, score

                # crop and feed to classifier
                # im_pil.save(osp.join(pathOutputSaveImages, 'debug_input.png'))
                if self.classes_total[j] == 'food':
                    for k in range(cls_dets.shape[0]):
                        crop_margin_ratio = 0.1

                        x1 = int(cls_dets[k, 0])
                        y1 = int(cls_dets[k, 1])
                        x2 = int(cls_dets[k, 2])
                        y2 = int(cls_dets[k, 3])

                        crop_h_margin = (y2 - y1) * crop_margin_ratio / 2.
                        crop_w_margin = (x2 - x1) * crop_margin_ratio / 2.

                        x1 = x1 - crop_w_margin
                        y1 = y1 - crop_h_margin
                        x2 = x2 + crop_w_margin
                        y2 = y2 + crop_h_margin

                        if x1 < 0: x1 = 0
                        if y1 < 0: y1 = 0
                        if x2 > im_width - 1: x2 = im_width - 1
                        if y2 > im_height - 1: y2 = im_height - 1

                        im_crop = im_pil.crop((x1, y1, x2, y2))
                        # im_crop.save(osp.join(pathOutputSaveImages, 'debug_crop.png'))

                        im_crop = self.food_classifier.test_transform(im_crop)
                        im_crop = torch.unsqueeze(im_crop, dim=0)

                        if self.food_classifier.eval_crop_type == 'TenCrop':
                            bs, ncrops, c, h, w = im_crop.size()
                            im_crop = im_crop.view(-1, c, h, w)

                        food_output = self.food_classifier.classify(im_crop)

                        if self.food_classifier.eval_crop_type == 'TenCrop':
                            food_output = food_output.view(
                                bs, ncrops, -1).mean(1)  # avg over crops

                        topk_score, topk_index = torch.topk(food_output,
                                                            5,
                                                            dim=1)

                        food_class = [
                            self.food_classifier.idx_to_class[
                                topk_index[0][l].item()] for l in range(5)
                        ]
                        food_score = torch.nn.functional.softmax(topk_score[0],
                                                                 dim=0)

                        bbox_draw = cls_dets.detach().cpu().numpy()[k:k + 1, :]
                        bbox_draw[:, :4] = bbox_draw[:, :4] * im_scale

                        box_y = (bbox_draw[0, 1] + bbox_draw[0, 3]) / 2.
                        if (bbox_draw[0, 4] >= self.vis_th
                                and box_y > im2show.shape[0] / 2):
                            # - result is a list of [x1,y1,x2,y2,class_id]
                            results.append([
                                int(bbox_draw[0][0]),
                                int(bbox_draw[0][1]),
                                int(bbox_draw[0][2]),
                                int(bbox_draw[0][3]),
                                self.classes_total[j],
                                topk_index[0][0].item(),  # food_class index
                                food_class[0],
                                bbox_draw[0][5].item()
                            ])
                else:
                    bbox_draw = cls_dets.detach().cpu().numpy()
                    bbox_draw[:, :4] = bbox_draw[:, :4] * im_scale

                    for k in range(cls_dets.shape[0]):
                        box_y = (bbox_draw[k, 1] + bbox_draw[k, 3]) / 2.
                        if (bbox_draw[k, 4] >= self.vis_th
                                and box_y > im2show.shape[0] / 2):
                            results.append([
                                int(bbox_draw[k][0]),
                                int(bbox_draw[k][1]),
                                int(bbox_draw[k][2]),
                                int(bbox_draw[k][3]), self.classes_total[j], 0,
                                0, 0
                            ])

        # dish-food converter:
        # for every dish, find the overlapping food and its amount;
        # if no food is found, a zero amount is assigned.
        print('0.results:', results)
        new_results = []
        for item in results:
            x1, y1, x2, y2, class_name, food_index, food_name, food_amount = item

            if class_name == 'dish':
                new_results.append(item)
        print('1.new_results:', new_results)

        for item in results:
            x1, y1, x2, y2, class_name, food_index, food_name, food_amount = item

            if class_name == 'food':
                is_find_dish = False
                for dish_i, dish_item in enumerate(new_results):
                    d_x1, d_y1, d_x2, d_y2, _, _, _, dish_amount = dish_item

                    # check overlap
                    overlap_ratio = self.get_overlap_ratio_meal(
                        food_bbox=[x1, y1, x2, y2],
                        dish_bbox=[d_x1, d_y1, d_x2, d_y2])
                    if overlap_ratio > 0.9:
                        new_results[dish_i][5] = food_index
                        new_results[dish_i][6] = food_name
                        new_results[dish_i][7] += food_amount

                        is_find_dish = True

                if not is_find_dish:
                    new_results.append(item)
        print('2.new_results:', new_results)

        # Sentinel tuples; an amount of 200 (> 100) means "not assigned yet".
        rep_drink = 0, 0, 0, 0, 'drink', -1, -1, 200
        rep_food = 0, 0, 0, 0, 'food', -1, -1, 200

        for dish_i, dish_item in enumerate(new_results):
            # item layout: [x1, y1, x2, y2, class_name, food_index, food_name, food_amount]
            # food indices 94 and 64 are treated as drink classes.
            if new_results[dish_i][5] == 94 or new_results[dish_i][5] == 64:
                new_results[dish_i][4] = 'drink'
            else:
                new_results[dish_i][4] = 'food'

            new_amount = new_results[dish_i][7]
            if new_amount > 1.0: new_amount = 1.0
            if new_amount < 0.0: new_amount = 0.0
            new_results[dish_i][7] = int(round(new_amount * 100))

            # keep the dish with the smallest amount as the representative
            if (new_results[dish_i][4] == 'drink'
                    and new_results[dish_i][7] < rep_drink[7]):
                rep_drink = new_results[dish_i]

            if (new_results[dish_i][4] == 'food'
                    and new_results[dish_i][7] < rep_food[7]):
                rep_food = new_results[dish_i]

        # Keep one representative drink and one representative food.
        results = []
        if rep_drink[7] <= 100: results.append(rep_drink)
        if rep_food[7] <= 100: results.append(rep_food)
        # dish-food converter - end
        print('3.results: ', results)

        if self.save_result:
            for item in results:
                # item = [x1, y1, x2, y2, category, food_index, food_name, amount]
                if item[4] == 'food' or item[4] == 'drink':
                    str_name = '%s (%.2f)' % (item[4], item[7])
                else:
                    str_name = '%s' % (item[4])

                bbox_draw = np.array(
                    [[item[0], item[1], item[2], item[3], 1.0]])

                color_index = 1
                im2show = vis_detections_korean_ext2(
                    im2show,
                    str_name,
                    bbox_draw,
                    box_color=self.list_box_color[color_index],
                    text_color=(255, 255, 255),
                    text_bg_color=self.list_box_color[color_index],
                    fontsize=20,
                    thresh=self.vis_th,
                    draw_score=False,
                    draw_text_out_of_box=True)

        if self.vis:
            cv2.imwrite('debug.png', im2show)
            #print('debug.png is saved.')

        return results, im2show
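
get_overlap_ratio_meal is not shown in this example. A plausible sketch (an assumption, not the actual implementation) would measure how much of the food box lies inside the dish box, which is consistent with the > 0.9 test above:

def get_overlap_ratio_meal(food_bbox, dish_bbox):
    # Both boxes are [x1, y1, x2, y2]; returns intersection area / food-box area.
    ix1 = max(food_bbox[0], dish_bbox[0])
    iy1 = max(food_bbox[1], dish_bbox[1])
    ix2 = min(food_bbox[2], dish_bbox[2])
    iy2 = min(food_bbox[3], dish_bbox[3])
    iw = max(0.0, ix2 - ix1)
    ih = max(0.0, iy2 - iy1)
    food_area = max(1e-6, (food_bbox[2] - food_bbox[0]) * (food_bbox[3] - food_bbox[1]))
    return (iw * ih) / food_area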
Example #23
      if cfg.TEST.BBOX_REG:
          # Apply bounding-box regression deltas
          box_deltas = bbox_pred.data
          if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
              # Optionally normalize targets by a precomputed mean and stdev
              if args.class_agnostic:
                  box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                  box_deltas = box_deltas.view(1, -1, 4)
              else:
                  box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                  box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

          pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
          pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
      else:
          # Simply repeat the boxes, once for each class
          pred_boxes = np.tile(boxes, (1, scores.shape[1]))

      pred_boxes /= data[1][0][2]

      scores = scores.squeeze()
      pred_boxes = pred_boxes.squeeze()
      det_toc = time.time()
      detect_time = det_toc - det_tic
      misc_tic = time.time()
      if vis:
          im = cv2.imread(imdb.image_path_at(i))
          im2show = np.copy(im)
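
clip_boxes, used throughout these examples, clamps box coordinates to the image bounds. A single-image sketch consistent with the (x1, y1, x2, y2) layout above (the real helper also handles a batch dimension):

import torch

def clip_boxes_single(boxes, im_height, im_width):
    # boxes: [N, 4 * K]; clamp x coordinates to [0, W - 1] and y to [0, H - 1].
    boxes[:, 0::4].clamp_(0, im_width - 1)
    boxes[:, 1::4].clamp_(0, im_height - 1)
    boxes[:, 2::4].clamp_(0, im_width - 1)
    boxes[:, 3::4].clamp_(0, im_height - 1)
    return boxes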
Example #24
        # predicted boxes
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(batch_size, -1,
                                                 4 * len(imagenet_vid_classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
        else:
            # Simply repeat the boxes, once for each class
            raise NotImplementedError

        # Assume scales are same for frames in the same video
        im_scale = im_info.data[0][-1]
        pred_boxes /= im_scale
        pred_boxes = pred_boxes.squeeze()
        scores = scores.squeeze()

        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        for j in range(1, imdb.num_classes):