def gen_depth(img):
    # Build a detections DataFrame (one row per predicted box) and run depth inference on it.
    df = pd.DataFrame(columns=[
        'filename', 'class', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'
    ])
    img = t.from_numpy(img)[None]

    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    trainer.load(
        '/home/olixu/distance-cnn/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth.701052458187'
    )
    opt.caffe_pretrain = False  # this model was trained from torchvision-pretrained model
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,
                                                            visualize=True)
    box_new = np.asarray(_bboxes)
    label_new = at.tonumpy(_labels[0]).reshape(-1)
    score_new = at.tonumpy(_scores[0]).reshape(-1)

    for i in range(box_new.shape[1]):
        df.at[i, 'filename'] = 'file' + str(i)
        df.at[i, 'class'] = label_new[i]
        df.at[i, 'confidence'] = score_new[i]
        # bbox coordinates
        df.at[i, 'xmin'] = box_new[0, i, 1]
        df.at[i, 'ymin'] = box_new[0, i, 0]
        df.at[i, 'xmax'] = box_new[0, i, 3]
        df.at[i, 'ymax'] = box_new[0, i, 2]
    return inf.infer(df)
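For reference, a hypothetical sketch of the frame gen_depth hands to inf.infer (values made up; one row per detection, box coordinates in pixels):

#   filename  class  confidence   xmin   ymin   xmax   ymax
# 0    file0      6        0.98   12.0   34.0  200.0  180.0
# 1    file1     14        0.87   55.0   60.0  140.0  150.0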
Example No. 2
def clip_bboxs_on_image(rois, roi_locs):
    """Decode per-RoI offsets into boxes and clip them to the image bounds.

    :param rois: Tensor of shape (R, 4)
    :param roi_locs: Tensor of shape (R, 8), offsets for two classes per RoI
    :return: bbox: Tensor of shape (R, 4), the decoded class-1 boxes
    """
    loc_normalize_mean = (0., 0., 0., 0.)
    loc_normalize_std = (0.1, 0.1, 0.2, 0.2)
    mean = torch.Tensor(loc_normalize_mean).cuda(). \
        repeat(2)[None]
    std = torch.Tensor(loc_normalize_std).cuda(). \
        repeat(2)[None]

    roi_locs = roi_locs * std + mean
    roi_loc = roi_locs.view(-1, 2, 4)
    rois = at.totensor(rois)
    rois = rois.view(-1, 1, 4).expand_as(roi_loc)
    bbox = loc2bbox(at.tonumpy(rois).reshape((-1, 4)), at.tonumpy(roi_loc).reshape((-1, 4)))
    bbox = at.totensor(bbox)
    box = bbox.view(-1, 8)
    box[:, 0::2] = (box[:, 0::2]).clamp(min=0, max=800)  # assumes an 800x800 input image
    box[:, 1::2] = (box[:, 1::2]).clamp(min=0, max=800)
    box = box.reshape((-1, 2, 4))[:, 1, :]

    return box
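clip_bboxs_on_image relies on loc2bbox to turn normalized (dy, dx, dh, dw) offsets back into corner boxes. A minimal NumPy sketch of that decoding, assuming the standard R-CNN parameterization this repo uses (a sketch, not the repo's actual implementation):

import numpy as np

def loc2bbox_sketch(src_bbox, loc):
    # src_bbox: (R, 4) boxes as (ymin, xmin, ymax, xmax)
    # loc:      (R, 4) offsets as (dy, dx, dh, dw)
    h = src_bbox[:, 2] - src_bbox[:, 0]
    w = src_bbox[:, 3] - src_bbox[:, 1]
    ctr_y = src_bbox[:, 0] + 0.5 * h
    ctr_x = src_bbox[:, 1] + 0.5 * w

    # undo the encoding: shift the center, rescale the size
    new_ctr_y = loc[:, 0] * h + ctr_y
    new_ctr_x = loc[:, 1] * w + ctr_x
    new_h = np.exp(loc[:, 2]) * h
    new_w = np.exp(loc[:, 3]) * w

    dst_bbox = np.empty_like(loc)
    dst_bbox[:, 0] = new_ctr_y - 0.5 * new_h
    dst_bbox[:, 1] = new_ctr_x - 0.5 * new_w
    dst_bbox[:, 2] = new_ctr_y + 0.5 * new_h
    dst_bbox[:, 3] = new_ctr_x + 0.5 * new_w
    return dst_bbox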
Example No. 3
def draw(dataloader, faster_rcnn, test_num=100):
    for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_,
             id_) in enumerate(dataloader):
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_, _feature = faster_rcnn.predict(
            imgs, [sizes])
        img_file = opt.voc_data_dir + '/JPEGImages/' + str(id_[0]) + '.jpg'
        image = cv2.imread(img_file)
        # convert to numpy format
        bboxs = at.tonumpy(pred_bboxes_[0])
        name = at.tonumpy(pred_labels_[0]).reshape(-1)
        score = at.tonumpy(pred_scores_[0]).reshape(-1)

        # Save each round of test-set predictions; ideally gate this on the
        # epoch and save only every 10 epochs, otherwise it wastes too much time.
        for i in range(len(name)):
            if score[i] <= opt.threshold:
                continue
            xmin = int(round(float(bboxs[i, 1])))
            ymin = int(round(float(bboxs[i, 0])))
            xmax = int(round(float(bboxs[i, 3])))
            ymax = int(round(float(bboxs[i, 2])))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
            cv2.putText(image, opt.VOC_BBOX_LABEL_NAMES[name[i]],
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        1e-3 * image.shape[0], (0, 0, 255), 1)
            cv2.putText(image,
                        str(score[i])[0:3], (xmin + 30, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0],
                        (0, 0, 255), 1)
        cv2.imwrite('result/' + str(id_[0]) + '.jpg', image)
Example No. 4
def train(model, train_loader, criterion, epoch, vis):
    model.train()
    batch_loss = 0
    for batch_idx, sample_batched in enumerate(train_loader):
        data = sample_batched['image']
        target = sample_batched['mask']
        data, target = Variable(data.type(opt.dtype)), Variable(
            target.type(opt.dtype))
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        batch_loss += loss.data[0]
        if (batch_idx + 1) % opt.plot_every == 0:
            ori_img_ = inverse_normalize(at.tonumpy(data[0]))
            target_ = at.tonumpy(target[0])
            pred_ = at.tonumpy(output[0])
            vis.img('gt_img', ori_img_)
            vis.img('gt_mask', target_)
            vis.img('pred_mask', (pred_ >= 0.5).astype(np.float32))

    batch_loss /= (batch_idx + 1)
    print('epoch: ' + str(epoch) + ', train loss: ' + str(batch_loss))
    with open('logs.txt', 'a') as file:
        file.write('epoch: ' + str(epoch) + ', train loss: ' +
                   str(batch_loss) + '\n')
    vis.plot('train loss', batch_loss)
Example No. 5
def eval(dataloader, faster_rcnn, vis, test_num=10000):
    pred_bboxes, pred_labels, pred_scores = list(), list(), list()
    gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
    for ii, (imgs, sizes, gt_bboxes_, gt_labels_,
             gt_difficults_) in tqdm(enumerate(dataloader)):
        # run prediction and plot the predicted bboxes
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])
        img = imgs.cuda().float()
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_img = visdom_bbox(ori_img_, at.tonumpy(pred_bboxes_[0]),
                               at.tonumpy(pred_labels_[0]).reshape(-1),
                               at.tonumpy(pred_scores_[0]))
        vis.img('test_pred_img', pred_img)
        gt_bboxes += list(gt_bboxes_.numpy())
        gt_labels += list(gt_labels_.numpy())
        gt_difficults += list(gt_difficults_.numpy())
        pred_bboxes += pred_bboxes_
        pred_labels += pred_labels_
        pred_scores += pred_scores_
        if ii == test_num:
            break

    result = eval_detection_voc(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults,
                                use_07_metric=True)
    return result
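The evaluator returns a dict with per-class AP and the mean (the training loop later reads eval_result['map']). A usage sketch with the signature above:

result = eval(test_dataloader, faster_rcnn, vis, test_num=1000)
print(result['ap'])   # per-class average precision
print(result['map'])  # mean AP (computed here with the VOC07 11-point metric)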
Example No. 6
 def predict(self, imgs=[]):
     imglist = []
     for i in imgs:
         img_path = os.path.join(self.imgs_path, i)
         img = read_image(img_path)
         if self.isneed_enhance:
             img = Image_Enhance().api(img)
         img = t.from_numpy(img)[None]
         imglist.append(img)
     for index, img in enumerate(imglist):
         starttime = datetime.datetime.now()
         _bboxes, _labels, _scores = self.trainer.faster_rcnn.predict(
             img, visualize=True)
         endtime = datetime.datetime.now()
          print('predict time consumed = %ss' %
                round((endtime - starttime).total_seconds(), 6))
         if self.imgs_vis_path:
             img_path = os.path.join(self.imgs_vis_path, imgs[index])
             img = read_image(img_path)
             img = t.from_numpy(img)[None]
         ax = vis_bbox(at.tonumpy(img[0]),
                       at.tonumpy(_bboxes[0]),
                       at.tonumpy(_labels[0]).reshape(-1),
                       at.tonumpy(_scores[0]).reshape(-1))
         fig = ax.get_figure()
         fig.savefig("output.png")
Example No. 7
    def rpn_loss(self, rpn_loc, rpn_score, bbox, anchor, im_size):
        ## Get ground truth locs and labels for each anchor.
        ## Label convention: 1 = foreground, 0 = background, -1 = ignored
        ## (excluded from the classification loss via ignore_index=-1).
        gt_rpn_loc, gt_rpn_lbl = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, im_size)
        gt_rpn_lbl = at.totensor(gt_rpn_lbl).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)

        ## calculate localization loss for rpn (sigma = 3)
        rpn_loc_loss = self.loc_loss(rpn_loc, gt_rpn_loc, gt_rpn_lbl.data, 3)

        # calculate classification loss for rpn
        rpn_cls_loss = F.cross_entropy(rpn_score.to(self.device),
                                       gt_rpn_lbl.to(self.device),
                                       ignore_index=-1)
        _gt_rpn_lbl = gt_rpn_lbl[gt_rpn_lbl > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_lbl) > -1]
        #self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_lbl.data.long())

        return rpn_cls_loss, rpn_loc_loss
Example No. 8
    def forward(self, features, img_size, scale, gt_bbox, gt_label):
        n = 1
        h = F.relu(self.rpn_conv(features))

        loc = self.rpn_loc(h)
        score = self.rpn_score(h)

        h, w = loc.shape[2:]

        loc = loc.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
        score = score.permute(0, 2, 3, 1).contiguous()

        softmax_score = F.softmax(score.view(n, h, w, self.n_anchor, 2), dim=4)
        fg_score = softmax_score[:, :, :, :, 1].contiguous().view(n, -1)

        score = score.view(n, -1, 2)

        feat_shape = (h, w)
        feat_stride = img_size[0] / h
        anchor = generate_anchors(self.scales, self.ratios, feat_shape,
                                  feat_stride)

        loc = loc[0]
        score = score[0]
        fg_score = fg_score[0]

        roi = self.proposal_layer(loc.cpu().data.numpy(),
                                  fg_score.cpu().data.numpy(), anchor,
                                  img_size, scale)

        if self.training:
            # if training phase, then sample RoIs
            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_layer(
                roi, at.tonumpy(gt_bbox), at.tonumpy(gt_label),
                self.loc_normalize_mean, self.loc_normalize_std)

            # get gt_loc(offset from anchor to gt_bbox)
            gt_rpn_loc, gt_rpn_label = self.anchor_target_layer(
                at.tonumpy(gt_bbox), anchor, img_size)
            gt_rpn_loc = at.totensor(gt_rpn_loc)
            gt_rpn_label = at.totensor(gt_rpn_label).long()

            # bounding-box regression loss
            rpn_loc_loss = bbox_regression_loss(loc, gt_rpn_loc,
                                                gt_rpn_label.data,
                                                self.rpn_sigma)

            # foreground-background classification loss
            rpn_cls_loss = F.cross_entropy(score,
                                           gt_rpn_label.cuda(),
                                           ignore_index=-1)

            return sample_roi, gt_roi_loc, gt_roi_label, rpn_loc_loss, rpn_cls_loss

        return roi
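A shape walk-through of the forward pass above, with hypothetical numbers (an h = w = 50 feature map and 9 anchors per location):

# loc      -> (1, 50*50*9, 4) = (1, 22500, 4)   per-anchor box offsets
# score    -> (1, 22500, 2)                     fg/bg logits per anchor
# fg_score -> (1, 22500)                        softmaxed foreground probability
# anchor   -> (22500, 4)                        one box per (cell, anchor) pair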
Example No. 9
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)

            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # ori_img_ = (at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')
                _bboxes = trainer.faster_rcnn.predict([ori_img_],
                                                      visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                # plot predicted bboxes
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')

        if epoch == 13:
            break
Example No. 10
def test(img):
    img = t.from_numpy(img)[None]
    opt.caffe_pretrain = False  # this model was trained from a torchvision-pretrained model
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    # output the box coordinates
    bboxes = at.tonumpy(_bboxes[0])
    print(bboxes)  # box coordinates as a numpy array

    test_img = visdom_bbox(at.tonumpy(img[0]),
                           at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]).reshape(-1))
    trainer.vis.img('test_img', test_img)
Example No. 11
    def forward(self, imgs, bboxes, lbls, scale):
        # forward pass, get losses as tuple
        n = bboxes.shape[0]
        _, _, H, W = imgs.shape  #H, W = dimensions of images
        im_size = (H, W)

        features = self.model.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_ind, anchor = self.model.rpn(
            features, im_size, scale)

        #batch size = 1, therefore make variable singular
        rpn_loc = rpn_locs[0]
        rpn_score = rpn_scores[0]
        roi = rois
        bbox = bboxes[0]
        lbl = lbls[0]

        sample_roi, gt_roi_loc, gt_roi_lbl = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(lbl),
            self.model.loc_normalize_mean, self.model.loc_normalize_std)
        sample_roi_ind = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.model.head(features, sample_roi,
                                                 sample_roi_ind)

        # ----- RPN Losses -----
        rpn_cls_loss, rpn_loc_loss = self.rpn_loss(rpn_loc, rpn_score, bbox,
                                                   anchor, im_size)

        # ----- ROI losses -----
        roi_cls_loss, roi_loc_loss = self.roi_loss(roi_cls_loc, gt_roi_loc,
                                                   gt_roi_lbl)

        total = rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss

        # not sure if losses should be a dictionary instead, but here's a definition for that just in case
        #losses = {
        #    'rpn_loc_loss': rpn_loc_loss,
        #    'rpn_cls_loss': rpn_cls_loss,
        #    'roi_loc_loss': roi_loc_loss,
        #    'roi_cls_loss': roi_cls_loss,
        #    'total_loss'  : total
        #}
        losses = [
            rpn_loc_loss.to(self.device),
            rpn_cls_loss.to(self.device),
            roi_loc_loss.to(self.device),
            roi_cls_loss.to(self.device),
            total.to(self.device)
        ]
        return losses
Example No. 12
def compute_iou(pred_masks, gt_masks):
    pred_masks, gt_masks = np.squeeze(at.tonumpy(pred_masks)), np.squeeze(at.tonumpy(gt_masks))
    ious = []
    for i in range(len(pred_masks)):
        pred_mask = pred_masks[i]
        gt_mask = gt_masks[i]

        union = np.sum(np.logical_or(pred_mask, gt_mask))
        intersection = np.sum(np.logical_and(pred_mask, gt_mask))
        # guard against empty masks: define IoU as 0 when the union is empty
        iou = intersection / union if union > 0 else 0.0
        ious.append(iou)
    batch_iou = np.sum(np.array(ious))

    return batch_iou
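A small worked example (assuming at.tonumpy accepts torch tensors). Note that compute_iou returns the batch sum of IoUs, not the mean; val() below divides by the dataset size afterwards:

import torch

pred = torch.zeros(2, 1, 4, 4)
gt = torch.zeros(2, 1, 4, 4)
pred[0, 0, :2, :2] = 1; gt[0, 0, :2, :2] = 1    # identical masks -> IoU 1.0
pred[1, 0, 1:3, 1:3] = 1; gt[1, 0, :2, :2] = 1  # 1 px overlap, union 7 -> IoU 1/7
print(compute_iou(pred, gt))  # ~1.1429 (1.0 + 1/7)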
Example No. 13
def test():
    img_arr = read_image('demo.jpg')
    img = t.from_numpy(img_arr)[None]

    faster_rcnn = FasterRCNN()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    trainer.load('weights/chainer_best_model_converted_to_pytorch_0.7053.pth')
    opt.caffe_pretrain = True
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    vis_bbox(at.tonumpy(img[0]),
             at.tonumpy(_bboxes[0]),
             at.tonumpy(_labels[0]).reshape(-1),
             at.tonumpy(_scores[0]).reshape(-1))
Example No. 14
    def step2(self, imgs, bboxes, labels, scale, epoch):
        self.optimizer.zero_grad()
        _, _, H, W = imgs.shape
        img_size = (H, W)

        ############ EXTRACTOR STEP #################
        features1 = self.faster_rcnn.extractor1(imgs)
        features2 = self.faster_rcnn.extractor2(imgs)

        ############ RPN STEP #######################
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features1, img_size, scale)

        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        ############ HEAD STEP #######################
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features2, sample_roi,
                                                       sample_roi_index)

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
        rpn_loc_loss = t.tensor([0]).cuda()
        rpn_cls_loss = t.tensor([0]).cuda()

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss
                                           ] + [roi_loc_loss + roi_cls_loss]

        all_losses = LossTuple(*losses)
        all_losses.total_roi.backward()
        self.optimizer.step()
        self.update_meters(all_losses)
        return all_losses
Example No. 15
 def single_predict(self, img_path=''):
     starttime = datetime.datetime.now()
     img = read_image(img_path)
     img = t.from_numpy(img)[None]
     _bboxes, _labels, _scores = self.trainer.faster_rcnn.predict(
         img, visualize=True)
     endtime = datetime.datetime.now()
      print('predict time consumed = %ss' %
            round((endtime - starttime).total_seconds(), 6))
     ax = vis_bbox(at.tonumpy(img[0]),
                   at.tonumpy(_bboxes[0]),
                   at.tonumpy(_labels[0]).reshape(-1),
                   at.tonumpy(_scores[0]).reshape(-1))
     fig = ax.get_figure()
     fig.savefig("output.png")
Example No. 16
def Recognize(img):
    '''
    ## Author: Zuo Jiale
    ## Date: 2020-07-30
    ## Purpose: detect the target racing robot
    ## Input: one frame of ZED data
    ## Return: bounding-box coordinates of the car
    '''
    img = t.from_numpy(img)[None]
    opt.caffe_pretrain = False  # this model was trained from a torchvision-pretrained model
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,
                                                            visualize=True)
    bboxes = at.tonumpy(_bboxes[0])
    labels = at.tonumpy(_labels[0].reshape(-1))
    return bboxes, labels
Example No. 17
    def __call__(self, masks):
        # mask = mask[0]
        # masks = []
        # for i in range(bboxs.shape[0]):
        #     bbox = bboxs[i]
        #     # TODO: instead of convert to int, use crop function in ROIAlign
        #     sub_mask = mask[:, int(bbox[0]+1):int(bbox[2]), int(bbox[1]+1):int(bbox[3])]
        #     masks.append(sub_mask)
        #
        # return masks
        # img_id = img_id[0]
        # masks_path = os.path.join(opt.root_dir, img_id, 'masks.npy')
        # temp = []
        # masks = np.load(masks_path)
        # for i in range(masks.shape[0]):
        #     temp.append(transform.resize(masks[i], (14, 14), preserve_range=False, mode='constant')>=0.5)
        #
        # temp = np.array(temp).astype(np.float32)
        # return temp
        temp = []
        for i in range(len(masks)):
            mask = at.tonumpy(masks[i])[0].copy()
            # print(mask.shape)
            mask = transform.resize(mask, (14, 14),
                                    preserve_range=False,
                                    mode='constant')
            temp.append(mask.astype(np.float32))

        return np.array(temp).copy()
Example No. 18
def apply_mask_bbox(image, masks, bbox, color, alpha=0.5):
    """Apply the given mask to the image.
    """
    ax = plot.subplot(111)
    ax.imshow(np.transpose(np.squeeze(image / 255.), (1, 2, 0)))
    for i in range(bbox.shape[0]):
        y1, x1, y2, x2 = int(bbox[i][0]), int(bbox[i][1]), int(
            bbox[i][2]), int(bbox[i][3])
        h = y2 - y1
        w = x2 - x1
        rect = patches.Rectangle((x1, y1),
                                 w,
                                 h,
                                 linewidth=1,
                                 edgecolor='r',
                                 facecolor='none')
        ax.add_patch(rect)

        mask = at.tonumpy(masks[i])[0]
        mask = transform.resize(mask, (int(h), int(w)),
                                preserve_range=False,
                                mode='constant')
        for c in range(3):
            image[
                0, c, y1:y1 + mask.shape[0], x1:x1 + mask.shape[1]] = np.where(
                    mask == 1,
                    image[0, c, y1:y1 + mask.shape[0], x1:x1 + mask.shape[1]] *
                    (1 - alpha) + alpha * color[c] * 255,
                    image[0, c, y1:y1 + mask.shape[0], x1:x1 + mask.shape[1]])

    ax.imshow(np.transpose(np.squeeze(image / 255.), (1, 2, 0)))
    plot.show()
Example No. 19
def val(model, val_loader, criterion, epoch, vis):
    model.eval()
    batch_loss = 0
    avg_iou = 0
    for batch_idx, sample_batched in enumerate(val_loader):
        data = sample_batched['image']
        target = sample_batched['mask']
        data, target = Variable(data.type(opt.dtype), volatile=True), Variable(
            target.type(opt.dtype), volatile=True)
        output = model.forward(data)
        loss = criterion(output, target)
        batch_loss += loss.data[0]
        avg_iou += compute_iou(pred_masks=at.tonumpy(output >= 0.5).astype(
            np.float32),
                               gt_masks=target)

    batch_loss /= (batch_idx + 1)
    avg_iou /= len(val_loader.dataset)

    print('epoch: ' + str(epoch) + ', validation loss: ' + str(batch_loss) +
          ', avg_iou: ' + str(avg_iou))
    with open('logs.txt', 'a') as file:
        file.write('epoch: ' + str(epoch) + ', validation loss: ' +
                   str(batch_loss) + ', avg_iou: ' + str(avg_iou) + '\n')

    vis.plot('val loss', batch_loss)
    vis.plot('validation average IOU', avg_iou)
    return avg_iou
Example No. 20
def show_batch_train(sample_batched):
    """
    Visualize one training image and its corresponding mask
    """
    img_id, image, mask = sample_batched['img_id'], sample_batched['image'], sample_batched['mask']
    image, mask = np.squeeze(at.tonumpy(image)), np.squeeze(at.tonumpy(mask))

    image = inverse_normalize(image)

    combined = np.multiply(image, mask)
    ax1 = plt.subplot(121)
    ax1.imshow(image / 255.)
    ax1.set_title(img_id[0])
    ax2 = plt.subplot(122)
    ax2.imshow(combined / 255.)
    ax2.set_title(img_id[0])
    plt.show()
Example No. 21
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()  # switch to eval mode: disables dropout and fixes BN statistics
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))  # preprocess the image
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):  # iterate over each image to predict
            img = at.totensor(img[None]).float()  # (1, C, H, W)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)  # forward pass
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale  # RoIs mapped back to the original (pre-resize) image

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # Repeat the normalization mean/std once per class.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]
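A minimal usage sketch of the predict API documented above (assumes a trained detector and the repo's read_image helper; images are CHW, RGB, values in [0, 255]):

img = read_image('demo.jpg')
bboxes, labels, scores = faster_rcnn.predict([img], visualize=True)
ymin, xmin, ymax, xmax = bboxes[0][0]   # boxes are (ymin, xmin, ymax, xmax)
print(labels[0][0], scores[0][0])       # class index and confidence of the first box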
Example No. 22
def test(**kwargs):
    opt._parse(kwargs)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    trainer.load(
        'C:/Users/86188/Desktop/simple-faster-rcnn-pytorch-master/checkpoints/fasterrcnn_08042317_0.9090909090909093'
    )
    print('load success!')
    img = read_image('test_img/test.jpg')
    img = t.from_numpy(img)[None]
    opt.caffe_pretrain = False  # this model was trained from a torchvision-pretrained model
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,
                                                            visualize=True)
    test_img = visdom_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]).reshape(-1))
    trainer.vis.img('test_img', test_img)
Example No. 23
def imgflip(img, bbox, x_flip=True, y_flip=True):
    imgs = at.tonumpy(img[0])
    if y_flip:
        imgs = imgs[:, ::-1, :]  # flip along H (vertical flip)
    if x_flip:
        imgs = imgs[:, :, ::-1]  # flip along W (horizontal flip)
    imgs = np.expand_dims(imgs, axis=0)
    return inverse_normalize(imgs)
Example No. 24
def eval_mAP(trainer, val_loader):
    tqdm.monitor_interval = 0
    mAP = []
    for ii, sample in tqdm(enumerate(val_loader)):
        if len(sample.keys()) == 5:
            img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                sample['label']
            img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)

        else:
            img_id, img, scale = sample['img_id'], sample['image'], sample[
                'scale']
            bbox = np.zeros((1, 0, 4))
            label = np.zeros((1, 0, 1))
            img = img.cuda().float()
            img = Variable(img)
        # if bbox is None:
        #     continue
        scale = at.scalar(scale)
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_boxes, pred_labels, pred_scores = trainer.faster_rcnn.predict(
            [ori_img_], visualize=True)
        pred_boxes = pred_boxes[0]
        pred_labels = pred_labels[0]
        pred_scores = pred_scores[0]
        bbox = at.tonumpy(bbox[0])
        # Rescale back
        C, H, W = ori_img_.shape
        ori_img_ = transform.resize(ori_img_,
                                    (C, H * (1 / scale), W * (1 / scale)),
                                    mode='reflect')
        o_H, o_W = H * (1 / scale), W * (1 / scale)
        pred_boxes = resize_bbox(pred_boxes, (H, W), (o_H, o_W))
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
        mAP.append(map_iou(bbox, pred_boxes, pred_scores))
        # if ii>=100:
        #     break

    mAP = np.array(mAP)
    mAP = mAP[mAP != np.array(None)].astype(np.float32)

    return np.mean(mAP)
Example No. 25
def _smooth_l1_loss(x, t, in_weight, sigma):
    sigma2 = sigma**2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()

    flag = (abs_diff.data < (1. / sigma2)).float()
    y = (flag * (sigma2 / 2.) * (diff**2) + (1 - flag) *
         (abs_diff - 0.5 / sigma2))
    # NOTE: at.tonumpy detaches y from the autograd graph, so the returned sum
    # is fine for logging/evaluation but cannot be backpropagated.
    modif = at.tonumpy(y)
    modif[np.isnan(modif)] = 0
    return modif.sum()
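For reference, the piecewise form implemented above is the sigma-scaled smooth L1 loss from Fast R-CNN, plus a quick numeric check:

# f(d) = 0.5 * sigma**2 * d**2   if |d| < 1 / sigma**2
#      = |d| - 0.5 / sigma**2    otherwise
#
# sigma = 1: f(0.2) = 0.02 and f(2.0) = 1.5, so the call below prints 1.52
x = t.tensor([0.2, 2.0])
target = t.tensor([0.0, 0.0])
print(_smooth_l1_loss(x, target, t.ones(2), 1))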
Example No. 26
def run_test(model, test_loader):
    pred_masks = []
    img_ids = []
    images = []
    for batch_idx, sample_batched in tqdm(enumerate(test_loader)):
        data, img_id = sample_batched['image'], sample_batched['img_id']
        data = Variable(data.type(opt.dtype), volatile=True)
        output = model.forward(data)
        # output = (output > 0.5)
        output = at.tonumpy(output)
        for i in range(0, output.shape[0]):
            pred_mask = np.squeeze(output[i])
            mask_id = img_id[i]
            pred_mask = (pred_mask >= 0.5).astype(np.float32)
            pred_masks.append(pred_mask)
            img_ids.append(mask_id)
            ori_img_ = inverse_normalize(at.tonumpy(data[i]))
            images.append(ori_img_)

    return img_ids, images, pred_masks
Example No. 27
def draw_predict(pred_bboxes_, pred_labels_, pred_scores_):
    pred_bboxes1 = iter(pred_bboxes_)
    pred_labels1 = iter(pred_labels_)
    pred_scores1 = iter(pred_scores_)
    if opt.nms_type == 'soft_nms':
        write_path = 'result/'
    else:
        write_path = 'result_nms/'
    if opt.nms_use_label:
        write_path = 'label_' + write_path
    print(write_path)
    f = open('/media/chenli/E/VOCdevkit/VOC2007/ImageSets/Main/test2.txt')
    for pred_bbox, pred_label, pred_score in six.moves.zip(
            pred_bboxes1, pred_labels1, pred_scores1):
        id_ = f.readline()[:-1]
        img_file = '/media/chenli/E/VOCdevkit/VOC2007/JPEGImages/' + \
            str(id_) + '.jpg'
        image = cv2.imread(img_file)
        # convert to numpy format
        bboxs = at.tonumpy(pred_bbox)
        name = at.tonumpy(pred_label).reshape(-1)
        score = at.tonumpy(pred_score).reshape(-1)
        # Save each round of test-set predictions; ideally gate this on the
        # epoch and save only every 10 epochs, otherwise it wastes too much time.
        for i in range(len(name)):
            if score[i] <= opt.threshold:
                continue
            xmin = int(round(float(bboxs[i, 1])))
            ymin = int(round(float(bboxs[i, 0])))
            xmax = int(round(float(bboxs[i, 3])))
            ymax = int(round(float(bboxs[i, 2])))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
            cv2.putText(image, opt.VOC_BBOX_LABEL_NAMES[name[i]],
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        1e-3 * image.shape[0], (0, 0, 255), 1)
            cv2.putText(image,
                        str(score[i])[0:3], (xmin + 30, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0],
                        (0, 0, 255), 1)

        cv2.imwrite(write_path + str(id_) + '.jpg', image)
Example No. 28
    def forward(self, img_size, x, rois, roi_indices):
        """Forward the chain.

        We assume that there are :math:`N` batches.

        Args:
            x (Variable): 4D image variable.
            rois (Tensor): A bounding box array containing coordinates of
                proposal boxes.  This is a concatenation of bounding box
                arrays from multiple images in the batch.
                Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
                RoIs from the :math:`i` th image,
                :math:`R' = \\sum _{i=1} ^ N R_i`.
            roi_indices (Tensor): An array containing indices of images to
                which bounding boxes correspond to. Its shape is :math:`(R',)`.

        """
        # in case roi_indices is an ndarray
        img_h, img_w = img_size
        # size of the RoIs in the input image, as (h, w); currently unused
        roi_size = np.concatenate(
            (np.expand_dims(at.tonumpy(rois[:, 2] - rois[:, 0]), axis=1),
             np.expand_dims(at.tonumpy(rois[:, 3] - rois[:, 1]), axis=1)),
            axis=1)
        feature_h, feature_w = x.shape[2], x.shape[3]
        roi_indices = at.totensor(roi_indices).int()
        rois = at.totensor(rois).float()
        rois[:, 0] = rois[:, 0] / img_h * feature_h
        rois[:, 2] = rois[:, 2] / img_h * feature_h
        rois[:, 1] = rois[:, 1] / img_w * feature_w
        rois[:, 3] = rois[:, 3] / img_w * feature_w
        # pool = self.roi(x, indices_and_rois)
        rois = at.tovariable(rois)
        roi_indices = at.tovariable(roi_indices)
        pool = self.roi(x, rois, roi_indices)  # (128, 512, 7, 7)
        pool = pool.view(pool.size(0), -1)
        fc7 = self.classifier(pool)
        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)
        return roi_cls_locs, roi_scores
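A numeric sanity check of the RoI rescaling above (hypothetical numbers):

# img_size = (800, 800) with a (50, 50) feature map gives a factor of 50/800 = 1/16,
# so a RoI of (160, 320, 480, 640) in image coordinates maps to (10, 20, 30, 40).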
Example No. 29
    def predict(self, imgs, sizes=None, visualize=False):
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(array_tool.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = array_tool.totensor(img[None]).float()  # add a batch dim: (1, C, H, W)
            scale = img.shape[3] / size[1]  # W'/W, ratio of preprocessed image to original
            roi_cls_locs, roi_scores, rois, roi_indices = self(img,
                                                               scale=scale)

            # batch size is 1
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_locs.data
            roi = array_tool.totensor(rois) / scale

            mean = torch.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]  # (1, 84)
            std = torch.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)  # (R, 21, 4)

            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)  # broadcast to (R, 21, 4)
            cls_bbox = loc2bbox(
                array_tool.tonumpy(roi).reshape(-1, 4),
                array_tool.tonumpy(roi_cls_loc).reshape(-1, 4))
            cls_bbox = array_tool.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)  #(R, 84)
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(
                min=0, max=size[1])  # clip predicted bboxes to the original image size

            prob = array_tool.tonumpy(
                functional.softmax(array_tool.totensor(roi_score), dim=1))

            raw_cls_bbox = array_tool.tonumpy(cls_bbox)
            raw_prob = array_tool.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)  # collect per-image results

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example No. 30
def detec_test_pic(pth, pic_test):
    opt.load_path = opt.caffe_pretrain_path
    opt.env = 'detec-tset-pic'
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(pth)
    opt.caffe_pretrain = True  # this model was trained from caffe-pretrained model
    pic_index = 0

    for pic in tqdm(os.listdir(pic_test)):
        time.sleep(1)
        img = read_image(os.path.join(pic_test, pic))
        img = t.from_numpy(img)[None]
        _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,
                                                                visualize=True)
        pred_img = visdom_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]),
                               at.tonumpy(_labels[0]).reshape(-1),
                               at.tonumpy(_scores[0]).reshape(-1))
        trainer.vis.img('pred_img', pred_img)
        pic_index += 1
        if pic_index > 1000:
            break
Example No. 31
    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois, 
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            at.tonumpy(bbox),
            at.tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: all zeros because only batch size 1 is supported
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.tovariable(gt_roi_label).long()
        gt_roi_loc = at.tovariable(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)
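LossTuple here is a namedtuple over the five losses; a sketch of its definition, reconstructed from the order built above rather than copied from the repo:

from collections import namedtuple

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss', 'rpn_cls_loss',
                        'roi_loc_loss', 'roi_cls_loss', 'total_loss'])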
Example No. 32
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13: 
            break
Example No. 33
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores