def eval(dataloader, resnet, test_num=10000):
    pred_bboxes, pred_labels, pred_scores = list(), list(), list()
    gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
    for ii, data in enumerate(dataloader):
        (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) = data

        nms_scores, sorted_labels, sorted_cls_bboxes = resnet(
            imgs.cuda().float())
        if nms_scores is not None:
            keep = np.flatnonzero(tonumpy(nms_scores) > 0.5)  # indices of detections scoring above 0.5
            nms_scores = nms_scores[keep]
            sorted_labels = sorted_labels[keep]
            sorted_cls_bboxes = sorted_cls_bboxes[keep]

            pred_bboxes.append(
                np.reshape(tonumpy(sorted_cls_bboxes), (-1, 4)).copy())
            pred_labels.append(np.reshape(tonumpy(sorted_labels), (-1)).copy())
            pred_scores.append(np.reshape(tonumpy(nms_scores), (-1)).copy())
        else:
            pred_bboxes.append(np.array([]))
            pred_labels.append(np.array([]))
            pred_scores.append(np.array([]))
        gt_bboxes += list(gt_bboxes_.numpy())
        gt_labels += list(gt_labels_.numpy())
        gt_difficults += list(gt_difficults_.numpy())
        if ii == test_num:
            break

    result = eval_detection_voc(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults,
                                use_07_metric=True)
    return result
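
A minimal call-site sketch for this helper. `VOCTestDataset`, `opt`, and `model` are hypothetical names standing in for the surrounding project; `eval_detection_voc` (from chainercv) returns a dict with per-class 'ap' and overall 'map':

import torch

# Hypothetical wiring, assuming a VOC-style dataset class and a trained model.
test_dataloader = torch.utils.data.DataLoader(VOCTestDataset(opt),
                                              batch_size=1,  # eval above assumes batch size 1
                                              shuffle=False)
result = eval(test_dataloader, model, test_num=1000)
print('mAP: {:.4f}'.format(result['map']))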
Example #2
    def forward(self, inputs, scale=1.):
        if self.training:
            img_batch, bboxes, labels, _ = inputs

        else:
            img_batch = inputs

        _, _, H, W = img_batch.shape
        img_size = (H, W)
        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        #features = self.fpn([x2, x3, x4])
        features = self.conv2(x4)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn(
            features, img_size, scale)

        if self.training:
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                at.tonumpy(bboxes[0]), anchor, img_size)
            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
                rois, at.tonumpy(bboxes[0]), at.tonumpy(labels[0]),
                self.loc_normalize_mean, self.loc_normalize_std)
            sample_roi_index = t.zeros(len(sample_roi))

            roi_cls_loc, roi_score, appearance_features = self.roi_head(
                features, sample_roi, sample_roi_index)
            nms_scores, sorted_labels, sorted_cls_bboxes = self.duplicate_remover(
                sample_roi, roi_cls_loc, roi_score, appearance_features,
                img_size)
            result_loss = self.Loss(gt_rpn_loc, gt_rpn_label, gt_roi_loc,
                                    gt_roi_label, roi_cls_loc, roi_score,
                                    rpn_locs, rpn_scores)
            if nms_scores is not None:
                # Element 4 is the total loss; fold the learned-NMS loss into it.
                result_loss[4] += self.nmsLoss(bboxes, labels, nms_scores,
                                               sorted_labels,
                                               sorted_cls_bboxes)
            return result_loss
        else:
            roi_cls_loc, roi_score, appearance_features = self.roi_head(
                features, rois, roi_indices)
            nms_scores, sorted_labels, sorted_cls_bboxes = self.duplicate_remover(
                rois, roi_cls_loc, roi_score, appearance_features, img_size)
            return nms_scores, sorted_labels, sorted_cls_bboxes
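
A hedged training-step sketch for this forward pass; `model`, `train_dataloader`, and the optimizer settings are placeholders. It relies on the convention visible above that Loss returns a list whose element 4 is the total loss, onto which the learned-NMS loss is added whenever duplicate removal produced output:

optimizer = t.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
for img_batch, bboxes, labels, scale in train_dataloader:
    model.train()  # self.training must be True so forward unpacks the 4-tuple
    losses = model((img_batch.cuda().float(), bboxes.cuda(), labels.cuda(), scale))
    optimizer.zero_grad()
    losses[4].backward()  # total loss
    optimizer.step()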
Example #3
    def predict(self, imgs, sizes=None, visualize=False):
        
        self.eval()
        prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = totensor(rois[:, 1:]) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # torch.device identifies the device a torch.Tensor is allocated on.
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            mean = torch.Tensor(self.loc_normalize_mean).to(device).repeat(self.n_class)[None]
            std = torch.Tensor(self.loc_normalize_std).to(device).repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(tonumpy(roi).reshape((-1, 4)),
                                tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])  # y coords vs H
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])  # x coords vs W

            prob = tonumpy(F.softmax(totensor(roi_score), dim=1))

            raw_cls_bbox = tonumpy(cls_bbox)
            raw_prob = tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
        
        self.train()   
        
        return bboxes, labels, scores
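
`loc2bbox` is imported from elsewhere in the project; a NumPy sketch of the standard Faster R-CNN decoding it is assumed to implement, turning (dy, dx, dh, dw) offsets and (y1, x1, y2, x2) source boxes into predicted boxes:

import numpy as np

def loc2bbox(src_bbox, loc):
    # src_bbox: (R, 4) as (y_min, x_min, y_max, x_max); loc: (R, 4) as (dy, dx, dh, dw).
    src_h = src_bbox[:, 2] - src_bbox[:, 0]
    src_w = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_h
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_w
    # Shift the centre by a fraction of the box size; scale the size exponentially.
    ctr_y = loc[:, 0] * src_h + src_ctr_y
    ctr_x = loc[:, 1] * src_w + src_ctr_x
    h = np.exp(loc[:, 2]) * src_h
    w = np.exp(loc[:, 3]) * src_w
    dst_bbox = np.zeros(loc.shape, dtype=loc.dtype)
    dst_bbox[:, 0] = ctr_y - 0.5 * h
    dst_bbox[:, 1] = ctr_x - 0.5 * w
    dst_bbox[:, 2] = ctr_y + 0.5 * h
    dst_bbox[:, 3] = ctr_x + 0.5 * w
    return dst_bbox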
Example #4
    def forward(self, inputs, scale=1.):

        if self.training:
            img_batch, bboxes, labels, scale = inputs

        else:
            img_batch = inputs

        _, _, H, W = img_batch.shape
        img_size = (H, W)
        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn(
            features, img_size, scale)

        if self.training:
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                at.tonumpy(bboxes[0]),
                anchor,
                img_size)

            sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
                rois,
                at.tonumpy(bboxes[0]),
                at.tonumpy(labels[0]),
                self.loc_normalize_mean,
                self.loc_normalize_std)

            sample_roi_index = t.zeros(len(sample_roi))
            roi_cls_loc, roi_score = self.roi_head(features, sample_roi, sample_roi_index)


            return self.Loss(gt_rpn_loc, gt_rpn_label, gt_roi_loc, gt_roi_label,
                             roi_cls_loc, roi_score, rpn_locs, rpn_scores)
        else:
            roi_cls_loc, roi_score = self.roi_head(features, rois, roi_indices)
            return roi_cls_loc, roi_score, rois, roi_indices
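
A short inference sketch for this FPN variant; the dummy input size is an arbitrary assumption:

model.eval()  # self.training == False, so forward expects the raw image batch
with t.no_grad():
    dummy = t.randn(1, 3, 600, 800).cuda()  # (N, C, H, W)
    roi_cls_loc, roi_score, rois, roi_indices = model(dummy)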
Example #5
    def forward(self, gt_bboxes, gt_labels, nms_scores, sorted_labels, sorted_cls_bboxes):
        sorted_score, prob_argsort = t.sort(nms_scores, descending=True)
        sorted_cls_bboxes = sorted_cls_bboxes[prob_argsort]
        sorted_labels = sorted_labels[prob_argsort]
        sorted_labels = tonumpy(sorted_labels)
        gt_labels = tonumpy(gt_labels)

        nms_gt = t.zeros_like(sorted_score)

        eps = 1e-8

        iou = bbox_iou(tonumpy(gt_bboxes[0]), tonumpy(sorted_cls_bboxes))
        for gt_idx in range(len(iou)):
            accept_iou = np.reshape(np.argwhere(iou[gt_idx] > 0.5), -1)
            accept_label = np.reshape(
                np.argwhere(sorted_labels[accept_iou] == gt_labels[0][gt_idx]), -1)

            if len(accept_label) != 0:
                # The first (highest-scoring) matching detection is the positive target.
                nms_gt[accept_iou[accept_label[0]]] = 1.

        loss = (nms_gt * (sorted_score + eps).log()
                + (1 - nms_gt) * (1 - sorted_score + eps).log())
        loss = -loss.mean()
        return loss
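
`bbox_iou` is assumed to return the pairwise IoU matrix indexed as iou[gt_idx][detection_idx] above; a minimal NumPy sketch of that standard computation:

import numpy as np

def bbox_iou(bbox_a, bbox_b):
    # bbox_a: (N, 4), bbox_b: (K, 4), both as (y_min, x_min, y_max, x_max); returns (N, K).
    tl = np.maximum(bbox_a[:, None, :2], bbox_b[None, :, :2])  # intersection top-left
    br = np.minimum(bbox_a[:, None, 2:], bbox_b[None, :, 2:])  # intersection bottom-right
    area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)  # zero when boxes do not overlap
    area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
    area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)
    return area_i / (area_a[:, None] + area_b[None, :] - area_i)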
Example #6
    def predict(self, imgs, visualize):
        self.use_preset(isTraining=False)
        if visualize:
            self.training=False
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
            # Assumption: inputs are already pre-processed (C, H, W) tensors, so take
            # sizes (needed by the loop below) from their spatial shapes.
            sizes = [img.shape[1:] for img in imgs]
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            self.n_class = 21  # 20 VOC classes + background
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
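
A hedged call-site sketch; `read_image` is a hypothetical loader, and the (C, H, W) channel-first layout matches the visualize branch above, which takes each size from img.shape[1:]:

img = read_image('demo.jpg')  # hypothetical loader returning a (C, H, W) array
bboxes, labels, scores = model.predict([img], visualize=True)
print(bboxes[0], labels[0], scores[0])  # detections for the single input image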
Example #7
    def _suppress(self, raw_cls_bbox, raw_prob):
        bbox = list()
        label = list()
        score = list()
        # skip cls_id = 0 because it is the background class
        for l in range(1, self.n_class):
            cls_bbox_l = raw_cls_bbox.reshape((-1, self.n_class, 4))[:, l, :]
            prob_l = raw_prob[:, l]
            mask = prob_l > self.score_thresh
            cls_bbox_l = cls_bbox_l[mask]
            prob_l = prob_l[mask]

            keep = nms(totensor(cls_bbox_l), totensor(prob_l), self.nms_thresh)
            keep = tonumpy(keep)
            bbox.append(cls_bbox_l[keep])
            # The labels are in [0, self.n_class - 2].
            label.append((l - 1) * np.ones((len(keep),)))
            score.append(prob_l[keep])
        bbox = np.concatenate(bbox, axis=0).astype(np.float32)
        label = np.concatenate(label, axis=0).astype(np.int32)
        score = np.concatenate(score, axis=0).astype(np.float32)
        return bbox, label, score
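
The `nms` call above delegates to a library routine (e.g. torchvision.ops.nms); a pure-NumPy sketch of the greedy suppression it performs, for reference:

import numpy as np

def nms_numpy(boxes, scores, iou_thresh):
    # boxes: (N, 4) as (y1, x1, y2, x2); returns kept indices, best score first.
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        tl = np.maximum(boxes[i, :2], boxes[rest, :2])
        br = np.minimum(boxes[i, 2:], boxes[rest, 2:])
        inter = np.prod(np.clip(br - tl, 0, None), axis=1)
        area_i = np.prod(boxes[i, 2:] - boxes[i, :2])
        area_rest = np.prod(boxes[rest, 2:] - boxes[rest, :2], axis=1)
        iou = inter / (area_i + area_rest - inter)
        order = rest[iou <= iou_thresh]  # drop everything overlapping the winner
    return np.array(keep, dtype=np.int64)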
Example #8
    def forward(self, imgs, bboxes, labels, scale):
       
        n = bboxes.shape[0]  # batch size
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, anchor = self.faster_rcnn.rpn(features, img_size, scale)
        # rpn_locs has shape (hh*ww*9, 4) and rpn_scores (hh*ww*9, 2); rois has shape
        # (2000, 4), roi_indices is unused, and anchor has shape (hh*ww*9, 4). H and W are
        # the dimensions after preprocessing. The RPN scores the (H/16)x(W/16)x9 (roughly
        # 20000) anchors as foreground, keeps the top 12000, and applies NMS to get the
        # coordinates of ~2000 approximate target boxes G^, so roi has shape (2000, 4).

        # The code requires N == 1; drop the batch dimension to simplify indexing.
        bbox = bboxes[0]  # bboxes has shape (N, R, 4)
        label = labels[0]  # labels has shape (N, R)
        rpn_score = rpn_scores[0]  # (hh*ww*9, 2)
        rpn_loc = rpn_locs[0]  # (hh*ww*9, 4)
        roi = rois  # (2000, 4)


        # Sample RoIs and forward
        # it's fine to break the computation graph of rois, 
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            tonumpy(bbox),
            tonumpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        
        # Because of how ProposalTargetCreator is designed, an index column has to be
        # prepended to the sampled RoIs here.
        sample_roi_index = np.zeros(len(sample_roi))
        sample_roi = np.insert(sample_roi, 0, values=sample_roi_index, axis=1)
        
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(tonumpy(bbox), anchor, img_size)
        
        gt_rpn_label = totensor(gt_rpn_label).long()
        gt_rpn_loc = totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: cross_entropy's default ignore_index is -100, so pass -1 explicitly
        # to skip the anchors labelled "ignore" by AnchorTargetCreator.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(device), ignore_index=-1)

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(device), totensor(gt_roi_label).long()]
        
        gt_roi_label = totensor(gt_roi_label).long()
        gt_roi_loc = totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
        # Puzzling leftover, kept disabled:
        # self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.to(device))

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return losses
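
`_fast_rcnn_loc_loss` is defined elsewhere; a sketch of the sigma-weighted smooth-L1 localization loss it is conventionally assumed to compute, restricted to positive samples and normalized by the number of non-ignored ones:

def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    # Only positive (foreground) samples contribute to the regression loss.
    in_weight = (gt_label > 0).view(-1, 1).float()
    diff = in_weight * (pred_loc - gt_loc)
    abs_diff = diff.abs()
    sigma2 = sigma ** 2
    # Smooth L1: quadratic for |x| < 1/sigma^2, linear beyond.
    flag = (abs_diff < 1. / sigma2).float()
    loss = flag * 0.5 * sigma2 * diff ** 2 + (1. - flag) * (abs_diff - 0.5 / sigma2)
    # Normalize by the number of samples whose label is not the ignore value (-1).
    return loss.sum() / torch.clamp((gt_label >= 0).sum().float(), min=1.)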
Example #9
    def forward(self, sample_roi, roi_cls_loc, roi_score, appearance_features,
                size):
        N = sample_roi.shape[0]
        roi_score = roi_score.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(sample_roi)

        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class, 4)
        # Clip bounding boxes: cls_bbox is (R, n_class, 4) here, so index the coordinate
        # axis; even positions (y1, y2) are clipped to H, odd (x1, x2) to W.
        cls_bbox[:, :, 0::2] = (cls_bbox[:, :, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, :, 1::2] = (cls_bbox[:, :, 1::2]).clamp(min=0, max=size[1])

        prob = F.softmax(at.tovariable(roi_score), dim=1)

        prob, prob_argmax = torch.max(prob, dim=-1)
        cls_bbox = cls_bbox[np.arange(start=0, stop=N), prob_argmax]

        nonzero_idx = torch.nonzero(prob_argmax)

        if nonzero_idx.size(0) == 0:
            return None, None, None
        else:
            nonzero_idx = nonzero_idx[:, 0]
            prob_argmax = prob_argmax[nonzero_idx]
            prob = prob[nonzero_idx]
            cls_bbox = cls_bbox[nonzero_idx]
            appearance_features_nobg = appearance_features[nonzero_idx]
            sorted_score, prob_argsort = torch.sort(prob, descending=True)

            sorted_prob = prob[prob_argsort]  # identical to sorted_score
            sorted_cls_bboxes = cls_bbox[prob_argsort]
            sorted_labels = prob_argmax[prob_argsort]
            sorted_features = appearance_features_nobg[prob_argsort]

            nms_rank_embedding = RankEmbedding(sorted_prob.size()[0],
                                               self.appearance_feature_dim)
            nms_rank = self.nms_rank_fc(nms_rank_embedding)
            roi_feat_embedding = self.roi_feat_embedding_fc(sorted_features)
            nms_embedding_feat = nms_rank + roi_feat_embedding
            position_embedding = PositionalEmbedding(
                sorted_cls_bboxes, dim_g=self.geo_feature_dim)
            nms_logit = self.relation_module(nms_embedding_feat,
                                             position_embedding)
            nms_logit = self.nms_logit_fc(nms_logit)
            s1 = self.sigmoid(nms_logit).view(-1)
            nms_scores = s1 * sorted_prob

            return nms_scores, sorted_labels - 1, sorted_cls_bboxes
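
`RankEmbedding` (like `PositionalEmbedding`) comes from the Relation Networks learned-NMS head and is defined elsewhere; a sketch of a Transformer-style sinusoidal rank embedding consistent with the call above — the wavelength base of 1000 and the device handling are assumptions:

def RankEmbedding(rank_num, feat_dim, wave_len=1000.):
    # Embed ranks 0..rank_num-1 into feat_dim dimensions via sin/cos at geometric wavelengths.
    rank = torch.arange(rank_num, dtype=torch.float32).view(-1, 1)  # (N, 1)
    feat_range = torch.arange(feat_dim // 2, dtype=torch.float32)   # (feat_dim/2,)
    dim_mat = wave_len ** ((2. / feat_dim) * feat_range)            # wavelength per channel
    angles = rank / dim_mat.view(1, -1)                             # (N, feat_dim/2)
    emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)  # (N, feat_dim)
    return emb.cuda() if torch.cuda.is_available() else emb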