Example #1
 def __call__(self, anchors: torch.Tensor, truths):
     # filter out too small objects
     truths_wh = truths[:, 2:] - truths[:, :2]
     msk = (truths_wh[:, 0] < self.igs) | (truths_wh[:, 1] < self.igs)
     truths = truths[~msk]
     truths_wh = truths_wh[~msk]
     # compute IOUs
     overlaps = jaccard(
         truths,
         point_form(anchors)
     )  # size [num_truths, num_priors]
     # [num_truths] best prior for each ground truth
     best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
     recall_msk = best_prior_overlap <= 0.5
     l1 = best_prior_overlap.mean()
     l2 = (best_prior_overlap[recall_msk]).mean()
     l3 = (best_prior_overlap ** (1. / 3)).mean()
     # approximate SSD localization loss (kept for reference, disabled)
     # diff_wh = anchors[best_prior_idx, 2:] / truths_wh
     # diff_wh = diff_wh.log().abs()
     # diff_wh = torch.where(diff_wh < 1., 0.5 * diff_wh ** 2, diff_wh - 0.5)
     # l4 = diff_wh.sum(dim=1).mean() * self.l
     # loss = (-(l1.log() + l2.log() + l3.log() * 3) + l4) / 4.
     loss = -(l1.log() + l2.log() + l3.log() * 3) / 3.
     if self.decay > 0.01:
         best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=False)
         l0 = -best_truth_overlap.mean().log()
         loss = l0 * self.decay + loss * (1. - self.decay)
         self.decay *= 0.9
     return loss
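Every example on this page leans on the same two helpers from utils/box_utils.py. A minimal sketch under the usual SSD conventions (mirroring amdegroot/ssd.pytorch; the projects' own versions may differ):

import torch

def point_form(boxes):
    # convert (cx, cy, w, h) priors to (xmin, ymin, xmax, ymax) corners
    return torch.cat((boxes[:, :2] - boxes[:, 2:] / 2,
                      boxes[:, :2] + boxes[:, 2:] / 2), dim=1)

def intersect(box_a, box_b):
    # pairwise intersection areas, size [A, B]
    A, B = box_a.size(0), box_b.size(0)
    max_xy = torch.min(box_a[:, None, 2:].expand(A, B, 2),
                       box_b[None, :, 2:].expand(A, B, 2))
    min_xy = torch.max(box_a[:, None, :2].expand(A, B, 2),
                       box_b[None, :, :2].expand(A, B, 2))
    inter = torch.clamp(max_xy - min_xy, min=0)
    return inter[:, :, 0] * inter[:, :, 1]

def jaccard(box_a, box_b):
    # pairwise IoU, size [A, B]: intersection over union of corner-form boxes
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]))[:, None]
    area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1]))[None, :]
    return inter / (area_a + area_b - inter)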
Example #2
 def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
     sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
     priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]
     with torch.no_grad():
         overlaps = jaccard(
             truths,
             point_form(priors)
         )  # size [num_truths, num_priors]
     # [num_priors] best ground truth for each prior
     best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=False)
     # [num_truths] best prior for each ground truth
     best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
     # replace original best truth indexes whose prior boxes are the best priors of given truths
     best_truth_overlap[best_prior_idx] = best_prior_overlap
     best_truth_idx[best_prior_idx] = torch.arange(best_prior_idx.size(0), device=best_prior_idx.device)
     # create filter
     x_filter = torch.zeros_like(best_truth_overlap)  # match device/dtype of the overlaps
     x_filter[best_truth_overlap > self.thresh] = 1.
     x_filter[best_prior_idx] = self.k
     # filtering
     msk = x_filter > 1e-7
     x_filter = x_filter[msk]
     best_truth_overlap = best_truth_overlap[msk]
     # return loss value
     return ((sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum()
             + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
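The two max calls plus the override implement SSD-style matching: pick each prior's best ground truth, then force every ground truth to keep its own best prior even when that IoU is below the threshold. A toy check with hand-picked overlaps (hypothetical numbers):

import torch

overlaps = torch.tensor([[0.30, 0.10],   # GT 0 vs priors 0, 1
                         [0.35, 0.45]])  # GT 1 vs priors 0, 1
best_truth_overlap, best_truth_idx = overlaps.max(0)  # per prior
best_prior_overlap, best_prior_idx = overlaps.max(1)  # per ground truth
best_truth_overlap[best_prior_idx] = best_prior_overlap
best_truth_idx[best_prior_idx] = torch.arange(2)
# prior 0 is reassigned from GT 1 (0.35) to GT 0 (0.30), so GT 0 still
# gets a prior even under a 0.4 threshold
print(best_truth_idx, best_truth_overlap)  # tensor([0, 1]) tensor([0.3000, 0.4500])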
Example #3
 def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
     sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
     priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]
     with torch.no_grad():
         overlaps = jaccard(
             truths,
             point_form(priors)
         )  # size [num_truths, num_priors]
     # [num_priors] best ground truth for each prior
     best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=False)
     # [num_truths] best prior for each ground truth
     best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
     # replace original best truth indexes whose prior boxes are the best priors of given truths
     # best_truth_overlap[best_prior_idx] = best_prior_overlap
     best_truth_idx[best_prior_idx] = torch.arange(best_prior_idx.size(0), device=best_prior_idx.device)
     # filter
     x_filter = torch.zeros_like(best_truth_overlap)  # match device/dtype of the overlaps
     x_filter[best_truth_overlap > self.thresh] = 1.
     x_filter[best_prior_idx] = self.k
     # encode, L1_loss
     encoded_dis = encode(truths[best_truth_idx], priors, variance)
     encoded_dis = torch.abs(encoded_dis)
     l1_tensor = torch.where(encoded_dis < 1., 0.5 * encoded_dis ** 2, encoded_dis - 0.5)
     l1_tensor = l1_tensor.sum(dim=1)
     # return loss value
     return ((sigmoid_alphas * x_filter * l1_tensor).sum() + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
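The encode call above is the usual SSD offset encoding; a sketch matching ssd.pytorch's signature, assuming corner-form matched truths and (cx, cy, w, h) priors (the project's own version may differ):

import torch

def encode(matched, priors, variances):
    # center offsets, scaled by variance[0] and the prior size
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
    g_cxcy /= variances[0] * priors[:, 2:]
    # log-scale size ratios, scaled by variance[1]
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    g_wh = torch.log(g_wh) / variances[1]
    return torch.cat([g_cxcy, g_wh], dim=1)  # [N, 4] regression targets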
Example #4
 def __mk_iou_tensor(self, prior_boxes):
     if isinstance(prior_boxes, int):
         i, j = prior_boxes >> 16, prior_boxes & 0xffff
         prior_boxes = self.prior_groups[i][j]
     ret = torch.zeros(self.gts.size(0))
     for start, end in self.intervals:
         truths = self.gts[start:end].cuda()
         overlaps = jaccard(truths, prior_boxes)
         best_prior_overlap, _ = overlaps.max(1, keepdim=False)
         ret[start:end] = best_prior_overlap
     return ret
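The int branch unpacks a (group, index) pair from one integer: the group sits in the high 16 bits, the index in the low 16. A quick round-trip check:

i, j = 3, 1200
key = (i << 16) | j
assert (key >> 16, key & 0xffff) == (3, 1200)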
Example #5
def match(truths, priors, all_priors, mask_t, idx):

    iou_map = jaccard(point_form(priors), truths)
    iou_map_global = jaccard(point_form(all_priors), truths)
    feature_size = int(iou_map.shape[0])
    max_iou, _ = torch.max(iou_map_global, dim=0)
    mask_per_img = torch.zeros([feature_size], dtype=torch.int64).cuda()

    for k in range(truths.shape[0]):
        if torch.sum(truths[k]) == 0.:
            break
        #if max_iou[k] < 0.2:
        #    continue
        max_iou_per_gt = 0.35  #max_iou[k] * 0.5
        #mask_per_gt = torch.sum((iou_map[:,k] > max_iou_per_gt).view(feature_size, num_anchors), dim=1)
        mask_per_gt = iou_map[:, k] > max_iou_per_gt
        mask_per_gt = mask_per_gt.long()
        mask_per_img += mask_per_gt

    mask_per_img = mask_per_img > 0
    mask_t[idx] = mask_per_img
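match fills one row of a shared mask in place; a hedged driver sketch with hypothetical shapes and data (the real caller and prior layout are not shown in this snippet):

import torch

batch_size = 2
layer_priors = torch.rand(100, 4).cuda()   # (cx, cy, w, h) priors of one layer
all_priors = torch.rand(500, 4).cuda()     # priors of every layer
truths_list = [torch.rand(3, 4).cuda() for _ in range(batch_size)]  # corner form
mask_t = torch.zeros(batch_size, layer_priors.size(0), dtype=torch.bool).cuda()
for idx in range(batch_size):
    match(truths_list[idx], layer_priors, all_priors, mask_t, idx)
# mask_t[idx] now flags every prior of this layer whose IoU with some GT > 0.35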
Example #6
def fast_nms(box_thre, coef_thre, class_thre, cfg, second_threshold=False):
    class_thre, idx = class_thre.sort(
        1, descending=True)  # [80, 64 (the number of kept boxes)]

    idx = idx[:, :cfg.top_k].contiguous()
    class_thre = class_thre[:, :cfg.top_k]

    num_classes, num_dets = idx.size()

    box_thre = box_thre[idx.reshape(-1), :].reshape(num_classes, num_dets,
                                                    4)  # [80, 64, 4]
    coef_thre = coef_thre[idx.reshape(-1), :].reshape(num_classes, num_dets,
                                                      -1)  # [80, 64, 32]

    iou = jaccard(box_thre, box_thre)
    iou.triu_(diagonal=1)
    iou_max, _ = iou.max(dim=1)

    # Now just filter out the ones higher than the threshold
    keep = (iou_max <= cfg.nms_iou_thre)

    # We should also only keep detections over the confidence threshold, but at the cost of
    # maxing out your detection count for every image, you can just not do that. Because we
    # have such a minimal amount of computation per detection (matrix multiplication only),
    # this increase doesn't affect us much (+0.2 mAP for 34 -> 33 fps), so we leave it out.
    # However, when you implement this in your method, you should do this second threshold.
    if second_threshold:
        keep *= (class_thre > cfg.nms_score_thresh)

    # Assign each kept detection to its corresponding class
    class_ids = torch.arange(num_classes,
                             device=box_thre.device)[:, None].expand_as(keep)

    class_ids = class_ids[keep]

    box_nms = box_thre[keep]
    coef_nms = coef_thre[keep]
    class_nms = class_thre[keep]

    # Only keep the top cfg.max_detections highest scores across all classes
    class_nms, idx = class_nms.sort(0, descending=True)

    idx = idx[:cfg.max_detections]
    class_nms = class_nms[:cfg.max_detections]

    class_ids = class_ids[idx]
    box_nms = box_nms[idx]
    coef_nms = coef_nms[idx]

    return box_nms, coef_nms, class_ids, class_nms
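Why the triu_/max pair suppresses boxes: after the score sort, iou[c, i, j] with i < j is the overlap of box j with a higher-scoring box i, so a column-wise max over i tells whether any better box kills j. A 2-D toy check using the jaccard sketch from Example #1 (hypothetical boxes):

import torch

boxes = torch.tensor([[ 0.,  0., 10., 10.],   # highest score
                      [ 1.,  1., 11., 11.],   # IoU ~0.68 with box 0
                      [20., 20., 30., 30.]])  # disjoint
iou = jaccard(boxes, boxes)
iou.triu_(diagonal=1)        # keep only better-vs-worse pairs
iou_max, _ = iou.max(dim=0)  # dim=0 in 2-D; dim=1 in the batched code above
print(iou_max <= 0.5)        # tensor([ True, False,  True])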
Example #7
    def build_targets(self, target, anchors, w, h, ignore_threshold):
        obj_mask = torch.zeros(self.batch_size, self.num_anchors, h, w).byte()
        noobj_mask = torch.ones(self.batch_size, self.num_anchors, h, w).byte()
        tx = torch.zeros(self.batch_size, self.num_anchors, h, w)
        ty = torch.zeros(self.batch_size, self.num_anchors, h, w)
        tw = torch.zeros(self.batch_size, self.num_anchors, h, w)
        th = torch.zeros(self.batch_size, self.num_anchors, h, w)
        tconf = torch.zeros(self.batch_size, self.num_anchors, h, w)
        tcls = torch.zeros(self.batch_size, self.num_anchors, h, w, self.classes)

        target_bbox = target.clone()  # copy so the scaling below doesn't mutate `target`
        target_bbox[..., :-1:2] *= w
        target_bbox[..., 1:-1:2] *= h
        gwh = target_bbox[..., 2:-1]
        anchor_shape = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))
        gwh_shape = torch.FloatTensor(np.concatenate((np.zeros_like(gwh), np.array(gwh)), 2)).contiguous()
        for bs in range(target_bbox.size(0)):
            for tg in range(target_bbox.size(1)):
                if target_bbox[bs, tg][:-1].sum() == 0:
                    continue
                gx = target_bbox[bs, tg, 0]
                gy = target_bbox[bs, tg, 1]
                gw = target_bbox[bs, tg, 2]
                gh = target_bbox[bs, tg, 3]
                # Get grid box indices
                gi = int(gx)
                gj = int(gy)
                gi = 1 if gi == 0 else gi
                gj = 1 if gj == 0 else gj
                # acquire the best anchor by computing ious
                anchor_ious = jaccard(gwh_shape[bs, tg].unsqueeze(0), anchor_shape).squeeze()
                # if overlap more than threshold, set no object mask zero.
                noobj_mask[bs, anchor_ious > ignore_threshold, gj, gi] = 0  # (row=gj, col=gi), matching the assignments below
                # Find the best matching anchor box
                best_n = int(torch.argmax(anchor_ious))  # plain int indexes both tensors and the anchors list
                # mask
                obj_mask[bs, best_n, gj, gi] = 1
                noobj_mask[bs, best_n, gj, gi] = 0
                # coordinate
                tx[bs, best_n, gj, gi] = gx - gi
                ty[bs, best_n, gj, gi] = gy - gj
                tw[bs, best_n, gj, gi] = torch.log(gw / anchors[best_n][0] + 1e-16)
                th[bs, best_n, gj, gi] = torch.log(gh / anchors[best_n][1] + 1e-16)
                # object
                tconf[bs, best_n, gj, gi] = 1
                # one-hot encoding of label
                tcls[bs, best_n, gj, gi, int(target_bbox[bs, tg, -1])] = 1
        return obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls
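Padding gwh with zeros makes jaccard compare ground truths and anchors by shape alone: both boxes share the origin corner, so the IoU depends only on width and height. A minimal check (hand-picked sizes, using the jaccard sketch from Example #1):

import torch

gt_wh = torch.tensor([[0., 0., 4., 6.]])                        # 4x6 box
anchor_wh = torch.tensor([[0., 0., 4., 6.], [0., 0., 8., 3.]])  # 4x6 and 8x3
print(jaccard(gt_wh, anchor_wh))  # tensor([[1.0000, 0.3333]])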
Example #8
def compare(bbox, other):
    means = []
    for k, (p1, p2) in enumerate(zip(bbox, other)):
        exp_p1 = torch.zeros(p1.size(0), 4)
        exp_p2 = torch.zeros(p2.size(0), 4)
        exp_p1[:, 2:] = p1
        exp_p2[:, 2:] = p2
        overlaps = jaccard(
            point_form(exp_p1),
            point_form(exp_p2)
        )  # size [num_p1, num_p2]
        best_overlap, _ = overlaps.max(1)
        means += [best_overlap.mean().item()]
        print("Layer %d avg overlap = %.4f" % (k, means[-1]))
    print("Mean Avg Overlap = %.4f" % (sum(means) / len(means)))
Example #9
def mk_iou_tensor(anchors: torch.Tensor,
                  gts: torch.Tensor,
                  interval=512,
                  ret_idx=False):
    intervals = [i for i in range(0, gts.size(0), interval)] + [gts.size(0)]
    ret = torch.zeros(gts.size(0))
    idx = None
    if ret_idx:
        idx = torch.zeros(gts.size(0), dtype=torch.long)
    for start, end in zip(intervals[:-1], intervals[1:]):
        truths = gts[start:end].cuda()
        overlaps = jaccard(truths, anchors)
        assert torch.isnan(overlaps).sum().item() == 0
        best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
        ret[start:end] = best_prior_overlap
        if ret_idx:
            idx[start:end] = best_prior_idx
        del overlaps, best_prior_overlap, best_prior_idx
    if ret_idx:
        return ret, idx
    return ret
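A typical use is a recall-style coverage statistic over a large ground-truth set, chunked so at most interval truths sit on the GPU at once. A hedged sketch with hypothetical data:

import torch

xy = torch.rand(5000, 2) * 0.8                   # hypothetical corner-form truths,
all_gts = torch.cat([xy, xy + 0.1], dim=1)       # all 0.1 x 0.1 boxes
prior_boxes = torch.rand(8732, 4) * 0.5 + 0.25   # hypothetical (cx, cy, w, h) priors
anchors = point_form(prior_boxes).cuda()
best_iou = mk_iou_tensor(anchors, all_gts, interval=512)
print('GT coverage @0.5 IoU:', (best_iou > 0.5).float().mean().item())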
Example #10
def get_conf_gt(detection, h, w, annopath, classes=HELMET_CLASSES, cls_to_ind=None):
    num_classes = len(classes)  # use the `classes` argument rather than hard-coding HELMET_CLASSES
    dets = decode_raw_detection(detection, h, w)
    assert num_classes == len(dets)
    if cls_to_ind is None:
        cls_to_ind = dict(zip(classes, range(len(classes))))
    rec = parse_rec(annopath)
    bbgt = [torch.tensor([]) for _ in range(num_classes)]
    for cls_idx in range(num_classes):
        bbgt[cls_idx] = torch.tensor([x['bbox'] for x in rec if cls_to_ind[x['name']] == cls_idx], dtype=torch.float)
    bbdet = [dets[i][:, 1:] if dets[i].size(0) > 0 else torch.tensor([]) for i in range(len(dets))]

    cls_ious = [torch.tensor([]) for _ in range(num_classes)]
    for cls_idx in range(num_classes):
        # K * 4
        bb = bbdet[cls_idx]
        # N * 4
        gt = bbgt[cls_idx]
        if gt.size(0) == 0 or bb.size(0) == 0:
            continue
        iou = jaccard(gt, bb).t()
        cls_ious[cls_idx] = iou
    max_ious = [x.max(1)[0] if x.size(0) > 0 else None for x in cls_ious]
    return cls_ious, max_ious
Example #11
 def forward(self, locs: torch.Tensor, params: torch.Tensor, truths, variance=(0.1, 0.2)):
     sigmoid_alphas = params[:, -1].sigmoid()  # size [num_priors]
     priors = torch.cat([locs, params[:, :2]], dim=1)  # size [num_priors, 4]
     with torch.no_grad():
         overlaps = jaccard(
             truths,
             point_form(priors)
         )  # size [num_truths, num_priors]
     # [num_priors] best ground truth for each prior
     best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=False)
     # [num_truths] best prior for each ground truth
     best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=False)
     # replace original best truth indexes whose prior boxes are the best priors of given truths
     best_truth_overlap[best_prior_idx] = best_prior_overlap
     best_truth_idx[best_prior_idx] = torch.arange(best_prior_idx.size(0), device=best_prior_idx.device)
     # create filter
     x_filter = torch.zeros_like(best_truth_overlap)  # match device/dtype of the overlaps
     x_filter[best_truth_overlap > self.thresh] = 1.
     x_filter[best_prior_idx] = self.k
     # filtering
     msk = x_filter > 1e-7
     x_filter = x_filter[msk]
     best_truth_overlap = best_truth_overlap[msk]
     # log info
     num_zero = (best_truth_overlap < 1e-7).sum().item()
     print('%d best truths after filtering, %d with near-zero overlap'
           % ((x_filter > 1e-4).sum().item(), num_zero))
     print('%d best priors, of which %d priors fail to meet iou threshold'
           % (best_prior_idx.size(0), (best_prior_overlap <= self.thresh).sum().item()))
     ret = ((sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum()
            + self.beta * sigmoid_alphas.sum()) / x_filter.sum()
     print("loss fn: (%.2f(1st term) + %.2f(2nd term)) / %.2f(3rd term) = %.2f"
           % ((sigmoid_alphas[msk] * x_filter * best_truth_overlap).sum().item(),
              self.beta * sigmoid_alphas.sum().item(),
              x_filter.sum().item(), ret.item()))
     # return loss value
     return ret
Example #12
    def forward(self, output, labels):
        """
        pred is result of after used nms, like:
        ([pred_num, 6],
            ......
         [pred_num, 6]). 6 shape like [box + socre + cls]
        targets is real box, like:
        ([box_nums, 5],
            ......
         [box_nums, 5]). 5 shape like [box + cls]
        """

        num = len(output)
        stats = []
        for img_id in range(num):  # avoid shadowing the builtin `id`
            targets = labels[img_id]
            preds = output[img_id, output[img_id, :, 4].gt(0)]
            num_gt = len(targets)  # number of target
            tcls = targets[:, 4].tolist() if num_gt else []  # target class

            # no predictions for this image
            if preds is None or len(preds) == 0:
                if num_gt:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Assign all predictions as incorrect
            correct = [0] * len(preds)
            if num_gt:
                detected = []
                tcls_tensor = targets[:, 4]

                # target boxes
                tboxes = targets[:, :4]
                tboxes[:, [0, 2]] *= self.width
                tboxes[:, [1, 3]] *= self.height
                preds[:, [0, 2]] *= self.width
                preds[:, [1, 3]] *= self.height

                for ii, pred in enumerate(preds):
                    pbox = pred[:4].unsqueeze(0)
                    pcls = pred[5]

                    # Break if all targets already located in image
                    if len(detected) == num_gt:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = jaccard(pbox, tboxes[m]).max(1)

                    # If iou > threshold and class is correct mark as correct
                    if iou > self.iou_thresh and m[bi] not in detected:
                        correct[ii] = 1
                        detected.append(m[bi])

            # (correct, pconf, pcls, tcls)
            stats.append((correct, preds[:, 4].cpu(), preds[:, 5].cpu(), tcls))

        return stats
Example #13
File: eval.py  Project: w55100/YOLACT
def _bbox_iou(bbox1, bbox2, iscrowd=False):
    ret = jaccard(bbox1, bbox2, iscrowd)
    return ret.cpu()
Example #14
def fast_nms(box_thre, coef_thre, class_thre, second_threshold: bool = False):
    class_thre, idx = class_thre.sort(
        1, descending=True)  # [80, 64 (the number of kept boxes)]

    idx = idx[:, :cfg.top_k].contiguous()
    class_thre = class_thre[:, :cfg.top_k]

    num_classes, num_dets = idx.size()

    box_thre = box_thre[idx.view(-1), :].view(num_classes, num_dets,
                                              4)  # [80, 64, 4]
    coef_thre = coef_thre[idx.view(-1), :].view(num_classes, num_dets,
                                                -1)  # [80, 64, 32]

    iou = jaccard(box_thre, box_thre)
    iou.triu_(diagonal=1)
    iou_max, _ = iou.max(dim=1)

    # Now just filter out the ones higher than the threshold
    keep = (iou_max <= cfg.nms_thre)

    # We should also only keep detections over the confidence threshold, but at the cost of
    # maxing out your detection count for every image, you can just not do that. Because we
    # have such a minimal amount of computation per detection (matrix multiplication only),
    # this increase doesn't affect us much (+0.2 mAP for 34 -> 33 fps), so we leave it out.
    # However, when you implement this in your method, you should do this second threshold.
    if second_threshold:
        keep *= (class_thre > cfg.conf_thresh)

    # Assign each kept detection to its corresponding class
    class_ids = torch.arange(num_classes,
                             device=box_thre.device)[:, None].expand_as(keep)

    class_ids = class_ids[keep]

    box_nms = box_thre[keep]
    coef_nms = coef_thre[keep]
    class_nms = class_thre[keep]

    # Only keep the top cfg.max_detections highest scores across all classes
    class_nms, idx = class_nms.sort(0, descending=True)

    idx = idx[:cfg.max_detections]
    class_nms = class_nms[:cfg.max_detections]

    class_ids = class_ids[idx]
    box_nms = box_nms[idx]
    coef_nms = coef_nms[idx]
    # Test code (costs a little mAP): if one box predicts more than one
    # class, keep only the highest-scoring duplicate.
    # box_list = np.array(box_nms.cpu()).tolist()
    # class_nms_list = np.array(class_nms.cpu()).tolist()
    #
    # repeat = []
    # ss = list(np.arange(len(box_list)))
    #
    # for aa in box_list:
    #     if (box_list.count(aa) > 1) and (aa not in repeat):
    #         repeat.append(aa)
    #
    # for aa in repeat:
    #     id1 = [j for j, bb in enumerate(box_list) if bb == aa]
    #     temp = [class_nms_list[aa] for aa in id1]
    #     temp = np.array(temp).argmax()
    #     id1.remove(id1[temp])
    #
    #     for jj in id1:
    #         ss.remove(jj)
    #
    # box_nms = box_nms[ss]
    # coef_nms = coef_nms[ss]
    # classes = classes[ss]
    # class_nms = class_nms[ss]

    return box_nms, coef_nms, class_ids, class_nms
Example #15
import numpy as np
import pandas as pd
import torch

# anchor = priors.numpy()

# min_xy = torch.max(gt[:,None,:2], priors[:,:2])
# max_xy = torch.min(gt[:,None,2:], priors[:,2:])
# inter = torch.clamp(max_xy - min_xy, min=0, max=1)

from utils.box_utils import jaccard
# iou2 = jaccard(gt.double(), priors.double())
# iou2_2_max = torch.max(jaccard(gt.double(), priors.double(),), 1)[0]

iou_static = {}
iou_param = [(1, 1), (1.25, .8), (1.5, .65), (2, .5)]
param_name_list = []
for alpha, beta in iou_param:
    name = "{}_{}".format(alpha, beta)
    param_name_list.append(name)
    # key by the name string (the original keyed on the list, which is unhashable)
    iou_static[name] = torch.max(jaccard(gt.double(), priors.double(),
                                         alpha, beta), dim=1)[0]

iou_static['gt_area'] = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
iou_df = pd.DataFrame.from_dict(iou_static)

bins = np.concatenate(
    (np.arange(0, 0.1 - 1e-5, 0.01),
     np.arange(0.1, 0.5 - 1e-5, 0.05),
     np.arange(0.5, 1 + 1e-5, 0.1)))
bins = [0, .02, .06, .2, .4, 1]  # coarse buckets; overrides the fine bins above
labels = [np.mean(bins[i:i + 2]).round(3) for i in range(len(bins) - 1)]
iou_df['size_index'] = pd.cut(iou_df['gt_area'], bins, labels=labels)
iou_df['size_range'] = pd.cut(iou_df['gt_area'], bins)
iou_des = {}
for ii in iou_df.columns[:5]:
    # assumed completion (the source snippet breaks off here): summarize
    # each IoU column per ground-truth size bucket
    iou_des[ii] = iou_df.groupby('size_range')[ii].describe()
Example #16
def eval_net(net, cuda, dataset, transform, top_k):
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(dataset)
    ovthresh = 0.5
    confidence_threshold = 0.01
    num_classes = 0

    # per class

    fp = defaultdict(list)
    tp = defaultdict(list)
    gts = defaultdict(list)
    precision = Counter()
    recall = Counter()
    ap = Counter()

    for i in range(num_images // 100):  # NOTE: evaluates only 1% of the dataset
        if i % 10 == 0:
            print('Evaluating image {:d}/{:d}....'.format(i + 1, num_images))
        t1 = time.time()
        img = dataset.pull_image(i)
        img_id, anno = dataset.pull_anno(i)
        anno = torch.Tensor(anno).long()
        x = cv2.resize(np.array(img), (300, 300)).astype(np.float32)
        x -= (104, 117, 123)
        x = x.transpose(2, 0, 1)
        x = torch.from_numpy(x).unsqueeze(0)
        if cuda:
            x = x.cuda()
        with torch.no_grad():  # replaces the deprecated Variable(volatile=True)
            y = net(x)  # forward pass
        detections = y.data
        # scale each detection back up to the image
        scale = torch.Tensor([img.size[0], img.size[1],
                              img.size[0], img.size[1]])
        # for each class
        if num_classes == 0:
            num_classes = detections.size(1)
        for cl in range(1, detections.size(1)):
            dets = detections[0, cl, :]
            mask = dets[:, 0].ge(confidence_threshold).expand(
                5, dets.size(0)).t()
            # all dets w > 0.01 conf for class
            dets = torch.masked_select(dets, mask).view(-1, 5)
            mask = anno[:, 4].eq(cl-1).expand(5, anno.size(0)).t()
            # all gts for class
            truths = torch.masked_select(anno, mask).view(-1, 5)
            if truths.numel() > 0:
                # there exist gt of this class in the image
                # check for tp & fp
                truths = truths[:, :-1]
                if dets.numel() < 1:
                    fp[cl].extend([0] * truths.size(0))
                    tp[cl].extend([0] * truths.size(0))
                    gts[cl].extend([1] * truths.size(0))
                    # gts[cl][-1] += truths.size(0)
                    continue
                preds = dets[:, 1:]
                # compute overlaps
                overlaps = jaccard(truths.float() /
                                   scale.unsqueeze(0).expand_as(truths), preds)
                # found = if each gt obj is found yet
                found = [False] * overlaps.size(0)
                maxes, max_ids = overlaps.max(0)
                maxes.squeeze_(0)
                max_ids.squeeze_(0)
                for pb in range(overlaps.size(1)):
                    max_overlap = maxes[pb]
                    gt = max_ids[pb]
                    if max_overlap > ovthresh:  # 0.5
                        if found[gt]:
                            # duplicate
                            fp[cl].append(1)
                            tp[cl].append(0)
                            gts[cl].append(0)  # duplicate match, not a tp
                        else:
                            # not yet found
                            tp[cl].append(1)
                            fp[cl].append(0)
                            found[gt] = True  # mark gt as found
                            gts[cl].append(1)  # tp
                    else:
                        fp[cl].append(1)
                        tp[cl].append(0)
                        gts[cl].append(0)  # below IoU threshold
            else:
                # there are no gts of this class in the image
                # all dets > 0.01 are fp
                if dets.numel() > 0:
                    fp[cl].extend([1] * dets.size(0))
                    tp[cl].extend([0] * dets.size(0))
                    gts[cl].extend([0] * dets.size(0))
        if i % 10 == 0:
            print('Timer: %.4f' % (time.time()-t1))
    for cl in range(1, num_classes):
        if len(gts[cl]) < 1:
            continue
        # for each class calc rec, prec, ap
        tp_cumsum = torch.cumsum(torch.Tensor(tp[cl]), 0)
        fp_cumsum = torch.cumsum(torch.Tensor(fp[cl]), 0)
        gt_cumsum = torch.cumsum(torch.Tensor(gts[cl]), 0)
        rec_cumsum = tp_cumsum.float() / gt_cumsum[-1]
        prec_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum).clamp(min=1e-6)
        ap[cl] = voc_ap(rec_cumsum, prec_cumsum)
        recall[cl] = rec_cumsum[-1]
        precision[cl] = prec_cumsum[-1]
        print('class %d rec %.4f prec %.4f AP %.4f tp %.4f fp %.4f gt %.4f'
              % (cl, recall[cl], precision[cl], ap[cl], sum(tp[cl]),
                 sum(fp[cl]), sum(gts[cl])))
    # mAP = mean of APs for all classes
    mAP = sum(ap.values()) / len(ap)
    print('mAP', mAP)
    return mAP
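voc_ap is not shown on this page; a sketch of the all-points VOC AP it presumably implements (the original may instead use the 11-point interpolation):

import torch

def voc_ap(rec, prec):
    # add sentinels, force precision to be monotonically decreasing, then
    # integrate precision over every recall step
    mrec = torch.cat((torch.tensor([0.]), rec, torch.tensor([1.])))
    mpre = torch.cat((torch.tensor([0.]), prec, torch.tensor([0.])))
    for i in range(mpre.numel() - 2, -1, -1):
        mpre[i] = torch.max(mpre[i], mpre[i + 1])
    steps = (mrec[1:] != mrec[:-1]).nonzero().view(-1)
    return ((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1]).sum().item()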