Example #1
    def get_yolov4_target(self, pred, img_metas, batch_size, gt_bbox, gt_class,
                          gt_score):
        device = pred[0].device
        h, w = img_metas[0]['img_shape'][:2]
        tcls, tbox, indices, ignore_mask, anch = [], [], [], [], []
        ft = torch.cuda.FloatTensor if pred[0].is_cuda else torch.Tensor
        lt = torch.cuda.LongTensor if pred[0].is_cuda else torch.LongTensor

        for index, (mask, downsample_ratio) in enumerate(
                zip(self.anchor_masks, self.downsample_ratios)):
            b, a, gj, gi, gxywh = lt([]).to(device), lt([]).to(device), lt(
                []).to(device), lt([]).to(device), ft([]).to(device)
            cls = lt([]).to(device)
            anchors = np.array(
                self.anchors,
                dtype=np.float32)[mask] / downsample_ratio  # Scale
            batch_ignore_mask = torch.ones(
                (batch_size, len(mask), int(h / downsample_ratio),
                 int(w / downsample_ratio), 1)).to(device)  # large object
            for bs in range(batch_size):
                xywh = xyxy2xywh(gt_bbox[bs]) if isinstance(
                    gt_bbox[bs], torch.Tensor) else xyxy2xywh(
                        torch.from_numpy(gt_bbox[bs]).to(device))
                if len(xywh) == 0:
                    continue

                grid_h, grid_w = int(h / downsample_ratio), int(
                    w / downsample_ratio)

                all_anchors_grid = np.array(
                    self.anchors, dtype=np.float32) / downsample_ratio  # Scale

                ref_anchors = np.zeros((len(all_anchors_grid), 4),
                                       dtype=np.float32)
                ref_anchors[:, 2:] = np.array(all_anchors_grid,
                                              dtype=np.float32)
                ref_anchors = torch.from_numpy(
                    ref_anchors)  # [0,0,anchor_w,anchor_h]

                gt = xywh * torch.tensor(
                    [grid_w, grid_h, grid_w, grid_h]).to(
                        device).float()  # x, y, w, h scaled to grid units
                score, _cls = gt_score[bs], gt_class[bs]

                cx_grid = gt[:, 0].floor().cpu().numpy()  # grid_x
                cy_grid = gt[:, 1].floor().cpu().numpy()  # grid_y
                n = len(gt)
                truth_box = torch.zeros(n, 4)
                truth_box[:n, 2:4] = gt[:n, 2:4]
                anchor_ious = box_iou(truth_box, ref_anchors)

                best_n_all = anchor_ious.argmax(dim=1)  # index of the best-matching anchor for each gt box
                best_n = best_n_all % 3
                best_n_mask = (
                    (best_n_all == mask[0]) | (best_n_all == mask[1]) |
                    (best_n_all == mask[2]))  # 1 if the best anchor belongs to the current scale's mask, else 0

                if sum(best_n_mask) == 0:  # no gt box is best matched at this scale, skip it
                    continue

                truth_box[:n, 0:2] = gt[:n, 0:2]  # cx, cy: integer part is the grid cell, fractional part is the offset inside it

                single_ignore_mask = np.zeros((len(mask), grid_h, grid_w, 1),
                                              dtype=np.float32)

                pred_ious = box_iou(pred[index][bs, ..., :4].reshape(-1, 4),
                                    truth_box.reshape(-1, 4).to(device),
                                    xyxy=False)  # IoU between predicted boxes and gt boxes (xywh format)
                pred_best_iou, _ = pred_ious.max(dim=1)  # best IoU with any gt box for each prediction
                pred_best_iou = (pred_best_iou > self.ignore_thre
                                 )  # True where the best IoU exceeds the ignore threshold
                pred_best_iou = pred_best_iou.view(
                    single_ignore_mask.shape)  # reshape to the grid layout: 1 = prediction overlaps a gt box
                # set mask to zero (ignore) if pred matches truth
                single_ignore_mask = ~pred_best_iou  # invert: 1 = no gt overlap, cell contributes to the no-object loss

                # torch.ones(len(truth_box))[best_n_mask].to(device)
                b = torch.cat((
                    b,
                    torch.ones(len(truth_box))[best_n_mask].long().to(device) *
                    bs))
                a = torch.cat((a, best_n[best_n_mask].to(device).long()))
                gi = torch.cat(
                    (gi,
                     torch.from_numpy(cx_grid)[best_n_mask].to(device).long()))
                gj = torch.cat(
                    (gj,
                     torch.from_numpy(cy_grid)[best_n_mask].to(device).long()))
                gxywh = torch.cat((gxywh, truth_box[best_n_mask].to(device)))
                cls = torch.cat(
                    (cls,
                     torch.from_numpy(_cls)[best_n_mask].to(device).long()))
                single_ignore_mask[a, gj, gi] = 0
                # ignore_mask[gj, gi, a] = 0
                batch_ignore_mask[bs, :] = single_ignore_mask

            indices.append((b, a, gj, gi))
            gxywh[..., :2] = gxywh[..., :2] - gxywh[..., :2].long()
            tbox.append(gxywh)
            tcls.append(cls)
            anch.append(anchors[a.cpu().numpy()])  # anchors
            ignore_mask.append(batch_ignore_mask)

        return indices, tbox, tcls, anch, ignore_mask
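
These examples lean on a shared xyxy2xywh helper to convert corner boxes to centre format. A minimal sketch of such a conversion, assuming a (N, 4) torch tensor of [x1, y1, x2, y2] boxes (the name mirrors the helper used above, but this body is an illustration, not the project's implementation):

import torch

def xyxy2xywh_sketch(boxes):
    # boxes: (N, 4) tensor of [x1, y1, x2, y2] corner coordinates
    out = boxes.clone()
    out[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2  # centre x
    out[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2  # centre y
    out[:, 2] = boxes[:, 2] - boxes[:, 0]        # width
    out[:, 3] = boxes[:, 3] - boxes[:, 1]        # height
    return out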
Example #2
    return img

file = '/disk2/project/pytorch-YOLOv4/cfg/dataset_test.py'

cfg = Config.fromfile(file)

dataset = build_from_dict(cfg.data.train, DATASET)

dataloader = build_dataloader(dataset, data=cfg.data)

for i, data_batch in enumerate(dataloader):
    if i > 30:
        break
    for idx, data in enumerate(data_batch['img']):
        gt = data_batch['gt_bboxes'][idx]
        gt_xywh = xyxy2xywh(gt)  # cx, cy, w, h
        n_gt = (gt.sum(dim=-1) > 0).sum(dim=-1)
        n = int(n_gt)
        if n == 0:
            continue
        gt = gt[:n].cpu().numpy()
        gt_xywh = gt_xywh[:n].cpu().numpy()
        data = data.cpu().numpy() * 255
        data = data.transpose(1, 2, 0)
        h, w = data.shape[:2]
        a = draw_box(data.copy(), gt_xywh, (h, w))
        cv2.imwrite(
            os.path.join('/disk2/project/test/v2.0/yolov5/dataset/123/test',
                         str(i) + '+' + str(idx) + '.jpg'), a)
# for i in range(30):
#     result = dataset.__getitem__(i)
#     img = result['img']
#     gt_bboxes = result['gt_bboxes']
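
The script above depends on a draw_box helper whose body is not shown. A hypothetical sketch with the same call signature, assuming the boxes are centre-format [cx, cy, w, h] in pixel units (the shape argument is kept only to mirror the call above):

import cv2
import numpy as np

def draw_box_sketch(img, boxes_xywh, shape):
    # img: HxWxC image array; boxes_xywh: (N, 4) array of [cx, cy, w, h]
    # shape: (h, w) of the image, unused here but kept for signature parity (assumption)
    img = np.ascontiguousarray(img).astype(np.uint8)
    for cx, cy, bw, bh in boxes_xywh:
        x1, y1 = int(cx - bw / 2), int(cy - bh / 2)
        x2, y2 = int(cx + bw / 2), int(cy + bh / 2)
        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    return img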
Example #3
    def get_yolov5_target(self, pred, img_metas, batch_size, gt_bbox, gt_class,
                          gt_score):
        device = pred[0].device
        gain = torch.ones(6, device=device)  # normalized to gridspace gain
        #ft = torch.cuda.FloatTensor if pred[0].is_cuda else torch.Tensor
        targets = torch.tensor([], dtype=torch.float32).to(
            device)  #ft([]).to(device)
        for i, gtb in enumerate(gt_bbox):
            gtc = torch.from_numpy(gt_class[i]).to(device)
            img_idx = torch.ones(len(gtb), 1, device=device) * i
            targets = torch.cat(
                (targets,
                 torch.cat((img_idx, gtc, torch.from_numpy(gtb).to(device)),
                           dim=-1)))
        na, nt = len(self.anchor_masks), len(targets)
        tcls, tbox, indices, anch, ignore_mask = [], [], [], [], []
        targets[..., 2:] = xyxy2xywh(targets[..., 2:])
        g = 0.5  # offset from the grid cell centre
        off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]],
                           device=device).float(
                           )  # overlap offsets to the four neighbouring cells (right, down, left, up)
        at = torch.arange(na, device=device).view(na, 1).repeat(
            1, nt)  # anchor index tensor, same as .repeat_interleave(nt); kept on the targets' device
        for idx, (mask, downsample_ratio) in enumerate(
                zip(self.anchor_masks, self.downsample_ratios)):
            anchors = np.array(
                self.anchors,
                dtype=np.float32)[mask] / downsample_ratio  # Scale
            # for i in range(len(self.anchor_masks)):
            #     anchors = self.anchors[i]
            gain[2:] = torch.tensor(pred[idx].shape)[[3, 2, 3, 2]]  # xyxy gain

            # Match targets to anchors
            a, t, offsets = at.new_zeros(0), targets * gain, 0  # a: anchor indices (empty if no targets)
            if nt:
                r = t[None, :, 4:6] / torch.from_numpy(anchors[:, None]).to(
                    device)  # wh ratio
                j = torch.max(r, 1. / r).max(2)[0] < self.anchor_t  # compare
                # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))
                a, t = at[j], t.repeat(
                    na, 1, 1)[j]  # keep (anchor, target) pairs whose wh ratio is within anchor_t; a holds the anchor indices

                # overlaps
                gxy = t[:, 2:4]  # grid xy
                z = torch.zeros_like(gxy)
                # j, k: x/y offset inside the cell is < 0.5; l, m: x/y offset is > 0.5
                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
                l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
                a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat(
                    (t, t[j], t[k], t[l], t[m]), 0
                )  # t: original targets; t[j]/t[k]: duplicates for x/y offsets < 0.5; t[l]/t[m]: duplicates for x/y offsets > 0.5
                offsets = torch.cat(
                    (z, z[j] + off[0], z[k] + off[1], z[l] + off[2],
                     z[m] + off[3]), 0
                ) * g  # z: no offset; x < 0.5 -> +0.5, y < 0.5 -> +0.5, x > 0.5 -> -0.5, y > 0.5 -> -0.5

            # Define
            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy
            gwh = t[:, 4:6]  # grid wh
            gij = (gxy - offsets).long()  # grid cell indices after shifting; -0.5 < offsets < 0.5
            gi, gj = gij.T  # grid xy indices

            # Append
            indices.append((b, a, gj, gi))  # image, anchor, grid indices
            tbox.append(torch.cat((gxy - gij, gwh),
                                  1))  # box: x, y offsets relative to the cell, in the range [-0.5, 1.5]
            anch.append(anchors[a.cpu().numpy()])  # anchors
            tcls.append(c)  # class
            ignore_mask.append([])

        return indices, tbox, tcls, anch, ignore_mask
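
The j = torch.max(r, 1. / r).max(2)[0] < self.anchor_t line above is the shape-ratio test: a gt box is kept for an anchor only if both its width and height are within a factor of anchor_t of that anchor. A standalone illustration with made-up numbers (not from the source):

import torch

anchors = torch.tensor([[10., 13.], [16., 30.], [33., 23.]])  # (na, 2) anchor wh in grid units
gwh = torch.tensor([[12., 14.], [200., 8.]])                  # (nt, 2) gt wh in grid units
anchor_t = 4.0                                                # plays the role of self.anchor_t

r = gwh[None, :, :] / anchors[:, None, :]      # (na, nt, 2) wh ratios
j = torch.max(r, 1. / r).max(2)[0] < anchor_t  # True where both w and h ratios are within 4x
print(j)  # the 12x14 box matches every anchor; the elongated 200x8 box is rejected by all of them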
Example #4
def single_gpu_test(model,
                    data_loader,
                    half=False,
                    conf_thres=0.001,
                    iou_thres=0.6,
                    merge=False,
                    save_json=False,
                    augment=False,
                    verbose=False,
                    coco_val_path=''):

    device = next(model.parameters()).device  # get model device
    # Half
    half = device.type != 'cpu' and half  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()

    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    seen = 0
    nc = model.head.num_classes
    names = model.CLASSES if hasattr(
        model, 'CLASSES') else data_loader.dataset.CLASSES
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, batch in enumerate(tqdm(data_loader, desc=s)):

        img = batch['img'].to(device, non_blocking=True)
        batch['img'] = img.half() if half else img.float()  # uint8 to fp16/32
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        ft = torch.cuda.FloatTensor if half else torch.Tensor
        gt_bbox = batch['gt_bboxes']
        gt_class = batch['gt_class']
        img_metas = batch['img_metas']

        targets = ft([]).to(device)
        for i, gtb in enumerate(gt_bbox):
            gtc = torch.from_numpy(gt_class[i]).to(device)
            img_idx = torch.ones(len(gtb), 1, device=device) * i
            targets = torch.cat([
                targets,
                torch.cat((img_idx, gtc, torch.from_numpy(gtb).to(device)),
                          dim=-1)
            ])

        # Disable gradients
        with torch.no_grad():
            # Run model
            batch['eval'] = True
            if augment:
                batch['augment'] = True
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(
                return_loss=False, **batch)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Run NMS
            t = torch_utils.time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1
            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # if save_txt:
            #     filename = img_metas[si]['filename']
            #     ori_shape = img_metas[si]['ori_shape']
            #     # img_shape = img_metas[si]['img_shape']
            #
            #     # gn = torch.tensor(ori_shape[:2])[[0, 1, 0, 1]]  # normalization gain whwh
            #     txt_path = str(out / Path(filename).stem)
            #     pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], ori_shape[:2])  # to original
            #     for *xyxy, conf, cls in pred:
            #         # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            #         with open(txt_path + '.txt', 'a') as f:
            #             f.write(('%g ' * 5 + '\n') % (cls, *xyxy))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))
            # if save:
            #     _pd = pred.cpu().numpy()
            #     for _p in _pd:
            #         left_top = (int(_p[0]), int(_p[1]))
            #         right_bottom = (int(_p[2]), int(_p[3]))
            #         cv2.rectangle(
            #             img, left_top, right_bottom, color=(0, 0, 255), thickness=2)
            #         label_text = str(_p[5])
            #         label_text += '|{:.02f}'.format(_p[4])
            #         cv2.putText(img, label_text, (int(_p[0]), int(_p[1]) - 2), cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,color=(0, 0, 255))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                filename = img_metas[si]['filename']
                ori_shape = img_metas[si]['ori_shape']
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box,
                             ori_shape[:2])  # to original shape
                image_id = str(Path(filename).stem)
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        int(image_id) if image_id.isnumeric() else image_id,
                        'category_id':
                        coco91class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score':
                        round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                # tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                tbox = labels[:, 1:5] * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
            # Plot images
            # if batch_i < 1:
            #     f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i)  # filename
            #     plot_images(img, targets, paths, str(f), names)  # ground truth
            #     f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i)
            #     plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

        # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
        height, width, data_loader.batch_size)  # tuple

    # Save JSON
    if save_json and len(jdict):
        filename = model.cfg.filename
        basename = os.path.basename(filename)
        bname = os.path.splitext(basename)[0]
        f = 'detections_val2017_%s_results.json' % bname  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        print('\nCOCO mAP with pycocotools... saving %s finished' % f)
        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in data_loader.dataset.imgs]
            cocoGt = COCO(
                glob.glob(coco_val_path + '/instances_val*.json')
                [0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(data_loader)).tolist()), maps, t