def single_gpu_test(model,
                    data_loader,
                    half=False,
                    conf_thres=0.001,
                    iou_thres=0.6,
                    merge=False,
                    save_json=False,
                    augment=False,
                    verbose=False,
                    coco_val_path=''):
    """Evaluate a detector on ``data_loader`` and report P / R / mAP.

    Args:
        model: Detector; called as ``model(return_loss=False, **batch)`` and
            expected to return a 2-tuple whose first item is fed to
            ``non_max_suppression``. ``model.head.num_classes`` is read, and
            ``model.cfg.filename`` is read when ``save_json`` is set.
        data_loader: Iterable of dict batches providing the keys ``'img'``,
            ``'gt_bboxes'``, ``'gt_class'`` and ``'img_metas'``.
        half: Run in FP16. Ignored (forced to False) on a CPU device.
        conf_thres: Confidence threshold forwarded to NMS.
        iou_thres: IoU threshold forwarded to NMS.
        merge: Forwarded to NMS (weighted box merging).
        save_json: Dump detections to a COCO-style JSON file and, if
            pycocotools is importable, re-compute mAP with it.
        augment: If true, ``batch['augment'] = True`` is passed to the model.
        verbose: Also print one result row per class.
        coco_val_path: Directory searched for ``instances_val*.json`` when
            ``save_json`` is set.

    Returns:
        ``((mp, mr, map50, map, *loss), maps, t)`` where ``maps`` is a
        length-``nc`` array of per-class AP (classes without results get the
        overall mAP) and ``t`` is ``(inference ms/img, NMS ms/img,
        total ms/img, height, width, batch_size)``.
        NOTE: ``loss`` is initialised to zeros and never accumulated here, so
        the three loss entries are always 0.
    """
    device = next(model.parameters()).device  # get model device

    # Half precision is only enabled when the model is not on the CPU
    half = device.type != 'cpu' and half
    if half:
        model.half()

    # Configure
    model.eval()
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # IoU thresholds 0.50 ... 0.95
    niou = iouv.numel()
    seen = 0
    nc = model.head.num_classes
    names = model.CLASSES if hasattr(
        model, 'CLASSES') else data_loader.dataset.CLASSES
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, mean_ap, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    # Defined up-front so the speed tuple below is valid for an empty loader.
    height, width = 0, 0

    for batch in tqdm(data_loader, desc=s):
        img = batch['img'].to(device, non_blocking=True)
        batch['img'] = img.half() if half else img.float()  # uint8 to fp16/32
        _, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        gt_bbox = batch['gt_bboxes']
        gt_class = batch['gt_class']
        img_metas = batch['img_metas']

        # One row per ground-truth box: (image index, class..., box...).
        # NOTE(review): the code below reads 6 columns (idx, cls, 4 box
        # values), so gt_class[i] is presumably (n, 1) and gt_bbox[i] (n, 4)
        # -- confirm against the dataset.
        target_rows = []
        for i, gtb in enumerate(gt_bbox):
            gtc = torch.from_numpy(gt_class[i]).to(device)
            img_idx = torch.ones(len(gtb), 1, device=device) * i
            target_rows.append(
                torch.cat((img_idx, gtc, torch.from_numpy(gtb).to(device)),
                          dim=-1))
        if target_rows:
            targets = torch.cat(target_rows)
        else:
            targets = torch.zeros((0, 6), device=device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            batch['eval'] = True
            if augment:
                batch['augment'] = True
            t = torch_utils.time_synchronized()
            inf_out, _ = model(return_loss=False,
                               **batch)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Run NMS
            t = torch_utils.time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No detections: record the missed targets (if any) so they
                # still count against recall.
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18,
                #   "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                filename = img_metas[si]['filename']
                ori_shape = img_metas[si]['ori_shape']
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box,
                             ori_shape[:2])  # to original shape
                image_id = str(Path(filename).stem)
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        int(image_id) if image_id.isnumeric() else image_id,
                        'category_id': coco91class[int(det[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(det[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices already matched
                tcls_tensor = labels[:, 0]

                # target boxes scaled by (w, h, w, h)
                # NOTE(review): implies labels hold normalised corner boxes
                # -- confirm against the dataset.
                tbox = labels[:, 1:5] * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(
                        -1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(
                        -1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, best_idx = box_iou(pred[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[best_idx[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        # column 0 corresponds to the first IoU threshold (0.5);
        # the mean over columns spans 0.5:0.95
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)
        mp, mr, map50, mean_ap = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, mean_ap))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Speeds in ms per image; max(seen, 1) avoids dividing by zero when no
    # image was processed.
    t = tuple(x / max(seen, 1) * 1E3 for x in (t0, t1, t0 + t1)) + (
        height, width, data_loader.batch_size)  # tuple

    # Save JSON
    if save_json and len(jdict):
        filename = model.cfg.filename
        basename = os.path.basename(filename)
        bname = os.path.splitext(basename)[0]
        f = 'detections_val2017_%s_results.json' % bname  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)
        print('\nCOCO mAP with pycocotools... saving %s finished' % f)
        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in data_loader.dataset.imgs]
            cocoGt = COCO(
                glob.glob(coco_val_path + '/instances_val*.json')
                [0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            # update results with the first two pycocotools summary stats
            mean_ap, map50 = cocoEval.stats[:2]
        except Exception as e:
            # Best effort: pycocotools is optional, keep the local metrics.
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + mean_ap
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    num_batches = max(len(data_loader), 1)  # guard against an empty loader
    return (mp, mr, map50, mean_ap,
            *(loss.cpu() / num_batches).tolist()), maps, t
def get_yolov4_target(self, pred, img_metas, batch_size, gt_bbox, gt_class,
                      gt_score):
    """Build per-scale training targets and ignore masks.

    For every output scale (one entry of ``self.anchor_masks`` /
    ``self.downsample_ratios``) each ground-truth box is matched to the
    anchor, among *all* ``self.anchors``, with the highest IoU on width and
    height alone. The box becomes a target at this scale only if that best
    anchor belongs to the scale's mask.

    Args:
        pred: Sequence of per-scale predictions. ``pred[index][bs, ..., :4]``
            is compared against the grid-scaled truth boxes with
            ``box_iou(..., xyxy=False)``; it must contain
            ``len(mask) * grid_h * grid_w`` boxes per image.
        img_metas: List of dicts; only ``img_metas[0]['img_shape'][:2]`` is
            read and used as ``(h, w)`` for the whole batch.
        batch_size: Number of images to process.
        gt_bbox: Per-image boxes (``torch.Tensor`` or ``numpy.ndarray``),
            passed through ``xyxy2xywh``.
            NOTE(review): the result is multiplied by the grid size, so the
            boxes are presumably normalised to [0, 1] -- confirm with caller.
        gt_class: Per-image ``numpy.ndarray`` of class ids.
        gt_score: Unused; kept for interface compatibility.

    Returns:
        ``(indices, tbox, tcls, anch, ignore_mask)``, each a list with one
        entry per scale:
        ``indices`` -- tuple ``(b, a, gj, gi)`` of long tensors (image index,
        anchor position inside the mask, grid row, grid column);
        ``tbox`` -- target boxes in grid units with the integer part of x/y
        removed; ``tcls`` -- target classes; ``anch`` -- matched anchors in
        grid units; ``ignore_mask`` -- float tensor of shape
        ``(batch_size, len(mask), grid_h, grid_w, 1)``, 0 where a cell is
        ignored and 1 elsewhere.
    """
    device = pred[0].device
    h, w = img_metas[0]['img_shape'][:2]
    tcls, tbox, indices, ignore_mask, anch = [], [], [], [], []
    all_anchors = np.array(self.anchors, dtype=np.float32)

    def _cat_or_empty(parts, dtype):
        # Concatenate collected chunks, or give a typed empty tensor.
        if parts:
            return torch.cat(parts)
        return torch.empty(0, dtype=dtype, device=device)

    for index, (mask, downsample_ratio) in enumerate(
            zip(self.anchor_masks, self.downsample_ratios)):
        grid_h, grid_w = int(h / downsample_ratio), int(w / downsample_ratio)
        num_mask = len(mask)

        # Anchors of this scale, in grid units
        anchors = all_anchors[mask] / downsample_ratio

        # All anchors as [0, 0, anchor_w, anchor_h] boxes in grid units;
        # invariant across the batch, so built once per scale.
        ref_anchors = np.zeros((len(all_anchors), 4), dtype=np.float32)
        ref_anchors[:, 2:] = all_anchors / downsample_ratio
        ref_anchors = torch.from_numpy(ref_anchors)

        grid_scale = torch.tensor([grid_w, grid_h, grid_w,
                                   grid_h]).to(device).float()
        mask_idx = torch.as_tensor(list(mask), dtype=torch.long)

        # 1 everywhere; images with matched targets overwrite their slice.
        batch_ignore_mask = torch.ones(
            (batch_size, num_mask, grid_h, grid_w, 1)).to(device)

        b_parts, a_parts, gj_parts, gi_parts = [], [], [], []
        box_parts, cls_parts = [], []

        for bs in range(batch_size):
            boxes = gt_bbox[bs]
            if not isinstance(boxes, torch.Tensor):
                boxes = torch.from_numpy(boxes).to(device)
            xywh = xyxy2xywh(boxes)
            if len(xywh) == 0:
                continue

            gt = xywh * grid_scale  # x, y, w, h in grid units
            _cls = gt_class[bs]
            cx_grid = gt[:, 0].floor().cpu().numpy()  # grid column
            cy_grid = gt[:, 1].floor().cpu().numpy()  # grid row

            # Match on shape only: boxes anchored at the origin.
            n = len(gt)
            truth_box = torch.zeros(n, 4)
            truth_box[:n, 2:4] = gt[:n, 2:4]
            anchor_ious = box_iou(truth_box, ref_anchors)
            best_n_all = anchor_ious.argmax(dim=1)  # best anchor per box

            # Keep boxes whose best anchor belongs to this scale, and turn
            # the global anchor id into its position inside `mask` (that
            # position indexes both `anchors` and the mask's first axis).
            # For aligned triples such as [3, 4, 5] this equals the former
            # `best_n_all % 3`.
            in_mask = best_n_all.unsqueeze(1) == mask_idx.unsqueeze(0)
            best_n_mask = in_mask.any(dim=1)
            if not best_n_mask.any():
                # No box is best matched by an anchor of this scale.
                continue
            best_n = in_mask.long().argmax(dim=1)

            # cx, cy: integer part is the cell, fractional part the offset.
            truth_box[:n, 0:2] = gt[:n, 0:2]

            # IoU of every predicted box with every truth box (with position)
            pred_ious = box_iou(pred[index][bs, ..., :4].reshape(-1, 4),
                                truth_box.reshape(-1, 4).to(device),
                                xyxy=False)
            pred_best_iou, _ = pred_ious.max(dim=1)  # (max value, index)
            # True where a prediction overlaps some truth box enough
            pred_best_iou = pred_best_iou > self.ignore_thre
            pred_best_iou = pred_best_iou.view(num_mask, grid_h, grid_w, 1)
            # Inverted: True = no matching object, False = ignore
            single_ignore_mask = ~pred_best_iou

            a_i = best_n[best_n_mask].to(device).long()
            gi_i = torch.from_numpy(cx_grid)[best_n_mask].to(device).long()
            gj_i = torch.from_numpy(cy_grid)[best_n_mask].to(device).long()

            b_parts.append(
                torch.ones(len(truth_box))[best_n_mask].long().to(device) *
                bs)
            a_parts.append(a_i)
            gi_parts.append(gi_i)
            gj_parts.append(gj_i)
            box_parts.append(truth_box[best_n_mask].to(device))
            cls_parts.append(
                torch.from_numpy(_cls)[best_n_mask].to(device).long())

            # Clear the cells holding THIS image's targets only. Indexing
            # with the indices accumulated over the batch would also clear
            # cells that belong to targets of earlier images.
            single_ignore_mask[a_i, gj_i, gi_i] = 0
            batch_ignore_mask[bs, :] = single_ignore_mask

        # Index tensors are always long, on CPU as well as CUDA, so they can
        # be used for indexing.
        b = _cat_or_empty(b_parts, torch.long)
        a = _cat_or_empty(a_parts, torch.long)
        gj = _cat_or_empty(gj_parts, torch.long)
        gi = _cat_or_empty(gi_parts, torch.long)
        gxywh = _cat_or_empty(box_parts, torch.float32)
        cls = _cat_or_empty(cls_parts, torch.long)

        indices.append((b, a, gj, gi))
        # Keep only the in-cell offset of the box centre
        gxywh[..., :2] = gxywh[..., :2] - gxywh[..., :2].long()
        tbox.append(gxywh)
        tcls.append(cls)
        anch.append(anchors[a.cpu().numpy()])  # anchors
        ignore_mask.append(batch_ignore_mask)

    return indices, tbox, tcls, anch, ignore_mask
def non_max_suppression(prediction,
                        conf_thres=0.1,
                        iou_thres=0.6,
                        merge=False,
                        classes=None,
                        agnostic=False):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: Tensor of shape (batch, boxes, 5 + nc); per box the
            columns are (center x, center y, width, height, objectness,
            class scores...). FP16 input is converted to FP32.
        conf_thres: Minimum objectness, and minimum objectness * class score,
            for a box to be kept.
        iou_thres: IoU threshold passed to torchvision NMS (and used for
            merging).
        merge: Replace each kept box by the score-weighted mean of the boxes
            overlapping it (only when 1 < n < 3000 candidates).
        classes: Optional list of class ids to keep; falsy disables the
            filter.
        agnostic: If true, NMS is applied across classes instead of per class.

    Returns:
        List with one entry per image: a tensor of detections with shape
        nx6 (x1, y1, x2, y2, conf, cls), or None when the image has no
        detections or the time limit was hit before it was processed.
    """
    if prediction.dtype is torch.float16:
        prediction = prediction.float()  # to FP32

    # Read from the full shape so an empty batch does not raise IndexError.
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # per-class coordinate offset used for batched NMS
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence (boolean indexing returns a copy)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero().t()
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS: shifting boxes by class id * max_wh keeps boxes of
        # different classes from suppressing each other.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
                    1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:
                # Best effort, possible CUDA error
                # https://github.com/ultralytics/yolov3/issues/1139
                # Unlike a bare `except`, this lets KeyboardInterrupt and
                # SystemExit propagate.
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output