max_iter = len(data_loader) - 1 timer.reset() main_gpu = dist.get_rank() == 0 num_gpu = dist.get_world_size() for i, (img_list_batch, box_list_batch) in enumerate(data_loader, start_iter): if main_gpu and i == start_iter + 1: timer.start() optim.update_lr(step=i) img_tensor_batch = torch.stack([aa.img for aa in img_list_batch], dim=0).cuda() for box_list in box_list_batch: box_list.to_cuda() with timer.counter('for+loss'): category_loss, box_loss, iou_loss = model(img_tensor_batch, box_list_batch) all_loss = torch.stack([category_loss, box_loss, iou_loss], dim=0) dist.reduce(all_loss, dst=0) if main_gpu: # get the mean loss across all GPUS l_c = all_loss[0].item() / num_gpu # seems when printing, need to call .item(), not sure l_b = all_loss[1].item() / num_gpu l_iou = all_loss[2].item() / num_gpu with timer.counter('backward'): losses = category_loss + box_loss + iou_loss optim.optimizer.zero_grad() losses.backward() with timer.counter('update'):
if cfg.image is not None: dataset = COCODetection(cfg, mode='detect') data_loader = data.DataLoader(dataset, 1, num_workers=4, shuffle=False, pin_memory=True, collate_fn=detect_onnx_collate) ds = len(data_loader) assert ds > 0, 'No .jpg images found.' progress_bar = ProgressBar(40, ds) timer.reset() for i, (img, img_origin, img_name) in enumerate(data_loader): if i == 1: timer.start() img_h, img_w = img_origin.shape[0:2] with timer.counter('forward'): class_p, box_p, coef_p, proto_p, anchors = sess.run(None, {input_name: img}) with timer.counter('nms'): ids_p, class_p, box_p, coef_p, proto_p = nms_numpy(class_p, box_p, coef_p, proto_p, anchors, cfg) with timer.counter('after_nms'): ids_p, class_p, boxes_p, masks_p = after_nms_numpy(ids_p, class_p, box_p, coef_p, proto_p, img_h, img_w, cfg) with timer.counter('save_img'): img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name=img_name) cv2.imwrite(f'results/images/{img_name}', img_numpy) aa = time.perf_counter() if i > 0:
def evaluate(net, cfg, step=None):
    """Evaluate `net` on the validation split and report box / mask accuracy.

    Every validation image goes through forward -> NMS -> post-NMS processing. The
    detections are then either written to JSON and scored with the COCO API
    (``cfg.coco_api`` truthy) or accumulated into ``ap_data`` and scored by ``calc_map``.

    Args:
        net: Network called as ``net(img)``; it must return
            ``(class_p, box_p, coef_p, proto_p, anchors)``.
        cfg: Configuration object. Reads ``class_names``, ``cuda``, ``coco_api`` and,
            on the COCO API path, ``val_ann``.
        step: Forwarded unchanged to ``calc_map``.

    Returns:
        ``(table, box_row, mask_row)`` from ``calc_map`` when ``cfg.coco_api`` is falsy.
        ``None`` on the COCO API path, where the results are only printed.
    """
    dataset = COCODetection(cfg, mode='val')
    # Batch size 1 without shuffling: the loop index `i` is also the index into `dataset.ids`.
    data_loader = data.DataLoader(dataset, 1, num_workers=4, shuffle=False, pin_memory=True,
                                  collate_fn=val_collate)
    ds = len(data_loader)
    progress_bar = ProgressBar(40, ds)
    timer.reset()

    ap_data = {
        'box': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres],
        'mask': [[APDataObject() for _ in cfg.class_names] for _ in iou_thres],
    }

    # End-of-batch timestamp of the previous iteration. It is initialised here so that
    # it is always bound, even when the very first image is skipped below.
    temp = time.perf_counter()

    with torch.no_grad():
        for i, (img, gt, gt_masks, img_h, img_w) in enumerate(data_loader):
            if i == 1:
                # The first iteration is treated as warm-up and is not timed.
                timer.start()

            if cfg.cuda:
                img, gt, gt_masks = img.cuda(), gt.cuda(), gt_masks.cuda()

            with timer.counter('forward'):
                class_p, box_p, coef_p, proto_p, anchors = net(img)

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                             img_h, img_w)

            if ids_p is None:
                # Nothing to score for this image. Refresh the reference timestamp so the
                # next batch time does not also include this skipped iteration.
                temp = time.perf_counter()
                continue

            with timer.counter('metric'):
                ids_p = list(ids_p.cpu().numpy().astype(int))
                class_p = list(class_p.cpu().numpy().astype(float))

                if cfg.coco_api:
                    boxes_p = boxes_p.cpu().numpy()
                    masks_p = masks_p.cpu().numpy()

                    for j in range(masks_p.shape[0]):
                        # Only keep detections whose box has a positive area.
                        box_area = (boxes_p[j, 3] - boxes_p[j, 1]) * (boxes_p[j, 2] - boxes_p[j, 0])
                        if box_area > 0:
                            make_json.add_bbox(dataset.ids[i], ids_p[j], boxes_p[j, :], class_p[j])
                            make_json.add_mask(dataset.ids[i], ids_p[j], masks_p[j, :, :], class_p[j])
                else:
                    prep_metrics(ap_data, ids_p, class_p, boxes_p, masks_p, gt, gt_masks, img_h, img_w,
                                 iou_thres)

            aa = time.perf_counter()
            if i > 0:
                timer.add_batch_time(aa - temp)
            temp = aa

            if i > 0:
                t_t, t_d, t_f, t_nms, t_an, t_me = timer.get_times(
                    ['batch', 'data', 'forward', 'nms', 'after_nms', 'metric'])
                # `fps` leaves the metric time out; `t_fps` is based on the whole batch time.
                fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
                bar_str = progress_bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{ds}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
                      f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
                      f't_after_nms: {t_an:.3f} | t_metric: {t_me:.3f}', end='')

        if cfg.coco_api:
            make_json.dump()
            print("\nJson files dumped, saved in: 'results/', start evaluating.")

            gt_annotations = COCO(cfg.val_ann)
            bbox_dets = gt_annotations.loadRes('results/bbox_detections.json')
            mask_dets = gt_annotations.loadRes('results/mask_detections.json')

            print('\nEvaluating BBoxes:')
            bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
            bbox_eval.evaluate()
            bbox_eval.accumulate()
            bbox_eval.summarize()

            print('\nEvaluating Masks:')
            mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
            mask_eval.evaluate()
            mask_eval.accumulate()
            mask_eval.summarize()
        else:
            table, box_row, mask_row = calc_map(ap_data, iou_thres, len(cfg.class_names), step=step)
            print(table)
            return table, box_row, mask_row
def inference(model, cfg, during_training=False):
    """Run `model` over the validation loader and score its boxes with a COCO-style evaluator.

    Detections are converted to COCO result dicts, dumped to
    ``results/<weight name>.json`` and loaded back through ``dataset.coco.loadRes``
    before being evaluated.

    Args:
        model: Detector called as ``model(img_tensor_batch)``; it must return
            ``(c_pred, box_pred, iou_pred, anchors)``. It is switched to eval mode here.
        cfg: Configuration object. Reads ``weight`` (used to name the JSON file) and
            ``val_api`` (``'Improved COCO'`` selects ``SelfEval``, anything else
            selects pycocotools' ``COCOeval``).
        during_training: Forwarded to ``make_data_loader``. When falsy, the extra
            post-evaluation step (``draw_curve`` or ``compute_thre_per_class``) is run too.

    Returns:
        None. Results are printed and written to disk.
    """
    import os  # local import: only needed to make sure the output directory exists

    model.eval()
    coco_results = []
    val_loader = make_data_loader(cfg, during_training=during_training)
    dataset = val_loader.dataset
    dl = len(val_loader)
    bar = ProgressBar(length=40, max_val=dl)
    timer.reset()

    time_name = ['batch', 'data', 'forward', 'post_process', 'accumulate']
    # End-of-batch timestamp of the previous iteration.
    temp = time.perf_counter()

    with torch.no_grad():
        for i, (img_list_batch, _) in enumerate(val_loader):
            if i == 1:
                # The first iteration is treated as warm-up and is not timed.
                timer.start()

            with timer.counter('forward'):
                img_tensor_batch = torch.stack([item.img for item in img_list_batch], dim=0).cuda()
                c_pred, box_pred, iou_pred, anchors = model(img_tensor_batch)

            with timer.counter('post_process'):
                resized_size = [item.resized_size for item in img_list_batch]
                pred_batch = post_process(cfg, c_pred, box_pred, iou_pred, anchors, resized_size)

            with timer.counter('accumulate'):
                for pred in pred_batch:
                    pred.to_cpu()

                for img_list, pred in zip(img_list_batch, pred_batch):
                    if pred.box.shape[0] == 0:
                        continue  # no detections for this image

                    original_id = dataset.id_img_map[img_list.id]
                    pred.resize(img_list.ori_size)
                    pred.convert_mode("x1y1wh")

                    boxes = pred.box.tolist()
                    score = pred.score.tolist()
                    label = pred.label.tolist()

                    mapped_labels = [dataset.to_category_id[lab] for lab in label]
                    coco_results.extend([{
                        "image_id": original_id,
                        "category_id": mapped_labels[k],
                        "bbox": box,
                        "score": score[k]
                    } for k, box in enumerate(boxes)])

            aa = time.perf_counter()
            if i > 0:
                timer.add_batch_time(aa - temp)

                t_t, t_d, t_f, t_pp, t_acc = timer.get_times(time_name)
                # `fps` leaves the accumulate time out; `t_fps` is based on the whole batch time.
                fps, t_fps = 1 / (t_d + t_f + t_pp), 1 / t_t
                bar_str = bar.get_bar(i + 1)
                print(f'\rTesting: {bar_str} {i + 1}/{dl}, fps: {fps:.2f} | total fps: {t_fps:.2f} | t_t: {t_t:.3f} | '
                      f't_d: {t_d:.3f} | t_f: {t_f:.3f} | t_pp: {t_pp:.3f} | t_acc: {t_acc:.3f}', end='')

            temp = aa

    print('\n\nTest ended, doing evaluation...')

    json_name = cfg.weight.split('/')[-1].split('.')[0]
    file_path = f'results/{json_name}.json'
    # open() does not create missing directories.
    os.makedirs('results', exist_ok=True)
    with open(file_path, "w") as f:
        json.dump(coco_results, f)

    if not coco_results:
        # NOTE(review): pycocotools' loadRes inspects the first result entry, so an empty
        # result list cannot be loaded; stop here instead of failing inside the evaluator.
        print('No detections were produced, evaluation skipped.')
        return

    coco_dt = dataset.coco.loadRes(file_path)

    if cfg.val_api == 'Improved COCO':
        from my_cocoeval.cocoeval import SelfEval
        bbox_eval = SelfEval(dataset.coco, coco_dt, all_points=True)
    else:
        from pycocotools.cocoeval import COCOeval
        bbox_eval = COCOeval(dataset.coco, coco_dt, iouType='bbox')

    bbox_eval.evaluate()
    bbox_eval.accumulate()
    bbox_eval.summarize()

    if not during_training:
        if cfg.val_api == 'Improved COCO':
            bbox_eval.draw_curve()
        else:
            compute_thre_per_class(bbox_eval)
def main():
    """Command-line entry point: run YOLACT detection on a folder of images or on a video.

    The configuration name is derived from the weight file name, which must look like
    ``best_<score>_<cfg name>_<iteration>.pth``. Images are processed when ``--image`` is
    given, otherwise the video given by ``--video``; with neither, only the network is built.
    """
    parser = argparse.ArgumentParser(description='YOLACT Detection.')
    parser.add_argument('--weight', default='weights/best_30.5_res101_coco_392000.pth', type=str)
    parser.add_argument('--image', default=None, type=str, help='The folder of images for detecting.')
    parser.add_argument('--video', default=None, type=str, help='The path of the video to evaluate.')
    parser.add_argument('--img_size', type=int, default=544, help='The image size for validation.')
    parser.add_argument('--traditional_nms', default=False, action='store_true', help='Whether to use traditional nms.')
    parser.add_argument('--hide_mask', default=False, action='store_true', help='Hide masks in results.')
    parser.add_argument('--hide_bbox', default=False, action='store_true', help='Hide boxes in results.')
    parser.add_argument('--hide_score', default=False, action='store_true', help='Hide scores in results.')
    parser.add_argument('--cutout', default=False, action='store_true', help='Cut out each object and save.')
    parser.add_argument('--save_lincomb', default=False, action='store_true',
                        help='Show the generating process of masks.')
    parser.add_argument('--no_crop', default=False, action='store_true',
                        help='Do not crop the output masks with the predicted bounding box.')
    parser.add_argument('--real_time', default=False, action='store_true',
                        help='Show the detection results real-timely.')
    parser.add_argument('--visual_thre', default=0.3, type=float,
                        help='Detections with a score under this threshold will be removed.')
    args = parser.parse_args()

    # The config name is whatever sits between 'best_<score>_' and '_<iteration>.pth'.
    prefix_hits = re.findall(r'best_\d+\.\d+_', args.weight)
    suffix_hits = re.findall(r'_\d+\.pth', args.weight)
    if not prefix_hits or not suffix_hits:
        # Exit with a usage message instead of a bare IndexError.
        parser.error(f'Cannot derive the config name from --weight {args.weight!r}; '
                     f'expected a file named like best_<score>_<cfg name>_<iteration>.pth')
    prefix, suffix = prefix_hits[0], suffix_hits[0]
    args.cfg = args.weight.split(prefix)[-1].split(suffix)[0]

    cfg = get_config(args, mode='detect')

    net = Yolact(cfg)
    net.load_weights(cfg.weight, cfg.cuda)
    net.eval()

    if cfg.cuda:
        cudnn.benchmark = True
        cudnn.fastest = True
        net = net.cuda()

    if cfg.image is not None:
        _detect_images(net, cfg)
    elif cfg.video is not None:
        _detect_video(net, cfg)


def _report_progress(label, progress_bar, done, total):
    """Print one in-place progress line with the timer statistics and return the overall fps."""
    t_t, t_d, t_f, t_nms, t_an, t_si = timer.get_times(['batch', 'data', 'forward', 'nms', 'after_nms', 'save_img'])
    # `fps` leaves the saving time out; `t_fps` is based on the whole batch time.
    fps, t_fps = 1 / (t_d + t_f + t_nms + t_an), 1 / t_t
    bar_str = progress_bar.get_bar(done)
    print(f'\r{label}: {bar_str} {done}/{total}, fps: {fps:.2f} | total fps: {t_fps:.2f} | '
          f't_t: {t_t:.3f} | t_d: {t_d:.3f} | t_f: {t_f:.3f} | t_nms: {t_nms:.3f} | '
          f't_after_nms: {t_an:.3f} | t_save_img: {t_si:.3f}', end='')
    return t_fps


def _detect_images(net, cfg):
    """Detect on every image of the 'detect' dataset and save the drawings to results/images."""
    import os  # local import: only needed to make sure the output directory exists

    dataset = COCODetection(cfg, mode='detect')
    data_loader = data.DataLoader(dataset, 1, num_workers=2, shuffle=False, pin_memory=True,
                                  collate_fn=detect_collate)
    ds = len(data_loader)
    assert ds > 0, 'No .jpg images found.'

    # cv2.imwrite reports a failed write only through its return value, so make sure
    # the target directory exists up front.
    os.makedirs('results/images', exist_ok=True)

    progress_bar = ProgressBar(40, ds)
    timer.reset()
    temp = time.perf_counter()  # end-of-batch timestamp of the previous iteration

    for i, (img, img_origin, img_name) in enumerate(data_loader):
        if i == 1:
            # The first iteration is treated as warm-up and is not timed.
            timer.start()

        if cfg.cuda:
            img = img.cuda()

        img_h, img_w = img_origin.shape[0:2]

        with torch.no_grad(), timer.counter('forward'):
            class_p, box_p, coef_p, proto_p = net(img)

        with timer.counter('nms'):
            ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

        with timer.counter('after_nms'):
            ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                         img_h, img_w, cfg, img_name=img_name)

        with timer.counter('save_img'):
            img_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, img_origin, cfg, img_name=img_name)
            cv2.imwrite(f'results/images/{img_name}', img_numpy)

        aa = time.perf_counter()
        if i > 0:
            timer.add_batch_time(aa - temp)
            _report_progress('Testing', progress_bar, i + 1, ds)
        temp = aa

    print('\nFinished, saved in: results/images.')


def _detect_video(net, cfg):
    """Detect on every frame of `cfg.video`; show the frames live or write them to results/videos."""
    import os  # local import: only needed to make sure the output directory exists

    vid = cv2.VideoCapture(cfg.video)
    if not vid.isOpened():
        # Without this check a missing file yields zero frames and a misleading "Finished".
        raise FileNotFoundError(f'Cannot open video: {cfg.video}')

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    name = cfg.video.split('/')[-1]
    os.makedirs('results/videos', exist_ok=True)
    video_writer = cv2.VideoWriter(f'results/videos/{name}', cv2.VideoWriter_fourcc(*"mp4v"), target_fps,
                                   (frame_width, frame_height))

    progress_bar = ProgressBar(40, num_frames)
    timer.reset()
    t_fps = 0  # overall fps of the previous frame, drawn onto the current one
    temp = time.perf_counter()  # end-of-frame timestamp of the previous iteration

    try:
        for i in range(num_frames):
            if i == 1:
                # The first iteration is treated as warm-up and is not timed.
                timer.start()

            grabbed, frame_origin = vid.read()
            if not grabbed:
                # The reported frame count can exceed the number of frames that can
                # actually be decoded; stop cleanly instead of crashing on a None frame.
                break

            img_h, img_w = frame_origin.shape[0:2]
            frame_trans = val_aug(frame_origin, cfg.img_size)

            frame_tensor = torch.tensor(frame_trans).float()
            if cfg.cuda:
                frame_tensor = frame_tensor.cuda()

            with torch.no_grad(), timer.counter('forward'):
                class_p, box_p, coef_p, proto_p = net(frame_tensor.unsqueeze(0))

            with timer.counter('nms'):
                ids_p, class_p, box_p, coef_p, proto_p = nms(class_p, box_p, coef_p, proto_p, net.anchors, cfg)

            with timer.counter('after_nms'):
                ids_p, class_p, boxes_p, masks_p = after_nms(ids_p, class_p, box_p, coef_p, proto_p,
                                                             img_h, img_w, cfg)

            with timer.counter('save_img'):
                frame_numpy = draw_img(ids_p, class_p, boxes_p, masks_p, frame_origin, cfg, fps=t_fps)

            if cfg.real_time:
                cv2.imshow('Detection', frame_numpy)
                cv2.waitKey(1)
            else:
                video_writer.write(frame_numpy)

            aa = time.perf_counter()
            if i > 0:
                timer.add_batch_time(aa - temp)
                t_fps = _report_progress('Detecting', progress_bar, i + 1, num_frames)
            temp = aa
    finally:
        # Always hand the capture and the writer back, even if a frame fails.
        vid.release()
        video_writer.release()

    if not cfg.real_time:
        print(f'\n\nFinished, saved in: results/videos/{name}')
test_dataset = Seg_dataset(cfg) model = DLASeg(cfg).cuda() model.load_state_dict(torch.load(cfg.trained_model), strict=True) model.eval() timer.reset() with torch.no_grad(): for i, (data_tuple, img_name) in enumerate(test_dataset): if i == 1: timer.start() # timer does not timing for the first image. img_name = img_name.replace('tif', 'png') image = data_tuple[0].unsqueeze(0).cuda().detach() with timer.counter('forward'): output = model(image) with timer.counter('save result'): pred = torch.max(output, 1)[1].squeeze(0).cpu().numpy() if cfg.colorful: pred = PALLETE[pred].astype('uint8') cv2.imwrite(f'results/{img_name}', pred) if cfg.overlay: pred = PALLETE[pred].astype('uint8') original_img = data_tuple[1].astype('uint8') fused = cv2.addWeighted(pred, 0.2, original_img, 0.8, gamma=0) cv2.imwrite(f'results/{img_name}', fused) else: pred *= int(255 / cfg.class_num)
num_workers=4, shuffle=False, pin_memory=False, collate_fn=detect_onnx_collate) ds = len(data_loader) assert ds > 0, 'No .jpg images found.' progress_bar = ProgressBar(40, ds) timer.reset() for i, (img, img_origin, img_name) in enumerate(data_loader): if i == 1: timer.start() with timer.counter( 'forward', trt_mode=True), engine.create_execution_context() as context: assert img.shape == (1, 3, cfg.img_size, cfg.img_size), 'Img shape error.' inputs[0].host = img # input dtype should be float32 # Transfer input data to the GPU. [ cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs ] # Run inference. context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) # Transfer predictions back from the GPU. [ cuda.memcpy_dtoh_async(out.host, out.device, stream)