def overlay_label(image, label, color_map=default_map, alpha=0.4):
    """Blend a colorized label map over a 3-channel image.

    ``image`` must be a 3-dimensional tensor whose last dimension has size 3.
    ``label`` may be 2-dimensional (a trailing singleton dimension is added)
    or 3-dimensional with a trailing dimension of size 1.

    The label is resized to ``(image.size(1), image.size(0))`` with the
    ``cv.inter.nearest`` interpolation mode, colorized through ``colorize``
    and mixed into the image. When the colorized label has four channels,
    the fourth is divided by 255 and multiplied by ``alpha`` to form a
    per-pixel blend weight; otherwise ``alpha`` is used uniformly.

    Returns the blended tensor converted back to the type of ``image``.
    """
    assert image.dim() == 3 and image.size(2) == 3

    if label.dim() == 2:
        label = label.view(*label.size(), 1)
    assert label.dim() == 3 and label.size(2) == 1

    target_dim = (image.size(1), image.size(0))
    resized = cv.resize(label, target_dim, interpolation=cv.inter.nearest)
    overlay = colorize(resized, color_map).float()

    if overlay.size(2) == 4:
        # Fourth channel acts as a per-pixel weight; the first three are the colour.
        weight = alpha * (overlay.narrow(2, 3, 1) / 255)
        overlay = overlay.narrow(2, 0, 3)
    else:
        weight = torch.FloatTensor(image.size()).fill_(alpha)

    base = image.float()
    return (base * (1 - weight) + overlay * weight).type_as(image)
def evaluate_video(frames, evaluate, size, args, classes, fps=20, scale=1):
    """Run detection over a stream of frames, optionally showing, recording and logging.

    Args:
        frames: zero-argument callable returning an iterable of frames.
        evaluate: callable invoked as ``evaluate(frame, nms_params=...)``; its
            result must provide ``_sequence()`` yielding predictions with
            ``label``, ``bbox`` and ``confidence``.
        size: two-element size frames are resized to when ``args.scale`` is
            set; the preview/recording size is half of it.
        args: options object; the attributes read are ``threshold``,
            ``output``, ``start``, ``scale``, ``log``, ``show``, ``end`` and
            ``input``.
        classes: indexable by ``prediction.label``; entries expose ``name``.
        fps: frame rate handed to the video writer.
        scale: unused here (``args.scale`` is consulted instead); kept so
            existing callers keep working.

    Side effects: may display frames, write a video to ``args.output``, print
    throughput every 50 frames and write a JSON log to ``args.log``.
    """
    detection_frames = []
    last = time()

    output_size = (int(size[0] // 2), int(size[1] // 2))
    nms_params = detection_table.nms_defaults._extend(threshold=args.threshold)

    out = None
    if args.output:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # Every frame handed to the writer is resized to output_size, so the
        # writer must be opened with that size: OpenCV requires written frames
        # to match the size given when the writer was opened.
        out = cv2.VideoWriter(args.output, fourcc, fps, output_size)

    try:
        for i, frame in enumerate(frames()):
            if i > args.start:
                if args.scale is not None:
                    frame = cv.resize(frame, size)

                detections = evaluate(frame, nms_params=nms_params)

                if args.log:
                    detection_frames.append(export_detections(detections))

                if args.show or args.output:
                    for prediction in detections._sequence():
                        label_class = classes[prediction.label]
                        confidence = prediction.confidence
                        # First channel falls and second rises with confidence.
                        color = (int((1.0 - confidence) * 255),
                                 int(255 * confidence), 0)
                        display.draw_box(frame, prediction.bbox,
                                         confidence=confidence,
                                         name=label_class.name, color=color)

                    # Resize once and share the result between preview and file.
                    frame = cv.resize(frame, output_size)
                    if args.show:
                        cv.imshow(frame)
                    if args.output:
                        out.write(frame.numpy())

            if args.end is not None and i >= args.end:
                break

            if i % 50 == 49:
                # Wait for queued GPU work so the timing below is meaningful;
                # skipped on hosts without CUDA, where the call would fail.
                if torch.cuda.is_available():
                    torch.cuda.current_stream().synchronize()

                now = time()
                elapsed = now - last
                print(
                    "frame: {} 50 frames in {:.1f} seconds, at {:.2f} fps".format(
                        i, elapsed, 50. / elapsed))
                last = now
    finally:
        # Release the writer even if evaluation fails part-way through, so the
        # output file is finalised.
        if out is not None:
            out.release()

    if args.log:
        # Serialise before opening so a failure does not leave a truncated log.
        text = json.dumps(
            info._extend(filename=args.input, frames=detection_frames)._to_dicts())
        with open(args.log, "w") as f:
            f.write(text)
# Load the input image, run detection on it, draw the confident boxes and
# display the result at half resolution.
classes = model_args.dataset.classes

model.to(device)
encoder.to(device)

frame = cv.imread_color(args.input)

# NOTE(review): this overrides the `nms` field with args.threshold; confirm
# that is intended rather than the `threshold` field.
nms_params = detection_table.nms_defaults._extend(nms=args.threshold)
pprint_struct(nms_params)

detections = evaluate_image(model, frame, encoder, nms_params=nms_params,
                            device=device, crop_boxes=True)

for prediction in detections._sequence():
    if prediction.confidence > 0.7:
        # Keep the whole class entry (not just its name): both `name` and
        # `colour` are read from it below.
        label_class = classes[prediction.label]
        display.draw_box(frame, prediction.bbox,
                         confidence=prediction.confidence,
                         name=label_class.name,
                         color=display.to_rgb(label_class.colour))

frame = cv.resize(frame, (frame.size(1) // 2, frame.size(0) // 2))
cv.display(frame)
def show_batch(t, cols=int(6), scale=1):
    """Tile a batch into a single image, enlarge it by ``scale`` and display it.

    Args:
        t: batch passed to ``tile_batch``.
        cols: number of columns passed to ``tile_batch``.
        scale: multiplier applied to both dimensions of the tiled image.

    Returns:
        Whatever ``cv.display`` returns for the resized tiled image.
    """
    tiled = tile_batch(t, cols)

    # cv.resize is given its target as (size(1), size(0)) everywhere else it
    # is called on these tensors; passing (size(0), size(1)) would swap the
    # two dimensions for non-square tiles. int() keeps the target integral
    # when ``scale`` is fractional.
    dim = (int(tiled.size(1) * scale), int(tiled.size(0) * scale))
    tiled = cv.resize(tiled, dim, interpolation=cv.INTER_NEAREST)
    return cv.display(tiled)
# NMS defaults with `threshold` overridden from the command-line arguments.
nms_params = detection_table.nms_defaults._extend(threshold=args.threshold)


def print_timer(desc, frames, start):
    """Print the frame count, seconds elapsed since ``start`` and the frame rate."""
    seconds = time() - start
    rate = frames / seconds
    print("{}: {} frames in {:.1f} seconds, at {:.2f} fps".format(
        desc, frames, seconds, rate))


# Collect up to args.frames frames in memory, resizing when a scale is set.
images = []
start = time()
for i, frame in enumerate(frames()):
    frame = frame if scale == 1 else cv.resize(frame, size)
    images.append(frame)
    if len(images) >= args.frames:
        break

fps = len(images) / (time() - start)
print_timer("load", len(images), start)

if args.tensorrt:
    print("compiling with tensorrt...")
    from torch2trt import torch2trt

    # Example input built as ones(1, 3, size[1], size[0]) on the target device.
    x = torch.ones(1, 3, int(size[1]), int(size[0])).to(device)
    model = torch2trt(model, [x], fp16_mode=True)
    print("done")
def resize_to(image, dest_size):
    """Resize ``image`` to ``dest_size`` using the ``cv.inter.area`` interpolation mode."""
    area_mode = cv.inter.area
    return cv.resize(image, dest_size, interpolation=area_mode)