def build_sat_tester(task_cfg):
    """Assemble the tester(s) for the "vos" task from ``task_cfg``.

    A "track" model is wrapped in a "track" pipeline, a "vos" segmenter is
    built, both are combined into a "vos" pipeline, and that pipeline is
    handed to ``tester_builder``.

    Reads ``task_cfg.tracker_model``, ``task_cfg.tracker_pipeline``,
    ``task_cfg.segmenter``, ``task_cfg.pipeline`` and ``task_cfg.tester``.
    Returns whatever ``tester_builder`` returns.
    """
    # tracking branch: model first, then the pipeline that wraps it
    track_net = model_builder.build("track", task_cfg.tracker_model)
    sot_tracker = pipeline_builder.build(
        "track",
        task_cfg.tracker_pipeline,
        model=track_net,
    )

    # segmentation branch
    seg_net = model_builder.build("vos", task_cfg.segmenter)

    # the vos pipeline combines segmenter and tracker
    vos_pipeline = pipeline_builder.build(
        "vos",
        task_cfg.pipeline,
        segmenter=seg_net,
        tracker=sot_tracker,
    )

    return tester_builder("vos", task_cfg.tester, "tester", vos_pipeline)
def export_siamfcpp_track_fea_trt(task_cfg, parsed_args):
    """
    Export phase "freeze_track_fea" (basemodel/c_x/r_x) to a trt model.

    The model is built from ``task_cfg.model``, moved to CUDA in eval mode,
    converted with ``torch2trt`` on a random 1x3x303x303 input, and its state
    dict is saved to ``parsed_args.output + "_track_fea.trt"``. The saved file
    is then reloaded into a ``TRTModule`` and its first two outputs are
    compared against the PyTorch outputs (rtol=1e-03, atol=1e-05).
    """
    net = model_builder.build("track", task_cfg.model)
    net.eval().cuda()
    net.phase = "freeze_track_fea"

    # reference forward pass on a random search image
    search_im = torch.randn(1, 3, 303, 303).cuda()
    torch_outs = net(search_im)

    output_path = parsed_args.output + "_track_fea.trt"

    logger.info("start cvt pytorch model")
    converted = torch2trt(net, [search_im])
    torch.save(converted.state_dict(), output_path)
    logger.info("save trt model to {}".format(output_path))

    # round-trip through disk so the check covers the saved artifact
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(output_path))
    trt_outs = reloaded(search_im)

    for out_idx in (0, 1):
        np.testing.assert_allclose(to_numpy(torch_outs[out_idx]),
                                   to_numpy(trt_outs[out_idx]),
                                   rtol=1e-03,
                                   atol=1e-05)
    logger.info("test accuracy ok")
def build_siamfcpp_tester(task_cfg):
    """Build the "track" tester(s) described by ``task_cfg``.

    Builds the model from ``task_cfg.model``, wraps it in the pipeline from
    ``task_cfg.pipeline`` and passes that pipeline, together with
    ``task_cfg.tester``, to ``tester_builder``. Returns its result.
    """
    net = model_builder.build("track", task_cfg.model)
    track_pipeline = pipeline_builder.build("track", task_cfg.pipeline, net)
    return tester_builder("track", task_cfg.tester, "tester", track_pipeline)
def build_siamfcpp_tester(task_cfg):
    """Build the "track" tester(s), forwarding the command-line video argument.

    Command-line arguments are parsed first via the ``parser`` object
    referenced from the enclosing scope; ``parsed_args.video`` is passed as
    the leading argument to ``tester_builder``, followed by the task name, the
    tester config, the literal "tester" and the built pipeline.
    """
    cli_args = parser.parse_args()

    net = model_builder.build("track", task_cfg.model)
    track_pipeline = pipeline_builder.build("track", task_cfg.pipeline, net)

    return tester_builder(
        cli_args.video,
        "track",
        task_cfg.tester,
        "tester",
        track_pipeline,
    )
def __init__(self):
    """Set up the SiamFC++ tracking pipeline on the CUDA device.

    Merges ``path_config.SIAMFCPP_CONFIG`` into ``root_cfg``, takes the
    ``["test"]["track"]`` sub-config, freezes it, builds the model and the
    pipeline from it, and stores the pipeline on ``self.pipeline``.
    """
    super(SiamFCPP, self).__init__("SiamFC++")

    root_cfg.merge_from_file(path_config.SIAMFCPP_CONFIG)
    task_name = "track"
    test_cfg = root_cfg["test"][task_name]
    test_cfg.freeze()

    net = model_builder.build(task_name, test_cfg.model)
    self.pipeline = pipeline_builder.build(task_name, test_cfg.pipeline, net)
    self.pipeline.set_device(torch.device("cuda"))
def export_siamfcpp_fea_trt(task_cfg, parsed_args):
    """
    Export phase "feature" (basemodel/c_z_k/r_z_k) to a trt model.

    The model is built from ``task_cfg.model``, put in eval mode on CUDA,
    converted with ``torch2trt`` on a random 1x3x127x127 input, and its state
    dict is saved to ``parsed_args.output + "_fea.trt"``. The saved file is
    reloaded into a ``TRTModule`` and its first output is compared against the
    PyTorch output (rtol=1e-03, atol=1e-05).
    """
    net = model_builder.build("track", task_cfg.model)
    net = net.eval().cuda()
    net.phase = "feature"

    # reference forward pass on a random template-sized image
    dummy_input = torch.randn(1, 3, 127, 127).cuda()
    torch_outs = net(dummy_input)

    output_path = parsed_args.output + "_fea.trt"

    logger.info("start cvt pytorch model")
    converted = torch2trt(net, [dummy_input])
    logger.info("save trt model to {}".format(output_path))
    torch.save(converted.state_dict(), output_path)

    # reload from disk so the comparison exercises the saved artifact
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(output_path))
    trt_outs = reloaded(dummy_input)

    np.testing.assert_allclose(to_numpy(torch_outs[0]),
                               to_numpy(trt_outs[0]),
                               rtol=1e-03,
                               atol=1e-05)
    logger.info("test accuracy ok")
with open(cfg_bak_file, "w") as f: f.write(task_cfg.dump()) logger.info("Task configuration backed up at %s" % cfg_bak_file) # build dummy dataloader (for dataset initialization) with Timer(name="Dummy dataloader building", verbose=True): dataloader = dataloader_builder.build(task, task_cfg.data) del dataloader logger.info("Dummy dataloader destroyed.") # device config world_size = task_cfg.num_processes assert torch.cuda.is_available(), "please check your devices" assert torch.cuda.device_count( ) >= world_size, "cuda device {} is less than {}".format( torch.cuda.device_count(), world_size) # build tracker model tracker_model = model_builder.build("track", task_cfg.tracker_model) # build model segmenter = model_builder.build("vos", task_cfg.segmenter) # get dist url if parsed_args.auto_dist: port = _find_free_port() dist_url = "tcp://127.0.0.1:{}".format(port) else: dist_url = parsed_args.dist_url # prepare to spawn torch.multiprocessing.set_start_method('spawn', force=True) # spawn trainer process mp.spawn(run_dist_training, args=(world_size, task, task_cfg, parsed_args, segmenter, tracker_model, dist_url), nprocs=world_size,
with open(cfg_bak_file, "w") as f: f.write(task_cfg.dump()) logger.info("Task configuration backed up at %s" % cfg_bak_file) # build dummy dataloader (for dataset initialization) with Timer(name="Dummy dataloader building", verbose=True): dataloader = dataloader_builder.build(task, task_cfg.data) del dataloader logger.info("Dummy dataloader destroyed.") # device config world_size = task_cfg.num_processes assert torch.cuda.is_available(), "please check your devices" assert torch.cuda.device_count( ) >= world_size, "cuda device {} is less than {}".format( torch.cuda.device_count(), world_size) # build model model = model_builder.build(task, task_cfg.model) # get dist url if parsed_args.auto_dist: port = _find_free_port() dist_url = "tcp://127.0.0.1:{}".format(port) else: dist_url = parsed_args.dist_url # prepare to spawn torch.multiprocessing.set_start_method('spawn', force=True) # spawn trainer process mp.spawn(run_dist_training, args=(world_size, task, task_cfg, parsed_args, model, dist_url), nprocs=world_size, join=True) logger.info("Distributed training completed.")
def main(args):
    """Run the interactive single-object tracking demo on a video source.

    Reads ``args.config`` (experiment configuration file), ``args.device``
    (passed to ``torch.device``), ``args.video`` (the string "webcam" or
    anything accepted by ``cv2.VideoCapture``) and ``args.output`` (optional
    path of an MJPG video to write).

    Keys while the window has focus:
        s -- select a bounding box on the current frame and (re)initialize
        c -- clear the current target
        q -- quit

    The loop stops as soon as a frame cannot be read (end of a video file or
    a failing camera). The capture, the writer and the windows are released
    even if the loop raises.
    """
    root_cfg = cfg
    root_cfg.merge_from_file(args.config)
    logger.info("Load experiment configuration at: %s" % args.config)

    # resolve config
    root_cfg = complete_path_wt_root_in_cfg(root_cfg, ROOT_PATH)
    root_cfg = root_cfg.test
    task, task_cfg = specify_task(root_cfg)
    task_cfg.freeze()
    window_name = task_cfg.exp_name

    # build model
    model = model_builder.build(task, task_cfg.model)
    # build pipeline
    pipeline = pipeline_builder.build(task, task_cfg.pipeline, model)
    dev = torch.device(args.device)
    pipeline.to_device(dev)

    init_box = None
    template = None
    vw = None

    if args.video == "webcam":
        logger.info("[INFO] starting video stream...")
        vs = cv2.VideoCapture(0)
        vs.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
    else:
        vs = cv2.VideoCapture(args.video)

    try:
        if args.output:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            # named property ids instead of the bare 3 / 4
            width = vs.get(cv2.CAP_PROP_FRAME_WIDTH)
            height = vs.get(cv2.CAP_PROP_FRAME_HEIGHT)
            vw = cv2.VideoWriter(args.output, fourcc, 25,
                                 (int(width), int(height)))

        while vs.isOpened():
            ret, frame = vs.read()
            if not ret:
                # No frame available: a file-backed capture stays "opened"
                # after its last frame, so without this the loop would spin
                # forever and 's' would hand a None frame to selectROI.
                break

            if init_box is not None:
                time_a = time.time()
                rect_pred = pipeline.update(frame)
                show_frame = frame.copy()
                time_cost = time.time() - time_a
                # presumably (x, y, w, h) -> (x0, y0, x1, y1); see xywh2xyxy
                bbox_pred = xywh2xyxy(rect_pred)
                bbox_pred = tuple(map(int, bbox_pred))
                # font_size / font_width are not defined in this function;
                # they are expected to exist at module level
                cv2.putText(show_frame,
                            "track cost: {:.4f} s".format(time_cost),
                            (128, 20), cv2.FONT_HERSHEY_COMPLEX, font_size,
                            (0, 0, 255), font_width)
                cv2.rectangle(show_frame, bbox_pred[:2], bbox_pred[2:],
                              (0, 255, 0))
                if template is not None:
                    # thumbnail of the selected target in the top-left corner
                    show_frame[:128, :128] = template
            else:
                show_frame = frame

            cv2.imshow(window_name, show_frame)
            if vw is not None:
                vw.write(show_frame)

            key = cv2.waitKey(30) & 0xFF
            if key == ord("q"):
                break
            # if the 's' key is selected, we are going to "select" a bounding
            # box to track
            elif key == ord("s"):
                # select the bounding box of the object we want to track (make
                # sure you press ENTER or SPACE after selecting the ROI)
                box = cv2.selectROI(window_name,
                                    frame,
                                    fromCenter=False,
                                    showCrosshair=True)
                # a cancelled selection has zero width/height; ignore it
                if box[2] > 0 and box[3] > 0:
                    init_box = box
                    template = cv2.resize(
                        frame[box[1]:box[1] + box[3],
                              box[0]:box[0] + box[2]], (128, 128))
                    pipeline.init(frame, init_box)
            elif key == ord("c"):
                init_box = None
                template = None
    finally:
        vs.release()
        if vw is not None:
            vw.release()
        cv2.destroyAllWindows()
# experiment config exp_cfg_path = osp.realpath(parsed_args.config) # from IPython import embed;embed() root_cfg.merge_from_file(exp_cfg_path) logger.info("Load experiment configuration at: %s" % exp_cfg_path) # resolve config root_cfg = complete_path_wt_root_in_cfg(root_cfg, ROOT_PATH) root_cfg = root_cfg.test task, task_cfg = specify_task(root_cfg) task_cfg.freeze() if task == 'track': # build model model = model_builder.build(task, task_cfg.model) # build pipeline pipeline = pipeline_builder.build('track', task_cfg.pipeline, model=model) # build tester testers = tester_builder(task, task_cfg.tester, "tester", pipeline) elif task == 'vos': # build model tracker = model_builder.build("track_vos", task_cfg.tracker) segmenter = model_builder.build('vos', task_cfg.segmenter) # build pipeline pipeline = pipeline_builder.build('vos', task_cfg.pipeline, segmenter=segmenter,
def main(args):
    """Run the single-object tracking demo on a webcam, image folder or video.

    Reads from ``args``: ``config``, ``device``, ``init_bbox`` (used as the
    initial box when it has exactly four entries), ``resize`` (display /
    dump scale factor), ``dump_only`` (skip ``cv2.imshow``), ``video``,
    ``start_index`` and ``output``.

    Keys (polled only while no box is set or no writer is active):
        s -- select a bounding box on the current frame
        c -- drop the current box and template
        q -- quit
    """
    root_cfg = cfg
    root_cfg.merge_from_file(args.config)
    logger.info("Load experiment configuration at: %s" % args.config)

    # resolve config
    root_cfg = complete_path_wt_root_in_cfg(root_cfg, ROOT_PATH)
    task, task_cfg = specify_task(root_cfg.test)
    task_cfg.freeze()
    window_name = task_cfg.exp_name

    # model -> pipeline -> device
    model = model_builder.build(task, task_cfg.model)
    pipeline = pipeline_builder.build(task, task_cfg.pipeline, model)
    pipeline.set_device(torch.device(args.device))

    init_box = args.init_bbox if len(args.init_bbox) == 4 else None
    template = None

    video_name = "untitled"
    vw = None
    resize_ratio = args.resize
    dump_only = args.dump_only

    # create video stream
    if args.video == "webcam":
        logger.info("Starting video stream...")
        vs = cv2.VideoCapture(0)
        vs.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
        formated_time_str = time.strftime(r"%Y%m%d-%H%M%S", time.localtime())
        video_name = "webcam-{}".format(formated_time_str)
    elif osp.isfile(args.video):
        # a single video file
        logger.info("Starting from video file...")
        vs = cv2.VideoCapture(args.video)
        video_name = osp.splitext(osp.basename(args.video))[0]
    else:
        # anything that is not a regular file is handed to the image stream
        logger.info("Starting from video frame image files...")
        vs = ImageFileVideoStream(args.video, init_counter=args.start_index)
        video_name = osp.basename(osp.dirname(args.video))

    # create video writer to output video
    if args.output:
        if str(args.output).endswith(r".mp4"):
            # save as a single video file
            vw = VideoWriter(args.output, fps=20)
        else:
            # save as image files
            vw = ImageFileVideoWriter(osp.join(args.output, video_name))

    # loop over sequence
    frame_idx = 0  # global frame index
    while vs.isOpened():
        key = 255
        ret, frame = vs.read()
        if not ret:
            break

        logger.debug("frame: {}".format(frame_idx))
        if template is None:
            show_frame = frame
        else:
            tic = time.time()
            rect_pred = pipeline.update(frame)
            logger.debug(rect_pred)
            show_frame = frame.copy()
            time_cost = time.time() - tic
            bbox_pred = tuple(map(int, xywh2xyxy(rect_pred)))
            cv2.putText(show_frame, "track cost: {:.4f} s".format(time_cost),
                        (128, 20), cv2.FONT_HERSHEY_COMPLEX, font_size,
                        (0, 0, 255), font_width)
            cv2.rectangle(show_frame, bbox_pred[:2], bbox_pred[2:],
                          (0, 255, 0))
            # template is known to be set in this branch
            show_frame[:128, :128] = template

        # resize
        out_w = int(show_frame.shape[1] * resize_ratio)
        out_h = int(show_frame.shape[0] * resize_ratio)
        show_frame = cv2.resize(show_frame, (out_w, out_h))
        if not dump_only:
            cv2.imshow(window_name, show_frame)
        if vw is not None:
            vw.write(show_frame)

        # catch key if
        if init_box is None or vw is None:
            logger.debug("Press key s to select object.")
            # the first frame gets a longer wait than the following ones
            wait_time = 5000 if frame_idx == 0 else 30
            key = cv2.waitKey(wait_time) & 0xFF

        logger.debug("key: {}".format(key))
        if key == ord("q"):
            break
        if key == ord("s"):
            # select the bounding box of the object we want to track (make
            # sure you press ENTER or SPACE after selecting the ROI)
            logger.debug("Select object to track")
            box = cv2.selectROI(window_name,
                                frame,
                                fromCenter=False,
                                showCrosshair=True)
            if box[2] > 0 and box[3] > 0:
                init_box = box
        elif key == ord("c"):
            logger.debug(
                "init_box/template released, press key s again to select object."
            )
            init_box = None
            template = None

        # (re)initialize whenever a box exists but no template has been cut
        if init_box is not None and template is None:
            top = int(init_box[1])
            bottom = int(init_box[1] + init_box[3])
            left = int(init_box[0])
            right = int(init_box[0] + init_box[2])
            template = cv2.resize(frame[top:bottom, left:right], (128, 128))
            pipeline.init(frame, init_box)
            logger.debug("pipeline initialized with bbox : {}".format(init_box))
        frame_idx += 1

    vs.release()
    if vw is not None:
        vw.release()
    cv2.destroyAllWindows()
def main(args):
    """Run the video-object-segmentation demo with a polygon-drawn target.

    Reads from ``args``: ``config``, ``device``, ``resize`` (display / dump
    scale factor), ``dump_only`` (skip ``cv2.imshow``), ``video``,
    ``start_index`` and ``output``.

    The module-level names ``polygon_points``, ``lbt_flag`` and ``rbt_flag``
    are declared global here; ``draw_polygon`` is registered as the window's
    mouse callback (presumably it is what updates them -- confirm against its
    definition).

    Keys (polled only while no mask is set or no writer is active):
        s -- draw a polygon (left click adds a point, right click finishes)
        c -- drop the current mask, box and template
        q -- quit
    """
    global polygon_points, lbt_flag, rbt_flag

    root_cfg = cfg
    root_cfg.merge_from_file(args.config)
    logger.info("Load experiment configuration at: %s" % args.config)

    # resolve config
    task, task_cfg = specify_task(root_cfg.test)
    task_cfg.freeze()
    window_name = task_cfg.exp_name
    cv2.namedWindow(window_name)
    cv2.setMouseCallback(window_name, draw_polygon)

    # build model
    tracker_model = model_builder.build("track", task_cfg.tracker_model)
    tracker = pipeline_builder.build("track",
                                     task_cfg.tracker_pipeline,
                                     model=tracker_model)
    segmenter = model_builder.build('vos', task_cfg.segmenter)
    # build pipeline
    pipeline = pipeline_builder.build('vos',
                                      task_cfg.pipeline,
                                      segmenter=segmenter,
                                      tracker=tracker)
    pipeline.set_device(torch.device(args.device))

    init_mask = None
    init_box = None
    template = None

    video_name = "untitled"
    vw = None
    resize_ratio = args.resize
    dump_only = args.dump_only

    # create video stream
    if args.video == "webcam":
        logger.info("Starting video stream...")
        vs = cv2.VideoCapture(0)
        vs.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
        formated_time_str = time.strftime(r"%Y%m%d-%H%M%S", time.localtime())
        video_name = "webcam-{}".format(formated_time_str)
    elif osp.isfile(args.video):
        # a single video file
        logger.info("Starting from video file...")
        vs = cv2.VideoCapture(args.video)
        video_name = osp.splitext(osp.basename(args.video))[0]
    else:
        # anything that is not a regular file is handed to the image stream
        logger.info("Starting from video frame image files...")
        vs = ImageFileVideoStream(args.video, init_counter=args.start_index)
        video_name = osp.basename(osp.dirname(args.video))

    # create video writer to output video
    if args.output:
        if str(args.output).endswith(r".mp4"):
            # save as a single video file
            vw = VideoWriter(args.output, fps=20)
        else:
            # save as image files
            vw = ImageFileVideoWriter(osp.join(args.output, video_name))

    # loop over sequence
    frame_idx = 0  # global frame index
    while vs.isOpened():
        key = 255
        ret, frame = vs.read()
        if not ret:
            break

        if template is None:
            show_frame = frame
        else:
            tic = time.time()
            score_map = pipeline.update(frame)
            mask = (score_map > 0.5).astype(np.uint8) * 2
            color_mask = mask_colorize(mask, 10, color_map)
            color_mask = cv2.resize(color_mask,
                                    (frame.shape[1], frame.shape[0]),
                                    interpolation=cv2.INTER_NEAREST)
            show_frame = cv2.addWeighted(frame, 0.6, color_mask, 0.4, 0)
            time_cost = time.time() - tic
            cv2.putText(show_frame, "track cost: {:.4f} s".format(time_cost),
                        (128, 20), cv2.FONT_HERSHEY_COMPLEX, font_size,
                        (0, 0, 255), font_width)
            # template is known to be set in this branch
            show_frame[:128, :128] = template

        # resize
        out_w = int(show_frame.shape[1] * resize_ratio)
        out_h = int(show_frame.shape[0] * resize_ratio)
        show_frame = cv2.resize(show_frame, (out_w, out_h))
        if not dump_only:
            cv2.imshow(window_name, show_frame)
        if vw is not None:
            vw.write(show_frame)

        # catch key if
        if init_mask is None or vw is None:
            # the first frame gets a longer wait than the following ones
            wait_time = 5000 if frame_idx == 0 else 30
            key = cv2.waitKey(wait_time) & 0xFF

        if key == ord("q"):
            break
        if key == ord("s"):
            logger.debug(
                "Select points object to track, left click for new pt, right click to finish"
            )
            polygon_points = []
            # keep redrawing until the right-button flag is raised
            while not rbt_flag:
                if lbt_flag:
                    print(polygon_points[-1])
                    cv2.circle(show_frame, polygon_points[-1], 5, (0, 0, 255),
                               2)
                    if len(polygon_points) > 1:
                        cv2.line(show_frame, polygon_points[-2],
                                 polygon_points[-1], (255, 0, 0), 2)
                    lbt_flag = False
                cv2.imshow(window_name, show_frame)
                key = cv2.waitKey(10) & 0xFF
            # only polygons with more than two vertices produce a target
            if len(polygon_points) > 2:
                np_pts = np.array(polygon_points)
                init_box = cv2.boundingRect(np_pts)
                canvas = np.zeros(
                    (show_frame.shape[0], show_frame.shape[1]),
                    dtype=np.uint8)
                init_mask = cv2.fillPoly(canvas, [np_pts], (1, ))
            rbt_flag = False
        elif key == ord("c"):
            logger.debug(
                "init_box/template released, press key s again to select object."
            )
            init_mask = None
            init_box = None
            template = None

        # (re)initialize whenever a mask exists but no template has been cut
        if init_mask is not None and template is None:
            top = int(init_box[1])
            bottom = int(init_box[1] + init_box[3])
            left = int(init_box[0])
            right = int(init_box[0] + init_box[2])
            template = cv2.resize(frame[top:bottom, left:right], (128, 128))
            pipeline.init(frame, init_box, init_mask)
            logger.debug(
                "pipeline initialized with bbox : {}".format(init_box))
        frame_idx += 1

    vs.release()
    if vw is not None:
        vw.release()
    cv2.destroyAllWindows()