def __getitem__(self, item):
    """
    Fetch one record of ``COCODataset.data_items`` as a one-image sequence.

    :param item: int, index into ``COCODataset.data_items``
    :return: dict with
        image: one-element list holding the record's ``file_name``
        anno: when the ``with_mask`` hyper-parameter is set, a list with one
            generated mask per annotated object; otherwise the per-object
            ``bbox`` entries passed through ``xywh2xyxy``
            (``self._DUMMY_ANNO`` is substituted when there are no objects)
    """
    entry = COCODataset.data_items[item]
    image_path = entry["file_name"]
    height = entry["height"]
    width = entry["width"]
    objects = entry['annotations']

    if not self._hyper_params["with_mask"]:
        boxes = [obj['bbox'] for obj in objects]
        if not boxes:
            # no annotated object: fall back to the class-level placeholder
            boxes = self._DUMMY_ANNO
        return dict(image=[image_path], anno=xywh2xyxy(boxes))

    # mask mode: one mask per object, generated at the image resolution
    masks = [
        self._generate_mask_from_anno(obj['segmentation'], height, width)
        for obj in objects
    ]
    return dict(image=[image_path], anno=masks)
def __getitem__(self, item: int) -> Dict:
    """
    Return the sequence stored at index ``item`` of ``self._state["dataset"]``.

    :param item: int, index into the underlying dataset
    :return: dict with
        image: the image files stored for that entry
        anno: the stored annotation after ``xywh2xyxy`` conversion
    """
    frames, boxes = self._state["dataset"][item]
    return dict(image=frames, anno=xywh2xyxy(boxes))
def __call__(self, sampled_data):
    """
    Turn a sampled pair of (image, mask) items into a cropped training pair.

    :param sampled_data: dict providing ``data1`` and ``data2`` (each with
        ``image`` and ``anno`` entries, where ``anno`` is a mask) and the
        flag ``is_negative_pair``
    :return: the dict produced by ``crop_track_pair_for_sat``; its
        ``seg_mask`` entry is zeroed in place for negative pairs
    """
    def _mask_to_xyxy(mask):
        # bounding rectangle of the mask, converted from xywh to xyxy
        return xywh2xyxy(cv2.boundingRect(mask))

    template_item = sampled_data["data1"]
    search_item = sampled_data["data2"]

    template_img = template_item["image"]
    template_mask = template_item["anno"]
    template_box = _mask_to_xyxy(template_mask)

    search_img = search_item["image"]
    search_mask = search_item["anno"]
    search_box = _mask_to_xyxy(search_mask)

    cropped = crop_track_pair_for_sat(template_img,
                                      template_box,
                                      search_img,
                                      search_box,
                                      config=self._hyper_params,
                                      mask_tmp=template_mask,
                                      mask_curr=search_mask)

    if sampled_data["is_negative_pair"]:
        # negative pairs carry no segmentation target
        cropped["seg_mask"] *= 0
    return cropped
def __getitem__(self, item):
    """
    Fetch one entry of the configured subset as a one-image sequence.

    :param item: int, index into ``COCODataset.data_dict[subset]``
    :return: dict with
        image: one-element list holding the image file
        anno: the entry's annotation passed through ``xywh2xyxy``
            (``self._DUMMY_ANNO`` is substituted when it is empty)
    """
    split = self._hyper_params["subset"]
    image_path, boxes = COCODataset.data_dict[split][item]
    if len(boxes) == 0:
        # empty annotation: fall back to the class-level placeholder
        boxes = self._DUMMY_ANNO
    return dict(image=[image_path], anno=xywh2xyxy(boxes))
color["target"], thickness=bbox_thickness) cv2.rectangle(im, (0, 0), (im.shape[1] - 1, im.shape[0] - 1), color["border"], thickness=10) im = cv2.resize(im, (0, 0), fx=resize_factor, fy=resize_factor) im = cv2.putText(im, "template frame", (20, 20), cv2.FONT_HERSHEY_COMPLEX, font_size, color["target"], font_width) # cv2.imshow("im", im) im_search = search_frame['image'] bbox_gt = search_frame['anno'] rect_gt = xyxy2xywh(bbox_gt) rect_pred = pipeline.update(im_search) bbox_pred = xywh2xyxy(rect_pred) bbox_gt = tuple(map(int, bbox_gt)) bbox_pred = tuple(map(int, bbox_pred)) im_ = im_search cv2.rectangle(im_, bbox_gt[:2], bbox_gt[2:], color["target"], thickness=bbox_thickness) cv2.rectangle(im_, bbox_pred[:2], bbox_pred[2:], color["pred"], thickness=bbox_thickness)
def main(args):
    """
    Run the interactive tracking demo on a webcam stream or a video file.

    Keys, polled once per loop iteration from the display window:
        s: select a ROI on the current frame and initialize the pipeline
        c: clear the current target (tracking stops until ``s`` is used again)
        q: leave the loop

    The capture, the optional writer and the OpenCV windows are released
    even when an exception escapes the loop.

    :param args: parsed command-line arguments; this function reads
        ``config``, ``device``, ``video`` (``"webcam"`` or a path understood
        by ``cv2.VideoCapture``) and ``output`` (falsy to disable recording)
    """
    root_cfg = cfg
    root_cfg.merge_from_file(args.config)
    logger.info("Load experiment configuration at: %s" % args.config)

    # resolve config
    root_cfg = complete_path_wt_root_in_cfg(root_cfg, ROOT_PATH)
    root_cfg = root_cfg.test
    task, task_cfg = specify_task(root_cfg)
    task_cfg.freeze()
    window_name = task_cfg.exp_name
    # build model
    model = model_builder.build(task, task_cfg.model)
    # build pipeline
    pipeline = pipeline_builder.build(task, task_cfg.pipeline, model)
    dev = torch.device(args.device)
    pipeline.to_device(dev)

    init_box = None
    template = None
    vw = None

    if args.video == "webcam":
        logger.info("[INFO] starting video stream...")
        vs = cv2.VideoCapture(0)
        vs.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
    else:
        vs = cv2.VideoCapture(args.video)

    try:
        if args.output:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            width = vs.get(cv2.CAP_PROP_FRAME_WIDTH)
            height = vs.get(cv2.CAP_PROP_FRAME_HEIGHT)
            vw = cv2.VideoWriter(args.output, fourcc, 25,
                                 (int(width), int(height)))

        while vs.isOpened():
            ret, frame = vs.read()
            if ret:
                if init_box is not None:
                    time_a = time.time()
                    rect_pred = pipeline.update(frame)
                    # draw on a copy so the raw frame stays clean for selectROI
                    show_frame = frame.copy()
                    time_cost = time.time() - time_a
                    bbox_pred = xywh2xyxy(rect_pred)
                    bbox_pred = tuple(map(int, bbox_pred))
                    cv2.putText(show_frame,
                                "track cost: {:.4f} s".format(time_cost),
                                (128, 20), cv2.FONT_HERSHEY_COMPLEX, font_size,
                                (0, 0, 255), font_width)
                    cv2.rectangle(show_frame, bbox_pred[:2], bbox_pred[2:],
                                  (0, 255, 0))
                    if template is not None:
                        # paste the 128x128 template thumbnail in the corner
                        show_frame[:128, :128] = template
                else:
                    show_frame = frame
                cv2.imshow(window_name, show_frame)
                if vw is not None:
                    vw.write(show_frame)

            key = cv2.waitKey(30) & 0xFF
            if key == ord("q"):
                break
            # if the 's' key is selected, we are going to "select" a bounding
            # box to track; ignored when the last read produced no frame,
            # since there is nothing to select on
            elif key == ord("s") and ret:
                # select the bounding box of the object we want to track (make
                # sure you press ENTER or SPACE after selecting the ROI)
                box = cv2.selectROI(window_name,
                                    frame,
                                    fromCenter=False,
                                    showCrosshair=True)
                if box[2] > 0 and box[3] > 0:
                    init_box = box
                    template = cv2.resize(
                        frame[box[1]:box[1] + box[3], box[0]:box[0] + box[2]],
                        (128, 128))
                    pipeline.init(frame, init_box)
            elif key == ord("c"):
                init_box = None
                template = None
    finally:
        # always free the capture device / output file and close the windows
        vs.release()
        if vw is not None:
            vw.release()
        cv2.destroyAllWindows()
def main(args):
    """
    Run the tracking demo on a webcam, a set of frame images or a video file.

    The target is taken from ``args.init_bbox`` when it has four entries,
    otherwise it is selected interactively. Keys:
        s: select a ROI on the current frame
        c: drop the current target and template
        q: leave the loop

    The key is only polled while no box is set or no writer is configured;
    otherwise the loop runs without waiting. The stream, the optional writer
    and the OpenCV windows are released even when an exception escapes the
    loop.

    :param args: parsed command-line arguments; this function reads
        ``config``, ``device``, ``init_bbox``, ``resize``, ``dump_only``,
        ``video``, ``start_index`` and ``output``
    """
    root_cfg = cfg
    root_cfg.merge_from_file(args.config)
    logger.info("Load experiment configuration at: %s" % args.config)

    # resolve config
    root_cfg = complete_path_wt_root_in_cfg(root_cfg, ROOT_PATH)
    root_cfg = root_cfg.test
    task, task_cfg = specify_task(root_cfg)
    task_cfg.freeze()
    window_name = task_cfg.exp_name
    # build model
    model = model_builder.build(task, task_cfg.model)
    # build pipeline
    pipeline = pipeline_builder.build(task, task_cfg.pipeline, model)
    dev = torch.device(args.device)
    pipeline.set_device(dev)

    init_box = None
    template = None
    if len(args.init_bbox) == 4:
        init_box = args.init_bbox

    video_name = "untitled"
    vw = None
    resize_ratio = args.resize
    dump_only = args.dump_only

    # create video stream
    # from webcam
    if args.video == "webcam":
        logger.info("Starting video stream...")
        vs = cv2.VideoCapture(0)
        vs.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
        formated_time_str = time.strftime(r"%Y%m%d-%H%M%S", time.localtime())
        video_name = "webcam-{}".format(formated_time_str)
    # from image files
    elif not osp.isfile(args.video):
        logger.info("Starting from video frame image files...")
        vs = ImageFileVideoStream(args.video, init_counter=args.start_index)
        video_name = osp.basename(osp.dirname(args.video))
    # from video file
    else:
        logger.info("Starting from video file...")
        vs = cv2.VideoCapture(args.video)
        video_name = osp.splitext(osp.basename(args.video))[0]

    try:
        # create video writer to output video
        if args.output:
            # save as image files
            if not str(args.output).endswith(r".mp4"):
                vw = ImageFileVideoWriter(osp.join(args.output, video_name))
            # save as a single video file
            else:
                vw = VideoWriter(args.output, fps=20)

        # loop over sequence
        frame_idx = 0  # global frame index
        while vs.isOpened():
            # default used when no key is polled in this iteration
            key = 255
            ret, frame = vs.read()
            if not ret:
                # end of stream (or failed read): stop
                break

            logger.debug("frame: {}".format(frame_idx))
            if template is not None:
                time_a = time.time()
                rect_pred = pipeline.update(frame)
                logger.debug(rect_pred)
                # draw on a copy so the raw frame stays clean for selectROI
                show_frame = frame.copy()
                time_cost = time.time() - time_a
                bbox_pred = xywh2xyxy(rect_pred)
                bbox_pred = tuple(map(int, bbox_pred))
                cv2.putText(show_frame,
                            "track cost: {:.4f} s".format(time_cost),
                            (128, 20), cv2.FONT_HERSHEY_COMPLEX, font_size,
                            (0, 0, 255), font_width)
                cv2.rectangle(show_frame, bbox_pred[:2], bbox_pred[2:],
                              (0, 255, 0))
                # paste the 128x128 template thumbnail in the corner
                show_frame[:128, :128] = template
            else:
                show_frame = frame

            show_frame = cv2.resize(
                show_frame, (int(show_frame.shape[1] * resize_ratio),
                             int(show_frame.shape[0] * resize_ratio)))  # resize
            if not dump_only:
                cv2.imshow(window_name, show_frame)
            if vw is not None:
                vw.write(show_frame)

            # catch key if
            if (init_box is None) or (vw is None):
                logger.debug("Press key s to select object.")
                # the first frame gets a longer wait than later frames
                if frame_idx == 0:
                    wait_time = 5000
                else:
                    wait_time = 30
                key = cv2.waitKey(wait_time) & 0xFF

            logger.debug("key: {}".format(key))
            if key == ord("q"):
                break
            # if the 's' key is selected, we are going to "select" a bounding
            # box to track
            elif key == ord("s"):
                # select the bounding box of the object we want to track (make
                # sure you press ENTER or SPACE after selecting the ROI)
                logger.debug("Select object to track")
                box = cv2.selectROI(window_name,
                                    frame,
                                    fromCenter=False,
                                    showCrosshair=True)
                if box[2] > 0 and box[3] > 0:
                    init_box = box
            elif key == ord("c"):
                logger.debug(
                    "init_box/template released, press key s again to select object."
                )
                init_box = None
                template = None

            # a box without a template means the pipeline still has to be
            # initialized on the current frame
            if (init_box is not None) and (template is None):
                template = cv2.resize(
                    frame[int(init_box[1]):int(init_box[1] + init_box[3]),
                          int(init_box[0]):int(init_box[0] + init_box[2])],
                    (128, 128))
                pipeline.init(frame, init_box)
                logger.debug(
                    "pipeline initialized with bbox : {}".format(init_box))
            frame_idx += 1
    finally:
        # always free the input stream / output writer and close the windows
        vs.release()
        if vw is not None:
            vw.release()
        cv2.destroyAllWindows()