Exemplo n.º 1
0
def get_video_reader(args, video_name):
    """Open a MEVA clip and start prefetching its frames.

    The clip is read from
    ``<args.video_path>/<date>/<hr_slot>/<video_name>.avi``, where ``date``
    and ``hr_slot`` come from ``parse_meva_clip_name(video_name)``. The
    decoding backend is picked from ``args``: ``use_lijun_video_loader``
    takes precedence, then ``use_moviepy``, otherwise OpenCV is used.

    Args:
        args: options object; reads ``video_path``,
            ``use_lijun_video_loader`` and ``use_moviepy``. It is also
            forwarded unchanged to ``VideoEnqueuer``.
        video_name: clip name without the ``.avi`` extension.

    Returns:
        A tuple ``(get_frame_batches, num_batches)``: the object returned by
        ``VideoEnqueuer.get()`` and the enqueuer's ``num_batches``.

    Raises:
        Exception: if the OpenCV backend cannot open the video file.
    """
    date, hr_slot, _ = parse_meva_clip_name(video_name)
    video_path = os.path.join(args.video_path, date, hr_slot,
                              video_name + ".avi")

    if args.use_lijun_video_loader:
        vcap = VideoReader(video_path)
        frame_count = int(vcap.length)
    elif args.use_moviepy:
        clip = VideoFileClip(video_path, audio=False)
        # frame count is estimated as fps * duration, truncated to an int
        frame_count = int(clip.fps * clip.duration)
        vcap = clip.iter_frames()
    else:
        # any failure while opening is reported with one uniform message
        try:
            vcap = cv2.VideoCapture(video_path)
            if not vcap.isOpened():
                raise Exception("cannot open %s" % video_path)
        except Exception:
            raise Exception("warning, cannot open %s" % video_path)
        # opencv 3/4. VideoCapture.get() returns a float; cast it so every
        # backend hands an int frame count to the enqueuer.
        frame_count = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # start reading frames into queues now
    video_queuer = VideoEnqueuer(
        args,
        vcap,
        frame_count,
        frame_gap=1,  # no skipping frames
        prefetch=100,
        start=True,
        is_moviepy=args.use_moviepy,
        batch_size=1)
    get_frame_batches = video_queuer.get()
    return get_frame_batches, video_queuer.num_batches
Exemplo n.º 2
0
                frame_count = int(vcap.fps * vcap.duration)  # uh
                vcap = vcap.iter_frames()
            else:
                try:
                    vcap = cv2.VideoCapture(videofile)
                    if not vcap.isOpened():
                        raise Exception("cannot open %s" % videofile)
                except Exception as e:
                    # raise e
                    # just move on to the next video
                    print("warning, cannot open %s" % videofile)
                    continue

                # opencv 2
                if cv2.__version__.split(".")[0] == "2":
                    frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
                else:
                    # opencv 3/4
                    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

            # initialize tracking module
            if args.get_tracking:
                tracking_objs = args.tracking_objs.split(",")
                tracker_dict = {}
                tracking_results_dict = {}
                tmp_tracking_results_dict = {}
                for tracking_obj in tracking_objs:
                    metric = metric = nn_matching.NearestNeighborDistanceMetric(
                        "cosine", args.max_cosine_distance, args.nn_budget)
                    tracker_dict[tracking_obj] = Tracker(
                        metric, max_iou_distance=args.max_iou_distance)
Exemplo n.º 3
0
def load_track_and_features(args, video_name, p_file, v_file, p_extractor,
                            v_extractor, hs):
  """Load the person/vehicle tracks of one clip and extract box features.

  Steps: start prefetching the video frames, parse both track files, pick
  the boxes to sample from every track, crop those boxes while reading the
  video once, then run the matching extractor on the crops of each track.

  Each non-empty line of a track file must hold exactly 10 comma-separated
  fields: frame_idx, track_id, left, top, width, height, conf and three
  trailing fields that are ignored.

  Args:
    args: options object; reads video_path, use_lijun_video_loader,
      use_moviepy, feature_box_gap, feature_box_num and use_avg_pool. It is
      also forwarded unchanged to VideoEnqueuer.
    video_name: clip name without the ".avi" extension.
    p_file: path of the person track file.
    v_file: path of the vehicle track file.
    p_extractor: callable applied to the list of RGB crops of one person
      track; its result must support `.cpu().numpy()`.
    v_extractor: same as p_extractor, for vehicle tracks.
    hs: mapping indexed by the camera returned by parse_meva_clip_name; the
      value is passed to warp_points as the homography.

  Returns:
    A dict {"Person": {...}, "Vehicle": {...}}; each inner dict maps
    track_id -> (rows, feats). `rows` holds all rows of that track (the 7
    parsed columns followed by the warped foot point) and `feats` is the
    extractor output as a numpy array, averaged over axis 0 into a single
    row when args.use_avg_pool is set. Tracks for which no valid box was
    cropped are left out.

  Raises:
    Exception: if the OpenCV backend cannot open the video file.
    ValueError: if a non-empty track line does not have 10 fields.
  """
  date, hr_slot, camera = parse_meva_clip_name(video_name)

  # start loading video frames first
  video_path = os.path.join(args.video_path, date, hr_slot, video_name + ".avi")
  if args.use_lijun_video_loader:
    vcap = VideoReader(video_path)
    frame_count = int(vcap.length)
  elif args.use_moviepy:
    clip = VideoFileClip(video_path, audio=False)
    # frame count is estimated as fps * duration, truncated to an int
    frame_count = int(clip.fps * clip.duration)
    vcap = clip.iter_frames()
  else:
    # any failure while opening is reported with one uniform message
    try:
      vcap = cv2.VideoCapture(video_path)
      if not vcap.isOpened():
        raise Exception("cannot open %s" % video_path)
    except Exception:
      raise Exception("warning, cannot open %s" % video_path)
    # opencv 3/4. VideoCapture.get() returns a float; cast it so every
    # backend hands an int frame count to the enqueuer.
    frame_count = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

  # start reading frames into queues now
  video_queuer = VideoEnqueuer(
      args, vcap, frame_count, frame_gap=1,  # no skipping frames
      prefetch=100,
      start=True, is_moviepy=args.use_moviepy,
      batch_size=1)
  get_frame_batches = video_queuer.get()

  def load_track_file(file_path, homography):
    """Load a tracking file into a dict of track_id -> numpy rows."""
    # assuming sorted by frameid
    rows = []
    with open(file_path, "r") as f:
      for line in f:
        line = line.strip()
        if not line:
          # tolerate blank / trailing empty lines instead of crashing
          continue
        (frame_idx, track_id, left, top, width, height, conf,
         _, _, _) = line.split(",")
        rows.append([frame_idx, track_id, left, top, width, height, conf])

    if not rows:
      return {}

    rows = np.array(rows, dtype="float32")  # [N, 7]

    # compute topdown points from the bottom-center of every box
    foot_points_x = rows[:, 2] + rows[:, 4] / 2.  # [N]
    foot_points_y = rows[:, 3] + rows[:, 5]
    foot_points = np.stack([foot_points_x, foot_points_y], axis=0)  # [2, N]
    # [2, N] (shape as stated by the original code; depends on warp_points)
    top_down_points = warp_points(foot_points, homography)
    top_down_points = np.transpose(top_down_points, [1, 0])  # [N, 2]

    # [N, 9]
    rows = np.concatenate([rows, top_down_points], axis=1)

    # split the rows per track; keys are Python floats
    track_data = {}  # track_id -> [K, 9]
    for track_id in np.unique(rows[:, 1]).tolist():
      track_data[track_id] = rows[rows[:, 1] == track_id, :]
    return track_data

  # track_id -> data
  p_tracks = load_track_file(p_file, hs[camera])
  v_tracks = load_track_file(v_file, hs[camera])

  # get each frame's boxes to extract
  frame_data = {}  # frame_idx -> a list of boxes
  def get_track_boxes(tracks, cat_name):
    """Register the sampled boxes of every track under their frame index."""
    for track_id in tracks:
      # every feature_box_gap-th row, at most feature_box_num of them
      idxs = list(range(0, len(tracks[track_id]), args.feature_box_gap))
      idxs = idxs[:args.feature_box_num]
      boxes = tracks[track_id][idxs, :]  # [k, 9]

      for box_idx, box in enumerate(boxes):
        frame_idx = box[0]
        tlwh = box[2:6]
        frame_data.setdefault(frame_idx, []).append(
            (tlwh, track_id, box_idx, cat_name))
  get_track_boxes(p_tracks, "Person")
  get_track_boxes(v_tracks, "Vehicle")

  # 2. go through the video once and crop all the images to extract features
  # person and vehicle crops live in separate dicts, so equal track ids
  # across the two categories do not clash
  p_track_to_feat = {}  # track_id => list of crops, later => features
  v_track_to_feat = {}  # track_id => list of crops, later => features

  for batch in tqdm(get_frame_batches, total=video_queuer.num_batches):
    image, _, frame_idx = batch[0]
    if frame_idx not in frame_data:
      continue
    image = image.astype("uint8")  # need uint8 type

    for tlwh, track_id, _, cat_name in frame_data[frame_idx]:
      # check box valid
      if not valid_box(tlwh, image):
        continue

      x, y, w, h = [int(v) for v in tlwh]
      box_img = cv2.cvtColor(image[y:y+h, x:x+w], cv2.COLOR_BGR2RGB)
      if cat_name == "Person":
        p_track_to_feat.setdefault(track_id, []).append(box_img)
      elif cat_name == "Vehicle":
        v_track_to_feat.setdefault(track_id, []).append(box_img)

  # extract features
  def get_features(track_to_imgs, extractor):
    """Replace, in place, the crop list of every track by its features."""
    for track_id in track_to_imgs:
      box_imgs = track_to_imgs[track_id]
      # [K, 512] according to the original code; depends on the extractor
      track_to_imgs[track_id] = extractor(box_imgs).cpu().numpy()
      if args.use_avg_pool:
        # [1, 512]
        track_to_imgs[track_id] = np.mean(
            track_to_imgs[track_id], axis=0, keepdims=True)

  get_features(p_track_to_feat, p_extractor)
  get_features(v_track_to_feat, v_extractor)

  def gather_data(track_data, track_features):
    """Pair track rows with features, ignoring tracks with no valid boxes."""
    return {
        track_id: (track_data[track_id], track_features[track_id])
        for track_id in track_data
        if track_id in track_features
    }

  return {
      "Person": gather_data(p_tracks, p_track_to_feat),
      "Vehicle": gather_data(v_tracks, v_track_to_feat),
  }