def _compute_tracks_video(frames, dets):
    """Assign a track ID to every detection in a video.

    For each frame, detections are matched against the previous frame's
    detections with ``_compute_matches`` (bipartite matching over the
    configured distance metrics). A detection that matches inherits the
    previous detection's track ID; an unmatched detection starts a new one.

    Args:
        frames: per-frame data (e.g. roidb entries), indexable by frame id.
        dets: detections dict with 'all_boxes' and 'all_keyps'; class index 1
            is used throughout (presumably the "person" class — confirm
            against the caller).

    Returns:
        list (len == len(frames)) of lists of track IDs, one ID per
        detection in that frame, ordered like the detections.
    """
    nframes = len(frames)
    video_tracks = []
    next_track_id = FIRST_TRACK_ID
    for frame_id in range(nframes):
        # Track ID for each detection in this frame, in detection order.
        frame_tracks = []
        cur_boxes = dets['all_boxes'][1][frame_id]
        cur_poses = dets['all_keyps'][1][frame_id]
        if frame_id == 0:
            # No previous frame: every detection starts a new track.
            matches = -np.ones((cur_boxes.shape[0], ))
        else:
            cur_frame_data = frames[frame_id]
            # 1-index selects the class within the dets structure.
            prev_boxes = dets['all_boxes'][1][frame_id - 1]
            prev_poses = dets['all_keyps'][1][frame_id - 1]
            prev_frame_data = frames[frame_id - 1]
            matches = _compute_matches(
                prev_frame_data, cur_frame_data, prev_boxes, cur_boxes,
                prev_poses, cur_poses,
                cost_types=cfg.TRACKING.DISTANCE_METRICS,
                cost_weights=cfg.TRACKING.DISTANCE_METRIC_WTS,
                bipart_match_algo=cfg.TRACKING.BIPARTITE_MATCHING_ALGO)
        prev_tracks = video_tracks[frame_id - 1] if frame_id > 0 else None
        for m in matches:
            if m == -1:  # didn't match to any previous detection
                frame_tracks.append(next_track_id)
                next_track_id += 1
                if next_track_id >= MAX_TRACK_IDS:
                    # Warn before wrapping (matches the LSTM variant):
                    # wrapped IDs may collide with still-active tracks.
                    logger.warning(
                        'Exceeded max track ids ({})'.format(MAX_TRACK_IDS))
                    next_track_id %= MAX_TRACK_IDS
            else:
                frame_tracks.append(prev_tracks[m])
        video_tracks.append(frame_tracks)
    return video_tracks
def _compute_tracks_video_lstm(frames, dets, lstm_model):
    """Assign a track ID to every detection in a video using an LSTM matcher.

    Like ``_compute_tracks_video``, but frame-to-frame matching scores come
    from ``lstm_track_utils.compute_matching_scores`` over per-track LSTM
    hidden states, which are updated after every frame.

    Args:
        frames: per-frame data, indexable by frame id.
        dets: detections dict with 'all_boxes' and 'all_keyps'; class index 1
            is used throughout.
        lstm_model: model handle passed through to ``lstm_track_utils``.

    Returns:
        list (len == len(frames)) of lists of track IDs, one ID per
        detection in that frame, ordered like the detections.
    """
    nframes = len(frames)
    video_tracks = []
    next_track_id = FIRST_TRACK_ID
    # track_lstms maps track_id -> <lstm_hidden_layer>
    track_lstms = {}
    for frame_id in range(nframes):
        # Track ID for each detection in this frame, in detection order.
        frame_tracks = []
        cur_boxes = dets['all_boxes'][1][frame_id]
        cur_poses = dets['all_keyps'][1][frame_id]
        cur_boxposes = lstm_track_utils.encode_box_poses(cur_boxes, cur_poses)
        # Need prev_track_ids to keep the ordering of the LSTM score matrix
        # consistent with the previous frame's tracks.
        # NOTE: fixed off-by-one — was `frame_id > 1`, which presented an
        # empty track list at frame 1 and prevented any match to frame 0.
        prev_track_ids = video_tracks[frame_id - 1] if frame_id > 0 else []
        match_scores = lstm_track_utils.compute_matching_scores(
            track_lstms, prev_track_ids, cur_boxposes, lstm_model)
        if match_scores.size > 0:
            # Scores are similarities; negate to form a cost matrix C.
            matches = _compute_matches(
                None, None, None, None, None, None, None, None,
                cfg.TRACKING.BIPARTITE_MATCHING_ALGO, C=(-match_scores))
        else:
            # Nothing to match against: every detection starts a new track.
            matches = -np.ones((cur_boxes.shape[0], ))
        prev_tracks = video_tracks[frame_id - 1] if frame_id > 0 else None
        for m in matches:
            if m == -1:  # didn't match to any previous detection
                frame_tracks.append(next_track_id)
                next_track_id += 1
                if next_track_id >= MAX_TRACK_IDS:
                    logger.warning(
                        'Exceeded max track ids ({})'.format(MAX_TRACK_IDS))
                    next_track_id %= MAX_TRACK_IDS
            else:
                frame_tracks.append(prev_tracks[m])
        # Based on the matches, update the LSTM hidden states. Unmatched
        # detections started new track IDs above; previous track IDs that
        # did not get matched are deleted.
        lstm_track_utils.update_lstms(
            track_lstms, prev_track_ids, frame_tracks, cur_boxposes,
            lstm_model)
        video_tracks.append(frame_tracks)
    return video_tracks