def test_zarr_scenes_chunk(
    dmg: LocalDataManager, tmp_path: Path, zarr_dataset: ChunkedDataset, num_frames_to_copy: int
) -> None:
    """Chop every scene of a concatenated zarr and compare it with its source.

    The single-scene zarr is concatenated 10 times, every scene is chopped to
    ``num_frames_to_copy`` frames, and each chopped scene is checked against
    the leading frames of the matching scene in the concatenated dataset.
    """
    # build a multi-scene dataset by repeating the single-scene zarr
    num_copies = 10
    source_path = dmg.require("single_scene.zarr")
    concat_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([source_path] * num_copies, concat_path)

    # chop the concatenated dataset into a second zarr
    chopped_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_scenes_chop(concat_path, chopped_path, num_frames_to_copy=num_frames_to_copy)

    # open both datasets for comparison
    concat_ds = ChunkedDataset(concat_path)
    concat_ds.open()
    chopped_ds = ChunkedDataset(chopped_path)
    chopped_ds.open()

    num_scenes = len(concat_ds.scenes)
    assert num_scenes == len(chopped_ds.scenes)
    assert len(chopped_ds.frames) == num_frames_to_copy * len(chopped_ds.scenes)

    for scene_idx in range(num_scenes):
        src_scene = concat_ds.scenes[scene_idx]
        dst_scene = chopped_ds.scenes[scene_idx]

        # only the first num_frames_to_copy frames of the source scene are expected in the chopped one
        src_start = src_scene["frame_index_interval"][0]
        src_frames = concat_ds.frames[src_start:src_start + num_frames_to_copy]
        dst_frames = chopped_ds.frames[get_frames_slice_from_scenes(dst_scene)]

        src_first, src_last = src_frames[[0, -1]]
        src_agents = concat_ds.agents[get_agents_slice_from_frames(src_first, src_last)]
        src_tl_faces = concat_ds.tl_faces[get_tl_faces_slice_from_frames(src_first, src_last)]

        dst_first, dst_last = dst_frames[[0, -1]]
        dst_agents = chopped_ds.agents[get_agents_slice_from_frames(dst_first, dst_last)]
        dst_tl_faces = chopped_ds.tl_faces[get_tl_faces_slice_from_frames(dst_first, dst_last)]

        # scene-level metadata
        assert dst_scene["host"] == src_scene["host"]
        assert dst_scene["start_time"] == src_scene["start_time"]
        assert dst_scene["end_time"] == src_scene["end_time"]

        # frame-level and element-level content
        assert len(dst_frames) == num_frames_to_copy
        assert np.all(dst_frames["ego_translation"] == src_frames["ego_translation"][:num_frames_to_copy])
        assert np.all(dst_frames["ego_rotation"] == src_frames["ego_rotation"][:num_frames_to_copy])
        assert np.all(dst_agents == src_agents)
        assert np.all(dst_tl_faces == src_tl_faces)
def test_zarr_split(dmg: LocalDataManager, tmp_path: Path, zarr_dataset: ChunkedDataset) -> None:
    """Split a concatenated zarr into three parts and compare each with its source scenes.

    Every scene written to an output split must carry the same ego poses,
    agents and traffic light faces as the scene it was taken from.
    """
    num_copies = 10
    source_path = dmg.require("single_scene.zarr")
    concat_path = str(tmp_path / f"{uuid4()}.zarr")
    zarr_concat([source_path] * num_copies, concat_path)

    # two size-capped splits, then -1 for everything else
    split_infos = [{"name": f"{uuid4()}.zarr", "split_size_GB": size_gb} for size_gb in (0.002, 0.001, -1)]

    scene_splits = zarr_split(concat_path, str(tmp_path), split_infos)

    # load the source zarr once, then walk through every output split
    concat_ds = ChunkedDataset(concat_path)
    concat_ds.open()

    for scene_split, split_info in zip(scene_splits, split_infos):
        split_ds = ChunkedDataset(str(tmp_path / str(split_info["name"])))
        split_ds.open()

        # scene_split[0] is used as the index of this split's first scene in the source dataset
        first_src_scene = scene_split[0]

        for local_idx in range(len(split_ds.scenes)):
            src_scene = concat_ds.scenes[first_src_scene + local_idx]
            src_frames = concat_ds.frames[get_frames_slice_from_scenes(src_scene)]
            src_first, src_last = src_frames[[0, -1]]
            src_agents = concat_ds.agents[get_agents_slice_from_frames(src_first, src_last)]
            src_tl_faces = concat_ds.tl_faces[get_tl_faces_slice_from_frames(src_first, src_last)]

            out_scene = split_ds.scenes[local_idx]
            out_frames = split_ds.frames[get_frames_slice_from_scenes(out_scene)]
            out_first, out_last = out_frames[[0, -1]]
            out_agents = split_ds.agents[get_agents_slice_from_frames(out_first, out_last)]
            out_tl_faces = split_ds.tl_faces[get_tl_faces_slice_from_frames(out_first, out_last)]

            assert np.all(src_frames["ego_translation"] == out_frames["ego_translation"])
            assert np.all(src_frames["ego_rotation"] == out_frames["ego_rotation"])
            assert np.all(src_agents == out_agents)
            assert np.all(src_tl_faces == out_tl_faces)
def get_frames_subset(dataset: ChunkedDataset, frame_start_idx: int, frame_end_idx: int) -> ChunkedDataset:
    """Return a new dataset that keeps only frames ``[frame_start_idx, frame_end_idx)``.

    Assumptions:
    - the dataset has only 1 scene
    - the dataset is in numpy format and not zarr anymore

    The scene start/end times and frame interval are rewritten for the kept
    frames, and the per-frame agent / traffic-light index intervals are
    rebased so that they start at 0 in the copied arrays.

    :param dataset: the single-scene dataset.
    :param frame_start_idx: first frame to keep.
    :param frame_end_idx: where to stop taking frames (excluded).
    :raises ValueError: if the dataset has more than one scene, if agents,
        tl_faces or frames are not numpy arrays, or if the frame bounds are
        out of range, empty or negative.
    :return: a new ``ChunkedDataset`` holding copies of the selected data.
    """
    if len(dataset.scenes) != 1:
        raise ValueError(f"dataset should have a single scene, got {len(dataset.scenes)}")

    # the arrays are sliced and copied below, so they must be in-memory numpy arrays
    if not isinstance(dataset.agents, np.ndarray):
        raise ValueError("dataset agents should be an editable np array")
    if not isinstance(dataset.tl_faces, np.ndarray):
        raise ValueError("dataset tls should be an editable np array")
    if not isinstance(dataset.frames, np.ndarray):
        raise ValueError("dataset frames should be an editable np array")

    total_frames = len(dataset.frames)
    if frame_start_idx >= total_frames:
        raise ValueError(f"frame start {frame_start_idx} is over the length of the dataset")
    if frame_end_idx > total_frames:
        raise ValueError(f"frame end {frame_end_idx} is over the length of the dataset")
    if frame_end_idx <= frame_start_idx:
        raise ValueError(f"end frame {frame_end_idx} should be higher than start {frame_start_idx}")
    if frame_start_idx < 0:
        raise ValueError(f"start frame {frame_start_idx} should be positive")

    last_kept_idx = frame_end_idx - 1
    subset = ChunkedDataset("")

    # scene metadata follows the kept frames
    subset.scenes = dataset.scenes.copy()
    subset.scenes[0]["start_time"] = dataset.frames[frame_start_idx]["timestamp"]
    subset.scenes[0]["end_time"] = dataset.frames[last_kept_idx]["timestamp"]

    subset.frames = dataset.frames[frame_start_idx:frame_end_idx].copy()
    subset.scenes[0]["frame_index_interval"] = (0, len(subset.frames))

    # slices are computed on the ORIGINAL frames, before the intervals are rebased
    first_frame, last_frame = dataset.frames[[frame_start_idx, last_kept_idx]]
    agent_slice = get_agents_slice_from_frames(first_frame, last_frame)
    tls_slice = get_tl_faces_slice_from_frames(first_frame, last_frame)

    # shift both interval columns so the first kept frame starts at index 0
    for interval_key in ("agent_index_interval", "traffic_light_faces_index_interval"):
        subset.frames[interval_key] -= subset.frames[interval_key][0, 0]

    subset.agents = dataset.agents[agent_slice].copy()
    subset.tl_faces = dataset.tl_faces[tls_slice].copy()
    return subset
def test_get_frame_data(ego_cat_dataset: EgoDataset, frame_index: int) -> None:
    """Check that ``_get_frame_data`` yields a visualization with agents and no trajectories."""
    chunked = ego_cat_dataset.dataset
    map_api = ego_cat_dataset.rasterizer.sem_rast.mapAPI  # type: ignore

    # gather the agents and traffic light faces that belong to the requested frame
    frame = chunked.frames[frame_index]
    frame_agents = chunked.agents[get_agents_slice_from_frames(frame)]
    frame_tls = chunked.tl_faces[get_tl_faces_slice_from_frames(frame)]

    frame_vis = _get_frame_data(map_api, frame, frame_agents, frame_tls)

    assert isinstance(frame_vis, FrameVisualization)
    assert len(frame_vis.agents) > 0
    assert len(frame_vis.trajectories) == 0
def test_get_agent_context(
    zarr_dataset: ChunkedDataset, state_index: int, history_steps: int, future_steps: int
) -> None:
    """Compare ``get_agent_context`` against history/future windows built by direct slicing."""
    scene = zarr_dataset.scenes[0]
    frames = zarr_dataset.frames[get_frames_slice_from_scenes(scene)]
    scene_first, scene_last = frames[[0, -1]]
    agents = zarr_dataset.agents[get_agents_slice_from_frames(scene_first, scene_last)]
    tls = zarr_dataset.tl_faces[get_tl_faces_slice_from_frames(scene_first, scene_last)]

    context = get_agent_context(state_index, frames, agents, tls, history_steps, future_steps)
    frames_his_f, frames_fut_f, agents_his_f, agents_fut_f, tls_his_f, tls_fut_f = context

    # future window: the frames right after the current state
    expected_fut_frames = frames[state_index + 1:state_index + 1 + future_steps]
    expected_fut_agents = filter_agents_by_frames(expected_fut_frames, zarr_dataset.agents)
    expected_fut_tls = filter_tl_faces_by_frames(expected_fut_frames, zarr_dataset.tl_faces)

    assert np.all(frames_fut_f["timestamp"] == expected_fut_frames["timestamp"])

    assert len(expected_fut_agents) == len(agents_fut_f)
    for actual, expected in zip(agents_fut_f, expected_fut_agents):
        assert np.all(actual == expected)

    assert len(expected_fut_tls) == len(tls_fut_f)
    for actual, expected in zip(tls_fut_f, expected_fut_tls):
        assert np.all(actual == expected)

    # history window: includes the current state and is returned newest-first,
    # so the directly sliced (oldest-first) data is reversed before comparing
    history_start = max(state_index - history_steps, 0)
    expected_his_frames = frames[history_start:state_index + 1]
    expected_his_agents = filter_agents_by_frames(expected_his_frames, zarr_dataset.agents)
    expected_his_tls = filter_tl_faces_by_frames(expected_his_frames, zarr_dataset.tl_faces)

    assert np.all(frames_his_f["timestamp"] == expected_his_frames["timestamp"][::-1])

    assert len(expected_his_agents) == len(agents_his_f)
    for actual, expected in zip(agents_his_f, expected_his_agents[::-1]):
        assert np.all(actual == expected)

    assert len(expected_his_tls) == len(tls_his_f)
    for actual, expected in zip(tls_his_f, expected_his_tls[::-1]):
        assert np.all(actual == expected)
def test_get_tl_faces_slice_from_frames(slice_end: int, zarr_dataset: ChunkedDataset) -> None:
    """The helper slice must match manual slicing on the first/last frame intervals."""
    first_n = slice(0, slice_end)

    # traffic light faces for the first N frames, via the helper
    boundary_frames = zarr_dataset.frames[first_n][[0, -1]]
    helper_slice = get_tl_faces_slice_from_frames(*boundary_frames)
    tl_faces_from_helper = zarr_dataset.tl_faces[helper_slice]

    # traffic light faces for the first N frames, via the raw index intervals
    frames = zarr_dataset.frames[first_n]
    interval_start = frames[0]["traffic_light_faces_index_interval"][0]
    interval_end = frames[-1]["traffic_light_faces_index_interval"][1]
    tl_faces_manual = zarr_dataset.tl_faces[interval_start:interval_end]

    assert np.all(tl_faces_from_helper == tl_faces_manual)
def test_get_frames_slice_from_scenes(zarr_dataset: ChunkedDataset) -> None:
    """The slice of the first scene must span all frames, agents and tl faces of the dataset."""
    first_scene = zarr_dataset.scenes[0]
    scene_frames = zarr_dataset.frames[get_frames_slice_from_scenes(first_scene)]
    assert len(zarr_dataset.frames) == len(scene_frames)

    # end-to-end: scene -> frames -> agents / traffic light faces
    frame_range = get_frames_slice_from_scenes(zarr_dataset.scenes[0])
    agents_range = get_agents_slice_from_frames(*zarr_dataset.frames[frame_range][[0, -1]])
    tl_faces_range = get_tl_faces_slice_from_frames(*zarr_dataset.frames[frame_range][[0, -1]])

    selected_agents = zarr_dataset.agents[agents_range]
    selected_tl_faces = zarr_dataset.tl_faces[tl_faces_range]

    assert len(selected_agents) == len(zarr_dataset.agents)
    assert len(selected_tl_faces) == len(zarr_dataset.tl_faces)
def test_dataset_frames_subset(zarr_dataset: ChunkedDataset) -> None:
    """Cut frames [10, 25) out of the first scene and compare the cut with the source."""
    scene_dataset = zarr_dataset.get_scene_dataset(0)
    frame_start, frame_end = 10, 25

    subset = get_frames_subset(scene_dataset, frame_start, frame_end)

    assert len(subset.scenes) == 1
    assert len(subset.frames) == frame_end - frame_start

    expected_translations = scene_dataset.frames["ego_translation"][frame_start:frame_end]
    assert np.all(subset.frames["ego_translation"] == expected_translations)

    # agents and traffic light faces must match those between the first and last kept frame
    first_frame, last_frame = scene_dataset.frames[[frame_start, frame_end - 1]]
    agents_slice = get_agents_slice_from_frames(first_frame, last_frame)
    tls_slice = get_tl_faces_slice_from_frames(first_frame, last_frame)

    assert np.all(subset.agents == scene_dataset.agents[agents_slice])
    assert np.all(subset.tl_faces == scene_dataset.tl_faces[tls_slice])

    # the scene interval is rebased to the cut frames
    assert np.all(subset.scenes["frame_index_interval"] == (0, len(subset.frames)))
def generate_agent_sample_tl_persistence(
    state_index: int,
    frames: np.ndarray,
    agents: np.ndarray,
    tl_faces: np.ndarray,
    selected_track_id: Optional[int],
    render_context: RenderContext,
    history_num_frames: int,
    history_step_size: int,
    future_num_frames: int,
    future_step_size: int,
    filter_agents_threshold: float,
    rasterizer: Optional[Rasterizer] = None,
    perturbation: Optional[Perturbation] = None,
) -> dict:
    """Generates the inputs and targets to train a deep prediction model, using all past traffic light data.

    A deep prediction model takes as input the state of the world (here: an image we will call the "raster"),
    and outputs where that agent will be some seconds into the future.

    Compared with a plain history window, traffic light faces are gathered from a history slice requested with
    ``state_index`` history frames (i.e. not limited to ``history_num_frames``), while agents and ego poses only
    use the ``history_num_frames`` window.

    This function has a lot of arguments and is intended for internal use, you should try to use higher level classes
    and partials that use this function.

    Args:
        state_index (int): The anchor frame index, i.e. the "current" timestep in the scene
        frames (np.ndarray): The scene frames array, can be numpy array or a zarr array
        agents (np.ndarray): The full agents array, can be numpy array or a zarr array
        tl_faces (np.ndarray): The full traffic light faces array, can be numpy array or a zarr array
        selected_track_id (Optional[int]): Either None for AV, or the ID of an agent that you want to
            predict the future of. This agent is centered in the raster and the returned targets are derived from
            their future states.
        render_context (RenderContext): Used to compute the ``raster_from_world`` transform from the selected
            agent's centroid and yaw
        history_num_frames (int): Amount of history frames to draw into the rasters
        history_step_size (int): Steps to take between frames, can be used to subsample history frames
        future_num_frames (int): Amount of future frames to generate targets for
        future_step_size (int): Steps to take between targets into the future
        filter_agents_threshold (float): Value between 0 and 1 to use as cutoff value for agent filtering
            based on their probability of being a relevant agent
        rasterizer (Optional[Rasterizer]): Rasterizer of some sort that draws a map image; when falsy the
            returned ``"image"`` is None
        perturbation (Optional[Perturbation]): Accepted for signature compatibility; it is NOT applied anywhere
            in this function

    Raises:
        ValueError: A ValueError is raised if the specified ``selected_track_id`` is not present in the current
            frame or was filtered out by the ``filter_agents_threshold`` probability filtering.

    Returns:
        dict: a dict object with the raster array (``image``), the future and history offset coordinates, yaw
        offsets and availabilities, the transforms between raster, world and agent spaces, and the centroid, yaw
        and extent of the selected agent (or of the ego when ``selected_track_id`` is None)
    """
    # The history slices are ordered starting from the latest frame and go backward in time,
    # e.g. slice(100, 91, -2)
    all_history_slice = get_history_slice(state_index, state_index, history_step_size, include_current_state=True)
    history_slice = get_history_slice(state_index, history_num_frames, history_step_size, include_current_state=True)
    future_slice = get_future_slice(state_index, future_num_frames, future_step_size)

    all_history_frames = frames[all_history_slice].copy()  # TL data will be based on all history
    history_frames = frames[history_slice].copy()  # copy() required if the object is a np.ndarray
    future_frames = frames[future_slice].copy()

    sorted_frames = np.concatenate((history_frames[::-1], future_frames))  # from past to future

    # get agents (past and future)
    agent_slice = get_agents_slice_from_frames(sorted_frames[0], sorted_frames[-1])
    agents = agents[agent_slice].copy()  # this is the minimum slice of agents we need
    history_frames["agent_index_interval"] -= agent_slice.start  # sync interval with the agents array
    future_frames["agent_index_interval"] -= agent_slice.start  # sync interval with the agents array
    history_agents = filter_agents_by_frames(history_frames, agents)
    future_agents = filter_agents_by_frames(future_frames, agents)

    # sync interval with the traffic light faces array; note this uses the FULL history frames,
    # so history_tl_faces can hold more entries than history_frames
    tl_slice = get_tl_faces_slice_from_frames(all_history_frames[-1], all_history_frames[0])  # -1 is the farthest
    all_history_frames["traffic_light_faces_index_interval"] -= tl_slice.start
    history_tl_faces = filter_tl_faces_by_frames(all_history_frames, tl_faces[tl_slice].copy())

    # State you want to predict the future of.
    cur_frame = history_frames[0]
    cur_agents = history_agents[0]

    if selected_track_id is None:
        agent_centroid_m = cur_frame["ego_translation"][:2]
        agent_yaw_rad = rotation33_as_yaw(cur_frame["ego_rotation"])
        agent_extent_m = np.asarray((EGO_EXTENT_LENGTH, EGO_EXTENT_WIDTH, EGO_EXTENT_HEIGHT))
        selected_agent = None
    else:
        # indexing [0] raises IndexError if the agent is not in the frame or under agent-threshold
        # this is a strict error, we cannot recover from this situation
        try:
            agent = filter_agents_by_track_id(
                filter_agents_by_labels(cur_agents, filter_agents_threshold), selected_track_id
            )[0]
        except IndexError as err:
            # chain explicitly so the traceback shows the lookup failure as the cause
            raise ValueError(f" track_id {selected_track_id} not in frame or below threshold") from err
        agent_centroid_m = agent["centroid"]
        agent_yaw_rad = float(agent["yaw"])
        agent_extent_m = agent["extent"]
        selected_agent = agent

    input_im = (
        None
        if not rasterizer
        else rasterizer.rasterize(history_frames, history_agents, history_tl_faces, selected_agent)
    )

    world_from_agent = compute_agent_pose(agent_centroid_m, agent_yaw_rad)
    agent_from_world = np.linalg.inv(world_from_agent)
    raster_from_world = render_context.raster_from_world(agent_centroid_m, agent_yaw_rad)

    future_coords_offset, future_yaws_offset, future_availability = _create_targets_for_deep_prediction(
        future_num_frames, future_frames, selected_track_id, future_agents, agent_from_world, agent_yaw_rad
    )

    # history_num_frames + 1 because it also includes the current frame
    history_coords_offset, history_yaws_offset, history_availability = _create_targets_for_deep_prediction(
        history_num_frames + 1, history_frames, selected_track_id, history_agents, agent_from_world, agent_yaw_rad
    )

    return {
        "image": input_im,
        "target_positions": future_coords_offset,
        "target_yaws": future_yaws_offset,
        "target_availabilities": future_availability,
        "history_positions": history_coords_offset,
        "history_yaws": history_yaws_offset,
        "history_availabilities": history_availability,
        "world_to_image": raster_from_world,  # TODO deprecate
        "raster_from_agent": raster_from_world @ world_from_agent,
        "raster_from_world": raster_from_world,
        "agent_from_world": agent_from_world,
        "world_from_agent": world_from_agent,
        "centroid": agent_centroid_m,
        "yaw": agent_yaw_rad,
        "extent": agent_extent_m,
    }
def generate_frame_sample_without_hist(
    state_index: int,
    frames: zarr.core.Array,
    tl_faces: zarr.core.Array,
    agents: zarr.core.Array,
    agents_from_standard_mask_only: bool = False,
    mask_agent_indices: zarr.core.Array = None,
) -> dict:
    """Build a history-free sample for a single frame.

    Args:
        state_index (int): index of the frame to sample
        frames (zarr.core.Array): the frames array
        tl_faces (zarr.core.Array): the full traffic light faces array
        agents (zarr.core.Array): the full agents array
        agents_from_standard_mask_only (bool): when True, agents are picked through ``mask_agent_indices``
            (using the frame's ``mask_agent_index_interval``) instead of the frame's agent slice
        mask_agent_indices (zarr.core.Array): index table read only when
            ``agents_from_standard_mask_only`` is True

    Returns:
        dict: the ego centroid, the estimated ego speed (or None), the ego yaw, the "ACTIVE" traffic light
        faces of the frame (an empty list when they cannot be retrieved) and the selected agents
    """

    def _to_pacific(timestamp_ns):
        # frame timestamps are divided by 10**9 before being converted to a datetime
        return datetime.fromtimestamp(timestamp_ns / 10**9).astimezone(timezone("US/Pacific"))

    frame = frames[state_index]

    if agents_from_standard_mask_only:
        mask_interval = slice(*frame["mask_agent_index_interval"])
        selected_indices = [entry[0] for entry in mask_agent_indices[mask_interval]]
        if selected_indices:
            agents = agents.get_coordinate_selection(selected_indices).copy()
        else:
            agents = []
    else:
        agents = agents[get_agents_slice_from_frames(frame)].copy()

    ego_centroid = frame["ego_translation"][:2]

    # try to estimate ego velocity from the previous frame; stays None whenever
    # the previous frame is missing, too far away or too distant in time
    ego_speed = None
    if state_index > 0:
        previous_frame = frames[state_index - 1]
        previous_centroid = previous_frame["ego_translation"][:2]
        displacement_m = np.hypot(
            previous_centroid[0] - ego_centroid[0],
            previous_centroid[1] - ego_centroid[1],
        )
        if displacement_m < 10:
            current_time = _to_pacific(frame["timestamp"])
            previous_time = _to_pacific(previous_frame["timestamp"])
            elapsed_sec = (current_time - previous_time).total_seconds()
            if current_time > previous_time and elapsed_sec < 0.2:
                ego_speed = (ego_centroid - previous_centroid) / elapsed_sec

    try:
        tl_slice = get_tl_faces_slice_from_frames(frame)
        # sync the frame interval with the sliced traffic light faces array
        frame["traffic_light_faces_index_interval"] -= tl_slice.start
        tl_faces_this = filter_tl_faces_by_frames([frame], tl_faces[tl_slice].copy())[0]
        tl_faces_this = filter_tl_faces_by_status(tl_faces_this, "ACTIVE")
    except ValueError:
        tl_faces_this = []

    return {
        "ego_centroid": ego_centroid,
        "ego_speed": ego_speed,
        "ego_yaw": rotation33_as_yaw(frame["ego_rotation"]),
        "tl_faces": tl_faces_this,
        "agents": agents,
    }
def generate_multi_agent_sample(
    state_index: int,
    frames: np.ndarray,
    agents: np.ndarray,
    tl_faces: np.ndarray,
    selected_track_id: Optional[int],
    render_context: RenderContext,
    history_num_frames: int,
    history_step_size: int,
    future_num_frames: int,
    future_step_size: int,
    filter_agents_threshold: float,
    rasterizer: Optional[Rasterizer] = None,
    perturbation: Optional[Perturbation] = None,
    min_frame_history: int = MIN_FRAME_HISTORY,
    min_frame_future: int = MIN_FRAME_FUTURE,
) -> dict:
    """Generates the inputs and targets to train a deep prediction model for the ego and all agents of a frame.

    A deep prediction model takes as input the state of the world (here: an image we will call the "raster"),
    and outputs where that agent will be some seconds into the future. A single raster is drawn with no selected
    agent, and targets are generated for the ego plus every agent of the current frame that passes the label
    filter and has enough available history and future frames.

    This function has a lot of arguments and is intended for internal use, you should try to use higher level classes
    and partials that use this function.

    Args:
        state_index (int): The anchor frame index, i.e. the "current" timestep in the scene
        frames (np.ndarray): The scene frames array, can be numpy array or a zarr array
        agents (np.ndarray): The full agents array, can be numpy array or a zarr array
        tl_faces (np.ndarray): The full traffic light faces array, can be numpy array or a zarr array
        selected_track_id (Optional[int]): Ignored on input; the name is reused as the loop variable over the
            ego (-1) and the track ids found in the current frame
        render_context (RenderContext): Used to compute the ``raster_from_world`` transform from the ego
            centroid and yaw
        history_num_frames (int): Amount of history frames to draw into the rasters
        history_step_size (int): Steps to take between frames, can be used to subsample history frames
        future_num_frames (int): Amount of future frames to generate targets for
        future_step_size (int): Steps to take between targets into the future
        filter_agents_threshold (float): Value between 0 and 1 to use as cutoff value for agent filtering
            based on their probability of being a relevant agent
        rasterizer (Optional[Rasterizer]): Rasterizer of some sort that draws a map image; when falsy the
            returned ``"image"`` is None
        perturbation (Optional[Perturbation]): Object that perturbs the history and future frames, used to train
            models that can recover from slight divergence from training set data
        min_frame_history (int): Agents (not the ego) whose history availability sums below this are skipped
        min_frame_future (int): Agents (not the ego) whose future availability sums below this are skipped

    Raises:
        ValueError: A ValueError is raised if a track id taken from the current frame cannot be found again
            when looking up the agent.

    Returns:
        dict: a dict object with the raster array (``image``), per-agent stacked future and history offsets,
        yaws and availabilities, the ``raster_from_world`` transform, per-agent centroid / yaw / extent /
        track id (the ego uses track id -1) and the centroids transformed by ``raster_from_world``
    """
    # The history slice is ordered starting from the latest frame and goes backward in time,
    # e.g. slice(100, 91, -2)
    history_slice = get_history_slice(state_index, history_num_frames, history_step_size, include_current_state=True)
    future_slice = get_future_slice(state_index, future_num_frames, future_step_size)

    history_frames = frames[history_slice].copy()  # copy() required if the object is a np.ndarray
    future_frames = frames[future_slice].copy()

    sorted_frames = np.concatenate((history_frames[::-1], future_frames))  # from past to future

    # get agents (past and future)
    agent_slice = get_agents_slice_from_frames(sorted_frames[0], sorted_frames[-1])
    agents = agents[agent_slice].copy()  # this is the minimum slice of agents we need
    history_frames["agent_index_interval"] -= agent_slice.start  # sync interval with the agents array
    future_frames["agent_index_interval"] -= agent_slice.start  # sync interval with the agents array
    history_agents = filter_agents_by_frames(history_frames, agents)
    future_agents = filter_agents_by_frames(future_frames, agents)

    try:
        tl_slice = get_tl_faces_slice_from_frames(history_frames[-1], history_frames[0])  # -1 is the farthest
        # sync interval with the traffic light faces array
        history_frames["traffic_light_faces_index_interval"] -= tl_slice.start
        history_tl_faces = filter_tl_faces_by_frames(history_frames, tl_faces[tl_slice].copy())
    except ValueError:
        # fall back to one empty traffic light array per history frame
        history_tl_faces = [np.empty(0, dtype=TL_FACE_DTYPE) for _ in history_frames]

    if perturbation is not None:
        history_frames, future_frames = perturbation.perturb(
            history_frames=history_frames, future_frames=future_frames
        )

    # State you want to predict the future of.
    cur_frame = history_frames[0]
    cur_agents = history_agents[0]
    cur_agents = filter_agents_by_labels(cur_agents, filter_agents_threshold)

    agent_track_ids_u64 = cur_agents["track_id"]
    # uint64 --> int64, so that -1 can be used as the ego marker
    agent_track_ids = agent_track_ids_u64.astype(np.int64)
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    assert np.all(agent_track_ids == agent_track_ids_u64)
    # -1 (ego) is prepended so that the ego is always processed FIRST in the loop below
    agent_track_ids = np.concatenate([np.array([-1], dtype=np.int64), agent_track_ids])

    # Draw image with Ego car in center
    selected_agent = None
    input_im = (
        None
        if not rasterizer
        else rasterizer.rasterize(history_frames, history_agents, history_tl_faces, selected_agent)
    )

    future_coords_offset_list = []
    future_yaws_offset_list = []
    future_availability_list = []
    history_coords_offset_list = []
    history_yaws_offset_list = []
    history_availability_list = []
    agent_centroid_list = []
    agent_yaw_list = []
    agent_extent_list = []
    filtered_track_ids_list = []

    for selected_track_id in agent_track_ids:
        if selected_track_id == -1:
            agent_centroid = cur_frame["ego_translation"][:2]
            agent_yaw_rad = rotation33_as_yaw(cur_frame["ego_rotation"])
            agent_extent = np.asarray((EGO_EXTENT_LENGTH, EGO_EXTENT_WIDTH, EGO_EXTENT_HEIGHT))
            # these ego transforms are computed once here and reused, unchanged,
            # by every agent iteration that follows
            world_from_agent = compute_agent_pose(agent_centroid, agent_yaw_rad)
            agent_from_world = np.linalg.inv(world_from_agent)
            raster_from_world = render_context.raster_from_world(agent_centroid, agent_yaw_rad)
            agent_origin = np.zeros((2,), dtype=np.float32)
        else:
            # indexing [0] raises IndexError if the agent is not in the frame or under agent-threshold
            # this is a strict error, we cannot recover from this situation
            try:
                agent = filter_agents_by_track_id(cur_agents, selected_track_id)[0]
            except IndexError as err:
                # chain explicitly so the traceback shows the lookup failure as the cause
                raise ValueError(f" track_id {selected_track_id} not in frame or below threshold") from err
            agent_centroid = agent["centroid"]
            agent_yaw_rad = agent["yaw"]
            agent_extent = agent["extent"]
            # agent_from_world is still the ego transform from the -1 iteration
            agent_origin = transform_point(agent_centroid, agent_from_world)

        future_coords_offset, future_yaws_offset, future_availability = _create_targets_for_deep_prediction(
            future_num_frames,
            future_frames,
            selected_track_id,
            future_agents,
            agent_from_world,
            agent_yaw_rad,
            agent_origin,
        )
        if selected_track_id != -1 and np.sum(future_availability) < min_frame_future:
            # Not enough future to predict, skip this agent.
            continue

        # history_num_frames + 1 because it also includes the current frame
        history_coords_offset, history_yaws_offset, history_availability = _create_targets_for_deep_prediction(
            history_num_frames + 1,
            history_frames,
            selected_track_id,
            history_agents,
            agent_from_world,
            agent_yaw_rad,
            agent_origin,
        )
        if selected_track_id != -1 and np.sum(history_availability) < min_frame_history:
            # Not enough history to predict, skip this agent.
            continue

        future_coords_offset_list.append(future_coords_offset)
        future_yaws_offset_list.append(future_yaws_offset)
        future_availability_list.append(future_availability)
        history_coords_offset_list.append(history_coords_offset)
        history_yaws_offset_list.append(history_yaws_offset)
        history_availability_list.append(history_availability)
        agent_centroid_list.append(agent_centroid)
        agent_yaw_list.append(agent_yaw_rad)
        agent_extent_list.append(agent_extent)
        filtered_track_ids_list.append(selected_track_id)

    # Get pixel coordinate (the ego is never skipped, so the centroid list is not empty)
    agent_centroid_array = np.array(agent_centroid_list)
    agent_centroid_in_pixel = transform_points(agent_centroid_array, raster_from_world)

    # NOTE(review): the shape notes below come from the original author and are not checked in this function
    return {
        "image": input_im,  # (h, w, ch)
        "target_positions": np.array(future_coords_offset_list),  # (n_agents, num_frames, 2)
        "target_yaws": np.array(future_yaws_offset_list),  # (n_agents, num_frames, 1)
        "target_availabilities": np.array(future_availability_list),  # (n_agents, num_frames)
        "history_positions": np.array(history_coords_offset_list),  # (n_agents, num_frames, 2)
        "history_yaws": np.array(history_yaws_offset_list),  # (n_agents, num_frames, 1)
        "history_availabilities": np.array(history_availability_list),  # (n_agents, num_frames)
        # "world_to_image": raster_from_world,  # (3, 3)
        "raster_from_world": raster_from_world,  # (3, 3)
        "centroid": agent_centroid_array,  # (n_agents, 2)
        "yaw": np.array(agent_yaw_list),  # (n_agents, 1)
        "extent": np.array(agent_extent_list),  # (n_agents, 3)
        "track_ids": np.array(filtered_track_ids_list),  # (n_agents)
        "centroid_pixel": agent_centroid_in_pixel,  # (n_agents, 2)
    }