class CoverNetBaseline:
    """Callable baseline that returns the five top-ranked CoverNet lattice trajectories for an agent."""

    def __init__(self, sec_from_now: float, helper: PredictHelper):
        """
        Inits Baseline.
        :param sec_from_now: How many seconds into the future to make the prediction.
        :param helper: Instance of PredictHelper.
        """
        assert sec_from_now % 0.5 == 0, f"Parameter sec from now must be divisible by 0.5. Received {sec_from_now}."
        self.helper = helper
        self.sec_from_now = sec_from_now
        self.sampled_at = 2  # 2 Hz between annotations.

        # Both heads are built on the same backbone instance.
        backbone = ResNetBackbone('resnet50')
        self.mtp = MTP(backbone, num_modes=2)
        # Note that the value of num_modes depends on the size of the lattice used for CoverNet.
        self.covernet = CoverNet(backbone, num_modes=64)

        static_layer_rasterizer = StaticLayerRasterizer(helper)
        agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=1)
        self.mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

        # Close the lattice file deterministically instead of leaking the handle.
        # NOTE(review): pickle must only be used on trusted files; PATH_TO_EPSILON_8_SET
        # is assumed to point at a locally stored trajectory set.
        with open(PATH_TO_EPSILON_8_SET, 'rb') as lattice_file:
            lattice = pickle.load(lattice_file)
        self.trajectories = torch.Tensor(lattice)

    def __call__(self, token: str) -> Prediction:
        """
        Makes prediction.
        :param token: string of format {instance_token}_{sample_token}.
        :return: Prediction holding the five highest-scoring lattice trajectories together
            with the softmax probability CoverNet assigns to each of them.
        """
        instance_token_img, sample_token_img = token.split("_")

        img = self.mtp_input_representation.make_input_representation(instance_token_img, sample_token_img)
        # Channel-last raster -> channel-first tensor with a leading batch axis of size 1.
        image_tensor = torch.Tensor(img).permute(2, 0, 1).unsqueeze(0)

        agent_state_vector = torch.Tensor([[
            self.helper.get_velocity_for_agent(instance_token_img, sample_token_img),
            self.helper.get_acceleration_for_agent(instance_token_img, sample_token_img),
            self.helper.get_heading_change_rate_for_agent(instance_token_img, sample_token_img),
        ]])

        # NOTE(review): the MTP output is not used below; the call is kept only so that this
        # method performs the same forward passes as before. Confirm whether it can be dropped.
        self.mtp(image_tensor, agent_state_vector)
        covernet_logits = self.covernet(image_tensor, agent_state_vector)

        # Rank the modes by logit and keep the five best.
        top_modes = covernet_logits.argsort(descending=True).squeeze()[:5]

        # Report real probabilities for the kept modes. Previously the mode *indices*
        # were handed to Prediction in place of probabilities.
        mode_probabilities = torch.softmax(covernet_logits, dim=-1).squeeze()

        covernet_trajectories = self.trajectories[top_modes].detach().cpu().numpy()
        covernet_probabilities = mode_probabilities[top_modes].detach().cpu().numpy()

        return Prediction(instance_token_img, sample_token_img, covernet_trajectories, covernet_probabilities)
class MTPDataset(Dataset):
    """Dataset yielding the raster image, agent state and future trajectory for each prediction token."""

    def __init__(self, instance_sample_tokens, helper):
        """
        :param instance_sample_tokens: Sequence of strings of format {instance_token}_{sample_token}.
        :param helper: Helper object used for rasterization and for agent state / future queries.
        """
        self.instance_sample_tokens = instance_sample_tokens
        self.helper = helper

        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=SECONDS_OF_HISTORY)
        self.mtp_input_representation = InputRepresentation(
            self.static_layer_rasterizer, self.agent_rasterizer, Rasterizer())

        # Per-channel mean / std applied to the [0, 1]-scaled image (the usual ImageNet statistics).
        self.transform_fn = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def __len__(self):
        """Return the number of prediction tokens."""
        return len(self.instance_sample_tokens)

    def __getitem__(self, idx):
        """
        Build one training example.
        :param idx: Index into instance_sample_tokens.
        :return: Tuple (image_tensor, agent_state_vector, ground_truth, instance_token, sample_token),
            where image_tensor is channel-first and ground_truth has shape (1, 12, 2).
        """
        instance_token, sample_token = self.instance_sample_tokens[idx].split("_")

        image = self.mtp_input_representation.make_input_representation(instance_token, sample_token)
        # Move the channel axis to the front. A plain reshape to (3, H, W) would keep the
        # memory order and therefore mix pixels of different channels and rows.
        image = np.transpose(image, (2, 0, 1))
        image = image / 255
        image_tensor = torch.from_numpy(image)
        image_tensor = self.transform_fn(image_tensor)

        agent_state_vector = np.array([
            self.helper.get_velocity_for_agent(instance_token, sample_token),
            self.helper.get_acceleration_for_agent(instance_token, sample_token),
            self.helper.get_heading_change_rate_for_agent(instance_token, sample_token),
        ])

        # For MTP: "the targets are of shape [batch_size, 1, n_timesteps, 2]"
        # where n_timesteps = 2 * seconds predicted = 12
        ground_truth = self.helper.get_future_for_agent(
            instance_token, sample_token, seconds=SECONDS_TO_PREDICT, in_agent_frame=True)

        return (image_tensor, agent_state_vector, ground_truth.reshape((1, 12, 2)),
                instance_token, sample_token)
def get_format_mha_jam_maps(self, states_filepath, out_file):
    """
    Render one raster map per agent-state row and write it next to ``out_file``.

    :param states_filepath: Text file with one comma-separated row of floats per agent.
        The first value is an agent id; the remaining values are per-frame groups of
        (frame_id, x, y, v, a, yaw_rate).
    :param out_file: Output path template. The substrings "_train" and "mini" select which
        id-to-token JSON dictionary is loaded, and the suffix "_.jpg" is replaced with
        "__<row index>.jpg" for each image.
        NOTE(review): if ``out_file`` does not contain "_.jpg" every image is written to the same path.
    """
    with open(states_filepath) as fr:
        # Blank lines (e.g. a trailing newline at the end of the file) carry no state and are skipped.
        agents_states = [[float(x.rstrip()) for x in s.split(',')] for s in fr if s.strip()]

    mode = "train" if "_train" in out_file else "val"
    mini = "mini" if "mini" in out_file else "main"

    with open("dicts_sample_and_instances_id2token_" + mode + "_" + mini + ".json") as fr:
        instance_dict_id_token, sample_dict_id_token = json.load(fr)

    static_layer_rasterizer = StaticLayerRasterizer(self.helper)
    agent_rasterizer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=1)
    mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

    # dirname is empty when out_file has no directory component; makedirs('') would raise.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Get map for each sample in states
    for agent_ind, agent in enumerate(tqdm(agents_states)):
        instance_token = instance_dict_id_token[str(int(agent[0]))]
        # frame_id of the group that sits MAX_TRAJ_LEN groups (6 values each) after the agent id.
        mid_frame_id = int(agent[1 + 6 * (MAX_TRAJ_LEN)])
        sample_token = sample_dict_id_token[str(mid_frame_id)]
        img = mtp_input_representation.make_input_representation(instance_token, sample_token)
        cv2.imwrite(out_file.replace("_.jpg", "__" + str(agent_ind) + ".jpg"), img)
def main(version: str, data_root: str, split_name: str, output_dir: str, config_name: str = 'predict_2020_icra.json') -> None:
    """
    Displays the raster input representation next to two camera views for a fixed
    subset of the tokens in a prediction-challenge split.
    :param version: nuScenes data set version.
    :param data_root: Directory where the NuScenes data is stored.
    :param split_name: nuScenes data split name, e.g. train, val, mini_train, etc.
    :param output_dir: Directory where predictions should be stored (not used by this function).
    :param config_name: Name of config file.
    """
    print('timing point A')
    nusc = NuScenes(version=version, dataroot=data_root)
    print('timing point B')
    helper = PredictHelper(nusc)
    print('timing point C')
    split_tokens = get_prediction_challenge_split(split_name, dataroot=data_root)
    print('timing point D')
    # The config is loaded but its value is not needed afterwards.
    load_prediction_config(helper, config_name)
    print('timing point E')

    # rasterization
    raster_input = InputRepresentation(
        StaticLayerRasterizer(helper),
        AgentBoxesWithFadedHistory(helper, seconds_of_history=3),
        Rasterizer(),
    )

    # Every second token between positions 40 and 60 of the split.
    for token in split_tokens[40:60:2]:
        _, panels = plt.subplots(1, 3, figsize=(18, 9))
        print(token)
        instance_token, sample_token = token.split('_')

        plot_cam_view(panels[1], nusc, token)
        plot_cam_view(panels[2], nusc, token, cam_name='CAM_FRONT_RIGHT')

        raster = raster_input.make_input_representation(instance_token, sample_token)
        panels[0].imshow(raster)
        plt.show()
from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory
from nuscenes.prediction.input_representation.combinators import Rasterizer
from nuscenes.prediction.input_representation.interface import InputRepresentation
from nuscenes import NuScenes
import matplotlib.pyplot as plt
import torch

# Load the mini split of nuScenes from the local data directory.
DATAROOT = '/data/sets/nuscenes'
nuscenes = NuScenes('v1.0-mini', dataroot=DATAROOT)

# Data Splits for the Prediction Challenge

# input representation
# NOTE(review): `StaticLayerRasterizer`, `helper` and `seconds_of_history` are neither imported
# nor defined in this excerpt; they must be provided before these statements run.
static_layer_rasterizer = StaticLayerRasterizer(helper)
agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history)
mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

# Example agent (instance token) at a given sample (sample token).
instance_token_img, sample_token_img = 'bc38961ca0ac4b14ab90e547ba79fbb6', '7626dde27d604ac28a0240bdd54eba7a'

# All annotations of the example agent (not used again within this excerpt).
anns = [
    ann for ann in nuscenes.sample_annotation
    if ann['instance_token'] == instance_token_img
]

# Rasterize the scene around the example agent and display it.
img = mtp_input_representation.make_input_representation(
    instance_token_img, sample_token_img)
plt.imshow(img)

# Model Implementations
class NuscenesDataset(Dataset):
    """Dataset that serves pre-rendered raster maps plus agent state and future trajectory per token."""

    # Supported ``maps_dir`` names and the prediction-challenge split stored in each.
    _SPLIT_BY_MAPS_DIR = {
        'maps_train': 'train',
        'maps': 'train_val',
        'maps_val': 'val',
    }

    def __init__(self,
                 nusc,
                 helper,
                 maps_dir,
                 save_maps_dataset=False,
                 config_name='predict_2020_icra.json',
                 history=1,
                 num_examples=None,
                 in_agent_frame=True):
        """
        :param nusc: nuScenes object; its ``dataroot`` is used to load the split.
        :param helper: Helper used for rasterization and agent state / future queries.
        :param maps_dir: One of 'maps_train', 'maps' or 'maps_val'; selects the split and names
            the directory (relative to the current working directory) holding the map images.
        :param save_maps_dataset: If True, render and save every map image during construction.
        :param config_name: Name of the prediction config file.
        :param history: Seconds of agent history drawn into the raster.
        :param num_examples: If truthy, keep only the first ``num_examples`` tokens.
        :param in_agent_frame: Whether the ground truth is expressed in the agent frame.
        :raises ValueError: If ``maps_dir`` is not one of the supported names.
        """
        self.nusc = nusc
        self.helper = helper

        # initialize the data set; fail with a clear message instead of an unbound local later on
        try:
            dataset_version = self._SPLIT_BY_MAPS_DIR[maps_dir]
        except KeyError:
            raise ValueError(
                "maps_dir must be one of {0}, got {1!r}".format(
                    sorted(self._SPLIT_BY_MAPS_DIR), maps_dir)) from None

        # initialize maps directory where everything will be saved
        self.maps_dir = os.path.join(os.getcwd(), maps_dir)
        self.data_set = get_prediction_challenge_split(
            dataset_version, dataroot=self.nusc.dataroot)

        if num_examples:
            self.data_set = self.data_set[:num_examples]

        # initialize rasterizers for map generation
        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(
            self.helper, seconds_of_history=history)
        self.mtp_input_representation = InputRepresentation(
            self.static_layer_rasterizer, self.agent_rasterizer, Rasterizer())

        self.in_agent_frame = in_agent_frame
        self.config = load_prediction_config(self.helper, config_name)

        self.save_maps_dataset = save_maps_dataset
        if self.save_maps_dataset:
            self.save_maps()

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

    def save_maps(self):
        '''
        Input: None
        Output: None

        saves all the maps to the maps_dir directory specified in constructor
        '''
        print("starting to save maps")
        # The target directory may not exist yet on a fresh checkout.
        os.makedirs(self.maps_dir, exist_ok=True)
        for i, token in enumerate(self.data_set):
            instance_token_img, sample_token_img = token.split('_')
            file_path = os.path.join(self.maps_dir, "maps_{0}.jpg".format(i))
            img = self.mtp_input_representation.make_input_representation(
                instance_token_img, sample_token_img)
            Image.fromarray(img).save(file_path)
            print("{0}/{1} image saved".format(i, len(self.data_set)))
        print("done saving maps")

    def __len__(self):
        """Return the number of tokens in the (possibly truncated) split."""
        return len(self.data_set)

    def __getitem__(self, index):
        """
        Return the image tensor, agent state vector, ground truth and token for one example.
        :param index: Position of the example in the split; also selects ``maps_<index>.jpg``.
        :return: (image_tensor, agent_state_vec, ground_truth, token); image_tensor is
            channel-first and ground_truth carries an extra leading axis of size 1.
        """
        token = self.data_set[index]
        instance_token_img, sample_token_img = token.split('_')

        velocity = self.helper.get_velocity_for_agent(instance_token_img,
                                                      sample_token_img)
        acceleration = self.helper.get_acceleration_for_agent(
            instance_token_img, sample_token_img)
        heading = self.helper.get_heading_change_rate_for_agent(
            instance_token_img, sample_token_img)

        # using a padding token of -1: if any state is NaN, all three are replaced
        if np.isnan(velocity) or np.isnan(acceleration) or np.isnan(heading):
            velocity = acceleration = heading = -1

        # construct agent state vector
        agent_state_vec = torch.Tensor([velocity, acceleration, heading])

        # get image and construct tensor; the file is closed as soon as the pixels are read
        file_path = os.path.join(self.maps_dir, "maps_{0}.jpg".format(index))
        with Image.open(file_path) as im:
            img = np.array(im)

        # change image from (N,N,3) -> (3, N, N), will have data loader take care of
        # unsqueezing; outputs (Batch Size, 3, N, N)
        image_tensor = torch.Tensor(img).permute(2, 0, 1)

        # get ground truth
        ground_truth = self.helper.get_future_for_agent(
            instance_token_img,
            sample_token_img,
            self.config.seconds,
            in_agent_frame=self.in_agent_frame)
        ground_truth = torch.Tensor(ground_truth).unsqueeze(0)

        return image_tensor, agent_state_vec, ground_truth, token
class NS(Dataset):
    """nuScenes prediction dataset producing grid-based inputs and targets around a target agent."""

    def __init__(self,
                 dataroot: str,
                 split: str,
                 t_h: float = 2,
                 t_f: float = 6,
                 grid_dim: int = 25,
                 img_size: int = 200,
                 horizon: int = 40,
                 grid_extent: Tuple[int, int, int, int] = (-25, 25, -10, 40),
                 num_actions: int = 4,
                 image_extraction_mode: bool = False):
        """
        Initializes dataset class for nuScenes prediction

        :param dataroot: Path to tables and data
        :param split: Dataset split for prediction benchmark ('train'/'train_val'/'val')
        :param t_h: Track history in seconds
        :param t_f: Prediction horizon in seconds
        :param grid_dim: Size of grid, default: 25x25
        :param img_size: Size of raster map image in pixels, default: 200x200
        :param horizon: MDP horizon
        :param grid_extent: Map extents in meters, (-left, right, -behind, front)
        :param num_actions: Number of actions for each state
            (4: [D,R,U,L] or 8: [D, R, U, L, DR, UR, DL, UL])
        :param image_extraction_mode: Whether dataset class is being used for image extraction
        """
        # Nuscenes dataset and predict helper
        self.dataroot = dataroot
        self.ns = NuScenes('v1.0-trainval', dataroot=dataroot)
        self.helper = PredictHelper(self.ns)
        self.token_list = get_prediction_challenge_split(split, dataroot=dataroot)

        # Useful parameters
        self.grid_dim = grid_dim
        self.grid_extent = grid_extent
        self.img_size = img_size
        self.t_f = t_f
        self.t_h = t_h
        self.horizon = horizon
        self.num_actions = num_actions

        # Map row and column states to actual values (cell centers in meters).
        # Rows run from the front of the crop to the back, columns from left to right.
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        half_cell = grid_size_m / (self.grid_dim * 2)
        self.row_centers = np.linspace(self.grid_extent[3] - half_cell,
                                       self.grid_extent[2] + half_cell,
                                       self.grid_dim)
        self.col_centers = np.linspace(self.grid_extent[0] + half_cell,
                                       self.grid_extent[1] - half_cell,
                                       self.grid_dim)

        # Keyword arguments shared by every rasterizer built below.
        raster_kwargs = dict(resolution=grid_size_m / img_size,
                             meters_ahead=grid_extent[3],
                             meters_behind=-grid_extent[2],
                             meters_left=-grid_extent[0],
                             meters_right=grid_extent[1])

        # Surrounding agent input representation: populate grid with velocity, acc, yaw-rate
        self.agent_ip = AgentMotionStatesOnGrid(self.helper, **raster_kwargs)

        # Image extraction mode is used for extracting map images offline prior to training
        self.image_extraction_mode = image_extraction_mode
        if self.image_extraction_mode:
            # Raster map representation
            self.map_ip = StaticLayerRasterizer(self.helper, **raster_kwargs)

            # Raster map with agent boxes. Only used for visualization
            static_layer_rasterizer = StaticLayerRasterizer(self.helper, **raster_kwargs)
            agent_rasterizer = AgentBoxesWithFadedHistory(
                self.helper, seconds_of_history=1, **raster_kwargs)
            self.map_ip_agents = InputRepresentation(static_layer_rasterizer,
                                                     agent_rasterizer,
                                                     Rasterizer())

    def __len__(self):
        """Return the number of prediction tokens in the split."""
        return len(self.token_list)

    def _raster_map_dir(self) -> str:
        """Return the directory that holds the pre-extracted raster map images."""
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        return os.path.join(
            self.dataroot, 'prediction_raster_maps',
            'images' + str(self.img_size) + "_" + str(int(grid_size_m)) + 'm')

    def __getitem__(self, idx):
        """
        Returns inputs, ground truth values and other utilities for data point at given index

        In image extraction mode the raster images are written to disk and 13 dummy zeros
        are returned instead of the values below.

        :return hist: snippet of track history, default 2s sampled at 2 Hz (0.5s between samples)
        :return fut: ground truth future trajectory, default 6s
        :return img: Imagenet normalized bird's eye view map around the target vehicle
        :return svf_e: Goal and path state visitation frequencies for expert demonstration, ie. path from
            train set
        :return motion_feats: motion and position features used for reward model
        :return waypts_e: BEV co-ordinates corresponding to grid cells of svf_e
        :return agents: tensor of surrounding agent states populated in grid around target agent
        :return grid_idcs: grid co-ordinates of svf_e
        :return bc_targets: ground truth actions for training behavior cloning model
        :return img_agents: image with agent boxes for visualization / debugging
        :return instance_token: nuScenes instance token for prediction instance
        :return sample_token: nuScenes sample token for prediction instance
        :return idx: instance id (mainly for debugging)
        """
        # Nuscenes instance and sample token for prediction data point
        instance_token, sample_token = self.token_list[idx].split("_")
        img_dir = self._raster_map_dir()
        map_path = os.path.join(img_dir, instance_token + "_" + sample_token + '.png')
        agents_map_path = os.path.join(
            img_dir, instance_token + "_" + sample_token + 'agents.png')

        # If dataset is being used for image extraction
        if self.image_extraction_mode:
            # Make directory to store raster map images. makedirs also creates the parent
            # folder and tolerates another worker having created the directory first.
            os.makedirs(img_dir, exist_ok=True)

            # Generate and save raster map image with just static elements
            img = self.map_ip.make_representation(instance_token, sample_token)
            Image.fromarray(img).save(map_path)

            # Generate and save raster map image with static elements and agent boxes
            # (for visualization only)
            img_agents = self.map_ip_agents.make_input_representation(
                instance_token, sample_token)
            Image.fromarray(img_agents).save(agents_map_path)

            # Return dummy values
            return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

        # Dataset is being used for training/validation/testing

        # Get track history for agent:
        hist = torch.from_numpy(self.get_hist(instance_token, sample_token))

        # Get ground truth future for agent:
        fut = self.helper.get_future_for_agent(instance_token,
                                               sample_token,
                                               seconds=self.t_f,
                                               in_agent_frame=True)
        fut = torch.from_numpy(fut)

        # Get indefinite future for computing expert State visitation frequencies (SVF):
        fut_indefinite = self.helper.get_future_for_agent(instance_token,
                                                          sample_token,
                                                          seconds=300,
                                                          in_agent_frame=True)

        # Up sample indefinite future by a factor of 10; the origin (0, 0) is prepended
        # as the starting point of the interpolation.
        n_future = fut_indefinite.shape[0]
        fut_interpolated = np.zeros((n_future * 10 + 1, 2))
        param_query = np.linspace(0, n_future, n_future * 10 + 1)
        param_given = np.linspace(0, n_future, n_future + 1)
        val_given_x = np.concatenate(([0], fut_indefinite[:, 0]))
        val_given_y = np.concatenate(([0], fut_indefinite[:, 1]))
        fut_interpolated[:, 0] = np.interp(param_query, param_given, val_given_x)
        fut_interpolated[:, 1] = np.interp(param_query, param_given, val_given_y)

        # Read pre-extracted raster map image
        img = cv2.imread(map_path)

        # Pre-process image
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.from_numpy(img)
        img = img.permute((2, 0, 1)).float() / 255

        # Normalize using Imagenet stats
        img = normalize_imagenet(img)

        # Read pre-extracted raster map with agent boxes (for visualization + debugging)
        img_agents = cv2.imread(agents_map_path)

        # Pre-process image
        img_agents = cv2.cvtColor(img_agents, cv2.COLOR_BGR2RGB)
        img_agents = torch.from_numpy(img_agents)
        img_agents = img_agents.permute((2, 0, 1)).float() / 255

        # Get surrounding agent states
        agents = torch.from_numpy(
            self.agent_ip.make_representation(instance_token, sample_token))
        agents = agents.permute((2, 0, 1)).float()

        # Sum pool states to down-sample to grid dimensions: average pooling multiplied
        # by the number of pixels in each pooling window.
        pool_size = self.img_size // self.grid_dim
        agents = f.avg_pool2d(agents[None, :, :, :], pool_size)
        agents = agents.squeeze(dim=0) * (pool_size**2)

        # Get expert SVF:
        svf_e, waypts_e, grid_idcs = self.get_expert_waypoints(fut_interpolated)
        svf_e = torch.from_numpy(svf_e)
        waypts_e = torch.from_numpy(waypts_e)
        grid_idcs = torch.from_numpy(grid_idcs)

        # Get motion and position feats:
        motion_feats = torch.from_numpy(
            self.get_motion_feats(instance_token, sample_token))

        # Targets for behavior cloning model:
        bc_targets = torch.from_numpy(self.get_bc_targets(fut_interpolated))

        return hist, fut, img, svf_e, motion_feats, waypts_e, agents, grid_idcs, bc_targets, img_agents, \
            instance_token, sample_token, idx

    def get_hist(self, instance_token: str, sample_token: str):
        """
        Function to get track history of agent

        :param instance_token: nuScenes instance token for datapoint
        :param sample_token nuScenes sample token for datapoint
        :return: array of shape (int(t_h) * 2, 5) with columns (x, y, velocity, acceleration,
            heading change rate), oldest entry first, zero padded at the start for short tracks
        """
        # x, y co-ordinates in agent's frame of reference
        xy = self.helper.get_past_for_agent(instance_token,
                                            sample_token,
                                            seconds=self.t_h,
                                            in_agent_frame=True)

        # Get all history records for obtaining velocity, acceleration and turn rate values
        hist_records = self.helper.get_past_for_agent(instance_token,
                                                      sample_token,
                                                      seconds=self.t_h,
                                                      in_agent_frame=True,
                                                      just_xy=False)

        max_steps = int(self.t_h) * 2
        if xy.shape[0] > self.t_h * 2:
            xy = xy[0:max_steps]
        if len(hist_records) > self.t_h * 2:
            hist_records = hist_records[0:max_steps]

        # Initialize hist tensor and set x and y co-ordinates returned by prediction helper
        n_steps = xy.shape[0]
        hist = np.zeros((n_steps, 5))
        if n_steps:
            hist[:, 0:2] = xy

        # Instance and sample tokens from history records, preceded by the current tokens.
        # NOTE(review): because the current sample is inserted at position 0, the motion
        # states in row k come from one entry earlier in this list than xy[k] — confirm
        # this offset is intended.
        i_tokens = [instance_token] + [record['instance_token'] for record in hist_records]
        s_tokens = [sample_token] + [record['sample_token'] for record in hist_records]

        # Set velocity, acc and turn rate values for hist
        for k in range(n_steps):
            i_t = i_tokens[k]
            s_t = s_tokens[k]
            v = self.helper.get_velocity_for_agent(i_t, s_t)
            a = self.helper.get_acceleration_for_agent(i_t, s_t)
            theta = self.helper.get_heading_change_rate_for_agent(i_t, s_t)

            # If function returns nan values due to short tracks, set corresponding value to 0
            hist[k, 2] = 0 if np.isnan(v) else v
            hist[k, 3] = 0 if np.isnan(a) else a
            hist[k, 4] = 0 if np.isnan(theta) else theta

        # Zero pad for track histories shorter than t_h
        hist_zeropadded = np.zeros((max_steps, 5))

        # Flip to have correct order of timestamps. With no history at all the buffer stays
        # zero: the slice [-0:] would address the whole buffer rather than an empty tail.
        if n_steps:
            hist_zeropadded[-n_steps:] = np.flip(hist, 0)

        return hist_zeropadded

    def get_expert_waypoints(self, fut: np.ndarray):
        """
        Function to get the expert's state visitation frequencies based on their trajectory

        :param fut: numpy array with future trajectory of for all available future timestamps,
            up-sampled by 10
        :return svf_e: array (2, grid_dim, grid_dim); channel 0 marks visited cells, channel 1
            marks the goal cell (left all zero when the first point is already outside the crop)
        :return waypts_e: array (horizon, 2); column 0 holds the row-center value and column 1
            the column-center value of each visited cell, in visiting order
        :return grid_idcs: array (horizon, 2) with the (row, column) indices of those cells
        """
        # Expert state visitation frequencies for training reward model, waypoints in meters
        # and grid indices
        svf_e = np.zeros((2, self.grid_dim, self.grid_dim))
        waypts_e = np.zeros((self.horizon, 2))
        grid_idcs = np.zeros((self.horizon, 2))

        count = 0
        last_cell = None  # (row, column) of the most recent in-crop point
        for k in range(fut.shape[0]):
            # Demonstration ends when expert leaves the image crop corresponding to the grid:
            inside_crop = (self.grid_extent[0] <= fut[k, 0] <= self.grid_extent[1]
                           and self.grid_extent[2] <= fut[k, 1] <= self.grid_extent[3])
            if not inside_crop:
                break

            # Convert trajectory (x,y) co-ordinates to grid locations:
            column = int(np.argmin(np.absolute(fut[k, 0] - self.col_centers)))
            row = int(np.argmin(np.absolute(fut[k, 1] - self.row_centers)))

            # Check if cell location has changed
            if (row, column) != last_cell:
                # Add cell location to path states of expert
                svf_e[0, row, column] = 1

                if count < self.horizon:
                    # Get BEV coordinates corresponding to cell locations
                    waypts_e[count, 0] = self.row_centers[row]
                    waypts_e[count, 1] = self.col_centers[column]
                    grid_idcs[count, 0] = row
                    grid_idcs[count, 1] = column
                    count += 1

            last_cell = (row, column)

        # Last cell location where demonstration terminates is the goal state. There is no
        # goal when not a single point fell inside the crop.
        if last_cell is not None:
            svf_e[1, last_cell[0], last_cell[1]] = 1

        return svf_e, waypts_e, grid_idcs

    def get_motion_feats(self, instance_token: str, sample_token: str):
        """
        Function to get motion and position features over grid for reward model

        :param instance_token: NuScenes instance token for datapoint
        :param sample_token: NuScenes sample token for datapoint
        :return: array (3, grid_dim, grid_dim): agent velocity broadcast over the grid, then the
            x and y cell-center co-ordinates divided by the grid size in meters
        """
        feats = np.zeros((3, self.grid_dim, self.grid_dim))

        # X and Y co-ordinates over grid
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        half_cell = grid_size_m / (self.grid_dim * 2)
        y = (np.linspace(self.grid_extent[3] - half_cell,
                         self.grid_extent[2] + half_cell,
                         self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1)
        x = (np.linspace(self.grid_extent[0] + half_cell,
                         self.grid_extent[1] - half_cell,
                         self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1).transpose()

        # Velocity of agent
        v = self.helper.get_velocity_for_agent(instance_token, sample_token)
        if np.isnan(v):
            v = 0

        # Normalize X and Y co-ordinates over grid
        feats[0] = v
        feats[1] = x / grid_size_m
        feats[2] = y / grid_size_m

        return feats

    def _action_for_angle(self, theta) -> int:
        """Return the action channel for a cell transition with direction angle theta (radians)."""
        if self.num_actions == 4:  # [D,R,U,L,end]
            if np.pi / 4 <= theta < 3 * np.pi / 4:
                return 0
            if -np.pi / 4 <= theta < np.pi / 4:
                return 1
            if -3 * np.pi / 4 <= theta < -np.pi / 4:
                return 2
            return 3

        # [D, R, U, L, DR, UR, DL, UL, end]
        if 3 * np.pi / 8 <= theta < 5 * np.pi / 8:
            return 0
        if -np.pi / 8 <= theta < np.pi / 8:
            return 1
        if -5 * np.pi / 8 <= theta < -3 * np.pi / 8:
            return 2
        if np.pi / 8 <= theta < 3 * np.pi / 8:
            return 4
        if -3 * np.pi / 8 <= theta < -np.pi / 8:
            return 5
        if 5 * np.pi / 8 <= theta < 7 * np.pi / 8:
            return 6
        if -7 * np.pi / 8 <= theta < -5 * np.pi / 8:
            return 7
        return 3

    def get_bc_targets(self, fut: np.ndarray):
        """
        Function to get targets for behavior cloning model

        :param fut: numpy array with future trajectory of for all available future timestamps,
            up-sampled by 10
        :return: array (num_actions + 1, grid_dim, grid_dim); for every cell the expert leaves,
            the channel of the action taken is set to 1, and the last channel marks the cell
            where the demonstration ends
        """
        bc_targets = np.zeros((self.num_actions + 1, self.grid_dim, self.grid_dim))
        column_prev = np.argmin(np.absolute(fut[0, 0] - self.col_centers))
        row_prev = np.argmin(np.absolute(fut[0, 1] - self.row_centers))

        for k in range(fut.shape[0]):
            # Demonstration ends when expert leaves the image crop corresponding to the grid:
            inside_crop = (self.grid_extent[0] <= fut[k, 0] <= self.grid_extent[1]
                           and self.grid_extent[2] <= fut[k, 1] <= self.grid_extent[3])
            if not inside_crop:
                break

            # Convert trajectory (x,y) co-ordinates to grid locations:
            column = np.argmin(np.absolute(fut[k, 0] - self.col_centers))
            row = np.argmin(np.absolute(fut[k, 1] - self.row_centers))

            # Check if cell location has changed
            if row != row_prev or column != column_prev:
                # A revisited cell keeps only the most recent action.
                bc_targets[:, int(row_prev), int(column_prev)] = 0

                # Direction of the move in grid index space; row indices grow towards the
                # back of the crop because row_centers run from front to back.
                d_x = column - column_prev
                d_y = row - row_prev
                theta = np.arctan2(d_y, d_x)

                # Assign ground truth actions for expert demonstration
                action = self._action_for_angle(theta)
                bc_targets[action, int(row_prev), int(column_prev)] = 1

            column_prev = column
            row_prev = row

        # Final action is the end action to transition to the goal state:
        bc_targets[self.num_actions, int(row_prev), int(column_prev)] = 1

        return bc_targets
def __getitem__(self, test_idx):
    # Look up the scene at position `test_idx` and collect per-agent data for it.
    #
    # NOTE(review): in its current state this method ALWAYS returns an empty list. It returns
    # [] early when the scene has fewer than two tokens, and otherwise hits the unconditional
    # `return []` right after printing `ordered_tokens`. Everything below that statement is
    # unreachable work-in-progress code and is documented here only as written.

    # get the scene
    scene = self.trainset[test_idx]

    # get all the tokens in the scene
    # List of scene tokens in the given scene, where each item consists of an instance token
    # and a sample token separated by an underscore
    scene_tokens = self.prediction_scenes[scene]

    # Return if fewer than 2 tokens in this scene
    if len(scene_tokens) < 2:
        print("Not enough agents in the scene")
        return []

    # get the tokens in the scene: we will be using the instance tokens as that is the agent in the scene
    tokens = [scene_tok.split("_") for scene_tok in scene_tokens]

    # List of instance tokens and sample tokens
    instance_tokens, sample_tokens = list(list(zip(*tokens))[0]), list(
        list(zip(*tokens))[1])

    assert len(instance_tokens) == len(
        sample_tokens), "Instance and Sample tokens count does not match"

    ''' 1. Convert list of sample and instance tokens into an ordered dict where sample tokens are the keys
    2. Iterate over all combinations (of length TRAJECOTRY_TIME_INTERVAL) of consecutive samples
    3. Form a list of data points where each data point has TRAJECOTRY_TIME_INTERVAL sample tokens where each sample token has data for all instance tokens identified in step 2
    4. Create 3 numy arrays each for coordinates, heading_change_rate and map with appropriate shapes
    5. Iterate: per sample per instance and fill in numpy arrays with respective data
    6. Form a dict containing the 3 numpyarrays and return it '''

    # Keys are sample tokens, so a sample shared by several agents keeps only the last agent.
    ordered_tokens = OrderedDict(zip(sample_tokens, instance_tokens))
    print("Printing Ordered_tokens: ", ordered_tokens)

    # NOTE(review): unconditional early exit (looks like a debugging leftover) — the code
    # below never runs. Confirm whether it should be removed or the remainder deleted.
    return []

    # Dictionary containing count for number of samples per token
    token_count = Counter(instance_tokens)

    # used to find n agents with highest number of sample_tokens
    minCount = sorted(list(token_count.values()), reverse=True)[NUM_AGENTS - 1]

    # Convert instance and sample tokens to dict format: instance token -> list of sample tokens
    instance_sample_tokens = {}
    for instance_token, sample_token in zip(instance_tokens, sample_tokens):
        if token_count[instance_token] >= minCount:
            try:
                instance_sample_tokens[instance_token].append(sample_token)
            except:
                instance_sample_tokens[instance_token] = [sample_token]

    # print("Instance:samples ===============================================================================")
    # print(instance_sample_tokens)

    if len(list(instance_sample_tokens.keys())) != NUM_AGENTS:
        print()
        # print("Instance_sample_tokens: \n", instance_sample_tokens)

    ''' Format: {coordinates: [[coord_at_t0, coord_at_t1, coord_at_t2, ..., coord_at_tTAJECTORY_TIME_INTERVAL],...numDatapointsInScene ], heading_change_rate; [[h_at_t0, h_at_t1, h_at_t2, ..., h_at_tTAJECTORY_TIME_INTERVAL], ...numDatapointaInScene] } '''

    # Initialize map rasterizers
    static_layer_rasterizer = StaticLayerRasterizer(self.helper)
    agent_rasterizer = AgentBoxesWithFadedHistory(self.helper,
                                                  seconds_of_history=2.5)
    mtp_input_representation = InputRepresentation(static_layer_rasterizer,
                                                   agent_rasterizer,
                                                   Rasterizer())

    # Initialize Output data
    # NOTE(review): "coordinates" and "heading_change_rate" are allocated with shape
    # (num_agents, 1) but are later assigned multi-dimensional arrays per row — verify
    # the intended shapes before enabling this code.
    output_data = {
        "coordinates": np.zeros((len(instance_sample_tokens.keys()), 1)),
        "heading_change_rate": np.zeros((len(instance_sample_tokens.keys()), 1)),
        "map": [0] * len(instance_sample_tokens.keys())
    }

    for t, instance_token in enumerate(instance_sample_tokens.keys()):
        # One row per complete group of TRAJECTORY_TIME_INTERVAL samples of this agent.
        instance_coordinates = np.zeros((int(
            len(instance_sample_tokens[instance_token]) / TRAJECTORY_TIME_INTERVAL), TRAJECTORY_TIME_INTERVAL, 3))
        instance_heading_change_rate = np.zeros((int(
            len(instance_sample_tokens[instance_token]) / TRAJECTORY_TIME_INTERVAL), TRAJECTORY_TIME_INTERVAL))

        print("Shape of instance_coordinates: ", instance_coordinates.shape)

        idx = 0  #0 --> numData points for this instance (dimension 1)
        num = 0  #0 --> TRAJECTORY_TIME_INTERVAL (dimension 2)
        for sample_token in (instance_sample_tokens[instance_token]):
            # print(idx, " ", num)
            # print(self.nusc.get('sample', sample_token)["timestamp"])

            # how to get the annotation for the instance in the sample
            annotation = self.helper.get_sample_annotation(
                instance_token, sample_token)
            instance_coordinates[idx][num] = annotation["translation"]

            # get the heading change rate of the agent
            heading_change_rate = self.helper.get_heading_change_rate_for_agent(
                instance_token, sample_token)
            instance_heading_change_rate[idx][num] = heading_change_rate

            num = num + 1

            # reached the number of records per sample
            if num == TRAJECTORY_TIME_INTERVAL:
                idx = idx + 1
                num = 0

            # Stop once every complete group has been filled; leftover samples are ignored.
            if idx == instance_coordinates.shape[0]:
                break

        # Rasterize around the agent at the last sample visited by the loop above.
        img = mtp_input_representation.make_input_representation(
            instance_token, sample_token)
        # cv2.imshow("map",img)

        output_data["map"][t] = (img)
        # plt.imsave('test'+str(test_idx)+str(t)+'.jpg',img)
        output_data["coordinates"][t] = instance_coordinates
        output_data["heading_change_rate"][
            t] = instance_heading_change_rate

    # test = pd.DataFrame(output_data,columns=["coordinates", "heading_change_rate", "map"])
    # test.to_csv('test'+str(test_idx)+'.csv')

    print("Printing Output data")
    print((output_data["coordinates"]))
    print(len(output_data["heading_change_rate"]))
    print(len(output_data["coordinates"]))

    return output_data