Example #1
    def __init__(self,
                 instance_sample_tokens,
                 helper):
        self.instance_sample_tokens = instance_sample_tokens
        self.helper = helper

        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=SECONDS_OF_HISTORY)
        self.mtp_input_representation = InputRepresentation(
            self.static_layer_rasterizer,
            self.agent_rasterizer,
            Rasterizer())

        self.transform_fn = transforms.Normalize(
                                mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
Example #2
    def __init__(self,
                 nusc,
                 helper,
                 maps_dir,
                 save_maps_dataset=False,
                 config_name='predict_2020_icra.json',
                 history=1,
                 num_examples=None,
                 in_agent_frame=True):

        self.nusc = nusc
        self.helper = helper

        # initialize the data set
        if maps_dir == 'maps_train':
            dataset_version = "train"
        elif maps_dir == 'maps':
            dataset_version = "train_val"
        elif maps_dir == 'maps_val':
            dataset_version = "val"
        else:
            raise ValueError("Unrecognized maps_dir: {0}".format(maps_dir))

        #initialize maps directory where everything will be saved
        self.maps_dir = os.path.join(os.getcwd(), maps_dir)
        self.data_set = get_prediction_challenge_split(
            dataset_version, dataroot=self.nusc.dataroot)

        if num_examples:
            self.data_set = self.data_set[:num_examples]

        #initialize rasterizers for map generation
        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(
            self.helper, seconds_of_history=history)
        self.mtp_input_representation = InputRepresentation(
            self.static_layer_rasterizer, self.agent_rasterizer, Rasterizer())

        self.in_agent_frame = in_agent_frame

        self.config = load_prediction_config(self.helper, config_name)

        self.save_maps_dataset = save_maps_dataset

        if self.save_maps_dataset:
            self.save_maps()

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
Example #3
import pickle

import torch

from nuscenes.eval.prediction.data_classes import Prediction
from nuscenes.prediction import PredictHelper
from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory
from nuscenes.prediction.input_representation.combinators import Rasterizer
from nuscenes.prediction.input_representation.interface import InputRepresentation
from nuscenes.prediction.input_representation.static_layers import StaticLayerRasterizer
from nuscenes.prediction.models.backbone import ResNetBackbone
from nuscenes.prediction.models.covernet import CoverNet
from nuscenes.prediction.models.mtp import MTP

# PATH_TO_EPSILON_8_SET must point at the epsilon-8 trajectory-set pickle
# (distributed separately with the CoverNet release); path is illustrative.
PATH_TO_EPSILON_8_SET = 'epsilon_8.pkl'
class CoverNetBaseline:
    def __init__(self, sec_from_now: float, helper: PredictHelper):
        """
        Inits Baseline.
        :param sec_from_now: How many seconds into the future to make the prediction.
        :param helper: Instance of PredictHelper.
        """
        assert sec_from_now % 0.5 == 0, f"Parameter sec_from_now must be divisible by 0.5. Received {sec_from_now}."
        self.helper = helper
        self.sec_from_now = sec_from_now
        self.sampled_at = 2  # 2 Hz between annotations.

        backbone = ResNetBackbone('resnet50')
        self.mtp = MTP(backbone, num_modes=2)

        self.covernet = CoverNet(backbone, num_modes=64)    # Note that the value of num_modes depends on the size of the lattice used for CoverNet.

        static_layer_rasterizer = StaticLayerRasterizer(helper)
        agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=1)
        self.mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

        self.trajectories = pickle.load(open(PATH_TO_EPSILON_8_SET, 'rb'))
        self.trajectories = torch.Tensor(self.trajectories)

    def __call__(self, token: str) -> Prediction:
        """
        Makes prediction.
        :param token: string of format {instance_token}_{sample_token}.
        """
        instance_token_img, sample_token_img = token.split("_")
        # kinematics = _kinematics_from_tokens(self.helper, instance, sample)
        # cv_heading = _constant_velocity_heading_from_kinematics(kinematics, self.sec_from_now, self.sampled_at)

        # anns = [ann for ann in nuscenes.sample_annotation if ann['instance_token'] == instance_token_img]
        img = self.mtp_input_representation.make_input_representation(instance_token_img, sample_token_img)
        image_tensor = torch.Tensor(img).permute(2, 0, 1).unsqueeze(0)

        # plt.imshow(img)

        agent_state_vector = torch.Tensor([[self.helper.get_velocity_for_agent(instance_token_img, sample_token_img),
                                            self.helper.get_acceleration_for_agent(instance_token_img, sample_token_img),
                                            self.helper.get_heading_change_rate_for_agent(instance_token_img,
                                                                                          sample_token_img)]])

        mtp_out = self.mtp(image_tensor, agent_state_vector)
        covernet_logits = self.covernet(image_tensor, agent_state_vector)

        # Keep the 5 most likely trajectories together with their softmax probabilities.
        covernet_probabilities = covernet_logits.softmax(dim=-1).squeeze()
        top_indices = covernet_logits.argsort(descending=True).squeeze()[:5]
        covernet_trajectories = self.trajectories[top_indices].detach().cpu().numpy()
        covernet_probabilities = covernet_probabilities[top_indices].detach().cpu().numpy()

        # Prediction expects trajectories of shape (num_modes, n_timesteps, 2).
        return Prediction(instance_token_img, sample_token_img, covernet_trajectories, covernet_probabilities)
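A minimal usage sketch for the baseline above, assuming a local nuScenes installation and that PATH_TO_EPSILON_8_SET points at the epsilon-8 CoverNet lattice pickle; the token pair is a placeholder:

from nuscenes import NuScenes
from nuscenes.prediction import PredictHelper

nusc = NuScenes('v1.0-mini', dataroot='/data/sets/nuscenes')
helper = PredictHelper(nusc)
baseline = CoverNetBaseline(sec_from_now=6.0, helper=helper)
prediction = baseline('<instance_token>_<sample_token>')  # placeholder token pair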
Example #4
    def __init__(self, sec_from_now: float, helper: PredictHelper):
        """
        Inits Baseline.
        :param sec_from_now: How many seconds into the future to make the prediction.
        :param helper: Instance of PredictHelper.
        """
        assert sec_from_now % 0.5 == 0, f"Parameter sec_from_now must be divisible by 0.5. Received {sec_from_now}."
        self.helper = helper
        self.sec_from_now = sec_from_now
        self.sampled_at = 2  # 2 Hz between annotations.

        backbone = ResNetBackbone('resnet50')
        self.mtp = MTP(backbone, num_modes=2)

        self.covernet = CoverNet(backbone, num_modes=64)    # Note that the value of num_modes depends on the size of the lattice used for CoverNet.

        static_layer_rasterizer = StaticLayerRasterizer(helper)
        agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=1)
        self.mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

        self.trajectories = pickle.load(open(PATH_TO_EPSILON_8_SET, 'rb'))
        self.trajectories = torch.Tensor(self.trajectories)
Example #5
    def get_format_mha_jam_maps(self, states_filepath, out_file):
        with open(states_filepath) as fr:
            agents_states = fr.readlines()

        # format:
        # agent_id, 20 x (frame_id, x, y, v, a, yaw_rate)
        agents_states = [[float(x.rstrip()) for x in s.split(',')]
                         for s in agents_states]

        mode = "train" if out_file.find("_train") != -1 else "val"
        mini = "mini" if out_file.find("mini") != -1 else "main"

        with open("dicts_sample_and_instances_id2token_" + mode + "_" + mini +
                  ".json") as fr:
            instance_dict_id_token, sample_dict_id_token = json.load(fr)

        # Get map for each sample in states
        agent_ind = 0
        static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        agent_rasterizer = AgentBoxesWithFadedHistory(self.helper,
                                                      seconds_of_history=1)
        mtp_input_representation = InputRepresentation(static_layer_rasterizer,
                                                       agent_rasterizer,
                                                       Rasterizer())

        if not os.path.exists(os.path.dirname(out_file)):
            os.makedirs(os.path.dirname(out_file))

        for agent in tqdm(agents_states):
            instance_token = instance_dict_id_token[str(int(agent[0]))]
            mid_frame_id = int(agent[1 + 6 * (MAX_TRAJ_LEN)])
            sample_token = sample_dict_id_token[str(mid_frame_id)]
            img = mtp_input_representation.make_input_representation(
                instance_token, sample_token)
            # img = cv2.resize(img, (1024, 1024))
            cv2.imwrite(
                out_file.replace("_.jpg", "__" + str(agent_ind) + ".jpg"), img)
            agent_ind += 1
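A hedged usage sketch for the method above; the file names are illustrative and only need to satisfy the "_train"/"mini" substring checks and contain the "_.jpg" placeholder that the method rewrites per agent:

# "formatter" is a hypothetical instance of the class defining this method,
# with a PredictHelper available as formatter.helper.
formatter.get_format_mha_jam_maps("states_mha_train.txt",
                                  "maps_mha_train/_.jpg")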
Example #6
def main(version: str, data_root: str,
         split_name: str, output_dir: str, config_name: str = 'predict_2020_icra.json') -> None:
    """
    Performs inference for all of the baseline models defined in the physics model module.
    :param version: nuScenes data set version.
    :param data_root: Directory where the NuScenes data is stored.
    :param split_name: nuScenes data split name, e.g. train, val, mini_train, etc.
    :param output_dir: Directory where predictions should be stored.
    :param config_name: Name of config file.
    """

    print('timing point A')
    nusc = NuScenes(version=version, dataroot=data_root)
    print('timing point B')
    helper = PredictHelper(nusc)
    print('timing point C')
    dataset = get_prediction_challenge_split(split_name, dataroot=data_root)
    print('timing point D')
    config = load_prediction_config(helper, config_name)
    print('timing point E')

    # rasterization
    static_layer_rasterizer = StaticLayerRasterizer(helper)
    agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=3)
    mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

    # loop through a slice of the prediction tokens and visualize each
    for token in dataset[40:60:2]:
        fig, axes = plt.subplots(1, 3, figsize=(18, 9))
        print(token)
        instance_token, sample_token = token.split('_')

        plot_cam_view(axes[1], nusc, token)
        plot_cam_view(axes[2], nusc, token, cam_name='CAM_FRONT_RIGHT')
        axes[0].imshow(mtp_input_representation.make_input_representation(instance_token, sample_token))
    plt.show()
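A minimal invocation sketch (version, paths and split name are illustrative):

if __name__ == '__main__':
    main(version='v1.0-mini', data_root='/data/sets/nuscenes',
         split_name='mini_val', output_dir='./predictions')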
Example #7
class MTPDataset(Dataset):
    def __init__(self,
                 instance_sample_tokens,
                 helper):
        self.instance_sample_tokens = instance_sample_tokens
        self.helper = helper

        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=SECONDS_OF_HISTORY)
        self.mtp_input_representation = InputRepresentation(
            self.static_layer_rasterizer,
            self.agent_rasterizer,
            Rasterizer())

        self.transform_fn = transforms.Normalize(
                                mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

    def __len__(self):
        return len(self.instance_sample_tokens)

    def __getitem__(self, idx):
        instance_token, sample_token = self.instance_sample_tokens[idx].split("_")

        image = self.mtp_input_representation.make_input_representation(instance_token, sample_token)
        # (H, W, 3) -> (3, H, W); transpose rather than reshape so the channel data stays intact
        image = image.transpose(2, 0, 1).astype(np.float32) / 255.0
        image_tensor = torch.from_numpy(image)
        image_tensor = self.transform_fn(image_tensor)

        agent_state_vector = np.array([self.helper.get_velocity_for_agent(instance_token, sample_token),
                                       self.helper.get_acceleration_for_agent(instance_token, sample_token),
                                       self.helper.get_heading_change_rate_for_agent(instance_token, sample_token)])

        # For MTP: "the targets are of shape [batch_size, 1, n_timesteps, 2]"
        # where n_timesteps = 2 * seconds predicted = 12
        ground_truth = self.helper.get_future_for_agent(instance_token, sample_token, seconds=SECONDS_TO_PREDICT, in_agent_frame=True)


        return (image_tensor, agent_state_vector,
                ground_truth.reshape((1, 12, 2)),
                instance_token, sample_token)
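A minimal sketch of wiring MTPDataset into a DataLoader (SECONDS_OF_HISTORY, SECONDS_TO_PREDICT, the token list and helper are assumed to be defined as in the other examples):

from torch.utils.data import DataLoader

dataset = MTPDataset(instance_sample_tokens, helper)
loader = DataLoader(dataset, batch_size=16, shuffle=True)
for image_tensor, agent_state_vector, ground_truth, instance_token, sample_token in loader:
    pass  # forward pass through an MTP model would go here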
Example #8
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset

from nuscenes import NuScenes
from nuscenes.eval.prediction.config import load_prediction_config
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.prediction import PredictHelper
from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory
from nuscenes.prediction.input_representation.combinators import Rasterizer
from nuscenes.prediction.input_representation.interface import InputRepresentation
from nuscenes.prediction.input_representation.static_layers import StaticLayerRasterizer

DATAROOT = '/data/sets/nuscenes'

nuscenes = NuScenes('v1.0-mini', dataroot=DATAROOT)
# Data splits for the prediction challenge

# input representation
helper = PredictHelper(nuscenes)
static_layer_rasterizer = StaticLayerRasterizer(helper)
agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=1)  # history length assumed
mtp_input_representation = InputRepresentation(static_layer_rasterizer,
                                               agent_rasterizer, Rasterizer())

instance_token_img, sample_token_img = 'bc38961ca0ac4b14ab90e547ba79fbb6', '7626dde27d604ac28a0240bdd54eba7a'
anns = [
    ann for ann in nuscenes.sample_annotation
    if ann['instance_token'] == instance_token_img
]
img = mtp_input_representation.make_input_representation(
    instance_token_img, sample_token_img)
plt.imshow(img)

# Model Implementations
class NuscenesDataset(Dataset):
    def __init__(self,
                 nusc,
                 helper,
                 maps_dir,
                 save_maps_dataset=False,
                 config_name='predict_2020_icra.json',
                 history=1,
                 num_examples=None,
                 in_agent_frame=True):

        self.nusc = nusc
        self.helper = helper

        # initialize the data set
        if maps_dir == 'maps_train':
            dataset_version = "train"
        elif maps_dir == 'maps':
            dataset_version = "train_val"
        elif maps_dir == 'maps_val':
            dataset_version = "val"
        else:
            raise ValueError("Unrecognized maps_dir: {0}".format(maps_dir))

        #initialize maps directory where everything will be saved
        self.maps_dir = os.path.join(os.getcwd(), maps_dir)
        self.data_set = get_prediction_challenge_split(
            dataset_version, dataroot=self.nusc.dataroot)

        if num_examples:
            self.data_set = self.data_set[:num_examples]

        #initialize rasterizers for map generation
        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(
            self.helper, seconds_of_history=history)
        self.mtp_input_representation = InputRepresentation(
            self.static_layer_rasterizer, self.agent_rasterizer, Rasterizer())

        self.in_agent_frame = in_agent_frame

        self.config = load_prediction_config(self.helper, config_name)

        self.save_maps_dataset = save_maps_dataset

        if self.save_maps_dataset:
            self.save_maps()

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

    def save_maps(self):
        '''
        Saves all rendered maps to the maps_dir directory specified in the
        constructor.
        '''
        print("starting to save maps")
        for i, token in enumerate(self.data_set):
            instance_token_img, sample_token_img = self.data_set[i].split('_')

            file_path = os.path.join(self.maps_dir, "maps_{0}.jpg".format(i))

            instance_token_img, sample_token_img = self.data_set[i].split('_')
            img = self.mtp_input_representation.make_input_representation(
                instance_token_img, sample_token_img)
            im = Image.fromarray(img)
            im.save(file_path)

            print("{0}/{1} image saved".format(i, len(self.data_set)))

        print("done saving maps")

    def __len__(self):
        return len(self.data_set)

    #return the image tensor, agent state vector, and the ground truth
    def __getitem__(self, index):
        token = self.data_set[index]
        instance_token_img, sample_token_img = self.data_set[index].split('_')

        velocity = self.helper.get_velocity_for_agent(instance_token_img,
                                                      sample_token_img)
        acceleration = self.helper.get_acceleration_for_agent(
            instance_token_img, sample_token_img)
        heading = self.helper.get_heading_change_rate_for_agent(
            instance_token_img, sample_token_img)

        #using a padding token of -1
        if np.isnan(velocity) or np.isnan(acceleration) or np.isnan(heading):
            velocity = acceleration = heading = -1

        #construct agent state vector
        agent_state_vec = torch.Tensor([velocity, acceleration, heading])

        #change image from (N, N, 3) -> (3, N, N); the DataLoader handles batching to (batch_size, 3, N, N)
        #get image and construct tensor
        file_path = os.path.join(self.maps_dir, "maps_{0}.jpg".format(index))

        im = Image.open(file_path)
        img = np.array(im)
        image_tensor = torch.Tensor(img).permute(2, 0, 1)

        #get ground truth
        ground_truth = self.helper.get_future_for_agent(
            instance_token_img,
            sample_token_img,
            self.config.seconds,
            in_agent_frame=self.in_agent_frame)

        ground_truth = torch.Tensor(ground_truth).unsqueeze(0)
        return image_tensor, agent_state_vec, ground_truth, token
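A hedged end-to-end sketch for the dataset above (the dataroot is illustrative, and the maps directory is assumed to exist under the current working directory; save_maps_dataset=True renders and stores the raster maps once during construction):

nusc = NuScenes('v1.0-trainval', dataroot='/data/sets/nuscenes')
helper = PredictHelper(nusc)
dataset = NuscenesDataset(nusc, helper, maps_dir='maps_val',
                          save_maps_dataset=True, num_examples=8)
image_tensor, agent_state_vec, ground_truth, token = dataset[0]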
Example #10
    def __init__(self,
                 dataroot: str,
                 split: str,
                 t_h: float = 2,
                 t_f: float = 6,
                 grid_dim: int = 25,
                 img_size: int = 200,
                 horizon: int = 40,
                 grid_extent: Tuple[int, int, int, int] = (-25, 25, -10, 40),
                 num_actions: int = 4,
                 image_extraction_mode: bool = False):
        """
        Initializes dataset class for nuScenes prediction

        :param dataroot: Path to tables and data
        :param split: Dataset split for prediction benchmark ('train'/'train_val'/'val')
        :param t_h: Track history in seconds
        :param t_f: Prediction horizon in seconds
        :param grid_dim: Size of grid, default: 25x25
        :param img_size: Size of raster map image in pixels, default: 200x200
        :param horizon: MDP horizon
        :param grid_extent: Map extents in meters, (-left, right, -behind, front)
        :param num_actions: Number of actions for each state (4: [D,R,U,L] or 8: [D, R, U, L, DR, UR, DL, UL])
        :param image_extraction_mode: Whether dataset class is being used for image extraction
        """

        # Nuscenes dataset and predict helper
        self.dataroot = dataroot
        self.ns = NuScenes('v1.0-trainval', dataroot=dataroot)
        self.helper = PredictHelper(self.ns)
        self.token_list = get_prediction_challenge_split(split,
                                                         dataroot=dataroot)

        # Useful parameters
        self.grid_dim = grid_dim
        self.grid_extent = grid_extent
        self.img_size = img_size
        self.t_f = t_f
        self.t_h = t_h
        self.horizon = horizon
        self.num_actions = num_actions

        # Map row, column and velocity states to actual values
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        self.row_centers = np.linspace(
            self.grid_extent[3] - grid_size_m / (self.grid_dim * 2),
            self.grid_extent[2] + grid_size_m / (self.grid_dim * 2),
            self.grid_dim)

        self.col_centers = np.linspace(
            self.grid_extent[0] + grid_size_m / (self.grid_dim * 2),
            self.grid_extent[1] - grid_size_m / (self.grid_dim * 2),
            self.grid_dim)

        # Surrounding agent input representation: populate grid with velocity, acc, yaw-rate
        self.agent_ip = AgentMotionStatesOnGrid(self.helper,
                                                resolution=grid_size_m /
                                                img_size,
                                                meters_ahead=grid_extent[3],
                                                meters_behind=-grid_extent[2],
                                                meters_left=-grid_extent[0],
                                                meters_right=grid_extent[1])

        # Image extraction mode is used for extracting map images offline prior to training
        self.image_extraction_mode = image_extraction_mode
        if self.image_extraction_mode:

            # Raster map representation
            self.map_ip = StaticLayerRasterizer(self.helper,
                                                resolution=grid_size_m /
                                                img_size,
                                                meters_ahead=grid_extent[3],
                                                meters_behind=-grid_extent[2],
                                                meters_left=-grid_extent[0],
                                                meters_right=grid_extent[1])

            # Raster map with agent boxes. Only used for visualization
            static_layer_rasterizer = StaticLayerRasterizer(
                self.helper,
                resolution=grid_size_m / img_size,
                meters_ahead=grid_extent[3],
                meters_behind=-grid_extent[2],
                meters_left=-grid_extent[0],
                meters_right=grid_extent[1])

            agent_rasterizer = AgentBoxesWithFadedHistory(
                self.helper,
                seconds_of_history=1,
                resolution=grid_size_m / img_size,
                meters_ahead=grid_extent[3],
                meters_behind=-grid_extent[2],
                meters_left=-grid_extent[0],
                meters_right=grid_extent[1])

            self.map_ip_agents = InputRepresentation(static_layer_rasterizer,
                                                     agent_rasterizer,
                                                     Rasterizer())
Example #11
class NS(Dataset):
    def __init__(self,
                 dataroot: str,
                 split: str,
                 t_h: float = 2,
                 t_f: float = 6,
                 grid_dim: int = 25,
                 img_size: int = 200,
                 horizon: int = 40,
                 grid_extent: Tuple[int, int, int, int] = (-25, 25, -10, 40),
                 num_actions: int = 4,
                 image_extraction_mode: bool = False):
        """
        Initializes dataset class for nuScenes prediction

        :param dataroot: Path to tables and data
        :param split: Dataset split for prediction benchmark ('train'/'train_val'/'val')
        :param t_h: Track history in seconds
        :param t_f: Prediction horizon in seconds
        :param grid_dim: Size of grid, default: 25x25
        :param img_size: Size of raster map image in pixels, default: 200x200
        :param horizon: MDP horizon
        :param grid_extent: Map extents in meters, (-left, right, -behind, front)
        :param num_actions: Number of actions for each state (4: [D,R,U,L] or 8: [D, R, U, L, DR, UR, DL, UL])
        :param image_extraction_mode: Whether dataset class is being used for image extraction
        """

        # Nuscenes dataset and predict helper
        self.dataroot = dataroot
        self.ns = NuScenes('v1.0-trainval', dataroot=dataroot)
        self.helper = PredictHelper(self.ns)
        self.token_list = get_prediction_challenge_split(split,
                                                         dataroot=dataroot)

        # Useful parameters
        self.grid_dim = grid_dim
        self.grid_extent = grid_extent
        self.img_size = img_size
        self.t_f = t_f
        self.t_h = t_h
        self.horizon = horizon
        self.num_actions = num_actions

        # Map row, column and velocity states to actual values
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        self.row_centers = np.linspace(
            self.grid_extent[3] - grid_size_m / (self.grid_dim * 2),
            self.grid_extent[2] + grid_size_m / (self.grid_dim * 2),
            self.grid_dim)

        self.col_centers = np.linspace(
            self.grid_extent[0] + grid_size_m / (self.grid_dim * 2),
            self.grid_extent[1] - grid_size_m / (self.grid_dim * 2),
            self.grid_dim)

        # Surrounding agent input representation: populate grid with velocity, acc, yaw-rate
        self.agent_ip = AgentMotionStatesOnGrid(self.helper,
                                                resolution=grid_size_m /
                                                img_size,
                                                meters_ahead=grid_extent[3],
                                                meters_behind=-grid_extent[2],
                                                meters_left=-grid_extent[0],
                                                meters_right=grid_extent[1])

        # Image extraction mode is used for extracting map images offline prior to training
        self.image_extraction_mode = image_extraction_mode
        if self.image_extraction_mode:

            # Raster map representation
            self.map_ip = StaticLayerRasterizer(self.helper,
                                                resolution=grid_size_m /
                                                img_size,
                                                meters_ahead=grid_extent[3],
                                                meters_behind=-grid_extent[2],
                                                meters_left=-grid_extent[0],
                                                meters_right=grid_extent[1])

            # Raster map with agent boxes. Only used for visualization
            static_layer_rasterizer = StaticLayerRasterizer(
                self.helper,
                resolution=grid_size_m / img_size,
                meters_ahead=grid_extent[3],
                meters_behind=-grid_extent[2],
                meters_left=-grid_extent[0],
                meters_right=grid_extent[1])

            agent_rasterizer = AgentBoxesWithFadedHistory(
                self.helper,
                seconds_of_history=1,
                resolution=grid_size_m / img_size,
                meters_ahead=grid_extent[3],
                meters_behind=-grid_extent[2],
                meters_left=-grid_extent[0],
                meters_right=grid_extent[1])

            self.map_ip_agents = InputRepresentation(static_layer_rasterizer,
                                                     agent_rasterizer,
                                                     Rasterizer())

    def __len__(self):
        return len(self.token_list)

    def __getitem__(self, idx):
        """
        Returns inputs, ground truth values and other utilities for data point at given index

        :return hist: snippet of track history, default 2s at 0.5 Hz sampling frequency
        :return fut: ground truth future trajectory, default 6s at 0.5 Hz sampling frequency
        :return img: Imagenet normalized bird's eye view map around the target vehicle
        :return svf_e: Goal and path state visitation frequencies for expert demonstration, i.e. the path from the train set
        :return motion_feats: motion and position features used for reward model
        :return waypts_e: (x,y) BEV co-ordinates corresponding to grid cells of svf_e
        :return agents: tensor of surrounding agent states populated in grid around target agent
        :return grid_idcs: grid co-ordinates of svf_e
        :return bc_targets: ground truth actions for training behavior cloning model
        :return img_agents: image with agent boxes for visualization / debugging
        :return instance_token: nuScenes instance token for prediction instance
        :return sample_token: nuScenes sample token for prediction instance
        :return idx: instance id (mainly for debugging)
        """

        # Nuscenes instance and sample token for prediction data point
        instance_token, sample_token = self.token_list[idx].split("_")

        # If dataset is being used for image extraction
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        if self.image_extraction_mode:

            # Make directory to store raster map images
            img_dir = os.path.join(
                self.dataroot, 'prediction_raster_maps', 'images' +
                str(self.img_size) + "_" + str(int(grid_size_m)) + 'm')
            if not os.path.isdir(img_dir):
                os.mkdir(img_dir)

            # Generate and save raster map image with just static elements
            img = self.map_ip.make_representation(instance_token, sample_token)
            img_save = Image.fromarray(img)
            img_save.save(
                os.path.join(img_dir,
                             instance_token + "_" + sample_token + '.png'))

            # Generate and save raster map image with static elements and agent boxes (for visualization only)
            img_agents = self.map_ip_agents.make_input_representation(
                instance_token, sample_token)
            img_agents_save = Image.fromarray(img_agents)
            img_agents_save.save(
                os.path.join(
                    img_dir,
                    instance_token + "_" + sample_token + 'agents.png'))

            # Return dummy values
            return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

        # If dataset is being used for training/validation/testing
        else:

            # Get track history for agent:
            hist = self.get_hist(instance_token, sample_token)
            hist = torch.from_numpy(hist)

            # Get ground truth future for agent:
            fut = self.helper.get_future_for_agent(instance_token,
                                                   sample_token,
                                                   seconds=self.t_f,
                                                   in_agent_frame=True)
            fut = torch.from_numpy(fut)

            # Get indefinite future for computing expert State visitation frequencies (SVF):
            fut_indefinite = self.helper.get_future_for_agent(
                instance_token, sample_token, seconds=300, in_agent_frame=True)

            # Upsample the indefinite future by a factor of 10
            fut_interpolated = np.zeros((fut_indefinite.shape[0] * 10 + 1, 2))
            param_query = np.linspace(0, fut_indefinite.shape[0],
                                      fut_indefinite.shape[0] * 10 + 1)
            param_given = np.linspace(0, fut_indefinite.shape[0],
                                      fut_indefinite.shape[0] + 1)
            val_given_x = np.concatenate(([0], fut_indefinite[:, 0]))
            val_given_y = np.concatenate(([0], fut_indefinite[:, 1]))
            fut_interpolated[:, 0] = np.interp(param_query, param_given,
                                               val_given_x)
            fut_interpolated[:, 1] = np.interp(param_query, param_given,
                                               val_given_y)

            # Read pre-extracted raster map image
            img_dir = os.path.join(
                self.dataroot, 'prediction_raster_maps', 'images' +
                str(self.img_size) + "_" + str(int(grid_size_m)) + 'm')
            img = cv2.imread(
                os.path.join(img_dir,
                             instance_token + "_" + sample_token + '.png'))

            # Pre-process image
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = torch.from_numpy(img)
            img = img.permute((2, 0, 1)).float() / 255

            # Normalize using Imagenet stats
            img = normalize_imagenet(img)

            # Read pre-extracted raster map with agent boxes (for visualization + debugging)
            img_agents = cv2.imread(
                os.path.join(
                    img_dir,
                    instance_token + "_" + sample_token + 'agents.png'))

            # Pre-process image
            img_agents = cv2.cvtColor(img_agents, cv2.COLOR_BGR2RGB)
            img_agents = torch.from_numpy(img_agents)
            img_agents = img_agents.permute((2, 0, 1)).float() / 255

            # Get surrounding agent states
            agents = torch.from_numpy(
                self.agent_ip.make_representation(instance_token,
                                                  sample_token))
            agents = agents.permute((2, 0, 1)).float()

            # Sum pool states to down-sample to grid dimensions
            agents = f.avg_pool2d(agents[None, :, :, :],
                                  self.img_size // self.grid_dim)
            agents = agents.squeeze(dim=0) * (
                (self.img_size // self.grid_dim)**2)

            # Get expert SVF:
            svf_e, waypts_e, grid_idcs = self.get_expert_waypoints(
                fut_interpolated)
            svf_e = torch.from_numpy(svf_e)
            waypts_e = torch.from_numpy(waypts_e)
            grid_idcs = torch.from_numpy(grid_idcs)

            # Get motion and position feats:
            motion_feats = self.get_motion_feats(instance_token, sample_token)
            motion_feats = torch.from_numpy(motion_feats)

            # Targets for behavior cloning model:
            bc_targets = self.get_bc_targets(fut_interpolated)
            bc_targets = torch.from_numpy(bc_targets)

            return hist, fut, img, svf_e, motion_feats, waypts_e, agents, grid_idcs, bc_targets, img_agents, \
                instance_token, sample_token, idx

    def get_hist(self, instance_token: str, sample_token: str):
        """
        Function to get track history of agent
        :param instance_token: nuScenes instance token for datapoint
        :param sample_token nuScenes sample token for datapoint
        """
        # x, y co-ordinates in agent's frame of reference
        xy = self.helper.get_past_for_agent(instance_token,
                                            sample_token,
                                            seconds=self.t_h,
                                            in_agent_frame=True)

        # Get all history records for obtaining velocity, acceleration and turn rate values
        hist_records = self.helper.get_past_for_agent(instance_token,
                                                      sample_token,
                                                      seconds=self.t_h,
                                                      in_agent_frame=True,
                                                      just_xy=False)
        if xy.shape[0] > self.t_h * 2:
            xy = xy[0:int(self.t_h) * 2]
        if len(hist_records) > self.t_h * 2:
            hist_records = hist_records[0:int(self.t_h) * 2]

        # Initialize hist tensor and set x and y co-ordinates returned by prediction helper
        hist = np.zeros((xy.shape[0], 5))
        hist[:, 0:2] = xy

        # Instance and sample tokens from history records
        i_tokens = [
            hist_records[i]['instance_token'] for i in range(len(hist_records))
        ]
        i_tokens.insert(0, instance_token)
        s_tokens = [
            hist_records[i]['sample_token'] for i in range(len(hist_records))
        ]
        s_tokens.insert(0, sample_token)

        # Set velocity, acc and turn rate values for hist
        for k in range(hist.shape[0]):
            i_t = i_tokens[k]
            s_t = s_tokens[k]
            v = self.helper.get_velocity_for_agent(i_t, s_t)
            a = self.helper.get_acceleration_for_agent(i_t, s_t)
            theta = self.helper.get_heading_change_rate_for_agent(i_t, s_t)

            # If function returns nan values due to short tracks, set corresponding value to 0
            if np.isnan(v):
                v = 0
            if np.isnan(a):
                a = 0
            if np.isnan(theta):
                theta = 0
            hist[k, 2] = v
            hist[k, 3] = a
            hist[k, 4] = theta

        # Zero pad for track histories shorter than t_h
        hist_zeropadded = np.zeros((int(self.t_h) * 2, 5))

        # Flip to have correct order of timestamps
        hist = np.flip(hist, 0)
        hist_zeropadded[-hist.shape[0]:] = hist

        return hist_zeropadded

    def get_expert_waypoints(self, fut: np.ndarray):
        """
        Function to get the expert's state visitation frequencies based on their trajectory
        :param fut: numpy array with the future trajectory for all available future timestamps, up-sampled by 10
        """

        # Expert state visitation frequencies for training reward model, waypoints in meters and grid indices
        svf_e = np.zeros((2, self.grid_dim, self.grid_dim))
        waypts_e = np.zeros((self.horizon, 2))
        grid_idcs = np.zeros((self.horizon, 2))

        count = 0
        row_prev = np.nan
        column_prev = np.nan
        for k in range(fut.shape[0]):

            # Convert trajectory (x,y) co-ordinates to grid locations:
            column = np.argmin(np.absolute(fut[k, 0] - self.col_centers))
            row = np.argmin(np.absolute(fut[k, 1] - self.row_centers))

            # Demonstration ends when expert leaves the image crop corresponding to the grid:
            if self.grid_extent[0] <= fut[k, 0] <= self.grid_extent[1] and \
                    self.grid_extent[2] <= fut[k, 1] <= self.grid_extent[3]:

                # Check if cell location has changed
                if row != row_prev or column != column_prev:

                    # Add cell location to path states of expert
                    svf_e[0, row.astype(int), column.astype(int)] = 1

                    if count < self.horizon:

                        # Get BEV coordinates corresponding to cell locations
                        waypts_e[count, 0] = self.row_centers[row]
                        waypts_e[count, 1] = self.col_centers[column]
                        grid_idcs[count, 0] = row
                        grid_idcs[count, 1] = column
                        count += 1
            else:
                break
            column_prev = column
            row_prev = row

        # Last cell location where the demonstration terminates is the goal state
        # (guarded, since row_prev stays nan if the expert starts outside the grid):
        if not np.isnan(row_prev):
            svf_e[1, int(row_prev), int(column_prev)] = 1

        return svf_e, waypts_e, grid_idcs

    def get_motion_feats(self, instance_token: str, sample_token: str):
        """
        Function to get motion and position features over grid for reward model
        :param instance_token: NuScenes instance token for datapoint
        :param sample_token: NuScenes sample token for datapoint
        """
        feats = np.zeros((3, self.grid_dim, self.grid_dim))

        # X and Y co-ordinates over grid
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        y = (np.linspace(
            self.grid_extent[3] - grid_size_m / (self.grid_dim * 2),
            self.grid_extent[2] + grid_size_m / (self.grid_dim * 2),
            self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1)
        x = (np.linspace(
            self.grid_extent[0] + grid_size_m / (self.grid_dim * 2),
            self.grid_extent[1] - grid_size_m / (self.grid_dim * 2),
            self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim,
                                                  axis=1).transpose()

        # Velocity of agent
        v = self.helper.get_velocity_for_agent(instance_token, sample_token)
        if np.isnan(v):
            v = 0

        # Normalize X and Y co-ordinates over grid
        feats[0] = v
        feats[1] = x / grid_size_m
        feats[2] = y / grid_size_m

        return feats

    def get_bc_targets(self, fut: np.ndarray):
        """
        Function to get targets for behavior cloning model
        :param fut: numpy array with the future trajectory for all available future timestamps, up-sampled by 10
        """
        bc_targets = np.zeros(
            (self.num_actions + 1, self.grid_dim, self.grid_dim))
        column_prev = np.argmin(np.absolute(fut[0, 0] - self.col_centers))
        row_prev = np.argmin(np.absolute(fut[0, 1] - self.row_centers))

        for k in range(fut.shape[0]):

            # Convert trajectory (x,y) co-ordinates to grid locations:
            column = np.argmin(np.absolute(fut[k, 0] - self.col_centers))
            row = np.argmin(np.absolute(fut[k, 1] - self.row_centers))

            # Demonstration ends when expert leaves the image crop corresponding to the grid:
            if self.grid_extent[0] <= fut[k, 0] <= self.grid_extent[1] and self.grid_extent[2] <= fut[k, 1] <= \
                    self.grid_extent[3]:

                # Check if cell location has changed
                if row != row_prev or column != column_prev:
                    bc_targets[:, int(row_prev), int(column_prev)] = 0
                    d_x = column - column_prev
                    d_y = row - row_prev
                    theta = np.arctan2(d_y, d_x)

                    # Assign ground truth actions for expert demonstration
                    if self.num_actions == 4:  # [D,R,U,L,end]
                        if np.pi / 4 <= theta < 3 * np.pi / 4:
                            bc_targets[0, int(row_prev), int(column_prev)] = 1
                        elif -np.pi / 4 <= theta < np.pi / 4:
                            bc_targets[1, int(row_prev), int(column_prev)] = 1
                        elif -3 * np.pi / 4 <= theta < -np.pi / 4:
                            bc_targets[2, int(row_prev), int(column_prev)] = 1
                        else:
                            bc_targets[3, int(row_prev), int(column_prev)] = 1

                    else:  # [D, R, U, L, DR, UR, DL, UL, end]
                        if 3 * np.pi / 8 <= theta < 5 * np.pi / 8:
                            bc_targets[0, int(row_prev), int(column_prev)] = 1
                        elif -np.pi / 8 <= theta < np.pi / 8:
                            bc_targets[1, int(row_prev), int(column_prev)] = 1
                        elif -5 * np.pi / 8 <= theta < -3 * np.pi / 8:
                            bc_targets[2, int(row_prev), int(column_prev)] = 1
                        elif np.pi / 8 <= theta < 3 * np.pi / 8:
                            bc_targets[4, int(row_prev), int(column_prev)] = 1
                        elif -3 * np.pi / 8 <= theta < -np.pi / 8:
                            bc_targets[5, int(row_prev), int(column_prev)] = 1
                        elif 5 * np.pi / 8 <= theta < 7 * np.pi / 8:
                            bc_targets[6, int(row_prev), int(column_prev)] = 1
                        elif -7 * np.pi / 8 <= theta < -5 * np.pi / 8:
                            bc_targets[7, int(row_prev), int(column_prev)] = 1
                        else:
                            bc_targets[3, int(row_prev), int(column_prev)] = 1
            else:
                break
            column_prev = column
            row_prev = row

        # Final action is the end action to transition to the goal state:
        bc_targets[self.num_actions, int(row_prev), int(column_prev)] = 1

        return bc_targets
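A tiny worked check of the 4-action binning above (illustrative only): moving one grid cell to the right gives theta = 0, which falls in the [-pi/4, pi/4) bin, i.e. action index 1 ('R').

import numpy as np

d_x, d_y = 1, 0                           # one cell to the right
theta = np.arctan2(d_y, d_x)              # 0.0 rad
assert -np.pi / 4 <= theta < np.pi / 4    # bin for action index 1 ('R')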
    def __getitem__(self, test_idx):

        #get the scene
        scene = self.trainset[test_idx]

        #get all the tokens in the scene
        #List of scene tokens where each item consists of an instance token and a sample token separated by an underscore
        scene_tokens = self.prediction_scenes[scene]

        #Return if fewer than 2 tokens in this scene
        if len(scene_tokens) < 2:
            print("Not enough agents in the scene")
            return []

        #get the tokens in the scene; the instance token identifies the agent
        tokens = [scene_tok.split("_") for scene_tok in scene_tokens]

        #List of instance tokens and sample tokens
        instance_tokens, sample_tokens = [list(t) for t in zip(*tokens)]

        assert len(instance_tokens) == len(
            sample_tokens), "Instance and Sample tokens count does not match"
        '''
        1. Convert the lists of sample and instance tokens into an ordered dict keyed by sample token
        2. Iterate over all combinations (of length TRAJECTORY_TIME_INTERVAL) of consecutive samples
        3. Form a list of data points where each data point has TRAJECTORY_TIME_INTERVAL sample tokens and
           each sample token has data for all instance tokens identified in step 2
        4. Create 3 numpy arrays each for coordinates, heading_change_rate and map with appropriate shapes
        5. Iterate per sample per instance and fill in the numpy arrays with the respective data
        6. Form a dict containing the 3 numpy arrays and return it
        '''

        ordered_tokens = OrderedDict(zip(sample_tokens, instance_tokens))

        print("Printing Ordered_tokens: ", ordered_tokens)
        return []

        #Dictionary containing count for number of samples per token
        token_count = Counter(instance_tokens)

        #used to find n agents with highest number of sample_tokens
        minCount = sorted(list(token_count.values()),
                          reverse=True)[NUM_AGENTS - 1]

        #Convert instance and sample tokens to dict format
        instance_sample_tokens = {}
        for instance_token, sample_token in zip(instance_tokens,
                                                sample_tokens):
            if token_count[instance_token] >= minCount:
                instance_sample_tokens.setdefault(instance_token,
                                                  []).append(sample_token)

#         print("Instance:samples ===============================================================================")
#         print(instance_sample_tokens)

        if len(list(instance_sample_tokens.keys())) != NUM_AGENTS:
            print()
#             print("Instance_sample_tokens: \n", instance_sample_tokens)
        '''
        Format:
        {coordinates: [[coord_at_t0, coord_at_t1, ..., coord_at_TRAJECTORY_TIME_INTERVAL], ... numDataPointsInScene],
         heading_change_rate: [[h_at_t0, h_at_t1, ..., h_at_TRAJECTORY_TIME_INTERVAL], ... numDataPointsInScene]
        }
        '''

        #Initialize map rasterizers
        static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        agent_rasterizer = AgentBoxesWithFadedHistory(self.helper,
                                                      seconds_of_history=2.5)
        mtp_input_representation = InputRepresentation(static_layer_rasterizer,
                                                       agent_rasterizer,
                                                       Rasterizer())

        #Initialize output data; per-instance arrays vary in length, so use lists
        output_data = {
            "coordinates": [None] * len(instance_sample_tokens.keys()),
            "heading_change_rate": [None] * len(instance_sample_tokens.keys()),
            "map": [None] * len(instance_sample_tokens.keys())
        }

        for t, instance_token in enumerate(instance_sample_tokens.keys()):

            instance_coordinates = np.zeros((int(
                len(instance_sample_tokens[instance_token]) /
                TRAJECTORY_TIME_INTERVAL), TRAJECTORY_TIME_INTERVAL, 3))
            instance_heading_change_rate = np.zeros((int(
                len(instance_sample_tokens[instance_token]) /
                TRAJECTORY_TIME_INTERVAL), TRAJECTORY_TIME_INTERVAL))

            print("Shape of instance_coordinates: ",
                  instance_coordinates.shape)
            idx = 0  #0 --> numData points for this instance (dimension 1)
            num = 0  #0 --> TRAJECTORY_TIME_INTERVAL (dimension 2)
            for sample_token in (instance_sample_tokens[instance_token]):
                #                 print(idx, "     ", num)
                #                 print(self.nusc.get('sample', sample_token)["timestamp"])

                #how to get the annotation for the instance in the sample
                annotation = self.helper.get_sample_annotation(
                    instance_token, sample_token)
                instance_coordinates[idx][num] = annotation["translation"]

                #get the heading change rate of the agent
                heading_change_rate = self.helper.get_heading_change_rate_for_agent(
                    instance_token, sample_token)
                instance_heading_change_rate[idx][num] = heading_change_rate

                num = num + 1

                #reached the number of records per sample
                if num == TRAJECTORY_TIME_INTERVAL:
                    idx = idx + 1
                    num = 0

                if idx == instance_coordinates.shape[0]:
                    break

                img = mtp_input_representation.make_input_representation(
                    instance_token, sample_token)
#                 cv2.imshow("map",img)

            output_data["map"][t] = (img)
            #             plt.imsave('test'+str(test_idx)+str(t)+'.jpg',img)
            output_data["coordinates"][t] = instance_coordinates
            output_data["heading_change_rate"][
                t] = instance_heading_change_rate

#         test = pd.DataFrame(output_data,columns=["coordinates", "heading_change_rate", "map"])
#         test.to_csv('test'+str(test_idx)+'.csv')

        print("Printing Output data")
        print((output_data["coordinates"]))
        print(len(output_data["heading_change_rate"]))
        print(len(output_data["coordinates"]))

        return output_data
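A hedged sketch of using the NS dataset from Example #11 for offline map extraction (the dataroot is illustrative; in extraction mode each indexing call renders and saves the raster maps and returns dummy values):

ns_set = NS(dataroot='/data/sets/nuscenes', split='train',
            image_extraction_mode=True)
for i in range(len(ns_set)):
    ns_set[i]  # renders and saves the static and agent-box raster maps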