def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
    """Return the default loss settings for the algorithm named ``alg``.

    The result is a dict with the keys ``"loss"`` (a ``Builder`` for the
    loss), ``"num_mini_batch"`` and ``"update_repeats"``.

    ``alg`` must be ``"ppo"``, ``"a2c"`` or ``"imitation"``; anything else
    raises ``NotImplementedError``. ``steps`` is only used for ``"ppo"``,
    where it is handed to ``LinearDecay`` for the clip decay and must not
    be ``None``.
    """
    if alg == "ppo":
        assert steps is not None
        ppo_builder = Builder(
            PPO,
            kwargs={"clip_decay": LinearDecay(steps)},
            default=PPOConfig,
        )
        return {
            "loss": ppo_builder,
            "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
            "update_repeats": 4,
        }

    if alg == "a2c":
        a2c_builder = Builder(A2C, default=A2CConfig)
        return {
            "loss": a2c_builder,
            "num_mini_batch": 1,
            "update_repeats": 1,
        }

    if alg == "imitation":
        imitation_builder = Builder(Imitation)
        return {
            "loss": imitation_builder,
            "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
            "update_repeats": 4,
        }

    raise NotImplementedError
def training_pipeline(cls, **kwargs):
    """Build a single-stage PPO training pipeline.

    One ``"ppo_loss"`` stage runs for 1e6 steps with Adam (lr 3e-4); the
    learning rate follows a ``LambdaLR`` schedule driven by ``LinearDecay``
    over the same number of steps.
    """
    total_steps = int(1e6)

    adam_builder = Builder(optim.Adam, dict(lr=3e-4))
    losses = {
        "ppo_loss": Builder(
            PPO,
            kwargs={},
            default=PPOConfig,
        )
    }
    stages = [
        PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
    ]
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
    )

    return TrainingPipeline(
        save_interval=200000,
        metric_accumulate_interval=1,
        optimizer_builder=adam_builder,
        num_mini_batch=2,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=scheduler_builder,
    )
def training_pipeline(cls, **kwargs):
    """Build a single-stage PPO pipeline with a linearly decayed clip.

    The PPO loss is instantiated directly (not through a ``Builder``) with
    ``clip_decay=LinearDecay(total_steps)``. The number of mini-batches is
    6 when CUDA is available and 2 otherwise.
    """
    total_steps = int(1e6)
    mini_batches = 6 if torch.cuda.is_available() else 2
    # Log every 10 max length tasks
    accumulate_every = cls.MAX_STEPS * 10

    losses = {
        "ppo_loss": PPO(clip_decay=LinearDecay(total_steps), **PPOConfig),
    }
    stages = [
        PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps),
    ]

    return TrainingPipeline(
        save_interval=10000,
        metric_accumulate_interval=accumulate_every,
        optimizer_builder=Builder(optim.Adam, dict(lr=2.5e-4)),
        num_mini_batch=mini_batches,
        update_repeats=4,
        max_grad_norm=0.5,
        num_steps=128,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=1.0,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=stages,
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
        ),
    )
def training_pipeline(cls, **kwargs):
    """Build a single-stage imitation-learning training pipeline.

    One ``"imitation_loss"`` stage runs for 75,000,000 steps with Adam
    (lr 3e-4). Metrics are accumulated every 10000 steps when CUDA is
    available and every step otherwise.
    """
    total_steps = int(75000000)
    accumulate_every = 10000 if torch.cuda.is_available() else 1

    imitation_stage = PipelineStage(
        loss_names=["imitation_loss"],
        max_stage_steps=total_steps,
        # teacher_forcing=LinearDecay(steps=int(1e5), startp=1.0, endp=0.0,),
    )

    return TrainingPipeline(
        save_interval=5000000,
        metric_accumulate_interval=accumulate_every,
        optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
        num_mini_batch=1,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        named_losses={"imitation_loss": Imitation()},
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[imitation_stage],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
        ),
    )
def training_pipeline(cls, **kwargs):
    """Build a single-stage PPO training pipeline of 250,000,000 steps.

    The PPO loss is instantiated directly from ``PPOConfig``; Adam uses
    lr 3e-4 with a ``LambdaLR`` schedule driven by ``LinearDecay`` over the
    full stage length.
    """
    total_steps = int(250000000)

    hyperparams = dict(
        save_interval=5000000,
        metric_accumulate_interval=1000,
        num_mini_batch=1,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=30,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
    )

    return TrainingPipeline(
        optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
        named_losses={"ppo_loss": PPO(**PPOConfig)},
        advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[
            PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        ],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
        ),
        **hyperparams,
    )
def __init__(self):
    """Configure RGB + depth sensing with one ResNet preprocessor per image.

    Enables depth rendering in the environment arguments, installs the RGB,
    depth and goal-object-type sensors, and lists the observations made
    available to the model.
    """
    super().__init__()

    # NOTE(review): this mutates ENV_ARGS in place; if that dict is shared
    # at class level the change is visible to other users of it -- confirm.
    self.ENV_ARGS["renderDepthImage"] = True

    rgb_sensor = RGBSensorThor(
        height=self.SCREEN_SIZE,
        width=self.SCREEN_SIZE,
        use_resnet_normalization=True,
        uuid="rgb_lowres",
    )
    depth_sensor = DepthSensorRoboThor(
        height=self.SCREEN_SIZE,
        width=self.SCREEN_SIZE,
        use_normalization=True,
        uuid="depth_lowres",
    )
    goal_sensor = GoalObjectTypeThorSensor(object_types=self.TARGET_TYPES)
    self.SENSORS = [rgb_sensor, depth_sensor, goal_sensor]

    # The RGB and depth preprocessors differ only in their input/output uuids.
    self.PREPROCESSORS = [
        Builder(
            ResnetPreProcessorHabitat,
            {
                "input_height": self.SCREEN_SIZE,
                "input_width": self.SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": [sensor_uuid],
                "output_uuid": resnet_uuid,
                "parallel": False,
            },
        )
        for sensor_uuid, resnet_uuid in (
            ("rgb_lowres", "rgb_resnet"),
            ("depth_lowres", "depth_resnet"),
        )
    ]

    self.OBSERVATIONS = ["rgb_resnet", "depth_resnet", "goal_object_type_ind"]
def training_pipeline(self, **kwargs):
    """Build a PPO + grouped-action-imitation training pipeline.

    A single stage of 300,000,000 steps trains with both ``"ppo_loss"`` and
    ``"grouped_action_imitation"``. The imitation loss groups the action
    indices into two sets: every action whose name is not
    ``robothor_constants.END``, and the index of the ``END`` action.
    """
    total_steps = int(300000000)

    action_names = ObjectNavTask.class_action_names()
    non_end_inds = set()
    for ind, name in enumerate(action_names):
        if name != robothor_constants.END:
            non_end_inds.add(ind)
    end_inds = {action_names.index(robothor_constants.END)}

    adam_builder = Builder(optim.Adam, dict(lr=3e-4))
    losses = {
        "ppo_loss": PPO(**PPOConfig),
        "grouped_action_imitation": GroupedActionImitation(
            nactions=len(ObjectNavTask.class_action_names()),
            action_groups=[non_end_inds, end_inds],
        ),
    }
    joint_stage = PipelineStage(
        loss_names=["ppo_loss", "grouped_action_imitation"],
        max_stage_steps=total_steps,
    )

    return TrainingPipeline(
        save_interval=5000000,
        metric_accumulate_interval=10000,
        optimizer_builder=adam_builder,
        num_mini_batch=1,
        update_repeats=4,
        max_grad_norm=0.5,
        num_steps=128,
        named_losses=losses,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD,
        pipeline_stages=[joint_stage],
        lr_scheduler_builder=Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}
        ),
    )
def machine_params(self, mode="train", **kwargs):
    """Return the ``MachineParams`` for ``mode``.

    ``mode`` must be ``"train"``, ``"valid"`` or ``"test"``; any other value
    raises ``NotImplementedError``. Training splits processes across
    ``TRAIN_GPUS`` via ``split_num_processes``; validation uses 0 processes
    and testing uses 1. An observation set builder is only created for
    training or when at least one process is requested.
    """
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError(
            "mode must be 'train', 'valid', or 'test'.")

    if mode == "train":
        devices = self.TRAIN_GPUS
        nprocesses = self.split_num_processes(len(devices))
    elif mode == "valid":
        nprocesses = 0
        devices = self.VALIDATION_GPUS
    else:
        nprocesses = 1
        devices = self.TESTING_GPUS

    observation_set = None
    # In training `nprocesses` comes from split_num_processes, so the
    # comparison is only reached for the integer valid/test values.
    if mode == "train" or nprocesses > 0:
        observation_set = Builder(
            ObservationSet,
            kwargs=dict(
                source_ids=self.OBSERVATIONS,
                all_preprocessors=self.PREPROCESSORS,
                all_sensors=self.SENSORS,
            ),
        )

    return MachineParams(
        nprocesses=nprocesses,
        devices=devices,
        observation_set=observation_set,
    )
def machine_params(self, mode="train", **kwargs):
    """Return process/GPU settings and the observation set for ``mode``.

    ``mode`` must be ``"train"``, ``"valid"`` or ``"test"``; any other value
    raises ``NotImplementedError``. Without CUDA every mode runs a single
    process with no GPU ids. Outside of training, ``ExpertActionSensor``
    sensors and observations containing ``"expert_action"`` are removed.
    In validation mode every preprocessor builder has ``parallel`` set to
    ``False`` (this edits ``self.PREPROCESSORS`` in place).
    """
    has_cuda = torch.cuda.is_available()

    if mode == "train":
        workers_per_device = 1
        if has_cuda:
            gpu_ids = self.TRAIN_GPU_IDS * workers_per_device
            nprocesses = self.split_num_processes(
                self.NUM_PROCESSES, ndevices=len(gpu_ids)
            )
        else:
            gpu_ids = []
            nprocesses = 1
        sampler_devices = self.SAMPLER_GPU_IDS
    elif mode == "valid":
        nprocesses = 1
        gpu_ids = self.VALID_GPU_IDS if has_cuda else []
        sampler_devices = gpu_ids
    elif mode == "test":
        nprocesses = 15 if has_cuda else 1
        gpu_ids = self.TEST_GPU_IDS if has_cuda else []
        sampler_devices = gpu_ids
    else:
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

    sensors = list(self.SENSORS)
    observations = list(self.OBSERVATIONS)
    if mode != "train":
        # Expert supervision is only provided during training.
        sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)]
        observations = [o for o in observations if "expert_action" not in o]

    # Disable parallelization for validation process
    if mode == "valid":
        for builder in self.PREPROCESSORS:
            builder.kwargs["parallel"] = False

    needs_observations = (
        mode == "train"
        or (isinstance(nprocesses, int) and nprocesses > 0)
        or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0)
    )
    observation_set = None
    if needs_observations:
        observation_set = Builder(
            ObservationSet,
            kwargs=dict(
                source_ids=observations,
                all_preprocessors=self.PREPROCESSORS,
                all_sensors=sensors,
            ),
        )

    return {
        "nprocesses": nprocesses,
        "gpu_ids": gpu_ids,
        "sampler_devices": sampler_devices,  # ignored with > 1 gpu_ids
        "observation_set": observation_set,
    }
def _training_pipeline(  # type:ignore
    cls,
    named_losses: Dict[str, Union[Loss, Builder]],
    pipeline_stages: List[PipelineStage],
    num_mini_batch: int,
    update_repeats: int,
    total_train_steps: int,
    lr: Optional[float] = None,
):
    """Assemble a ``TrainingPipeline`` from shared class-level defaults.

    :param named_losses: mapping from loss name to loss (or loss builder).
    :param pipeline_stages: the stages the pipeline runs through.
    :param num_mini_batch: forwarded unchanged to ``TrainingPipeline``.
    :param update_repeats: forwarded unchanged to ``TrainingPipeline``.
    :param total_train_steps: length of the ``LinearDecay`` learning-rate
        schedule (only used when ``cls.USE_LR_DECAY`` is truthy).
    :param lr: Adam learning rate; ``cls.DEFAULT_LR`` is used when ``None``.
    :return: the configured ``TrainingPipeline``.
    """
    # Previously `lr` was unconditionally replaced by `cls.DEFAULT_LR`, which
    # silently discarded any learning rate the caller passed in.
    if lr is None:
        lr = cls.DEFAULT_LR

    num_steps = cls.ROLLOUT_STEPS
    metric_accumulate_interval = cls.METRIC_ACCUMULATE_INTERVAL()
    # NOTE(review): very large interval -- presumably meant to disable
    # periodic checkpointing; confirm against the training engine.
    save_interval = 2 ** 31
    gamma = 0.99
    # GAE is switched off whenever a loss named "reinforce_loss" is present.
    use_gae = "reinforce_loss" not in named_losses
    gae_lambda = 0.99
    max_grad_norm = 0.5

    lr_scheduler_builder = (
        Builder(
            LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
        )
        if cls.USE_LR_DECAY
        else None
    )

    return TrainingPipeline(
        save_interval=save_interval,
        metric_accumulate_interval=metric_accumulate_interval,
        optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
        num_mini_batch=num_mini_batch,
        update_repeats=update_repeats,
        max_grad_norm=max_grad_norm,
        num_steps=num_steps,
        named_losses=named_losses,
        gamma=gamma,
        use_gae=use_gae,
        gae_lambda=gae_lambda,
        advance_scene_rollout_period=None,
        should_log=cls.SHOULD_LOG,
        pipeline_stages=pipeline_stages,
        lr_scheduler_builder=lr_scheduler_builder,
    )
def machine_params(self, mode="train", **kwargs):
    """Return process/GPU settings and the observation set for ``mode``.

    ``mode`` must be ``"train"``, ``"valid"`` or ``"test"``; any other value
    raises ``NotImplementedError``. Validation and testing both use 15
    processes. Video rendering is disabled in every mode. In validation mode
    every preprocessor builder has ``parallel`` set to ``False`` (this edits
    ``self.PREPROCESSORS`` in place).
    """
    has_cuda = torch.cuda.is_available()

    if mode == "train":
        workers_per_device = 1
        if has_cuda:
            gpu_ids = self.TRAIN_GPU_IDS * workers_per_device
            nprocesses = self.split_num_processes(len(gpu_ids))
        else:
            gpu_ids = []
            nprocesses = 1
        sampler_devices = self.TRAIN_GPU_IDS
    elif mode == "valid":
        nprocesses = 15
        gpu_ids = self.VALID_GPU_IDS if has_cuda else []
        sampler_devices = gpu_ids
    elif mode == "test":
        nprocesses = 15
        gpu_ids = self.TEST_GPU_IDS if has_cuda else []
        sampler_devices = gpu_ids
    else:
        raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

    # Disable parallelization for validation process
    if mode == "valid":
        for builder in self.PREPROCESSORS:
            builder.kwargs["parallel"] = False

    observation_set = None
    if mode == "train" or nprocesses > 0:
        observation_set = Builder(
            ObservationSet,
            kwargs=dict(
                source_ids=self.OBSERVATIONS,
                all_preprocessors=self.PREPROCESSORS,
                all_sensors=self.SENSORS,
            ),
        )

    return {
        "nprocesses": nprocesses,
        "gpu_ids": gpu_ids,
        "sampler_devices": sampler_devices,  # ignored with > 1 gpu_ids
        "observation_set": observation_set,
        "render_video": False,
    }
def training_pipeline(cls, **kwargs) -> TrainingPipeline:
    """Build a single-stage PPO training pipeline of 150,000 steps.

    Uses Adam with lr 1e-4, 4 mini-batches, 3 update repeats and rollouts of
    16 steps; the learning rate follows a ``LambdaLR`` schedule driven by
    ``LinearDecay`` over the full stage length.
    """
    total_steps = int(150000)

    losses = dict(ppo_loss=PPO(**PPOConfig))  # type:ignore
    ppo_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
    adam_builder = Builder(optim.Adam, dict(lr=1e-4))
    scheduler_builder = Builder(
        LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)}  # type:ignore
    )

    return TrainingPipeline(
        named_losses=losses,
        pipeline_stages=[ppo_stage],
        optimizer_builder=adam_builder,
        num_mini_batch=4,
        update_repeats=3,
        max_grad_norm=0.5,
        num_steps=16,
        gamma=0.99,
        use_gae=True,
        gae_lambda=0.95,
        advance_scene_rollout_period=None,
        save_interval=10000,
        metric_accumulate_interval=1,
        lr_scheduler_builder=scheduler_builder,
    )
def __init__(self):
    """Install a single ResNet-18 preprocessor builder.

    The preprocessor reads the observation named ``self.VISION_UUID`` and
    writes its output under ``self.RESNET_OUTPUT_UUID``.
    """
    resnet_kwargs = {
        "input_height": self.SCREEN_SIZE,
        "input_width": self.SCREEN_SIZE,
        "output_width": 7,
        "output_height": 7,
        "output_dims": 512,
        "pool": False,
        "torchvision_resnet_model": models.resnet18,
        "input_uuids": [self.VISION_UUID],
        "output_uuid": self.RESNET_OUTPUT_UUID,
        "parallel": False,
    }
    self.PREPROCESSORS = [Builder(ResnetPreProcessorHabitat, resnet_kwargs)]
def machine_params(self, mode="train", **kwargs):
    """Extend the parent's machine parameters with this config's observations.

    The parent result's ``"observation_set"`` entry is replaced by a builder
    over this instance's observations, preprocessors and sensors when at
    least one process is requested, and by ``None`` otherwise.
    """
    params = super().machine_params(mode, **kwargs)
    nprocesses = params["nprocesses"]

    # `nprocesses` may be a single count or a per-device list of counts.
    if isinstance(nprocesses, int):
        has_workers = nprocesses > 0
    elif isinstance(nprocesses, List):
        has_workers = max(nprocesses) > 0
    else:
        has_workers = False

    if has_workers:
        params["observation_set"] = Builder(
            ObservationSet,
            kwargs=dict(
                source_ids=self.OBSERVATIONS,
                all_preprocessors=self.PREPROCESSORS,
                all_sensors=self.SENSORS,
            ),
        )
    else:
        params["observation_set"] = None

    return params
def machine_params(self, mode="train", **kwargs):
    """Return process/GPU settings and the observation set for ``mode``.

    ``mode`` must be ``"train"``, ``"valid"`` or ``"test"``; any other value
    raises ``NotImplementedError``. Validation and testing each use one
    process; video rendering is enabled only for testing. Without CUDA no
    GPU ids are returned.
    """
    if mode not in ("train", "valid", "test"):
        raise NotImplementedError(
            "mode must be 'train', 'valid', or 'test'.")

    has_cuda = torch.cuda.is_available()

    if mode == "train":
        workers_per_device = 1
        if has_cuda:
            gpu_ids = self.TRAINING_GPUS * workers_per_device
            nprocesses = self.split_num_processes(len(gpu_ids))
        else:
            gpu_ids = []
            nprocesses = 1
    elif mode == "valid":
        nprocesses = 1
        gpu_ids = self.VALIDATION_GPUS if has_cuda else []
    else:
        nprocesses = 1
        gpu_ids = self.TESTING_GPUS if has_cuda else []

    observation_set = None
    if mode == "train" or nprocesses > 0:
        observation_set = Builder(
            ObservationSet,
            kwargs=dict(
                source_ids=self.OBSERVATIONS,
                all_preprocessors=self.PREPROCESSORS,
                all_sensors=self.SENSORS,
            ),
        )

    return {
        "nprocesses": nprocesses,
        "gpu_ids": gpu_ids,
        "observation_set": observation_set,
        "render_video": mode == "test",
    }
def __init__(self):
    """Configure RGB sensing with expert actions and no reward shaping.

    Sets the ``"shaping_weight"`` reward entry to 0, installs the RGB,
    goal-object-type and expert-action sensors, one ResNet-18 preprocessor
    for the RGB image, and lists the observations made available to the
    model.
    """
    super().__init__()

    # NOTE(review): this mutates REWARD_CONFIG in place; if that dict is
    # shared at class level the change is visible to other users -- confirm.
    self.REWARD_CONFIG["shaping_weight"] = 0

    rgb_sensor = RGBSensorThor(
        height=self.SCREEN_SIZE,
        width=self.SCREEN_SIZE,
        use_resnet_normalization=True,
        uuid="rgb_lowres",
    )
    goal_sensor = GoalObjectTypeThorSensor(object_types=self.TARGET_TYPES)
    expert_sensor = ExpertActionSensor(
        nactions=len(ObjectNavTask.class_action_names())
    )
    self.SENSORS = [rgb_sensor, goal_sensor, expert_sensor]

    resnet_kwargs = {
        "input_height": self.SCREEN_SIZE,
        "input_width": self.SCREEN_SIZE,
        "output_width": 7,
        "output_height": 7,
        "output_dims": 512,
        "pool": False,
        "torchvision_resnet_model": models.resnet18,
        "input_uuids": ["rgb_lowres"],
        "output_uuid": "rgb_resnet",
        "parallel": False,
    }
    self.PREPROCESSORS = [Builder(ResnetPreProcessorHabitat, resnet_kwargs)]

    self.OBSERVATIONS = ["rgb_resnet", "goal_object_type_ind", "expert_action"]
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in RoboThor."""

    # %%
    """
    We then define the task parameters. For PointNav, these include the maximum number of steps our agent
    can take before being reset (this prevents the agent from wandering on forever), and a configuration
    for the reward function that we will be using.
    """

    # %%
    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # %%
    """
    In this case, we set the maximum number of steps to 500.
    We give the agent a reward of -0.01 for each action that it takes (this is to encourage it to reach the goal
    in as few actions as possible), and a reward of 10.0 if the agent manages to successfully reach its destination.
    If the agent selects the `stop` action without reaching the target we do not punish it (although this is
    sometimes useful for preventing the agent from stopping prematurely). Finally, our agent gets rewarded if it moves
    closer to the target and gets punished if it moves further away. `shaping_weight` controls how strong this signal
    should be and is here set to 1.0. These parameters work well for training an agent on PointNav, but feel free to
    play around with them.

    Next, we set the parameters of the simulator itself. Here we select a resolution at which the engine will render
    every frame (640 by 480) and a resolution at which the image will be fed into the neural network (here it is set
    to a 224 by 224 box).
    """

    # %%
    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # %%
    """
    Next, we set the hardware parameters for the training engine. `NUM_PROCESSES` sets the total number of parallel
    processes that will be used to train the model. In general, more processes result in faster training,
    but since each process is a unique instance of the environment in which we are training they can take up a
    lot of memory. Depending on the size of the model, the environment, and the hardware we are using, we may
    need to adjust this number, but for a setup with 8 GTX Titans, 60 processes work fine. 60 also happens to
    be the number of training scenes in RoboTHOR, which allows each process to load only a single scene into
    memory, saving time and space.

    `TRAINING_GPUS` takes the ids of the GPUS on which
    the model should be trained. Similarly `VALIDATION_GPUS` and `TESTING_GPUS` hold the ids of the GPUS on which
    the validation and testing will occur. During training, a validation process is constantly running and evaluating
    the current model, to show the progress on the validation set, so reserving a GPU for validation can be a good idea.
    If our hardware setup does not include a GPU, these fields can be set to empty lists, as the codebase will default
    to running everything on the CPU with only 1 process.
    """

    # %%
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    NUM_PROCESSES = 20
    TRAINING_GPUS: Sequence[int] = [0]
    VALIDATION_GPUS: Sequence[int] = [0]
    TESTING_GPUS: Sequence[int] = [0]

    # %%
    """
    Since we are using a dataset to train our model we need to define the path to where we have stored it. If we
    download the dataset instructed above we can define the path as follows
    """

    # %%
    TRAIN_DATASET_DIR = os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR,
                                     "datasets/robothor-pointnav/debug")
    VAL_DATASET_DIR = os.path.join(ABS_PATH_OF_TOP_LEVEL_DIR,
                                   "datasets/robothor-pointnav/debug")

    # %%
    """
    Next, we define the sensors. `RGBSensorThor` is the environment's implementation of an RGB sensor. It takes the
    raw image outputted by the simulator and resizes it, to the input dimensions for our neural network that we
    specified above. It also performs normalization if we want. `GPSCompassSensorRoboThor` is a sensor that tracks
    the point our agent needs to move to. It tells us the direction and distance to our goal at every time step.
    """

    # %%
    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    # %%
    """
    For the sake of this example, we are also going to be using a preprocessor with our model. In *AllenAct*
    the preprocessor abstraction is designed with large models with frozen weights in mind. These models often
    hail from the ResNet family and transform the raw pixels that our agent observes in the environment, into a
    complex embedding, which then gets stored and used as input to our trainable model instead of the original image.
    Most other preprocessing work is done in the sensor classes (as we just saw with the RGB
    sensor scaling and normalizing our input), but for the sake of efficiency, all neural network preprocessing should
    use this abstraction.
    """

    # %%
    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    # %%
    """
    Next, we must define all of the observation inputs that our model will use. These are just
    the hardcoded ids of the sensors we are using in the experiment.
    """

    # %%
    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # %%
    """
    Finally, we must define the settings of our simulator. We set the camera dimensions to the values
    we defined earlier. We set rotateStepDegrees to 30 degrees, which means that every time the agent takes a
    turn action, they will rotate by 30 degrees. We set grid size to 0.25 which means that every time the
    agent moves forward, it will do so by 0.25 meters.
    """

    # %%
    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    # %%
    """
    Now we move on to the methods that we must define to finish implementing an experiment config. Firstly we
    have a simple method that just returns the name of the experiment.
    """

    # %%
    @classmethod
    def tag(cls):
        return "PointNavRobothorRGBPPO"

    # %%
    """
    Next, we define the training pipeline. In this function, we specify exactly which algorithm or algorithms
    we will use to train our model. In this simple example, we are using the PPO loss with a learning rate of 3e-4.
    We specify 250 million steps of training and a rollout length of 30 with the `ppo_steps` and `num_steps` parameters
    respectively. All the other standard PPO parameters are also present in this function. `metric_accumulate_interval`
    sets the frequency at which data is accumulated from all the processes and logged while `save_interval` sets how
    often we save the model weights and run validation on them.
    """

    # %%
    @classmethod
    def training_pipeline(cls, **kwargs):
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 1000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"],
                              max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}),
        )

    # %%
    """
    The `machine_params` method returns the hardware parameters of each
    process, based on the list of devices we defined above.
    """

    # %%
    def machine_params(self, mode="train", **kwargs):
        sampler_devices: List[int] = []
        if mode == "train":
            workers_per_device = 1
            gpu_ids = ([] if not torch.cuda.is_available() else
                       list(self.TRAINING_GPUS) * workers_per_device)
            # Without CUDA, training falls back to a fixed count of 8
            # processes; otherwise NUM_PROCESSES is spread over the GPUs.
            nprocesses = (8 if not torch.cuda.is_available() else
                          evenly_distribute_count_into_bins(
                              self.NUM_PROCESSES, len(gpu_ids)))
            sampler_devices = list(self.TRAINING_GPUS)
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = ([] if not torch.cuda.is_available() else
                       self.VALIDATION_GPUS)
        elif mode == "test":
            nprocesses = 1
            gpu_ids = ([] if not torch.cuda.is_available() else
                       self.TESTING_GPUS)
        else:
            raise NotImplementedError(
                "mode must be 'train', 'valid', or 'test'.")

        # `nprocesses` is either a single count or a per-device sequence of
        # counts; the graph is only built when some process will use it.
        sensor_preprocessor_graph = (SensorPreprocessorGraph(
            source_observation_spaces=SensorSuite(
                self.SENSORS).observation_spaces,
            preprocessors=self.PREPROCESSORS,
        ) if mode == "train" or (
            (isinstance(nprocesses, int) and nprocesses > 0) or
            (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0))
                                     else None)

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sampler_devices=sampler_devices
            if mode == "train" else gpu_ids,  # ignored with > 1 gpu_ids
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # %%
    """
    Now we define the actual model that we will be using. **AllenAct** offers first-class support for PyTorch,
    so any PyTorch model that implements the provided `ActorCriticModel` class will work here. Here we borrow a model
    from the `pointnav_baselines` project (which unsurprisingly contains several PointNav baselines). It is a small
    convolutional network that expects the output of a ResNet as its rgb input followed by a single-layered GRU.
    The model accepts as input the number of different actions our agent can perform in the environment through the
    `action_space` parameter, which we get from the task definition. We also define the shape of the inputs we are
    going to be passing to the model with `observation_space`. We specify the names of our sensors with
    `goal_sensor_uuid` and `rgb_resnet_preprocessor_uuid`. Finally, we define the size of our RNN with `hidden_size`
    and the size of the embedding of our goal sensor data (the direction and distance to the target) with `goal_dims`.
    """

    # %%
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            observation_space=kwargs["sensor_preprocessor_graph"].
            observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # %%
    """
    We also need to define the task sampler that we will be using. This is a piece of code that generates instances
    of tasks for our agent to perform (essentially starting locations and targets for PointNav). Since we are getting
    our tasks from a dataset, the task sampler is a very simple code that just reads the specified file and sets
    the agent to the next starting locations whenever the agent exceeds the maximum number of steps or selects the
    `stop` action.
    """

    # %%
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        return PointNavDatasetTaskSampler(**kwargs)

    # %%
    """
    You might notice that we did not specify the task sampler's arguments, but are rather passing them in. The
    reason for this is that each process will have its own task sampler, and we need to specify exactly which scenes
    each process should work with. If we have several GPUS and many scenes this process of distributing the work can
    be rather complicated so we define a few helper functions to do just this.
    """

    # %%
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        # `num_parts + 1` evenly spaced boundaries over [0, n], rounded to
        # integers; consecutive pairs delimit the slice for each part.
        return np.round(np.linspace(0, n, num_parts + 1,
                                    endpoint=True)).astype(np.int32)

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        path = os.path.join(scenes_dir, "*.json.gz")
        # `glob` returns paths in arbitrary order and with the platform's
        # separator. Sorting gives every process the same scene order (so
        # the partitions below line up), and `os.path.basename` extracts
        # the file name regardless of the separator in use.
        scenes = sorted(
            os.path.basename(scene).split(".")[0]
            for scene in glob.glob(path))
        if len(scenes) == 0:
            raise RuntimeError((
                "Could find no scene dataset information in directory {}."
                " Are you sure you've downloaded them? "
                " If not, see https://allenact.org/installation/download-datasets/ information"
                " on how this can be done.").format(scenes_dir))
        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the scene list until it covers every process, then trim
            # it to a multiple of `total_processes`.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[:total_processes *
                            (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )
        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind]:inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(PointNavTask.class_action_names())),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    # %%
    """
    The very last things we need to define are the sampler arguments themselves. We define them separately for a train,
    validation, and test sampler, but in this case, they are almost the same. The arguments need to include the location
    of the dataset and distance cache as well as the environment arguments for our simulator, both of which we defined above
    and are just referencing here. The only consequential differences between these task samplers are the path to the dataset
    we are using (train or validation) and whether we want to loop over the dataset or not (we want this for training since
    we want to train for several epochs, but we do not need this for validation and testing). Since the test scenes of
    RoboTHOR are private we are also testing on our validation set.
    """

    # %%
    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        # Copy ENV_ARGS so the per-process x_display does not leak into the
        # shared class-level dict.
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0 else None)
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        res["env_args"]["x_display"] = (
            ("0.%d" % devices[process_ind % len(devices)])
            if devices is not None and len(devices) > 0 else None)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        # Uses the validation dataset; unlike the train/valid variants, no
        # "x_display" entry is added to the environment arguments here.
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = {}
        res["env_args"].update(self.ENV_ARGS)
        return res
class ObjectNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in Habitat.

    NOTE(review): the class name mentions ObjectNav/RoboThor, but every
    setting below (the Habitat config, the Habitat sensors, the
    ``PointNavTaskSampler`` and the ``PointNavHabitatRGBPPO`` tag) targets
    PointNav in Habitat. The name is kept so existing references keep working.
    """

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }
    # Success distance threshold. It is read by every *_task_sampler_args
    # method below and reused for the Habitat task/SPL success distances.
    DISTANCE_TO_GOAL = 0.2

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD = 10000000000000
    NUM_PROCESSES = 80
    # GPUs used by `machine_params` for training workers.
    TRAINING_GPUS = [0, 1, 2, 3, 4, 5, 6]
    # GPUs handed to the Habitat simulator config.
    # NOTE(review): this list differs from TRAINING_GPUS (it also contains
    # GPU 7) -- confirm that the difference is intentional.
    TRAIN_GPUS = [0, 1, 2, 3, 4, 5, 6, 7]
    VALIDATION_GPUS = [7]
    TESTING_GPUS = [7]

    # Dataset Parameters
    TRAIN_SCENES = (
        "habitat/habitat-api/data/datasets/pointnav/gibson/v1/train/train.json.gz"
    )
    VALID_SCENES = (
        "habitat/habitat-api/data/datasets/pointnav/gibson/v1/val/val.json.gz"
    )
    TEST_SCENES = (
        "habitat/habitat-api/data/datasets/pointnav/gibson/v1/test/test.json.gz"
    )

    # Habitat configuration shared (via clones) by all samplers
    CONFIG = habitat.get_config("configs/gibson.yaml")
    CONFIG.defrost()
    CONFIG.NUM_PROCESSES = NUM_PROCESSES
    CONFIG.SIMULATOR_GPU_IDS = TRAIN_GPUS
    CONFIG.DATASET.SCENES_DIR = "habitat/habitat-api/data/scene_datasets/"
    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
    CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
    CONFIG.SIMULATOR.TURN_ANGLE = 30
    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS

    CONFIG.TASK.TYPE = "Nav-v0"
    CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SPL"]
    CONFIG.TASK.SPL.TYPE = "SPL"
    CONFIG.TASK.SPL.SUCCESS_DISTANCE = DISTANCE_TO_GOAL

    CONFIG.MODE = "train"

    SENSORS = [
        RGBSensorHabitat(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    PREPROCESSORS = [
        Builder(
            ResnetPreProcessorHabitat,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
                "parallel": False,  # TODO False for debugging
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # One environment config per training process; indexed by `process_ind`
    # in `train_task_sampler_args`.
    TRAIN_CONFIGS = construct_env_configs(CONFIG)

    @classmethod
    def tag(cls):
        """Return the identifying tag of this experiment."""
        return "PointNavHabitatRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO training pipeline.

        The learning rate is scheduled with ``LinearDecay`` over the same
        number of steps as the PPO stage.
        """
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 10000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": Builder(
                    PPO,
                    kwargs={},
                    default=PPOConfig,
                )
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def split_num_processes(self, ndevices):
        """Split ``NUM_PROCESSES`` across ``ndevices`` as evenly as possible.

        Returns a list of length ``ndevices``; the first
        ``NUM_PROCESSES % ndevices`` entries receive one extra process.
        """
        assert self.NUM_PROCESSES >= ndevices, "NUM_PROCESSES {} < ndevices {}".format(
            self.NUM_PROCESSES, ndevices
        )
        base, extra = divmod(self.NUM_PROCESSES, ndevices)
        return [base + 1 if ind < extra else base for ind in range(ndevices)]

    def machine_params(self, mode="train", **kwargs):
        """Return process counts, devices and observation set for ``mode``.

        Args:
            mode: One of ``"train"``, ``"valid"`` or ``"test"``.

        Returns:
            A dict with the keys ``nprocesses``, ``gpu_ids``,
            ``observation_set`` and ``render_video``.

        Raises:
            NotImplementedError: If ``mode`` is not one of the above.
        """
        cuda_available = torch.cuda.is_available()

        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                self.TRAINING_GPUS * workers_per_device if cuda_available else []
            )
            # Without CUDA a single process is used; otherwise this is a
            # per-device list of process counts.
            nprocesses = (
                self.split_num_processes(len(gpu_ids)) if cuda_available else 1
            )
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = self.VALIDATION_GPUS if cuda_available else []
        elif mode == "test":
            nprocesses = 1
            gpu_ids = self.TESTING_GPUS if cuda_available else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        render_video = False

        # Disable parallelization for validation process
        if mode == "valid":
            for prep in self.PREPROCESSORS:
                prep.kwargs["parallel"] = False

        # `nprocesses` is only a list in "train" mode, where the comparison
        # is short-circuited away.
        observation_set = (
            Builder(
                ObservationSet,
                kwargs=dict(
                    source_ids=self.OBSERVATIONS,
                    all_preprocessors=self.PREPROCESSORS,
                    all_sensors=self.SENSORS,
                ),
            )
            if mode == "train" or nprocesses > 0
            else None
        )

        return {
            "nprocesses": nprocesses,
            "gpu_ids": gpu_ids,
            "observation_set": observation_set,
            "render_video": render_video,
        }

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model; expects ``kwargs["observation_set"]``."""
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["observation_set"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the task sampler from the sampler-args dict."""
        return PointNavTaskSampler(**kwargs)

    def _task_sampler_args(self, config) -> Dict[str, Any]:
        """Assemble the sampler arguments shared by all modes for ``config``."""
        return {
            "env_config": config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(PointNavTask.class_action_names())
            ),
            "distance_to_goal": self.DISTANCE_TO_GOAL,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for training process ``process_ind``.

        Only ``process_ind`` is used; it selects the entry of
        ``TRAIN_CONFIGS`` for this process.
        """
        return self._task_sampler_args(self.TRAIN_CONFIGS[process_ind])

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for validation.

        A clone of ``CONFIG`` is pointed at ``VALID_SCENES`` and switched to
        ``"validate"`` mode, so the shared class-level config is not modified.
        """
        config = self.CONFIG.clone()
        config.defrost()
        config.DATASET.DATA_PATH = self.VALID_SCENES
        config.MODE = "validate"
        config.freeze()
        return self._task_sampler_args(config)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for test process ``process_ind``.

        NOTE(review): ``TEST_CONFIGS`` is not defined in this class;
        presumably a subclass is expected to provide it -- confirm.
        """
        config = self.TEST_CONFIGS[process_ind]  # type:ignore
        return self._task_sampler_args(config)
class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
    """A Point Navigation experiment configuration in RoboThor."""

    # Task Parameters
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }

    # Simulator Parameters
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # Training Engine Parameters
    ADVANCE_SCENE_ROLLOUT_PERIOD = 10**13
    NUM_PROCESSES = 20
    TRAINING_GPUS = [0]
    VALIDATION_GPUS = [0]
    TESTING_GPUS = [0]

    # Dataset Parameters
    TRAIN_DATASET_DIR = os.path.join(
        ABS_PATH_OF_TOP_LEVEL_DIR, "datasets/robothor-pointnav/debug"
    )
    VAL_DATASET_DIR = os.path.join(
        ABS_PATH_OF_TOP_LEVEL_DIR, "datasets/robothor-pointnav/debug"
    )

    SENSORS = [
        RGBSensorThor(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
            uuid="rgb_lowres",
        ),
        GPSCompassSensorRoboThor(),
    ]

    PREPROCESSORS = [
        Builder(
            ResnetPreProcessorHabitat,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
                "parallel": False,
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    ENV_ARGS = dict(
        width=CAMERA_WIDTH,
        height=CAMERA_HEIGHT,
        rotateStepDegrees=30.0,
        visibilityDistance=1.0,
        gridSize=0.25,
    )

    @classmethod
    def tag(cls):
        """Return the identifying tag of this experiment."""
        return "PointNavRobothorRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Build the single-stage PPO training pipeline.

        The learning rate is scheduled with ``LinearDecay`` over the same
        number of steps as the PPO stage.
        """
        ppo_steps = int(250000000)
        lr = 3e-4
        num_mini_batch = 1
        update_repeats = 3
        num_steps = 30
        save_interval = 5000000
        log_interval = 1000
        gamma = 0.99
        use_gae = True
        gae_lambda = 0.95
        max_grad_norm = 0.5
        return TrainingPipeline(
            save_interval=save_interval,
            metric_accumulate_interval=log_interval,
            optimizer_builder=Builder(optim.Adam, dict(lr=lr)),
            num_mini_batch=num_mini_batch,
            update_repeats=update_repeats,
            max_grad_norm=max_grad_norm,
            num_steps=num_steps,
            named_losses={
                "ppo_loss": Builder(
                    PPO,
                    kwargs={},
                    default=PPOConfig,
                )
            },
            gamma=gamma,
            use_gae=use_gae,
            gae_lambda=gae_lambda,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps)
            ],
            lr_scheduler_builder=Builder(
                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
            ),
        )

    def split_num_processes(self, ndevices):
        """Split ``NUM_PROCESSES`` across ``ndevices`` as evenly as possible.

        Returns a list of length ``ndevices``; the first
        ``NUM_PROCESSES % ndevices`` entries receive one extra process.
        """
        assert self.NUM_PROCESSES >= ndevices, "NUM_PROCESSES {} < ndevices {}".format(
            self.NUM_PROCESSES, ndevices
        )
        base, extra = divmod(self.NUM_PROCESSES, ndevices)
        return [base + 1 if ind < extra else base for ind in range(ndevices)]

    def machine_params(self, mode="train", **kwargs):
        """Return process counts, devices and observation set for ``mode``.

        Args:
            mode: One of ``"train"``, ``"valid"`` or ``"test"``.

        Returns:
            A dict with the keys ``nprocesses``, ``gpu_ids``,
            ``sampler_devices``, ``observation_set`` and ``render_video``.

        Raises:
            NotImplementedError: If ``mode`` is not one of the above.
        """
        cuda_available = torch.cuda.is_available()

        if mode == "train":
            workers_per_device = 1
            gpu_ids = (
                self.TRAINING_GPUS * workers_per_device if cuda_available else []
            )
            # Without CUDA 8 processes are used; otherwise this is a
            # per-device list of process counts.
            nprocesses = (
                self.split_num_processes(len(gpu_ids)) if cuda_available else 8
            )
            # Samplers get the training GPUs whether or not CUDA is available.
            sampler_devices = self.TRAINING_GPUS
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = self.VALIDATION_GPUS if cuda_available else []
            sampler_devices = gpu_ids
        elif mode == "test":
            nprocesses = 1
            gpu_ids = self.TESTING_GPUS if cuda_available else []
            sampler_devices = gpu_ids
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        render_video = False

        # Disable parallelization for validation process
        if mode == "valid":
            for prep in self.PREPROCESSORS:
                prep.kwargs["parallel"] = False

        # `nprocesses` is only a list in "train" mode, where the comparison
        # is short-circuited away.
        observation_set = (
            Builder(
                ObservationSet,
                kwargs=dict(
                    source_ids=self.OBSERVATIONS,
                    all_preprocessors=self.PREPROCESSORS,
                    all_sensors=self.SENSORS,
                ),
            )
            if mode == "train" or nprocesses > 0
            else None
        )

        return {
            "nprocesses": nprocesses,
            "gpu_ids": gpu_ids,
            "sampler_devices": sampler_devices,
            "observation_set": observation_set,
            "render_video": render_video,
        }

    # Define Model
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Create the actor-critic model; expects ``kwargs["observation_set"]``."""
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(len(PointNavTask.class_action_names())),
            observation_space=kwargs["observation_set"].observation_spaces,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # Define Task Sampler
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Instantiate the task sampler from the sampler-args dict."""
        return PointNavDatasetTaskSampler(**kwargs)

    # Utility Functions for distributing scenes between GPUs
    @staticmethod
    def _partition_inds(n: int, num_parts: int):
        """Return ``num_parts + 1`` rounded, evenly spaced bounds over ``[0, n]``.

        Consecutive entries delimit the slice assigned to each part.
        """
        return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype(
            np.int32
        )

    @staticmethod
    def _x_display(process_ind: int, devices: Optional[List[int]]) -> Optional[str]:
        """Return the ``"0.<device>"`` display string for a process.

        Devices are assigned to processes round-robin; ``None`` is returned
        when ``devices`` is ``None`` or empty.
        """
        if devices is not None and len(devices) > 0:
            return "0.%d" % devices[process_ind % len(devices)]
        return None

    def _env_args(
        self, process_ind: int, devices: Optional[List[int]]
    ) -> Dict[str, Any]:
        """Return a fresh copy of ``ENV_ARGS`` with ``x_display`` filled in."""
        env_args: Dict[str, Any] = {}
        env_args.update(self.ENV_ARGS)
        env_args["x_display"] = self._x_display(process_ind, devices)
        return env_args

    def _get_sampler_args_for_scene_split(
        self,
        scenes_dir: str,
        process_ind: int,
        total_processes: int,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return the sampler arguments for one process's share of the scenes.

        Scene names are the basenames, up to the first ``.``, of the
        ``*.json.gz`` files found in ``scenes_dir``. The list of scenes is
        split with `_partition_inds` and process ``process_ind`` receives its
        slice.

        Raises:
            RuntimeError: If ``scenes_dir`` contains no ``*.json.gz`` file.
        """
        path = os.path.join(scenes_dir, "*.json.gz")
        scenes = [os.path.basename(scene).split(".")[0] for scene in glob.glob(path)]
        if not scenes:
            raise RuntimeError(
                (
                    "Could find no scene dataset information in directory {}."
                    " Are you sure you've downloaded them? "
                    " If not, see https://allenact.org/installation/download-datasets/ information"
                    " on how this can be done."
                ).format(scenes_dir)
            )

        if total_processes > len(scenes):  # oversample some scenes -> bias
            if total_processes % len(scenes) != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisible by the number of scenes"
                )
            # Repeat the list until every process can get a scene, then trim
            # it to a multiple of the number of processes.
            scenes = scenes * int(ceil(total_processes / len(scenes)))
            scenes = scenes[: total_processes * (len(scenes) // total_processes)]
        else:
            if len(scenes) % total_processes != 0:
                print(
                    "Warning: oversampling some of the scenes to feed all processes."
                    " You can avoid this by setting a number of workers divisor of the number of scenes"
                )

        inds = self._partition_inds(len(scenes), total_processes)

        return {
            "scenes": scenes[inds[process_ind] : inds[process_ind + 1]],
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(
                len(PointNavTask.class_action_names())
            ),
            "seed": seeds[process_ind] if seeds is not None else None,
            "deterministic_cudnn": deterministic_cudnn,
            "rewards_config": self.REWARD_CONFIG,
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for training process ``process_ind``.

        Uses the episodes under ``TRAIN_DATASET_DIR``, loops over the dataset
        and sets ``allow_flipping``.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.TRAIN_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.TRAIN_DATASET_DIR
        res["loop_dataset"] = True
        res["env_args"] = self._env_args(process_ind, devices)
        res["allow_flipping"] = True
        return res

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for validation process ``process_ind``.

        Uses the episodes under ``VAL_DATASET_DIR`` without looping.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = self._env_args(process_ind, devices)
        return res

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Return sampler arguments for test process ``process_ind``.

        Testing reuses ``VAL_DATASET_DIR`` without looping. ``x_display`` is
        derived from ``devices`` exactly as for training and validation, so
        the ``devices`` argument is honoured here as well.
        """
        res = self._get_sampler_args_for_scene_split(
            os.path.join(self.VAL_DATASET_DIR, "episodes"),
            process_ind,
            total_processes,
            seeds=seeds,
            deterministic_cudnn=deterministic_cudnn,
        )
        res["scene_directory"] = self.VAL_DATASET_DIR
        res["loop_dataset"] = False
        res["env_args"] = self._env_args(process_ind, devices)
        return res
class PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig):
    """Tutorial configuration: RGB PointNav in Habitat, trained with PPO."""

    # ---- Task ----
    MAX_STEPS = 500
    REWARD_CONFIG = {
        "step_penalty": -0.01,
        "goal_success_reward": 10.0,
        "failed_stop_reward": 0.0,
        "shaping_weight": 1.0,
    }
    DISTANCE_TO_GOAL = 0.2

    # ---- Simulator ----
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480
    SCREEN_SIZE = 224

    # ---- Training engine ----
    ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None
    # At least 4 processes, otherwise 5 per visible GPU minus one.
    NUM_PROCESSES = max(5 * torch.cuda.device_count() - 1, 4)
    TRAINING_GPUS = list(range(torch.cuda.device_count()))
    # Validation and testing both use the last visible GPU.
    VALIDATION_GPUS = [torch.cuda.device_count() - 1]
    TESTING_GPUS = [torch.cuda.device_count() - 1]

    # ---- Datasets ----
    # The split name appears twice in the path: <split>/<split>.json.gz
    task_data_dir_template = os.path.join(
        HABITAT_DATASETS_DIR, "pointnav/gibson/v1/{}/{}.json.gz"
    )
    TRAIN_SCENES = task_data_dir_template.format("train", "train")
    VALID_SCENES = task_data_dir_template.format("val", "val")
    TEST_SCENES = task_data_dir_template.format("test", "test")

    # ---- Habitat config ----
    CONFIG = get_habitat_config(
        os.path.join(HABITAT_CONFIGS_DIR, "tasks/pointnav_gibson.yaml")
    )
    CONFIG.defrost()
    CONFIG.NUM_PROCESSES = NUM_PROCESSES
    CONFIG.SIMULATOR_GPU_IDS = TRAINING_GPUS
    CONFIG.DATASET.SCENES_DIR = "habitat/habitat-api/data/scene_datasets/"
    CONFIG.DATASET.POINTNAVV1.CONTENT_SCENES = ["*"]
    CONFIG.DATASET.DATA_PATH = TRAIN_SCENES
    CONFIG.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]
    CONFIG.SIMULATOR.RGB_SENSOR.WIDTH = CAMERA_WIDTH
    CONFIG.SIMULATOR.RGB_SENSOR.HEIGHT = CAMERA_HEIGHT
    CONFIG.SIMULATOR.TURN_ANGLE = 30
    CONFIG.SIMULATOR.FORWARD_STEP_SIZE = 0.25
    CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = MAX_STEPS

    CONFIG.TASK.TYPE = "Nav-v0"
    CONFIG.TASK.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SENSORS = ["POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.GOAL_FORMAT = "POLAR"
    CONFIG.TASK.POINTGOAL_WITH_GPS_COMPASS_SENSOR.DIMENSIONALITY = 2
    CONFIG.TASK.GOAL_SENSOR_UUID = "pointgoal_with_gps_compass"
    CONFIG.TASK.MEASUREMENTS = ["DISTANCE_TO_GOAL", "SUCCESS", "SPL"]
    CONFIG.TASK.SPL.TYPE = "SPL"
    CONFIG.TASK.SPL.SUCCESS_DISTANCE = DISTANCE_TO_GOAL
    CONFIG.TASK.SUCCESS.SUCCESS_DISTANCE = DISTANCE_TO_GOAL

    CONFIG.MODE = "train"

    # ---- Sensors and preprocessing ----
    SENSORS = [
        RGBSensorHabitat(
            height=SCREEN_SIZE,
            width=SCREEN_SIZE,
            use_resnet_normalization=True,
        ),
        TargetCoordinatesSensorHabitat(coordinate_dims=2),
    ]

    PREPROCESSORS = [
        Builder(
            ResNetPreprocessor,
            {
                "input_height": SCREEN_SIZE,
                "input_width": SCREEN_SIZE,
                "output_width": 7,
                "output_height": 7,
                "output_dims": 512,
                "pool": False,
                "torchvision_resnet_model": models.resnet18,
                "input_uuids": ["rgb_lowres"],
                "output_uuid": "rgb_resnet",
            },
        ),
    ]

    OBSERVATIONS = [
        "rgb_resnet",
        "target_coordinates_ind",
    ]

    # Indexed by `process_ind` when building the training sampler arguments.
    TRAIN_CONFIGS = construct_env_configs(CONFIG)

    @classmethod
    def tag(cls):
        """Name under which this experiment is identified."""
        return "PointNavHabitatRGBPPO"

    @classmethod
    def training_pipeline(cls, **kwargs):
        """Assemble a one-stage PPO pipeline with a linearly decayed LR."""
        total_steps = int(250000000)
        ppo_stage = PipelineStage(loss_names=["ppo_loss"], max_stage_steps=total_steps)
        lr_schedule = Builder(LambdaLR, {"lr_lambda": LinearDecay(steps=total_steps)})
        return TrainingPipeline(
            save_interval=5000000,
            metric_accumulate_interval=10000,
            optimizer_builder=Builder(optim.Adam, dict(lr=3e-4)),
            num_mini_batch=1,
            update_repeats=3,
            max_grad_norm=0.5,
            num_steps=30,
            named_losses={"ppo_loss": PPO(**PPOConfig)},
            gamma=0.99,
            use_gae=True,
            gae_lambda=0.95,
            advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
            pipeline_stages=[ppo_stage],
            lr_scheduler_builder=lr_schedule,
        )

    def machine_params(self, mode="train", **kwargs):
        """Describe processes, devices and the preprocessor graph for ``mode``.

        ``mode`` must be ``"train"``, ``"valid"`` or ``"test"``; any other
        value raises ``NotImplementedError``. Without CUDA no devices are
        listed and a single process is used.
        """
        has_cuda = torch.cuda.is_available()

        if mode == "train":
            workers_per_device = 1
            if has_cuda:
                gpu_ids = self.TRAINING_GPUS * workers_per_device
                nprocesses = evenly_distribute_count_into_bins(
                    self.NUM_PROCESSES, len(gpu_ids)
                )
            else:
                gpu_ids = []
                nprocesses = 1
        elif mode == "valid":
            nprocesses = 1
            gpu_ids = self.VALIDATION_GPUS if has_cuda else []
        elif mode == "test":
            nprocesses = 1
            gpu_ids = self.TESTING_GPUS if has_cuda else []
        else:
            raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")

        # The graph is always built for training; otherwise only when at
        # least one process is going to run.
        if mode == "train":
            wants_graph = True
        elif isinstance(nprocesses, int):
            wants_graph = nprocesses > 0
        elif isinstance(nprocesses, Sequence):
            wants_graph = sum(nprocesses) > 0
        else:
            wants_graph = False

        sensor_preprocessor_graph = None
        if wants_graph:
            sensor_preprocessor_graph = SensorPreprocessorGraph(
                source_observation_spaces=SensorSuite(self.SENSORS).observation_spaces,
                preprocessors=self.PREPROCESSORS,
            )

        return MachineParams(
            nprocesses=nprocesses,
            devices=gpu_ids,
            sensor_preprocessor_graph=sensor_preprocessor_graph,
        )

    # ---- Model ----
    @classmethod
    def create_model(cls, **kwargs) -> nn.Module:
        """Build the actor-critic; reads ``kwargs["sensor_preprocessor_graph"]``."""
        num_actions = len(PointNavTask.class_action_names())
        observation_space = kwargs["sensor_preprocessor_graph"].observation_spaces
        return ResnetTensorPointNavActorCritic(
            action_space=gym.spaces.Discrete(num_actions),
            observation_space=observation_space,
            goal_sensor_uuid="target_coordinates_ind",
            rgb_resnet_preprocessor_uuid="rgb_resnet",
            hidden_size=512,
            goal_dims=32,
        )

    # ---- Task sampler ----
    @classmethod
    def make_sampler_fn(cls, **kwargs) -> TaskSampler:
        """Create the task sampler from keyword arguments."""
        return PointNavTaskSampler(**kwargs)

    def _sampler_args(self, env_config) -> Dict[str, Any]:
        """Bundle ``env_config`` with the settings common to every sampler."""
        num_actions = len(PointNavTask.class_action_names())
        return {
            "env_config": env_config,
            "max_steps": self.MAX_STEPS,
            "sensors": self.SENSORS,
            "action_space": gym.spaces.Discrete(num_actions),
            "distance_to_goal": self.DISTANCE_TO_GOAL,  # type:ignore
        }

    def train_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for the training process ``process_ind``.

        Of the parameters only ``process_ind`` is read: it picks this
        process's entry of ``TRAIN_CONFIGS``.
        """
        return self._sampler_args(self.TRAIN_CONFIGS[process_ind])

    def valid_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Sampler arguments for validation.

        Works on a clone of ``CONFIG`` that is redirected to ``VALID_SCENES``
        and put in ``"validate"`` mode before being frozen.
        """
        valid_config = self.CONFIG.clone()
        valid_config.defrost()
        valid_config.DATASET.DATA_PATH = self.VALID_SCENES
        valid_config.MODE = "validate"
        valid_config.freeze()
        return self._sampler_args(valid_config)

    def test_task_sampler_args(
        self,
        process_ind: int,
        total_processes: int,
        devices: Optional[List[int]] = None,
        seeds: Optional[List[int]] = None,
        deterministic_cudnn: bool = False,
    ) -> Dict[str, Any]:
        """Always raises ``NotImplementedError``: the tutorial has no test setup."""
        raise NotImplementedError("Testing not implemented for this tutorial.")