def test_action_space():
    space = ActionSpace(
        {
            "move": gym.spaces.Dict(
                {
                    "position": gym.spaces.Discrete(2),
                    "velocity": gym.spaces.Discrete(3),
                }
            ),
            "move_forward": EmptySpace(),
        }
    )
    assert space.contains(space.sample())
    assert space.contains(
        {"action": "move", "action_args": {"position": 0, "velocity": 1}}
    )
    assert space.contains({"action": "move_forward"})
    assert not space.contains([0, 1, 2])
    assert not space.contains({"zero": None})
    assert not space.contains({"action": "bad"})
    assert not space.contains({"action": "move"})
    assert not space.contains(
        {"action": "move", "action_args": {"position": 0}}
    )
    assert not space.contains(
        {"action": "move_forward", "action_args": {"position": 0}}
    )
def action_space(self):
    return EmptySpace()
def _worker_fn(
    world_rank: int, world_size: int, port: int, unused_params: bool
):
    device = (
        torch.device("cuda")
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    tcp_store = distrib.TCPStore(  # type: ignore
        "127.0.0.1", port, world_size, world_rank == 0
    )
    distrib.init_process_group(
        "gloo", store=tcp_store, rank=world_rank, world_size=world_size
    )

    config = get_config("habitat_baselines/config/test/ppo_pointnav_test.yaml")
    obs_space = gym.spaces.Dict(
        {
            IntegratedPointGoalGPSAndCompassSensor.cls_uuid: gym.spaces.Box(
                low=np.finfo(np.float32).min,
                high=np.finfo(np.float32).max,
                shape=(2,),
                dtype=np.float32,
            )
        }
    )
    action_space = ActionSpace({"move": EmptySpace()})
    actor_critic = PointNavBaselinePolicy.from_config(
        config, obs_space, action_space
    )
    # This adds some arbitrary parameters that aren't part of the computation
    # graph, so they will mess up DDP if they aren't correctly ignored by it
    if unused_params:
        actor_critic.unused = nn.Linear(64, 64)

    actor_critic.to(device=device)
    ppo_cfg = config.RL.PPO
    agent = DDPPO(
        actor_critic=actor_critic,
        clip_param=ppo_cfg.clip_param,
        ppo_epoch=ppo_cfg.ppo_epoch,
        num_mini_batch=ppo_cfg.num_mini_batch,
        value_loss_coef=ppo_cfg.value_loss_coef,
        entropy_coef=ppo_cfg.entropy_coef,
        lr=ppo_cfg.lr,
        eps=ppo_cfg.eps,
        max_grad_norm=ppo_cfg.max_grad_norm,
        use_normalized_advantage=ppo_cfg.use_normalized_advantage,
    )
    agent.init_distributed()

    rollouts = RolloutStorage(
        ppo_cfg.num_steps,
        2,
        obs_space,
        action_space,
        ppo_cfg.hidden_size,
        num_recurrent_layers=actor_critic.net.num_recurrent_layers,
        is_double_buffered=False,
    )
    rollouts.to(device)

    for k, v in rollouts.buffers["observations"].items():
        rollouts.buffers["observations"][k] = torch.randn_like(v)

    # Add two steps so batching works
    rollouts.advance_rollout()
    rollouts.advance_rollout()

    # Get a single batch
    batch = next(rollouts.recurrent_generator(rollouts.buffers["returns"], 1))

    # Call eval actions through the internal wrapper that is used in
    # agent.update
    value, action_log_probs, dist_entropy, _ = agent._evaluate_actions(
        batch["observations"],
        batch["recurrent_hidden_states"],
        batch["prev_actions"],
        batch["masks"],
        batch["actions"],
    )
    # Backprop on things
    (value.mean() + action_log_probs.mean() + dist_entropy.mean()).backward()

    # Make sure all ranks computed very similar gradients
    for param in actor_critic.parameters():
        if param.grad is not None:
            grads = [param.grad.detach().clone() for _ in range(world_size)]
            distrib.all_gather(grads, grads[world_rank])
            for i in range(world_size):
                assert torch.isclose(grads[i], grads[world_rank]).all()
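# A minimal sketch (not part of the original file) of how `_worker_fn` could be
# launched across ranks in a test. The world size, the port range, and the
# pytest parametrization are illustrative assumptions; torch.multiprocessing.spawn
# passes the process index as the first argument, matching `_worker_fn`'s
# `world_rank` parameter.
import random

import pytest


@pytest.mark.parametrize("unused_params", [True, False])
def test_ddppo_reduce_sketch(unused_params: bool):
    world_size = 2
    # Pick an arbitrary high port for the TCPStore rendezvous (assumption).
    port = random.randint(25000, 30000)
    torch.multiprocessing.spawn(
        _worker_fn,
        args=(world_size, port, unused_params),
        nprocs=world_size,
    )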
def __init__(self, config: Config, dataset: Optional[Dataset] = None):
    self.noise = NOISY
    print('[VisTargetNavEnv] NOISY ACTUATION : ', self.noise)
    if hasattr(config, 'AGENT_TASK'):
        self.agent_task = config.AGENT_TASK
    else:
        self.agent_task = 'search'
    if self.agent_task == 'homing':
        self.num_goals = 2
    else:
        self.num_goals = config.NUM_GOALS

    task_config = config.TASK_CONFIG
    task_config.defrost()
    #task_config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250
    task_config.TASK.TOP_DOWN_MAP.DRAW_SOURCE = True
    task_config.TASK.TOP_DOWN_MAP.DRAW_SHORTEST_PATH = True
    task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.VISIBILITY_DIST = 2.0
    task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.FOV = 360
    task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.DRAW = True
    task_config.TASK.TOP_DOWN_MAP.DRAW_VIEW_POINTS = False
    task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_POSITIONS = True
    task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_AABBS = False
    if ('GMT' in config.POLICY or 'NTS' in config.POLICY) and RENDER:
        task_config.TASK.TOP_DOWN_GRAPH_MAP = config.TASK_CONFIG.TASK.TOP_DOWN_MAP.clone()
        if 'GMT' in config.POLICY:
            task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = "TopDownGraphMap"
        elif 'NTS' in config.POLICY:
            task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = 'NTSGraphMap'
        task_config.TASK.TOP_DOWN_GRAPH_MAP.MAP_RESOLUTION = 4000
        task_config.TASK.TOP_DOWN_GRAPH_MAP.NUM_TOPDOWN_MAP_SAMPLE_POINTS = 20000
        task_config.TASK.MEASUREMENTS += ['TOP_DOWN_GRAPH_MAP']
        if 'TOP_DOWN_MAP' in config.TASK_CONFIG.TASK.MEASUREMENTS:
            task_config.TASK.MEASUREMENTS = [
                k for k in task_config.TASK.MEASUREMENTS
                if 'TOP_DOWN_MAP' != k
            ]
    task_config.SIMULATOR.ACTION_SPACE_CONFIG = "CustomActionSpaceConfiguration"
    task_config.TASK.POSSIBLE_ACTIONS = task_config.TASK.POSSIBLE_ACTIONS + [
        'NOISY_FORWARD', 'NOISY_RIGHT', 'NOISY_LEFT'
    ]
    task_config.TASK.ACTIONS.NOISY_FORWARD = habitat.config.Config()
    task_config.TASK.ACTIONS.NOISY_FORWARD.TYPE = "NOISYFORWARD"
    task_config.TASK.ACTIONS.NOISY_RIGHT = habitat.config.Config()
    task_config.TASK.ACTIONS.NOISY_RIGHT.TYPE = "NOISYRIGHT"
    task_config.TASK.ACTIONS.NOISY_LEFT = habitat.config.Config()
    task_config.TASK.ACTIONS.NOISY_LEFT.TYPE = "NOISYLEFT"
    task_config.TASK.MEASUREMENTS = [
        'GOAL_INDEX'
    ] + task_config.TASK.MEASUREMENTS + ['SOFT_SPL']
    task_config.TASK.DISTANCE_TO_GOAL.TYPE = 'Custom_DistanceToGoal'
    if self.agent_task != 'search':
        task_config.TASK.SPL.TYPE = 'Custom_SPL'
        task_config.TASK.SOFT_SPL.TYPE = 'Custom_SoftSPL'
    task_config.TASK.GOAL_INDEX = task_config.TASK.SPL.clone()
    task_config.TASK.GOAL_INDEX.TYPE = 'GoalIndex'
    task_config.freeze()

    self.config = config
    self._core_env_config = config.TASK_CONFIG
    self._reward_measure_name = config.REWARD_METHOD
    self._success_measure_name = config.SUCCESS_MEASURE
    self.success_distance = config.SUCCESS_DISTANCE
    self._previous_measure = None
    self._previous_action = -1
    self.time_t = 0
    self.stuck = 0
    self.follower = None

    if 'NOISY_FORWARD' not in HabitatSimActions:
        HabitatSimActions.extend_action_space("NOISY_FORWARD")
        HabitatSimActions.extend_action_space("NOISY_RIGHT")
        HabitatSimActions.extend_action_space("NOISY_LEFT")
    if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
        self.action_dict = {
            0: HabitatSimActions.STOP,
            1: "NOISY_FORWARD",
            2: "NOISY_LEFT",
            3: "NOISY_RIGHT",
        }
    else:
        self.action_dict = {
            0: "NOISY_FORWARD",
            1: "NOISY_LEFT",
            2: "NOISY_RIGHT",
        }

    super().__init__(self._core_env_config, dataset)

    act_dict = {
        "MOVE_FORWARD": EmptySpace(),
        'TURN_LEFT': EmptySpace(),
        'TURN_RIGHT': EmptySpace(),
    }
    if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
        act_dict.update({'STOP': EmptySpace()})
    self.action_space = ActionSpace(act_dict)

    obs_dict = {
        'panoramic_rgb':
            self.habitat_env._task.sensor_suite.observation_spaces.spaces['panoramic_rgb'],
        'panoramic_depth':
            self.habitat_env._task.sensor_suite.observation_spaces.spaces['panoramic_depth'],
        'target_goal':
            self.habitat_env._task.sensor_suite.observation_spaces.spaces['target_goal'],
        'step': Box(low=np.array(0), high=np.array(500), dtype=np.float32),
        'prev_act': Box(low=np.array(-1), high=np.array(self.action_space.n), dtype=np.int32),
        'gt_action': Box(low=np.array(-1), high=np.array(self.action_space.n), dtype=np.int32),
        'position': Box(low=-np.Inf, high=np.Inf, shape=(3,), dtype=np.float32),
        'target_pose': Box(low=-np.Inf, high=np.Inf, shape=(3,), dtype=np.float32),
        'distance': Box(low=-np.Inf, high=np.Inf, shape=(1,), dtype=np.float32),
    }
    if 'GMT' in config.POLICY and RENDER:
        self.mapper = self.habitat_env.task.measurements.measures['top_down_map']
        #obs_dict.update({'unexplored': Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),
        #                 'neighbors': Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),})
    else:
        self.mapper = None
    if 'aux' in self.config.POLICY:
        self.return_have_been = True
        self.return_target_dist_score = True
        obs_dict.update({
            'have_been': Box(low=0, high=1, shape=(1,), dtype=np.int32),
            'target_dist_score': Box(low=0, high=1, shape=(1,), dtype=np.float32),
        })
    else:
        self.return_have_been = False
        self.return_target_dist_score = False
    self.observation_space = SpaceDict(obs_dict)

    if config.DIFFICULTY == 'easy':
        self.habitat_env.difficulty = 'easy'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 1.5, 3.0
    elif config.DIFFICULTY == 'medium':
        self.habitat_env.difficulty = 'medium'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 5.0
    elif config.DIFFICULTY == 'hard':
        self.habitat_env.difficulty = 'hard'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 5.0, 10.0
    elif config.DIFFICULTY == 'random':
        self.habitat_env.difficulty = 'random'
        self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 10.0
    else:
        raise NotImplementedError
    self.habitat_env._num_goals = self.num_goals
    self.habitat_env._agent_task = self.agent_task
    print('current task : %s' % (self.agent_task))
    print('current difficulty %s, MIN_DIST %f, MAX_DIST %f - # goals %d' %
          (config.DIFFICULTY, self.habitat_env.MIN_DIST,
           self.habitat_env.MAX_DIST, self.habitat_env._num_goals))
    self.min_measure = self.habitat_env.MAX_DIST
    self.reward_method = config.REWARD_METHOD
    if self.reward_method == 'progress':
        self.get_reward = self.get_progress_reward
    elif self.reward_method == 'milestone':
        self.get_reward = self.get_milestone_reward
    elif self.reward_method == 'coverage':
        self.get_reward = self.get_coverage_reward

    self.run_mode = 'RL'
    self.number_of_episodes = 1000
    self.need_gt_action = False
    self.has_log_info = None
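# Hypothetical helper (not in the original class): given the `action_dict`
# built in `__init__`, a discrete index emitted by the policy can be mapped to
# the registered (possibly noisy) task action before it reaches the simulator.
# The method name and the STOP fallback are illustrative assumptions only.
def _to_task_action(self, action_index: int):
    # Unknown indices fall back to STOP rather than raising (assumption).
    return self.action_dict.get(action_index, HabitatSimActions.STOP)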
def test_empty_space():
    space = EmptySpace()
    assert space.contains(space.sample())
    assert space.contains(None)
    assert not space.contains(0)