Example #1
# Imports assumed for this snippet (ActionSpace and EmptySpace live in
# habitat.core.spaces in habitat-lab).
import gym

from habitat.core.spaces import ActionSpace, EmptySpace


def test_action_space():
    space = ActionSpace(
        {
            "move": gym.spaces.Dict(
                {
                    "position": gym.spaces.Discrete(2),
                    "velocity": gym.spaces.Discrete(3),
                }
            ),
            "move_forward": EmptySpace(),
        }
    )
    assert space.contains(space.sample())
    assert space.contains(
        {"action": "move", "action_args": {"position": 0, "velocity": 1}}
    )
    assert space.contains({"action": "move_forward"})
    assert not space.contains([0, 1, 2])
    assert not space.contains({"zero": None})
    assert not space.contains({"action": "bad"})
    assert not space.contains({"action": "move"})
    assert not space.contains(
        {"action": "move", "action_args": {"position": 0}}
    )
    assert not space.contains(
        {"action": "move_forward", "action_args": {"position": 0}}
    )
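For reference, a sample from the composite space above is a dict keyed by "action", with "action_args" present only for sub-spaces that take arguments; a minimal sketch, assuming the semantics exercised by the asserts above:

sample = space.sample()
# e.g. {'action': 'move', 'action_args': {'position': 1, 'velocity': 0}}
# or   {'action': 'move_forward'}   # EmptySpace entries carry no action_args
assert space.contains(sample)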
Example #2
def action_space(self):
    return EmptySpace()
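The property above typically belongs to a parameter-free task action; a sketch of that context, assuming habitat's SimulatorTaskAction and HabitatSimActions API (neither is shown in the snippet itself):

class MoveForwardAction(SimulatorTaskAction):
    @property
    def action_space(self):
        # No arguments to sample: the action is fully described by its name.
        return EmptySpace()

    def step(self, *args, **kwargs):
        return self._sim.step(HabitatSimActions.MOVE_FORWARD)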
Example #3
# Imports assumed for this snippet (habitat-baselines import paths can differ
# slightly between habitat-lab versions).
import gym
import numpy as np
import torch
import torch.distributed as distrib
import torch.nn as nn

from habitat.core.spaces import ActionSpace, EmptySpace
from habitat.tasks.nav.nav import IntegratedPointGoalGPSAndCompassSensor
from habitat_baselines.common.rollout_storage import RolloutStorage
from habitat_baselines.config.default import get_config
from habitat_baselines.rl.ddppo.algo import DDPPO
from habitat_baselines.rl.ppo.policy import PointNavBaselinePolicy


def _worker_fn(world_rank: int, world_size: int, port: int,
               unused_params: bool):
    device = (torch.device("cuda")
              if torch.cuda.is_available() else torch.device("cpu"))
    tcp_store = distrib.TCPStore(  # type: ignore
        "127.0.0.1", port, world_size, world_rank == 0)
    distrib.init_process_group("gloo",
                               store=tcp_store,
                               rank=world_rank,
                               world_size=world_size)

    config = get_config("habitat_baselines/config/test/ppo_pointnav_test.yaml")
    obs_space = gym.spaces.Dict({
        IntegratedPointGoalGPSAndCompassSensor.cls_uuid:
        gym.spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(2, ),
            dtype=np.float32,
        )
    })
    action_space = ActionSpace({"move": EmptySpace()})
    actor_critic = PointNavBaselinePolicy.from_config(config, obs_space,
                                                      action_space)
    # This adds some arbitrary parameters that aren't part of the computation
    # graph, so they will break DDP unless it correctly ignores them.
    if unused_params:
        actor_critic.unused = nn.Linear(64, 64)

    actor_critic.to(device=device)
    ppo_cfg = config.RL.PPO
    agent = DDPPO(
        actor_critic=actor_critic,
        clip_param=ppo_cfg.clip_param,
        ppo_epoch=ppo_cfg.ppo_epoch,
        num_mini_batch=ppo_cfg.num_mini_batch,
        value_loss_coef=ppo_cfg.value_loss_coef,
        entropy_coef=ppo_cfg.entropy_coef,
        lr=ppo_cfg.lr,
        eps=ppo_cfg.eps,
        max_grad_norm=ppo_cfg.max_grad_norm,
        use_normalized_advantage=ppo_cfg.use_normalized_advantage,
    )
    agent.init_distributed()
    rollouts = RolloutStorage(
        ppo_cfg.num_steps,
        2,
        obs_space,
        action_space,
        ppo_cfg.hidden_size,
        num_recurrent_layers=actor_critic.net.num_recurrent_layers,
        is_double_buffered=False,
    )
    rollouts.to(device)

    for k, v in rollouts.buffers["observations"].items():
        rollouts.buffers["observations"][k] = torch.randn_like(v)

    # Add two steps so batching works
    rollouts.advance_rollout()
    rollouts.advance_rollout()

    # Get a single batch
    batch = next(rollouts.recurrent_generator(rollouts.buffers["returns"], 1))

    # Call eval actions through the internal wrapper that is used in
    # agent.update
    value, action_log_probs, dist_entropy, _ = agent._evaluate_actions(
        batch["observations"],
        batch["recurrent_hidden_states"],
        batch["prev_actions"],
        batch["masks"],
        batch["actions"],
    )
    # Backprop on things
    (value.mean() + action_log_probs.mean() + dist_entropy.mean()).backward()

    # Make sure all ranks have very similar gradients
    for param in actor_critic.parameters():
        if param.grad is not None:
            grads = [param.grad.detach().clone() for _ in range(world_size)]
            distrib.all_gather(grads, grads[world_rank])

            for i in range(world_size):
                assert torch.isclose(grads[i], grads[world_rank]).all()
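The worker above runs once per rank; a minimal sketch of a driver that could launch it, assuming torch.multiprocessing.spawn and an arbitrarily chosen free port (both assumptions, not part of the snippet):

import torch.multiprocessing as mp


def _run_workers(world_size: int = 2, unused_params: bool = False):
    port = 8748  # hypothetical free TCP port for the TCPStore
    # spawn prepends the rank, matching _worker_fn's (world_rank, ...) signature
    mp.spawn(
        _worker_fn,
        args=(world_size, port, unused_params),
        nprocs=world_size,
        join=True,
    )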
Example #4
    def __init__(self, config: Config, dataset: Optional[Dataset] = None):
        self.noise = NOISY
        print('[VisTargetNavEnv] NOISY ACTUATION : ', self.noise)
        if hasattr(config, 'AGENT_TASK'):
            self.agent_task = config.AGENT_TASK
        else:
            self.agent_task = 'search'
        if self.agent_task == 'homing':
            self.num_goals = 2
        else:
            self.num_goals = config.NUM_GOALS

        task_config = config.TASK_CONFIG
        task_config.defrost()
        #task_config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250
        task_config.TASK.TOP_DOWN_MAP.DRAW_SOURCE = True
        task_config.TASK.TOP_DOWN_MAP.DRAW_SHORTEST_PATH = True
        task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.VISIBILITY_DIST = 2.0
        task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.FOV = 360
        task_config.TASK.TOP_DOWN_MAP.FOG_OF_WAR.DRAW = True
        task_config.TASK.TOP_DOWN_MAP.DRAW_VIEW_POINTS = False
        task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_POSITIONS = True
        task_config.TASK.TOP_DOWN_MAP.DRAW_GOAL_AABBS = False
        if ('GMT' in config.POLICY or 'NTS' in config.POLICY) and RENDER:
            task_config.TASK.TOP_DOWN_GRAPH_MAP = config.TASK_CONFIG.TASK.TOP_DOWN_MAP.clone()
            if 'GMT' in config.POLICY:
                task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = "TopDownGraphMap"
            elif 'NTS' in config.POLICY:
                task_config.TASK.TOP_DOWN_GRAPH_MAP.TYPE = 'NTSGraphMap'
                task_config.TASK.TOP_DOWN_GRAPH_MAP.MAP_RESOLUTION = 4000
            task_config.TASK.TOP_DOWN_GRAPH_MAP.NUM_TOPDOWN_MAP_SAMPLE_POINTS = 20000
            task_config.TASK.MEASUREMENTS += ['TOP_DOWN_GRAPH_MAP']
            if 'TOP_DOWN_MAP' in config.TASK_CONFIG.TASK.MEASUREMENTS:
                task_config.TASK.MEASUREMENTS = [
                    k for k in task_config.TASK.MEASUREMENTS
                    if 'TOP_DOWN_MAP' != k
                ]
        task_config.SIMULATOR.ACTION_SPACE_CONFIG = "CustomActionSpaceConfiguration"
        task_config.TASK.POSSIBLE_ACTIONS = task_config.TASK.POSSIBLE_ACTIONS + [
            'NOISY_FORWARD', 'NOISY_RIGHT', 'NOISY_LEFT'
        ]
        task_config.TASK.ACTIONS.NOISY_FORWARD = habitat.config.Config()
        task_config.TASK.ACTIONS.NOISY_FORWARD.TYPE = "NOISYFORWARD"
        task_config.TASK.ACTIONS.NOISY_RIGHT = habitat.config.Config()
        task_config.TASK.ACTIONS.NOISY_RIGHT.TYPE = "NOISYRIGHT"
        task_config.TASK.ACTIONS.NOISY_LEFT = habitat.config.Config()
        task_config.TASK.ACTIONS.NOISY_LEFT.TYPE = "NOISYLEFT"
        task_config.TASK.MEASUREMENTS = [
            'GOAL_INDEX'
        ] + task_config.TASK.MEASUREMENTS + ['SOFT_SPL']
        task_config.TASK.DISTANCE_TO_GOAL.TYPE = 'Custom_DistanceToGoal'
        if self.agent_task != 'search':
            task_config.TASK.SPL.TYPE = 'Custom_SPL'
        task_config.TASK.SOFT_SPL.TYPE = 'Custom_SoftSPL'
        task_config.TASK.GOAL_INDEX = task_config.TASK.SPL.clone()
        task_config.TASK.GOAL_INDEX.TYPE = 'GoalIndex'
        task_config.freeze()
        self.config = config
        self._core_env_config = config.TASK_CONFIG
        self._reward_measure_name = config.REWARD_METHOD
        self._success_measure_name = config.SUCCESS_MEASURE
        self.success_distance = config.SUCCESS_DISTANCE
        self._previous_measure = None
        self._previous_action = -1
        self.time_t = 0
        self.stuck = 0
        self.follower = None
        if 'NOISY_FORWARD' not in HabitatSimActions:
            HabitatSimActions.extend_action_space("NOISY_FORWARD")
            HabitatSimActions.extend_action_space("NOISY_RIGHT")
            HabitatSimActions.extend_action_space("NOISY_LEFT")
        if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
            self.action_dict = {
                0: HabitatSimActions.STOP,
                1: "NOISY_FORWARD",
                2: "NOISY_LEFT",
                3: "NOISY_RIGHT"
            }
        else:
            self.action_dict = {
                0: "NOISY_FORWARD",
                1: "NOISY_LEFT",
                2: "NOISY_RIGHT"
            }
        super().__init__(self._core_env_config, dataset)
        act_dict = {
            "MOVE_FORWARD": EmptySpace(),
            'TURN_LEFT': EmptySpace(),
            'TURN_RIGHT': EmptySpace()
        }
        if 'STOP' in task_config.TASK.POSSIBLE_ACTIONS:
            act_dict.update({'STOP': EmptySpace()})
        self.action_space = ActionSpace(act_dict)
        sensor_spaces = self.habitat_env._task.sensor_suite.observation_spaces.spaces
        obs_dict = {
            'panoramic_rgb': sensor_spaces['panoramic_rgb'],
            'panoramic_depth': sensor_spaces['panoramic_depth'],
            'target_goal': sensor_spaces['target_goal'],
            'step': Box(low=np.array(0), high=np.array(500), dtype=np.float32),
            'prev_act': Box(low=np.array(-1), high=np.array(self.action_space.n),
                            dtype=np.int32),
            'gt_action': Box(low=np.array(-1), high=np.array(self.action_space.n),
                             dtype=np.int32),
            'position': Box(low=-np.Inf, high=np.Inf, shape=(3,), dtype=np.float32),
            'target_pose': Box(low=-np.Inf, high=np.Inf, shape=(3,), dtype=np.float32),
            'distance': Box(low=-np.Inf, high=np.Inf, shape=(1,), dtype=np.float32),
        }
        if 'GMT' in config.POLICY and RENDER:
            self.mapper = self.habitat_env.task.measurements.measures[
                'top_down_map']
            #obs_dict.update({'unexplored':Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),
            #    'neighbors': Box(low=0, high=1, shape=(self.mapper.delta,), dtype=np.int32),})
        else:
            self.mapper = None
        if 'aux' in self.config.POLICY:
            self.return_have_been = True
            self.return_target_dist_score = True
            obs_dict.update({
                'have_been': Box(low=0, high=1, shape=(1,), dtype=np.int32),
                'target_dist_score': Box(low=0, high=1, shape=(1,), dtype=np.float32),
            })
        else:
            self.return_have_been = False
            self.return_target_dist_score = False

        self.observation_space = SpaceDict(obs_dict)

        if config.DIFFICULTY == 'easy':
            self.habitat_env.difficulty = 'easy'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 1.5, 3.0
        elif config.DIFFICULTY == 'medium':
            self.habitat_env.difficulty = 'medium'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 5.0
        elif config.DIFFICULTY == 'hard':
            self.habitat_env.difficulty = 'hard'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 5.0, 10.0
        elif config.DIFFICULTY == 'random':
            self.habitat_env.difficulty = 'random'
            self.habitat_env.MIN_DIST, self.habitat_env.MAX_DIST = 3.0, 10.0
        else:
            raise NotImplementedError

        self.habitat_env._num_goals = self.num_goals
        self.habitat_env._agent_task = self.agent_task
        print('current task : %s' % (self.agent_task))
        print('current difficulty %s, MIN_DIST %f, MAX_DIST %f - # goals %d' %
              (config.DIFFICULTY, self.habitat_env.MIN_DIST,
               self.habitat_env.MAX_DIST, self.habitat_env._num_goals))

        self.min_measure = self.habitat_env.MAX_DIST
        self.reward_method = config.REWARD_METHOD
        if self.reward_method == 'progress':
            self.get_reward = self.get_progress_reward
        elif self.reward_method == 'milestone':
            self.get_reward = self.get_milestone_reward
        elif self.reward_method == 'coverage':
            self.get_reward = self.get_coverage_reward

        self.run_mode = 'RL'
        self.number_of_episodes = 1000
        self.need_gt_action = False
        self.has_log_info = None
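For orientation, the action_dict built in this constructor maps a discrete policy output onto a habitat action name; a hypothetical sketch (not part of the original class) of how step() might consume it, following the usual habitat RLEnv step structure:

    def step(self, action):
        # Translate the discrete index into a habitat action name before
        # stepping the underlying environment (hypothetical illustration).
        habitat_action = self.action_dict[action]  # e.g. 1 -> "NOISY_FORWARD"
        observations = self.habitat_env.step(habitat_action)
        reward = self.get_reward(observations)
        done = self.get_done(observations)
        info = self.get_info(observations)
        return observations, reward, done, info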
Example #5
# Assumed import, as in Example #1.
from habitat.core.spaces import EmptySpace


def test_empty_space():
    space = EmptySpace()
    assert space.contains(space.sample())
    assert space.contains(None)
    assert not space.contains(0)
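The behavior this test exercises is consistent with a space whose only valid value is None; a minimal sketch of such a space, assuming a plain gym.Space subclass (the actual habitat EmptySpace implementation may differ):

import gym


class MinimalEmptySpace(gym.Space):
    """Space with no structure: sampling yields None and only None is contained."""

    def sample(self):
        return None

    def contains(self, x):
        return x is None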