# Example 1
def gibson_env(hparams):
    """Construct a single Gibson NavRLEnv from the task config in *hparams*.

    Loads the scene list for the dataset, shuffles it, validates the
    requested sensors, then points the simulator at the first episode's
    scene before wrapping everything in a NavRLEnv.
    """
    cfg_kwargs = dict(config_file=hparams.task_config,
                      config_dir=os.path.join(dirname, 'configs'))

    basic_config = cfg_env(**cfg_kwargs)
    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    config_env = cfg_env(**cfg_kwargs)
    config_env.defrost()
    if scenes:
        random.shuffle(scenes)
        config_env.DATASET.POINTNAVV1.CONTENT_SCENES = scenes
    # Only RGB / depth sensors are supported by this setup.
    for sensor in hparams.sensors:
        assert sensor in ("RGB_SENSOR", "DEPTH_SENSOR")
    config_env.SIMULATOR.AGENT_0.SENSORS = hparams.sensors
    config_env.freeze()

    config_baseline = cfg_baseline()

    dataset = PointNavDatasetV1(config_env.DATASET)

    # Point the simulator at the scene of the first episode.
    config_env.defrost()
    config_env.SIMULATOR.SCENE = dataset.episodes[0].scene_id
    config_env.freeze()

    return NavRLEnv(config_env=config_env,
                    config_baseline=config_baseline,
                    dataset=dataset)
# Example 2
def construct_envs(args):
    """Create a VectorEnv with one environment per process, assigning each
    process a contiguous slice of the dataset's scenes.

    Args:
        args: parsed arguments; must provide ``task_config``,
            ``num_processes``, ``num_processes_on_first_gpu``,
            ``num_processes_per_gpu`` and ``sim_gpu_id``.

    Returns:
        A VectorEnv wrapping ``args.num_processes`` environments.
    """
    env_configs = []
    baseline_configs = []
    args_list = []

    # TODO check params consistency here
    basic_config = cfg_env(config_paths=[args.task_config])

    print("loading scenes ...")
    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    # Default to 0 so the log line below does not raise NameError when the
    # dataset reports no scenes to load (bug fix: scene_split_size was only
    # bound inside the if-branch but printed unconditionally).
    scene_split_size = 0
    if len(scenes) > 0:
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes")
        scene_split_size = int(np.floor(len(scenes) / args.num_processes))

    print("using ", args.num_processes, " processes and ", scene_split_size,
          " scenes per process")

    for i in range(args.num_processes):
        config_env = cfg_env(config_paths=[args.task_config])
        config_env.defrost()

        if len(scenes) > 0:
            # Contiguous slice of scenes for this worker.
            config_env.DATASET.CONTENT_SCENES = scenes[
                i * scene_split_size:(i + 1) * scene_split_size]

        if i < args.num_processes_on_first_gpu:
            gpu_id = 0
        else:
            gpu_id = int((i - args.num_processes_on_first_gpu) //
                         args.num_processes_per_gpu) + args.sim_gpu_id
        # NOTE(review): gpu_id is unconditionally overridden to 0 below,
        # discarding the computation above — confirm this is intentional
        # (a clamped variant is commented out in the original).
        gpu_id = 0
        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id
        config_env.freeze()
        env_configs.append(config_env)

        baseline_configs.append(cfg_baseline())
        args_list.append(args)

    envs = VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(args_list, env_configs, baseline_configs,
                range(args.num_processes))),
    )

    print("returning with environment")
    return envs
# Example 3
def construct_envs(args):
    """Create a habitat VectorEnv, dealing scenes round-robin to processes.

    Args:
        args: parsed arguments providing ``task_config``, ``opts``,
            ``num_processes``, ``sim_gpu_id`` and a comma-separated
            ``sensors`` string.

    Returns:
        habitat.VectorEnv over ``args.num_processes`` environments.
    """
    env_configs = []
    baseline_configs = []

    basic_config = cfg_env(config_paths=args.task_config, opts=args.opts)
    dataset = make_dataset(basic_config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(basic_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes"
        )
        # (Removed dead code: a scene_split_size was computed here but the
        # round-robin split below is what is actually used.)

    # Deal scenes round-robin so splits differ in size by at most one.
    scene_splits = [[] for _ in range(args.num_processes)]
    for j, s in enumerate(scenes):
        scene_splits[j % len(scene_splits)].append(s)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(args.num_processes):
        config_env = cfg_env(config_paths=args.task_config, opts=args.opts)
        config_env.defrost()

        if len(scenes) > 0:
            config_env.DATASET.CONTENT_SCENES = scene_splits[i]

        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = args.sim_gpu_id

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        baseline_configs.append(cfg_baseline())

        logger.info("config_env: {}".format(config_env))

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    return envs
# Example 4
def construct_envs(args):
    """Create a habitat VectorEnv, giving each process a contiguous slice
    of the shuffled scene list.
    """
    env_configs = []
    baseline_configs = []

    basic_config = cfg_env(config_file=args.task_config)
    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    if scenes:
        random.shuffle(scenes)
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes")
        per_proc = int(np.floor(len(scenes) / args.num_processes))

    for rank in range(args.num_processes):
        config_env = cfg_env(config_file=args.task_config)
        config_env.defrost()

        if scenes:
            start = rank * per_proc
            config_env.DATASET.POINTNAVV1.CONTENT_SCENES = \
                scenes[start:start + per_proc]

        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = args.sim_gpu_id

        # Validate the comma-separated sensor spec before applying it.
        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ("RGB_SENSOR", "DEPTH_SENSOR")
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()

        env_configs.append(config_env)
        baseline_configs.append(cfg_baseline())
        logger.info("config_env: {}".format(config_env))

    return habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))),
    )
# Example 5
def main():
    """Build, configure and reset a single habitat environment using the
    module-level ``args``.
    """
    config_dir = "env/habitat/habitat_lab/configs/"

    basic_config = cfg_env(config_paths=[config_dir + args.task_config])
    basic_config.defrost()
    basic_config.DATASET.SPLIT = args.split
    basic_config.freeze()
    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    config_env = cfg_env(config_paths=[config_dir + args.task_config])
    config_env.defrost()
    config_env.DATASET.CONTENT_SCENES = scenes

    # Single-process setup: always render on GPU 0.
    config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = 0

    config_env.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]

    config_env.ENVIRONMENT.MAX_EPISODE_STEPS = args.max_episode_length
    config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False

    # RGB and depth share the same resolution, FOV and mounting height.
    for sensor_cfg in (config_env.SIMULATOR.RGB_SENSOR,
                       config_env.SIMULATOR.DEPTH_SENSOR):
        sensor_cfg.WIDTH = args.env_frame_width
        sensor_cfg.HEIGHT = args.env_frame_height
        sensor_cfg.HFOV = args.hfov
        sensor_cfg.POSITION = [0, args.camera_height, 0]

    config_env.SIMULATOR.TURN_ANGLE = 10
    config_env.DATASET.SPLIT = args.split

    config_env.freeze()

    env = make_env_fn(args, config_env, 0)
    obs, inf = env.reset()
    print("done")
# Example 6
def make_habitat_vector_env(num_processes=2,
                            target_dim=7,
                            preprocessing_fn=None,
                            log_dir=None,
                            visdom_name='main',
                            visdom_log_file=None,
                            visdom_server='localhost',
                            visdom_port='8097',
                            vis_interval=200,
                            scenes=None,
                            val_scenes=None,
                            num_val_processes=0,
                            swap_building_k_episodes=10,
                            gpu_devices=None,
                            collate_obs_before_transform=False,
                            map_kwargs=None,
                            reward_kwargs=None,
                            seed=42):
    """Build a HabitatPreprocessVectorEnv with train and validation workers.

    The first ``num_processes - num_val_processes`` workers receive train
    buildings and the remainder receive val buildings, both dealt
    round-robin.

    Bug fix: ``val_scenes``, ``gpu_devices``, ``map_kwargs`` and
    ``reward_kwargs`` previously used mutable default arguments.
    ``reward_kwargs`` is mutated below, so the default dict leaked reward
    settings across calls; all four now default to ``None`` and are created
    fresh per call.
    """
    if val_scenes is None:
        val_scenes = ['Greigsville', 'Pablo', 'Mosquito']
    if gpu_devices is None:
        gpu_devices = [0]
    if map_kwargs is None:
        map_kwargs = {}
    if reward_kwargs is None:
        reward_kwargs = {}

    assert map_kwargs[
        'map_building_size'] > 0, 'Map building size must be positive!'
    default_reward_kwargs = {
        'slack_reward': -0.01,
        'success_reward': 10,
        'use_visit_penalty': False,
        'visit_penalty_coef': 0,
        'penalty_eps': 999,
    }
    # Fill in any reward settings the caller did not override.
    for k, v in default_reward_kwargs.items():
        if k not in reward_kwargs:
            reward_kwargs[k] = v

    habitat_path = os.path.dirname(os.path.dirname(habitat.__file__))
    task_config = os.path.join(habitat_path,
                               'configs/tasks/pointnav_gibson_train.yaml')
    basic_config = cfg_env(config_file=task_config)
    basic_config.defrost()
    # Anchor the dataset path to the habitat installation directory.
    basic_config.DATASET.POINTNAVV1.DATA_PATH = os.path.join(
        habitat_path, basic_config.DATASET.POINTNAVV1.DATA_PATH)
    basic_config.freeze()

    if scenes is None:
        scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)
        random.shuffle(scenes)

    val_task_config = os.path.join(
        habitat_path, 'configs/tasks/pointnav_gibson_val_mini.yaml')
    val_cfg = cfg_env(config_file=val_task_config)
    val_cfg.defrost()
    val_cfg.DATASET.SPLIT = "val"
    val_cfg.freeze()

    # Never train on validation buildings.
    scenes = [s for s in scenes if s not in val_scenes]
    if num_val_processes > 0 and len(val_scenes) % num_val_processes != 0:
        warnings.warn(
            "Please make num_val_processes ({}) evenly divide len(val_scenes) ({}) or some buildings may be overrepresented"
            .format(num_val_processes, len(val_scenes)))

    env_configs = []
    baseline_configs = []
    target_dims = []

    # Assign specific buildings to each train process, round-robin.
    # Guard against division by zero when every process is a val process.
    train_process_scenes = [[]
                            for _ in range(num_processes - num_val_processes)]
    if train_process_scenes:
        for i, scene in enumerate(scenes):
            train_process_scenes[i % len(train_process_scenes)].append(scene)

    if num_val_processes > 0:
        val_process_scenes = [[] for _ in range(num_val_processes)]
        for i, scene in enumerate(val_scenes):
            val_process_scenes[i % len(val_process_scenes)].append(scene)

    for i in range(num_processes):
        config_env = cfg_env(task_config)
        config_env.defrost()
        config_env.DATASET.POINTNAVV1.DATA_PATH = os.path.join(
            habitat_path, basic_config.DATASET.POINTNAVV1.DATA_PATH)

        if i < num_processes - num_val_processes:
            config_env.DATASET.SPLIT = 'train'
            config_env.DATASET.POINTNAVV1.CONTENT_SCENES = train_process_scenes[
                i]
        else:
            config_env.DATASET.SPLIT = 'val'
            val_i = i - (num_processes - num_val_processes)
            config_env.DATASET.POINTNAVV1.CONTENT_SCENES = val_process_scenes[
                val_i]
        print("Env {}:".format(i),
              config_env.DATASET.POINTNAVV1.CONTENT_SCENES)

        # Spread simulators across the available GPUs round-robin.
        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_devices[
            i % len(gpu_devices)]
        agent_sensors = ["RGB_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.SIMULATOR.SCENE = os.path.join(habitat_path,
                                                  config_env.SIMULATOR.SCENE)

        config_env.freeze()
        env_configs.append(config_env)
        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)
        target_dims.append(target_dim)

    # Record only the first train env and the first val env.
    should_record = [(i == 0 or i == (num_processes - num_val_processes))
                     for i in range(num_processes)]
    envs = HabitatPreprocessVectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(
                env_configs,
                baseline_configs,
                range(num_processes),
                target_dims,
                [log_dir for _ in range(num_processes)],
                [visdom_name for _ in range(num_processes)],
                [visdom_log_file for _ in range(num_processes)],
                [vis_interval for _ in range(num_processes)],
                [visdom_server for _ in range(num_processes)],
                [visdom_port for _ in range(num_processes)],
                [swap_building_k_episodes for _ in range(num_processes)],
                [map_kwargs for _ in range(num_processes)],
                [reward_kwargs for _ in range(num_processes)],
                should_record,
                [seed + i for i in range(num_processes)],
            )),
        preprocessing_fn=preprocessing_fn,
        collate_obs_before_transform=collate_obs_before_transform)
    envs.observation_space = envs.observation_spaces[0]
    envs.action_space = spaces.Discrete(3)
    envs.reward_range = None
    envs.metadata = None
    envs.is_embodied = True
    return envs
# Example 7
def run_training():
    """Train a PPO agent on vectorized habitat environments.

    Parses PPO command-line arguments, builds the environments, then runs
    ``args.num_updates`` PPO updates of ``args.num_steps`` environment
    steps each, logging a sliding reward window and saving a checkpoint
    every ``args.checkpoint_interval`` updates.
    """
    parser = ppo_args()
    args = parser.parse_args()

    random.seed(args.seed)

    # All model tensors live on this single GPU.
    device = torch.device("cuda:{}".format(args.pth_gpu_id))

    logger.add_filehandler(args.log_file)

    if not os.path.isdir(args.checkpoint_folder):
        os.makedirs(args.checkpoint_folder)

    # Log every parsed argument for reproducibility.
    for p in sorted(list(vars(args))):
        logger.info("{}: {}".format(p, getattr(args, p)))

    envs = construct_envs(args)
    task_cfg = cfg_env(config_paths=args.task_config)
    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=args.hidden_size,
        goal_sensor_uuid=task_cfg.TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    agent = PPO(
        actor_critic,
        args.clip_param,
        args.ppo_epoch,
        args.num_mini_batch,
        args.value_loss_coef,
        args.entropy_coef,
        lr=args.lr,
        eps=args.eps,
        max_grad_norm=args.max_grad_norm,
    )

    logger.info("agent number of parameters: {}".format(
        sum(param.numel() for param in agent.parameters())))

    observations = envs.reset()

    batch = batch_obs(observations)

    rollouts = RolloutStorage(
        args.num_steps,
        envs.num_envs,
        envs.observation_spaces[0],
        envs.action_spaces[0],
        args.hidden_size,
    )
    # Seed the rollout buffer with the initial observations.
    for sensor in rollouts.observations:
        rollouts.observations[sensor][0].copy_(batch[sensor])
    rollouts.to(device)

    # Per-env cumulative totals; the deques hold one snapshot per update so
    # the window statistics are differences of cumulative values.
    episode_rewards = torch.zeros(envs.num_envs, 1)
    episode_counts = torch.zeros(envs.num_envs, 1)
    current_episode_reward = torch.zeros(envs.num_envs, 1)
    window_episode_reward = deque()
    window_episode_counts = deque()

    t_start = time()
    env_time = 0
    pth_time = 0
    count_steps = 0
    count_checkpoints = 0

    for update in range(args.num_updates):
        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, update, args.num_updates,
                                   args.lr)

        # Linearly anneal the PPO clipping parameter towards 0.
        agent.clip_param = args.clip_param * (1 - update / args.num_updates)

        for step in range(args.num_steps):
            t_sample_action = time()
            # sample actions
            with torch.no_grad():
                step_observation = {
                    k: v[step]
                    for k, v in rollouts.observations.items()
                }

                (
                    values,
                    actions,
                    actions_log_probs,
                    recurrent_hidden_states,
                ) = actor_critic.act(
                    step_observation,
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step],
                )
            pth_time += time() - t_sample_action

            t_step_env = time()

            outputs = envs.step([a[0].item() for a in actions])
            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]

            env_time += time() - t_step_env

            t_update_stats = time()
            batch = batch_obs(observations)
            rewards = torch.tensor(rewards, dtype=torch.float)
            rewards = rewards.unsqueeze(1)

            # masks: 0.0 where an episode just finished, 1.0 otherwise.
            masks = torch.tensor([[0.0] if done else [1.0] for done in dones],
                                 dtype=torch.float)

            # Accumulate step rewards; fold finished episodes into the
            # totals and zero their running reward.
            current_episode_reward += rewards
            episode_rewards += (1 - masks) * current_episode_reward
            episode_counts += 1 - masks
            current_episode_reward *= masks

            rollouts.insert(
                batch,
                recurrent_hidden_states,
                actions,
                actions_log_probs,
                values,
                rewards,
                masks,
            )

            count_steps += envs.num_envs
            pth_time += time() - t_update_stats

        # Keep at most reward_window_size snapshots of the cumulative stats.
        if len(window_episode_reward) == args.reward_window_size:
            window_episode_reward.popleft()
            window_episode_counts.popleft()
        window_episode_reward.append(episode_rewards.clone())
        window_episode_counts.append(episode_counts.clone())

        t_update_model = time()
        with torch.no_grad():
            last_observation = {
                k: v[-1]
                for k, v in rollouts.observations.items()
            }
            # Bootstrap value for the state following the final step.
            next_value = actor_critic.get_value(
                last_observation,
                rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1],
            ).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()
        pth_time += time() - t_update_model

        # log stats
        if update > 0 and update % args.log_interval == 0:
            logger.info("update: {}\tfps: {:.3f}\t".format(
                update, count_steps / (time() - t_start)))

            logger.info("update: {}\tenv-time: {:.3f}s\tpth-time: {:.3f}s\t"
                        "frames: {}".format(update, env_time, pth_time,
                                            count_steps))

            # Newest minus oldest snapshot = activity within the window.
            window_rewards = (window_episode_reward[-1] -
                              window_episode_reward[0]).sum()
            window_counts = (window_episode_counts[-1] -
                             window_episode_counts[0]).sum()

            if window_counts > 0:
                logger.info("Average window size {} reward: {:3f}".format(
                    len(window_episode_reward),
                    (window_rewards / window_counts).item(),
                ))
            else:
                logger.info("No episodes finish in current window")

        # checkpoint model
        if update % args.checkpoint_interval == 0:
            checkpoint = {"state_dict": agent.state_dict()}
            torch.save(
                checkpoint,
                os.path.join(
                    args.checkpoint_folder,
                    "ckpt.{}.pth".format(count_checkpoints),
                ),
            )
            count_checkpoints += 1
# Example 8
def construct_envs(args):
    """Create a VectorEnv of fully-configured habitat environments, one per
    process, each with a contiguous slice of the dataset's scenes.
    """
    config_dir = "env/habitat/habitat_lab/configs/"

    basic_config = cfg_env(config_paths=[config_dir + args.task_config])
    basic_config.defrost()
    basic_config.DATASET.SPLIT = args.split
    basic_config.freeze()

    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    if scenes:
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes")
        per_proc = int(np.floor(len(scenes) / args.num_processes))

    env_configs = []
    args_list = []
    for rank in range(args.num_processes):
        config_env = cfg_env(config_paths=[config_dir + args.task_config])
        config_env.defrost()

        if scenes:
            start = rank * per_proc
            config_env.DATASET.CONTENT_SCENES = scenes[start:start + per_proc]

        # First num_processes_on_first_gpu workers render on GPU 0; the
        # rest are packed num_processes_per_gpu at a time starting from
        # sim_gpu_id, clamped to the devices actually present.
        if rank < args.num_processes_on_first_gpu:
            gpu_id = 0
        else:
            gpu_id = int((rank - args.num_processes_on_first_gpu) //
                         args.num_processes_per_gpu) + args.sim_gpu_id
        gpu_id = min(torch.cuda.device_count() - 1, gpu_id)
        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id

        config_env.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]

        config_env.ENVIRONMENT.MAX_EPISODE_STEPS = args.max_episode_length
        config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False

        # RGB and depth share resolution, FOV and mounting height.
        for sensor_cfg in (config_env.SIMULATOR.RGB_SENSOR,
                           config_env.SIMULATOR.DEPTH_SENSOR):
            sensor_cfg.WIDTH = args.env_frame_width
            sensor_cfg.HEIGHT = args.env_frame_height
            sensor_cfg.HFOV = args.hfov
            sensor_cfg.POSITION = [0, args.camera_height, 0]

        config_env.SIMULATOR.TURN_ANGLE = 10
        config_env.DATASET.SPLIT = args.split

        config_env.freeze()
        env_configs.append(config_env)
        args_list.append(args)

    return VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(args_list, env_configs, range(args.num_processes))),
    )
# Example 9
def construct_envs(args):
    """Create a VectorEnv whose threads share the scene list as evenly as
    possible (the first ``len(scenes) % num_processes`` threads get one
    extra scene).
    """
    config_dir = "envs/habitat/configs/"

    basic_config = cfg_env(config_paths=[config_dir + args.task_config])
    basic_config.defrost()
    basic_config.DATASET.SPLIT = args.split
    basic_config.DATASET.DATA_PATH = \
        basic_config.DATASET.DATA_PATH.replace("v1", args.version)
    basic_config.DATASET.EPISODES_DIR = \
        basic_config.DATASET.EPISODES_DIR.replace("v1", args.version)
    basic_config.freeze()

    # A "*" entry means "all scenes": enumerate the content folder instead.
    scenes = basic_config.DATASET.CONTENT_SCENES
    if "*" in basic_config.DATASET.CONTENT_SCENES:
        content_dir = os.path.join(
            basic_config.DATASET.EPISODES_DIR.format(split=args.split),
            "content")
        scenes = _get_scenes_from_folder(content_dir)

    if scenes:
        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough number of scenes"
        )
        # Even split; distribute the remainder one scene at a time.
        base, extra = divmod(len(scenes), args.num_processes)
        scene_split_sizes = [base + 1 if t < extra else base
                             for t in range(args.num_processes)]

    env_configs = []
    args_list = []
    print("Scenes per thread:")
    offset = 0
    for rank in range(args.num_processes):
        config_env = cfg_env(config_paths=[config_dir + args.task_config])
        config_env.defrost()

        if scenes:
            count = scene_split_sizes[rank]
            config_env.DATASET.CONTENT_SCENES = scenes[offset:offset + count]
            offset += count
            print("Thread {}: {}".format(rank,
                                         config_env.DATASET.CONTENT_SCENES))

        if rank < args.num_processes_on_first_gpu:
            gpu_id = 0
        else:
            gpu_id = int((rank - args.num_processes_on_first_gpu)
                         // args.num_processes_per_gpu) + args.sim_gpu_id
        gpu_id = min(torch.cuda.device_count() - 1, gpu_id)
        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id

        # "SEMANTIC_SENSOR" support exists but is disabled here.
        config_env.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR", "DEPTH_SENSOR"]

        # Episodes are reset manually, so the sim episode cap is huge.
        config_env.ENVIRONMENT.MAX_EPISODE_STEPS = 10000000
        config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False

        config_env.SIMULATOR.RGB_SENSOR.WIDTH = args.env_frame_width
        config_env.SIMULATOR.RGB_SENSOR.HEIGHT = args.env_frame_height
        config_env.SIMULATOR.RGB_SENSOR.HFOV = args.hfov
        config_env.SIMULATOR.RGB_SENSOR.POSITION = [0, args.camera_height, 0]

        config_env.SIMULATOR.DEPTH_SENSOR.WIDTH = args.env_frame_width
        config_env.SIMULATOR.DEPTH_SENSOR.HEIGHT = args.env_frame_height
        config_env.SIMULATOR.DEPTH_SENSOR.HFOV = args.hfov
        config_env.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = args.min_depth
        config_env.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = args.max_depth
        config_env.SIMULATOR.DEPTH_SENSOR.POSITION = [0, args.camera_height, 0]

        config_env.SIMULATOR.TURN_ANGLE = args.turn_angle
        config_env.DATASET.SPLIT = args.split
        config_env.DATASET.DATA_PATH = \
            config_env.DATASET.DATA_PATH.replace("v1", args.version)
        config_env.DATASET.EPISODES_DIR = \
            config_env.DATASET.EPISODES_DIR.replace("v1", args.version)

        config_env.freeze()
        env_configs.append(config_env)
        args_list.append(args)

    return VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(args_list, env_configs, range(args.num_processes))),
    )
def make_habitat_vector_env(
    scenario='PointNav',
    num_processes=2,
    target_dim=7,
    preprocessing_fn=None,
    log_dir=None,
    visdom_name='main',
    visdom_log_file=None,
    visdom_server='localhost',
    visdom_port='8097',
    vis_interval=200,
    train_scenes=None,
    val_scenes=None,
    num_val_processes=0,
    swap_building_k_episodes=10,
    gpu_devices=None,
    map_kwargs=None,
    reward_kwargs=None,
    seed=42,
    test_mode=False,
    debug_mode=False,
    scenario_kwargs=None,
):
    """Build habitat envs: val workers partitioned by episode, train workers
    partitioned by building.

    Bug fix: ``gpu_devices``, ``map_kwargs``, ``reward_kwargs`` and
    ``scenario_kwargs`` previously used mutable default arguments;
    ``reward_kwargs`` is mutated below, so the default dict leaked reward
    settings across calls. They now default to ``None`` and are created
    fresh per call. Also raises ValueError for an unsupported *scenario*
    instead of a confusing NameError on ``task_config``.
    """
    if gpu_devices is None:
        gpu_devices = [0]
    if map_kwargs is None:
        map_kwargs = {}
    if reward_kwargs is None:
        reward_kwargs = {}
    if scenario_kwargs is None:
        scenario_kwargs = {}

    assert map_kwargs[
        'map_building_size'] > 0, 'Map building size must be positive!'
    default_reward_kwargs = {
        'slack_reward': -0.01,
        'success_reward': 10,
        'use_visit_penalty': False,
        'visit_penalty_coef': 0,
        'penalty_eps': 999,
        'sparse': False,
        'dist_coef': 1.0,
    }
    # Fill in any reward settings the caller did not override.
    for k, v in default_reward_kwargs.items():
        if k not in reward_kwargs:
            reward_kwargs[k] = v

    habitat_path = os.path.dirname(os.path.dirname(habitat.__file__))
    if scenario == 'PointNav' or scenario == 'Exploration':
        task_config = os.path.join(habitat_path,
                                   'configs/tasks/pointnav_gibson_train.yaml')
        # only difference is that Exploration needs DEPTH_SENSOR but that is added in the Env
    else:
        raise ValueError("Unsupported scenario: {}".format(scenario))

    env_configs = []
    baseline_configs = []
    target_dims = []

    # Assign specific episodes to each process
    config_env = cfg_env(task_config)

    # Load dataset
    print('Loading val dataset (partition by episode)...')
    datasetfile_path = config_env.DATASET.POINTNAVV1.DATA_PATH.format(
        split='val')
    dataset = PointNavDatasetV1()
    with gzip.open(datasetfile_path, "rt") as f:
        dataset.from_json(f.read())
    # At least one split even when there are no val processes, so that the
    # debug path below always has a dataset available.
    val_datasets = get_splits(dataset, max(num_val_processes, 1))
    print('Loaded.')

    print('Loading train dataset (partition by building)...')
    train_datasets = []
    if num_processes - num_val_processes > 0:
        # Placeholders: train envs build their dataset from CONTENT_SCENES.
        train_datasets = [
            None for _ in range(num_processes - num_val_processes)
        ]
    print('Loaded.')

    # Assign specific buildings to each process
    if num_processes > num_val_processes:
        train_process_scenes = [[] for _ in range(num_processes -
                                                  num_val_processes)]
        if train_scenes is None:
            train_scenes = PointNavDatasetV1.get_scenes_to_load(
                config_env.DATASET)
            random.shuffle(train_scenes)

        for i, scene in enumerate(train_scenes):
            train_process_scenes[i % len(train_process_scenes)].append(scene)

        # If n processes > n envs, some processes can use all envs
        for j, process in enumerate(train_process_scenes):
            if len(process) == 0:
                train_process_scenes[j] = list(train_scenes)

    # (building, episode-count) pairs present in a dataset.
    get_scenes = lambda d: list(
        Counter([e.scene_id.split('/')[-1].split(".")[0]
                 for e in d.episodes]).items())
    for i in range(num_processes):
        config_env = cfg_env(task_config)
        config_env.defrost()

        if i < num_processes - num_val_processes:
            config_env.DATASET.SPLIT = 'train'
            config_env.DATASET.POINTNAVV1.CONTENT_SCENES = train_process_scenes[
                i]
        else:
            val_i = i - (num_processes - num_val_processes)
            config_env.DATASET.SPLIT = 'val'
            if val_scenes is not None:
                config_env.DATASET.POINTNAVV1.CONTENT_SCENES = val_scenes
            else:
                config_env.DATASET.POINTNAVV1.CONTENT_SCENES = get_scenes(
                    val_datasets[val_i])

        print("Env {}:".format(i),
              config_env.DATASET.POINTNAVV1.CONTENT_SCENES)

        # Spread simulators across the available GPUs round-robin.
        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_devices[
            i % len(gpu_devices)]
        config_env.SIMULATOR.SCENE = os.path.join(habitat_path,
                                                  config_env.SIMULATOR.SCENE)
        config_env.SIMULATOR.AGENT_0.SENSORS = ["RGB_SENSOR"]

        config_env.TASK.MEASUREMENTS.append('COLLISIONS')

        config_env.freeze()
        env_configs.append(config_env)
        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)
        target_dims.append(target_dim)

    # Record only the first train env and the first val env.
    should_record = [(i == 0 or i == (num_processes - num_val_processes))
                     for i in range(num_processes)]
    if debug_mode:
        # Single in-process env for debugging (no worker processes).
        env = make_env_fn(
            scenario,
            env_configs[0],
            baseline_configs[0],
            0,
            0,
            1,
            target_dim,
            log_dir,
            visdom_name,
            visdom_log_file,
            vis_interval,
            visdom_server,
            visdom_port,
            swap_building_k_episodes,
            map_kwargs,
            reward_kwargs,
            False,
            seed,  # TODO set should_record to True
            test_mode,
            (train_datasets + val_datasets)[0],
            scenario_kwargs)
        envs = PreprocessEnv(env, preprocessing_fn=preprocessing_fn)
    else:
        envs = HabitatPreprocessVectorEnv(
            make_env_fn=make_env_fn,
            env_fn_args=tuple(
                zip(
                    [scenario for _ in range(num_processes)],
                    env_configs,
                    baseline_configs,
                    range(num_processes),
                    [num_val_processes for _ in range(num_processes)],
                    [num_processes for _ in range(num_processes)],
                    target_dims,
                    [log_dir for _ in range(num_processes)],
                    [visdom_name for _ in range(num_processes)],
                    [visdom_log_file for _ in range(num_processes)],
                    [vis_interval for _ in range(num_processes)],
                    [visdom_server for _ in range(num_processes)],
                    [visdom_port for _ in range(num_processes)],
                    [
                        swap_building_k_episodes
                        for _ in range(num_processes)
                    ],
                    [map_kwargs for _ in range(num_processes)],
                    [reward_kwargs for _ in range(num_processes)],
                    should_record,
                    [seed + i for i in range(num_processes)],
                    [test_mode for _ in range(num_processes)],
                    train_datasets + val_datasets,
                    [scenario_kwargs for _ in range(num_processes)],
                )),
            preprocessing_fn=preprocessing_fn,
        )
        envs.observation_space = envs.observation_spaces[0]
    envs.action_space = spaces.Discrete(3)
    envs.reward_range = None
    envs.metadata = None
    envs.is_embodied = True
    return envs