Example #1
def test_rl_vectorized_envs():
    configs, datasets = _load_test_data()

    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(make_env_fn=make_rl_env, env_fn_args=env_fn_args)
    envs.reset()
    non_stop_actions = [
        v for v in range(len(SimulatorActions))
        if v != SimulatorActions.STOP.value
    ]

    for i in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        outputs = envs.step(np.random.choice(non_stop_actions, num_envs))
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        assert len(observations) == num_envs
        assert len(rewards) == num_envs
        assert len(dones) == num_envs
        assert len(infos) == num_envs

        tiled_img = envs.render(mode="rgb_array")
        new_height = int(np.ceil(np.sqrt(num_envs)))
        new_width = int(np.ceil(float(num_envs) / new_height))
        h, w, c = observations[0]["rgb"].shape
        assert tiled_img.shape == (
            h * new_height,
            w * new_width,
            c,
        ), "vector env render is broken"

        if (i + 1) % configs[0].ENVIRONMENT.MAX_EPISODE_STEPS == 0:
            assert all(dones), "dones should be true after max_episode steps"

    envs.close()
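
These vector-env tests assume a `_load_test_data` helper plus a `make_rl_env` factory from the surrounding test module. A minimal sketch of the factory, assuming a trivial RLEnv subclass (names here are hypothetical, not the canonical habitat-api helpers):

import habitat


class SimpleRLEnv(habitat.RLEnv):
    # Trivial reward/done/info implementations keep the sketch self-contained.
    def get_reward_range(self):
        return (-1.0, 1.0)

    def get_reward(self, observations):
        return 0.0

    def get_done(self, observations):
        return self.habitat_env.episode_over

    def get_info(self, observations):
        return self.habitat_env.get_metrics()


def make_rl_env(config, dataset, rank: int = 0):
    env = SimpleRLEnv(config=config, dataset=dataset)
    env.seed(rank)
    return env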
Example #2
def test_number_of_episodes():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    with habitat.VectorEnv(env_fn_args=env_fn_args,
                           multiprocessing_start_method="forkserver") as envs:
        assert envs.number_of_episodes == [10000, 10000, 10000, 10000]
Example #3
def construct_envs(
    config: Config, training: bool
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
        necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    num_processes = config.NUM_PROCESSES
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    task = 'habitat_train_task' if training else 'habitat_eval_task'
    max_duration = gin.query_parameter(f'{task}.max_length')
    wrappers = [w.scoped_configurable_fn() for w in gin.query_parameter(f'{task}.wrappers')]
    kwargs = get_config(training=training, max_steps=max_duration*3)
    kwargs['max_duration'] = max_duration
    kwargs['action_repeat'] = 1
    kwargs['wrappers'] = [(wrapper, kwarg_fn(kwargs)) for wrapper, kwarg_fn in wrappers]
    env_kwargs = []
    for split in scene_splits:
        kw = kwargs.copy()
        env_config = kw['config'].clone()
        if len(split) > 0:
            env_config.defrost()
            env_config.DATASET.CONTENT_SCENES = split
            env_config.freeze()
        kw['config'] = env_config
        env_kwargs.append(kw)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(env_kwargs, range(num_processes))),
    )
    return envs
Example #4
def construct_envs(config: Config, env_class: Type[Union[Env,
                                                         RLEnv]]) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
        necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """

    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes")

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        task_config = config.TASK_CONFIG.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID)

        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        config.defrost()
        config.TASK_CONFIG = task_config
        config.freeze()
        configs.append(config.clone())

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes, range(num_processes))),
    )
    return envs
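
Example #4 zips (config, env_class, rank) triples into env_fn_args, so the make_env_fn it expects takes those three arguments. A minimal sketch, modeled on habitat_baselines' env_utils.make_env_fn (treat the body as an assumption rather than the canonical implementation):

from habitat import make_dataset


def make_env_fn(config, env_class, rank: int = 0):
    # Each worker process builds its own dataset from its per-process config.
    dataset = make_dataset(
        config.TASK_CONFIG.DATASET.TYPE, config=config.TASK_CONFIG.DATASET
    )
    env = env_class(config=config, dataset=dataset)
    env.seed(rank)
    return env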
Example #5
def test_with_scope():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    with habitat.VectorEnv(env_fn_args=env_fn_args,
                           multiprocessing_start_method="forkserver") as envs:
        envs.reset()

    assert envs._is_closed
Example #6
def construct_envs(args):
    env_configs = []
    baseline_configs = []

    basic_config = cfg_env(config_paths=args.task_config, opts=args.opts)
    dataset = make_dataset(basic_config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(basic_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes"
        )

    scene_splits = [[] for _ in range(args.num_processes)]
    for j, s in enumerate(scenes):
        scene_splits[j % len(scene_splits)].append(s)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(args.num_processes):
        config_env = cfg_env(config_paths=args.task_config, opts=args.opts)
        config_env.defrost()

        if len(scenes) > 0:
            config_env.DATASET.CONTENT_SCENES = scene_splits[i]

        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = args.sim_gpu_id

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

        logger.info("config_env: {}".format(config_env))

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    return envs
Example #7
def _vec_env_test_fn(configs, datasets, multiprocessing_start_method):
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(
        env_fn_args=env_fn_args,
        multiprocessing_start_method=multiprocessing_start_method,
    )
    envs.reset()
    non_stop_actions = [
        v for v in range(len(SimulatorActions))
        if v != SimulatorActions.STOP.value
    ]

    for _ in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        observations = envs.step(np.random.choice(non_stop_actions, num_envs))
        assert len(observations) == num_envs
Example #8
def construct_envs(args):
    env_configs = []
    baseline_configs = []

    basic_config = cfg_env(config_file=args.task_config)

    scenes = PointNavDatasetV1.get_scenes_to_load(basic_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= args.num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes")
        scene_split_size = int(np.floor(len(scenes) / args.num_processes))

    for i in range(args.num_processes):
        config_env = cfg_env(config_file=args.task_config)
        config_env.defrost()

        if len(scenes) > 0:
            config_env.DATASET.POINTNAVV1.CONTENT_SCENES = scenes[
                i * scene_split_size:(i + 1) * scene_split_size]

        config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = args.sim_gpu_id

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

        logger.info("config_env: {}".format(config_env))

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs,
                range(args.num_processes))),
    )

    return envs
Example #9
def test_rl_vectorized_envs(gpu2gpu):
    import habitat_sim

    if gpu2gpu and not habitat_sim.cuda_enabled:
        pytest.skip("GPU-GPU requires CUDA")

    configs, datasets = _load_test_data()
    for config in configs:
        config.defrost()
        config.SIMULATOR.HABITAT_SIM_V0.GPU_GPU = gpu2gpu
        config.freeze()

    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    with habitat.VectorEnv(
        make_env_fn=make_rl_env, env_fn_args=env_fn_args
    ) as envs:
        envs.reset()

        for i in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
            outputs = envs.step(
                sample_non_stop_action(envs.action_spaces[0], num_envs)
            )
            observations, rewards, dones, infos = [
                list(x) for x in zip(*outputs)
            ]
            assert len(observations) == num_envs
            assert len(rewards) == num_envs
            assert len(dones) == num_envs
            assert len(infos) == num_envs

            tiled_img = envs.render(mode="rgb_array")
            new_height = int(np.ceil(np.sqrt(num_envs)))
            new_width = int(np.ceil(float(num_envs) / new_height))
            h, w, c = observations[0]["rgb"].shape
            assert tiled_img.shape == (
                h * new_height,
                w * new_width,
                c,
            ), "vector env render is broken"

            if (i + 1) % configs[0].ENVIRONMENT.MAX_EPISODE_STEPS == 0:
                assert all(
                    dones
                ), "dones should be true after max_episode steps"
Example #10
def make_task_envs(env_types, nav_configs, nav_datasets, shell_args):
    data_keys = list(nav_datasets.keys())
    nav_datasets = [{key: nav_datasets[key][ii]
                     for key in data_keys}
                    for ii in range(len(nav_datasets[data_keys[0]]))]
    env_fn_args: Tuple[Tuple] = tuple(
        zip(env_types, nav_configs, nav_datasets,
            range(shell_args.seed, shell_args.seed + len(nav_configs))))

    if shell_args.use_multithreading:
        envs = habitat.ThreadedVectorEnv(make_env_fn, env_fn_args)
    else:
        envs = habitat.VectorEnv(make_env_fn,
                                 env_fn_args,
                                 multiprocessing_start_method="forkserver")
    envs = HabitatVecEnvWrapper(envs)
    return envs
Example #11
def _vec_env_test_fn(configs, datasets, multiprocessing_start_method, gpu2gpu):
    num_envs = len(configs)
    for cfg in configs:
        cfg.defrost()
        cfg.SIMULATOR.HABITAT_SIM_V0.GPU_GPU = gpu2gpu
        cfg.freeze()

    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(
        env_fn_args=env_fn_args,
        multiprocessing_start_method=multiprocessing_start_method,
    )
    envs.reset()

    for _ in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        observations = envs.step(
            sample_non_stop_action(envs.action_spaces[0], num_envs))
        assert len(observations) == num_envs
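
Examples #9 and #11 depend on a sample_non_stop_action helper. One possible sketch for a gym.spaces.Discrete action space in which index 0 is STOP (an assumption; the real test helper may sample a dict action space instead):

import numpy as np


def sample_non_stop_action(action_space, num_samples=1):
    # Draw uniformly from every discrete action except index 0 (STOP).
    return [
        int(np.random.randint(1, action_space.n)) for _ in range(num_samples)
    ]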
Example #12
def test_vec_env_call_func():
    configs, datasets = _load_test_data()
    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    true_env_ids = list(range(num_envs))
    envs = habitat.VectorEnv(
        make_env_fn=_make_dummy_env_func,
        env_fn_args=env_fn_args,
        multiprocessing_start_method="forkserver",
    )
    envs.reset()
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    env_id = envs.call_at(1, "get_env_ind")
    assert env_id == true_env_ids[1]

    envs.call_at(2, "set_env_ind", {"new_env_ind": 20})
    true_env_ids[2] = 20
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.call_at(2, "set_env_ind", {"new_env_ind": 2})
    true_env_ids[2] = 2
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.pause_at(0)
    true_env_ids.pop(0)
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.pause_at(0)
    true_env_ids.pop(0)
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == true_env_ids

    envs.resume_all()
    env_ids = envs.call(["get_env_ind"] * num_envs)
    assert env_ids == list(range(num_envs))
    envs.close()
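
The call()/call_at() test above assumes _make_dummy_env_func builds envs that expose get_env_ind/set_env_ind. A minimal sketch (hypothetical, mirroring the habitat-api test suite):

import habitat


class _DummyEnv(habitat.Env):
    def __init__(self, config, dataset, env_ind=0):
        super().__init__(config=config, dataset=dataset)
        self._env_ind = env_ind

    def get_env_ind(self):
        return self._env_ind

    def set_env_ind(self, new_env_ind):
        self._env_ind = new_env_ind


def _make_dummy_env_func(config, dataset, rank):
    return _DummyEnv(config=config, dataset=dataset, env_ind=rank)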
Example #13
def test_rl_vectorized_envs():
    configs, datasets = _load_test_data()

    num_envs = len(configs)
    env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
    envs = habitat.VectorEnv(make_env_fn=make_rl_env, env_fn_args=env_fn_args)
    envs.reset()
    non_stop_actions = [
        k for k, v in SIM_ACTION_TO_NAME.items()
        if v != SimulatorActions.STOP.value
    ]

    for i in range(2 * configs[0].ENVIRONMENT.MAX_EPISODE_STEPS):
        outputs = envs.step(np.random.choice(non_stop_actions, num_envs))
        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        assert len(observations) == num_envs
        assert len(rewards) == num_envs
        assert len(dones) == num_envs
        assert len(infos) == num_envs
        if (i + 1) % configs[0].ENVIRONMENT.MAX_EPISODE_STEPS == 0:
            assert all(dones), "dones should be true after max_episode steps"

    envs.close()
Example #14
def construct_envs_habitat(
    config,
    env_class,
    workers_ignore_signals: bool = False,
):
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    :param config: configs that contain num_processes as well as information
    :param necessary to create individual environments.
    :param env_class: class type of the envs to be created.
    :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor

    :return: VectorEnv object created according to specification.
    """

    import habitat
    from habitat import make_dataset
    from habitat_baselines.utils.env_utils import make_env_fn

    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = config.TASK_CONFIG.DATASET.CONTENT_SCENES
    if "*" in config.TASK_CONFIG.DATASET.CONTENT_SCENES:
        scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if num_processes > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes"
            )

        if len(scenes) < num_processes:
            raise RuntimeError("reduce the number of processes as there "
                               "aren't enough scenes")

        random.shuffle(scenes)

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        proc_config = config.clone()
        proc_config.defrost()

        task_config = proc_config.TASK_CONFIG
        task_config.SEED = task_config.SEED + i
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = config.SIMULATOR_GPU_ID

        sensors = []
        if config.DEPTH:
            sensors += ["DEPTH_SENSOR"]

        if config.COLOR:
            sensors += ["RGB_SENSOR"]

        task_config.SIMULATOR.AGENT_0.SENSORS = sensors

        task_config.SIMULATOR.RGB_SENSOR.HEIGHT = config.RESOLUTION[1]
        task_config.SIMULATOR.RGB_SENSOR.WIDTH = config.RESOLUTION[0]

        task_config.SIMULATOR.DEPTH_SENSOR.HEIGHT = config.RESOLUTION[1]
        task_config.SIMULATOR.DEPTH_SENSOR.WIDTH = config.RESOLUTION[0]
        task_config.SIMULATOR.DEPTH_SENSOR.NORMALIZE_DEPTH = False
        task_config.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = 20.0

        proc_config.freeze()
        configs.append(proc_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
        workers_ignore_signals=workers_ignore_signals,
    )
    return envs
Example #15
    def __init__(self, split, gpu_id, envs_processed, envs_to_process, opts, vectorize=False, seed=0) -> None:
        # Vectorized envs are currently disabled regardless of the `vectorize` flag.
        self.vectorize = False

        resolution = opts.W
        if opts.no_sem_images and opts.no_txt_semantic and opts.no_binary_semantic:
            sensors = ["RGB_SENSOR", "DEPTH_SENSOR"]
        else:
            sensors = ["RGB_SENSOR", "DEPTH_SENSOR", "SEMANTIC_SENSOR"]
        if split == "train":
            data_path = opts.train_data_path
        elif split == "val":
            data_path = opts.val_data_path
        elif split == "test":
            data_path = opts.test_data_path
        else:
            raise Exception("Invalid split")
        unique_dataset_name = opts.dataset

        self.num_parallel_envs = 1

        self.images_before_reset = opts.images_before_reset
        config = make_config(
            opts.config,
            gpu_id,
            split,
            data_path,
            sensors,
            resolution,
            opts.scenes_dir,
        )
        data_dir = os.path.join(
            "./util/scripts/mp3d_data_gen_deps/scene_episodes", unique_dataset_name + "_" + split
        )
        self.dataset_name = config.DATASET.TYPE
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        data_path = os.path.join(data_dir, "dataset_one_ep_per_scene.json.gz")
        # Creates a dataset where each episode is a random spawn point in each scene.
        if not os.path.exists(data_path):
            print("Creating dataset...", flush=True)
            dataset = make_dataset(config.DATASET.TYPE, config=config.DATASET)
            # Get one episode per scene in dataset
            scene_episodes = {}
            for episode in tqdm.tqdm(dataset.episodes):
                if episode.scene_id not in scene_episodes:
                    scene_episodes[episode.scene_id] = episode

            scene_episodes = list(scene_episodes.values())
            dataset.episodes = scene_episodes
            if not os.path.exists(data_path):
                # Multiple processes may race here, so check again before writing.
                json = dataset.to_json().encode("utf-8")
                with gzip.GzipFile(data_path, "w") as fout:
                    fout.write(json)
            print("Finished dataset...", flush=True)

        # Load the data and update each scene path to its proper location
        # (otherwise you get a weird, uninformative Affine2Dtransform() error).
        dataset = mp3d_dataset.PointNavDatasetV1()
        with gzip.open(data_path, "rt") as f:
            dataset.from_json(f.read())

            envs = []
            for i in range(len(dataset.episodes)):
                scene_id = dataset.episodes[i].scene_id.split("/")[-2]
                # Skip scenes that have already been processed.
                if scene_id not in envs_processed:
                    # Process only the requested scenes (process all if none were specified).
                    if len(envs_to_process) == 0 or scene_id in envs_to_process:
                        dataset.episodes[i].scene_id = dataset.episodes[i].scene_id.replace(
                            '/checkpoint/erikwijmans/data/mp3d/',
                            opts.scenes_dir + '/mp3d/',
                        )
                        envs.append(dataset.episodes[i])
            dataset.episodes = envs

        config.TASK.SENSORS = ["POINTGOAL_SENSOR"]

        config.freeze()

        self.rng = np.random.RandomState(seed)

        # Now look at vector environments
        if self.vectorize:
            configs, datasets = _load_datasets(
                (
                    opts.config,
                    gpu_id,
                    split,
                    data_path,
                    sensors,
                    resolution,
                    opts.scenes_dir,
                ),
                dataset,
                data_path,
                opts.scenes_dir + '/mp3d/',
                num_workers=self.num_parallel_envs,
            )
            num_envs = len(configs)

            env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
            envs = habitat.VectorEnv(
                env_fn_args=env_fn_args,
                multiprocessing_start_method="forkserver",
            )

            self.env = envs
            self.num_train_envs = int(0.9 * (self.num_parallel_envs))
            self.num_val_envs = self.num_parallel_envs - self.num_train_envs
        else:
            self.env = habitat.Env(config=config, dataset=dataset)  # TODO: End randomization here
            self.env_sim = self.env.sim
            self.rng.shuffle(self.env.episodes)

        self.num_samples = 0

        # Set up intrinsic parameters
        self.hfov = config.SIMULATOR.DEPTH_SENSOR.HFOV * np.pi / 180.0
        self.W = resolution
        self.K = np.array(
            [
                [1.0 / np.tan(self.hfov / 2.0), 0.0, 0.0, 0.0],
                [0, 1.0 / np.tan(self.hfov / 2.0), 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ],
            dtype=np.float32,
        )

        self.invK = np.linalg.inv(self.K)

        self.config = config
        self.opts = opts

        if self.opts.normalize_image:
            self.transform = transforms.Compose(
                [
                    transforms.ToTensor(),
                    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
                ]
            )  # Using same normalization as BigGan
        else:
            self.transform = transforms.ToTensor()
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--sim-gpu-id", type=int, required=True)
    parser.add_argument("--pth-gpu-id", type=int, required=True)
    parser.add_argument("--num-processes", type=int, required=True)
    parser.add_argument("--hidden-size", type=int, default=512)
    parser.add_argument("--count-test-episodes", type=int, default=100)
    parser.add_argument(
        "--sensors",
        type=str,
        default="RGB_SENSOR,DEPTH_SENSOR",
        help="comma separated string containing different"
        "sensors to use, currently 'RGB_SENSOR' and"
        "'DEPTH_SENSOR' are supported",
    )
    parser.add_argument(
        "--task-config",
        type=str,
        default="configs/tasks/pointnav.yaml",
        help="path to config yaml containing information about task",
    )
    args = parser.parse_args()

    device = torch.device("cuda:{}".format(args.pth_gpu_id))

    env_configs = []
    baseline_configs = []

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs,
                range(args.num_processes))),
    )

    ckpt = torch.load(args.model_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=args.hidden_size,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    episode_rewards = torch.zeros(envs.num_envs, 1, device=device)
    episode_spls = torch.zeros(envs.num_envs, 1, device=device)
    episode_success = torch.zeros(envs.num_envs, 1, device=device)
    episode_counts = torch.zeros(envs.num_envs, 1, device=device)
    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(args.num_processes,
                                               args.hidden_size,
                                               device=device)
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)

    while episode_counts.sum() < args.count_test_episodes:
        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        for i in range(not_done_masks.shape[0]):
            if not_done_masks[i].item() == 0:
                episode_spls[i] += infos[i]["roomnavmetric"]
                if infos[i]["roomnavmetric"] > 0:
                    episode_success[i] += 1

        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=device).unsqueeze(1)
        current_episode_reward += rewards
        episode_rewards += (1 - not_done_masks) * current_episode_reward
        episode_counts += 1 - not_done_masks
        current_episode_reward *= not_done_masks

    episode_reward_mean = (episode_rewards / episode_counts).mean().item()
    episode_spl_mean = (episode_spls / episode_counts).mean().item()
    episode_success_mean = (episode_success / episode_counts).mean().item()

    print("Average episode reward: {:.6f}".format(episode_reward_mean))
    print("Average episode success: {:.6f}".format(episode_success_mean))
    print("Average episode spl: {:.6f}".format(episode_spl_mean))
Example #17
def construct_envs(
    config: Config,
    env_class: Union[Type[Env], Type[RLEnv]],
    workers_ignore_signals: bool = False,
) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    :param config: configs that contain num_environments as well as information
    :param necessary to create individual environments.
    :param env_class: class type of the envs to be created.
    :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor

    :return: VectorEnv object created according to specification.
    """

    num_environments = config.NUM_ENVIRONMENTS
    configs = []
    env_classes = [env_class for _ in range(num_environments)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = config.TASK_CONFIG.DATASET.CONTENT_SCENES
    if "*" in config.TASK_CONFIG.DATASET.CONTENT_SCENES:
        scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if num_environments > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes"
            )

        if len(scenes) < num_environments:
            raise RuntimeError(
                "reduce the number of environments as there "
                "aren't enough scenes.\n"
                "num_environments: {}\tnum_scenes: {}".format(
                    num_environments, len(scenes)
                )
            )

        random.shuffle(scenes)

    scene_splits: List[List[str]] = [[] for _ in range(num_environments)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_environments):
        proc_config = config.clone()
        proc_config.defrost()

        task_config = proc_config.TASK_CONFIG
        task_config.SEED = task_config.SEED + i
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID
        )

        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS

        proc_config.freeze()
        configs.append(proc_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
        workers_ignore_signals=workers_ignore_signals,
    )
    return envs
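Example #18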
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True)
    parser.add_argument("--sim-gpu-id", type=int, required=True)
    parser.add_argument("--pth-gpu-id", type=int, required=True)
    parser.add_argument("--num-processes", type=int, required=True)
    parser.add_argument("--hidden-size", type=int, default=512)
    parser.add_argument("--count-test-episodes", type=int, default=100)
    parser.add_argument(
        "--sensors",
        type=str,
        default="DEPTH_SENSOR",
        help="comma separated string containing different"
        "sensors to use, currently 'RGB_SENSOR' and"
        "'DEPTH_SENSOR' are supported",
    )
    parser.add_argument(
        "--task-config",
        type=str,
        default="configs/tasks/pointnav.yaml",
        help="path to config yaml containing information about task",
    )

    cmd_line_inputs = [
        "--model-path",
        "/home/bruce/NSERC_2019/habitat-api/data/checkpoints/depth.pth",
        "--sim-gpu-id",
        "0",
        "--pth-gpu-id",
        "0",
        "--num-processes",
        "1",
        "--count-test-episodes",
        "100",
        "--task-config",
        "configs/tasks/pointnav.yaml",
    ]
    args = parser.parse_args(cmd_line_inputs)

    device = torch.device("cuda:{}".format(args.pth_gpu_id))

    env_configs = []
    baseline_configs = []

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    ckpt = torch.load(args.model_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=args.hidden_size,
        goal_sensor_uuid="pointgoal",
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)

    def transform_callback(data):
        nonlocal actor_critic
        nonlocal batch
        nonlocal not_done_masks
        nonlocal test_recurrent_hidden_states
        global flag
        global t_prev_update
        global observation

        if flag == 2:
            observation["depth"] = np.reshape(data.data[0:-2], (256, 256, 1))
            observation["pointgoal"] = data.data[-2:]
            flag = 1
            return

        pointgoal_received = data.data[-2:]
        translate_amount = 0.25  # meters
        rotate_amount = 0.174533  # radians

        isrotated = (
            rotate_amount * 0.95
            <= abs(pointgoal_received[1] - observation["pointgoal"][1])
            <= rotate_amount * 1.05
        )
        istimeup = (time.time() - t_prev_update) >= 4

        if isrotated or istimeup:
            vel_msg = Twist()
            vel_msg.linear.x = 0
            vel_msg.linear.y = 0
            vel_msg.linear.z = 0
            vel_msg.angular.x = 0
            vel_msg.angular.y = 0
            vel_msg.angular.z = 0
            pub_vel.publish(vel_msg)
            time.sleep(0.2)
            print("entered update step")

            observation["depth"] = np.reshape(data.data[0:-2], (256, 256, 1))
            observation["pointgoal"] = data.data[-2:]

            batch = batch_obs([observation])
            for sensor in batch:
                batch[sensor] = batch[sensor].to(device)
            if flag == 1:
                not_done_masks = torch.tensor([0.0], dtype=torch.float, device=device)
                flag = 0
            else:
                not_done_masks = torch.tensor([1.0], dtype=torch.float, device=device)

            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch, test_recurrent_hidden_states, not_done_masks, deterministic=True
            )

            action_id = actions.item()
            print(
                "observation received to produce action_id is "
                + str(observation["pointgoal"])
            )
            print("action_id from net is " + str(actions.item()))

            t_prev_update = time.time()
            vel_msg = Twist()
            vel_msg.linear.x = 0
            vel_msg.linear.y = 0
            vel_msg.linear.z = 0
            vel_msg.angular.x = 0
            vel_msg.angular.y = 0
            vel_msg.angular.z = 0
            if action_id == 0:
                vel_msg.linear.x = 0.25 / 4
                pub_vel.publish(vel_msg)
            elif action_id == 1:
                vel_msg.angular.z = 10 / 180 * 3.1415926
                pub_vel.publish(vel_msg)
            elif action_id == 2:
                vel_msg.angular.z = -10 / 180 * 3.1415926
                pub_vel.publish(vel_msg)
            else:
                pub_vel.publish(vel_msg)
                sub.unregister()
                print("NN finished navigation task")

    sub = rospy.Subscriber(
        "depth_and_pointgoal", numpy_msg(Floats), transform_callback, queue_size=1
    )
    rospy.spin()
Example #19
def eval_checkpoint(checkpoint_path, args, writer, cur_ckpt_idx=0):
    env_configs = []
    baseline_configs = []
    device = torch.device("cuda", args.pth_gpu_id)

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        if args.video_option:
            config_env.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config_env.TASK.MEASUREMENTS.append("COLLISIONS")
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs,
                range(args.num_processes))),
    )

    ckpt = torch.load(checkpoint_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=args.hidden_size,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(args.num_processes,
                                               args.hidden_size,
                                               device=device)
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = None
    if args.video_option:
        rgb_frames = [[] for _ in range(args.num_processes)]

    while len(stats_episodes) < args.count_test_episodes:
        current_episodes = envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=True,
            )

        print("action_id is " + str(actions.item()))
        print('point goal is ' + str(observations[0]['pointgoal']))

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]

        # For visualizing where the robot is going:
        # cv2.imshow("RGB", transform_rgb_bgr(observations[0]["rgb"]))
        cv2.imshow("Depth", observations[0]["depth"])
        cv2.waitKey(100)
        time.sleep(0.2)

        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        rewards = torch.tensor(rewards, dtype=torch.float,
                               device=device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = envs.current_episodes()
        envs_to_pause = []
        n_envs = envs.num_envs
        for i in range(n_envs):
            if (
                    next_episodes[i].scene_id,
                    next_episodes[i].episode_id,
            ) in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                episode_stats = dict()
                episode_stats["spl"] = infos[i]["spl"]
                episode_stats["success"] = int(infos[i]["spl"] > 0)
                episode_stats["reward"] = current_episode_reward[i].item()
                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )] = episode_stats
                if args.video_option:
                    generate_video(
                        args,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif args.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # pausing envs with no new episode
        if len(envs_to_pause) > 0:
            state_index = list(range(envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                envs.pause_at(idx)

            # indexing along the batch dimensions
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                state_index]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if args.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = sum(
            [v[stat_key] for v in stats_episodes.values()])
    num_episodes = len(stats_episodes)

    episode_reward_mean = aggregated_stats["reward"] / num_episodes
    episode_spl_mean = aggregated_stats["spl"] / num_episodes
    episode_success_mean = aggregated_stats["success"] / num_episodes

    logger.info("Average episode reward: {:.6f}".format(episode_reward_mean))
    logger.info("Average episode success: {:.6f}".format(episode_success_mean))
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars("eval_reward", {"average reward": episode_reward_mean},
                       cur_ckpt_idx)
    writer.add_scalars("eval_SPL", {"average SPL": episode_spl_mean},
                       cur_ckpt_idx)
    writer.add_scalars("eval_success",
                       {"average success": episode_success_mean}, cur_ckpt_idx)
def construct_envs(config: Config, env_class: Type[Union[Env,
                                                         RLEnv]]) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
        necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """

    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = config.TASK_CONFIG.DATASET.CONTENT_SCENES
    if "*" in config.TASK_CONFIG.DATASET.CONTENT_SCENES:
        scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    print("************************************* scenes lens:", len(scenes))
    print("************************************* num_processes lens:",
          num_processes)
    print("************************************* env_classes:", env_class)

    if num_processes > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes"
            )

        if len(scenes) < num_processes:
            raise RuntimeError("reduce the number of processes as there "
                               "aren't enough scenes")

        random.shuffle(scenes)

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        proc_config = config.clone()
        proc_config.defrost()

        task_config = proc_config.TASK_CONFIG
        task_config.SEED = task_config.SEED + i
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID)

        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS

        proc_config.freeze()
        configs.append(proc_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
    )
    print("************************************* envs type:", type(envs))
    print("************************************* count_episodes:",
          (envs.count_episodes()))
    # print("************************************* current_episodes:", (envs.current_episodes()))

    return envs
Example #21
    agent_sensors = "RGB_SENSOR,DEPTH_SENSOR".strip().split(",")
    for sensor in agent_sensors:
        assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
    config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
    config_env.freeze()
    env_configs.append(config_env)

    config_baseline = cfg_baseline()
    baseline_configs.append(config_baseline)

assert len(baseline_configs) > 0, "empty list of datasets"

envs = habitat.VectorEnv(
    make_env_fn=make_env_fn,
    env_fn_args=tuple(
        zip(env_configs, baseline_configs, range(1))
    ),
)

ckpt = torch.load("/home/bruce/NSERC_2019/habitat-api/data/checkpoints/ckpt.2.pth", map_location=device)

actor_critic = Policy(
    observation_space=envs.observation_spaces[0],
    action_space=envs.action_spaces[0],
    hidden_size=512,
)
actor_critic.to(device)

ppo = PPO(
    actor_critic=actor_critic,
Example #22
def eval_checkpoint(checkpoint_path, args, writer, cur_ckpt_idx=0):
    env_configs = []
    baseline_configs = []
    device = torch.device("cuda", args.pth_gpu_id)

    for _ in range(args.num_processes):
        config_env = get_config(config_paths=args.task_config)
        config_env.defrost()
        config_env.DATASET.SPLIT = "val"

        agent_sensors = args.sensors.strip().split(",")
        for sensor in agent_sensors:
            assert sensor in ["RGB_SENSOR", "DEPTH_SENSOR"]
        config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        if args.video_option:
            config_env.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
            config_env.TASK.MEASUREMENTS.append("COLLISIONS")
        config_env.freeze()
        env_configs.append(config_env)

        config_baseline = cfg_baseline()
        baseline_configs.append(config_baseline)

    assert len(baseline_configs) > 0, "empty list of datasets"

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(env_configs, baseline_configs, range(args.num_processes))
        ),
    )

    ckpt = torch.load(checkpoint_path, map_location=device)

    actor_critic = Policy(
        observation_space=envs.observation_spaces[0],
        action_space=envs.action_spaces[0],
        hidden_size=args.hidden_size,
        goal_sensor_uuid=env_configs[0].TASK.GOAL_SENSOR_UUID,
    )
    actor_critic.to(device)

    ppo = PPO(
        actor_critic=actor_critic,
        clip_param=0.1,
        ppo_epoch=4,
        num_mini_batch=32,
        value_loss_coef=0.5,
        entropy_coef=0.01,
        lr=2.5e-4,
        eps=1e-5,
        max_grad_norm=0.5,
    )

    ppo.load_state_dict(ckpt["state_dict"])

    actor_critic = ppo.actor_critic

    observations = envs.reset()
    batch = batch_obs(observations)
    for sensor in batch:
        batch[sensor] = batch[sensor].to(device)

    episode_rewards = torch.zeros(envs.num_envs, 1, device=device)
    episode_spls = torch.zeros(envs.num_envs, 1, device=device)
    episode_success = torch.zeros(envs.num_envs, 1, device=device)
    episode_counts = torch.zeros(envs.num_envs, 1, device=device)
    current_episode_reward = torch.zeros(envs.num_envs, 1, device=device)

    test_recurrent_hidden_states = torch.zeros(
        args.num_processes, args.hidden_size, device=device
    )
    not_done_masks = torch.zeros(args.num_processes, 1, device=device)
    stats_episodes = set()

    rgb_frames = None
    if args.video_option:
        rgb_frames = [[] for _ in range(args.num_processes)]  # independent list per env
        os.makedirs(args.video_dir, exist_ok=True)

    while episode_counts.sum() < args.count_test_episodes:
        current_episodes = envs.current_episodes()

        with torch.no_grad():
            _, actions, _, test_recurrent_hidden_states = actor_critic.act(
                batch,
                test_recurrent_hidden_states,
                not_done_masks,
                deterministic=False,
            )

        outputs = envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
        batch = batch_obs(observations)
        for sensor in batch:
            batch[sensor] = batch[sensor].to(device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=device,
        )

        for i in range(not_done_masks.shape[0]):
            if not_done_masks[i].item() == 0:
                episode_spls[i] += infos[i]["spl"]
                if infos[i]["spl"] > 0:
                    episode_success[i] += 1

        rewards = torch.tensor(
            rewards, dtype=torch.float, device=device
        ).unsqueeze(1)
        current_episode_reward += rewards
        episode_rewards += (1 - not_done_masks) * current_episode_reward
        episode_counts += 1 - not_done_masks
        current_episode_reward *= not_done_masks

        next_episodes = envs.current_episodes()
        envs_to_pause = []
        n_envs = envs.num_envs
        for i in range(n_envs):
            if next_episodes[i].episode_id in stats_episodes:
                envs_to_pause.append(i)

            # episode ended
            if not_done_masks[i].item() == 0:
                stats_episodes.add(current_episodes[i].episode_id)
                if args.video_option:
                    generate_video(
                        args,
                        rgb_frames[i],
                        current_episodes[i].episode_id,
                        cur_ckpt_idx,
                        infos[i]["spl"],
                        writer,
                    )
                    rgb_frames[i] = []

            # episode continues
            elif args.video_option:
                frame = observations_to_image(observations[i], infos[i])
                rgb_frames[i].append(frame)

        # stop tracking ended episodes if they exist
        if len(envs_to_pause) > 0:
            state_index = list(range(envs.num_envs))
            for idx in reversed(envs_to_pause):
                state_index.pop(idx)
                envs.pause_at(idx)

            # indexing along the batch dimension (dim 0 for these 2-D hidden states)
            test_recurrent_hidden_states = test_recurrent_hidden_states[
                state_index
            ]
            not_done_masks = not_done_masks[state_index]
            current_episode_reward = current_episode_reward[state_index]

            for k, v in batch.items():
                batch[k] = v[state_index]

            if args.video_option:
                rgb_frames = [rgb_frames[i] for i in state_index]

    episode_reward_mean = (episode_rewards / episode_counts).mean().item()
    episode_spl_mean = (episode_spls / episode_counts).mean().item()
    episode_success_mean = (episode_success / episode_counts).mean().item()

    logger.info("Average episode reward: {:.6f}".format(episode_reward_mean))
    logger.info("Average episode success: {:.6f}".format(episode_success_mean))
    logger.info("Average episode SPL: {:.6f}".format(episode_spl_mean))

    writer.add_scalars(
        "eval_reward", {"average reward": episode_reward_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_SPL", {"average SPL": episode_spl_mean}, cur_ckpt_idx
    )
    writer.add_scalars(
        "eval_success", {"average success": episode_success_mean}, cur_ckpt_idx
    )
Example #23
def construct_envs(config: Config,
                   env_class: Type[Union[Env, RLEnv]],
                   auto_reset_done: bool = True) -> VectorEnv:
    r"""Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.

    Args:
        config: configs that contain num_processes as well as information
        necessary to create individual environments.
        env_class: class type of the envs to be created.
        auto_reset_done: Whether or not to automatically reset the env on done

    Returns:
        VectorEnv object created according to specification.
    """

    num_processes = config.NUM_PROCESSES
    configs = []
    env_classes = [env_class for _ in range(num_processes)]
    dataset = make_dataset(config.TASK_CONFIG.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(config.TASK_CONFIG.DATASET)

    if num_processes > 1:
        if len(scenes) == 0:
            raise RuntimeError(
                "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes"
            )

        if len(scenes) < num_processes:
            raise RuntimeError("reduce the number of processes as there "
                               "aren't enough scenes")

        random.shuffle(scenes)

    scene_splits = [[] for _ in range(num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(num_processes):
        new_config = config.clone()
        task_config = new_config.TASK_CONFIG.clone()
        task_config.defrost()
        if len(scenes) > 0:
            task_config.DATASET.CONTENT_SCENES = scene_splits[i]

        task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            config.SIMULATOR_GPU_ID[i % len(config.SIMULATOR_GPU_ID)])
        task_config.SIMULATOR.AGENT_0.SENSORS = config.SENSORS
        task_config.freeze()

        new_config.defrost()
        new_config.TASK_CONFIG = task_config
        new_config.freeze()
        configs.append(new_config)

    for config in configs:
        logger.info(
            f"[construct_envs] Using GPU ID {config.TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID}"
        )
    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(zip(configs, env_classes)),
        auto_reset_done=auto_reset_done,
    )
    return envs
Example #24
def construct_envs(config: Config, env_class: Type) -> VectorEnv:
    r"""
    Create VectorEnv object with specified config and env class type.
    To allow better performance, dataset are split into small ones for
    each individual env, grouped by scenes.
    Args:
        config: configs that contain num_processes as well as information
        necessary to create individual environments.
        env_class: class type of the envs to be created.

    Returns:
        VectorEnv object created according to specification.
    """
    trainer_config = config.TRAINER.RL.PPO
    rl_env_config = config.TRAINER.RL
    task_config = config.TASK_CONFIG  # excluding trainer-specific configs
    env_configs, rl_env_configs = [], []
    env_classes = [env_class for _ in range(trainer_config.num_processes)]
    dataset = make_dataset(task_config.DATASET.TYPE)
    scenes = dataset.get_scenes_to_load(task_config.DATASET)

    if len(scenes) > 0:
        random.shuffle(scenes)

        assert len(scenes) >= trainer_config.num_processes, (
            "reduce the number of processes as there "
            "aren't enough scenes")

    scene_splits = [[] for _ in range(trainer_config.num_processes)]
    for idx, scene in enumerate(scenes):
        scene_splits[idx % len(scene_splits)].append(scene)

    assert sum(map(len, scene_splits)) == len(scenes)

    for i in range(trainer_config.num_processes):
        env_config = task_config.clone()
        env_config.defrost()
        if len(scenes) > 0:
            env_config.DATASET.CONTENT_SCENES = scene_splits[i]

        env_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
            trainer_config.sim_gpu_id)

        agent_sensors = trainer_config.sensors.strip().split(",")
        env_config.SIMULATOR.AGENT_0.SENSORS = agent_sensors
        env_config.freeze()
        env_configs.append(env_config)
        rl_env_configs.append(rl_env_config)

    envs = habitat.VectorEnv(
        make_env_fn=make_env_fn,
        env_fn_args=tuple(
            zip(
                env_configs,
                rl_env_configs,
                env_classes,
                range(trainer_config.num_processes),
            )
        ),
    )
    return envs
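
For reference, a generic interaction loop over the VectorEnv that any of these construct_envs variants return might look like the sketch below. It assumes an RLEnv-style make_env_fn, so step() yields (obs, reward, done, info) tuples; config and NavRLEnv stand in for the caller's own values:

envs = construct_envs(config, env_class=NavRLEnv)
observations = envs.reset()
for _ in range(100):
    # One action per worker; VectorEnv steps all workers in parallel.
    actions = [envs.action_spaces[0].sample() for _ in range(envs.num_envs)]
    outputs = envs.step(actions)
    observations, rewards, dones, infos = map(list, zip(*outputs))
envs.close()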