Example #1
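Checks that all four legacy dict-based callback hooks (on_episode_start, on_episode_step, on_episode_end, on_sample_end) fire during PG training, counting invocations in a Counter.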
    def test_callbacks(self):
        for fw in framework_iterator(frameworks=("torch", "tf")):
            counts = Counter()
            pg = PG(
                env="CartPole-v0",
                config={
                    "num_workers": 0,
                    "rollout_fragment_length": 50,
                    "train_batch_size": 50,
                    "callbacks": {
                        "on_episode_start": lambda x: counts.update({"start": 1}),
                        "on_episode_step": lambda x: counts.update({"step": 1}),
                        "on_episode_end": lambda x: counts.update({"end": 1}),
                        "on_sample_end": lambda x: counts.update({"sample": 1}),
                    },
                    "framework": fw,
                },
            )
            pg.train()
            pg.train()
            self.assertGreater(counts["sample"], 0)
            self.assertGreater(counts["start"], 0)
            self.assertGreater(counts["end"], 0)
            self.assertGreater(counts["step"], 0)
            pg.stop()
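The dict-of-lambdas "callbacks" setting above is the legacy API. As a hedged sketch only, assuming a Ray 2.x layout where DefaultCallbacks is importable from ray.rllib.algorithms.callbacks, the same counting logic could be written as a callbacks class (the bare **kwargs signatures rely on RLlib invoking these hooks with keyword arguments):

from collections import Counter

from ray.rllib.algorithms.callbacks import DefaultCallbacks

counts = Counter()


class CountingCallbacks(DefaultCallbacks):
    # RLlib calls each hook with keyword arguments only, so **kwargs
    # keeps these overrides compatible across minor versions.
    def on_episode_start(self, **kwargs):
        counts.update({"start": 1})

    def on_episode_step(self, **kwargs):
        counts.update({"step": 1})

    def on_episode_end(self, **kwargs):
        counts.update({"end": 1})

    def on_sample_end(self, **kwargs):
        counts.update({"sample": 1})


# Passed via the trainer config, e.g. config={"callbacks": CountingCallbacks}.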
Example #2
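Trains PG on a RandomEnv for several nested action spaces, writes the rollouts to a temporary directory as offline JSON data, verifies that the stored actions are either flattened arrays or structure-preserving (depending on _disable_action_flattening), and then reads the files back with a BC trainer.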
    def test_nested_action_spaces(self):
        config = DEFAULT_CONFIG.copy()
        config["env"] = RandomEnv
        # Write output to check, whether actions are written correctly.
        tmp_dir = os.popen("mktemp -d").read()[:-1]
        if not os.path.exists(tmp_dir):
            # Last resort: Resolve via the underlying tempdir (and cut the
            # leading "/tmp" from the mktemp result).
            tmp_dir = ray._private.utils.tempfile.gettempdir() + tmp_dir[4:]
            assert os.path.exists(tmp_dir), f"'{tmp_dir}' not found!"
        config["output"] = tmp_dir
        # Switch off OPE as we don't write action-probs.
        # TODO: We should probably always write those if `output` is given.
        config["off_policy_estimation_methods"] = {}

        # Pretend actions in offline files are already normalized.
        config["actions_in_input_normalized"] = True

        for _ in framework_iterator(config):
            for name, action_space in SPACES.items():
                config["env_config"] = {
                    "action_space": action_space,
                }
                for flatten in [True, False]:
                    print(f"A={action_space} flatten={flatten}")
                    shutil.rmtree(config["output"])
                    config["_disable_action_flattening"] = not flatten
                    trainer = PG(config)
                    trainer.train()
                    trainer.stop()

                    # Check actions in output file (whether properly flattened
                    # or not).
                    reader = JsonReader(
                        inputs=config["output"],
                        ioctx=trainer.workers.local_worker().io_context,
                    )
                    sample_batch = reader.next()
                    if flatten:
                        assert isinstance(sample_batch["actions"], np.ndarray)
                        assert len(sample_batch["actions"].shape) == 2
                        assert sample_batch["actions"].shape[0] == len(
                            sample_batch)
                    else:
                        tree.assert_same_structure(
                            trainer.get_policy().action_space_struct,
                            sample_batch["actions"],
                        )

                    # Test, whether offline data can be properly read by a
                    # BCTrainer, configured accordingly.
                    config["input"] = config["output"]
                    del config["output"]
                    bc_trainer = BC(config=config)
                    bc_trainer.train()
                    bc_trainer.stop()
                    config["output"] = tmp_dir
                    config["input"] = "sampler"
Example #3
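Minimal smoke test: PG on CartPole-v0 with a tiny fully-connected network and two rollout workers, one training iteration per framework.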
    def test_local(self):
        cf = DEFAULT_CONFIG.copy()
        cf["model"]["fcnet_hiddens"] = [10]
        cf["num_workers"] = 2

        for _ in framework_iterator(cf):
            agent = PG(cf, "CartPole-v0")
            print(agent.train())
            agent.stop()
Example #4
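Multi-agent offline round trip: a PG trainer with two randomly mapped policies first writes MultiAgentCartPole experiences to disk; a second trainer then reads them back as input with simulation-based off-policy estimation, polling up to 50 iterations for a non-NaN episode_reward_mean.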
    def test_multi_agent(self):
        register_env(
            "multi_agent_cartpole", lambda _: MultiAgentCartPole({"num_agents": 10})
        )

        for fw in framework_iterator():
            pg = PG(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "output": self.test_dir,
                    "multiagent": {
                        "policies": {"policy_1", "policy_2"},
                        "policy_mapping_fn": (
                            lambda aid, **kwargs: random.choice(
                                ["policy_1", "policy_2"]
                            )
                        ),
                    },
                    "framework": fw,
                },
            )
            pg.train()
            self.assertEqual(len(os.listdir(self.test_dir)), 1)

            pg.stop()
            pg = PG(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "input": self.test_dir,
                    "off_policy_estimation_methods": {
                        "simulation": {"type": "simulation"}
                    },
                    "train_batch_size": 2000,
                    "multiagent": {
                        "policies": {"policy_1", "policy_2"},
                        "policy_mapping_fn": (
                            lambda aid, **kwargs: random.choice(
                                ["policy_1", "policy_2"]
                            )
                        ),
                    },
                    "framework": fw,
                },
            )
            for _ in range(50):
                result = pg.train()
                if not np.isnan(result["episode_reward_mean"]):
                    return  # simulation ok
                time.sleep(0.1)
            assert False, "did not see any simulation results"
Example #5
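Variant of the previous test: when training purely from offline input, episode_reward_mean stays NaN, while an evaluation worker configured with "input": "sampler" still produces real episode rewards.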
    def test_multi_agent(self):
        register_env("multi_agent_cartpole",
                     lambda _: MultiAgentCartPole({"num_agents": 10}))

        for fw in framework_iterator():
            pg = PG(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "output": self.test_dir + fw,
                    "multiagent": {
                        "policies": {"policy_1", "policy_2"},
                        "policy_mapping_fn":
                        (lambda aid, **kwargs: random.choice(
                            ["policy_1", "policy_2"])),
                    },
                    "framework": fw,
                },
            )
            pg.train()
            self.assertEqual(len(os.listdir(self.test_dir + fw)), 1)
            pg.stop()
            pg = PG(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "input": self.test_dir + fw,
                    "train_batch_size": 2000,
                    "multiagent": {
                        "policies": {"policy_1", "policy_2"},
                        "policy_mapping_fn":
                        (lambda aid, **kwargs: random.choice(
                            ["policy_1", "policy_2"])),
                    },
                    "framework": fw,
                    "evaluation_interval": 1,
                    "evaluation_config": {
                        "input": "sampler"
                    },
                },
            )
            result = pg.train()
            assert np.isnan(
                result["episode_reward_mean"]
            ), "episode reward should not be computed for offline data"
            assert not np.isnan(
                result["evaluation"]["episode_reward_mean"]
            ), "Did not see simulation results during evaluation"
Example #6
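Queries a PG trainer's rollout workers via foreach_worker, foreach_worker_with_index, and foreach_env, checking the effective fragment length and per-worker environment count across the local and two remote workers.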
    def test_query_evaluators(self):
        register_env("test", lambda _: gym.make("CartPole-v0"))
        for fw in framework_iterator(frameworks=("torch", "tf")):
            pg = PG(
                env="test",
                config={
                    "num_workers": 2,
                    "rollout_fragment_length": 5,
                    "num_envs_per_worker": 2,
                    "framework": fw,
                    "create_env_on_driver": True,
                },
            )
            results = pg.workers.foreach_worker(
                lambda ev: ev.rollout_fragment_length)
            results2 = pg.workers.foreach_worker_with_index(
                lambda ev, i: (i, ev.rollout_fragment_length))
            results3 = pg.workers.foreach_worker(
                lambda ev: ev.foreach_env(lambda env: 1))
            # Each worker's effective fragment length is
            # rollout_fragment_length * num_envs_per_worker (5 * 2 = 10),
            # reported by the local worker (index 0) and both remote workers.
            self.assertEqual(results, [10, 10, 10])
            self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)])
            self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
            pg.stop()