Example #1
    def test_callbacks(self):
        for fw in framework_iterator(frameworks=("torch", "tf")):
            counts = Counter()
            pg = PGTrainer(
                env="CartPole-v0", config={
                    "num_workers": 0,
                    "rollout_fragment_length": 50,
                    "train_batch_size": 50,
                    "callbacks": {
                        "on_episode_start":
                            lambda x: counts.update({"start": 1}),
                        "on_episode_step":
                            lambda x: counts.update({"step": 1}),
                        "on_episode_end": lambda x: counts.update({"end": 1}),
                        "on_sample_end":
                            lambda x: counts.update({"sample": 1}),
                    },
                    "framework": fw,
                })
            pg.train()
            pg.train()
            self.assertGreater(counts["sample"], 0)
            self.assertGreater(counts["start"], 0)
            self.assertGreater(counts["end"], 0)
            self.assertGreater(counts["step"], 0)
            pg.stop()
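
The dict-of-lambdas "callbacks" config used above is the legacy style. A minimal sketch of the same counting idea with the class-based callbacks API (assuming Ray 1.x's DefaultCallbacks; num_workers=0 keeps the callbacks in the driver process so the Counter is actually updated):

from collections import Counter

from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.agents.pg import PGTrainer

counts = Counter()

class CountingCallbacks(DefaultCallbacks):
    # Callback methods take keyword-only arguments in Ray 1.x.
    def on_episode_start(self, *, worker, base_env, policies, episode,
                         **kwargs):
        counts.update({"start": 1})

    def on_episode_end(self, *, worker, base_env, policies, episode,
                       **kwargs):
        counts.update({"end": 1})

pg = PGTrainer(
    env="CartPole-v0",
    config={"num_workers": 0, "callbacks": CountingCallbacks})
pg.train()
print(counts)  # e.g. Counter({"start": ..., "end": ...})
pg.stop()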
Example #2
    def test_gpus_in_local_mode(self):
        # Local mode.
        ray.init(num_gpus=8, local_mode=True)

        actual_gpus_available = torch.cuda.device_count()

        config = DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["env"] = "CartPole-v0"

        # Expect no errors in local mode.
        for num_gpus in [0, 0.1, 1, actual_gpus_available + 4]:
            print(f"num_gpus={num_gpus}")
            for fake_gpus in [False, True]:
                print(f"_fake_gpus={fake_gpus}")
                config["num_gpus"] = num_gpus
                config["_fake_gpus"] = fake_gpus
                frameworks = ("tf", "torch") if num_gpus > 1 else \
                    ("tf2", "tf", "torch")
                for _ in framework_iterator(config, frameworks=frameworks):
                    print("direct RLlib")
                    trainer = PGTrainer(config, env="CartPole-v0")
                    trainer.stop()
                    print("via ray.tune.run()")
                    tune.run("PG",
                             config=config,
                             stop={"training_iteration": 0})
        ray.shutdown()
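
The interesting knob in this test is _fake_gpus, which lets the GPU code paths run on a machine without physical GPUs. A minimal standalone sketch, assuming Ray 1.x's agents.pg API:

import ray
from ray.rllib.agents.pg import DEFAULT_CONFIG, PGTrainer

ray.init(local_mode=True)
config = DEFAULT_CONFIG.copy()
config["num_workers"] = 0
config["num_gpus"] = 2       # Request two GPUs ...
config["_fake_gpus"] = True  # ... but only simulated ones.
trainer = PGTrainer(config, env="CartPole-v0")
trainer.train()
trainer.stop()
ray.shutdown()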
Example #3
    def test_gpus_in_non_local_mode(self):
        # Non-local mode.
        ray.init(num_cpus=8)

        actual_gpus = torch.cuda.device_count()
        print(f"Actual GPUs found (by torch): {actual_gpus}")

        config = DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["env"] = "CartPole-v0"

        # Expect errors when running a config with num_gpus > 0 on a machine
        # without GPUs and with _fake_gpus=False.
        for num_gpus in [0, 0.1, 1, actual_gpus + 4]:
            # Only allow feasible num_gpus_per_worker values (so the test
            # does not block indefinitely waiting for an unschedulable
            # worker).
            per_worker = [0] if actual_gpus == 0 or actual_gpus < num_gpus \
                else [0, 0.5, 1]
            for num_gpus_per_worker in per_worker:
                for fake_gpus in [False] + ([] if num_gpus == 0 else [True]):
                    config["num_gpus"] = num_gpus
                    config["num_gpus_per_worker"] = num_gpus_per_worker
                    config["_fake_gpus"] = fake_gpus

                    print(f"\n------------\nnum_gpus={num_gpus} "
                          f"num_gpus_per_worker={num_gpus_per_worker} "
                          f"_fake_gpus={fake_gpus}")

                    frameworks = ("tf", "torch") if num_gpus > 1 else \
                        ("tf2", "tf", "torch")
                    for _ in framework_iterator(config, frameworks=frameworks):
                        # Expect that trainer creation causes a num_gpu error.
                        if actual_gpus < num_gpus + 2 * num_gpus_per_worker \
                                and not fake_gpus:
                            # "Direct" RLlib (create Trainer on the driver).
                            # Cannot run through ray.tune.run() as it would
                            # simply wait indefinitely for the resources to
                            # become available.
                            print("direct RLlib")
                            self.assertRaisesRegex(
                                RuntimeError,
                                "Found 0 GPUs on your machine",
                                lambda: PGTrainer(config, env="CartPole-v0"),
                            )
                        # If actual_gpus >= num_gpus or faked,
                        # expect no error.
                        else:
                            print("direct RLlib")
                            trainer = PGTrainer(config, env="CartPole-v0")
                            trainer.stop()
                            # Cannot run through ray.tune.run() with fake
                            # GPUs as it would simply wait indefinitely for
                            # the resources to become available (even though
                            # we wouldn't really need them).
                            if num_gpus == 0:
                                print("via ray.tune.run()")
                                tune.run("PG",
                                         config=config,
                                         stop={"training_iteration": 0})
        ray.shutdown()
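
The failure condition actual_gpus < num_gpus + 2 * num_gpus_per_worker follows directly from the config: the driver requests num_gpus and each of the num_workers=2 rollout workers requests num_gpus_per_worker, so that sum is the trainer's total GPU demand.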
Example #4
    def test_local(self):
        cf = DEFAULT_CONFIG.copy()
        cf["model"]["fcnet_hiddens"] = [10]

        for _ in framework_iterator(cf):
            agent = PGTrainer(cf, "CartPole-v0")
            print(agent.train())
            agent.stop()
Example #5
    def test_nested_action_spaces(self):
        config = DEFAULT_CONFIG.copy()
        config["env"] = RandomEnv
        # Write output to check whether actions are written correctly.
        tmp_dir = os.popen("mktemp -d").read()[:-1]
        if not os.path.exists(tmp_dir):
            # Last resort: Resolve via the underlying tempdir (and cut off
            # the leading "/tmp").
            tmp_dir = ray._private.utils.tempfile.gettempdir() + tmp_dir[4:]
            assert os.path.exists(tmp_dir), f"'{tmp_dir}' not found!"
        config["output"] = tmp_dir
        # Switch off OPE (off-policy estimation) as we don't write
        # action-probs.
        # TODO: We should probably always write those if `output` is given.
        config["input_evaluation"] = []

        # Pretend actions in offline files are already normalized.
        config["actions_in_input_normalized"] = True

        for _ in framework_iterator(config):
            for name, action_space in SPACES.items():
                config["env_config"] = {
                    "action_space": action_space,
                }
                for flatten in [False, True]:
                    print(f"A={action_space} flatten={flatten}")
                    shutil.rmtree(config["output"])
                    config["_disable_action_flattening"] = not flatten
                    trainer = PGTrainer(config)
                    trainer.train()
                    trainer.stop()

                    # Check actions in output file (whether properly flattened
                    # or not).
                    reader = JsonReader(
                        inputs=config["output"],
                        ioctx=trainer.workers.local_worker().io_context,
                    )
                    sample_batch = reader.next()
                    if flatten:
                        assert isinstance(sample_batch["actions"], np.ndarray)
                        assert len(sample_batch["actions"].shape) == 2
                        assert sample_batch["actions"].shape[0] == len(
                            sample_batch)
                    else:
                        tree.assert_same_structure(
                            trainer.get_policy().action_space_struct,
                            sample_batch["actions"],
                        )

                    # Test whether the offline data can be properly read by
                    # an accordingly configured BCTrainer.
                    config["input"] = config["output"]
                    del config["output"]
                    bc_trainer = BCTrainer(config=config)
                    bc_trainer.train()
                    bc_trainer.stop()
                    config["output"] = tmp_dir
                    config["input"] = "sampler"
Example #6
    def test_no_step_on_init(self):
        register_env("fail", lambda _: FailOnStepEnv())
        for fw in framework_iterator(frameworks=("torch", "tf")):
            pg = PGTrainer(
                env="fail", config={
                    "num_workers": 1,
                    "framework": fw,
                })
            self.assertRaises(Exception, lambda: pg.train())
            pg.stop()
Example #7
    def testMultiAgent(self):
        register_env("multi_agent_cartpole",
                     lambda _: MultiAgentCartPole({"num_agents": 10}))
        single_env = gym.make("CartPole-v0")

        def gen_policy():
            obs_space = single_env.observation_space
            act_space = single_env.action_space
            return (PGTFPolicy, obs_space, act_space, {})

        for fw in framework_iterator():
            pg = PGTrainer(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "output": self.test_dir,
                    "multiagent": {
                        "policies": {
                            "policy_1": gen_policy(),
                            "policy_2": gen_policy(),
                        },
                        "policy_mapping_fn": (
                            lambda agent_id: random.choice(
                                ["policy_1", "policy_2"])),
                    },
                    "framework": fw,
                })
            pg.train()
            self.assertEqual(len(os.listdir(self.test_dir)), 1)

            pg.stop()
            pg = PGTrainer(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "input": self.test_dir,
                    "input_evaluation": ["simulation"],
                    "train_batch_size": 2000,
                    "multiagent": {
                        "policies": {
                            "policy_1": gen_policy(),
                            "policy_2": gen_policy(),
                        },
                        "policy_mapping_fn": (
                            lambda agent_id: random.choice(
                                ["policy_1", "policy_2"])),
                    },
                    "framework": fw,
                })
            for _ in range(50):
                result = pg.train()
                if not np.isnan(result["episode_reward_mean"]):
                    return  # simulation ok
                time.sleep(0.1)
            assert False, "did not see any simulation results"
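
Note the round trip in this test: the first PGTrainer writes its sampled experiences to self.test_dir via the "output" key, and the second one reads that directory back via "input", using the "simulation" input evaluator (which actually steps the env) to produce episode rewards; that is why the loop polls until episode_reward_mean stops being NaN.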
Example #8
    def test_rollout_dict_space(self):
        register_env("nested", lambda _: NestedDictEnv())
        agent = PGTrainer(env="nested", config={"framework": "tf"})
        agent.train()
        path = agent.save()
        agent.stop()

        # Test train works on restore
        agent2 = PGTrainer(env="nested", config={"framework": "tf"})
        agent2.restore(path)
        agent2.train()

        # Test rollout works on restore
        rollout(agent2, "nested", 100)
Example #9
    def testRolloutDictSpace(self):
        register_env("nested", lambda _: NestedDictEnv())
        agent = PGTrainer(env="nested")
        agent.train()
        path = agent.save()
        agent.stop()

        # Test train works on restore
        agent2 = PGTrainer(env="nested")
        agent2.restore(path)
        agent2.train()

        # Test rollout works on restore
        rollout(agent2, "nested", 100)
Example #10
    def test_multi_agent(self):
        register_env("multi_agent_cartpole",
                     lambda _: MultiAgentCartPole({"num_agents": 10}))

        for fw in framework_iterator():
            pg = PGTrainer(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "output": self.test_dir,
                    "multiagent": {
                        "policies": {"policy_1", "policy_2"},
                        "policy_mapping_fn":
                        (lambda aid, **kwargs: random.choice(
                            ["policy_1", "policy_2"])),
                    },
                    "framework": fw,
                },
            )
            pg.train()
            self.assertEqual(len(os.listdir(self.test_dir)), 1)

            pg.stop()
            pg = PGTrainer(
                env="multi_agent_cartpole",
                config={
                    "num_workers": 0,
                    "input": self.test_dir,
                    "input_evaluation": ["simulation"],
                    "train_batch_size": 2000,
                    "multiagent": {
                        "policies": {"policy_1", "policy_2"},
                        "policy_mapping_fn":
                        (lambda aid, **kwargs: random.choice(
                            ["policy_1", "policy_2"])),
                    },
                    "framework": fw,
                },
            )
            for _ in range(50):
                result = pg.train()
                if not np.isnan(result["episode_reward_mean"]):
                    return  # simulation ok
                time.sleep(0.1)
            assert False, "did not see any simulation results"
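
Compared to Example #7, this variant uses the newer multiagent config style: "policies" is a plain set of policy IDs (observation and action spaces are then inferred from the env), and policy_mapping_fn accepts extra keyword arguments. A minimal sketch of a deterministic mapping function under that newer call convention, assuming integer agent IDs as in MultiAgentCartPole:

def policy_mapping_fn(agent_id, episode=None, worker=None, **kwargs):
    # Even agent IDs train policy_1, odd ones policy_2.
    return "policy_1" if int(agent_id) % 2 == 0 else "policy_2"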
Example #11
    def test_query_evaluators(self):
        register_env("test", lambda _: gym.make("CartPole-v0"))
        for fw in framework_iterator(frameworks=("torch", "tf")):
            pg = PGTrainer(env="test",
                           config={
                               "num_workers": 2,
                               "rollout_fragment_length": 5,
                               "num_envs_per_worker": 2,
                               "framework": fw,
                           })
            results = pg.workers.foreach_worker(
                lambda ev: ev.rollout_fragment_length)
            results2 = pg.workers.foreach_worker_with_index(
                lambda ev, i: (i, ev.rollout_fragment_length))
            results3 = pg.workers.foreach_worker(
                lambda ev: ev.foreach_env(lambda env: 1))
            self.assertEqual(results, [10, 10, 10])
            self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)])
            self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
            pg.stop()
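
The asserted values follow from the config: each worker's effective rollout_fragment_length is the configured 5 multiplied by num_envs_per_worker=2, hence 10; foreach_worker visits the local worker plus the two remote workers, hence three results; and results3 reflects the two sub-environments per worker.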