Example 1
    def test_dqn_apex_cpu_spawn_full_train(self, tmpdir):
        # by default, pytorch lightning will use ddp-spawn mode instead of ddp
        # when only CPUs are available
        os.environ["WORLD_SIZE"] = "3"
        config = generate_env_config("CartPole-v0", {})
        config = generate_training_config(root_dir=tmpdir.make_numbered_dir(),
                                          config=config)
        config = generate_algorithm_config("DQNApex", config)
        # use ddp_cpu
        config["gpus"] = None
        config["num_processes"] = 3
        # this testing process corresponds to this node
        config["num_nodes"] = 1
        config["early_stopping_patience"] = 100
        # Use classes instead of string names since the algorithm is distributed.
        config["frame_config"]["models"] = [QNet, QNet]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4,
                "action_num": 2
            },
        ]

        # for spawn mode we use a special callback, because we cannot access
        # max_total_reward from the sub-processes
        queue = SimpleQueue(ctx=mp.get_context("spawn"))
        # cb = [SpawnInspectCallback(queue), LoggerDebugCallback()]
        cb = [SpawnInspectCallback(queue)]
        t = Thread(target=launch, args=(config, ), kwargs={"pl_callbacks": cb})
        t.start()

        default_logger.info("Start tracking")
        subproc_max_total_reward = [0, 0, 0]
        while True:
            try:
                result = queue.quick_get(timeout=60)
                default_logger.info(
                    f"Result from process [{result[0]}]: {result[1]}")
                subproc_max_total_reward[result[0]] = result[1]
            except TimeoutError:
                # no more results
                default_logger.info("No more results.")
                break
        t.join()
        assert (
            sum(subproc_max_total_reward) / 3 >= 150
        ), f"Max total reward {sum(subproc_max_total_reward) / 3} below threshold 150."
Example 2
    def test_DDPG(self):
        config = generate_algorithm_config("DDPG", {})
        config["frame_config"]["models"] = [
            "DDPGActorDisc",
            "DDPGActorDisc",
            "DDPGCritic",
            "DDPGCritic",
        ]
        config["frame_config"]["model_kwargs"] = [{
            "state_dim": 4,
            "action_dim": 2
        }] * 4
        ddpg = init_algorithm_from_config(config)

        env = gym.make("CartPole-v0")
        dataset = RLGymDiscActDataset(ddpg, env, render_every_episode=1)
        self.assert_valid_disc_output(next(dataset))
Example 3
    def test_A2C(self):
        config = generate_algorithm_config("A2C", {})
        config["frame_config"]["models"] = ["A2CActorDisc", "A2CCritic"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4
            },
        ]
        a2c = init_algorithm_from_config(config)

        env = gym.make("CartPole-v0")
        dataset = RLGymDiscActDataset(a2c, env, render_every_episode=1)
        self.assert_valid_disc_output(next(dataset))
Example 4
    def test_A2C(self):
        config = generate_algorithm_config("A2C", {})
        config["frame_config"]["models"] = ["A2CActorCont", "A2CCritic"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 3,
                "action_dim": 1,
                "action_range": 2
            },
            {
                "state_dim": 3
            },
        ]
        a2c = init_algorithm_from_config(config)

        env = gym.make("Pendulum-v0")
        dataset = RLGymContActDataset(a2c, env, render_every_episode=1)
        self.assert_valid_cont_output(next(dataset))
Example 5
    def test_DQN(self):
        config = generate_algorithm_config("DQN", {})
        config["frame_config"]["models"] = ["QNet", "QNet"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4,
                "action_num": 2
            },
        ]
        dqn = init_algorithm_from_config(config)

        env = gym.make("CartPole-v0")
        dataset = RLGymDiscActDataset(dqn, env, render_every_episode=1)
        self.assert_valid_disc_output(next(dataset))
Example 6
def test_gym_env_dataset_creator():
    # Discrete action environment
    config = generate_gym_env_config("CartPole-v0", {})
    config = generate_algorithm_config("DDPG", config)
    config["frame_config"]["models"] = [
        "DDPGActorCont",
        "DDPGActorCont",
        "DDPGCritic",
        "DDPGCritic",
    ]
    config["frame_config"]["model_kwargs"] = [{
        "state_dim": 3,
        "action_dim": 1,
        "action_range": 2
    }] * 2 + [{
        "state_dim": 3,
        "action_dim": 1
    }] * 2
    ddpg = init_algorithm_from_config(config)

    assert isinstance(
        gym_env_dataset_creator(ddpg, config["train_env_config"]),
        RLGymDiscActDataset)
    assert isinstance(gym_env_dataset_creator(ddpg, config["test_env_config"]),
                      RLGymDiscActDataset)

    # Continuous action environment
    config = generate_gym_env_config("Pendulum-v0", {})
    assert isinstance(
        gym_env_dataset_creator(ddpg, config["train_env_config"]),
        RLGymContActDataset)
    assert isinstance(gym_env_dataset_creator(ddpg, config["test_env_config"]),
                      RLGymContActDataset)

    # Unsupported environments,
    # e.g. algorithmic ones, which use a tuple action space,
    # or robotics ones, which use the goal-based action space
    config = generate_gym_env_config("Copy-v0", {})
    with pytest.raises(ValueError, match="not supported"):
        gym_env_dataset_creator(ddpg, config["train_env_config"])

    with pytest.raises(ValueError, match="not supported"):
        gym_env_dataset_creator(ddpg, config["test_env_config"])
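For reference, the dispatch that gym_env_dataset_creator performs can be pictured as a check on the environment's action space. The sketch below is only illustrative (the helper name is made up, and RLGymDiscActDataset / RLGymContActDataset are assumed to be importable as in the tests above); it is not machin's actual implementation.

import gym


def _dataset_for(algorithm, env_name):
    # Illustrative dispatch only: Discrete action spaces map to the
    # discrete-action dataset, Box spaces to the continuous-action dataset,
    # and anything else (Tuple, goal-based dict spaces, ...) is rejected,
    # which is why "Copy-v0" above raises ValueError.
    env = gym.make(env_name)
    if isinstance(env.action_space, gym.spaces.Discrete):
        return RLGymDiscActDataset(algorithm, env)
    if isinstance(env.action_space, gym.spaces.Box):
        return RLGymContActDataset(algorithm, env)
    raise ValueError(f"Action space {env.action_space} is not supported.")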
Example 7
    def test_dqn_apex(_, tmpdir):
        config = generate_gym_env_config("CartPole-v0", {})
        config = generate_training_config(
            trials_dir=str(tmpdir.make_numbered_dir()), config=config)
        config = generate_algorithm_config("DQNApex", config)
        config["early_stopping_patience"] = 10
        config["frame_config"]["models"] = ["QNet", "QNet"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4,
                "action_num": 2
            },
        ]
        cb = InspectCallback()
        launch_gym(config, pl_callbacks=[cb])
        assert cb.max_total_reward >= 150
Example 8
    def test_DDPG(self):
        config = generate_algorithm_config("DDPG", {})
        config["frame_config"]["models"] = [
            "DDPGActorCont",
            "DDPGActorCont",
            "DDPGCritic",
            "DDPGCritic",
        ]
        config["frame_config"]["model_kwargs"] = [{
            "state_dim": 3,
            "action_dim": 1,
            "action_range": 2
        }] * 2 + [{
            "state_dim": 3,
            "action_dim": 1
        }] * 2
        ddpg = init_algorithm_from_config(config)

        env = gym.make("Pendulum-v0")
        dataset = RLGymContActDataset(ddpg, env, render_every_episode=1)
        self.assert_valid_cont_output(next(dataset))
Example 9
    def test_dqn_full_train(self, tmpdir):
        config = generate_env_config("CartPole-v0", {})
        config = generate_training_config(
            root_dir=str(tmpdir.make_numbered_dir()), config=config)
        config = generate_algorithm_config("DQN", config)
        config["early_stopping_patience"] = 100
        config["frame_config"]["models"] = ["QNet", "QNet"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": 4,
                "action_num": 2
            },
            {
                "state_dim": 4,
                "action_num": 2
            },
        ]
        cb = InspectCallback()
        launch(config, pl_callbacks=[cb])
        assert (
            cb.max_total_reward >= 150
        ), f"Max total reward {cb.max_total_reward} below threshold 150."
Example 10
File: main.py Project: iffiX/machin
import torch as t
import torch.nn as nn

# generate_env_config, generate_algorithm_config, generate_training_config and
# launch below are assumed to be imported from machin's auto-config utilities.


class SomeQNet(nn.Module):
    def __init__(self, state_dim, action_num):
        super().__init__()

        self.fc1 = nn.Linear(state_dim, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, action_num)

    def forward(self, state):
        a = t.relu(self.fc1(state))
        a = t.relu(self.fc2(a))
        return self.fc3(a)


if __name__ == "__main__":
    config = generate_algorithm_config("DQN")
    config = generate_env_config("openai_gym", config)
    config = generate_training_config(root_dir="trial",
                                      episode_per_epoch=10,
                                      max_episodes=10000,
                                      config=config)
    config["frame_config"]["models"] = ["SomeQNet", "SomeQNet"]
    config["frame_config"]["model_kwargs"] = [{
        "state_dim": 4,
        "action_num": 2
    }] * 2
    launch(config)
Example 11
        should_stop = t_plugin.reduce(should_stop, reduce_op=ReduceOp.SUM)
        should_stop = bool(should_stop == trainer.world_size)
        return should_stop

    def reduce_max_total_reward(self, trainer, t_plugin):
        avg = t.tensor(self.max_total_reward, device=trainer.lightning_module.device)
        avg = t_plugin.reduce(avg, reduce_op=ReduceOp.SUM)
        return float(avg)


if __name__ == "__main__":
    os.environ["WORLD_SIZE"] = "3"
    print(os.environ["TEST_SAVE_PATH"])
    config = generate_env_config("CartPole-v0", {})
    config = generate_training_config(root_dir=os.environ["ROOT_DIR"], config=config)
    config = generate_algorithm_config("DQNApex", config)

    # use ddp gpu
    config["gpus"] = [0, 0, 0]
    config["num_processes"] = 3
    # this testing process corresponds to this node
    config["num_nodes"] = 1
    config["early_stopping_patience"] = 100
    # Use classes instead of string names since the algorithm is distributed.
    config["frame_config"]["models"] = [QNet, QNet]
    config["frame_config"]["model_kwargs"] = [
        {"state_dim": 4, "action_num": 2},
        {"state_dim": 4, "action_num": 2},
    ]

    # cb = [DDPInspectCallback(), LoggerDebugCallback()]
Example 12
    elif args.command == "generate":
        if args.algo not in get_available_algorithms():
            print(
                f"{args.algo} is not a valid algorithm, use list "
                "--algo to get a list of available algorithms."
            )
            exit(0)
        if args.env not in get_available_environments():
            print(
                f"{args.env} is not a valid environment, use list "
                "--env to get a list of available environments."
            )
            exit(0)
        config = {}
        config = generate_env_config(args.env, config=config)
        config = generate_algorithm_config(args.algo, config=config)
        config = generate_training_config(config=config)

        if args.print:
            pprint(config)

        with open(args.output, "w") as f:
            json.dump(config, f, indent=4, sort_keys=True)
        print(f"Config saved to {args.output}")

    elif args.command == "launch":
        with open(args.config, "r") as f:
            conf = json.load(f)
        launch(conf)
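The snippet above only shows the command handling; the argument parser it relies on is not included here. The sketch below reconstructs a parser that would produce the flags used in the snippet (this is an assumption about the surrounding code, not the actual machin CLI definition).

import argparse

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")

# "list" prints the available algorithms/environments, as referenced in the
# error messages above.
lst = subparsers.add_parser("list")
lst.add_argument("--algo", action="store_true")
lst.add_argument("--env", action="store_true")

# "generate" writes a config file for the chosen algorithm and environment.
gen = subparsers.add_parser("generate")
gen.add_argument("--algo", required=True)
gen.add_argument("--env", required=True)
gen.add_argument("--print", action="store_true")
gen.add_argument("--output", default="config.json")

# "launch" reads a previously generated config and starts training.
lau = subparsers.add_parser("launch")
lau.add_argument("--config", required=True)

args = parser.parse_args()

With such a parser, a typical session would generate a config first (generate --algo DQN --env <env name> --output config.json) and then start training from it (launch --config config.json).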