Example #1
    def test_config_init(self, train_config):
        c = train_config
        # Build an HDDPG instance purely from a generated config dict.
        config = HDDPG.generate_config({})
        config["frame_config"]["models"] = ["Actor", "Actor", "Critic", "Critic"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": c.observe_dim,
                "action_dim": c.action_dim,
                "action_range": c.action_range,
            }
        ] * 2 + [
            {"state_dim": c.observe_dim, "action_dim": c.action_dim}
        ] * 2
        hddpg = HDDPG.init_from_config(config)

        # Store a few dummy transitions and run a single update step.
        old_state = state = t.zeros([1, c.observe_dim], dtype=t.float32)
        action = t.zeros([1, c.action_dim], dtype=t.float32)
        hddpg.store_episode(
            [
                {
                    "state": {"state": old_state},
                    "action": {"action": action},
                    "next_state": {"state": state},
                    "reward": 0,
                    "terminal": False,
                }
                for _ in range(3)
            ]
        )
        hddpg.update()
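These snippets appear to be methods and fixtures of a test class and omit their imports as well as the small Actor and Critic test models they construct. Below is a minimal sketch of the assumed surrounding context; the machin module paths and the model definitions (layer sizes included) are assumptions for illustration, not taken from the snippets themselves.

import torch as t
import torch.nn as nn

# Assumed machin import paths (not shown in the snippets).
from machin.frame.algorithms import HDDPG
from machin.model.nets.base import static_module_wrapper as smw


class Actor(nn.Module):
    # Sketch of a deterministic policy network with a bounded output.
    def __init__(self, state_dim, action_dim, action_range):
        super().__init__()
        self.fc1 = nn.Linear(state_dim, 16)
        self.fc2 = nn.Linear(16, action_dim)
        self.action_range = action_range

    def forward(self, state):
        a = t.relu(self.fc1(state))
        # Squash actions into [-action_range, action_range].
        return t.tanh(self.fc2(a)) * self.action_range


class Critic(nn.Module):
    # Sketch of a Q-network over the concatenated (state, action) pair.
    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.fc1 = nn.Linear(state_dim + action_dim, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, state, action):
        q = t.relu(self.fc1(t.cat([state, action], dim=1)))
        return self.fc2(q)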
Example #2
    def hddpg(self, train_config, device, dtype):
        c = train_config
        # Online and target actor/critic networks, wrapped so that inputs
        # and outputs are kept on `device`.
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        hddpg = HDDPG(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        return hddpg
Example #3
    def hddpg_vis(self, train_config, device, dtype, tmpdir):
        # not used for training, only used for testing apis
        c = train_config
        tmp_dir = tmpdir.make_numbered_dir()
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        hddpg = HDDPG(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
            visualize=True,
            visualize_dir=str(tmp_dir),
        )
        return hddpg
Example #4
    def hddpg_train(self, train_config, gpu):
        c = train_config
        # cpu is faster for testing full training.
        actor = smw(Actor(c.observe_dim, c.action_dim, c.action_range), "cpu", "cpu")
        actor_t = smw(Actor(c.observe_dim, c.action_dim, c.action_range), "cpu", "cpu")
        critic = smw(Critic(c.observe_dim, c.action_dim), "cpu", "cpu")
        critic_t = smw(Critic(c.observe_dim, c.action_dim), "cpu", "cpu")
        hddpg = HDDPG(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        return hddpg
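Given an instance returned by one of these fixtures (and the same train_config), a full training test drives it through store_episode and update just as in Example #1. A minimal sketch, with placeholder zero tensors standing in for real environment interaction:

c = train_config
state = t.zeros([1, c.observe_dim], dtype=t.float32)
action = t.zeros([1, c.action_dim], dtype=t.float32)
for _ in range(10):  # placeholder loop; a real test would step an environment here
    hddpg.store_episode(
        [
            {
                "state": {"state": state},
                "action": {"action": action},
                "next_state": {"state": state},
                "reward": 0,
                "terminal": False,
            }
        ]
    )
    hddpg.update()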
Example #5
    def hddpg(self, train_config):
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).to(c.device),
            c.device,
            c.device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).to(c.device),
            c.device,
            c.device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
        )
        hddpg = HDDPG(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device=c.device,
            replay_size=c.replay_size,
        )
        return hddpg