def __init__(self, path: str, device: str):
    """Restore the actor policy stored in a checkpoint file.

    Hyperparameters are read from ``hyperparams.json`` in the same directory
    as the checkpoint. The checkpoint is loaded onto the CPU first and the
    resulting policy is then moved to ``device``.

    Args:
        path: Location of the checkpoint file.
        device: Device handed to ``make_env`` and used for the policy.
    """
    checkpoint_path = Path(path).absolute()
    config_path = checkpoint_path.parent / "hyperparams.json"
    with open(config_path, "r") as config_file:
        hyperparams = json.load(config_file)
    checkpoint = torch.load(checkpoint_path, "cpu")

    self.ns = hyperparams["diayn"]["num_skills"]

    # The environment is wrapped with DIAYN before the actor is built, so the
    # actor is constructed from the wrapped observation/action spaces.
    base_env = make_env("Ant-v4", device)
    skill_model = DiscreteDiayn(
        base_env.observation_space,
        hyperparams["diayn"]["model"]["hidden_dim"],
        self.ns,
        hyperparams["diayn"]["reward_weight"],
        _truncate=hyperparams["diayn"]["truncate"],
    )
    wrapped_env = DiaynWrapper(base_env, skill_model)

    obs_spec = wrapped_env.observation_space
    act_spec = wrapped_env.action_space
    actor_hidden_dim = hyperparams["sac"]["actor"]["policy"]["hidden_dim"]
    self.forward_policy = TanhGaussianActor(
        obs_spec=obs_spec,
        act_spec=act_spec,
        hidden_dim=actor_hidden_dim,
    )

    actor_weights = checkpoint["sac"]["actor"]["state_dict"]
    self.forward_policy.load_state_dict(actor_weights)
    self.forward_policy.to(device)
    self.device = device
def test_video_wrapper():
    """Step a video-wrapped environment and check the recorded video is 4-D."""
    wrapped = VideoWrapper(make_env("HalfCheetah-v2", "cpu"))
    wrapped.reset()

    steps_taken = 0
    while steps_taken < 49:
        sampled = wrapped.action_space.sample()
        # Add a leading axis to the sampled action before stepping.
        wrapped.step(torch.tensor(sampled).unsqueeze(0))
        steps_taken += 1

    video = wrapped.get_video_and_clear()
    assert video.dim() == 4
def test_critic():
    """Smoke-test ``DqnCritic`` on randomly sampled observation/action pairs.

    The critic is evaluated 300 times per device. The CPU is always
    exercised; CUDA is exercised only when it is available, so the test does
    not error out on CPU-only hosts.
    """
    import torch  # local import: only needed for the CUDA availability check

    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    for device in devices:
        env = make_env("CartPole-v1", device)
        critic = DqnCritic(
            env.observation_space, env.action_space, [256, 256], device
        )
        for _ in range(300):
            # Only checks that the forward pass runs; the output is unused.
            critic(
                torchify(env.observation_space.sample(), device),
                torchify(env.action_space.sample(), device),
            )
def diayn_test(diayn):
    """Exercise the basic API of a DIAYN implementation.

    For each usable device this builds an instance, attaches a sampled skill
    to a fresh observation, then calls ``calc_rewards`` and ``train`` on it
    and checks that the instance is ``Loggable``. The CPU is always used;
    CUDA is used only when it is available, so the helper does not error out
    on CPU-only hosts.

    Args:
        diayn: Callable (typically the class) that builds the DIAYN instance
            under test.
    """
    import torch  # local import: only needed for the CUDA availability check

    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    for dev in devices:
        env = make_env("InvertedPendulum-v2", dev)
        dd = diayn(env.observation_space, [256, 256], 10, 3, 128, _device=dev)

        obs = env.reset()
        obs["diayn"] = dd.sample(1)
        print(obs)

        dd.calc_rewards(obs)
        dd.train(obs)

        assert isinstance(dd, Loggable)
        print(dd.log_hyperparams())
        print(dd.log_epoch())
def test_actor():
    """Check that ``DqnActor`` picks actions the critic values at least as
    highly as randomly sampled ones.

    For each usable device, 300 batches of 20 random observations are
    compared. The CPU is always exercised; CUDA is exercised only when it is
    available, so the test does not error out on CPU-only hosts.
    """
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    batch_size = 20
    for device in devices:
        env = make_env("CartPole-v1", device)
        critic = DqnCritic(
            env.observation_space, env.action_space, [256, 256], device
        )
        actor = DqnActor(critic, env.observation_space, env.action_space, device)

        for _ in range(300):
            obs = collate([
                torchify(env.observation_space.sample(), device)
                for _ in range(batch_size)
            ])
            act = actor(obs)
            rand_act = torch.cat([
                torchify(env.action_space.sample(), device)
                for _ in range(batch_size)
            ])
            # The actor's choice must never score below a random action.
            assert torch.all(critic(obs, act) >= critic(obs, rand_act))
def abstract_test_diayn(diayn_class, device):
    """Shared checks for a DIAYN implementation wrapped by ``DiaynWrapper``.

    Verifies three things: the ``"diayn"`` entry lives on the same device as
    the observations, two consecutive resets do not yield identical
    ``"diayn"`` entries, and the ``"diayn"`` entry stays equal to the one
    from reset for every step of a 1000-step rollout.

    Args:
        diayn_class: Callable that builds the DIAYN instance under test.
        device: Device passed to ``make_env`` and to ``diayn_class``.
    """
    base_env = make_env("InvertedPendulum-v2", device)
    skill_model = diayn_class(
        base_env.observation_space, [256, 256], 1000, 2.0, _device=device
    )
    env = DiaynWrapper(base_env, skill_model)

    # Device check.
    first_obs = env.reset()
    assert first_obs["diayn"].device == first_obs["observations"].device

    # Reset check.
    before = env.reset()
    after = env.reset()
    skills_identical = torch.all(before["diayn"] == after["diayn"])
    assert not skills_identical

    # Rollout check: compare every step against the reset observation.
    start_obs = env.reset()
    for _ in range(1000):
        sampled = env.action_space.sample()
        action = torch.tensor(sampled).unsqueeze(0)
        next_obs, _reward, _done, _info = env.step(action)
        assert torch.all(start_obs["diayn"] == next_obs["diayn"])