def test_one_step(self):
    exp_source = experience.ExperienceSource(self.envs, DummyAgent(), steps_count=1)
    for exp in exp_source:
        self.assertEqual(1, len(exp))
        break
def test_two_steps(self):
    exp_source = experience.ExperienceSource(self.env, DummyAgent(), steps_count=2)
    for exp in exp_source:
        self.assertEqual(2, len(exp))
        break
def test_short_game(self):
    env = gym.make('CartPole-v0')
    exp_source = experience.ExperienceSource(env, DummyAgent(), steps_count=1)
    for step, exp in enumerate(exp_source):
        self.assertIsInstance(exp, tuple)
        self.assertIsInstance(exp[0], experience.Experience)
        if exp[0].done:
            break
def test_one_step(self):
    exp_source = experience.ExperienceSource(self.env, DummyAgent(), steps_count=1)
    for exp in exp_source:
        self.assertEqual(1, len(exp))
        self.assertIsInstance(exp, tuple)
        self.assertIsInstance(exp[0], experience.Experience)
        self.assertAlmostEqual(exp[0].reward, -1.0)
        self.assertFalse(exp[0].done)
        break
def test_short_game(self):
    env = gym.make('CartPole-v0')
    exp_source = experience.ExperienceSource(env, DummyAgent(), steps_count=1)
    for step, exp in enumerate(exp_source):
        self.assertIsInstance(exp, tuple)
        self.assertIsInstance(exp[0], experience.Experience)
        if len(exp) == 1:
            self.assertTrue(exp[0].done)
            break
def test_state(self):
    actions_count = self.envs[0].action_space.n
    my_agent = StatefulAgent(self.envs[0].action_space)
    steps = 3
    exp_source = experience.ExperienceSource(self.envs, my_agent, steps_count=steps)
    for _, exp in zip(range(100), exp_source):
        prev_act = None
        for e in exp:
            if prev_act is not None:
                self.assertEqual(e.action, (prev_act + 1) % actions_count)
            prev_act = e.action
        if len(exp) != steps:
            self.assertTrue(exp[-1].done)
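# Note: `StatefulAgent` is defined outside this excerpt. The assertion
# `e.action == (prev_act + 1) % actions_count` implies an agent that cycles
# through the discrete action space. A minimal sketch of an agent that would
# satisfy these checks (the class name and constructor argument come from the
# test above; the body and call signature are assumptions):
class StatefulAgent:
    def __init__(self, action_space):
        # Remember the size of the discrete action space and the next action to emit
        self.n = action_space.n
        self.next_action = 0

    def __call__(self, observations):
        # Return one action per observation, advancing the cyclic counter each time
        actions = []
        for _ in observations:
            actions.append(self.next_action)
            self.next_action = (self.next_action + 1) % self.n
        return actions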
@classmethod
def setUpClass(cls):
    env = gym.make("MountainCar-v0")
    cls.source = experience.ExperienceSource(env, agent=DummyAgent())
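# Note: `DummyAgent` is likewise defined outside this excerpt; for these tests
# it only needs to be a callable that returns one action per observation.
# A possible stand-in (purely illustrative, not the repository's implementation):
class DummyAgent:
    def __call__(self, observations):
        # Always pick action 0 for every observation in the batch
        return [0] * len(observations)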
                    input_shape=(1 if grayscale else 3, im_height, im_width))
if params.cuda_enabled:
    model.cuda()

# loss and optimizer for DQN training
loss_fn = nn.MSELoss(size_average=False)
optimizer = optim.Adam(model.parameters(), lr=run.getfloat("learning", "lr"))

# epsilon-greedy action selection wrapped around the DQN agent
action_selector = ActionSelectorEpsilonGreedy(epsilon=run.getfloat("defaults", "epsilon"),
                                              params=params)
target_net = agent.TargetNet(model)
dqn_agent = agent.DQNAgent(dqn_model=model, action_selector=action_selector)

# n-step experience source feeding the replay buffer
exp_source = experience.ExperienceSource(env=env_pool, agent=dqn_agent,
                                         steps_count=run.getint("defaults", "n_steps"))
exp_replay = experience.ExperienceReplayBuffer(exp_source,
                                               buffer_size=run.getint("exp_buffer", "size"))

# optionally use a separate target network for (double) DQN
use_target_dqn = run.getboolean("dqn", "target_dqn", fallback=False)
use_double_dqn = run.getboolean("dqn", "double_dqn", fallback=False)
if use_target_dqn:
    target_model = target_net.target_model
else:
    target_model = model

def batch_to_train(batch):
    """