Example No. 1
 def test_stick_20_policy_save(self):
     game = Easy21()
     policy = Stick20ActionPolicy(game.action_space)
     for time_steps in [1_000]:
         mc = MonteCarloPolicyEvaluation(env=game, policy=policy)
         mc.learn(total_timesteps=time_steps)
         # save inside the loop so the model trained for this step count is persisted
         mc.save("stick20_%s" % time_steps)
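The policy class itself is not part of this listing. A minimal sketch of what Stick20ActionPolicy presumably implements: the Easy21 baseline that sticks once the player's sum reaches 20 and hits otherwise. The action(observation) method name is an assumption; the 0/1 encoding matches Examples No. 3 and No. 6, where action=0 sticks and action=1 hits.

 ACTION_STICK, ACTION_HIT = 0, 1

 class Stick20ActionPolicy:
     def __init__(self, action_space):
         self.action_space = action_space

     def action(self, observation):
         # assumed interface: observation is a (dealer, player) score pair
         dealer, player = observation
         return ACTION_STICK if player >= 20 else ACTION_HIT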
Example No. 2
 def test_game_reset(self):
     game = Easy21()
     self.assertEqual(game.score_dealer, 0)
     self.assertEqual(game.score_player, 0)
     game.reset()
     self.assertNotEqual(game.score_dealer, 0)
     self.assertNotEqual(game.score_player, 0)
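Why the scores flip from zero to non-zero: under the published Easy21 rules each side draws one black (positive) card, valued 1..10, when a game starts. A sketch of that dealing step, based on the assignment spec rather than this repo's source (reset_scores is a hypothetical name):

 import random

 def reset_scores():
     # one black card each, value 1..10, so neither score can stay 0
     score_dealer = random.randint(1, 10)
     score_player = random.randint(1, 10)
     return score_dealer, score_player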
Example No. 3
 def test_game_step_stick(self):
     game = Easy21()
     game.reset()
     (dealer, player), reward, done, _ = game.step(action=0)
     self.assertEqual(done, True, msg="Done should be 'True'")
     self.assertIn(reward, [-1, 0, 1],
                   msg="Rewards should be in [-1, 0, 1]")
     self.__assert_scores(dealer, player, reward)
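Sticking (action=0) always ends the episode because the dealer then plays out their hand. A sketch of that branch under the standard Easy21 rules; the helper names are hypothetical and this is not the repo's step() implementation:

 import random

 def draw_card():
     # Easy21 deck: value 1..10; black (add) with prob 2/3, red (subtract) with prob 1/3
     value = random.randint(1, 10)
     return value if random.random() < 2 / 3 else -value

 def dealer_turn(score_dealer, score_player):
     # dealer hits on any sum below 17, busting outside 1..21
     while 1 <= score_dealer < 17:
         score_dealer += draw_card()
     if not 1 <= score_dealer <= 21:
         return score_dealer, 1  # dealer busts, player wins
     # otherwise reward is the sign of (player - dealer): -1, 0 or 1
     return score_dealer, (score_player > score_dealer) - (score_player < score_dealer)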
Example No. 4
 def test_epsilon_greedy__save(self):
     for _lambda in [.0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1]:
         print("Training with lambda: %s" % _lambda)
         game = Easy21()
         policy = EpsilonGreedyActionPolicy(game.action_space)
         model = Sarsa(env=game, policy=policy, _lambda=_lambda)
         model.learn(total_timesteps=1000)
         model.save("greedy_1000_%s" % _lambda)
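The _lambda swept here is the trace-decay parameter of Sarsa(lambda). A minimal sketch of the backward-view update with accumulating eligibility traces; the dict-based tables and the alpha/gamma defaults are assumptions, not this repo's internals:

 from collections import defaultdict

 def sarsa_lambda_update(Q, E, s, a, reward, s_next, a_next, done,
                         alpha=0.05, gamma=1.0, _lambda=0.5):
     # TD error against the next state-action pair (zero target past terminal)
     target = reward if done else reward + gamma * Q[(s_next, a_next)]
     delta = target - Q[(s, a)]
     E[(s, a)] += 1  # accumulating trace for the visited pair
     for key in list(E):
         Q[key] += alpha * delta * E[key]  # credit every recently visited pair
         E[key] *= gamma * _lambda         # decay all traces

 Q, E = defaultdict(float), defaultdict(float)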
Example No. 5
 def test_random_policy(self):
     game = Easy21()
     policy = RandomActionPolicy(game.action_space)
     for time_steps in [1_000, 10_000, 100_000]:
         mc = MonteCarloPolicyEvaluation(env=game, policy=policy)
         mc.learn(total_timesteps=time_steps)
         game_plots.plot(lambda d, p: mc.q_value(observation=(d, p), action=ACTION_STICK))
         game_plots.plot(lambda d, p: mc.q_value(observation=(d, p), action=ACTION_HIT))
         game_plots.plot(lambda d, p: mc.q_value_max(observation=(d, p)))
         game_plots.plot_image(lambda d, p: mc.q_max(observation=(d, p)))
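Behind q_value(): Monte Carlo policy evaluation nudges each visited Q(s, a) toward the episode return G with a 1/N(s, a) step size, which is why the plotted surfaces smooth out as time_steps grows. A sketch of the update (the table layout is an assumption):

 from collections import defaultdict

 def mc_update(Q, N, episode, G):
     # every-visit update: Q(s,a) += (G - Q(s,a)) / N(s,a)
     for observation, action in episode:
         N[(observation, action)] += 1
         Q[(observation, action)] += (G - Q[(observation, action)]) / N[(observation, action)]

 Q, N = defaultdict(float), defaultdict(int)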
Example No. 6
 def test_game_step_hit(self):
     game = Easy21()
     game.reset()
     dealer_before = game.score_dealer
     player_before = game.score_player
     (dealer, player), reward, done, _ = game.step(action=1)
     # NOTE: assumes the drawn card does not bust the player, so the episode
     # continues and no reward has been paid out yet
     self.assertEqual(done, False)
     self.assertEqual(reward, 0)
     self.assertEqual(dealer_before, dealer)
     self.assertNotEqual(player_before, player)
Example No. 7
 def test_game(self):
     env = Easy21()
     env.reset()
     while True:
         env.render()
         (dealer, player), reward, done, _ = env.step(
             action=env.action_space.sample())
         if done:
             break
     env.render()
     self.assertIn(reward, [-1, 0, 1])
     self.__assert_scores(dealer, player, reward)
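The private helper __assert_scores used in Examples No. 3 and No. 7 is not shown in this listing. A plausible reconstruction that cross-checks the final scores against the reward under standard Easy21 rules; this body is entirely hypothetical:

 def __assert_scores(self, dealer, player, reward):
     # a hand busts once it leaves the 1..21 range
     player_bust = not 1 <= player <= 21
     dealer_bust = not 1 <= dealer <= 21
     if reward == 1:     # player won: dealer busted or was outscored
         self.assertTrue(dealer_bust or (not player_bust and player > dealer))
     elif reward == -1:  # player lost: busted or was outscored
         self.assertTrue(player_bust or (not dealer_bust and dealer > player))
     else:               # draw: both valid and equal
         self.assertTrue(not player_bust and not dealer_bust and player == dealer)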
Example No. 8
 def test_epsilon_greedy_policy_1M_save(self):
     game = Easy21()
     policy = EpsilonGreedyActionPolicy(game.action_space)
     mc = MonteCarloPolicyEvaluation(env=game, policy=policy)
     mc.learn(total_timesteps=1_000_000)
     mc.save("greedy1M")
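The exploration policy used in Examples No. 4 and No. 8 is not shown either. A sketch of plain epsilon-greedy selection; the fixed epsilon, the q_values callable (mirroring mc.q_value), and the gym-style action_space.n are all assumptions (the Easy21 assignment itself decays epsilon as N0 / (N0 + N(s))):

 import random

 def epsilon_greedy_action(q_values, observation, action_space, epsilon=0.1):
     # explore with probability epsilon ...
     if random.random() < epsilon:
         return action_space.sample()
     # ... otherwise exploit the current greedy action
     return max(range(action_space.n), key=lambda a: q_values(observation, a))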