def test_cur_state_when_opponent_should_move_to_start(self):
    game = TicTacToe([['X', 'O', 'O'],
                      ['X', 'O', ' '],
                      [' ', 'X', 'X']])
    ab = AlphaBeta()
    mdp = FixedGameMDP(game, ab, 1)
    env = Environment(mdp)
    expected = TicTacToe([['X', 'O', 'O'],
                          ['X', 'O', ' '],
                          ['O', 'X', 'X']])
    self.assertEqual(env.cur_state(), expected)
def test_do_action(self):
    # X - O
    # - - X
    # - - O
    game = TicTacToe().make_moves([1, 3, 6, 9])
    mdp = FixedGameMDP(game.copy(), AlphaBeta(), 1)
    env = Environment(mdp)
    env.do_action(7)
    expected = TicTacToe().make_moves([1, 3, 6, 9, 7, 4])
    self.assertEqual(env.cur_state(), expected)
def on_episode_begin(self, episode, qfunction):
    mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
    env = Environment(mdp)
    qlearning.env = env
    egreedy.action_space = env.actions
    qlearning.policy.provider = env.actions
    if episode % 50 == 0:
        print('Episode {}'.format(episode))
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

seed = 383
random.seed(seed)
np.random.seed(seed)

# get_random_game() is not imported above; it is assumed to be defined
# elsewhere in this script.
mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
env = Environment(mdp)
c4dn = Connect4DeepNetwork()
egreedy = EGreedy(action_space=env.actions, qfunction=c4dn, epsilon=1.0,
                  selfplay=False, random_state=seed)
qlearning = ApproxQLearning(
    env=env,
    qfunction=c4dn,
    policy=egreedy,
    discount_factor=0.99,
    selfplay=False,
    experience_replay=True,
    replay_memory_size=20000,
    batch_size=32
)
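The on_episode_begin hook shown earlier presumably lives on a Callback subclass (imported here from capstone.rl.utils) that rebuilds the environment around a fresh random game each episode. A minimal sketch of how it might be wired into training, assuming qlearning.train accepts a callbacks list as in the TicTacToe script below; the class name RandomOpponentCallback is hypothetical:

# Sketch only: RandomOpponentCallback is a hypothetical wrapper around the
# on_episode_begin hook defined earlier; train(callbacks=[...]) mirrors the
# pattern used in the TicTacToe training script below.
class RandomOpponentCallback(Callback):

    def on_episode_begin(self, episode, qfunction):
        # body as defined above: swap in a new random-game MDP for this episode
        ...

qlearning.train(callbacks=[RandomOpponentCallback()])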
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import GameMDP, FixedGameMDP, Environment
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, QValuesPlotter
from capstone.rl.value_functions import MLP

seed = 23
game = TicTacToe()
env = Environment(FixedGameMDP(game, RandPlayer(), 1))
mlp = MLP()
qlearning = ApproximateQLearning(
    env=env,
    policy=RandomPolicy(env.actions, random_state=seed),
    qfunction=mlp,
    discount_factor=1.0,
    n_episodes=50000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=5000,
            # filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
            filepath='figures/test88.pdf'
        )
    ]
)
def test_cur_state(self):
    game = TicTacToe()
    mdp = FixedGameMDP(game, AlphaBeta(), 1)
    env = Environment(mdp)
    self.assertEqual(env.cur_state(), mdp.start_state())
    self.assertEqual(env.cur_state(), game)
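For completeness, these Environment tests presumably sit inside a standard unittest.TestCase; a minimal scaffold, assuming AlphaBeta is importable from capstone.game.players alongside RandPlayer (the exact module path and the class name TestEnvironment are assumptions):

import unittest

from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta  # assumed location of AlphaBeta
from capstone.rl import Environment, FixedGameMDP


class TestEnvironment(unittest.TestCase):

    # test_cur_state, test_do_action, and
    # test_cur_state_when_opponent_should_move_to_start (defined above) go here.
    pass


if __name__ == '__main__':
    unittest.main()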