def on_episode_begin(self, episode, qfunction):
    # Callback hook (presumably on a Callback subclass): rebuild the MDP and
    # environment around a fresh random game at the start of every episode and
    # point the learner and its policy at the new action space.
    mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
    env = Environment(mdp)
    qlearning.env = env
    egreedy.action_space = env.actions
    qlearning.policy.provider = env.actions
    if episode % 50 == 0:
        print('Episode {}'.format(episode))
from capstone.datasets.ucic4 import get_random_game, get_random_loss_game
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

# Seed all random number generators for reproducibility.
seed = 383
random.seed(seed)
np.random.seed(seed)

# Deep Q-learning on random UCI Connect-4 positions against a random opponent,
# with an e-greedy behaviour policy and experience replay.
mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
env = Environment(mdp)
c4dn = Connect4DeepNetwork()
egreedy = EGreedy(action_space=env.actions, qfunction=c4dn, epsilon=1.0,
                  selfplay=False, random_state=seed)
qlearning = ApproxQLearning(env=env, qfunction=c4dn, policy=egreedy,
                            discount_factor=0.99, selfplay=False,
                            experience_replay=True, replay_memory_size=20000,
                            batch_size=32)
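# Illustrative sketch only (not the capstone implementation): what the
# experience_replay=True, replay_memory_size=20000, batch_size=32 options above
# are assumed to do internally -- keep transitions in a bounded buffer and
# sample decorrelated minibatches for each network update. The helper name
# sample_minibatch is hypothetical.
from collections import deque
import random

replay_memory = deque(maxlen=20000)  # oldest transitions are discarded first

def sample_minibatch(transition, batch_size=32):
    """Store one (state, action, reward, next_state, done) transition and,
    once enough transitions have accumulated, return a random minibatch."""
    replay_memory.append(transition)
    if len(replay_memory) < batch_size:
        return []
    return random.sample(list(replay_memory), batch_size)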
from capstone.game.games import Connect4
from capstone.game.players import AlphaBeta
from capstone.game.utils import c42pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import QValuesPlotter
from capstone.rl.value_functions import TabularQ

seed = 23

# Near-terminal Connect-4 position used as the fixed starting state.
board = [['X', 'O', 'O', ' ', 'O', ' ', ' '],
         ['X', 'O', 'X', ' ', 'X', ' ', ' '],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X']]

game = Connect4(board)
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)
qlearning = QLearning(env=env,
                      qfunction=TabularQ(random_state=seed),
                      policy=RandomPolicy(env.actions, random_state=seed),
                      learning_rate=0.1,
                      discount_factor=1.0,
                      n_episodes=1000)
qlearning.train(
    callbacks=[
        QValuesPlotter(state=game,
                       actions=game.legal_moves(),
                       filepath='figures/c4_ql_tab_qvalues.pdf')
    ]
)

####################
# Generate figures #
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import GameMDP, FixedGameMDP, Environment
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, QValuesPlotter
from capstone.rl.value_functions import MLP

seed = 23
game = TicTacToe()
env = Environment(FixedGameMDP(game, RandPlayer(), 1))
mlp = MLP()
qlearning = ApproximateQLearning(
    env=env,
    policy=RandomPolicy(env.actions, random_state=seed),
    qfunction=mlp,
    discount_factor=1.0,
    n_episodes=50000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=5000,
            # filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
            filepath='figures/test88.pdf'
        )
    ]
)
'''
Q-Learning is used to estimate the state-action values for all
Tic-Tac-Toe positions against a random opponent.
'''
from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta, RandPlayer
from capstone.game.utils import tic2pdf
from capstone.rl import FixedGameMDP, Environment
from capstone.rl.learners import QLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter
from capstone.rl.value_functions import TabularQ

seed = 23
game = TicTacToe()
mdp = FixedGameMDP(game, RandPlayer(random_state=seed), 1)
env = Environment(mdp)
qlearning = QLearning(
    env=env,
    qfunction=TabularQ(random_state=seed),
    policy=RandomPolicy(env.actions, random_state=seed),
    learning_rate=0.1,
    discount_factor=1.0,
    n_episodes=65000
)
qlearning.train(
    callbacks=[
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
        )
    ]
)
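# Illustrative sketch only (not the capstone QLearning/TabularQ code): the
# one-step tabular backup the script above is assumed to perform, with
# learning_rate=0.1 and discount_factor=1.0. The function name q_update and the
# dict-based table are hypothetical; states must be hashable for this sketch.
def q_update(qtable, state, action, reward, next_state, next_actions,
             learning_rate=0.1, discount_factor=1.0):
    """Apply Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
    best_next = max((qtable.get((next_state, a), 0.0) for a in next_actions),
                    default=0.0)  # defaults to 0.0 at terminal states (no actions)
    td_error = reward + discount_factor * best_next - qtable.get((state, action), 0.0)
    qtable[(state, action)] = qtable.get((state, action), 0.0) + learning_rate * td_error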
'''
The Q-learning algorithm is used to learn a function approximator for
the state-action values of Connect-4 positions.
'''
from capstone.game.games import Connect4, TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions import MLP, QNetwork

# game = Connect4()
game = TicTacToe()
# mdp = GameMDP(game)
mdp = FixedGameMDP(game, RandPlayer(), 1)
env = Environment(mdp)
# qnetwork = QNetwork(n_input_units=42, n_output_units=7)
qnetwork = QNetwork(n_input_units=9, n_hidden_layers=3, n_output_units=9,
                    n_hidden_units=100)
# qnetwork = QNetwork(n_input_units=42, n_hidden_layers=3, n_output_units=7,
#                     n_hidden_units=100)
egreedy = EGreedy(env.actions, qnetwork, 1.0)
qlearning = ApproximateQLearning(
    env=env,
    qfunction=qnetwork,
    policy=EGreedy(env.actions, qnetwork, 0.3),
    discount_factor=0.99,  # TODO: change to 1.0 and note that the game is deterministic
    n_episodes=100000,
    experience_replay=False)
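# Illustrative sketch only (not the capstone EGreedy class): the selection rule
# that EGreedy(actions, qnetwork, epsilon) is assumed to implement. The helper
# name egreedy_action and the qvalue(state, action) callable are hypothetical.
import random

def egreedy_action(actions, qvalue, state, epsilon):
    """With probability epsilon pick a random (exploratory) action; otherwise
    pick the action with the highest estimated Q-value for the current state."""
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: qvalue(state, a))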