def on_episode_begin(self, episode, qfunction):
    # Callback hook: before every episode, rebuild the MDP/environment around
    # a fresh random game so the learner trains on varied positions.
    # NOTE(review): relies on module-level `seed`, `qlearning`, and `egreedy`
    # defined elsewhere in the surrounding script — confirm against full file.
    mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
    env = Environment(mdp)
    # Re-point the learner and the exploration policy at the new environment.
    qlearning.env = env
    egreedy.action_space = env.actions
    qlearning.policy.provider = env.actions
    # Periodic progress heartbeat.
    if episode % 50 == 0:
        print('Episode {}'.format(episode))
from capstone.game.games import TicTacToe
from capstone.game.players import KerasPlayer, RandPlayer
from capstone.game.utils import play_series

# Evaluate the trained Keras agent against a random opponent on TicTacToe.
game = TicTacToe()
players = [KerasPlayer('models/qltic.h5'), RandPlayer()]
play_series(game, players, n_matches=1000)
from capstone.datasets.ucic4 import get_random_game, get_random_loss_game
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

# Deep Q-learning setup for Connect-4: fixed random opponent, e-greedy
# exploration, experience replay. Seed both RNGs for reproducibility.
seed = 383
random.seed(seed)
np.random.seed(seed)

# MDP over a random UCI Connect-4 position; the opponent plays randomly and
# the learner controls player 1.
mdp = FixedGameMDP(get_random_game(), RandPlayer(random_state=seed), 1)
env = Environment(mdp)
c4dn = Connect4DeepNetwork()
# Fully exploratory policy (epsilon=1.0); presumably annealed elsewhere
# (LinearAnnealing is imported above) — confirm.
egreedy = EGreedy(action_space=env.actions, qfunction=c4dn, epsilon=1.0,
                  selfplay=False, random_state=seed)
qlearning = ApproxQLearning(env=env, qfunction=c4dn, policy=egreedy,
                            discount_factor=0.99, selfplay=False,
                            experience_replay=True, replay_memory_size=20000,
                            batch_size=32)
# Tabular Q-learning via self-play on the full game MDP.
# NOTE(review): `game`, `seed`, `QLearning`, `TabularVF` come from outside
# this fragment — confirm against the full script.
mdp = GameMDP(game)
env = Environment(mdp)
qlearning = QLearning(env=env,
                      qfunction=TabularVF(random_state=seed),
                      policy=RandomPolicy(action_space=env.action_space,
                                          random_state=seed),
                      learning_rate=0.1,
                      discount_factor=1.0,
                      selfplay=True)


class Monitor(Callback):
    """Print a heartbeat every 100 episodes."""

    def on_episode_begin(self, episode, qfunction):
        if episode % 100:
            return
        print('Episode {}'.format(episode))


qlearning.train(
    n_episodes=70000,
    callbacks=[
        Monitor(),
        EpisodicWLDPlotter(
            game=game,
            opp_player=RandPlayer(random_state=seed),
            n_matches=1000,
            period=1000,
            filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf',
        ),
    ])
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.game.utils import play_series

# Baseline: two random players, default number of matches.
game = TicTacToe()
players = [RandPlayer() for _ in range(2)]
play_series(game, players)
selfplay=True, experience_replay=True, replay_memory_size=10000, batch_size=32) class Monitor(Callback): def on_episode_begin(self, episode, qfunction): if episode % 50 == 0: print('Episode {}'.format(episode)) qlearning.train( n_episodes=1750, callbacks=[ EpisodicWLDPlotter(game=game, opp_player=RandPlayer(), n_matches=1000, period=250, filepath='figures/c4_dqn_simple.pdf'), # LinearAnnealing(egreedy, 'epsilon', init=1.0, final=0.1, n_episodes=1000), Monitor() ]) from capstone.game.players import GreedyQ g = GreedyQ(qnetwork) print 'Move:', g.choose_move(game) # IMPORTANT: dont forget to filter the best value, ignore the ilegal moves
from capstone.game.games import Connect4
from capstone.game.players import RandPlayer
from capstone.game.utils import play_series


class MyPlayer(object):
    """Deterministic baseline: always plays the first legal move."""

    def choose_move(self, game):
        moves = game.legal_moves()
        return moves[0]


my = MyPlayer()
game = Connect4()
players = [my, RandPlayer()]
play_series(game, players, n_matches=1000)
from capstone.game.games import TicTacToe
from capstone.game.players import MonteCarlo, RandPlayer
from capstone.game.utils import play_series

# Evaluate Monte-Carlo vs. random from both seats.
n_matches = 10
game = TicTacToe()
players = [MonteCarlo(), RandPlayer()]
play_series(game, players, n_matches)
print('')
# Swap seats and repeat.
players.reverse()
play_series(game, players, n_matches)
from capstone.game.games import TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import GameMDP, FixedGameMDP, Environment
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, QValuesPlotter
from capstone.rl.value_functions import MLP

# Approximate Q-learning with an MLP on TicTacToe against a fixed random
# opponent; W/L/D curve is plotted periodically during training.
seed = 23
game = TicTacToe()
env = Environment(FixedGameMDP(game, RandPlayer(), 1))
mlp = MLP()
qlearning = ApproximateQLearning(
    env=env,
    policy=RandomPolicy(env.actions, random_state=seed),
    qfunction=mlp,
    discount_factor=1.0,
    n_episodes=50000,
)
qlearning.train(callbacks=[
    EpisodicWLDPlotter(
        game=game,
        opp_player=RandPlayer(random_state=seed),
        n_matches=1000,
        period=5000,
        # filepath='../mlnd-capstone-report/figures/tic_ql_tab_full_selfplay_wld_plot.pdf'
        filepath='figures/test88.pdf',
    ),
])
from capstone.game.games import TicTacToe
from capstone.game.players import AlphaBeta, RandPlayer
from capstone.game.utils import play_series

# Alpha-beta vs. random, then the same match-up with seats swapped.
n_matches = 10
game = TicTacToe()
players = [AlphaBeta(), RandPlayer()]
print('Players: {}\n'.format(players))
play_series(game, players, n_matches)
players.reverse()
print('\nPlayers: {}\n'.format(players))
play_series(game, players, n_matches)
from capstone.rl.value_functions import MLP
from capstone.game.players import GreedyQ, RandPlayer
from capstone.game.utils import play_series
from capstone.game.games import Connect4

# Evaluate a greedy player driven by the trained network against random play.
# NOTE(review): `model` is expected to be a trained Keras model produced
# earlier in the script — confirm against the full file.
mlp = MLP()
mlp.model = model
n_matches = 1000
results = play_series(
    # Fix: was `game=C4()` — `C4` is never defined here (only `Connect4` is
    # imported), which would raise NameError at runtime.
    game=Connect4(),
    players=[GreedyQ(mlp), RandPlayer()],
    n_matches=n_matches,
    verbose=True)
print('Win:', results['W'] / float(n_matches))
print('Draw:', results['D'] / float(n_matches))
print('Loss:', results['L'] / float(n_matches))
'''
The Q-learning algorithm is used to learn a function approximator for the
state-action values of Connect-4 positions.
'''
from capstone.game.games import Connect4, TicTacToe
from capstone.game.players import RandPlayer
from capstone.rl import Environment, GameMDP, FixedGameMDP
from capstone.rl.learners import ApproximateQLearning
from capstone.rl.policies import EGreedy, RandomPolicy
from capstone.rl.utils import EpisodicWLDPlotter, Callback, LinearAnnealing
from capstone.rl.value_functions import MLP, QNetwork

# game = Connect4()
game = TicTacToe()
# mdp = GameMDP(game)
mdp = FixedGameMDP(game, RandPlayer(), 1)
env = Environment(mdp)
# 9 inputs/outputs = TicTacToe board cells; the commented variants are the
# 42-in / 7-out Connect-4 configuration.
# qnetwork = QNetwork(n_input_units=42, n_output_units=7)
qnetwork = QNetwork(n_input_units=9, n_hidden_layers=3, n_output_units=9, n_hidden_units=100)
# qnetwork = QNetwork(n_input_units=42, n_hidden_layers=3, n_output_units=7, n_hidden_units=100)
# NOTE(review): `egreedy` (epsilon=1.0) is created but never used — the
# learner below constructs a fresh EGreedy with epsilon=0.3. Confirm which
# epsilon was intended.
egreedy = EGreedy(env.actions, qnetwork, 1.0)
qlearning = ApproximateQLearning(
    env=env,
    qfunction=qnetwork,
    policy=EGreedy(env.actions, qnetwork, 0.3),
    discount_factor=0.99,  # change this to 1, and say because is deterministic
    n_episodes=100000,
    experience_replay=False)
from keras.models import load_model
from capstone.game.games import Connect4
from capstone.game.players import AlphaBeta, GreedyQ, KerasPlayer, RandPlayer
from capstone.game.players.kerasplayer import KerasStatePlayer
from capstone.game.utils import play_match, play_series
from capstone.rl.value_functions import QNetwork

# Evaluate a saved Keras state-value player from the second seat against a
# random opponent.
saved = KerasStatePlayer('models/episode-14500-winpct-0.942')
results = play_series(
    game=Connect4(),
    players=[RandPlayer(), saved],
    n_matches=100,
    verbose=True)
import numpy as np
from capstone.datasets.ucic4 import get_random_win_game, get_random_loss_game
from capstone.game.players.kerasplayer import KerasStatePlayer
from capstone.game.players import RandPlayer
from capstone.game.utils import play_match, play_series
from capstone.utils import print_aec, str_aec

keras = KerasStatePlayer('models/episode-14500-winpct-0.942')
rnd = RandPlayer()

N_EVALUATIONS = 100
N_MATCHES_PER_EVALUATION = 100


def run_evaluation(generator, players, expected):
    '''
    Returns the mean frequency of the expected outcome (the accuracy of the
    prediction).

    generator -- zero-argument callable producing a fresh game per evaluation.
    players   -- pair of players handed to play_series.
    expected  -- result key ('W', 'D' or 'L') whose frequency is measured.
    '''
    # Fix: converted Python 2 `print ...` statements to print() calls —
    # the statement form is a SyntaxError on Python 3 and inconsistent with
    # the print() calls used elsewhere in this file.
    print('Running experiment for %s' % expected)
    outcomes = []
    for i in range(N_EVALUATIONS):
        print('Episode %d' % i)
        results = play_series(
            game=generator(),
            players=players,
            n_matches=N_MATCHES_PER_EVALUATION,
            verbose=False
        )
        # Fraction of matches that ended with the expected result.
        outcomes.append(results[expected] / float(N_MATCHES_PER_EVALUATION))
    return np.mean(outcomes)