# --- Tabular Q-learning against a fixed AlphaBeta opponent on a Connect 4 position ---
# NOTE(review): `board` and `seed` are assumed to be defined in a preceding chunk
# of this file — confirm.
game = Connect4(board)
# Fix the opponent inside the MDP; presumably AlphaBeta() plays as player 1 — TODO confirm.
mdp = FixedGameMDP(game, AlphaBeta(), 1)
env = Environment(mdp)
qlearning = QLearning(
    env=env,
    qfunction=TabularQ(random_state=seed),
    policy=RandomPolicy(env.actions, random_state=seed),  # uniform random exploration
    learning_rate=0.1,
    discount_factor=1.0,  # no discounting
    n_episodes=1000
)
# Track the Q-values of the root position's legal moves during training.
qlearning.train(
    callbacks=[
        QValuesPlotter(
            state=game,
            actions=game.legal_moves(),
            filepath='figures/c4_ql_tab_qvalues.pdf'
        )
    ]
)

####################
# Generate figures #
####################

# Render the current position, then each successor position with its learned Q-value.
c42pdf('figures/c4_ql_tab_current.pdf', game.board)
for move in game.legal_moves():
    print('*' * 80)
    value = qlearning.qfunction[(game, move)]
    print('Move: %s' % move)
    print('Value: %f' % value)
    new_game = game.copy().make_move(move)
    print(new_game)
    filename = 'figures/c4_ql_tab_move_{}.pdf'.format(move)
    c42pdf(filename, new_game.board)
from capstone.game.games import Connect4 as C4
from capstone.game.utils import c42pdf


def series_to_game(series):
    '''Converts a Pandas Series to a Connect 4 game

    The first 42 entries are cell symbols ('x', 'o', or 'b' for blank),
    stored column-major from the bottom of each column; the final entry is
    the recorded outcome.  Returns a (C4 game, outcome) tuple.
    '''
    symbols = {'x': 'X', 'o': 'O', 'b': '-'}
    board = [[' '] * C4.COLS for _ in range(C4.ROWS)]
    cells = series.iloc[:-1]
    outcome = series.iloc[-1]
    for idx, raw in enumerate(cells):
        # Dataset is column-major from the bottom row up; flip to board rows.
        col, offset = divmod(idx, C4.ROWS)
        row = C4.ROWS - 1 - offset
        board[row][col] = symbols[raw]
    return C4(board), outcome


# Load UCI Connect 4 dataset
df = load_uci_c4()

# Select the instances of the dataset
ixs = [1, 5, 10, 20, 567]

# Generate pdfs of the boards
for i in ixs:
    c4, outcome = series_to_game(df.iloc[i])
    filename = 'figures/c4_exploration_{i}_{outcome}.pdf'.format(
        i=i, outcome=outcome)
    c42pdf(filename, c4.board)
# NOTE(review): this chunk begins mid-expression — it is the tail of a
# QLearning(...) constructor call whose opening lines precede this chunk.
    policy=RandomPolicy(env.actions, random_state=seed),  # uniform random exploration
    learning_rate=0.1,
    discount_factor=1.0,  # no discounting
    n_episodes=4000,
)
# Plot the Q-values of the root position's legal moves as training progresses.
qlearning.train(
    callbacks=[
        QValuesPlotter(
            state=game,
            actions=game.legal_moves(),
            filepath='figures/c4_ql_tab_simple_selfplay_progress.pdf'
        )
    ]
)

####################
# Generate figures #
####################

# Render the current position, then each successor position with its learned Q-value.
c42pdf('figures/c4_ql_tab_simple_selfplay_cur.pdf', game.board)
for move in game.legal_moves():
    print('*' * 80)
    value = qlearning.qfunction[(game, move)]
    print('Move: {}'.format(move))
    # NOTE(review): mixes str.format (above) and %-formatting (below).
    print('Value: %f' % value)
    new_game = game.copy().make_move(move)
    print(new_game)
    filename = 'figures/c4_ql_tab_simple_selfplay_move_{}.pdf'.format(move)
    c42pdf(filename, new_game.board)
'''
# NOTE(review): the ''' above closes a module docstring opened in a preceding chunk.

from capstone.game.games import Connect4
from capstone.game.utils import c42pdf
from capstone.rl import GameMDP, Environment
from capstone.rl.learners import QLearningSelfPlay

# Near-full Connect 4 starting position: 'X'/'O' are pieces, ' ' is an empty cell.
board = [['X', 'O', 'O', ' ', 'O', ' ', ' '],
         ['X', 'O', 'X', ' ', 'X', ' ', ' '],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['O', 'X', 'O', 'X', 'O', 'X', 'O'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X'],
         ['X', 'O', 'X', 'O', 'X', 'O', 'X']]
game = Connect4(board)
mdp = GameMDP(game)  # full game MDP — both sides learned via self-play
env = Environment(mdp)
qlearning = QLearningSelfPlay(env, n_episodes=1000, random_state=0)
qlearning.train()

# Render the current position, then each successor position with its learned Q-value.
c42pdf('figures/c4_ql_tabular_selfplay_current.pdf', game.board)
print(game)
# NOTE(review): this learner exposes its Q-function as `qf`, unlike `qfunction`
# elsewhere in the file — confirm the attribute name against QLearningSelfPlay.
for move in game.legal_moves():
    print('*' * 80)
    value = qlearning.qf[(game, move)]
    print('Move: %s' % move)
    print('Value: %f' % value)
    new_game = game.copy().make_move(move)
    print(new_game)
    filename = 'figures/c4_ql_tabular_selfplay_move_%s_value_%.4f.pdf' % (
        move, value)
    c42pdf(filename, new_game.board)
# The Complete Book of Connect 4 # Problem Set A (Easy) # Problem 1, Page 16 # Red wins with C4 # A B C D E F G board = [ [' ', ' ', ' ', ' ', ' ', ' ', ' '], # 6 [' ', ' ', ' ', ' ', ' ', ' ', ' '], # 5 [' ', ' ', ' ', 'O', ' ', 'O', ' '], # 4 [' ', ' ', 'O', 'X', ' ', 'X', ' '], # 3 [' ', ' ', 'X', 'O', ' ', 'X', ' '], # 2 [' ', 'O', 'O', 'X', 'X', 'X', 'O'] ] # 1 c42pdf('figures/c4_dqn_easy_board.pdf', board) game = Connect4(board) mdp = GameMDP(game) env = Environment(mdp) qnetwork = QNetwork(move_mapper, n_input_units=42, n_hidden_layers=2, n_output_units=7, n_hidden_units=100, learning_rate=0.01) egreedy = EGreedy(provider=env.actions, qfunction=qnetwork, epsilon=1.0, selfplay=True, random_state=seed) qlearning = ApproximateQLearning(env=env,