def main():
    """Start and play the game."""
    print("Starting the game.")
    initial_board = Board()
    game = Game(initial_board)
    game.play()
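# A minimal entry-point guard for the snippet above, so the module can be run
# directly. The guard itself is an illustrative addition, not part of the
# original file.
if __name__ == "__main__":
    main()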
def run():
    new_game = Game()
    new_game.position.display_position("W")
    while True:
        if new_game.curr_player == "W":
            # get the move (entering 99 quits the loop)
            curr_col = input_number("Enter curr_col: ")
            if curr_col == 99:
                break
            curr_row = input_number("Enter curr_row: ")
            next_col = input_number("Enter next_col: ")
            next_row = input_number("Enter next_row: ")
            new_game.move(curr_col, curr_row, next_col, next_row)
            # new_game.move(4, 1, 4, 3)
        else:
            # use MCTS for Black's move
            move = MCTS.run(2, 30, new_game)
            new_game.move(move.from_x, move.from_y, move.to_x, move.to_y)
        new_game.position.display_position("W")
        if new_game.game_is_over():
            break
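# input_number is called by the run() functions but is not defined in these
# snippets. A minimal sketch of what it is assumed to do (prompt until the
# user enters a valid integer); the real helper may differ.
def input_number(prompt):
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            print("Please enter a whole number.")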
def duplicate_game(self):
    temp_game = Game()
    # copy the position
    for x in range(8):
        for y in range(8):
            temp_game.position.locations[x][y] = self.state.position.locations[x][y]
    temp_game.position.W_King_moved = self.state.position.W_King_moved
    temp_game.position.W_0_Rook_moved = self.state.position.W_0_Rook_moved
    temp_game.position.W_7_Rook_moved = self.state.position.W_7_Rook_moved
    temp_game.position.B_King_moved = self.state.position.B_King_moved
    temp_game.position.B_0_Rook_moved = self.state.position.B_0_Rook_moved
    temp_game.position.B_7_Rook_moved = self.state.position.B_7_Rook_moved
    temp_game.position.W_passant = self.state.position.W_passant
    temp_game.position.B_passant = self.state.position.B_passant
    temp_game.position.W_King_loc = self.state.position.W_King_loc
    temp_game.position.B_King_loc = self.state.position.B_King_loc
    temp_game.position.W_King_check = self.state.position.W_King_check
    temp_game.position.B_King_check = self.state.position.B_King_check
    # copy game attributes
    temp_game.drawing_moves = self.state.drawing_moves
    temp_game.curr_player = self.state.curr_player
    temp_game.opp_player = self.state.opp_player
    # copy the state history
    temp_game.position_history = list(self.state.position_history)
    return temp_game
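# Design note: the field-by-field copy above avoids deep-copying the whole
# Game object inside a search loop. A shorter (but typically slower) drop-in
# alternative is sketched below; it assumes Game holds no attributes that
# must stay shared between copies, which the original code does not confirm.
import copy

def duplicate_game_simple(self):
    return copy.deepcopy(self.state)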
def run():
    new_game = Game()
    new_game.position.display_position("W")
    while True:
        # get the move
        curr_col = input_number("Enter curr_col: ")
        if curr_col == 99:
            break
        curr_row = input_number("Enter curr_row: ")
        next_col = input_number("Enter next_col: ")
        next_row = input_number("Enter next_row: ")
        new_game.move(curr_col, curr_row, next_col, next_row)
        new_game.position.display_position("W")
        # test FEN
        print("FEN list: {}".format(new_game.position_history))
        if new_game.game_is_over():
            break
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 12 09:07:33 2018

@author: hztengkezhen
"""
from mcts import MCTS
from mcts import MCTS_player
from board import Board
from board import Game
from board import Random_player
from board import Human_player

g = Game(9)
rp = Random_player()
hp = Human_player()
mp = MCTS_player()

# num = 0
# for i in range(1):
#     if g.game_start(hp, mp) == 2:
#         num += 1
# print num

# moves are addressed by a flat index into the 9x9 board,
# so 4 * 9 + 4 is the centre point
board = Board(9)
board.move(4 * 9 + 4)
board.move(0 * 9 + 0)
board.move(4 * 9 + 5)
board.move(8 * 9 + 8)
board.move(4 * 9 + 6)
board.move(0 * 9 + 8)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 23:14:41 2018

@author: ubuntu
"""
from board import Board
from board import Game
from board import Random_player

g = Game(9)
num = 0
for i in range(100):
    w = g.self_play(Random_player())
    if w == 1:
        num += 1
print num
from board import Game

test = Game(10, 10,
            [(0, 0), (4, 4), (5, 6), (7, 3), (9, 9)],
            [(8, 8), (8, 9)],
            [[(2, 1), (2, 2)], [(3, 1), (3, 2)], [(4, 1), (4, 2), (4, 3)]])
print(test.boardState())
    # if all placements have been tried, just keep going to the next position/piece
    # if all pieces have been tried/placed:
    if len(named_pieces) == 0:
        # they have all been placed: show and record the solution
        print(state)
        with open('solutions.txt', 'a+') as f:
            f.write(str(state))
        return
    else:
        print('Unsuccessful attempt:')
        print(state)
        return  # return to previous recursion state


if __name__ == "__main__":
    # some pieces should not be rotated (p0, p5);
    # some can be rotated, but should not be mirrored (p4, p6);
    # finally, p9 should only be rotated, and only once.
    # all other pieces should be rotated (4 states), then mirrored (5th state)
    # and rotated three more times: 4 + 4 = 8 states
    # (see the orientation sketch below)
    specific_n_placements = [1, 8, 8, 8, 4, 1, 4, 8, 8, 2]

    # initialise start state: all pieces, empty board, all possible positions
    named_pieces = list(zip(range(len(pieces)), pieces))
    state = Game()
    positions = [(y, x)
                 for y in range(state.board_size)
                 for x in range(state.board_size)]
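# The placement counts above come from each piece's symmetry group: up to 4
# rotations, then a mirror and 4 more rotations, minus duplicates. A minimal
# sketch of how such orientations could be generated with NumPy; the function
# name and the 2D-array representation of a piece are assumptions, not part
# of the original solver.
import numpy as np

def unique_orientations(piece):
    """Return the distinct rotations/mirrors of a piece given as a 2D 0/1 array."""
    seen = []
    grid = np.asarray(piece)
    for flipped in (grid, np.fliplr(grid)):
        for k in range(4):
            candidate = np.rot90(flipped, k)
            if not any(candidate.shape == s.shape and (candidate == s).all()
                       for s in seen):
                seen.append(candidate)
    return seen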
import unittest

from game import Game  # assumed location of the Game class under test


class BoardTest(unittest.TestCase):

    def setUp(self):
        """Initial set-up."""
        self.game = Game()
        self.round_list = []
        self.winners_list = []

    def test_game_stats(self):
        """Returns the statistics:
        1 - matches ended by timeout
        2 - average number of turns per match
        3 - win percentage by player behaviour
        4 - which player behaviour wins the most
        """
        # simulation of 300 games
        for _ in range(300):
            self.players = [
                {'impulsive': 'player 1', 'balance': 300, 'position': 0},
                {'demanding': 'player 2', 'balance': 300, 'position': 0},
                {'cautious': 'player 3', 'balance': 300, 'position': 0},
                {'random': 'player 4', 'balance': 300, 'position': 0},
            ]
            self.props = [
                {'name': 'São Paulo', 'owner': False, 'price': 100, 'rent_value': 60},
                {'name': 'Osasco', 'owner': False, 'price': 95, 'rent_value': 59},
                {'name': 'Barueri', 'owner': False, 'price': 90, 'rent_value': 58},
                {'name': 'Carapicuiba', 'owner': False, 'price': 85, 'rent_value': 57},
                {'name': 'Maceio', 'owner': False, 'price': 80, 'rent_value': 56},
                {'name': 'Itapevi', 'owner': False, 'price': 75, 'rent_value': 55},
                {'name': 'Manaus', 'owner': False, 'price': 70, 'rent_value': 40},
                {'name': 'Porto Alegre', 'owner': False, 'price': 65, 'rent_value': 39},
                {'name': 'Piraju', 'owner': False, 'price': 65, 'rent_value': 38},
                {'name': 'Campinas', 'owner': False, 'price': 60, 'rent_value': 37},
                {'name': 'Sorocaba', 'owner': False, 'price': 55, 'rent_value': 36},
                {'name': 'Rio de Janeiro', 'owner': False, 'price': 50, 'rent_value': 35},
                {'name': 'Taboão da Serra', 'owner': False, 'price': 45, 'rent_value': 20},
                {'name': 'Niteroi', 'owner': False, 'price': 40, 'rent_value': 19},
                {'name': 'Florianopolis', 'owner': False, 'price': 35, 'rent_value': 18},
                {'name': 'Goiania', 'owner': False, 'price': 30, 'rent_value': 17},
                {'name': 'Santana de Parnaiba', 'owner': False, 'price': 25, 'rent_value': 16},
                {'name': 'Pirapora do Bom Jesus', 'owner': False, 'price': 20, 'rent_value': 15},
                {'name': 'Mogi das Cruzes', 'owner': False, 'price': 15, 'rent_value': 10},
                {'name': 'Guarulhos', 'owner': False, 'price': 10, 'rent_value': 9},
            ]
            result = self.game.start_game(self.players, self.props)
            self.round_list.append(result.get('round'))
            self.winners_list.append(result.get('winner'))

        # statistics treatment
        timeout = len([r for r in self.round_list if r == 1000])
        round_average = sum(self.round_list) / 300
        impulsive = (100 / 300) * self.winners_list.count('impulsive')
        demanding = (100 / 300) * self.winners_list.count('demanding')
        cautious = (100 / 300) * self.winners_list.count('cautious')
        random = (100 / 300) * self.winners_list.count('random')
        no_winner = (100 / 300) * self.winners_list.count(False)
        count_winners_list = [impulsive, demanding, cautious, random]
        players_list = ['Impulsive', 'Demanding', 'Cautious', 'Random']
        biggest_winner_position = count_winners_list.index(max(count_winners_list))

        # statistics prints
        print("Matches ended by timeout: {}".format(timeout))
        print("Average number of turns per match: {:.2f}".format(round_average))
        print("Win percentage by behaviour: Impulsive {:.2f}% - Demanding {:.2f}% - "
              "Cautious {:.2f}% - Random {:.2f}%".format(impulsive, demanding,
                                                         cautious, random))
        print("Behaviour that wins the most: {}".format(players_list[biggest_winner_position]))
import random
from collections import deque

import numpy as np
import torch
from tqdm import trange

from board import Board, Game  # module paths as in the sibling scripts
from mcts import MCTS_player
# Pure_MCTS_player, GoMoku_player and Policy_Value_net are assumed to come
# from project-local modules not shown here.


class train_pipeline:
    def __init__(self, board_size=9, n_playout=2000, init_model=None, use_cuda=False):
        self.board_size = board_size
        self.board = Board(self.board_size)
        self.learning_rate = 2e-3
        self.learning_rate_multiplier = 1.0
        self.n_playout = n_playout
        self.c_puct = 1.0
        self.buffer_size = 10000
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5  # num of train_steps for each update
        self.kl_targ = 0.02
        self.check_freq = 10
        self.game_batch_num = 1500
        self.best_win_ratio = 0.0
        self.game = Game(board_size)
        self.heat_start = 30  # warm-up epochs for the learning rate
        self.evaluation_time = 10
        if init_model:
            self.p_v_net: Policy_Value_net = Policy_Value_net(
                self.board_size, init_model=init_model, use_cuda=use_cuda)
        else:
            self.p_v_net = Policy_Value_net(self.board_size, use_cuda=use_cuda)
        self.p_v_function = self.p_v_net.p_v_function
        self.mcts_player = MCTS_player(self.p_v_function, self.n_playout,
                                       self.c_puct, is_self_play=True)
        self.mcts_pure = Pure_MCTS_player()
        self.random_player = GoMoku_player()

    def data_augmentation(self, play_data):
        """Extend the data set with the eight board symmetries (rotations and flips)."""
        extend_data = []
        for state, mcts_prob, winner in play_data:
            for i in range(1, 5):
                # rotate counterclockwise
                equi_state = np.array([np.rot90(s, i) for s in state])
                equi_mcts_prob = np.rot90(
                    np.flipud(mcts_prob.reshape(self.board_size, self.board_size)), i)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
                # flip horizontally
                equi_state = np.array([np.fliplr(s) for s in equi_state])
                equi_mcts_prob = np.fliplr(equi_mcts_prob)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
        return extend_data

    def self_play(self, n_times=1, is_shown=False):
        for _ in range(n_times):
            winner, play_data = self.game.start_self_play(self.mcts_player,
                                                          is_shown=is_shown,
                                                          temp=1.0)
            play_data = list(play_data)
            self.episode_len = len(play_data)
            play_data = self.data_augmentation(play_data)
            self.data_buffer.extend(play_data)

    def get_learning_rate(self, epoch):
        # linear warm-up over the first heat_start epochs
        if epoch > self.heat_start:
            return self.learning_rate
        else:
            return self.learning_rate * epoch / self.heat_start

    def policy_update(self):
        mini_batch = random.sample(self.data_buffer, self.batch_size)
        state_batch = [data[0] for data in mini_batch]
        mcts_probs_batch = [data[1] for data in mini_batch]
        winner_batch = [data[2] for data in mini_batch]
        state_batch = torch.Tensor(state_batch)
        mcts_probs_batch = torch.Tensor(mcts_probs_batch)
        winner_batch = torch.Tensor(winner_batch)
        for i in range(self.epochs):
            loss, entropy = self.p_v_net.train_step(state_batch,
                                                    mcts_probs_batch,
                                                    winner_batch,
                                                    self.get_learning_rate(i))
            print(loss, entropy)
        return loss, entropy

    def policy_evaluate(self, player2, is_shown=False):
        state = self.mcts_player.get_player_state()
        self.mcts_player.change_to_test_mode()
        win_table = np.zeros([self.evaluation_time, 2])
        for i in range(self.evaluation_time):
            winner = self.game.start_play(self.mcts_player, player2, is_shown=is_shown)
            win_table[i, 0] = int(winner == "X")
        for i in range(self.evaluation_time):
            winner = self.game.start_play(self.random_player, player2)
            win_table[i, 1] = int(winner == "O")
        self.mcts_player.reset_player_state(state)
        return win_table.mean()

    def train(self, is_shown=False):
        try:
            for i in trange(self.game_batch_num):
                self.self_play(self.play_batch_size, is_shown=is_shown)
                print("batch i:{}, episode_len:{}".format(i + 1, self.episode_len))
                if len(self.data_buffer) > self.batch_size:
                    loss, entropy = self.policy_update()
                # check the performance of the current model,
                # and save the model params
                if (i + 1) % self.check_freq == 0:
                    print("current self-play batch: {}".format(i + 1))
                    win_ratio = self.policy_evaluate(self.random_player)
                    self.p_v_net.save_model(
                        './current_policy_{}.model'.format(self.board_size))
                    if win_ratio > self.best_win_ratio:
                        print("New best policy!!!!!!!!", win_ratio)
                        self.best_win_ratio = win_ratio
                        # update the best_policy
                        self.p_v_net.save_model(
                            './best_policy_{}.model'.format(self.board_size))
                        # if (self.best_win_ratio == 1.0 and
                        #         self.pure_mcts_playout_num < 5000):
                        #     self.pure_mcts_playout_num += 1000
                        #     self.best_win_ratio = 0.0
        except KeyboardInterrupt:
            print('\n\rquit')
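# A hedged usage sketch for the pipeline above: construct it with default
# settings and train until interrupted. The guard and the argument values are
# illustrative assumptions, not part of the original file.
if __name__ == "__main__":
    pipeline = train_pipeline(board_size=9, n_playout=400)
    pipeline.train(is_shown=False)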