Example No. 1
def main():
    """This function start and plays the game."""
    print("Starting the game.")
    initial_board = Board()
    game = Game(initial_board)

    game.play()
Example No. 2
def run():
    new_game = Game()
    new_game.position.display_position("W")

    while True:

        if new_game.curr_player == "W":

            # get the move (entering 99 for curr_col exits the loop)
            curr_col = input_number("Enter curr_col: ")
            if curr_col == 99:
                break
            curr_row = input_number("Enter curr_row: ")
            next_col = input_number("Enter next_col: ")
            next_row = input_number("Enter next_row: ")

            new_game.move(curr_col, curr_row, next_col, next_row)

            # new_game.move(4, 1, 4, 3)

        # use MCTS for black's move
        else:
            move = MCTS.run(2, 30, new_game)
            new_game.move(move.from_x, move.from_y, move.to_x, move.to_y)

        new_game.position.display_position("W")

        if new_game.game_is_over():
            break
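The loop above relies on an input_number helper that the snippet does not show. A minimal sketch of such a helper, assuming it simply prompts for an integer (the retry-on-bad-input loop is an added assumption):

def input_number(prompt):
    # Hypothetical helper: keep prompting until the user types a whole number.
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            print("Please enter a whole number.")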
Example No. 3
    def duplicate_game(self):
        temp_game = Game()

        # copy the position
        for x in range(8):
            for y in range(8):
                temp_game.position.locations[x][y] = self.state.position.locations[x][y]

        temp_game.position.W_King_moved = self.state.position.W_King_moved
        temp_game.position.W_0_Rook_moved = self.state.position.W_0_Rook_moved
        temp_game.position.W_7_Rook_moved = self.state.position.W_7_Rook_moved
        temp_game.position.B_King_moved = self.state.position.B_King_moved
        temp_game.position.B_0_Rook_moved = self.state.position.B_0_Rook_moved
        temp_game.position.B_7_Rook_moved = self.state.position.B_7_Rook_moved
        temp_game.position.W_passant = self.state.position.W_passant
        temp_game.position.B_passant = self.state.position.B_passant
        temp_game.position.W_King_loc = self.state.position.W_King_loc
        temp_game.position.B_King_loc = self.state.position.B_King_loc
        temp_game.position.W_King_check = self.state.position.W_King_check
        temp_game.position.B_King_check = self.state.position.B_King_check

        # copy game attributes
        temp_game.drawing_moves = self.state.drawing_moves
        temp_game.curr_player = self.state.curr_player
        temp_game.opp_player = self.state.opp_player

        # copy the state history
        temp_game.position_history = []
        for history in self.state.position_history:
            temp_game.position_history.append(history)

        return temp_game
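The field-by-field copy above keeps explicit control over what gets duplicated. Assuming self.state is the Game instance being copied (as the attribute accesses suggest) and that it holds only plain values, a shorter, though usually slower, sketch using the standard library could look like this:

import copy

def duplicate_game(self):
    # Deep-copy the wrapped Game, including its position, castling/en-passant flags and history.
    return copy.deepcopy(self.state)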
Example No. 4
def run():
    new_game = Game()
    new_game.position.display_position("W")

    while True:

        # get the move (entering 99 for curr_col exits the loop)
        curr_col = input_number("Enter curr_col: ")
        if curr_col == 99:
            break
        curr_row = input_number("Enter curr_row: ")
        next_col = input_number("Enter next_col: ")
        next_row = input_number("Enter next_row: ")

        new_game.move(curr_col, curr_row, next_col, next_row)
        new_game.position.display_position("W")

        # test FEN
        print("FEN list: {}".format(new_game.position_history))

        if new_game.game_is_over():
            break
Example No. 5
    def __init__(self,
                 board_size=9,
                 n_playout=2000,
                 init_model=None,
                 use_cuda=False):
        self.board_size = board_size
        self.board = Board(self.board_size)
        self.learning_rate = 2e-3
        self.learning_rate_multiplier = 1.0
        self.n_playout = n_playout
        self.c_puct = 1.0
        self.buffer_size = 10000
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5  # num of train_steps for each update
        self.kl_targ = 0.02
        self.check_freq = 10
        self.game_batch_num = 1500
        self.best_win_ratio = 0.0
        self.game = Game(board_size)
        self.heat_start = 30
        self.evaluation_time = 10

        if init_model:
            self.p_v_net: Policy_Value_net = Policy_Value_net(
                self.board_size, init_model=init_model, use_cuda=use_cuda)
        else:
            self.p_v_net = Policy_Value_net(self.board_size, use_cuda=use_cuda)
        self.p_v_function = self.p_v_net.p_v_function
        self.mcts_player = MCTS_player(self.p_v_function,
                                       self.n_playout,
                                       self.c_puct,
                                       is_self_play=True)
        self.mcts_pure = Pure_MCTS_player()
        self.random_player = GoMoku_player()
Example No. 6
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 12 09:07:33 2018

@author: hztengkezhen
"""

from mcts import MCTS
from mcts import MCTS_player
from board import Board
from board import Game
from board import Random_player
from board import Human_player

g = Game(9)
rp = Random_player()
hp = Human_player()
mp = MCTS_player()
#num = 0
#for i in range(1):
#    if g.game_start(hp,mp)==2:
#        num+=1
#print num

board = Board(9)
board.move(4 * 9 + 4)
board.move(0 * 9 + 0)
board.move(4 * 9 + 5)
board.move(8 * 9 + 8)
board.move(4 * 9 + 6)
board.move(0 * 9 + 8)
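The move() calls above pass a single flattened board index. Assuming the encoding is row * board_size + col (the snippet itself does not say which coordinate comes first), a small helper makes the intent clearer:

def to_index(row, col, board_size=9):
    # Flatten a (row, col) coordinate into the single index expected by Board.move().
    return row * board_size + col

board.move(to_index(4, 4))  # same as board.move(4 * 9 + 4)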
Example No. 7
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 23:14:41 2018

@author: ubuntu
"""

from board import Board
from board import Game
from board import Random_player

g = Game(9)
num = 0
for i in range(100):
    w = g.self_play(Random_player())
    if w == 1:
        num += 1
print num
Example No. 8
from board import Game
test = Game(10, 10, [(0, 0), (4, 4), (5, 6), (7, 3), (9, 9)], [(8, 8), (8, 9)],
            [[(2, 1), (2, 2)], [(3, 1), (3, 2)], [(4, 1), (4, 2), (4, 3)]])

print(test.boardState())
Example No. 9
        # if all placements have been tried, just keep going to next position/piece
    # if all pieces have been tried/placed:
    if len(named_pieces) == 0:  # if they have all been placed
        print(state)  # show end result
        with open('solutions.txt', 'a+') as f:
            f.write(str(state))
        return
    else:  # otherwise
        print('Unsuccessful attempt:')
        print(state)
        return  # return to previous recursion state


if __name__ == "__main__":

    # some pieces should not be rotated (p0, p5),
    # some can be rotated, but should not be mirrored (p4, p6)
    # finally, p9 should only be rotated and once.
    # all other pieces should be rotated (4 states), then mirrored (5th state)
    # and rotated three more times: 4+4=8 states
    specific_n_placements = [1, 8, 8, 8, 4, 1, 4, 8, 8, 2]
    # initialise start state: all pieces, empty board, all possible positions
    named_pieces = list(zip([i for i in range(len(pieces))], pieces))
    state = Game()
    positions = [(y, x) for y in range(state.board_size)
                 for x in range(state.board_size)]
Example No. 10
    def setUp(self):
        """Initial Set Up"""
        self.game = Game()
        self.round_list = []
        self.winners_list = []
Example No. 11
class BoardTest(unittest.TestCase):

    def setUp(self):
        """Initial Set Up"""
        self.game = Game()
        self.round_list = []
        self.winners_list = []

    def test_game_stats(self):
        """Returns the statistics:
        1- matches by time out
        2- average of turns
        3- percentage of gain by type of player
        4- which player wins the most"""

        # Simulation of 300 games
        for round_game in range(300):
            self.players = [
            {'impulsive':'player 1', 'balance':300, 'position':0},
            {'demanding':'player 2', 'balance':300, 'position':0},
            {'cautious':'player 3', 'balance':300, 'position':0},
            {'random':'player 4', 'balance':300, 'position':0},
            ]
            self.props = [
            {'name':'São Paulo','owner':False,'price':100,'rent_value':60},
            {'name':'Osasco','owner':False,'price':95,'rent_value':59},
            {'name':'Barueri','owner':False,'price':90,'rent_value':58},
            {'name':'Carapicuiba','owner':False,'price':85,'rent_value':57},
            {'name':'Maceio','owner':False,'price':80,'rent_value':56},
            {'name':'Itapevi','owner':False,'price':75,'rent_value':55},
            {'name':'Manaus','owner':False,'price':70,'rent_value':40},
            {'name':'Porto Alegre','owner':False,'price':65,'rent_value':39},
            {'name':'Piraju','owner':False,'price':65,'rent_value':38},
            {'name':'Campinas','owner':False,'price':60,'rent_value':37},
            {'name':'Sorocaba','owner':False,'price':55,'rent_value':36},
            {'name':'Rio de Janeiro','owner':False,'price':50,'rent_value':35},
            {'name':'Taboão da Serra','owner':False,'price':45,'rent_value':20},
            {'name':'Niteroi','owner':False,'price':40,'rent_value':19},
            {'name':'Florianopolis','owner':False,'price':35,'rent_value':18},
            {'name':'Goiania','owner':False,'price':30,'rent_value':17},
            {'name':'Santana de Parnaiba','owner':False,'price':25,'rent_value':16},
            {'name':'Pirapora do Bom Jesus','owner':False,'price':20,'rent_value':15},
            {'name':'Mogi das Cruzes','owner':False,'price':15,'rent_value':10},
            {'name':'Guarulhos','owner':False,'price':10,'rent_value':9},
            ]

            result = self.game.start_game(self.players, self.props)

            self.round_list.append(result.get('round'))
            self.winners_list.append(result.get('winner'))


        # Statistics treatment
        timeout = len([r for r in self.round_list if r == 1000])
        round_average = sum(self.round_list) / 300
        impulsive = (100 / 300) * self.winners_list.count('impulsive')
        demanding = (100 / 300) * self.winners_list.count('demanding')
        cautious = (100 / 300) * self.winners_list.count('cautious')
        random = (100 / 300) * self.winners_list.count('random')
        no_winner = (100 / 300) * self.winners_list.count(False)

        count_winners_list = [impulsive, demanding, cautious, random]
        players_list = ['Impulsive', 'Demanding', 'Cautious', 'Random']
        biggest_winner_position = count_winners_list.index(max(count_winners_list))

        # Statistics prints
        print("Games ended by timeout: {}".format(timeout))
        print("Average number of turns per game: {:.2f}".format(round_average))
        print("Win percentage by behaviour: impulsive {:.2f}% - demanding {:.2f}% - cautious {:.2f}% - random {:.2f}%".format(impulsive, demanding, cautious, random))
        print("Behaviour that wins most often: {}".format(players_list[biggest_winner_position]))
Example No. 12
import random
from collections import deque

import numpy as np
import torch
from tqdm import trange

# Project-local classes (Board, Game, Policy_Value_net, MCTS_player,
# Pure_MCTS_player, GoMoku_player) are assumed to be importable from the
# surrounding repository; their modules are not shown in this example.


class train_pipeline:
    def __init__(self,
                 board_size=9,
                 n_playout=2000,
                 init_model=None,
                 use_cuda=False):
        self.board_size = board_size
        self.board = Board(self.board_size)
        self.learning_rate = 2e-3
        self.learning_rate_multiplier = 1.0
        self.n_playout = n_playout
        self.c_puct = 1.0  # exploration constant passed to the MCTS player
        self.buffer_size = 10000
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5  # num of train_steps for each update
        self.kl_targ = 0.02
        self.check_freq = 10  # evaluate and checkpoint every check_freq batches
        self.game_batch_num = 1500  # total number of self-play batches
        self.best_win_ratio = 0.0
        self.game = Game(board_size)
        self.heat_start = 30  # learning-rate warm-up period (see get_learning_rate)
        self.evaluation_time = 10  # games played per evaluation run

        if init_model:
            self.p_v_net: Policy_Value_net = Policy_Value_net(
                self.board_size, init_model=init_model, use_cuda=use_cuda)
        else:
            self.p_v_net = Policy_Value_net(self.board_size, use_cuda=use_cuda)
        self.p_v_function = self.p_v_net.p_v_function
        self.mcts_player = MCTS_player(self.p_v_function,
                                       self.n_playout,
                                       self.c_puct,
                                       is_self_play=True)
        self.mcts_pure = Pure_MCTS_player()
        self.random_player = GoMoku_player()

    def data_augumentation(self, play_data):
        """Expand each self-play sample into its board symmetries
        (four rotations, each also flipped horizontally)."""
        extend_data = []
        for state, mcts_prob, winner in play_data:
            for i in [1, 2, 3, 4]:
                # rotate counterclockwise
                equi_state = np.array([np.rot90(s, i) for s in state])
                equi_mcts_prob = np.rot90(
                    np.flipud(
                        mcts_prob.reshape(self.board_size, self.board_size)),
                    i)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
                # flip horizontally
                equi_state = np.array([np.fliplr(s) for s in equi_state])
                equi_mcts_prob = np.fliplr(equi_mcts_prob)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
        return extend_data

    def self_play(self, n_times=1, is_shown=False):
        for _ in range(n_times):
            winner, play_data = self.game.start_self_play(self.mcts_player,
                                                          is_shown=is_shown,
                                                          temp=1.0)
            play_data = list(play_data)
            self.episode_len = len(play_data)
            play_data = self.data_augumentation(play_data)
            self.data_buffer.extend(play_data)

    def get_learning_rate(self, epoch):
        if epoch > self.heat_start:
            return self.learning_rate
        else:
            return self.learning_rate * epoch / self.heat_start

    def policy_update(self):

        mini_batch = random.sample(self.data_buffer, self.batch_size)
        state_batch = [data[0] for data in mini_batch]
        mcts_probs_batch = [data[1] for data in mini_batch]
        winner_batch = [data[2] for data in mini_batch]

        state_batch = torch.Tensor(state_batch)
        mcts_probs_batch = torch.Tensor(mcts_probs_batch)
        winner_batch = torch.Tensor(winner_batch)

        for i in range(self.epochs):
            loss, entropy = self.p_v_net.train_step(state_batch,
                                                    mcts_probs_batch,
                                                    winner_batch,
                                                    self.get_learning_rate(i))
            print(loss, entropy)

        return loss, entropy

    def policy_evaluate(self, player2, is_shown=False):
        state = self.mcts_player.get_player_state()
        self.mcts_player.change_to_test_mode()

        win_table = np.zeros([self.evaluation_time, 2])
        for k in range(self.evaluation_time):
            winner = self.game.start_play(self.mcts_player,
                                          player2,
                                          is_shown=is_shown)
            win_table[k, 0] = int(winner == "X")
        for k in range(self.evaluation_time):
            winner = self.game.start_play(self.random_player, player2)
            win_table[k, 1] = int(winner == "O")

        self.mcts_player.reset_player_state(state)
        return win_table.mean()

    def train(self, is_shown=False):
        try:
            for i in trange(self.game_batch_num):
                self.self_play(self.play_batch_size, is_shown=is_shown)
                print("batch i:{}, episode_len:{}".format(
                    i + 1, self.episode_len))
                if len(self.data_buffer) > self.batch_size:
                    loss, entropy = self.policy_update()
                # check the performance of the current model,
                # and save the model params
                if (i + 1) % self.check_freq == 0:
                    print("current self-play batch: {}".format(i + 1))
                    win_ratio = self.policy_evaluate(self.random_player)
                    self.p_v_net.save_model('./current_policy_{}.model'.format(
                        self.board_size))
                    if win_ratio > self.best_win_ratio:
                        print("New best policy!!!!!!!!", win_ratio)
                        self.best_win_ratio = win_ratio
                        # update the best_policy
                        self.p_v_net.save_model(
                            './best_policy_{}.model'.format(self.board_size))
                        # if (self.best_win_ratio == 1.0 and self.pure_mcts_playout_num < 5000):
                        #     self.pure_mcts_playout_num += 1000
                        #     self.best_win_ratio = 0.0
        except KeyboardInterrupt:
            print('\n\rquit')
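A minimal way to drive the class above, using only the constructor and train() signatures visible in the snippet (the project-local modules are assumed to be importable):

# Hypothetical usage sketch: build the pipeline and start self-play training.
pipeline = train_pipeline(board_size=9, n_playout=400, use_cuda=False)
pipeline.train(is_shown=False)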