Example #1
 def test_if_three_in_a_row_wins(self):
     board = Board()
     tile_type = 1 # White
     board.add_tile(tile_type, row=1, column=1)
     board.add_tile(tile_type, row=1, column=0)
     board.add_tile(tile_type, row=1, column=2)
     self.assertTrue(board.three_in_a_row)
Example #2
    def test_a_cell_with_2or3_neighbours_stays_alive_after_tick(self):
        my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell, self.fourth_cell])
        my_board.tick()

        self.assertTrue(my_board.cell_alive_at(6, 6))
        self.assertTrue(my_board.cell_alive_at(5, 6))
        self.assertTrue(my_board.cell_alive_at(5, 7))
        self.assertTrue(my_board.cell_alive_at(6, 7))
Example #3
 def test_combine(self):
     board = Board(4, 4)
     start_single = [2, 2, 0, 0]
     start_double = [2, 2, 4, 4]
     end_single = [4, 0, 0, 0]
     end_double = [4, 0, 8, 0]
     self.assertEqual(end_single, board._Board__combine(start_single))
     self.assertEqual(end_double, board._Board__combine(start_double))
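Aside: board._Board__combine works because of Python name mangling, which rewrites a double-underscore attribute like __combine to _Board__combine outside the class. A minimal standalone sketch of the mechanism:

class Example:
    def __hidden(self):
        return 42

e = Example()
assert e._Example__hidden() == 42  # the mangled name reaches the "private" method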
Example #4
def ai_move():
    layout = request.args.get('layout')
    layout = json.loads(layout)
    board = Board(setup=layout)
    ai.move(board)
    layout = board.board()
    layout = json.dumps(layout)
    return layout
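This reads like a Flask view function; below is a hedged sketch of how it might be wired up (the route path and app object are assumptions, not taken from the original project; Board and ai come from that project):

from flask import Flask, request
import json

app = Flask(__name__)

@app.route('/ai_move')  # hypothetical route
def ai_move():
    # layout arrives as a JSON-encoded query parameter
    board = Board(setup=json.loads(request.args.get('layout')))
    ai.move(board)
    return json.dumps(board.board())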
Example #5
 def test_transpose(self):
     board = Board()
     tile_type = 1 # White
     board.add_tile(tile_type, row=1, column=1)
     board.add_tile(tile_type, row=1, column=0)
     board.add_tile(tile_type, row=1, column=2)
     before = str(board)
     board.transpose()
     board.transpose()
     # Transposing twice should restore the original layout.
     self.assertEqual(str(board), before)
Example #6
 def test_compress(self):
     board = Board(4, 4)
     start = [2, 0, 4, 0]
     end_backward = [2, 4, 0, 0]
     end_forward = [0, 0, 2, 4]
     self.assertEqual(end_backward, board._Board__compress(start, 'left'))
     self.assertEqual(end_forward, board._Board__compress(start, 'right'))
     self.assertEqual(end_backward, board._Board__compress(start, 'up'))
     self.assertEqual(end_forward, board._Board__compress(start, 'down'))
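For reference, a minimal sketch of the compress semantics these assertions pin down (a stand-in, not the project's _Board__compress): non-zero tiles slide toward the move direction and zeros fill the rest.

def compress(row, direction):
    tiles = [t for t in row if t]  # drop zeros, keep relative order
    pad = [0] * (len(row) - len(tiles))
    return tiles + pad if direction in ('left', 'up') else pad + tiles

assert compress([2, 0, 4, 0], 'left') == [2, 4, 0, 0]
assert compress([2, 0, 4, 0], 'down') == [0, 0, 2, 4]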
Example #7
 def test_set(self):
     board = Board(4, 4)
     end = [
         1, 0, 0, 0,
         0, 0, 0, 0,
         0, 0, 0, 0,
         0, 0, 0, 0
     ]
     board._Board__set(0, 0, 1)
     self.assertEqual(end, board.grid)
Example #8
class TestBoard(unittest.TestCase):
    def setUp(self):
        self.board = Board(10,10)

    def test_initialise_collection(self):
        self.board.initialise_collection()
        self.assertEqual(len(self.board.collection), 100)

    def test_get_neighbours(self):
        for i in range(100):
            self.board.collection.append(Cell(False))
Example #9
    def test_board_join(self):
        """
        When player2 joins the game, the pieces are on the board.
        """
        player1 = MockPlayer()
        player2 = MockPlayer()

        board1 = Board(player1)
        board1.join(player2)

        self.assertEqual(board1._pieces[(0, 0)], Piece(player1, 7))
Example #10
class StackingTests(unittest.TestCase):

    def setUp(self):
        self.board = Board()

    def test_stack_one_red_on_one_blue(self):
        red = self.board.get_colour('red')
        blue = self.board.get_colour('blue')
        new_red, new_blue = stack(red, blue)
        self.assertEqual(red.top, new_blue.top)
        self.assertEqual([], new_red)
        self.assertEqual(2, new_blue.height)
Example #11
    def setup_board(players, num_of_nodes):

        Setup().setup_logger()

        nodes = {}
        board = Board(nodes)

        #create empty nodes
        for i in range(0, num_of_nodes):
            nodes[i] = board.Node(i, 0, 1)

        #create connections
        for i in range(1, num_of_nodes):
            board.connect_nodes(i, i - 1)
        board.connect_nodes(0, num_of_nodes - 1)
        for i in range(0, num_of_nodes // 4):  # integer division so range() gets an int
            board.connect_nodes(randint(0, num_of_nodes - 1), randint(0, num_of_nodes - 1))

        #split nodes between players
        nodes_to_split = list(nodes.values())
        shuffle(nodes_to_split)
        for i in range(0, num_of_nodes):
            player_id = i % len(players)
            nodes_to_split[i].owner = player_id

        #split armies between players
        armies_per_node = 3
        armies_per_player = num_of_nodes * (armies_per_node - 1) // len(players)
        for player in players:
            my_nodes = board.nodes_by_owner(player.id)
            for i in range(0, armies_per_player):
                node = my_nodes[randint(0, len(my_nodes) - 1)]
                node.add_army(1)
        return board
Example #12
    def test_new_tile(self):
        start = [
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0
        ]

        board = Board(4, 4)

        self.assertEqual(start, board.grid)
        
        board.new_tile()

        self.assertNotEqual(start, board.grid)
Example #13
    def test_traverse(self):
        board = Board()
        paths = board.traverse()
        # horizontals
        for y in range(0, 3):
            self.assertIn([(0, y), (1, y), (2, y)], paths)

        # verticals
        for x in range(0, 3):
            self.assertIn([(x, 0), (x, 1), (x, 2)], paths)

        # \ diagonal
        self.assertIn([(0, 0), (1, 1), (2, 2)], paths)

        # / diagonal
        self.assertIn([(0, 2), (1, 1), (2, 0)], paths)
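Together these assertions cover all eight winning lines of a 3x3 board: three rows, three columns, and the two diagonals.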
Example #14
    def setUp(self):
        player1 = MockPlayer()
        player2 = MockPlayer()

        board1 = Board(player1)
        board1.join(player2)

        board1._pieces = {
            (0, 0): Piece(player1, 7),
            (8, 8): Piece(player2, 7),
            (0, 3): Hole(player1),
            (8, 3): Hole(player2),
        }

        self.board = board1
        self.player1 = player1
        self.player2 = player2
Example #15
    def test_double_combine(self):
        start = [
            1, 1, 1, 1,
            1, 1, 2, 2,
            2, 2, 1, 1,
            0, 0, 0, 0
        ]

        end = [
            2, 2, 0, 0,
            2, 3, 0, 0,
            3, 2, 0, 0,
            0, 0, 0, 0
        ]

        board = Board(4, 4, start)
        board.move('left')

        self.assertEqual(end, board.grid)
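Note that tiles are stored as exponents in this implementation: merging two equal tiles increments the value by one (1, 1 -> 2; 2, 2 -> 3) rather than doubling it.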
Example #16
    def test_combine_down(self):
        start = [
            0, 0, 1, 1,
            0, 1, 0, 1,
            1, 0, 0, 0,
            1, 1, 1, 0
        ]

        end = [
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            2, 2, 2, 2
        ]

        board = Board(4, 4, start)
        board.move('down')

        self.assertEqual(end, board.grid)
Example #17
    def test_combine_left(self):
        start = [
            1, 1, 0, 0,
            1, 0, 1, 0,
            1, 0, 0, 1,
            0, 0, 1, 1
        ]

        end = [
            2, 0, 0, 0,
            2, 0, 0, 0,
            2, 0, 0, 0,
            2, 0, 0, 0
        ]

        board = Board(4, 4, start)
        board.move('left')

        self.assertEqual(end, board.grid)
Example #18
    def __init__(self, w, h, owner):
        self.sid = sha1(str(time.time()).encode()).hexdigest()  # sha1 needs bytes on Python 3
        self.clients = [owner]
        owner.player = 1
        self.player_counter = 2
        self.state = STATE_JOINING
        self.board = Board(w, h)
        self.turn = owner

        owner.send(Msg('newgame', self.sid, w, h).encode())
        owner.send(Msg('turn', self.turn.player).encode())
Example #19
def main():
    solver = Solver()
    manual = Player()
    n = 17
    if len(sys.argv) > 1:
        n = int(sys.argv[1])
    # Start the simulator on a circle with n points, with solver as player 0
    # and manual (you) as player 1.
    sim = Simulator(Board.from_circle(n), solver, manual)
    print(("Starting simulation on circle with {} points. The computer is"
           " playing first!\n").format(n))
    while sim.step():
        pass
    input()
Example #20
    def start(self, PLAYERS):
        self.board = Board.initial(GUICard)

        # Clear the ingame data.
        self.start_turn_time = time.clock()
        self.set_time = 0
        self.players = {x: [] for x in xrange(PLAYERS)}

        #self.cards_drawn = False
        self.table_clickable = False    # Is it possible to select cards?
        self.draw_END_BUTTON = False    # Draw 'No Sets' and block the game?

        self.player_turn = None         # Whose turn (== player_id or None)
Example #21
def server():
    import cv2
    s = socket.socket()
    host = socket.gethostname()
    print(host)
    print(socket.gethostbyname(socket.gethostname()))
    host = HOST
    port = PORT
    s.bind((host, port))

    s.listen(5)
    while True:
        print('listening...')
        game = Game(handicap=HANDICAP)
        board_img = game.get_current_board_img()
        cv2.imshow('board_img', board_img)
        param = {'MCTS': False}
        cv2.setMouseCallback('board_img', game.cap_click, param=param)
        cv2.waitKey(33)
        c, addr = s.accept()
        print('Got connection from', addr)

        while True:
            before_len = len(game.boards)
            board_img = game.get_current_board_img(last_move=game.current_moves[-1])
            cv2.imshow('board_img', board_img)
            cv2.waitKey(33)
            now_len = len(game.boards)
            if now_len > before_len:
                print(param['MCTS'])
                board_img = game.get_current_board_img(last_move=game.current_moves[-1])
                cv2.imshow('board_img', board_img)
                cv2.waitKey(33)
                latest_board = game.boards[-2]  # board before human move
                next_to_play = game.next_to_play
                board_str = Board.mtx2str(latest_board.board_mtx)
                next_to_play = str(next_to_play)

                print('next_to_play:', next_to_play)
                c.send(str.encode(str(game.current_moves[-1]) + '|' + next_to_play + '|' + board_str + '|'
                                  + str(int(param['MCTS']))))
                print(str(game.current_moves[-1]))

                move = c.recv(1024).decode('utf-8')
                if not move:  # client disconnected
                    break
                print('move', move)
                temp = move.split(',')
                x, y = int(temp[0][1:]), int(temp[1][1:-1])
                print(x, y, game.next_to_play)
                game.mk_move(x, y)

        c.close()
Example #22
def main():
    p1 = Player(name="John")
    p2 = Player(name="Hulio")
    b = Board()
    b.add_player(p1)
    b.add_player(p2)
    b.start()
    _pprint(filled_cells(b.state))
    raw_input()
    destruct_status = False
    prev_check = []
    state = b.state
    promotions = 0
    while state not in ("FIN", ):
    #    for player in (p1, p2):
    #    b.get_state(player)
        event = smart_event_factory(state, colors, destruct_status, prev_check)
        color = 'white' if 'w' == event["name"][0] else 'black'
        if not check_the_turn(event, state):
            continue
        else:
            print "VALID"
            if "promoted" in event.keys() and event["promoted"]:
                promotions +=1
            print "PROMOTED CHECKS: ", promotions
            state = check_the_game_state(state, event, colors)
            if event['cutDown'] and destruction_is_possible(event['finalPos'], filled_cells(state, color=colors[1]), filled_cells(state, color=colors[0]), event.get('promoted')):
                destruct_status = True
                prev_check = event['finalPos']
                print 'NEXT DESTRUCT'
                raw_input()
            else:
                colors.reverse()
                destruct_status = False
                prev_check = []
                print 'COLORS REVERSED!'
                print 'FILLED CELLS:', len(filled_cells(state))
                raw_input()
Example #23
    def __init__(self, width, height, players_num, left_player=Soldier, right_player=Soldier):
        """Initializes the game board with width and height and
        adds players_num players on each side.
        The optional parameters left_player and right_player can be set
        to different subclasses of Player.
        """
        Board.__init__(self)
        self.width = width
        self.height = height
        self.front = [width // 2 for _ in range(height)]
        self.positions = {}

        # place the front in positions
        for y, x in enumerate(self.front):
            self.positions[(x, y)] = 'f'

        # place players, using the configured class for each side
        for n in range(players_num):
            player = left_player(0, n, 'left')
            self.add_player(player, 'left')

            player = right_player(width - 1, height - 1 - n, 'right')
            self.add_player(player, 'right')
Example #24
    def test_a_cell_without_neighbours_dies_after_tick(self):
        dying_cell = Cell(4, 4)
        my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell, dying_cell])
        my_board.tick()

        self.assertFalse(my_board.cell_alive_at(4, 4))
        self.assertTrue(my_board.cell_alive_at(6, 6))
        self.assertTrue(my_board.cell_alive_at(5, 6))
        self.assertTrue(my_board.cell_alive_at(5, 7))
Example #25
 def test_get_range(self):
     start = [
         1, 2, 3, 4,
         5, 6, 7, 8,
         9, 10, 11, 12,
         13, 14, 15, 16 
     ]
     row_first = [1, 2, 3, 4]
     row_last = [13, 14, 15, 16]
     col_first = [1, 5, 9, 13]
     col_last = [4, 8, 12, 16]
     board = Board(4, 4, start)
     self.assertEqual(row_first, board._Board__get_range(0, 'left'))
     self.assertEqual(row_first, board._Board__get_range(0, 'right'))
     self.assertEqual(row_last, board._Board__get_range(3, 'left'))
     self.assertEqual(row_last, board._Board__get_range(3, 'right'))
     self.assertEqual(col_first, board._Board__get_range(0, 'up'))
     self.assertEqual(col_first, board._Board__get_range(0, 'down'))
     self.assertEqual(col_last, board._Board__get_range(3, 'up'))
     self.assertEqual(col_last, board._Board__get_range(3, 'down'))
Example #26
def py_get_liberty(matrix):
    """Encode per-group liberty counts (capped at 8) as one-hot planes
    for black (1) and white (2) stones on a 19x19 board."""
    black_liberty = np.zeros((19, 19, 8), dtype=np.uint8)
    white_liberty = np.zeros((19, 19, 8), dtype=np.uint8)
    visited = {}
    for i in range(19):
        for j in range(19):
            if matrix[i][j] == 1 and (i, j) not in visited:
                groups = Board.get_group(i, j, matrix, visited=visited)
                num_liberty = Board.check_liberty(groups, matrix, cnt=True)
                if num_liberty > 8:
                    num_liberty = 8
                for stone in groups:
                    black_liberty[stone[0]][stone[1]][num_liberty-1] = 1

            if matrix[i][j] == 2 and (i, j) not in visited:
                groups = Board.get_group(i, j, matrix, visited=visited)
                num_liberty = Board.check_liberty(groups, matrix, cnt=True)
                if num_liberty > 8:
                    num_liberty = 8
                for stone in groups:
                    white_liberty[stone[0]][stone[1]][num_liberty-1] = 1

    stones = np.concatenate((black_liberty, white_liberty), axis=2)
    return stones
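The final concatenation stacks the eight black planes and eight white planes into a (19, 19, 16) tensor; a standalone shape check:

import numpy as np

black = np.zeros((19, 19, 8), dtype=np.uint8)
white = np.zeros((19, 19, 8), dtype=np.uint8)
assert np.concatenate((black, white), axis=2).shape == (19, 19, 16)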
Example #27
    def test_a_died_cell_with_3_alive_neighbours_resurrects(self):
        my_board = Board([self.fst_cell, self.snd_cell, self.trd_cell])
        my_board.tick()

        self.assertTrue(my_board.cell_alive_at(6, 6))
        self.assertTrue(my_board.cell_alive_at(5, 6))
        self.assertTrue(my_board.cell_alive_at(5, 7))

        # A dead cell with 3 alive neighbours comes back to life.
        self.assertTrue(my_board.cell_alive_at(6, 7))
Example #28
    def test_a_cell_with_more_then_3_neighbours_dies(self):
        my_board = Board([self.fst_cell, self.snd_cell,
            self.trd_cell, self.fourth_cell, self.fifth_cell])

        my_board.tick()

        # dying cells: first and fifth.
        self.assertFalse(my_board.cell_alive_at(6, 6))
        self.assertFalse(my_board.cell_alive_at(7, 5))

        self.assertTrue(my_board.cell_alive_at(5, 6))
        self.assertTrue(my_board.cell_alive_at(5, 7))
        self.assertTrue(my_board.cell_alive_at(6, 7))
Example #29
    def check(self):
        in_set = self.board.has_set()

        # 1. Add up to 12 cards
        if len(self.board.table) < TABLE_SIZE:
            self.board = self.board.add_cards()

        # 2. Continue adding cards until there's a set
        elif not in_set and len(self.board.table) < TABLE_LIMIT:
            self.board = self.board.add_cards()

        # 3. Reshuffle if no set and window can't handle more cards
        elif not in_set and len(self.board.table) == TABLE_LIMIT:
            cards = self.board.deck + self.board.table
            random.shuffle(cards)  # shuffle in place; random.shuffle returns None
            new_deck = cards[TABLE_SIZE:]
            new_table = cards[:TABLE_SIZE]
            self.board = Board(new_deck, new_table)

        # 4. Break if added more than window can handle
        assert len(self.board.table) <= TABLE_LIMIT
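The two-step shuffle above matters because random.shuffle mutates its argument in place and returns None:

import random

cards = [1, 2, 3]
assert random.shuffle(cards) is None  # the shuffled result is in cards itself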
Example #30
def client():
    s = socket.socket()
    host = HOST
    print('connecting to ' + host)
    port = PORT
    s.connect((host, port))
    game_play = GamePlay(
                         policy_net_path='../trained_models/policy',
                         value_net_path='../trained_models/value')

    game = Game(handicap=HANDICAP)
    while True:
        message = s.recv(4096)
        if not message:  # server closed the connection
            break
        message = message.decode('utf-8')
        print(type(message), message)
        # board_mtx, next_to_move = message.split('|')
        move, next_to_move, current_board, is_search = message.split('|')
        print(move, next_to_move, current_board, is_search)
        if int(is_search) == 1:
            game_play.mcts.time_limit = 20
        else:
            game_play.mcts.time_limit = 0.5
        while Board.mtx2str(game.boards[-1].board_mtx) != current_board:
            print('roll_back')
            game.roll_back()
        print(len(game.boards))
        moves = move.split(',')
        x, y = int(moves[0][1:]), int(moves[1][1:-1])
        game.mk_move(x, y)

        output = game_play.play(game)
        game.mk_move(output[0], output[1])
        s.send(bytes(str(output), encoding='utf-8'))
        # s.send(str(output))

    s.close()
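Note: the client parses the same '|'-separated message the server in Example #21 produces: the move, next_to_play, the board string, and the MCTS flag.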
Example #31
from game import Board
import random
import player
import pandas as pd

inp1 = -1

# For simulating data
for _ in range(500):
    out = []
    print("Game", _)
    board = Board(False, False)
    board.build_board()
    win_percent_history = player.simulate_game(board, -1, 10)
    for sim in win_percent_history:
        for turn in sim:
            for play in turn:
                out.append(play)
    df = pd.DataFrame(out, columns=['player', 'board', 'win_percent'])
    df.to_csv('game_data2.csv', mode='a', index=False)
Example #32
File: ui.py Project: physpim/chess
 def __init__(self):
     self.board = Board()
     self.draw()
Example #33
File: ui.py Project: physpim/chess
class Ui:
    """Command line user interface

    Manages all the interaction with the user. 
    """
    color_dict = {0: "White", 1: "Black"}
    piece_type_dict = {
        0: "\u2654\u265a",
        1: "\u2655\u265b",
        2: "\u2656\u265c",
        3: "\u2657\u265d",
        4: "\u2658\u265e",
        5: "\u2659\u265f"
    }
    x_str2int = {
        "a": 0,
        "b": 1,
        "c": 2,
        "d": 3,
        "e": 4,
        "f": 5,
        "g": 6,
        "h": 7
    }
    y_str2int = {
        "1": 0,
        "2": 1,
        "3": 2,
        "4": 3,
        "5": 4,
        "6": 5,
        "7": 6,
        "8": 7
    }
    x_int2str = {
        0: "a",
        1: "b",
        2: "c",
        3: "d",
        4: "e",
        5: "f",
        6: "g",
        7: "h"
    }
    y_int2str = {
        0: "1",
        1: "2",
        2: "3",
        3: "4",
        4: "5",
        5: "6",
        6: "7",
        7: "8"
    }
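    # These four lookup tables are mutual inverses; they could equally be
    # built with comprehensions, e.g. {c: i for i, c in enumerate("abcdefgh")}.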

    def __init__(self):
        self.board = Board()
        self.draw()

    def draw(self):
        """Draws the board configuration in the terminal"""
        display = "     a  b  c  d  e  f  g  h      \n" + \
                  "________________________________ \n "
        # Loop over all x and y indices
        for j in range(8):
            display += " " + str(j + 1) + "|"
            for i in range(8):
                # Find the piece index for position [i, j]
                position_ij = Position(i, j)
                piece = self.board.find_piece(position_ij)
                if piece.color is not None:
                    display += " " + \
                        Ui.piece_type_dict[piece.type][piece.color] + " "
                else:
                    # Draw an empty cell
                    display += " - "
            # Close this row and start the next line
            display += "|" + str(j + 1) + " \n "
        display += "_______________________________ \n" + \
                   "     a  b  c  d  e  f  g  h    \n"
        self.board_string = display
        print(display)

    def turn(self):
        """"Performs a turn within ui"""
        selected_piece = self.select_piece()
        position = self.select_move(selected_piece)
        self.board.turn(selected_piece, position, self.draw, self.check,
                        self.check_mate)

    def select_piece(self) -> Piece:
        """Asks the user to select a piece to make a move with"""
        question = Ui.color_dict[self.board.turn_color] + \
            ", your turn! Please select a piece. \n"
        piece = Piece(None, None, None, None, None)
        while piece.color is None or piece.color != self.board.turn_color:
            coordinate = input(question)
            position = self.coordinate2position(coordinate)
            piece = self.board.find_piece(position)
            question = "No piece of yours at this field, try again! \n"
        return piece

    def select_move(self, selected_piece: Piece) -> Position:
        """Asks the user where to move the selected piece"""
        question = "The selected piece can move to " + \
            self.moves2text(selected_piece) + "\n"
        coordinate = input(question)
        position = self.coordinate2position(coordinate)
        while position not in selected_piece.moves:
            question = "Your piece can't move to the selected field, try again! \n"
            coordinate = input(question)
            position = self.coordinate2position(coordinate)
        return position

    def moves2text(self, selected_piece: Piece) -> str:
        """Turns a list of positions into a comma-separated string of coordinates"""
        return ", ".join(self.position2coordinate(move)
                         for move in selected_piece.moves)

    def coordinate2position(self, coordinate: str) -> Position:
        """Converts user input to a board position"""
        x = Ui.x_str2int[coordinate[0]]
        y = Ui.y_str2int[coordinate[1]]
        return Position(x, y)

    def position2coordinate(self, position: Position) -> str:
        """Converts user a position to a  ui coordinate"""
        return Ui.x_int2str[position.x] + Ui.y_int2str[position.y]

    def check(self):
        """Function that notifies players when check"""
        print('Check!')

    def check_mate(self):
        """Function that notifies players when check mate"""
        print('Check mate! The game is over')
Example #34
 def test_print_action_boom(self):
     string_action = Board().string_action
     self.assertEqual(string_action([BOOM, (0, 0)]), "BOOM at (0, 0).")
     self.assertEqual(string_action([BOOM, (5, 2)]), "BOOM at (5, 2).")
Example #35
    def test_move_row_works(self):
        b = Board(seed=False)
        # Make the top row 2 1 0 1
        b._tiles[0, :] = [2, 1, 0, 1]
        # Move to the left, so look at row left-to-right
        row_1 = ((0, 0), (0, 1), (0, 2), (0, 3))
        b._move_tile_row(row_1)
        self.assertEqual(b._tiles.take(0), 2)
        self.assertEqual(b._tiles.take(1), 2)
        b._move_tile_row(row_1)
        self.assertEqual(b._tiles.take(0), 4)

        # Make the second row 1 1 0 2
        b._tiles[1, :] = [1, 1, 0, 2]
        row_2 = ((1, 0), (1, 1), (1, 2), (1, 3))
        b._move_tile_row(row_2)
        # Expect 2 2 0 0
        self.assertEqual(b._tiles.take(4), 2)
        self.assertEqual(b._tiles.take(5), 2)
        b._move_tile_row(row_2)
        self.assertEqual(b._tiles.take(4), 4)

        # First column is now 2 2 0 0
        # Move up to merge
        col_1 = ((0, 0), (1, 0), (2, 0), (3, 0))
        b._move_tile_row(col_1)
        self.assertEqual(b._tiles.take(0), 8)
Example #36
cnt = 0
for board_mtx, move in zip(games[0], games[1]):
    cnt += 1
    if cnt % 200 != 0:
        continue

    mtx = board_mtx
    tic = time.time()
    for i in range(20):
        py_ret_mtx = py_get_liberty(mtx)
    toc = time.time()
    print(toc - tic)

    tic = time.time()
    for i in range(20):
        string = Board.mtx2str(mtx)
        string = gofeat.get_liberty(string)
        # plain int instead of np.int, which was removed from NumPy
        ret_mtx = np.fromstring(string, sep=' ',
                                dtype=int).reshape(16, 19, 19).transpose(1, 2, 0)
    toc = time.time()
    print(toc - tic)
    print(np.sum(py_ret_mtx - ret_mtx))
    print(ret_mtx.shape)

    # for i in range(16):
    #     print('num', i+1)
    #     li_b = Board(board_mtx=ret_mtx[i, :, :])
    #     li_canvas = li_b.visualize_board(grid_size=35)
    #     board = Board(board_mtx=board_mtx)
    #     canvas = board.visualize_board(grid_size=35)
Example #37
def run(width, agent1, agent2, file1, file2, start, rounds):
    n = 5
    height = width
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        def parse_agent(agent_type, filename):
            if agent_type == 'mcts_a0':
                model_file = 'best_policy_8_8_5.model'
                if filename:
                    model_file = filename
                # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

                # best_policy = PolicyValueNet(width, height, model_file = model_file)
                # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

                # load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
                try:
                    policy_param = pickle.load(open(model_file, 'rb'))
                except Exception:
                    policy_param = pickle.load(
                        open(model_file,
                             'rb'), encoding='bytes')  # To support python3
                best_policy = PolicyValueNetNumpy(width, height, policy_param)
                player = MCTSPlayer(
                    best_policy.policy_value_fn, c_puct=5, n_playout=400
                )  # set larger n_playout for better performance
            elif agent_type == 'mcts_pure':
                player = MCTS_Pure(c_puct=5, n_playout=1000)
            elif agent_type == 'minmax':
                player = Minimax()
            elif agent_type == 'dqn':
                model_file = 'output/v_1/epoch_100/agent_2.pkl'
                if filename:
                    model_file = filename
                player = DQNPlayer(model_file)
            elif agent_type == 'human':
                player = Human()
            else:
                player = Human()
                print('Illegal Agent Type. Defaulting to human player.')
            return player

        player1 = parse_agent(agent1, file1)
        player2 = parse_agent(agent2, file2)

        winners = []
        for i in range(rounds):
            winner = game.start_play(player1,
                                     player2,
                                     start_player=start - 1,
                                     is_shown=1)
            winners.append(winner)

        winrate1 = winners.count(1) / rounds
        winrate2 = winners.count(2) / rounds
        print('Winners: ' + ','.join([str(w) for w in winners]))
        print(str(agent1) + ' 1' + ' win rate: ' + str(winrate1))
        print(str(agent2) + ' 2' + ' win rate: ' + str(winrate2))

    except KeyboardInterrupt:
        print('\n\rquit')
Example #38
File: gui.py Project: physpim/chess
class Gui():
    """Grafical user interface for playing chess"""
    font = 'Courier 20'
    color_dict = {0: '#b0b0b0', 1: '#ffffff'}
    piece_type_dict = {
        0: {
            0: '\u2654',
            1: '\u265a'
        },
        1: {
            0: '\u2655',
            1: '\u265b'
        },
        2: {
            0: '\u2656',
            1: '\u265c'
        },
        3: {
            0: '\u2657',
            1: '\u265d'
        },
        4: {
            0: '\u2658',
            1: '\u265e'
        },
        5: {
            0: '\u2659',
            1: '\u265f'
        }
    }
    turn_color_dict = {0: 'White', 1: 'Black'}

    def __init__(self):
        # Init board
        self.board = Board()
        # Init root
        self.root = tk.Tk()
        # Create general structure
        self.board_frame = tk.Frame(self.root)
        self.board_frame.pack()
        self.test_frame = tk.Label(self.root, text='Welcome', font=Gui.font)
        self.test_frame.pack()
        self.user_input = tk.Entry(self.root, font=Gui.font)
        self.user_input.configure(state='readonly')
        self.user_input.pack()
        self.user_input_given = tk.IntVar(master=self.user_input,
                                          name='piece_type',
                                          value=-1)

        # Create buttons/fields
        self.buttons = [[], [], [], [], [], [], [], []]
        self.fields = [[], [], [], [], [], [], [], []]
        for x, y in product(range(8), range(8)):
            field_color = (x + y) % 2
            self.fields[x].append(
                tk.Frame(self.board_frame,
                         height=50,
                         width=50,
                         background=Gui.color_dict[field_color]))
            self.fields[x][y].propagate(False)
            self.fields[x][y].grid(column=x, row=8 - y)
            self.buttons[x].append(
                tk.Button(self.fields[x][y],
                          background=Gui.color_dict[field_color],
                          activebackground='#f2ff00',
                          borderwidth=0,
                          font='Courier 30'))
            self.buttons[x][y].pack(fill='both', expand=True)
        self.draw()
        self.select_piece()
        self.root.mainloop()

    def ask_promotion_type(self):
        """Asks the user which piece to promote"""
        self.user_input.bind('<Return>', self.promote2input)
        self.test_frame.configure(text='Promote to type:')
        self.user_input.configure(state='normal')
        self.reset_buttons()
        self.user_input_given.set(-1)
        self.user_input.wait_variable(name='piece_type')
        user_input = self.user_input_given.get()
        self.user_input.delete(0, len(self.user_input.get()))
        self.user_input.configure(state='readonly')
        return user_input

    def promote2input(self, event):
        """Gets the entered text from the entry box"""
        type_dict = {
            'king': 0,
            'queen': 1,
            'rook': 2,
            'bishop': 3,
            'knight': 4,
            'pawn': 5
        }
        promotion_type = type_dict[self.user_input.get()]
        # self.board.promote(piece, promotion_type)
        self.user_input.unbind('<Return>')  # bind('<Return>') alone would not remove the handler
        self.user_input_given.set(promotion_type)

    def select_piece(self):
        """Select piece to move"""
        color = self.board.turn_color
        for x, rows in enumerate(self.buttons):
            for y, button in enumerate(rows):
                piece = self.board.find_piece(Position(x, y))
                if piece.color == color and piece.moves:
                    func = partial(self.show_moves, piece)
                    button.configure(command=func)

    def show_moves(self, piece: Piece):
        """Marks the fields where the selected piece can move to"""
        self.reset_buttons()
        for move in piece.moves:
            self.buttons[move.x][move.y].configure(background='#f2ff00',
                                                   command=partial(
                                                       self.select_move, piece,
                                                       move))

    def select_move(self, piece: Piece, position):
        """Runs when player selects where to move to"""
        self.reset_buttons()
        self.board.recalculate(piece, position, self.ask_promotion_type)
        self.board.delete_self_check()
        self.board.turn_counter += 1
        self.board.turn_color = int(not self.board.turn_color)
        self.draw()
        if self.board.check:
            self.board.check_mate = \
                self.board.ischeckmate(self.board.turn_color)
            if self.board.check_mate:
                self.test_frame.configure(text='Check mate!')
            else:
                self.test_frame.configure(text='Check!')
        else:
            message = Gui.turn_color_dict[self.board.turn_color] + \
                ', it\'s your turn'
            self.test_frame.configure(text=message)
        self.select_piece()

    def draw(self):
        """Draws pieces on the board"""
        for (x, y) in product(range(8), range(8)):
            piece = self.board.find_piece(Position(x, y))
            if piece.color is not None:
                self.buttons[x][y].config(
                    text=Gui.piece_type_dict[piece.type][piece.color])
            else:
                self.buttons[x][y].config(text='')

    def reset_buttons(self):
        """Resets the buttons colors and commands"""
        for x, y in product(range(8), range(8)):
            button = self.buttons[x][y]
            button.configure(command=False,
                             background=Gui.color_dict[(x + y) % 2])
Example #39
import os
from game import Board


def bad_coordinates():
    os.system("say 'bad coordinates'")
    print("bad coordinates")


board = Board(4, 5, 6)
while not board.game_over:
    try:
        os.system("say 'enter x coordinate'")
        x = int(input("enter x coordinate: "))
        os.system("say 'enter y coordinate'")
        y = int(input("enter y coordinate: "))
    except ValueError:
        bad_coordinates()
        continue  # x and y may be undefined here, so ask again
    if 0 <= x < board.width and 0 <= y < board.height:
        board.guess_square(x, y)
    else:
        bad_coordinates()
Example #40
class TestCases(unittest.TestCase):
    def setUp(self):
        self.test_board1 = Board()
        # The 11 white stack can't move, extra 1 white at (0, 0)
        self.test_board2 = Board({
            'white': [[11, 4, 4], [1, 0, 0]],
            'black': [[1, 0, 4], [1, 1, 4], [1, 2, 4], [1, 3, 4], [1, 5, 4],
                      [1, 6, 4], [1, 7, 4], [1, 4, 0], [1, 4, 1], [1, 4, 2],
                      [1, 4, 3], [1, 4, 5], [1, 4, 6], [1, 4, 7]]
        })
        # White stack in corner that can move
        self.test_board3 = Board({
            'white': [[4, 7, 7]],
            'black': [[3, 7, 6], [3, 6, 7], [2, 7, 5], [2, 5, 7], [1, 7, 4],
                      [1, 4, 7]]
        })
        # Board full of white pieces
        self.test_board4 = Board({
            'white': [[1, x, y] for x in range(8) for y in range(8)],
            'black': []
        })

    def test_board_initilisation(self):
        self.assertEqual(self.test_board1.board, [[0] * 8] * 8)
        self.assertEqual(
            self.test_board2.board,
            [[1, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0],
             [0, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0],
             [-1, -1, -1, -1, 11, -1, -1, -1], [0, 0, 0, 0, -1, 0, 0, 0],
             [0, 0, 0, 0, -1, 0, 0, 0], [0, 0, 0, 0, -1, 0, 0, 0]])

        self.assertEqual(self.test_board2.n_white, 12)
        self.assertEqual(self.test_board2.n_black, 14)

    def test_get_all_white_actions(self):
        self.assertEqual(
            self.test_board1.get_all_white_actions(self.test_board1.board), [])
        self.assertEqual(
            self.test_board2.get_all_white_actions(self.test_board2.board),
            [[BOOM, (0, 0)], [1, (1, 0, 0, 1, 0)], [1, (1, 0, 0, 0, 1)],
             [BOOM, (4, 4)]])
        self.assertEqual(
            self.test_board3.get_all_white_actions(self.test_board3.board),
            [[0, (7, 7)], [1, (1, 7, 7, 3, 7)], [1, (1, 7, 7, 7, 3)],
             [1, (2, 7, 7, 3, 7)], [1, (2, 7, 7, 7, 3)], [1, (3, 7, 7, 3, 7)],
             [1, (3, 7, 7, 7, 3)], [1, (4, 7, 7, 3, 7)], [1, (4, 7, 7, 7, 3)]])

    def test_get_actions(self):
        self.assertEqual(
            self.test_board3.get_actions(self.test_board3.board, 7, 5),
            [[0, (7, 5)], [1, (1, 7, 5, 6, 5)], [1, (1, 7, 5, 5, 5)],
             [1, (1, 7, 5, 7, 6)], [1, (1, 7, 5, 7, 4)], [1, (1, 7, 5, 7, 3)],
             [1, (2, 7, 5, 6, 5)], [1, (2, 7, 5, 5, 5)], [1, (2, 7, 5, 7, 6)],
             [1, (2, 7, 5, 7, 4)], [1, (2, 7, 5, 7, 3)]])

    def test_explode(self):
        self.assertEqual(
            self.test_board4.explode(self.test_board4.board, 0, 0),
            ([[0] * 8] * 8, 64))
        self.assertEqual(
            self.test_board2.explode(self.test_board2.board, 7, 4),
            ([[1] + [0] * 7] + [[0] * 8] * 7, 25))

    def test_move(self):
        self.assertEqual(
            self.test_board3.move(self.test_board3.board, 1, 6, 7, 6, 5),
            [[0] * 8] * 4 + [[0] * 7 + [-1], [0] * 7 + [-2],
                             [0] * 5 + [-1, 0, -2], [0] * 4 + [-1, -2, -3, 4]])
        self.assertEqual(
            self.test_board3.move(self.test_board3.board, 3, 6, 7, 4, 7),
            [[0] * 8] * 4 + [[0] * 7 + [-4], [0] * 7 + [-2], [0] * 8,
                             [0] * 4 + [-1, -2, -3, 4]])

    def test_print_action_boom(self):
        string_action = Board().string_action
        self.assertEqual(string_action([BOOM, (0, 0)]), "BOOM at (0, 0).")
        self.assertEqual(string_action([BOOM, (5, 2)]), "BOOM at (5, 2).")

    def test_print_action_move(self):
        string_action = Board().string_action
        self.assertEqual(string_action([MOVE, (1, 0, 0, 0, 1)]),
                         "MOVE 1 from (0, 0) to (0, 1).")
        self.assertEqual(string_action([MOVE, (5, 2, 3, 7, 3)]),
                         "MOVE 5 from (2, 3) to (7, 3).")
Example #41
def cycle():
    # ragged rows (6, 2, 6) need dtype=object; modern NumPy refuses to guess
    startboard = np.array([[4] * 6, [0] * 2, [4] * 6], dtype=object)  # starting board
    b = Board(startboard)

    c = 1
    player = 0
    empty = [0] * 6
    moves = 0
    while b.board[0] != empty and b.board[2] != empty:  # while at least one side of the board has stones in it
        pos = r.randint(1, 6)
        if c % 2 == 1:
            player = 1
        elif c % 2 == 0:
            player = 2

        if player == 1:
            b.movep1(pos - 1)
            moves += 1
            while b.goagainp1:
                b.movep1(pos - 1)
                moves += 1
        elif player == 2:
            b.movep2(pos - 1)
            moves += 1
            while b.goagainp2:
                b.movep2(pos - 1)
                moves += 1

        c += 1
    if b.board[0] == empty:
        b.board[1][1] += sum(b.board[2])
        b.board[2] = empty
    elif b.board[2] == empty:
        b.board[1][0] += sum(b.board[0])
        b.board[0] = empty
    score = '{}-{}'.format(max(b.board[1]), min(b.board[1]))
    if b.board[1].index(max(b.board[1])) == 0 and not (max(b.board[1]) == min(b.board[1])):
        winner = 2
    elif b.board[1].index(max(b.board[1])) == 1 and not (max(b.board[1]) == min(b.board[1])):
        winner = 1
    else:
        winner = 0
    '''
    print(b)
    if winner == 0:
        print("It's a draw! Final score " + score)
    else:
        print("Player {} won! Final score ".format(winner) + score)
    '''
    return winner, moves
Example #42
class TrainPipeline():
    def __init__(self, init_model=None):
        # params of the board and the game
        self.board_width = 15
        self.board_height = 15
        self.n_in_row = 5
        self.board = Board(width=self.board_width,
                           height=self.board_height,
                           n_in_row=self.n_in_row)
        self.game = Game(self.board)
        self.manual = Manual(self.board)
        # training params
        self.learn_rate = 1e-3
        self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
        self.temp = 1.0  # the temperature param
        self.n_playout = 100  # num of simulations for each move
        self.c_puct = 1
        self.buffer_size = 100000
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5  # num of train_steps for each update
        self.episode_len = 0
        self.kl_targ = 0.02
        self.check_freq = 1
        self.game_batch_num = 5
        self.best_win_ratio = 0.55
        # num of simulations used for the pure mcts, which is used as
        # the opponent to evaluate the trained policy
        self.pure_mcts_playout_num = 1000
        self.lock = threading.Lock()
        if init_model:
            # start training from an initial policy-value net
            self.g1 = tf.Graph()
            with self.g1.as_default():
                self.policy_value_net = PolicyValueNet(self.board_width,
                                                       self.board_height,
                                                       model_file=init_model,
                                                       graph=self.g1,
                                                       output='/data/data/')
            # tf.reset_default_graph()
            self.g2 = tf.Graph()
            with self.g2.as_default():
                self.policy_value_net_train = PolicyValueNet(self.board_width,
                                                             self.board_height,
                                                             model_file=init_model,
                                                             graph=self.g2,
                                                             output='/data/output/')
        else:
            # start training from a new policy-value net
            self.g1 = tf.Graph()
            with self.g1.as_default():
                self.policy_value_net = PolicyValueNet(self.board_width,
                                                       self.board_height,
                                                       graph=self.g1,
                                                       output='./data/')
            # tf.reset_default_graph()
            self.g2 = tf.Graph()
            with self.g2.as_default():
                self.policy_value_net_train = PolicyValueNet(self.board_width,
                                                             self.board_height,
                                                             graph=self.g2,
                                                             output='./output/')

        self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playout,
                                      is_selfplay=1)

    def get_equi_data(self, play_data):
        """augment the data set by rotation and flipping
        play_data: [(state, mcts_prob, winner_z), ..., ...]
        """
        extend_data = []
        for state, mcts_prob, winner in play_data:
            for i in [1, 2, 3, 4]:
                # rotate counterclockwise
                equi_state = np.array([np.rot90(s, i) for s in state])
                equi_mcts_prob = np.rot90(np.flipud(
                    mcts_prob.reshape(self.board_height, self.board_width)), i)
                extend_data.append((equi_state,
                                    np.flipud(equi_mcts_prob).flatten(),
                                    winner))
                # flip horizontally
                equi_state = np.array([np.fliplr(s) for s in equi_state])
                equi_mcts_prob = np.fliplr(equi_mcts_prob)
                extend_data.append((equi_state,
                                    np.flipud(equi_mcts_prob).flatten(),
                                    winner))
        return extend_data

    def collect_selfplay_data(self, n_games=1):
        """collect self-play data for training"""
        for i in range(n_games):
            # self.lock.acquire()
            # print("game {}".format(i))
            with self.g1.as_default():
                '''mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                         c_puct=self.c_puct,
                                         n_playout=self.n_playout,
                                         is_selfplay=1)
                board = Board(width=self.board_width,
                              height=self.board_height,
                              n_in_row=self.n_in_row)
                game = Game(board)'''
                winner, play_data = self.game.start_self_play(self.mcts_player,
                                                              is_shown=0,
                                                              temp=self.temp)
            # self.lock.release()

            play_data = list(play_data)[:]
            self.episode_len = len(play_data)
            # augment the data
            play_data = self.get_equi_data(play_data)
            self.data_buffer.extend(play_data)

        # print("self play end...")

    def collect_manual_data(self, file):
        winner, play_data = self.manual.read_manual_data(file)
        # failed to read the chess manual
        if winner == 0:
            return

        play_data = list(play_data)[:]
        self.episode_len = len(play_data)
        # augment the data
        play_data = self.get_equi_data(play_data)
        self.data_buffer.extend(play_data)

    def collect_test_data(self):
        self.board.init_board()
        states, mcts_probs, current_players = [], [], []
        move = 128
        self.board.do_move(112)
        states.append(self.board.current_state())
        probs = np.zeros(self.board.width * self.board.height)
        probs[[move]] = 1
        mcts_probs.append(probs)
        current_players.append(self.board.current_player)
        winners_z = np.array([1])
        play_data = zip(states, mcts_probs, winners_z)
        play_data = list(play_data)[:]
        self.data_buffer.extend(play_data)

    def policy_update(self):
        """update the policy-value net"""
        mini_batch = random.sample(self.data_buffer, self.batch_size)
        state_batch = [data[0] for data in mini_batch]
        mcts_probs_batch = [data[1] for data in mini_batch]
        winner_batch = [data[2] for data in mini_batch]
        with self.g2.as_default():
            for i in range(self.epochs):
                loss, entropy = self.policy_value_net_train.train_step(
                        state_batch,
                        mcts_probs_batch,
                        winner_batch,
                        self.learn_rate*self.lr_multiplier)

        print((
               "lr_multiplier:{:.3f},"
               "loss:{},"
               "entropy:{},"
               ).format(
                        self.lr_multiplier,
                        loss,
                        entropy))
        return loss, entropy

    def policy_evaluate(self, n_games=10):
        """
        Evaluate the trained policy by playing against the pure MCTS player
        Note: this is only for monitoring the progress of training
        """
        print("evaluating...")
        current_mcts_player = MCTSPlayer(self.policy_value_net_train.policy_value_fn,
                                         c_puct=self.c_puct,
                                         n_playout=self.pure_mcts_playout_num)
        best_mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.pure_mcts_playout_num)

        win_cnt = defaultdict(int)
        for i in range(n_games):
            winner = self.game.start_play(current_mcts_player,
                                          best_mcts_player,
                                          start_player=i % 2,
                                          is_shown=0)
            win_cnt[winner] += 1
        win_ratio = 1.0*(win_cnt[1] + 0.5*win_cnt[-1]) / n_games
        print("num_playouts:{}, win: {}, lose: {}, tie:{}".format(
                self.pure_mcts_playout_num,
                win_cnt[1], win_cnt[2], win_cnt[-1]))

        # save the current_model
        self.policy_value_net_train.save_model('/data/output/current_policy.model')
        if win_ratio > self.best_win_ratio:
            print("New best policy!!!!!!!!")
            # update the best_policy
            self.policy_value_net_train.save_model('/data/output/best_policy.model')
            self.g1 = tf.Graph()
            with self.g1.as_default():
                self.policy_value_net = PolicyValueNet(self.board_width,
                                                       self.board_height,
                                                       model_file='/data/output/best_policy.model',
                                                       graph=self.g1,
                                                       output='/data/data/')

        return win_ratio

    def run(self):
        """run the training pipeline"""
        try:
            '''coord = tf.train.Coordinator()
            self_play = [threading.Thread(target=self.collect_selfplay_data, args=(self.play_batch_size,)) for i in range(4)]
            for sp in self_play:
                sp.start()
            coord.join(self_play)
            while len(self.data_buffer) < self.batch_size:
                print(len(self.data_buffer))
                time.sleep(3)
                pass'''
            multiplier = [0.1, 0.1, 0.01, 0.01, 0.01]
            step = 0
            for n in range(self.game_batch_num):
                self.collect_selfplay_data(self.play_batch_size)
                # self.collect_test_data()
                self.policy_value_net.n_step += 1

                print("batch i:{}, episode_len:{}".format(
                   self.policy_value_net.n_step, self.episode_len))

                # optimisation
                if len(self.data_buffer) > self.batch_size:
                    for i in range(100):
                        self.policy_update()

                # evaluation
                if self.policy_value_net.n_step % self.check_freq == 0:
                    # self.lr_multiplier = multiplier[step]
                    # step += 1
                    self.mcts_player.mcts._discount = 1 - 0.98*(1 - self.mcts_player.mcts._discount)
                    print("current self-play batch: {}, discount: {}".format(
                        self.policy_value_net.n_step, self.mcts_player.mcts._discount))

                    # self.lock.acquire()
                    self.policy_evaluate(n_games=15)
                    # self.lock.release()
        except KeyboardInterrupt:
            print('\n\rquit')
Example #43
def game():
    # For screen-casting
    if RECORD_SCREEN:
        recorder = ScreenRecorder(WIDTH, HEIGHT, FPS, out_file=OUTPUT_FILE)
    run = True
    board = Board.from_start_position()
    solver = AutoSolver(board)
    selected_piece = None

    # A surface to draw the board onto.
    board_surf = pygame.Surface(BOARD_SIZE)
    loader = Loader()

    def draw():
        board_color = (205, 127, 50)
        text_background = (0, 100, 255)
        text_color = (255, 255, 255)
        # Fill the window and the board
        win.fill(darken_color(board_color, 0.5))
        board_surf.fill(board_color)

        # Draw the title label onto the window
        pygame.draw.rect(win, text_background, (TITLE_OFFSETS, TITLE_SIZE))
        title_label = main_font.render("KLOTSKI PUZZLE", 1, text_color)
        win.blit(title_label,
                 (TITLE_OFFSETS[0] + TITLE_SIZE[0] // 2 -
                  title_label.get_width() // 2, TITLE_OFFSETS[1] +
                  TITLE_SIZE[1] // 2 - title_label.get_height() // 2))

        # Draw the steps label onto the window
        pygame.draw.rect(win, text_background, (SCORE_OFFSETS, SCORE_SIZE))
        steps_label = main_font.render(f"Step {board.number_of_steps}", 1,
                                       text_color)
        win.blit(steps_label,
                 (SCORE_OFFSETS[0] + SCORE_SIZE[0] // 2 -
                  steps_label.get_width() // 2, SCORE_OFFSETS[1] +
                  SCORE_SIZE[1] // 2 - steps_label.get_height() // 2))

        # Draw the board and copy it onto the window
        board.draw(board_surf, TILE_SIZE)
        win.blit(board_surf, BOARD_OFFSETS)

        if board.is_solved:
            # Show the message when game is solved
            # NOTE: The game does not end when the puzzle is solved; the user can continue playing.
            success_label = main_font.render("Congratulations!", True,
                                             text_color)
            win.blit(success_label,
                     (BOARD_OFFSETS[0] + BOARD_SIZE[0] // 2 -
                      success_label.get_width() // 2, BOARD_OFFSETS[1] +
                      BOARD_SIZE[1] // 2 - success_label.get_height() // 2))

        if solver.loading:
            # Show a loader when auto-solver is computing the moves.
            loader.draw(
                win,
                pygame.Rect(
                    (WIDTH // 2 - TILE_SIZE // 2, HEIGHT // 2 - TILE_SIZE // 2,
                     TILE_SIZE, TILE_SIZE)))

    def handle_select(pos):
        # Handles mouse button down event.
        # Sets the selected_piece if a piece is selected
        nonlocal selected_piece
        selected_piece = None
        pos = pos[0] - BOARD_OFFSETS[0], pos[1] - BOARD_OFFSETS[1]
        if 0 <= pos[0] < BOARD_SIZE[0] and 0 <= pos[1] < BOARD_SIZE[1]:
            position = Position(pos[0] // TILE_SIZE, pos[1] // TILE_SIZE)
            selected_piece = board.get_piece(position)

    def handle_drop(pos):
        # Handles mouse button up event.
        # Moves the selected_piece to the specified position, if allowed.
        # Specified position must be an empty position!
        nonlocal selected_piece
        pos = pos[0] - BOARD_OFFSETS[0], pos[1] - BOARD_OFFSETS[1]
        if 0 <= pos[0] < BOARD_SIZE[0] and 0 <= pos[1] < BOARD_SIZE[1]:
            click_position = Position(pos[0] // TILE_SIZE, pos[1] // TILE_SIZE)
            if selected_piece:
                possible_pos = board.can_move(selected_piece, click_position)
                if possible_pos:
                    board.move(selected_piece, possible_pos)

    def reset():
        # Create a fresh board to reset the game
        nonlocal board, selected_piece, solver
        board = Board.from_start_position()
        selected_piece = None
        # Reset the solver as well
        solver = AutoSolver(board)

    def handle_user_event(_event):
        nonlocal selected_piece
        if _event.type == pygame.KEYDOWN:
            # Board reset
            if _event.key == pygame.K_r:
                reset()

            # History events
            if _event.key == pygame.K_LEFT:
                board.history_back()
            if _event.key == pygame.K_RIGHT:
                board.history_forward()

            # Solver
            if _event.key == pygame.K_a:  # Normal Solver
                selected_piece = None
                solver.enable()
            if _event.key == pygame.K_s:  # Fast solver
                selected_piece = None
                solver.enable(int(FPS * 0.1))

        if _event.type == pygame.MOUSEBUTTONDOWN and _event.button == 1:  # left click
            handle_select(_event.pos)

        if _event.type == pygame.MOUSEBUTTONUP and _event.button == 1:  # left click
            handle_drop(_event.pos)

    while run:
        draw()
        pygame.display.update()
        if RECORD_SCREEN:
            recorder.capture_frame(win)
        solver.loop()

        for event in pygame.event.get():
            if event.type == pygame.QUIT or \
                    (event.type == pygame.KEYDOWN and event.key == pygame.K_q):
                run = False

            if not solver.enabled:
                # User inputs taken only when solver not running
                handle_user_event(event)

        if not solver.enabled:
            # Arrow keys for navigating history
            # Allows continuous (held) key presses
            keys = pygame.key.get_pressed()
            if keys[pygame.K_DOWN]:
                board.history_back()
            elif keys[pygame.K_UP]:
                board.history_forward()

        clock.tick(FPS)

    if RECORD_SCREEN:
        recorder.stop()
    pygame.quit()
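The draw() routine above repeats the same centering arithmetic for every label it blits. A small helper in the same spirit could factor that pattern out; this is a sketch only, with win, the rendered labels, and the offset/size constants taken as given from the snippet above:

def blit_centered(surface, label, offsets, size):
    # Blit `label` so its center coincides with the center of the
    # rectangle whose top-left corner is `offsets` and extent is `size`.
    x = offsets[0] + size[0] // 2 - label.get_width() // 2
    y = offsets[1] + size[1] // 2 - label.get_height() // 2
    surface.blit(label, (x, y))

# e.g. blit_centered(win, title_label, TITLE_OFFSETS, TITLE_SIZE)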
Exemplo n.º 45
0
 def __init__(self):
     """
     關於訓練的初始設置
     
     *補充說明
     kl 用於計算 lr (learning rate)
     """
     # run() -----------------------------------------------------------------------------------
     self.game_batch_num = -1   # number of training iterations per run; a negative value means unlimited
     self.play_batch_size = 1   # number of self-play games per iteration
     self.batch_size = 1024     # training batch size; the policy is updated once data_buffer exceeds this many samples
     self.check_freq = 50       # play against pure MCTS every check_freq training iterations
     self.save_freq = 50        # save the model every save_freq training iterations
     
     # collect_selfplay_data() -----------------------------------------------------------------
     self.buffer_size = 10000
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.kl_targ = 0.02
     
     # policy_update() -------------------------------------------------------------------------
     self.epochs = 5            # number of epochs per update
     
     # board -----------------------------------------------------------------------------------
     self.board_width = 9        # board width
     self.board_height = 9       # board height
     self.n_in_row = 5           # how many stones in a row win
     self.board = Board(width=self.board_width,
                         height=self.board_height,
                         n_in_row=self.n_in_row)
     self.game = Game(self.board)
     
     # keras -----------------------------------------------------------------------------------
     self.learn_rate = 2e-3
     self.lr_multiplier = 1.0    # adaptively adjusted learning-rate multiplier based on KL
     self.temp = 1.0             # temperature parameter; too small makes training insufficiently exploratory
     
     file_folder = './n400-o'
     model_tag = '9_9_5_o'
     self.current_model = f'{file_folder}/current_model_{model_tag}.h5'
     self.best_model = f'{file_folder}/best_model_{model_tag}.h5'
     init_model = self.current_model
     
     self.policy_value_net = PolicyValueNet(self.board_width,
                                     self.board_height,
                                     model_file = init_model if os.path.exists(init_model) else None)
     
     self.progress = file_folder + '/progress.csv'
     self.evaluate_path = file_folder + '/evaluate.csv'
     
     self.history_path = file_folder + '/history.csv'
     self.history = []
     
     # MCTS ------------------------------------------------------------------------------------
     self.c_puct = 5    # MCTS exploration preference
     self.loss_goal = 0 #! when saving, a loss below this value increases n_playout during training
     self.pure_mcts_playout_num = 1000   # pure-MCTS simulations per move; raised as the model gets stronger
     self.pure_mcts_playout_num_upgrade = 1000   # increment added to the pure-MCTS simulation count as the model gets stronger
     self.best_win_ratio = 0.0
     
     self.n_playout = 400 # simulations per move for the network player; larger values lean more on MCTS search, smaller ones on the network's judgment
     self.n_playout_training = 400
     self.n_playout_growth = 0
     self.n_playout_limit = 2000
     self.MCTS_levelup()
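The note above says kl drives the learning rate: the KL divergence between the policy's move distributions before and after an update scales lr_multiplier (the rule itself appears in policy_update() in a later example on this page). A worked sketch of that KL computation; the two three-move distributions are made up for illustration:

import numpy as np

old_probs = np.array([[0.5, 0.3, 0.2]])    # assumed pre-update policy
new_probs = np.array([[0.45, 0.35, 0.2]])  # assumed post-update policy
kl = np.mean(np.sum(old_probs * (np.log(old_probs + 1e-10)
                                 - np.log(new_probs + 1e-10)), axis=1))
# kl is ~0.006 here; policy_update() stops its epoch loop early
# once kl exceeds 4 * kl_targ.
print(kl)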
Exemplo n.º 46
0
def play():
    board = Board()
    A, B = Player(board, 'white'), Player(board, 'black')
Exemplo n.º 47
0
class MctsTest():
    def __init__(self, init_model=None):
        # 8x8 board, 5 stones in a row to win
        self.board_width = 8
        self.board_height = 8
        self.n_in_row = 5  # n stones in a row
        self.policy_evaluate_size = 2  # number of games to play when evaluating the policy's win rate
        self.batch_size = 1  # model training starts once data_buffer holds more than this many games
        self.board = Board(width=self.board_width,
                           height=self.board_height,
                           n_in_row=self.n_in_row)
        self.game = Game(self.board)
        # training params
        self.learn_rate = 2e-3
        self.lr_multiplier = 1.0  # adaptive learning rate based on KL
        self.temp = 1.0  # the temperature param
        self.n_playout = 400  # number of simulations per move
        self.c_puct = 5
        self.buffer_size = 10000  # number of game records to cache
        self.data_buffer = deque(maxlen=self.buffer_size)  # full game history, used for training
        self.epochs = 5  # number of training steps per policy-value-net update
        self.kl_targ = 0.02  # KL target for the policy-value net
        self.best_win_ratio = 0.0
        # pure-MCTS simulation count, used to evaluate the policy model
        self.pure_mcts_playout_num = 5
        self.policy_value_net = PolicyValueNet(self.board_width,
                                               self.board_height)
        # create an MCTS player that uses the policy-value net to guide tree search and evaluate leaf nodes
        """self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playout,
                                      is_selfplay=1)"""

    def get_equi_data(self, play_data):
        """
        通过旋转和翻转增加数据集
        play_data: [(state, mcts_prob, winner_z), ..., ...]
        """
        extend_data = []
        for state, mcts_porb, winner in play_data:
            for i in [1, 2, 3, 4]:
                # 逆时针旋转
                equi_state = np.array([np.rot90(s, i) for s in state])
                equi_mcts_prob = np.rot90(
                    np.flipud(
                        mcts_porb.reshape(self.board_height,
                                          self.board_width)), i)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
                # flip horizontally
                equi_state = np.array([np.fliplr(s) for s in equi_state])
                equi_mcts_prob = np.fliplr(equi_mcts_prob)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
        return extend_data

    def policy_update(self):
        """更新策略价值网络policy-value"""
        # 随机抽取data_buffer中的对抗数据
        mini_batch = random.sample(self.data_buffer, self.batch_size)
        state_batch = [data[0] for data in mini_batch]
        mcts_probs_batch = [data[1] for data in mini_batch]
        winner_batch = [data[2] for data in mini_batch]
        old_probs, old_v = self.policy_value_net.policy_value(state_batch)
        # train the policy-value network
        for i in range(self.epochs):
            loss, entropy = self.policy_value_net.train_step(
                state_batch, mcts_probs_batch, winner_batch,
                self.learn_rate * self.lr_multiplier)
            new_probs, new_v = self.policy_value_net.policy_value(state_batch)
            kl = np.mean(
                np.sum(old_probs *
                       (np.log(old_probs + 1e-10) - np.log(new_probs + 1e-10)),
                       axis=1))
            if kl > self.kl_targ * 4:  # stop early if D_KL diverges badly
                break
        # adaptively adjust the learning rate
        if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1:
            self.lr_multiplier /= 1.5
        elif kl < self.kl_targ / 2 and self.lr_multiplier < 10:
            self.lr_multiplier *= 1.5

        explained_var_old = (1 -
                             np.var(np.array(winner_batch) - old_v.flatten()) /
                             np.var(np.array(winner_batch)))
        explained_var_new = (1 -
                             np.var(np.array(winner_batch) - new_v.flatten()) /
                             np.var(np.array(winner_batch)))
        logging.info(
            ("TEST kl:{:.5f},"
             "lr_multiplier:{:.3f},"
             "loss:{},"
             "entropy:{},"
             "explained_var_old:{:.3f},"
             "explained_var_new:{:.3f}").format(kl, self.lr_multiplier, loss,
                                                entropy, explained_var_old,
                                                explained_var_new))
        return loss, entropy

    def policy_evaluate(self, n_games=10):
        """
        策略胜率评估:模型与纯MCTS玩家对战n局看胜率
        """
        # AlphaGo Zero风格的MCTS玩家(使用策略价值网络来指导树搜索和评估叶节点)
        current_mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                         c_puct=self.c_puct,
                                         n_playout=self.n_playout)
        # pure-MCTS player
        pure_mcts_player = MCTSPurePlayer(c_puct=5,
                                          n_playout=self.pure_mcts_playout_num)
        win_cnt = defaultdict(int)
        for i in range(n_games):
            # play one game
            winner = self.game.start_play(current_mcts_player,
                                          pure_mcts_player,
                                          start_player=i % 2,
                                          is_shown=0)
            win_cnt[winner] += 1
        # win rate
        win_ratio = 1.0 * (win_cnt[1] + 0.5 * win_cnt[-1]) / n_games
        logging.info("TEST Num_playouts:{}, win: {}, lose: {}, tie:{}".format(
            self.pure_mcts_playout_num, win_cnt[1], win_cnt[2], win_cnt[-1]))
        return win_ratio

    def run(self):
        """启动训练"""
        try:

            # test
            # initialize the board
            self.board.init_board()
            print(self.board)
            print(self.board.current_player)
            print(self.board.availables)
            print(self.board.states)
            print(self.board.last_move)

            p1, p2 = self.board.players
            states, mcts_probs, current_players = [], [], []
            # pure-MCTS player
            #player = self.mcts_player
            player = MCTSPurePlayer(c_puct=5,
                                    n_playout=self.pure_mcts_playout_num)
            print('------get_action------')
            #move, move_probs = player.get_action(self.board, temp=self.temp, return_prob=1)
            move = player.get_action(self.board)
            print(move)
            """# 保存当前盘面
            states.append(self.board.current_state())
            current_players.append(self.board.current_player)
            # 执行落子
            print('------do_move------')
            self.board.do_move(move)
            self.game.graphic(self.board, p1, p2)
            # 检查游戏是否结束
            print('------check_game_end------')
            end, winner = self.board.game_end()
            if end:
                # 从当前玩家视角确定winner
                winners_z = np.zeros(len(current_players))
                if winner != -1:  # 不是和棋
                    winners_z[np.array(current_players) == winner] = 1.0  # 更新赢家步骤位置=1
                    winners_z[np.array(current_players) != winner] = -1.0  # 更新输家步骤位置=-1
                # 重置MCTS根结点
                player.reset_player()
                if winner != -1:
                    print("Game end. Winner is player:", winner)
                else:
                    print("Game end. Tie")
                print(winner, zip(states, mcts_probs, winners_z))
"""
            """
            i=0
            # 1.收集自我对抗数据
            # 使用MCTS蒙特卡罗树搜索进行自我对抗
            winner, play_data = self.game.start_self_play(self.mcts_player, temp=self.temp)
            play_data = list(play_data)[:]
            self.episode_len = len(play_data)
            print(play_data)
            print(self.episode_len)
            # 把翻转棋盘数据加到数据集里
            play_data = self.get_equi_data(play_data)
            # 保存对抗数据到data_buffer
            self.data_buffer.extend(play_data)
            logging.info("TEST Batch i:{}, episode_len:{}".format(i + 1, self.episode_len))

            # 2.使用对抗数据重新训练策略价值网络模型
            if len(self.data_buffer) >= self.batch_size:
                loss, entropy = self.policy_update()

            # 3.检查一下当前模型胜率
            logging.info("TEST Current self-play batch: {}".format(i + 1))
            # 策略胜率评估:模型与纯MCTS玩家对战n局看胜率
            win_ratio = self.policy_evaluate(self.policy_evaluate_size)
            self.policy_value_net.save_model(CUR_PATH + '/model/current_test_{}_{}.model'.format(self.board_width, self.board_height))
            if win_ratio > self.best_win_ratio:  # 胜率超过历史最优模型
                logging.info("TEST New best policy!!!!!!!!batch:{} win_ratio:{}->{} pure_mcts_playout_num:{}".format(i + 1, self.best_win_ratio, win_ratio, self.pure_mcts_playout_num))
                self.best_win_ratio = win_ratio
                # 保存当前模型为最优模型best_policy
                self.policy_value_net.save_model(CUR_PATH + '/model/best_test_{}_{}.model'.format(self.board_width, self.board_height))
                # 如果胜率=100%,则增加纯MCT的模拟数
                if (self.best_win_ratio == 1.0 and self.pure_mcts_playout_num < 5000):
                    self.pure_mcts_playout_num += 1000
                    self.best_win_ratio = 0.0
            """
        except KeyboardInterrupt:
            logging.info('\n\rquit')
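get_equi_data() above enlarges the training set with the eight symmetries of the square board: four counter-clockwise rotations, each paired with a flip. A standalone sketch of the same idea on a bare matrix, assuming nothing beyond numpy:

import numpy as np

def eight_symmetries(state):
    # Yield the 8 dihedral symmetries of a square board state:
    # 4 counter-clockwise rotations, each with its left-right mirror.
    for k in range(1, 5):
        rotated = np.rot90(state, k)
        yield rotated
        yield np.fliplr(rotated)

board = np.arange(9).reshape(3, 3)  # asymmetric toy board
assert len({sym.tobytes() for sym in eight_symmetries(board)}) == 8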
Exemplo n.º 48
0
	game_input = input("game: ")
	if game_input == "n":
		game_num += 1
	elif game_input == "exit":
		break
	else:
		game_num = int(game_input)
	turn_num = 0
	game = all_data[game_num]
	while True:
		turn_input = input("turn: ")
		if turn_input == "n" or turn_input == "":
			turn_num += 1
		elif turn_input == "b":
			break
		else:
			turn_num = int(turn_input)
		states = game["states"]
		turn = states[turn_num]
		not_board = Board()

		not_board.board[1:-1, 1:-1] = idtobits(np.array(turn["board"]))

		not_board.print_board()
		print(turn["player_turn"] - 1)
		print(turn["pieces_left"])
	
#interesting games:
#599

Exemplo n.º 49
0
 def test_new_board_starts_with_two_squares(self):
     b = Board()
     self.assertFalse(b.is_board_full())
     self.assertEqual(len(b.empty_tiles()), len(b._tiles.flatten()) - 2)
Exemplo n.º 50
0
    def preprocess_board(board_mtx, y, random=True, contain_liberty=False):
        if random:
            rand = np.random.randint(0, 8)
            if rand <= 3:
                # transpose in half of the cases
                board_mtx = board_mtx.T
                y['current_move'] = (y['current_move'][1],
                                     y['current_move'][0])
                y['next_move'] = (y['next_move'][1], y['next_move'][0])
            # rotate 90 degrees counter-clockwise (rand % 4) times,
            # updating the move coordinates to match: (row, col) -> (18 - col, row)
            for _ in range(rand % 4):
                board_mtx = np.rot90(board_mtx)
                y['current_move'] = (18 - y['current_move'][1],
                                     y['current_move'][0])
                y['next_move'] = (18 - y['next_move'][1], y['next_move'][0])

        black_stones = np.zeros((19, 19, 1), dtype=np.uint8)
        black_stones[board_mtx == 1] = 1
        white_stones = np.zeros((19, 19, 1), dtype=np.uint8)
        white_stones[board_mtx == 2] = 1

        if contain_liberty:
            black_liberty = np.zeros((19, 19, 8), dtype=np.uint8)
            white_liberty = np.zeros((19, 19, 8), dtype=np.uint8)
            visited = {}
            for i in range(19):
                for j in range(19):
                    if board_mtx[i][j] == 1 and (i, j) not in visited:
                        groups = Board.get_group(i,
                                                 j,
                                                 board_mtx,
                                                 visited=visited)
                        num_liberty = Board.check_liberty(groups,
                                                          board_mtx,
                                                          cnt=True)
                        if num_liberty > 8:
                            num_liberty = 8
                        for stone in groups:
                            black_liberty[stone[0]][stone[1]][num_liberty -
                                                              1] = 1

                    if board_mtx[i][j] == 2 and (i, j) not in visited:
                        groups = Board.get_group(i,
                                                 j,
                                                 board_mtx,
                                                 visited=visited)
                        num_liberty = Board.check_liberty(groups,
                                                          board_mtx,
                                                          cnt=True)
                        if num_liberty > 8:
                            num_liberty = 8
                        for stone in groups:
                            white_liberty[stone[0]][stone[1]][num_liberty -
                                                              1] = 1

            black_stones = np.concatenate((black_stones, black_liberty),
                                          axis=2)
            white_stones = np.concatenate((white_stones, white_liberty),
                                          axis=2)

        stones = np.concatenate((black_stones, white_stones), axis=2)

        ones = np.ones((19, 19, 1), dtype=np.uint8)
        # int8 rather than uint8, so the ko branch below can store -1
        last_move = np.zeros((19, 19, 1), dtype=np.int8)
        if not y['ko_state:']:
            last_move[y['current_move'][0]][y['current_move'][1]] = 1
        else:
            last_move[y['current_move'][0]][y['current_move'][1]] = -1

        is_black_next = np.ones((19, 19, 1), dtype=np.uint8)
        if y['next_to_play'] == 2:
            is_black_next -= 1

        feat = np.concatenate((stones, last_move, is_black_next, ones), axis=2)

        return feat
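The stone planes in preprocess_board() are plain boolean masks over the board matrix. A tiny self-contained sketch with the same encoding (0 = empty, 1 = black, 2 = white) on a 19x19 board; the stone positions are made up for illustration:

import numpy as np

board_mtx = np.zeros((19, 19), dtype=np.uint8)
board_mtx[3, 3] = 1    # a black stone (assumed position)
board_mtx[15, 16] = 2  # a white stone (assumed position)

black_stones = (board_mtx == 1).astype(np.uint8)[:, :, np.newaxis]
white_stones = (board_mtx == 2).astype(np.uint8)[:, :, np.newaxis]
stones = np.concatenate((black_stones, white_stones), axis=2)
assert stones.shape == (19, 19, 2) and stones.sum() == 2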
Exemplo n.º 51
0
 def test_can_make_moves(self):
     b = Board(seed=False)
     # Make sure it's fully empty first
     self.assertEqual(len(b.empty_tiles()), 16)
     # Put a square in the top left corner
     val = 1
     pos = 0
     b._tiles.put(pos, val)
     # Move in the negative direction along the rows (up)
     # Then move in the negative direction along the columns (left)
     # Should do nothing, we are at the edge
     for d in (1, 3):
         b.move(d, suppress_invalid=True)
         self.assertEqual(b._tiles.take(pos), val)
     # Move in the positive direction along the rows (down)
     # This should move three rows
     b.move(0)
     pos += b.length * 3
     self.assertEqual(b._tiles.take(pos), val)
     # At the bottom - movement should do nothing
     b.move(0, suppress_invalid=True)
     self.assertEqual(b._tiles.take(pos), val)
     # Move in the positive direction along the columns (right)
     # This should move three columns
     b.move(2)
     pos += 3
     self.assertEqual(b._tiles.take(pos), val)
     # At the right - movement should do nothing
     b.move(2, suppress_invalid=True)
     self.assertEqual(b._tiles.take(pos), val)
Exemplo n.º 52
0
 def test_print_action_move(self):
     string_action = Board().string_action
     self.assertEqual(string_action([MOVE, (1, 0, 0, 0, 1)]),
                      "MOVE 1 from (0, 0) to (0, 1).")
     self.assertEqual(string_action([MOVE, (5, 2, 3, 7, 3)]),
                      "MOVE 5 from (2, 3) to (7, 3).")
Exemplo n.º 53
0
def test_board():
    print("Board() class tests")
    dimension = int(sys.argv[1])
    board = Board(dimension)
    board_2 = Board(dimension)
    print("Imprimimos tablero vacio: ")
    print(board)
    board.update_board([0, 2], 'X')
    board.update_board([0, 0], 'O')
    board.update_board([1, 2], 'X')
    board.update_board([2, 2], 'X')
    board.update_board([1, 0], 'X')
    board.update_board([2, 0], 'O')
    board.update_board([0, 1], 'O')
    board.update_board([1, 1], 'X')
    board.update_board([2, 1], 'X')
    if dimension == 4:
        board.update_board([3, 3], 'X')
    print("Imprimimos tablero con contenido: ")
    print(board)
    print(board.is_tateti())
    print(board.get_board())
    print(board.get_id())
    print(board.get_dimension())
    # board_2
    print(board_2)
    print(board_2.is_tateti())
    board_2.update_board([0, 0], 'X')
    print(board_2)
    print(board_2.is_tateti())
Exemplo n.º 54
0
def test_player():
    print("Player() class tests")
    dimension = 3
    board_player = Board(dimension)
    print("Imprimimos tablero vacio: ")
    print(board_player)
    board_player.update_board([0, 2], 'X')
    board_player.update_board([0, 0], 'O')
    board_player.update_board([1, 2], 'X')
    board_player.update_board([2, 2], 'X')
    board_player.update_board([1, 0], 'X')
    board_player.update_board([2, 0], 'O')
    board_player.update_board([0, 1], 'O')
    board_player.update_board([1, 1], 'X')
    #board_player.update_board([2, 1], 'X')
    print(board_player)
    player_1 = Player('Joaquin', 0, 0, 0)
    player_2 = Player('Xano', 1, 1, 1)
    print(player_1)
    print(player_2)
    player_1.movement(board_player)
    print(board_player)
    print(board_player.is_tateti())
Exemplo n.º 55
0
    def initUI(self):
        self.tup = (None, None)
        self.board = Board()  # board class
        self.board.init_board(1)
        palette1 = QPalette()  # set the board background
        palette1.setBrush(self.backgroundRole(),
                          QtGui.QBrush(QtGui.QPixmap('img/linesofaction.png')))
        self.setPalette(palette1)
        # self.setStyleSheet("board-image:url(img/chessboard.jpg)")  # not sure why this doesn't work
        self.setCursor(Qt.PointingHandCursor)  # change the cursor to a pointing hand
        # self.sound_piece = QSound("sound/luozi.wav")  # load the piece-drop sound effect
        # self.sound_win = QSound("sound/win.wav")  # load the victory sound effect
        # self.sound_defeated = QSound("sound/defeated.wav")  # load the defeat sound effect

        self.resize(WIDTH, HEIGHT)  # fixed size 540*540
        self.setMinimumSize(QtCore.QSize(WIDTH, HEIGHT))
        self.setMaximumSize(QtCore.QSize(WIDTH, HEIGHT))

        self.setWindowTitle("Lines-Of-Action")  # window title
        self.setWindowIcon(QIcon('img/black.png'))  # window icon

        # self.lb1 = QLabel('            ', self)
        # self.lb1.move(20, 10)

        self.black = QPixmap('img/black.png')
        self.white = QPixmap('img/white.png')

        self.piece_now = BLACK  # black moves first
        self.my_turn = True  # the human player moves first
        self.step = 0  # step count
        self.x, self.y = 1000, 1000

        #self.mouse_point = LaBel(self)  # use the mouse image as a piece
        # self.mouse_point.setScaledContents(True)
        # self.mouse_point.setPixmap(self.black)  # load the black piece
        # self.mouse_point.setGeometry(270, 270, PIECE, PIECE)
        self.pieces = [[LaBel(self) for _ in range(8)]
                       for _ in range(8)]  # piece labels for drawing pieces on the board
        for i in range(8):
            for j in range(8):
                self.pieces[i][j].setVisible(True)
                self.pieces[i][j].setScaledContents(True)
        #self.mouse_point.raise_()  # keep the mouse always on top
        self.ai_down = True  # True once the AI has moved; acts as a lock: while False the AI is thinking, so player clicks in mousePressEvent are ignored

        self.setMouseTracking(True)

        self.DrawPieces()

        self.show()
Exemplo n.º 56
0
class App(ConnectionListener):
    def __init__(self, host, port, run_server=False):
        self._running = True
        self._screen = None
        self.reset_sound = None
        self.run_server = run_server
        self.size = self.width, self.height = 1800, 960
        self.board = Board(self)
        self.dice = Dice(self)
        self.init_pieces()
        self.player_count = 0
        self.other_mouse = OtherMouse()
        if self.run_server:
            self.server = BackgammonServer(localaddr=(host, port))
        self.Connect((host, port))

    def init_pieces(self, send=True):
        self.pieces = list()
        self.fields = [[] for _ in range(24)]
        self.fields[0] = [True] * 2
        self.fields[5] = [False] * 5
        self.fields[7] = [False] * 3
        self.fields[11] = [True] * 5
        self.fields[23] = [False] * 2
        self.fields[18] = [True] * 5
        self.fields[16] = [True] * 3
        self.fields[12] = [False] * 5
        self.piece_size = 42
        self.ping_iter = 0
        ident = 1
        for field_id, field in enumerate(self.fields):
            top = field_id // 12 == 1
            for piece_id, is_black in enumerate(field):
                offset_x = self.board.triangle_width//2 + \
                    self.board.triangle_width * (field_id % 12) + \
                    ((field_id % 12) // 6) * self.board.offset_x
                x = offset_x if top else self.width - offset_x
                y = self.piece_size * \
                    (piece_id*2+1) if top else self.height - \
                    self.piece_size * (piece_id*2+1)
                pos = (x, y)
                self.pieces.append(Piece(self, ident, pos, is_black))
                ident += 1
        self.dice.reset()

        if self.reset_sound is not None:
            self.reset_sound.play()
            if send:
                connection.Send({"action": "resetboard"})

    def send_gamestate(self):
        for p in self.pieces:
            p.send_move()
        self.dice.send_state()
        self.dice.send_eyes()

    def on_init(self):
        pygame.init()
        pygame.mixer.init()
        self.reset_sound = pygame.mixer.Sound('sound/button.wav')
        self.impact_sound = pygame.mixer.Sound('sound/impact.wav')
        self.font = pygame.font.Font(pygame.font.get_default_font(), 22)
        pygame.display.set_caption('Backgammon')
        self.clock = pygame.time.Clock()
        self._screen = pygame.display.set_mode(
            self.size, pygame.HWSURFACE | pygame.DOUBLEBUF)
        self._running = True

    def ping(self):
        connection.Send({"action": "ping"})

    def keep_connection_alive(self):
        # Ping every 4 seconds (240 loop iterations at 60 FPS)
        self.ping_iter = (self.ping_iter + 1) % 240
        if self.ping_iter == 0:
            self.ping()

    def on_event(self, event):
        if event.type == pygame.QUIT:
            self._running = False
        elif event.type == pygame.KEYDOWN:
            if event.key == pygame.K_SPACE:
                self.dice.roll()
            elif event.key == pygame.K_ESCAPE:
                self.init_pieces()
        else:
            self.handle_piece_events(event)
            if event.type == pygame.MOUSEMOTION:
                connection.Send({'action': 'mousemotion', 'pos': event.pos})

    def handle_piece_events(self, event):
        for idx, piece in enumerate(self.pieces):
            if piece.handle_event(event):
                if idx == 0:
                    break
                for idx2, piece2 in enumerate(self.pieces):
                    if idx == idx2:
                        continue
                    if piece.rect.colliderect(piece2.rect):
                        break
                else:
                    self.pieces.insert(0, self.pieces.pop(idx))
                break
        else:
            self.dice.handle_event(event)

    def on_loop(self):
        self.keep_connection_alive()
        connection.Pump()
        self.Pump()
        if self.run_server:
            self.server.Pump()

    def on_render(self):
        self.board.render(self._screen)
        for piece in self.pieces[::-1]:
            piece.update(self._screen)
        self.dice.render(self._screen)
        self.other_mouse.render(self._screen)
        pygame.display.flip()

    def on_cleanup(self):
        pygame.quit()

    def on_execute(self):
        if self.on_init() is False:
            self._running = False

        while self._running:
            self.clock.tick(60)
            for event in pygame.event.get():
                self.on_event(event)
            self.on_loop()
            self.on_render()
        self.on_cleanup()

    def Network_connected(self, data):
        print("Connected to the server")

    def Network_disconnected(self, data):
        print("Disconnected from the server")
        self.player_count = 0

    def Network_resetboard(self, data):
        self.init_pieces(False)

    def Network_roll(self, data):
        self.dice.roll(data)

    def Network_impact(self, data):
        self.impact_sound.play()

    def Network_eyes(self, data):
        self.dice.set_eye_counter(data['eyes'])

    def Network_pong(self, data):
        pass

    def Network_mousemotion(self, data):
        self.other_mouse.setPostion(data['pos'])

    def Network_playercount(self, data):
        new_player_count = int(data['count'])
        if self.run_server and new_player_count > self.player_count:
            self.send_gamestate()
        self.player_count = new_player_count
        if self.player_count < 2:
            self.other_mouse.set_visible(False)

    def Network_move(self, data):
        piece_move = data['piece']
        for piece in self.pieces:
            if piece.ident == piece_move[0]:
                piece.move((piece_move[1], piece_move[2]), self._screen)
                break
        else:
            raise ValueError('Invalid piece ident!')
Exemplo n.º 57
0
def main():
    parser = argparse.ArgumentParser(description='Test')
    parser.add_argument('--replay_memory_size',
                        default=50000,
                        type=int,
                        help='replayMemory_size to store training data')
    parser.add_argument('--batch_size',
                        default=512,
                        type=int,
                        help='batch size')
    parser.add_argument('--learning_rate',
                        default=1e-3,
                        type=float,
                        help='learning_rate')
    parser.add_argument('--evaluate_freq',
                        default=50,
                        type=int,
                        help='evaluate once every #evaluate_freq games')
    parser.add_argument(
        '--train_freq',
        default=1,
        type=int,
        help='train #train_epoch times replay mempry within each train')
    parser.add_argument('--n_eval_game',
                        default=10,
                        type=int,
                        help='number of games during one evaluation')
    parser.add_argument('--n_burn_in',
                        default=10,
                        type=int,
                        help='number of games to burn in the replay memory')
    parser.add_argument('--n_iteration',
                        default=20,
                        type=int,
                        help='number of train iteration')
    parser.add_argument('--width', default=6, type=int)
    parser.add_argument('--height', default=6, type=int)
    parser.add_argument('--n_in_row', default=4, type=int)
    args = parser.parse_args()

    width, height = args.width, args.height
    board = Board(width=width, height=height, n_in_row=args.n_in_row)
    game = Game(board)
    # Prepare train and eval model
    AlphaGoNet_train = PolicyValueNet(width, height)
    #AlphaGoNet_best = PolicyValueNet(width, height)
    #torch.save(AlphaGoNet_train.policy_value_net.state_dict(), 'model/init.mdl')
    AlphaGoNet_train.policy_value_net.load_state_dict(
        torch.load('model/current.mdl'))

    # Replay is used to store training data:
    ReplayMemory = deque(maxlen=args.replay_memory_size)
    player = AlphaGoPlayer(NN_fn=AlphaGoNet_train.policy_value_fn)
    #eval_player = AlphaGoPlayer(NN_fn=AlphaGoNet_best.policy_value_fn)
    eval_player = MCTSPlayer()
    max_win_ratio = 0.0

    # Burn in
    burn_in(game, player, ReplayMemory, args.n_burn_in)

    for i in range(args.n_iteration):
        print('Iteration NO.:', i)
        train_one_iteration(game, player, ReplayMemory, AlphaGoNet_train,
                            args.batch_size, args.learning_rate,
                            args.train_freq, args.evaluate_freq)
        win_ratio = evaluate(game, player, eval_player, args.n_eval_game)

        if win_ratio > max_win_ratio:
            print('Get current_best model!')
            max_win_ratio = win_ratio
            torch.save(AlphaGoNet_train.policy_value_net.state_dict(),
                       'model/current_best.mdl')
        else:
            print('Save current model')
            torch.save(AlphaGoNet_train.policy_value_net.state_dict(),
                       'model/current.mdl')
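burn_in(), train_one_iteration() and evaluate() are referenced above but not shown in this snippet. A plausible sketch of burn_in alone, under the assumption that game.start_self_play(player, temp=...) returns (winner, play_data) as it does in the other examples on this page:

def burn_in(game, player, replay_memory, n_games, temp=1.0):
    # Fill the replay memory with self-play games before training starts.
    for _ in range(n_games):
        winner, play_data = game.start_self_play(player, temp=temp)
        replay_memory.extend(play_data)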
Exemplo n.º 58
0
    def __init__(self, init_model=None):

        self.writer = SummaryWriter(WRITER_DIR)

        # params of the board and the game
        self.board_width = 6
        self.board_height = 6
        self.n_in_row = 4

        self.board = Board(width=self.board_width,
                           height=self.board_height,
                           n_in_row=self.n_in_row)

        self.game = Game(self.board)

        # training params
        self.learn_rate = 2e-3
        self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
        self.temp = 1.0  # the temperature param

        self.buffer_size = 10000
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5  # num of train_steps for each update
        self.kl_targ = 0.02

        self.check_freq = 50
        self.game_batch_num = 5000

        self.improvement_counter = 1000
        self.best_win_ratio = 0.0

        self.input_plains_num = INPUT_PLANES_NUM

        self.c_puct = 5
        self.n_playout = 50  # num of simulations for each move
        self.shutter_threshold_availables = 1
        self.full_boards_selfplay = False

        # num of simulations used for the pure mcts, which is used as
        # the opponent to evaluate the trained policy
        self.pure_mcts_playout_num = 200
        self.pure_mcts_playout_num_step = 200

        if init_model:
            # start training from an initial policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width,
                self.board_height,
                self.input_plains_num,
                model_file=init_model,
                shutter_threshold_availables=self.shutter_threshold_availables)
        else:
            # start training from a new policy-value net
            self.policy_value_net = PolicyValueNet(
                self.board_width,
                self.board_height,
                self.input_plains_num,
                shutter_threshold_availables=self.shutter_threshold_availables)

        self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playout,
                                      is_selfplay=1)
Exemplo n.º 59
0
        best_score = float('+inf')
        best_move = None

        for legal_move in current_board.current_player.calculate_legal_moves():
            new_board = legal_move.execute()
            move_value, move_obj = minimax(new_board, depth - 1, True)

            if move_value < best_score:
                best_score = move_value
                best_move = legal_move

        return best_score, best_move


DEPTH = 2
current_board = Board.create_standard_board()

while True:
    _, white_move = minimax(current_board, DEPTH, True)
    current_board = white_move.execute()
    print(current_board)
    print(_)
    input()
    _, black_move = minimax(current_board, DEPTH, True)
    current_board = black_move.execute()
    for i in current_board.current_player.calculate_legal_moves():
        print(i)
    print(current_board)
    print(_)
    input()
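The snippet above shows only the minimizing half of minimax, and the driver passes maximizing=True for both sides. For reference, a self-contained sketch of the full recursion over an abstract board API; calculate_legal_moves(), execute() and evaluate() mirror the names used above but are assumptions, not the original implementation:

def minimax(board, depth, maximizing):
    # Plain minimax: the maximizing player picks the highest score,
    # the minimizing player the lowest, alternating each ply.
    # Assumed API: board.current_player.calculate_legal_moves() -> moves,
    # move.execute() -> new board, board.evaluate() -> static score.
    if depth == 0:
        return board.evaluate(), None
    best_move = None
    if maximizing:
        best_score = float('-inf')
        for move in board.current_player.calculate_legal_moves():
            score, _ = minimax(move.execute(), depth - 1, False)
            if score > best_score:
                best_score, best_move = score, move
    else:
        best_score = float('+inf')
        for move in board.current_player.calculate_legal_moves():
            score, _ = minimax(move.execute(), depth - 1, True)
            if score < best_score:
                best_score, best_move = score, move
    return best_score, best_move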
Exemplo n.º 60
0
            move = -1
        if move == -1 or move not in board.availables:
            print("invalid move")
            move = self.get_action(board)
        return move

    def __str__(self):
        return "Human {}".format(self.player)



n = 5
width, height = 8, 8
model_file = 'best_policy_8_8_5.model'

board = Board(width=width, height=height, n_in_row=n)
game = Game(board)

# ############### human VS AI ###################
# load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

# best_policy = PolicyValueNet(width, height, model_file = model_file)
# mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

# load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
try:
    policy_param = pickle.load(open(model_file, 'rb'))
except Exception:
    policy_param = pickle.load(open(model_file, 'rb'),
                               encoding='bytes')  # to support Python 3
# get the policy