Example #1
def main():
    # Create a 9x9 board (nir = number of stones in a row needed to win)
    game = GobangGame(n=9, nir=9)

    network = NNetWrapper(game)
    if args.load_model:
        network.load_checkpoint(args.checkpoint)

    coach = Coach(game, network, args)
    coach.learn()
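
This snippet assumes an args object defined elsewhere in the file. A minimal sketch of what it might look like, using the dotdict helper that appears in the later examples; the exact keys beyond load_model and checkpoint are illustrative:

from utils import dotdict

args = dotdict({
    'load_model': False,      # set True to resume from a saved checkpoint
    'checkpoint': './temp/',  # value passed to network.load_checkpoint above
})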
Example #2
    #SET beta = 1 DURING TESTING, SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 10,  #dictates when MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'alpha': 1e-5,  #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'gamma': 1,  #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce that A_S*x stays close to y.
    #the choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be approximately larger than m/||A_S*x^* - y||_2^2, where y = Ax and x^* is the solution to the l2 regression problem.
    'epsilon': 1e-5,  #if x is the optimal solution to the l2 problem and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

#START ALPHAZERO TRAINING:
#Initialize Game_args, nnet, Game, and Alphazero
Game_rules = Game_args()
Game = CSGame()

nnet = NNetWrapper(args)

if args['load_nn_model']:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)

Alphazero_train = Coach(Game, nnet, args, Game_rules)

if args['load_training']:
    print('Load trainExamples from file')
    Alphazero_train.loadTrainExamples()

#Start Training Alphazero
Alphazero_train.learn()
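
The reward and stopping rule described in the comments above can be checked numerically. The following is a worked sketch, not code from the repository; the helper names are invented, and A, y, S stand for the sensing matrix, observation vector, and chosen column indices:

import numpy as np

def terminal_reward(A, y, S, alpha=1e-5, gamma=1):
    # sketch of the terminal reward -alpha*||x||_0 - gamma*||A_S*x - y||_2^2
    A_S = A[:, S]                                # submatrix of the chosen columns
    x, *_ = np.linalg.lstsq(A_S, y, rcond=None)  # l2 regression on the chosen columns
    residual = np.linalg.norm(A_S @ x - y) ** 2
    return -alpha * np.count_nonzero(x) - gamma * residual

def is_terminal(A, y, S, epsilon=1e-5):
    # sketch of the epsilon stopping rule: terminal once the l2 residual is small
    A_S = A[:, S]
    x, *_ = np.linalg.lstsq(A_S, y, rcond=None)
    return np.linalg.norm(A_S @ x - y) ** 2 < epsilon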
Example #3
}

#Initialize Algorithms object to compare algorithms
Algorithms = CSAlgorithms()

#INITIALIZE ALPHAZERO FOR PREDICTION
#--------------------------------------------------------------
#initialize Game_args
#load sensing_matrix into game_args
game_args = Game_args()
matrix_filename = 'sensing_matrix.npy'
A = np.load(matrix_filename)
game_args.sensing_matrix = A
#initialize neural network wrapper object
#load weights and model we wish to predict with using nnet.load_checkpoint
nnet = NNetWrapper(args)
model_filename = 'best'
nnet.load_checkpoint(os.getcwd(), model_filename)
#initialize a new game object
new_game = CSGame()
#initialize skip_nnet if option is turned on
if args['skip_rule'] == 'bootstrap':
    skip_nnet = NNetWrapper(args)
    skip_nnet.load_checkpoint(args['skip_nnet_folder'],
                              args['skip_nnet_filename'])

elif args['skip_rule'] is None:
    skip_nnet = nnet

else:
    skip_nnet = None
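
Example #3 assumes sensing_matrix.npy already exists on disk. A hedged sketch of how such a file might be produced; a standard-normal matrix is one common choice, and the column normalization is an assumption (the supported matrix_type options live in Game_args.generateSensingMatrix, used in Example #4):

import numpy as np

m, n = 50, 100                    # illustrative dimensions
A = np.random.randn(m, n)         # standard-normal entries
A /= np.linalg.norm(A, axis=0)    # normalize columns (assumption)
np.save('sensing_matrix.npy', A)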
Example #4
    1,  #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce that A_S*x stays close to y.
    #the choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be approximately larger than m/||A_S*x^* - y||_2^2, where y = Ax and x^* is the solution to the l2 regression problem.
    'alpha':
    1e-5,  #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'epsilon':
    1e-5,  #if x is the optimal solution to the l2 problem and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

#Test the search capabilities of multiple MCTS objects using Threading_MCTS

#global Game_args object, global CSGame(for game rules and such), global policy/value net
game_args = Game_args()
game_args.generateSensingMatrix(args['m'], args['n'], args['matrix_type'])

Game = CSGame()
nnet = NNetWrapper(args)

#---------------------------------------------------
#Initialize MCTS_States_list
for i in range(args['num_batches']):
    MCTS_States_list = []
    batchTrainExamples = []

    #In loop below, we create a pair in the form of (MCTS_object, [list of States])
    for ep in range(args['eps_per_batch']):
        #Initialize Game_args() for MCTS
        temp_game_args = Game_args()
        temp_game_args.sensing_matrix = game_args.sensing_matrix
        temp_game_args.generateNewObsVec(args['x_type'], args['sparsity'])
        #Initialize MCTS object
        temp_MCTS = MCTS(Game,
Example #5
        for x in range(7):
            for y in range(7):
                action2move.append((None, None, x, y))

        for x in range(7):
            for y in range(7):
                for dx, dy in _directions_2:
                    if {x, y, x + dx, y + dy} <= set(range(7)):
                        action2move.append((x, y, x + dx, y + dy))

        action2move.append((None, None, None, None))

        g = AtaxxGame()
        args1 = dotdict({'numMCTSSims': 2000, 'cpuct': 1.0})
        n1 = NNet(g)
        n1.load_checkpoint('.', 'best.pth.tar')
        mcts1 = MCTS(g, n1, args1)
        n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

        player = __file__[2]
        board = []
        actions = {
        }  # { key: piece(start position), value: list of position(destination position) }

        # make board
        input_lines = input_str.split("\n")
        for i in range(7):
            line = input_lines[i + 1].split(" ")
            dic = {'1': -1, '2': 1, '0': 0}  # tokens from split() are strings, so the keys must be strings too
            line = [dic.get(n, n) for n in line]
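
_directions_2 is defined above this excerpt. A plausible reconstruction for Ataxx jump moves, offered only as an assumption, is every offset at Chebyshev distance exactly 2; with it, the size of the action2move table built in the loops above can be checked directly:

_directions_2 = [(dx, dy) for dx in range(-2, 3) for dy in range(-2, 3)
                 if max(abs(dx), abs(dy)) == 2]   # 16 jump directions (assumption)

# 49 clone actions + on-board jumps + 1 pass action
print(49 + sum(1 for x in range(7) for y in range(7)
               for dx, dy in _directions_2
               if {x, y, x + dx, y + dy} <= set(range(7))) + 1)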
Example #6
    # Set number of threads for OpenMP
    os.environ["OMP_NUM_THREADS"] = "1"

    g = Game(is_basic=True)
    # Suppress logging from fireplace
    logger = logging.getLogger("fireplace")
    logger.setLevel(logging.WARNING)
    # logging.disable(logging.WARNING)      # disable logging
    # logging.disable(logging.NOTSET)       # re-enable logging

    # all players
    hp = HumanPlayer(g).play
    rp = RandomPlayer(g).play

    # nnet players
    n1 = NNet()
    #n1.nnet.cuda()
    # n1.load_checkpoint('./temp/', '0.pth.tar')
    # n1.load_checkpoint('./temp/', 'best18-287k-75i.pth.tar')           # newest network
    n1.load_checkpoint('../remote/models/', '0-32.pth.tar')
    argsNN = dotdict(
        {
            'numMCTSSims': 25,
            'cpuct': 1.0
        }
    )  # minimum numMCTSSims = 2 to always find a valid action (at least end turn)
    mcts1 = MCTS(g, n1, argsNN)
    #a1p = lambda x: mcts1.getActionProb(x, temp=0)
    a1p = functools.partial(
        mcts1.getActionProb, temp=0
    )  # temp=1 picks an action in proportion to its probability; temp=0 always takes the best action (the most-visited edge, with ties broken randomly)
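
The temp comment above corresponds to the two selection policies spelled out as named functions in Examples #14 and #15; as a usage sketch against the mcts1 object defined here:

import numpy as np

def stochastic(x):
    # temp=1: sample an action in proportion to MCTS visit counts
    pi = mcts1.getActionProb(x, temp=1)
    return np.random.choice(len(pi), p=pi)

def deterministic(x):
    # temp=0: always take the most-visited action
    return np.argmax(mcts1.getActionProb(x, temp=0))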
Example #7
    'x_l2': True,      #solution to min_z ||A_S*z - y||_2^2, where A_S is the submatrix of columns chosen so far
    'lambda': True,    #the vector of residual correlations, lambda = A^T(A_S*x - y), where x is the optimal solution to min_z ||A_S*z - y||_2^2
    #---------------------------------------------------------------
    #MCTS parameters
    'cpuct': 1, #controls the amount of exploration at each depth of the MCTS tree.
    'numMCTSSims': 500, #for each move, numMCTSSims is the number of MCTS simulations run to find the next move during self-play.
    'tempThreshold': 0,    #dictates when MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'gamma': 1, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce that A_S*x stays close to y. gamma must be large enough!
    'alpha': 1e-5, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5, #if x is the optimal solution to the l2 problem and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
    }
    
    test_loss = np.zeros(alphazero_iterations)
    
    for i in range(alphazero_iterations):
        #-------------------------------------------------------------
        game_args = Game_args()
        matrix_filename = 'sdnormal0.npy'
        A = np.load(matrix_filename)
        game_args.sensing_matrix = A
        nnet = NNetWrapper(args)
        model_filepath = os.path.join(os.getcwd(), 'network_checkpoint')
        model_filename = 'nnet_checkpoint' + str(i)
        nnet.load_checkpoint(model_filepath, model_filename)
        new_game = CSGame()
        Alphazero = Coach(new_game, nnet, args, game_args)
        #--------------------------------------------------------------
        #Alphazero now ready for prediction
        avgloss_for_NN = 0
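
The x_l2 and lambda feature flags at the top of the args dict are defined by the formulas in their comments; a numpy sketch of those two quantities (the function name is invented):

import numpy as np

def state_features(A, y, S):
    A_S = A[:, S]
    x, *_ = np.linalg.lstsq(A_S, y, rcond=None)  # x_l2: argmin_z ||A_S*z - y||_2^2
    lam = A.T @ (A_S @ x - y)                    # lambda: A^T(A_S*x - y)
    return x, lam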
    
Example #8
"""
use this script to play any two agents against each other, or play manually with
any agent.
"""

human_vs_cpu = True

g = HalfchessGame()

# all players
rp = RandomPlayer(g).play
# gp = GreedyOthelloPlayer(g).play
hp = HumanPlayer(g).play

# nnet player
nn = NNet(g)
#nn.load_checkpoint('./temp/','best.pth.tar')
nn.load_checkpoint('./pretrained_models/halfchess', '43it.pth.tar')
args = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts = MCTS(g, nn, args)
nnp = lambda x: np.argmax(mcts.getActionProb(x, temp=1))


if human_vs_cpu:
    player1 = hp
else:
    n2 = NNet(g)
    n2.load_checkpoint('./pretrained_models/halfchess/', '26it_fixed_logic.pth.tar')
    args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
    mcts2 = MCTS(g, n2, args2)
    n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
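
The script is truncated before the two players meet. In these alpha-zero-general-style scripts the continuation is typically an Arena call like the one visible in Example #11; a hedged sketch, where the player wiring and display hook are assumptions:

player2 = nnp  # assumed wiring; in the else branch above, n2p would take this role

arena = Arena.Arena(player1, player2, g, display=print)  # the display hook is an assumption
print(arena.playGames(2, verbose=True))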
Example #9
args1 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))


n2 = NNet(g, argsNN)
n2.load_checkpoint('/content/drive/My Drive/temp/','checkpoint_1.pth.tar')
args2 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
mcts2 = MCTS(g, n2, args2)
n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
"""

for p in range(1, 5):
    print("iter:%d" % p)
    for i in range(70, 1, -1):
        n1 = NNet(g, argsNN)
        try:
            n1.load_checkpoint('/content/drive/My Drive/model/',
                               'checkpoint_%d.pth.tar' % i)
        except Exception:
            print("no model:%d" % i)
            continue
        args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
        mcts1 = MCTS(g, n1, args1)
        n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))  #exploitation

        for j in range(70, 1, -1):
            if (i <= j): continue
            n2 = NNet(g, argsNN)
            try:
                n2.load_checkpoint('/content/drive/My Drive/model/',
Example #10
def PlayGame():
    menu_def = [
        ['&File', ['&Do nothing', 'E&xit']],
        ['&Help', '&About...'],
    ]

    # sg.SetOptions(margins=(0,0))
    sg.ChangeLookAndFeel('GreenTan')
    # create initial board setup
    psg_board = copy.deepcopy(initial_board)
    # the main board display layout
    board_layout = [[sg.T('     ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ]]
    # loop through board and create buttons with images
    for i in range(8):
        row = [sg.T(str(8 - i) + '   ', font='Any 13')]
        for j in range(4):
            piece_image = images[psg_board[i][j]]
            row.append(render_square(piece_image, key=(i, j), location=(i, j)))
        row.append(sg.T(str(8 - i) + '   ', font='Any 13'))
        board_layout.append(row)
    # add the labels across bottom of board
    board_layout.append([sg.T('     ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ])

    # setup the controls on the right side of screen
    openings = ('Any', 'Defense', 'Attack', 'Trap', 'Gambit', 'Counter',
                'Sicilian', 'English', 'French', 'Queen\'s Openings',
                'King\'s Openings', 'Indian Openings')

    board_controls = [
        [sg.RButton('New Game', key='New Game'),
         sg.RButton('Draw')],
        [sg.RButton('Resign Game'),
         sg.RButton('Set FEN')],
        [sg.RButton('Player Odds'),
         sg.RButton('Training')],
        [sg.Drop(openings), sg.Text('Opening/Style')],
        [sg.CBox('Play As White', key='_white_')],
        [sg.Text('Move List')],
        [
            sg.Multiline([],
                         do_not_clear=True,
                         autoscroll=True,
                         size=(15, 10),
                         key='_movelist_')
        ],
    ]

    # layouts for the tabs
    controls_layout = [[
        sg.Text('Performance Parameters', font='_ 20')
    ], [sg.T('Put stuff like AI engine tuning params on this tab')]]

    statistics_layout = [[sg.Text('Statistics', font=('_ 20'))],
                         [sg.T('Game statistics go here?')]]

    board_tab = [[sg.Column(board_layout)]]

    # the main window layout
    layout = [[sg.Menu(menu_def, tearoff=False)],
              [
                  sg.TabGroup([[
                      sg.Tab('Board', board_tab),
                      sg.Tab('Controls', controls_layout),
                      sg.Tab('Statistics', statistics_layout)
                  ]],
                              title_color='red'),
                  sg.Column(board_controls)
              ],
              [sg.Text('Click anywhere on board for next move', font='_ 14')]]

    window = sg.Window('Chess',
                       default_button_element_size=(12, 1),
                       auto_size_buttons=False,
                       icon='kingb.ico').Layout(layout)

    g = HalfchessGame.HalfchessGame()
    nn = NNet(g)
    nn.load_checkpoint(nn_filepath, nn_filename)
    args = dotdict({'numMCTSSims': numMCTSSims, 'cpuct': cpuct})
    mcts = MCTS(g, nn, args)
    nnp = lambda x: np.argmax(mcts.getActionProb(x, temp=temp))

    board = g.getInitBoard()
    move_count = curPlayer = 1
    move_state = move_from = move_to = 0
    # ---===--- Loop taking in user input --- #
    while g.getGameEnded(board, curPlayer) == 0:

        canonicalBoard = g.getCanonicalForm(board, curPlayer)

        if curPlayer == human:
            # human_player(board)
            move_state = 0
            while True:
                button, value = window.Read()
                if button in (None, 'Exit'):
                    exit()
                if button == 'New Game':
                    sg.Popup(
                        'You have to restart the program to start a new game... sorry....'
                    )
                    break

                if type(button) is tuple:
                    if move_state == 0:
                        move_from = button
                        row, col = move_from
                        piece = psg_board[row][col]  # get the move-from piece
                        button_square = window.FindElement(key=(row, col))
                        button_square.Update(button_color=('white', 'red'))
                        move_state = 1
                    elif move_state == 1:
                        move_to = button
                        row, col = move_to
                        if move_to == move_from:  # cancelled move
                            color = '#B58863' if (row + col) % 2 else '#F0D9B5'
                            button_square.Update(button_color=('white', color))
                            move_state = 0
                            continue

                        picked_move = '{}{}{}{}'.format(
                            'efgh'[move_from[1]], 8 - move_from[0],
                            'efgh'[move_to[1]], 8 - move_to[0])

                        action = moveset[picked_move]

                        valids = g.getValidMoves(canonicalBoard, 1)

                        if valids[action] != 0:
                            board, curPlayer = g.getNextState(
                                board, curPlayer, action)
                        else:
                            print('Illegal move')
                            move_state = 0
                            color = '#B58863' if (
                                move_from[0] + move_from[1]) % 2 else '#F0D9B5'
                            button_square.Update(button_color=('white', color))
                            continue

                        psg_board[move_from[0]][move_from[1]] = BLANK  # place blank where piece was
                        psg_board[row][col] = piece  # place piece in the move-to square
                        redraw_board(window, psg_board)
                        move_count += 1

                        window.FindElement('_movelist_').Update(picked_move +
                                                                '\n',
                                                                append=True)

                        break
        else:

            best_move = nnp(canonicalBoard)
            move_str = moveset[best_move]

            if curPlayer == -1:
                move_str = HalfchessGame.mirrored_move(move_str)

            from_col = ord(move_str[0]) - ord('e')
            from_row = 8 - int(move_str[1])
            to_col = ord(move_str[2]) - ord('e')
            to_row = 8 - int(move_str[3])

            window.FindElement('_movelist_').Update(move_str + '\n',
                                                    append=True)

            piece = psg_board[from_row][from_col]
            psg_board[from_row][from_col] = BLANK
            psg_board[to_row][to_col] = piece
            redraw_board(window, psg_board)

            board, curPlayer = g.getNextState(board, curPlayer, best_move)
            move_count += 1
    sg.Popup('Game over!', 'Thank you for playing')
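
The coordinate conversions used in both branches ('efgh'[col] with 8 - row, and the inverse via ord arithmetic) are easy to get backwards; a small round-trip check using only the conventions visible in the snippet:

def square_to_name(row, col):
    return 'efgh'[col] + str(8 - row)                 # (row, col) -> algebraic name

def name_to_square(name):
    return 8 - int(name[1]), ord(name[0]) - ord('e')  # algebraic name -> (row, col)

assert square_to_name(6, 0) == 'e2'
assert name_to_square('h8') == (0, 3)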
Example #11
    'lr': 0.001,
    'dropout': 0.3,
    'epochs': 10,
    'batch_size': 64,
    'cuda': torch.cuda.is_available(),
    'num_channels': 512,
})

# all players
rp = RandomPlayer(g).play
gp = GreedyOthelloPlayer(g).play
hp = HumanOthelloPlayer(g).play
mmp = MinMaxOthelloPlayer(g).play

# nnet players
n1 = NNet(g, argsNN)
n1.load_checkpoint('./model/', 'best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

n2 = NNet(g, argsNN)
n2.load_checkpoint('./model/', 'checkpoint_5.pth.tar')
args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts2 = MCTS(g, n2, args2)
n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

arena = Arena.Arena(n1p, mmp, g, [0, 0], display=display)
print(arena.playGames(6, verbose=True))

#arena = Arena.Arena(n2p, mmp, g, [0,0], display=display)
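
This excerpt starts mid-file: g, display, and the opening of the argsNN dict (whose tail begins the example) are defined above it. A hypothetical preamble consistent with the alpha-zero-general Othello players it uses; the board size and the display hook are assumptions:

from othello.OthelloGame import OthelloGame
from utils import dotdict

g = OthelloGame(6)             # 6x6 is the usual alpha-zero-general default
display = OthelloGame.display  # board printer handed to Arena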
Example #12
    'skip_nnet_filename': 'skip_nnet',
    'beta': 1, #recall the augmented probability aug_prob = beta * probs + (1 - beta) * (1/len(x)) * x_I, where x_I is the indicator vector of the support of the true sparse solution x. Higher beta values push the probabilities toward the correct column choices.
               #SET beta = 1 DURING TESTING, SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 25,    #dictates when MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'gamma': 1, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce that A_S*x stays close to y.
                #the choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be approximately larger than m/||A_S*x^* - y||_2^2, where y = Ax and x^* is the solution to the l2 regression problem.
    'alpha': 1e-5,  #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller alpha is, the more weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5, #if x is the optimal solution to the l2 problem and the residual ||A_S*x - y||_2^2 is less than epsilon, then the state corresponding to the index set S is a terminal state in MCTS.
}

#START ALPHAZERO TRAINING:
#Initialize Game_args, nnet, Game, and Alphazero
game_rules = Game_args()  # renamed: assigning the instance to Game_args would shadow the class
Game = CSGame()

nnet = NNetWrapper(args)

if args['load_nn_model']:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)
    
if args['skip_rule'] == 'bootstrap':
    skip_nnet = NNetWrapper(args)
    skip_nnet.load_checkpoint(args['skip_nnet_folder'], args['skip_nnet_filename'])
    
elif args['skip_rule'] is None:
    skip_nnet = nnet
    
else:
    skip_nnet = None
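
The beta comment above defines the augmented probability explicitly. A numpy sketch of that formula; note that, as written, the weighted sum need not total 1, so the final renormalization is an assumption:

import numpy as np

def augment_probs(probs, x, beta):
    x_I = (x != 0).astype(float)                    # indicator vector of the support of x
    aug = beta * probs + (1 - beta) * x_I / len(x)  # formula from the comment
    return aug / aug.sum()                          # renormalize (assumption)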
Example #13
    'tempThreshold': 40,
    'updateThreshold': 0.6,  # During arena playoff, the new neural net is accepted if it wins at least this fraction of games.
    'maxlenOfQueue': 300000,  # Max number of game examples kept in the training queue.
    'numMCTSSims': 25,  # Number of game moves for MCTS to simulate.
    'arenaCompare': 40,  # Number of games to play during arena play to determine if the new net will be accepted.
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': False,
    #'load_folder_file': ('/dev/models/8x100x50','best.pth.tar'),
    'load_folder_file': ('temp/', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
    'dirichlet': 0.03,
})

if __name__ == "__main__":
    g = Game()
    nnet = HalfchessNNet(g)

    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
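
The dirichlet entry above carries no comment; in AlphaZero-style implementations it is typically the concentration parameter for Dirichlet exploration noise mixed into the root priors. A hedged sketch, with the 0.25 mixing fraction taken from the AlphaZero paper rather than from this repository:

import numpy as np

def add_root_noise(priors, valids, dirichlet=0.03, frac=0.25):
    mask = valids.astype(bool)
    noise = np.random.dirichlet([dirichlet] * int(mask.sum()))
    noisy = priors.copy()
    noisy[mask] = (1 - frac) * priors[mask] + frac * noise
    return noisy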
Example #14
num_games = 2

nets = [1, 3, 14, 29, 48, 70, 98]
#nets = [1,14,98]
victories = {}
runner_up = {}

rp = RandomPlayer(g).play
victories[rp] = [0, 0]
runner_up[rp] = [0, 0]

playerpool = [rp]
args = dotdict({'numMCTSSims': 20, 'cpuct': 1.0, 'dirichlet': 0.05})

for i in nets:
    nn = NNet(g)
    nn.load_checkpoint(filepath, 'checkpoint_%d.pth.tar' % i)
    mcts = MCTS(g, nn, args)

    # Bind mcts as a default argument: Python closures are late-binding, so without
    # this every player stored in the pool would end up using the last checkpoint's MCTS.
    def stochastic(x, mcts=mcts):
        pi = mcts.getActionProb(x, temp=1)
        return np.random.choice(len(pi), p=pi)

    def deterministic(x, mcts=mcts):
        return np.argmax(mcts.getActionProb(x, temp=0))

    nnp = deterministic if det_pol else stochastic
    victories[nnp] = [i, 0]
    runner_up[nnp] = [i, 0]
    playerpool.append(nnp)
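
The excerpt ends before the pool is actually played out. A hypothetical round-robin over playerpool, using the Arena class seen in Example #11; the display hook and the scoring of victories are assumptions about the truncated remainder:

import itertools

for p1, p2 in itertools.combinations(playerpool, 2):
    arena = Arena.Arena(p1, p2, g, display=print)   # display hook is an assumption
    one_won, two_won, draws = arena.playGames(num_games)
    victories[p1][1] += one_won
    victories[p2][1] += two_won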
Example #15
def PlayGame():
    # menu_def = [['&File', ['&Open PGN File', 'E&xit']],
    #             ['&Help', '&About...'], ]

    # sg.SetOptions(margins=(0,0))
    sg.ChangeLookAndFeel('GreenTan')
    # create initial board setup
    psg_board = copy.deepcopy(initial_board)
    # the main board display layout
    board_layout = [[sg.T('     ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ]]
    # loop through board and create buttons with images
    for i in range(8):
        row = [sg.T(str(8 - i) + '   ', font='Any 13')]
        for j in range(4):
            piece_image = images[psg_board[i][j]]
            row.append(render_square(piece_image, key=(i, j), location=(i, j)))
        row.append(sg.T(str(8 - i) + '   ', font='Any 13'))
        board_layout.append(row)
    # add the labels across bottom of board
    board_layout.append([sg.T('     ')] + [
        sg.T('{}'.format(a), pad=((23, 27), 0), font='Any 13') for a in 'efgh'
    ])

    # setup the controls on the right side of screen
    # openings = (
    #     'Any', 'Defense', 'Attack', 'Trap', 'Gambit', 'Counter', 'Sicilian', 'English', 'French', 'Queen\'s openings',
    #     'King\'s Openings', 'Indian Openings')

    # board_controls = [[sg.RButton('New Game', key='New Game'), sg.RButton('Draw')],
    #                   [sg.RButton('Resign Game'), sg.RButton('Set FEN')],
    #                   [sg.RButton('Player Odds'), sg.RButton('Training')],
    #                   [sg.Drop(openings), sg.Text('Opening/Style')],
    #                   [sg.CBox('Play As White', key='_white_')],
    #                   [sg.Text('Move List')],
    #                   [sg.Multiline([], do_not_clear=True, autoscroll=True, size=(15, 10), key='_movelist_')],
    #                   ]

    # # layouts for the tabs
    # controls_layout = [[sg.Text('Performance Parameters', font='_ 20')],
    #                    [sg.T('Put stuff like AI engine tuning params on this tab')]]

    # statistics_layout = [[sg.Text('Statistics', font=('_ 20'))],
    #                      [sg.T('Game statistics go here?')]]

    board_tab = [[sg.Column(board_layout)]]

    # the main window layout
    layout = [  #[sg.Menu(menu_def, tearoff=False)],
        [sg.TabGroup([[sg.Tab('Board', board_tab)]])]
    ]
    # sg.Tab('Controls', controls_layout),
    # sg.Tab('Statistics', statistics_layout)]], title_color='red'),
    #sg.Column(board_controls)],
    #[sg.Text('Click anywhere on board for next move', font='_ 14')]]

    window = sg.Window('Chess',
                       default_button_element_size=(12, 1),
                       auto_size_buttons=False,
                       icon='kingb.ico').Layout(layout)

    g = HalfchessGame.HalfchessGame()
    nn = NNet(g)
    nn.load_checkpoint(nn_filepath, nn_filename)
    args = dotdict({
        'numMCTSSims': numMCTSSims,
        'cpuct': cpuct,
        'dirichlet': 0.5
    })
    mcts = MCTS(g, nn, args)

    def deterministic(x):
        return np.argmax(mcts.getActionProb(x, temp=0))

    def stochastic(x):
        pi = mcts.getActionProb(x, temp=temp)
        return np.random.choice(len(pi), p=pi)

    nnp = stochastic

    nn2 = NNet(g)
    nn2.load_checkpoint(nn2_filepath, nn2_filename)
    mcts2 = MCTS(g, nn2, args)

    # Distinct names for the second net's policies: reusing deterministic/stochastic
    # here would silently shadow the pair defined above.
    def deterministic2(x):
        return np.argmax(mcts2.getActionProb(x, temp=0))

    def stochastic2(x):
        pi = mcts2.getActionProb(x, temp=temp)
        return np.random.choice(len(pi), p=pi)

    nnp2 = stochastic2

    rp = RandomPlayer(g).play

    board = g.getInitBoard()
    move_count = curPlayer = 1
    move_state = move_from = move_to = 0

    # ---===--- Loop taking in user input --- #
    while g.getGameEnded(board, curPlayer) == 0:

        window.Read()

        canonicalBoard = g.getCanonicalForm(board, curPlayer)

        if curPlayer == p2:
            if twonets:
                best_move = nnp2(canonicalBoard)
                move_str = moveset[best_move]
            else:
                best_move = rp(canonicalBoard)
                move_str = moveset[best_move]

            if curPlayer == -1:
                move_str = HalfchessGame.mirrored_move(move_str)

            from_col = ord(move_str[0]) - ord('e')
            from_row = 8 - int(move_str[1])
            to_col = ord(move_str[2]) - ord('e')
            to_row = 8 - int(move_str[3])

            #window.FindElement('_movelist_').Update(move_str + '\n', append=True)

            piece = psg_board[from_row][from_col]
            psg_board[from_row][from_col] = BLANK
            psg_board[to_row][to_col] = piece

            redraw_board(window, psg_board)

            board, curPlayer = g.getNextState(board, curPlayer, best_move)

            move_state = 0

        else:

            best_move = nnp(canonicalBoard)
            move_str = moveset[best_move]

            if curPlayer == -1:
                move_str = HalfchessGame.mirrored_move(move_str)

            from_col = ord(move_str[0]) - ord('e')
            from_row = 8 - int(move_str[1])
            to_col = ord(move_str[2]) - ord('e')
            to_row = 8 - int(move_str[3])

            #window.FindElement('_movelist_').Update(move_str + '\n', append=True)

            piece = psg_board[from_row][from_col]
            psg_board[from_row][from_col] = BLANK
            psg_board[to_row][to_col] = piece

            redraw_board(window, psg_board)

            board, curPlayer = g.getNextState(board, curPlayer, best_move)
            move_count += 1
    sg.Popup('Game over!', 'Thank you for playing')