Example No. 1
 def test_make_dataset_from_sgf(self):
     with tempfile.NamedTemporaryFile() as sgf_file, \
             tempfile.NamedTemporaryFile() as record_file:
         sgf_file.write(TEST_SGF.encode('utf8'))
         sgf_file.seek(0)
         preprocessing.make_dataset_from_sgf(
             sgf_file.name, record_file.name)
         recovered_data = self.extract_data(record_file.name)
     start_pos = go.Position()
     first_move = coords.from_sgf('fd')
     next_pos = start_pos.play_move(first_move)
     second_move = coords.from_sgf('cf')
     expected_data = [
         (
             features.extract_features(start_pos),
             preprocessing._one_hot(coords.to_flat(first_move)),
             -1
         ), (
             features.extract_features(next_pos),
             preprocessing._one_hot(coords.to_flat(second_move)),
             -1
         )]
     self.assertEqualData(expected_data, recovered_data)
Example No. 2
def play(model, p_fail_count):
    history = []

    state = go.Position()

    while True:
        if state.is_game_over():
            break
        
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE, p_fail_count)

        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(get_legal_actions(state.all_legal_moves()), scores):
            policies[action] = policy

        x = features.extract_features(state, features.AGZ_FEATURES)

        history.append([x, policies, None])

        pv_mcts_coord = None
        action = np.random.choice(get_legal_actions(state.all_legal_moves()), p=scores)
        if action == (go.N * go.N):
            pv_mcts_coord = None
        else:
            coord_row = action // go.N
            coord_column = action % go.N
            pv_mcts_coord = (coord_row, coord_column)

        state = state.play_move(pv_mcts_coord)

    value = state.result()
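    # Back-fill the final result into each history entry below, flipping its sign
    # every step so the stored value is from the perspective of the player to move.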
    
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    
    return history
Example No. 3
    def test_replay_position(self):
        sgf_positions = list(replay_sgf(NO_HANDICAP_SGF))
        initial = sgf_positions[0]
        self.assertEqual(initial.metadata.result, 'W+1.5')
        self.assertEqual(initial.metadata.board_size, 9)
        self.assertEqual(initial.position.komi, 6.5)

        final = sgf_positions[-1].position

        # sanity check to ensure we're working with the right position
        final_board = load_board('''
            .OXX.....
            O.OX.X...
            .OOX.....
            OOOOXXXXX
            XOXXOXOOO
            XOOXOO.O.
            XOXXXOOXO
            XXX.XOXXO
            X..XOO.O.
        ''')
        expected_final_position = go.Position(
            final_board,
            n=62,
            komi=6.5,
            caps=(3, 2),
            ko=None,
            recent=tuple(),
            to_play=go.BLACK
        )
        self.assertEqualPositions(expected_final_position, final)
        self.assertEqual(final.n, len(final.recent))

        replayed_positions = list(replay_position(final))
        for sgf_pos, replay_pos in zip(sgf_positions, replayed_positions):
            self.assertEqualPositions(sgf_pos.position, replay_pos.position)
Example No. 4
    def test_replay_position(self):
        sgf_positions = list(
            sgf_wrapper.replay_sgf(utils_test.BOARD_SIZE, NO_HANDICAP_SGF))
        initial = sgf_positions[0]
        self.assertEqual(initial.result, go.WHITE)

        final = sgf_positions[-1].position.play_move(
            sgf_positions[-1].next_move)

        # sanity check to ensure we're working with the right position
        final_board = utils_test.load_board('''
        .OXX.....
        O.OX.X...
        .OOX.....
        OOOOXXXXX
        XOXXOXOOO
        XOOXOO.O.
        XOXXXOOXO
        XXX.XOXXO
        X..XOO.O.
      ''')
        expected_final_position = go.Position(utils_test.BOARD_SIZE,
                                              final_board,
                                              n=62,
                                              komi=6.5,
                                              caps=(3, 2),
                                              ko=None,
                                              recent=tuple(),
                                              to_play=go.BLACK)
        self.assertEqualPositions(expected_final_position, final)
        self.assertEqual(final.n, len(final.recent))

        replayed_positions = list(
            go.replay_position(utils_test.BOARD_SIZE, final, 1))
        for sgf_pos, replay_pos in zip(sgf_positions, replayed_positions):
            self.assertEqualPositions(sgf_pos.position, replay_pos.position)
Example No. 5
ALMOST_DONE_BOARD = test_utils.load_board('''
.XO.XO.OO
X.XXOOOO.
XXXXXOOOO
XXXXXOOOO
.XXXXOOO.
XXXXXOOOO
.XXXXOOO.
XXXXXOOOO
XXXXOOOOO
''')

TEST_POSITION = go.Position(board=ALMOST_DONE_BOARD,
                            n=105,
                            komi=2.5,
                            caps=(1, 4),
                            ko=None,
                            recent=(go.PlayerMove(go.BLACK, (0, 1)),
                                    go.PlayerMove(go.WHITE, (0, 8))),
                            to_play=go.BLACK)

SEND_TWO_RETURN_ONE = go.Position(board=ALMOST_DONE_BOARD,
                                  n=75,
                                  komi=0.5,
                                  caps=(0, 0),
                                  ko=None,
                                  recent=(go.PlayerMove(go.BLACK, (0, 1)),
                                          go.PlayerMove(go.WHITE, (0, 8)),
                                          go.PlayerMove(go.BLACK, (1, 0))),
                                  to_play=go.WHITE)

Example No. 6
from test_utils import load_board, GoPositionTestCase

go.set_board_size(9)
EMPTY_ROW = '.' * go.N + '\n'
TEST_BOARD = load_board('''
.X.....OO
X........
XXXXXXXXX
''' + EMPTY_ROW * 6)

TEST_POSITION = go.Position(
    board=TEST_BOARD,
    n=0,
    komi=6.5,
    caps=(1,2),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8)),
            go.PlayerMove(go.BLACK, (1, 0))),
    to_play=go.BLACK,
)

TEST_BOARD2 = load_board('''
.XOXXOO..
XO.OXOX..
XXO..X...
''' + EMPTY_ROW * 6)

TEST_POSITION2 = go.Position(
    board=TEST_BOARD2,
    n=0,
Example No. 7
            new_vars.append(
                tf.Variable(v,
                            name=name.replace('PolicNetwork',
                                              'PlayerNetwork')))
        saver = tf.train.Saver(new_vars)
        sess.run(tf.global_variables_initializer())
        saver.save(sess,
                   os.path.join(save_dir, str(t), 'player' + str(t) + '.ckpt'))


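# Build two graphs and sanity-check each on an empty position: one holding the
# supervised-learning policy ('PolicNetwork'), the other the copied 'PlayerNetwork'.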
g1 = tf.Graph()
with g1.as_default():
    train_net = PolicyNetwork(scope="PolicNetwork")
    train_net.initialize_variables('model/sl/epoch_48.ckpt')

pos = go.Position()
train_net.run(pos)

g2 = tf.Graph()
with g2.as_default():
    player_net = PolicyNetwork(scope="PlayerNetwork")
    player_net.initialize_variables('model/rl/2/player2.ckpt')
pos = go.Position()
player_net.run(pos)

save_trained_policy(1, 'model/rl')

print("===========load new model=================")
g2 = tf.Graph()
with g2.as_default():
    player_net = PolicyNetwork(scope="PlayerNetwork")
Example No. 8
.XXOOOOOO
X.XOO...O
.XXOO...O
X.XOO...O
.XXOO..OO
X.XOOOOOO
.XXOOOOOO
X.XXXXXXX
XXXXXXXXX
''')

SEND_TWO_RETURN_ONE = go.Position(
    board=ALMOST_DONE_BOARD,
    n=70,
    komi=2.5,
    caps=(1, 4),
    ko=None,
    recent=(go.PlayerMove(go.BLACK, (0, 1)),
            go.PlayerMove(go.WHITE, (0, 8))),
    to_play=go.BLACK
)


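# Stand-in network for tests: run() returns uniform priors over every move
# (including pass) and a fixed value, regardless of the position.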
class DummyNet():
    def __init__(self, fake_priors=None, fake_value=0):
        if fake_priors is None:
            fake_priors = np.ones((go.N ** 2) + 1) / (go.N ** 2 + 1)
        self.fake_priors = fake_priors
        self.fake_value = fake_value

    def run(self, position):
        return self.fake_priors, self.fake_value
Example No. 9
        value = value.detach().numpy()
        if use_random_symmetry:
            probabilities = symmetries.invert_symmetries_pi(
                syms_used, probabilities)

        return probabilities, value.flatten()

    def bootstrap(working_dir):
        raise NotImplementedError


if __name__ == "__main__":
    input_ = np.zeros((1, 18, 19, 19)).astype(np.float32)
    input_[:, 16, :, :] = 1.
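    # Smoke-test input: an 18-plane 19x19 feature stack with a single plane set to ones.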
    # batch = torch.from_numpy(input_)

    fn = "/private/home/zhuoyuan/AlphaGo/ELF2_models/save-1661000.bin"
    # fn = "/Users/zhuoyuan/Exp/AlphaGo/ELF2_models/save-1661000.bin"
    model = DualNetwork(fn)

    # to run directly
    # res = model.model(batch)

    position = go.Position()
    prob, val = model.run(position)

    for i in range(19):
        print(prob[i * 19:(i + 1) * 19])
    print(prob[-1])
    print(val)
Example No. 10
import go
import numpy as np

state = go.Position()


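# Sample a move uniformly at random from the legal-move vector; the last index is
# the pass move, which is represented by the coordinate None.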
def random_action(state):
    legal_actions = state.all_legal_moves()
    sum_legal_actions = np.sum(legal_actions)
    possibilities = legal_actions / sum_legal_actions
    num = np.random.choice(len(legal_actions),
                           p=possibilities)  # if 9x9 -> 0 ~ 81

    if num == len(legal_actions) - 1:
        return None

    row = num // go.N
    column = num % go.N

    coord = (row, column)

    return coord


if __name__ == '__main__':
    while True:
        if state.is_game_over():
            print(state.result_string())
            break

        action = random_action(state)
Example No. 11
import time
import go
import strategies
import policy
import sgf_wrapper
import load_data_sets
import utils

net = policy.PolicyNetwork()
net.initialize_variables('/Users/brilee/dev/MuGo/saved_models/20170718')
now = time.time()


positions = [go.Position(to_play=go.WHITE) for i in range(1)]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()

def get_winrate(final_positions):
    black_win = [utils.parse_game_result(pos.result()) == go.BLACK
                 for pos in final_positions]
    return sum(black_win) / len(black_win)

def extract_moves(final_positions):
    winning_moves = []
    losing_moves = []
    for final_position in final_positions:
        positions_w_context = utils.take_n(
            strategies.POLICY_CUTOFF_DEPTH,
Example No. 12
def playMatches(EPISODES, logger, turns_until_tau0, goes_first=0):

    BOARD = np.array(
        [[0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, -1, 1, 0, -1, 0, 0, 1, 0, -1, 0, 0, 0]])
    board = []
    for r in range(19):
        for c in range(19):
            board.append(BOARD[r, c])
    board.append(0)
    board.append(0)
    board = np.array(board)
    env = Game(board)
    player1 = Agent('best_player', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT)
    player2 = Agent('best_player', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT)
    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}
    pos = go.Position(board=BOARD)

    for e in range(EPISODES):
        print(str(e + 1) + ' ', end='')
        game_start = time.time()
        state = env.reset(board)

        done = 0
        turn = 0
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        if player1Starts == 1:
            players = {
                1: {
                    "agent": player1,
                    "name": player1.name
                },
                -1: {
                    "agent": player2,
                    "name": player2.name
                }
            }
        else:
            players = {
                1: {
                    "agent": player2,
                    "name": player2.name
                },
                -1: {
                    "agent": player1,
                    "name": player1.name
                }
            }
        env.gameState.render()
        perv_turn = time.time()
        while done == 0:
            turn = turn + 1
            #### Run the MCTS algo and return an action

            action, pos = players[state.playerTurn]['agent'].act(pos, state)

            print(pos.score())

            this_turn = time.time()
            print()
            print()
            print()
            print("************************* TURN: ", turn,
                  " *************************")
            print("This turn took ", (this_turn - perv_turn) / 60, " mins")
            print("It has been ", (this_turn - game_start) / 60,
                  " mins from the start of the game")

            perv_turn = time.time()
            ### Do the action
            state, value, done, _ = env.step(
                action
            )  #the value of the newState from the POV of the new playerTurn i.e. -1 if the previous player played a winning move
            value = pos.score()
            env.gameState.render()  #Send state to GUI lib

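            # Estimate the current board score with the native estimator library:
            # copy the flat board into a ctypes int array and call _estimator_so.estimate().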
            state.arr = ((19 * 19) * ctypes.c_int)()
            data = np.copy(state.board[0:361])
            estimated_board_score = 0
            for i, v in enumerate(data):
                state.arr[i] = v
            score = _estimator_so.estimate(19, 19, state.arr, state.playerTurn,
                                           1000, ctypes.c_float(0.4))
            data[:] = state.arr
            current_player_score = state.playerTurn * score
            other_player_score = -state.playerTurn * score
            estimated_board_score = (current_player_score,
                                     current_player_score, other_player_score)
            print("Estimated board score: ", estimated_board_score)

            if done == 1:
                print("Done")

                if value > 0:
                    print('%s WINS!' % players[state.playerTurn]['name'])
                    # logger.info('Player %d Wins!', state.playerTurn)
                    # logger.info(str(value))
                    scores[players[state.playerTurn]['name']] = scores[players[
                        state.playerTurn]['name']] + 1
                    if state.playerTurn == 1:
                        sp_scores['sp'] = sp_scores['sp'] + 1
                    else:
                        sp_scores['nsp'] = sp_scores['nsp'] + 1

                elif value < 0:
                    print('%s WINS!' % players[-state.playerTurn]['name'])
                    # logger.info('Player %d Wins!', -state.playerTurn)
                    # logger.info(str(value))
                    scores[players[-state.playerTurn]['name']] = scores[
                        players[-state.playerTurn]['name']] + 1

                    if state.playerTurn == 1:
                        sp_scores['nsp'] = sp_scores['nsp'] + 1
                    else:
                        sp_scores['sp'] = sp_scores['sp'] + 1

                else:
                    print('DRAW...')
                    scores['drawn'] = scores['drawn'] + 1
                    sp_scores['drawn'] = sp_scores['drawn'] + 1

                pts = state.score
                points[players[state.playerTurn]['name']].append(pts[0])
                points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, points, sp_scores)
Example No. 13
    with gfile.FastGFile("../data/goai.pb", 'rb') as f:
        graph = tf.get_default_graph()
        graphdef = graph.as_graph_def()
        graphdef.ParseFromString(f.read())
        _ = tf.import_graph_def(graphdef)
        summary_write = tf.summary.FileWriter("./log", graph)
        predict = graph.get_tensor_by_name('import/add_7:0')

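    # Replay every recorded game from JSON, chaining positions through .parent, and
    # collect each position with a non-zero vertex for evaluation.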
    evalset = []
    testfiles = [f for f in listdir('../data/estimate/') if f[-4:] == 'json']

    for f in testfiles:
        with open('../data/estimate/' + f) as json_data:
            record = json.load(json_data)
            s = 0
            parent = go.Position()
            while s < len(record) and s <= go.LN:
                position = go.Position()
                position.fromJSON(record[s])
                position.parent = parent
                parent = position
                if position.vertex != 0:
                    evalset.append(position)
                s += 1

    right = 0
    for pos in evalset:
        if pos.vertex != 0:
            x_data = np.zeros(go.LN, dtype=np.float32).reshape(1, 1, go.N, go.N)
            p, q = go.toJI(pos.vertex)
            v = q * go.N + p - go.N - 1
Example No. 14
 def test_add_child(self):
     root = mcts.MCTSNode(go.Position())
     child = root.maybe_add_child(17)
     self.assertIn(17, root.children)
     self.assertEqual(root, child.parent)
     self.assertEqual(17, child.fmove)
Example No. 15
import time
import go
import strategies
import policy
import sgf_wrapper
import load_data_sets
import utils

net = policy.PolicyNetwork()
net.initialize_variables('/Users/brilee/dev/MuGo/saved_models/20170718')
now = time.time()

positions = [
    go.Position(to_play=go.BLACK if i % 2 == 0 else go.WHITE) for i in range(2)
]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()


def get_winrate(final_positions):
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)


def extract_moves(final_positions):
Example No. 16
async def main():
    #initialize game
    global GameInfo
    env = None
    gamepos = None
    mode = -1
    playerColor = ""
    oppcolor = ""
    playerName = "JoGo"
    playerTurn = 0
    action = 0
    sc = [0, 0, 0]

    ai = None
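    # Poll gui3.txt until it supplies the game mode and the player's colour.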
    while mode == -1:
        f = open("gui3.txt", "r")
        f1 = f.readlines()
        j = 0
        for i in f1:
            if j == 0:
                #mode == 0 --> AI vs AI, mode == 1 --> AI vs User
                mode = int(i)
            if j == 1:
                #PlayerNum == 0 --> black (player1), 1 --> white (player2)
                playerColor = str(i)
            j = j + 1

    if mode == 0:
        print("AI vs AI")
        turn = 0
        game_start = time.time()
        perv_turn = time.time()
        GameInfo.State = States.INIT
        while (1):
            print("Waiting for server")
            # print(type(GameInfo.RemainTime))
            # print(GameInfo.score)

            if GameInfo.State == States.INIT:
                print("Initialized")
                await InitState(GameInfo, playerName)
            elif GameInfo.State == States.READY:
                await ReadyState(GameInfo)
                playerColor = GameInfo.PlayerColor

                if GameInfo.endgame == False:
                    board = initialboard()
                    if playerColor == "B":
                        oppcolor = "W"
                        playerTurn = 1
                    else:
                        oppcolor = "B"
                        playerTurn = -1
                    if env is None:
                        env = Game(board, 1)
                        env.gameState.render()
                        ai = Agent('current_player', env.state_size,
                                   env.action_size, config.MCTS_SIMS,
                                   config.CPUCT)
                        gamepos = go.Position(board=initialdboard(board))
                else:
                    theend()

            elif GameInfo.State == States.IDLE:
                await IdleState(GameInfo)
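                # Translate the opponent's move reported by the server into a board
                # action and apply it to both gamepos and the environment.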
                action = None
                baction = None
                if GameInfo.getOppMove(0) == -1:
                    if playerColor == "B":
                        action = 362
                    else:
                        action = 361
                elif GameInfo.getOppMove(0) == -2:
                    pass
                else:
                    baction = (int(GameInfo.getOppMove(0)),
                               int(GameInfo.getOppMove(1)))
                    action = int(GameInfo.getOppMove(0)) * 19 + int(
                        GameInfo.getOppMove(1))
                gamepos = gamepos.play_move(baction,
                                            env.gameState.playerTurn,
                                            mutate=True)
                env.step(action)
                turn = turn + 1
                this_turn = time.time()
                print()
                print()
                print()
                print("************************* TURN: ", turn,
                      " *************************")
                print("This turn took ", (this_turn - perv_turn) / 60, " mins")
                print("It has been ", (this_turn - game_start) / 60,
                      " mins from the start of the game")
                env.gameState.render()
                env.gameState.renderThink()

            elif GameInfo.State == States.THINK:

                action, gamepos = ai.act(gamepos, env.gameState, turn)
                print(gamepos.score())
                typ = 0
                if action == 361:
                    typ = 1
                elif action == 362:
                    typ = 1
                else:
                    typ = 0
                    y = action % 19
                    x = math.floor(action / 19)

                env.step(action)
                await ThinkState(GameInfo, x, y, typ)
                turn = turn + 1
                this_turn = time.time()
                print()
                print()
                print()
                print("************************* TURN: ", turn,
                      " *************************")
                print("This turn took ", (this_turn - perv_turn) / 60, " mins")
                print("It has been ", (this_turn - game_start) / 60,
                      " mins from the start of the game")
                env.gameState.render()
                env.gameState.renderWait()
Example No. 17
 def clear(self):
     self.position = go.Position()
Example No. 18
 def clear(self):
     self.position = go.Position(komi=self.komi)
Example No. 19
def testAgainstRandom(model, matches):
    #untrained models do not play at random; they have random weight initializations and then always play according to those weights.
    #this function takes a (trained) model and plays it against an opponent that makes a random move every time.
    #it plays `matches` games.
    veteran = model
    veteranWins = 0
    beginnerWins = 0
    white = None
    black = None
    for i in range(matches):
        if i % 2 == 0:
            black = veteran
        else:
            white = veteran
        position = go.Position()
        while not position.is_game_over():
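            # After move 100 the side to move simply passes; two consecutive passes end the game.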
            if position.n >= 100:
                position = position.pass_move()
            else:
                if position.to_play == 1:
                    if black == veteran:
                        boards, playerCaps, opponentCaps = gamesToData(
                            [[position, 1]])
                        actions = black.callPol(boards, playerCaps,
                                                opponentCaps)[0]
                        pdist = tf.nn.softmax(
                            tf.cast(actions, dtype=tf.float64))
                        legalMoves = position.all_legal_moves()
                        move = np.random.choice(np.arange(0, len(pdist)),
                                                p=pdist)
                        if legalMoves[move] == 0:
                            actions = actions * legalMoves
                            move = tf.math.argmax(actions).numpy()
                        position = position.play_move(coords.from_flat(move))
                    else:
                        position = choose_and_play_move(position)
                else:
                    if white == veteran:
                        boards, playerCaps, opponentCaps = gamesToData(
                            [[position, 1]])
                        actions = white.callPol(boards, playerCaps,
                                                opponentCaps)[0]
                        pdist = tf.nn.softmax(
                            tf.cast(actions, dtype=tf.float64))
                        legalMoves = position.all_legal_moves()
                        move = np.random.choice(np.arange(0, len(pdist)),
                                                p=pdist)
                        if legalMoves[move] == 0:
                            actions = actions * legalMoves
                            move = tf.math.argmax(actions).numpy()
                        position = position.play_move(coords.from_flat(move))
                    else:
                        position = choose_and_play_move(position)

        if black == veteran:
            if position.result() == 1:
                veteranWins += 1
            elif position.result() == -1:
                beginnerWins += 1
            else:
                print("No one wins!!")
        else:
            if position.result() == 1:
                beginnerWins += 1
            elif position.result() == -1:
                veteranWins += 1
            else:
                print("No one wins!!")
    print("The model wins " + str(veteranWins))
    print("The random wins " + str(beginnerWins))
    return veteranWins - beginnerWins