def test_against_random(model, size=100): ''' Evaluate the model against random performance ''' wins = {0: 0, 1: 0, 2: 0} for _ in range(size): game = TicTacToe(3) pid = np.random.random_integers(low=1, high=2, size=1)[0] winner = None while winner is None: board = game.get_board(pid) if pid == 1: x, y = random_choice(board) else: r_board = game.get_board_raw() x, y, _ = evaluate(model, game, pid, tau=1) game.place(pid, x, y) winner = game.check_win() pid = (pid % 2) + 1 wins[winner] += 1 print('Wins: %d Ties: %d Losses: %d' % (wins[2], wins[0], wins[1])) return (wins[2] / (wins[0] + wins[1] + wins[2]))
for it in range(ITER): for i in range(EPOCH): print('{}: {}/{}'.format(it, i, EPOCH), end='\r') game = TicTacToe(3) pid = np.random.random_integers(low=1, high=2, size=1)[0] winner = None inputs = {1: [], 2: []} probs = {1 : [], 2: []} while winner is None: board = game.get_board(pid) r_board = game.get_board_raw() x, y, prob = evaluate(model, game, (pid % 2) + 1) inputs[pid].append(copy.copy(game.get_input(pid))) probs[pid].append(prob) game.place(pid, x, y) winner = game.check_win() pid = (pid % 2) + 1 if winner != 0: one_reward = [1 if winner == 1 else -1] * len(inputs[1]) two_reward = [1 if winner == 2 else -1] * len(inputs[2]) else: one_reward = [0] * len(inputs[1])