示例#1
0
def test1():
    """Run one episode of an 'X' RLPlayer against a RandomPlayer('O') environment.

    The episode is executed from the board returned by ``p.empty_board()``.

    Returns:
        The ``(agent, final_board)`` pair produced by ``Episode.execute``.
    """
    opponent_env = Environment(RandomPlayer('O'))
    learner = RLPlayer('X')
    episode = Episode(learner, opponent_env)

    start_board = p.empty_board()
    # Note: the agent returned by execute() is what is handed back to the
    # caller, not necessarily the `learner` object created above.
    agent, final_board = episode.execute(start_board)
    return agent, final_board
示例#2
0
def play_game(p1, p2, file=None):
    """Play a single game between two players, with ``p1`` moving first.

    Each turn prints whose move it is, stops if ``p.is_cat_game`` reports a
    cat game, and otherwise asks the current player for a move via
    ``pick_next_move(board)``, applies it with ``p.add_move`` and displays the
    board. The game ends as soon as ``p.is_winner`` is true for the player
    who just moved.

    Args:
        p1: Player object that moves first; must expose ``player`` and
            ``pick_next_move(board)``.
        p2: Player object that moves second; same interface as ``p1``.
        file: Optional writable object. When given, the board before every
            move and the final result are written to it.

    Returns:
        The winning player object, or ``None`` when the game ends as a cat
        game.
    """
    board = p.empty_board()
    contenders = [p1, p2]
    logging_on = file is not None

    turn = 0
    moves_made = 0
    winner = None

    while True:
        mover = contenders[turn]
        print("Current move is for player: ", mover.player)

        if logging_on:
            file.write("PRIOR TO MOVE " + str(moves_made) + " ------------\n")
            file.write(to_display_string(board))

        # The cat-game check happens before asking for a move.
        if p.is_cat_game(board):
            if logging_on:
                file.write("RESULT IS CAT GAME")
            break

        chosen = mover.pick_next_move(board)
        board = p.add_move(chosen[0], chosen[1], board)
        p.display_board(board)
        moves_made += 1

        # Only the player who just moved is checked for a win.
        if p.is_winner(board, mover.player):
            winner = mover
            if logging_on:
                file.write("FINAL BOARD AFTER MOVE " + str(moves_made) + " WINNER IS: " + winner.player + "\n")
                file.write(to_display_string(board))
            break

        # Hand the turn to the other player (0 <-> 1).
        turn = 1 - turn
        print(f"Switching to player {turn}...")

    if winner is not None:
        print("WINNER IS PLAYER: ", winner.player)
    else:
        print("CAT GAME")

    return winner
示例#3
0
def generate_tic_tac_toe_episode(policy):
    """Generate one episode of transitions by following ``policy``.

    Starting from ``p.empty_board()``, an action is repeatedly sampled with
    ``sample_tic_tac_toe_policy`` and passed to ``model_environment`` together
    with a ``GreedyRandomPlayer('O')`` opponent, until ``model_environment``
    reports that the game is complete. The board is printed and logged before
    every move and once more at the end of the episode.

    Args:
        policy: Policy object passed through unchanged to
            ``sample_tic_tac_toe_policy``.

    Returns:
        A list of dicts, one per step, with keys ``'from_state'``,
        ``'to_state'``, ``'action'`` and ``'reward'`` (in that order).
    """
    # NOTE(review): `file` is neither a parameter nor a local here; it is
    # presumably a module-level writable handle opened elsewhere -- confirm.
    current_board = p.empty_board()
    opponent = GreedyRandomPlayer('O')

    transitions = []
    game_complete = False
    while not game_complete:
        print("PRIOR TO MOVE:")
        print(game.to_display_string(current_board))
        file.write("PRIOR TO MOVE:\n")
        file.write(game.to_display_string(current_board))

        selected_action = sample_tic_tac_toe_policy(current_board, policy)

        # Model the environment: it returns the next state, the reward and
        # whether the game has finished.
        next_state, reward, game_complete = model_environment(
            opponent, current_board, selected_action
        )

        # Key order is kept because str(transition) is written to the log.
        transition = {
            'from_state': current_board,
            'to_state': next_state,
            'action': selected_action,
            'reward': reward,
        }
        transitions.append(transition)
        file.write("ADDING TRANSITION: " + str(transition) + "\n")

        current_board = next_state

    print("BOARD AT END OF EPISODE")
    print(game.to_display_string(current_board))
    file.write("BOARD AT END OF EPISODE\n")
    file.write(game.to_display_string(current_board))

    return transitions
示例#4
0
def test_sample_policy():
    """Sample a move from an empty policy on an empty board and print it."""
    empty_policy = {}
    start_board = p.empty_board()
    chosen_move = sample_tic_tac_toe_policy(start_board, empty_policy)
    print("SELECTED MOVE: ", chosen_move)