Exemplo n.º 1
0
def debug_run(model):
    '''
    Play one game in which both sides move via ``q_select``, printing the
    board and the model's evaluation before every move.

    After the game ends, the first row of ``model.probs`` for the final
    position is printed as well.

    Args:
        model: object exposing ``evaluate``, ``sess``, ``probs`` and
            ``states`` (presumably a TensorFlow-backed network -- confirm
            against the model class).
    '''
    game = TicTacToe(3)

    # Randomly choose which player (1 or 2) moves first.  ``randint`` has an
    # exclusive upper bound, so ``high=3`` is the drop-in replacement for the
    # deprecated ``np.random.random_integers(low=1, high=2)``.
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None

    # ``check_win`` keeps returning None until the game is decided.
    while winner is None:
        board = game.get_board(pid)

        val = model.evaluate(game.get_input(pid))
        print(board)
        print(val)

        x, y = q_select(pid, board, model, game)

        game.place(pid, x, y)
        winner = game.check_win()

        # Hand the turn to the other player: 1 -> 2, 2 -> 1.
        pid = (pid % 2) + 1

    # ``pid`` was already toggled after the last move, so the final position
    # is fed in for the player who would have moved next.
    print(
        model.sess.run(model.probs,
                       feed_dict={model.states: game.get_input(pid)})[0])
Exemplo n.º 2
0
def test_against_random(model, num_games=1000):
    '''
    Evaluate the model against a uniformly random opponent.

    Player 1 moves via ``random_choice`` and player 2 moves via ``q_select``
    with ``model``.  The starting player is drawn at random for every game.

    Args:
        model: the model handed to ``q_select`` for player 2's moves.
        num_games: how many games to play (defaults to 1000, the value that
            used to be hard-coded).

    Returns:
        Fraction of games for which ``check_win`` reported 2 (the model's
        player id).

    Raises:
        ValueError: if ``num_games`` is smaller than 1, since the win rate
            would be undefined.
    '''
    if num_games < 1:
        raise ValueError('num_games must be at least 1')

    # Tally keyed by the value returned from ``check_win``; the summary line
    # below reports key 0 as ties, 1 as losses and 2 as wins.
    wins = {0: 0, 1: 0, 2: 0}

    for _ in range(num_games):
        game = TicTacToe(3)

        # ``randint`` has an exclusive upper bound; this replaces the
        # deprecated ``np.random.random_integers(low=1, high=2)``.
        pid = np.random.randint(low=1, high=3, size=1)[0]
        winner = None
        while winner is None:
            board = game.get_board(pid)

            if pid == 1:
                x, y = random_choice(board)
            else:
                x, y = q_select(pid, board, model, game)

            game.place(pid, x, y)
            winner = game.check_win()

            # Hand the turn to the other player: 1 -> 2, 2 -> 1.
            pid = (pid % 2) + 1

        wins[winner] += 1

    print('Wins: %d Ties: %d Losses: %d' % (wins[2], wins[0], wins[1]))
    return wins[2] / (wins[0] + wins[1] + wins[2])
Exemplo n.º 3
0
def play_user(model):
    '''
    Play one interactive game: the human is player 1, the model player 2.

    The board is printed before every move.  On the model's turn the move
    comes from ``evaluate(model, game, pid, tau=.1)`` and the returned
    ``prob`` plus ``model.evaluate`` of the current input are printed.  On
    the human's turn the coordinates are read from standard input.

    Args:
        model: the model passed to ``evaluate`` and queried via
            ``model.evaluate``.

    Raises:
        ValueError: if the human enters something ``int()`` cannot parse.
    '''
    game = TicTacToe(3)

    # Randomly choose who moves first.  ``randint`` has an exclusive upper
    # bound; this replaces the deprecated
    # ``np.random.random_integers(low=1, high=2)``.
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None

    while winner is None:
        board = game.get_board(pid)
        print(board)

        if pid == 2:
            x, y, prob = evaluate(model, game, pid, tau=.1)
            print(prob)
            print(model.evaluate(game.get_input(pid)))
        else:
            x = int(input('x: '))
            y = int(input('y: '))

        game.place(pid, x, y)
        winner = game.check_win()

        # Hand the turn to the other player: 1 -> 2, 2 -> 1.
        pid = (pid % 2) + 1

    # Show the finished game as the input built for player 1 (the human).
    print(game.get_input(1))
Exemplo n.º 4
0
def debug_run(model):
    '''
    Play one game in which both sides move via ``q_select``, printing the
    board and the model's evaluation before every move.

    Args:
        model: object exposing ``evaluate``; it is also handed to
            ``q_select`` to choose each move.
    '''
    game = TicTacToe()

    # Randomly choose which player (1 or 2) moves first.  ``randint`` has an
    # exclusive upper bound; this replaces the deprecated
    # ``np.random.random_integers(low=1, high=2)``.
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None

    # ``check_win`` keeps returning None until the game is decided.
    while winner is None:
        board = game.get_board(pid)

        # The board is reshaped to (1, 3, 3) before evaluation -- presumably
        # a batch of one 3x3 board; confirm against ``model.evaluate``.
        val = model.evaluate(board.reshape(1, 3, 3))
        print(board)
        print(val)

        x, y = q_select(board, model)

        game.place(pid, x, y)
        winner = game.check_win()

        # Hand the turn to the other player: 1 -> 2, 2 -> 1.
        pid = (pid % 2) + 1
Exemplo n.º 5
0
def play_user(model):
    '''
    Play one interactive game: the human is player 1, the model player 2.

    The board is printed before every move.  The model's moves come from
    ``q_select``; the human's coordinates are read from standard input.

    Args:
        model: the model passed to ``q_select`` and queried via
            ``model.evaluate``.

    Raises:
        ValueError: if the human enters something ``int()`` cannot parse.
    '''
    game = TicTacToe()

    # Randomly choose who moves first.  ``randint`` has an exclusive upper
    # bound; this replaces the deprecated
    # ``np.random.random_integers(low=1, high=2)``.
    pid = np.random.randint(low=1, high=3, size=1)[0]
    winner = None

    while winner is None:
        board = game.get_board(pid)

        # NOTE(review): the result of this evaluation was never used.  The
        # call itself is kept in case ``evaluate`` has side effects --
        # confirm and delete it if it does not.
        model.evaluate(board.reshape(1, 3, 3))
        print(board)

        if pid == 2:
            x, y = q_select(pid, board, model, game)
        else:
            x = int(input('x: '))
            y = int(input('y: '))

        game.place(pid, x, y)
        winner = game.check_win()

        # Hand the turn to the other player: 1 -> 2, 2 -> 1.
        pid = (pid % 2) + 1
Exemplo n.º 6
0
    boards = {1: [], 2: []}

    pid = 1
    opp_pid = 2
    winner = None
    while winner is None:
        
        board = game.get_board(pid)

        if random.random() < gamma:
            x, y = random_choice(board)
        else:
            x, y = q_select(board, model)

        game.place(pid, x, y)
        board = game.get_board(pid)
        boards[pid].append(board)
        winner = game.check_win()

        pid = (pid % 2) + 1
        
    board = game.get_board(pid)
    boards[pid].append(board)

    if winner != 0:
        loser = (winner % 2) + 1

        #winner_rewards = decay_reward(1, len(boards[winner]))
        #loser_rewards = decay_reward(-1, len(boards[loser]))
        winner_rewards = [1] * len(boards[winner])