示例#1
0
        # Play one complete game, recording one sample per move, then derive
        # the outcome value from X's point of view.
        winner = ''  # stays '' until board.setSquare returns a non-empty result
        counter = 0  # index into `symbols` of the player to move (0 -> 'X', 1 -> 'O')
        symbols = ['X', 'O']
        # Samples are buffered for the whole game because their real reward is
        # only known once the game has finished.
        samples = [
        ]  # each sample built below is [state, action, reward]; NOTE(review): the original comment also mentions a next state, which is not appended in this loop - confirm where it is added
        while winner == '':
            # Encode the position for the player about to move.
            state = state_from_board(board, counter)

            # NOTE(review): presumably an epsilon-greedy choice driven by
            # `model` - confirm against choose_action.
            action = choose_action(epsilon, state, model, sess)

            current_sample = []
            current_sample.append(state)
            current_sample.append(action)

            # Apply the move; the return value becomes the loop's exit
            # condition ('' keeps the game going).
            winner = board.setSquare(action, symbols[counter])
            current_sample.append(
                0.5
            )  # placeholder reward; meant to be replaced once the winner is known

            samples.append(current_sample)
            # Switch to the other player.
            counter = (counter + 1) % 2

        # Outcome from X's point of view: 0.5 if X won, -0.5 if O won, and
        # 0 for any other result.
        xreward = 0
        if winner == 'X':
            xreward = 0.5
        elif winner == 'O':
            xreward = -0.5
示例#2
0
    player_symbol = input('Invalid symbol. Pick X or O: ')

# Turn index of the human player: X moves on turn 0, anything else on turn 1.
player_num = 0 if player_symbol == 'X' else 1

winner = ''
counter = 0
symbols = ['X', 'O']

# Alternate turns until board.setSquare reports a non-empty result.
while winner == '':
    board.printBoard()
    if counter != player_num:
        # Computer's turn: let the AI pick a square and announce the move.
        index = ai_pick(board, counter)
        announcement = symbols[counter] + ' chooses index ' + str(index)
        print(announcement)
        print()
    else:
        # Human's turn: keep prompting until the entry is an integer in
        # 0..8 that refers to a square whose value is 0.
        index = input('Choose an index for a square: ')
        while True:
            if is_int(index) and 0 <= int(index) <= 8:
                if not board.getSquare(int(index)) != 0:
                    break
            index = input('Your entry was invalid. Choose again: ')
        index = int(index)
    winner = board.setSquare(index, symbols[counter])
    # Hand the move to the other player (counter is always 0 or 1).
    counter = 1 - counter

# Show the final position and report the outcome.
board.printBoard()
print('Draw!' if winner == 'D' else winner + ' won!')