def play(agent_1, agent_2):
    """Play one game of 3x3 tic-tac-toe between two agents.

    Parameters
    ----------
    agent_1 : callable
        Move-selection function for the X-player (board.player == 1);
        called with the current Board, must return a move.
    agent_2 : callable
        Move-selection function for the O-player; same contract.

    Returns
    -------
    The winning player as reported by ``board.player`` when
    ``board.game_is_over()`` becomes true, or 0 if the game ends in a
    draw (no moves left and nobody won).

    NOTE(review): the winner is read from ``board.player`` *after*
    ``make_move`` — this assumes make_move does not switch the active
    player before the game-over check; confirm against Board.
    """
    # Initialize an empty 3x3 tic-tac-toe board.
    board = Board(3)

    while board.move_still_possible():
        # Ask the agent whose turn it is for a move.
        if board.player == 1:  # X-player
            move = agent_1(board)
        else:  # O-player
            move = agent_2(board)
        board.make_move(move)

        # Evaluate the game state after the move.
        if board.game_is_over():
            return board.player  # return winner

    # Board is full and nobody won: the game ended in a draw.
    return 0
def learn_probabilities():
    """Estimate per-cell winning-move probabilities via random self-play.

    Plays 5000 random games of 3x3 tic-tac-toe, counting how often each
    board cell was part of a winning player's sequence of moves, then
    L2-normalizes the combined counts and writes them to the file
    ``probabilities``.

    Returns
    -------
    numpy.ndarray
        3x3 array of row-wise L2-normalized winning-move counts.
    """
    # Global counters for winning moves of X and O.
    winX = np.zeros((3, 3))
    winO = np.zeros((3, 3))

    # Learn through 5000 random plays.
    for i in range(5000):
        board = Board(3)
        # Local (per-game) counters.
        countX = np.zeros((3, 3))
        countO = np.zeros((3, 3))

        while board.move_still_possible():
            move = get_random_move(board)
            x, y = move

            # Update the corresponding local counter for the mover.
            if board.player == 1:
                countX[x, y] += 1
            else:
                countO[x, y] += 1

            board.make_move(move)

            # NOTE(review): board.player is read after make_move here —
            # assumes make_move leaves the winning player current;
            # confirm against Board's implementation.
            if board.move_was_winning_move(board.player):
                winner = board.player
                # Fold this game's moves into the winner's global counter.
                if winner == 1:
                    winX += countX
                elif winner == -1:
                    winO += countO
                break

    # Collect statistics of winning moves of both players.
    win = winX + winO
    # Normalize (row-wise L2) to obtain probabilities.
    win_normalized = preprocessing.normalize(win, norm='l2')

    # Write probabilities to file; 'with' guarantees the handle is closed.
    with open('probabilities', 'w') as f:
        np.savetxt(f, win_normalized)

    print("Learning finished!")

    return win_normalized