Пример #1
0
def run(user, opponent, opponentFirst):
    """Play one interactive game between the user and a computer opponent.

    Args:
        user: The side the human plays; passed as the player argument to
            ``tictactoe.takeAction`` and forwarded to ``getUserAction`` and
            ``printWinner``.
        opponent: The computer player; handed to ``tictactoe.chooseAction``
            and queried for its ``player`` attribute when applying its move.
        opponentFirst: When truthy, the opponent makes the opening move
            before the board is first printed.

    The board is printed after every move, and the winner is reported once
    the state says it is terminal.
    """

    def userMove(board):
        # Ask the human for a move and apply it for the user's side.
        move = getUserAction(user, board)
        return tictactoe.takeAction(user, board, move)

    def opponentMove(board):
        # The third argument 0 is presumably the exploration rate (greedy
        # play) -- confirm against tictactoe.chooseAction.
        move = tictactoe.chooseAction(opponent, board, 0)
        return tictactoe.takeAction(opponent.player, board, move)

    board = tictactoe.State()
    if opponentFirst:
        board = opponentMove(board)
    printBoard(board)

    # From here on the user always moves next; turns then alternate until
    # the state reports that the game is over.
    usersTurn = True
    while True:
        board = userMove(board) if usersTurn else opponentMove(board)
        printBoard(board)
        if board.terminal():
            break
        usersTurn = not usersTurn

    printWinner(board, user)
Пример #2
0
def rewardPerEpisode(q, gamma):
  """Play one episode with ``q`` and return its discounted reward.

  Args:
    q: The action-value function under evaluation; its ``player`` attribute
      determines which side it plays.
    gamma: Discount factor; each observed reward is weighted by
      ``gamma ** t``, where ``t`` counts every move made so far by either
      side.

  Returns:
    The sum of the discounted rewards observed by ``q.player``.

  The opponent is a freshly constructed ``tictactoe.ActionValueFunc`` for
  the other side. With probability 0.5 the opponent makes the opening move.
  """

  def play(agent, state):
    # Let `agent` pick a move in `state` and return the resulting state.
    # The third argument 0 is presumably the exploration rate (greedy
    # play) -- confirm against tictactoe.chooseAction.
    move = tictactoe.chooseAction(agent, state, 0)
    return tictactoe.takeAction(agent.player, state, move)

  rivalSide = (tictactoe.PlayerCross
               if q.player == tictactoe.PlayerCircle
               else tictactoe.PlayerCircle)
  rival = tictactoe.ActionValueFunc(rivalSide)

  total = 0.0  # accumulated discounted reward
  step = 0     # number of moves made so far
  state = tictactoe.State()

  # Coin flip: should the opponent open the game?
  if random.random() < 0.5:
    state = play(rival, state)
    step += 1

  while True:
    # The player moves. Unless that move ends the game, the opponent replies
    # before the player looks at the reward, so the reward is always read
    # from the latest state.
    observed = play(q, state)
    step += 1
    endedByPlayer = observed.terminal()
    if not endedByPlayer:
      observed = play(rival, observed)
      step += 1

    payoff = tictactoe.observeReward(q.player, observed)
    total += math.pow(gamma, step) * payoff

    if endedByPlayer or observed.terminal():
      break
    state = observed

  return total
Пример #3
0
def run(user, opponent, opponentFirst):
  """Run a single interactive game: human user versus computer opponent.

  Args:
    user: The side played by the human; used as the player argument of
      ``tictactoe.takeAction`` and passed to ``getUserAction`` and
      ``printWinner``.
    opponent: The computer player; passed to ``tictactoe.chooseAction``, and
      its ``player`` attribute identifies the side its move is applied for.
    opponentFirst: If truthy, the opponent plays one move before the first
      board is shown.

  Every move is followed by a board printout. When the state reports that
  it is terminal, the winner is printed and the function returns.
  """

  def humanTurn(position):
    # Prompt the user and apply the chosen move.
    choice = getUserAction(user, position)
    return tictactoe.takeAction(user, position, choice)

  def computerTurn(position):
    # The trailing 0 is presumably an exploration rate of zero -- confirm
    # against tictactoe.chooseAction.
    choice = tictactoe.chooseAction(opponent, position, 0)
    return tictactoe.takeAction(opponent.player, position, choice)

  position = tictactoe.State()
  if opponentFirst:
    position = computerTurn(position)
  printBoard(position)

  gameOver = False
  while not gameOver:
    # One round: the user moves, then the opponent; stop as soon as either
    # move ends the game.
    for advance in (humanTurn, computerTurn):
      position = advance(position)
      printBoard(position)
      if position.terminal():
        gameOver = True
        break

  printWinner(position, user)
Пример #4
0
def runEpisode(algo, q, epsilon, alpha, gamma):
  """Play one self-play episode and update both action-value functions.

  Args:
    algo: The learning rule to apply; compared against ``SARSA``, and any
      other value selects ``QLearning``.
    q: Two-element mutable sequence of action-value functions. ``q[0]`` moves
      first. The two entries are swapped in place after every move, so the
      caller's sequence may be left in swapped order on return.
    epsilon: Passed to ``tictactoe.chooseAction`` and to ``SARSA``.
    alpha: Forwarded to the update rule.
    gamma: Forwarded to the update rule.

  Each player's update is delayed by one move: she learns from the state
  that results after her opponent has replied.
  """

  def learn(agent, state, action, outcome):
    # Apply the selected update rule to `agent` for one transition.
    if algo == SARSA:
      SARSA(agent, state, action, outcome, epsilon, alpha, gamma)
    else:
      QLearning(agent, state, action, outcome, alpha, gamma)

  before = tictactoe.State()
  move = tictactoe.chooseAction(q[0], before, epsilon)
  current = tictactoe.takeAction(q[0].player, before, move)

  while True:
    # The other player replies. From the point of view of the player who
    # moved previously, the transition is (before, move) -> afterReply.
    reply = tictactoe.chooseAction(q[1], current, epsilon)
    afterReply = tictactoe.takeAction(q[1].player, current, reply)
    learn(q[0], before, move, afterReply)

    # Shift the window forward by one move and swap sides, so q[0] is now
    # the player who has just moved.
    before, current, move = current, afterReply, reply
    q[0], q[1] = q[1], q[0]

    # Because of the one-move lag, the player who made the final move has
    # not yet seen the terminal state; give her that last update.
    if current.terminal():
      learn(q[0], before, move, current)
      break