def play(action_modes):
    """Run a single game."""
    # Initialize the "State" class that holds the tic-tac-toe position.
    state = game.State()

    # Loop until the game is over (checked with State.is_done()).
    while not state.is_done():
        # Get the action for the player to move.
        action_mode = action_modes[0] if state.is_first_player() else action_modes[1]
        action = ai.action(state, action_mode)

        # Advance to the next state with the action applied.
        state = state.next(action)

    # Return the first player's points.
    return first_player_point(state)
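# A minimal sketch of the first_player_point() helper that play() returns; it is not
# shown in this snippet. It assumes State also exposes is_lose() and is_draw()-style
# terminal checks -- the is_lose() method name is an assumption, not confirmed here.
def first_player_point(ended_state):
    # 1: first player won, 0: first player lost, 0.5: draw.
    # In a terminal state, is_lose() refers to the player whose turn it would be next.
    if ended_state.is_lose():
        return 0 if ended_state.is_first_player() else 1
    return 0.5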
def run(user, opponent, opponentFirst):
    s = tictactoe.State()
    if opponentFirst:
        a = tictactoe.chooseAction(opponent, s, 0)
        s = tictactoe.takeAction(opponent.player, s, a)
    printBoard(s)
    while True:
        a = getUserAction(user, s)
        s = tictactoe.takeAction(user, s, a)
        printBoard(s)
        if s.terminal():
            break
        a = tictactoe.chooseAction(opponent, s, 0)
        s = tictactoe.takeAction(opponent.player, s, a)
        printBoard(s)
        if s.terminal():
            break
    printWinner(s, user)
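# Usage sketch (not part of the original snippet): play interactively as circle against a
# value-function opponent that moves first. The PlayerCircle/PlayerCross constants and
# ActionValueFunc are assumed to exist as they are used elsewhere in this code.
opponent = tictactoe.ActionValueFunc(tictactoe.PlayerCross)
run(tictactoe.PlayerCircle, opponent, opponentFirst=True)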
def rewardPerEpisode(q, gamma):
    if q.player == tictactoe.PlayerCircle:
        opponent = tictactoe.ActionValueFunc(tictactoe.PlayerCross)
    else:
        opponent = tictactoe.ActionValueFunc(tictactoe.PlayerCircle)

    rpe = 0.0  # reward per episode
    t = 0      # time step
    s = tictactoe.State()

    # Randomly determine whether the player or her opponent moves first.
    if random.random() < 0.5:
        a = tictactoe.chooseAction(opponent, s, 0)
        s = tictactoe.takeAction(opponent.player, s, a)
        t += 1

    while True:
        # The player makes a move and defers observing the reward until her opponent has
        # made his move. Only when her move is the last move of the game does she observe
        # the reward immediately before exiting.
        a = tictactoe.chooseAction(q, s, 0)
        s1 = tictactoe.takeAction(q.player, s, a)
        t += 1
        if s1.terminal():
            reward = tictactoe.observeReward(q.player, s1)
            rpe += math.pow(gamma, t) * reward
            break

        # The opponent makes a move, and the player observes the resulting state to
        # calculate her reward.
        opponentAction = tictactoe.chooseAction(opponent, s1, 0)
        s2 = tictactoe.takeAction(opponent.player, s1, opponentAction)
        t += 1
        reward = tictactoe.observeReward(q.player, s2)
        rpe += math.pow(gamma, t) * reward

        s = s2
        if s.terminal():
            break

    return rpe
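# Usage sketch (an assumption, not part of the original code): estimate the quality of a
# trained action-value function by averaging the discounted reward per episode over many
# greedy games against a fresh opponent. The helper name is hypothetical.
def averageRewardPerEpisode(q, gamma=0.9, episodes=1000):
    total = 0.0
    for _ in range(episodes):
        total += rewardPerEpisode(q, gamma)
    return total / episodes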
def runEpisode(algo, q, epsilon, alpha, gamma):
    s = tictactoe.State()
    a = tictactoe.chooseAction(q[0], s, epsilon)
    s1 = tictactoe.takeAction(q[0].player, s, a)
    while True:
        # After the first player has made her move, let the second make his move, too.
        # The resulting state s2 is effectively the outcome of the action taken by the
        # first player earlier. From the first player's point of view, with
        #
        #   * the current state: "s"
        #   * the taken action:  "a"
        #   * the new state:     "s2"
        #
        # we can update her action-value function according to the algorithm.
        opponentAction = tictactoe.chooseAction(q[1], s1, epsilon)
        s2 = tictactoe.takeAction(q[1].player, s1, opponentAction)
        if algo == SARSA:
            SARSA(q[0], s, a, s2, epsilon, alpha, gamma)
        else:
            QLearning(q[0], s, a, s2, alpha, gamma)

        # Roll forward states and switch sides.
        s = s1
        s1 = s2
        a = opponentAction
        q[0], q[1] = q[1], q[0]

        # When the game ends, due to a time-step lag, the player that made the last move
        # has not observed the reward yet. Let her observe the terminal state and update
        # her action-value function before leaving.
        if s1.terminal():
            if algo == SARSA:
                SARSA(q[0], s, a, s1, epsilon, alpha, gamma)
            else:
                QLearning(q[0], s, a, s1, alpha, gamma)
            break
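# A hedged sketch of the SARSA and QLearning update rules that runEpisode() dispatches to.
# The q.get(state, action) / q.set(state, action, value) accessors on the action-value
# function are assumptions made for illustration; chooseAction and observeReward are used
# exactly as in the surrounding code.
def SARSA(q, s, a, s1, epsilon, alpha, gamma):
    reward = tictactoe.observeReward(q.player, s1)
    if s1.terminal():
        target = reward
    else:
        # On-policy: bootstrap from the epsilon-greedy action that would actually be taken.
        a1 = tictactoe.chooseAction(q, s1, epsilon)
        target = reward + gamma * q.get(s1, a1)
    q.set(s, a, q.get(s, a) + alpha * (target - q.get(s, a)))

def QLearning(q, s, a, s1, alpha, gamma):
    reward = tictactoe.observeReward(q.player, s1)
    if s1.terminal():
        target = reward
    else:
        # Off-policy: bootstrap from the greedy (epsilon = 0) action, i.e. the max-value action.
        a1 = tictactoe.chooseAction(q, s1, 0)
        target = reward + gamma * q.get(s1, a1)
    q.set(s, a, q.get(s, a) + alpha * (target - q.get(s, a)))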
# else:
#     raise game.Error("Please select a valid player.")

P1 = utic.OurPlayer(1)
P2 = AIPlayer(2)
# if not args.p2 or args.p2 == 'RP':
#     P2 = utic.RandomPlayer(2)
# elif args.p2 == 'AP':
#     P2 = AIPlayer(2)
# elif args.p2 == 'OP':
#     P2 = utic.OurPlayer(2)
# else:
#     raise game.Error("Please select a valid player.")

State = utic.State([P1, P2], 2)

# Change the third argument to True to print the gamestate after every move.
# Change the fourth argument to True to wait for keyboard input to move to the next state.
# Press enter to advance the game by two moves.
Game = utic.TicTacToeGame(State, [P1, P2], False, False)
Game.run()
Game.genScore()
print Game.score

moves = ""
for i in xrange(len(Game.State.moves) - 1):
    moves += str(Game.State.moves[i]) + ","
moves += str(Game.State.moves[len(Game.State.moves) - 1])

# I'm assuming a table Id(int), IfWon(bool), IfTie(bool), Moves(string), Result(string), Score(int)
print Game.State.winner
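# A hedged sketch of persisting the result into the table described in the comment above,
# using sqlite3. The database file, table name, and how IfWon/IfTie/Result map onto
# Game.State.winner are all assumptions made for illustration.
import sqlite3

conn = sqlite3.connect("games.db")
conn.execute(
    "CREATE TABLE IF NOT EXISTS games "
    "(Id INTEGER PRIMARY KEY, IfWon BOOLEAN, IfTie BOOLEAN, Moves TEXT, Result TEXT, Score INTEGER)"
)
conn.execute(
    "INSERT INTO games (IfWon, IfTie, Moves, Result, Score) VALUES (?, ?, ?, ?, ?)",
    (Game.State.winner == P1, Game.State.winner is None, moves, str(Game.State.winner), Game.score),
)
conn.commit()
conn.close()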
def test_first_column_player_one_wins(self):
    s = tictactoe.State([[1, 1, None], [1, 1, None], [1, None, None]], 1)
    self.assertEqual(tictactoe.did_win(s), True)
def test_empty_board_noone_wins(self):
    s = tictactoe.State([[None, None, None], [None, None, None], [None, None, None]], 1)
    self.assertEqual(tictactoe.did_win(s), False)
def test_noone_wins(self):
    s = tictactoe.State([[2, 1, None], [1, 2, None], [2, None, None]], 1)
    self.assertEqual(tictactoe.did_win(s), False)
def test_top_diagonal_player_zero_wins(self):
    s = tictactoe.State([[2, 1, None], [1, 2, None], [2, None, 2]], 1)
    self.assertEqual(tictactoe.did_win(s), True)
def test_stalemate(self):
    s = tictactoe.State([[1, 1, 2], [2, 1, 1], [1, 2, 2]], 1)
    self.assertEqual(tictactoe.did_win(s), False)
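# A hedged sketch of the did_win() helper these tests exercise, assuming State keeps the
# 3x3 grid on a .board attribute (the attribute name is an assumption). It reports whether
# *any* player has completed a line, which matches the expectations above: the diagonal of
# 2s counts as a win even though the State was constructed with player 1.
def did_win(state):
    b = state.board
    lines = (
        [b[r] for r in range(3)]                            # rows
        + [[b[r][c] for r in range(3)] for c in range(3)]   # columns
        + [[b[i][i] for i in range(3)],                     # main diagonal
           [b[i][2 - i] for i in range(3)]]                 # anti-diagonal
    )
    return any(line[0] is not None and line.count(line[0]) == 3 for line in lines)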
            elif temp[1]:
                state.data[i] = 2
        return state

    def state_size(self):
        return 2 * (self.dim * self.dim)


if __name__ == "__main__":
    DIM = 4
    MARK = "O"

    actions = []
    for pos in xrange(DIM**2):
        actions.append(tictactoe.Action(pos, MARK))

    opp = tictactoe.RandomPlayer("X")
    state_parser = TicTacToeStateParser(DIM)
    world = TicTacToeWorld(DIM, MARK, opp, actions, state_parser)

    rl = DQN(world, state0=tictactoe.State(DIM))
    rl.buffer_size = 10000
    rl.batch_size = 100
    rl.clone_network_steps = 50
    try:
        rl.train(4000)
    finally:
        print "SAVING FILES...",
        rl.save_data("graph/dqn_ttt_" + str(DIM))
        print "DONE"
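# A hedged sketch of the encoding direction implied by TicTacToeStateParser.state_size()
# above: each cell expands to two binary features, one per player, so the feature vector
# has length 2 * dim * dim. The helper name and the 0/1/2 cell encoding for empty/"O"/"X"
# are assumptions inferred from the decoding fragment at the top of the class.
def encode_state(state):
    features = []
    for v in state.data:
        features.append(1.0 if v == 1 else 0.0)
        features.append(1.0 if v == 2 else 0.0)
    return features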
    def load(self, n):
        i = self.dim**2 - 1
        state = tictactoe.State(self.dim)
        while n != 0:
            d = 3**i
            state.data[i] = n // d
            n = n % d
            i -= 1
        return state


if __name__ == "__main__":
    DIM = 4
    MARK = "O"

    actions = []
    for pos in xrange(DIM**2):
        actions.append(tictactoe.Action(pos, MARK))

    opp = tictactoe.RandomPlayer("X")
    state_parser = TicTacToeStateParser(DIM)
    world = TicTacToeWorld(DIM, MARK, opp, actions, state_parser)

    qlearning = QLearning(world, state0=tictactoe.State(DIM))
    try:
        qlearning.train(4000)
    finally:
        print "SAVING FILES...",
        qlearning.save_data("graph/ql_ttt_" + str(DIM))
        print "DONE"
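# A hedged sketch of the inverse of load() above: packing a board into a single base-3
# integer (cell values 0/1/2, most significant digit at the highest index), so that
# load(save(state, dim)) round-trips. The function name and signature are assumptions.
def save(state, dim):
    n = 0
    for i in xrange(dim ** 2):
        n += state.data[i] * 3 ** i
    return n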