Exemplo n.º 1
0
def play(action_modes):
    """Run a single game to completion and return the first player's points.

    Args:
        action_modes: Indexable pair of action modes. Element 0 is used when
            it is the first player's turn, element 1 otherwise.

    Returns:
        The value of ``first_player_point`` for the final state.
    """
    # Start from a fresh State, the class that holds the tic-tac-toe position.
    current = game.State()

    # Keep playing until State.is_done() reports that the game has ended.
    while current.is_done() != True:
        # Select the action mode of the side whose turn it is.
        if current.is_first_player():
            mode = action_modes[0]
        else:
            mode = action_modes[1]
        chosen = ai.action(current, mode)

        # Move on to the state produced by applying the chosen action.
        current = current.next(chosen)

    # Report the first player's points for the finished game.
    return first_player_point(current)
Exemplo n.º 2
0
def run(user, opponent, opponentFirst):
    """Play one interactive game between a user and a computed opponent.

    The board is printed after every move, and the winner is announced with
    ``printWinner`` once a terminal state is reached.

    Args:
        user: Passed as-is to ``getUserAction``, ``tictactoe.takeAction`` and
            ``printWinner`` to identify the human side.
        opponent: Object handed to ``tictactoe.chooseAction``; its ``player``
            attribute identifies the side it moves for.
        opponentFirst: When truthy, the opponent moves before the first
            board is shown.
    """
    def opponentMove(state):
        # The third argument 0 is presumably the exploration rate -- TODO confirm.
        choice = tictactoe.chooseAction(opponent, state, 0)
        return tictactoe.takeAction(opponent.player, state, choice)

    board = tictactoe.State()
    if opponentFirst:
        board = opponentMove(board)
    printBoard(board)

    finished = False
    while not finished:
        # User's turn.
        choice = getUserAction(user, board)
        board = tictactoe.takeAction(user, board, choice)
        printBoard(board)
        finished = board.terminal()

        # Opponent's turn, only if the user's move did not end the game.
        if not finished:
            board = opponentMove(board)
            printBoard(board)
            finished = board.terminal()

    printWinner(board, user)
Exemplo n.º 3
0
def rewardPerEpisode(q, gamma):
  """Play one episode and return the discounted reward collected by ``q``.

  ``q`` plays against a newly constructed ``tictactoe.ActionValueFunc`` for
  the other mark. Who moves first is decided by a coin flip. Every move by
  either side advances the time step ``t``, and each observed reward is
  weighted by ``gamma ** t``.

  Args:
    q: The evaluated player's action-value function; ``q.player`` is its mark.
    gamma: Discount factor applied per time step.

  Returns:
    The sum of discounted rewards observed by ``q.player`` (a float).
  """
  if q.player == tictactoe.PlayerCircle:
    rivalMark = tictactoe.PlayerCross
  else:
    rivalMark = tictactoe.PlayerCircle
  rival = tictactoe.ActionValueFunc(rivalMark)

  total = 0.0 # reward per episode
  step = 0 # time step
  state = tictactoe.State()

  # Coin flip: half of the time the opponent opens the game.
  if random.random() < 0.5:
    opening = tictactoe.chooseAction(rival, state, 0)
    state = tictactoe.takeAction(rival.player, state, opening)
    step += 1

  while True:
    # The player moves; normally the reward is observed only after the opponent's reply.
    move = tictactoe.chooseAction(q, state, 0)
    observed = tictactoe.takeAction(q.player, state, move)
    step += 1

    # If the player's own move ended the game, the reward is observed right away.
    endedByPlayer = observed.terminal()
    if not endedByPlayer:
      reply = tictactoe.chooseAction(rival, observed, 0)
      observed = tictactoe.takeAction(rival.player, observed, reply)
      step += 1

    reward = tictactoe.observeReward(q.player, observed)
    total += math.pow(gamma, step) * reward

    # Stop when either the player's move or the opponent's reply was terminal.
    if endedByPlayer or observed.terminal():
      break
    state = observed

  return total
Exemplo n.º 4
0
def runEpisode(algo, q, epsilon, alpha, gamma):
  """Play one self-play episode, updating both action-value functions.

  Args:
    algo: Update rule selector. If it equals ``SARSA`` the SARSA update is
      used; any other value uses ``QLearning``.
    q: Two-element mutable sequence of action-value functions. ``q[0]`` moves
      first. The two entries are swapped in place after every move, so the
      caller sees the order left by the final swap.
    epsilon: Passed to ``tictactoe.chooseAction`` and to ``SARSA``.
    alpha: Passed through to the update rule.
    gamma: Passed through to the update rule.
  """
  def learn(learner, state, action, outcome):
    # Single dispatch point for the two supported update rules.
    if algo == SARSA:
      SARSA(learner, state, action, outcome, epsilon, alpha, gamma)
    else:
      QLearning(learner, state, action, outcome, alpha, gamma)

  before = tictactoe.State()
  move = tictactoe.chooseAction(q[0], before, epsilon)
  after = tictactoe.takeAction(q[0].player, before, move)
  while True:
    # The other side replies. From the mover's point of view the transition is
    # (before, move) -> afterReply, i.e. the state she sees on her next turn.
    reply = tictactoe.chooseAction(q[1], after, epsilon)
    afterReply = tictactoe.takeAction(q[1].player, after, reply)
    learn(q[0], before, move, afterReply)

    # Shift the window one move forward and swap roles.
    before, after, move = after, afterReply, reply
    q[0], q[1] = q[1], q[0]

    # The side that made the last move has not yet seen its outcome because of
    # the one-step lag; give it a final update on the terminal state.
    if after.terminal():
      learn(q[0], before, move, after)
      break
Exemplo n.º 5
0
    # else:
    # 	raise game.Error("Please select a valid player.")

    P1 = utic.OurPlayer(1)
    P2 = AIPlayer(2)

    # if not args.p2 or args.p2 == 'RP':
    # 	P2 = utic.RandomPlayer(2)
    # elif args.p2 == 'AP':
    # 	P2 = AIPlayer(2)
    # elif args.p2 == 'OP':
    # 	P2 = utic.OurPlayer(2)
    # else:
    # 	raise game.Error("Please select a valid player.")

    State = utic.State([P1, P2], 2)
    # Change the third argument to True to print the gamestate after every move.
    # Change the fourth argument to True to wait for keyboard input to move to the next state.
    # Press enter to advance the game by two moves.
    Game = utic.TicTacToeGame(State, [P1, P2], False, False)

    Game.run()
    Game.genScore()
    print Game.score
    moves = ""
    for i in xrange(len(Game.State.moves) - 1):
        moves += str(Game.State.moves[i] + ",")
    moves += str(Game.State.moves[len(Game.State.moves) - 1])

    ## I'm assuming a table Id(int),IfWon(bool),IfTie(bool),Moves(string),Result(string),Score(int)
    print Game.State.winner
Exemplo n.º 6
0
    def test_first_column_player_one_wins(self):
        """A board whose first column is all 1s must be reported as won."""
        board = [
            [1, 1, None],
            [1, 1, None],
            [1, None, None],
        ]
        # Second argument is presumably the player to check or move -- TODO confirm.
        position = tictactoe.State(board, 1)

        outcome = tictactoe.did_win(position)
        self.assertEqual(outcome, True)
Exemplo n.º 7
0
    def test_empty_board_noone_wins(self):
        """An entirely empty 3x3 board must not be reported as won."""
        board = [
            [None, None, None],
            [None, None, None],
            [None, None, None],
        ]
        position = tictactoe.State(board, 1)

        outcome = tictactoe.did_win(position)
        self.assertEqual(outcome, False)
Exemplo n.º 8
0
    def test_noone_wins(self):
        """A partly filled board with no complete line must not be a win."""
        board = [
            [2, 1, None],
            [1, 2, None],
            [2, None, None],
        ]
        position = tictactoe.State(board, 1)

        outcome = tictactoe.did_win(position)
        self.assertEqual(outcome, False)
Exemplo n.º 9
0
    def test_top_diagonal_player_zero_wins(self):
        """Three 2s on the top-left to bottom-right diagonal must be a win."""
        # NOTE(review): the test name says "player zero" but the winning marks
        # on the board are 2 -- confirm the intended naming.
        board = [
            [2, 1, None],
            [1, 2, None],
            [2, None, 2],
        ]
        position = tictactoe.State(board, 1)

        outcome = tictactoe.did_win(position)
        self.assertEqual(outcome, True)
Exemplo n.º 10
0
    def test_stalemate(self):
        """A full board with no complete line must not be reported as won."""
        board = [
            [1, 1, 2],
            [2, 1, 1],
            [1, 2, 2],
        ]
        position = tictactoe.State(board, 1)

        outcome = tictactoe.did_win(position)
        self.assertEqual(outcome, False)
Exemplo n.º 11
0
            elif temp[1]:
                state.data[i] = 2
        return state

    def state_size(self):
        """Return twice the number of cells on a ``dim`` x ``dim`` board."""
        cell_count = self.dim * self.dim
        return 2 * cell_count


if __name__ == "__main__":
    DIM = 4
    MARK = "O"
    actions = []
    for pos in xrange(DIM**2):
        actions.append(tictactoe.Action(pos, MARK))

    opp = tictactoe.RandomPlayer("X")
    state_parser = TicTacToeStateParser(DIM)
    world = TicTacToeWorld(DIM, MARK, opp, actions, state_parser)
    rl = DQN(world, state0=tictactoe.State(DIM))

    rl.buffer_size = 10000
    rl.batch_size = 100
    rl.clone_network_steps = 50

    try:
        rl.train(4000)
    finally:
        print "SAVING FILES...",
        rl.save_data("graph/dqn_ttt_" + str(DIM))
        print "DONE"
Exemplo n.º 12
0
    def load(self, n):
        """Decode the integer ``n`` into a ``tictactoe.State``.

        ``n`` is read as a base-3 number in which cell ``i`` carries the
        weight ``3**i``. Digits are extracted from the highest cell index
        downwards and written into ``state.data``. Decoding stops as soon as
        the remainder is zero, so the remaining lower cells keep whatever
        ``tictactoe.State(self.dim)`` initialised them to.

        Args:
            n: Integer encoding of a board.

        Returns:
            The populated ``tictactoe.State``.
        """
        cell = self.dim**2 - 1
        decoded = tictactoe.State(self.dim)
        while n != 0:
            # The quotient is this cell's digit; the remainder encodes the lower cells.
            decoded.data[cell], n = divmod(n, 3**cell)
            cell -= 1
        return decoded


if __name__ == "__main__":
    DIM = 4
    MARK = "O"
    actions = []
    for pos in xrange(DIM**2):
        actions.append(tictactoe.Action(pos, MARK))

    opp = tictactoe.RandomPlayer("X")
    state_parser = TicTacToeStateParser(DIM)
    world = TicTacToeWorld(DIM, MARK, opp, actions, state_parser)
    qlearning = QLearning(world, state0=tictactoe.State(DIM))

    try:
        qlearning.train(4000)
    finally:
        print "SAVING FILES...",
        qlearning.save_data("graph/ql_ttt_" + str(DIM))
        print "DONE"