Пример #1
0
        print("*** userターン○ ***")
        env.print_screen()
        enables = env.get_enables(1)
        if len(enables) > 0:
            flg = False
            while not flg:
                print("番号を入力してください")
                print(enables)
                inp = input('>>>  ')
                action_t = int(inp)
                for i in enables:
                    if action_t == i:
                        flg = True
                        break

            env.update(action_t, 1)
        else:
            print("パス")

        if env.isEnd() == True: break

        print("*** AIターン● ***")
        env.print_screen()
        enables = env.get_enables(2)
        if len(enables) > 0:
            qvalue, action_t = agent.select_enable_action(env.screen, enables)
            print('>>>  {:}'.format(action_t))
            env.update(action_t, 2)
        else:
            print("パス")
Пример #2
0
                            # Reward for players[j]: 0 by default, raised to 1
                            # only when the episode has ended and `win` equals
                            # that player's id.
                            # NOTE(review): "reword" is presumably a misspelling
                            # of "reward"; left unchanged because earlier uses
                            # may exist outside this fragment.
                            reword = 0
                            if end == True:
                                if win == playerID[j]:
                                    # Earn a reward of 1 for winning
                                    reword = 1

                            # Record the transition for players[j], then call
                            # experience_replay() on the same player.
                            players[j].store_experience(
                                state, targets, tr, reword, state_X, target_X,
                                end)
                            players[j].experience_replay()

                    # Select an action
                    action = players[i].select_action(state, targets,
                                                      players[i].exploration)
                    # Execute the action
                    env.update(action, playerID[i])
                    # for log
                    loss = players[i].current_loss
                    # NOTE(review): Q_action is not used in the visible lines;
                    # only Q_max is printed.
                    Q_max, Q_action = players[i].select_enable_action(
                        state, targets)
                    print(
                        "player:{:1d} | pos:{:2d} | LOSS: {:.4f} | Q_MAX: {:.4f}"
                        .format(playerID[i], action, loss, Q_max))

                # Result of executing the action
                terminal = env.isEnd()

        # Report the winner for this epoch.
        w = env.winner()
        print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

    # The model saved is player2, the player that moves second.
Пример #3
0
from Reversi import Reversi
# Create the Reversi game object with the given size and title.
game = Reversi(size=800, gameName='Toby\'s Game')

# Drive the game: keep calling update() until it returns a falsy value.
while game.update():
    pass