Пример #1
0
                # Prompt the human player for a move number until a legal one
                # is entered (enables holds the currently legal positions).
                print("番号を入力してください")
                print(enables)
                inp = input('>>>  ')
                action_t = int(inp)
                # Accept the input only if it matches one of the legal moves.
                for i in enables:
                    if action_t == i:
                        flg = True
                        break

            # Apply the human player's (player 1) chosen move to the board.
            env.update(action_t, 1)
        else:
            # No legal move available: the human player passes this turn.
            print("パス")

        # Stop the game loop as soon as the environment reports a finished game.
        # NOTE(review): `== True` is redundant (`if env.isEnd(): break` suffices),
        # but left unchanged here.
        if env.isEnd() == True: break

        # --- AI's turn (player 2) ---
        print("*** AIターン● ***")
        env.print_screen()
        # Legal moves for the AI; an empty list means the AI must pass.
        enables = env.get_enables(2)
        if len(enables) > 0:
            # Let the trained agent pick a move from the legal ones.
            qvalue, action_t = agent.select_enable_action(env.screen, enables)
            print('>>>  {:}'.format(action_t))
            env.update(action_t, 2)
        else:
            print("パス")

    # --- Game over: report the result from the human player's perspective ---
    print("*** ゲーム終了 ***")
    if env.winner() == 1:
        # Human (player 1) won; show the human's score.
        print("あなたの勝ち! スコアは、{:}です。".format(env.get_score(1)))
    else:
        # AI won (or human lost); show the AI's score.
        print("あなたの負け! AIのスコアは、{:}です。".format(env.get_score(2)))
Пример #2
0
                                if win == playerID[j]:
                                    # Gain a reward of 1 on a win.
                                    # NOTE(review): `reword` is a typo for
                                    # `reward`; it is presumably initialized
                                    # above the visible span, so the name is
                                    # left unchanged here — fix file-wide.
                                    reword = 1

                            # Store this transition and train the network on a
                            # sampled minibatch of past experience.
                            players[j].store_experience(
                                state, targets, tr, reword, state_X, target_X,
                                end)
                            players[j].experience_replay()

                    # Select an action (epsilon-greedy via `exploration`).
                    action = players[i].select_action(state, targets,
                                                      players[i].exploration)
                    # Execute the action on the environment.
                    env.update(action, playerID[i])
                    # For logging: current loss and the greedy Q-value/action.
                    loss = players[i].current_loss
                    Q_max, Q_action = players[i].select_enable_action(
                        state, targets)
                    print(
                        "player:{:1d} | pos:{:2d} | LOSS: {:.4f} | Q_MAX: {:.4f}"
                        .format(playerID[i], action, loss, Q_max))

                # Result of executing the action: has the game ended?
                terminal = env.isEnd()

        # End-of-episode summary: which player won this epoch.
        w = env.winner()
        print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

    # Only the second player's (player 2) model is saved.
    players[1].save_model()
Пример #3
0
            # Alternate between the two players within one game step.
            for i in range(0, 2):
                state = env.screen  # Get the current board state (array).
                targets = env.getEnables(
                    playerID[i])  # Array of positions this player may play.

                if len(targets) > 0:  # Only act if a legal move exists.
                    # Select an action; exploration rate decays with epoch
                    # count e as 1 / (e/10 + 1).
                    action = players[i].selectAction(state, targets,
                                                     1 / ((e / 10) + 1))

                    # Execute the action (action encodes row*8 + col on an
                    # 8x8 board).
                    env.doFlip([action // 8, action % 8], playerID[i])

                    # Check for game end.
                    win = env.winner()  # Player currently in the lead.
                    end = env.isEnd()

                    # Next state and the opponent's legal moves.
                    # NOTE(review): `playerID[i + 1]` indexes past 1 when
                    # i == 1, so playerID presumably has >= 3 entries (e.g.
                    # wraps back to the first player) — confirm at the
                    # definition site.
                    state_X = env.screen
                    target_X = env.getEnables(playerID[i + 1])
                    if len(target_X) == 0:
                        # Opponent must pass: the same player moves again.
                        target_X = env.getEnables(playerID[i])

                    # Terminal transitions get reward 1, others 0.
                    reward = 0
                    if end == True:
                        reward = 1

                    players[i].storeExperience(state, targets, action, reward,
                                               state_X, target_X, end)