# Console game loop: the human plays as player 1, the AI agent as player 2.
# Relies on `env` (the game environment) and `agent` being defined earlier.
#
# NOTE(review): the original fragment carried a bare `break` but no loop
# header was visible; the `while not env.isEnd()` header below is supplied so
# the block is a valid statement on its own -- confirm against the
# surrounding file.
while not env.isEnd():
    # ---- Human turn (player 1) ----
    print("*** userターン○ ***")
    env.print_screen()
    enables = env.get_enables(1)
    if len(enables) > 0:
        # Keep prompting until the user types one of the offered move numbers.
        while True:
            print("番号を入力してください")
            print(enables)
            inp = input('>>> ')
            try:
                action_t = int(inp)
            except ValueError:
                # Non-numeric input used to crash the game; ask again instead.
                continue
            if any(action_t == candidate for candidate in enables):
                break
        env.update(action_t, 1)
    else:
        # No move available for the human: the turn is skipped ("pass").
        print("パス")

    # Stop before the AI moves if the human's move finished the game.
    if env.isEnd():
        break

    # ---- AI turn (player 2) ----
    print("*** AIターン● ***")
    env.print_screen()
    enables = env.get_enables(2)
    if len(enables) > 0:
        # The agent returns a (q-value, action) pair for the allowed moves;
        # only the action is used here.
        qvalue, action_t = agent.select_enable_action(env.screen, enables)
        print('>>> {:}'.format(action_t))
        env.update(action_t, 2)
    else:
        print("パス")
# Fragment of a training loop that has been flattened onto one physical line.
# The enclosing loop headers (which bind i, j, e, tr, end, win, state,
# targets, state_X, target_X, ...) are not visible in this view.
#
# NOTE(review): as written this is not runnable Python -- the original line
# breaks and indentation were lost, and everything after the first '#' is
# lexically a comment. Restore the layout from the upstream file before use;
# the nesting of the statements below cannot be determined from this line.
#
# Visible statements, in order (inline Japanese notes translated in quotes):
#   * `reword` (the reward) starts at 0 and becomes 1 when `end == True` and
#     `win == playerID[j]` -- "earn a reward of 1 on a win".
#   * players[j].store_experience(state, targets, tr, reword, state_X,
#     target_X, end) followed by players[j].experience_replay().
#   * "select an action": players[i].select_action(state, targets,
#     players[i].exploration).
#   * "execute the action": env.update(action, playerID[i]).
#   * "for log": reads players[i].current_loss, calls
#     players[i].select_enable_action(state, targets) and prints player id,
#     position, loss and Q_max.
#   * "result of executing the action": terminal = env.isEnd().
#   * w = env.winner(), then an epoch summary line is printed.
#   * Trailing note: "what gets saved is player2, the player who moves
#     second" (the save call itself is not part of this fragment).
reword = 0 if end == True: if win == playerID[j]: # 勝ったら報酬1を得る reword = 1 players[j].store_experience( state, targets, tr, reword, state_X, target_X, end) players[j].experience_replay() # 行動を選択 action = players[i].select_action(state, targets, players[i].exploration) # 行動を実行 env.update(action, playerID[i]) # for log loss = players[i].current_loss Q_max, Q_action = players[i].select_enable_action( state, targets) print( "player:{:1d} | pos:{:2d} | LOSS: {:.4f} | Q_MAX: {:.4f}" .format(playerID[i], action, loss, Q_max)) # 行動を実行した結果 terminal = env.isEnd() w = env.winner() print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w)) # 保存は後攻のplayer2 を保存する。
# Launcher script: builds a Reversi game and keeps stepping it until
# update() returns a falsy value.
from Reversi import Reversi

game = Reversi(size=800, gameName='Toby\'s Game')

# Each update() call advances the game once; a falsy result ends the loop.
while game.update():
    pass