def play(model):
    """Play one self-play game and return the collected training records.

    Each record is ``[state.pieces_array(), policies, value]``: ``policies``
    has ``DN_OUTPUT_SIZE`` entries holding the search score of every legal
    action (0 elsewhere), and ``value`` is filled in after the game from
    ``first_player_value`` of the final state, with the sign flipped on
    every successive record.
    """
    records = []
    state = State()

    while not state.is_done():
        # Probability distribution over the legal moves.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        legal = state.legal_actions()

        # Spread the scores over the full action space and store the sample.
        policies = [0] * DN_OUTPUT_SIZE
        for move, prob in zip(legal, scores):
            policies[move] = prob
        records.append([state.pieces_array(), policies, None])

        # Sample the move actually played and advance the game.
        state = state.next(np.random.choice(legal, p=scores))

    # Back-fill the value target, alternating the sign record by record.
    value = first_player_value(state)
    for record in records:
        record[2] = value
        value = -value
    return records
def play(model):
    """Run a single self-play game and return its training samples.

    Returns a list of ``[state.pieces_array(), policies, value]`` entries,
    one per position that was played from. ``value`` comes from
    ``first_player_value`` of the final state and is negated for every
    odd-indexed entry.
    """
    samples = []
    position = State()

    while not position.is_done():
        move_probs = pv_mcts_scores(model, position, SP_TEMPERATURE)

        # Policy target over the whole action space (0 for illegal moves).
        target = [0] * DN_OUTPUT_SIZE
        for idx, prob in zip(position.legal_actions(), move_probs):
            target[idx] = prob
        samples.append([position.pieces_array(), target, None])

        chosen = np.random.choice(position.legal_actions(), p=move_probs)
        position = position.next(chosen)

    outcome = first_player_value(position)
    for ply, sample in enumerate(samples):
        sample[2] = -outcome if ply % 2 else outcome
    return samples
def play(model):
    """Play one game by self-play and return the training data.

    Every entry is ``[[pieces, enemy_pieces], policies, value]``. The value
    slot is ``None`` while the game runs and is filled afterwards from
    ``first_player_value(state)``, alternating in sign entry by entry.
    """
    # Training data
    history = []

    # Initial state
    state = State()

    while not state.is_done():
        # Probability distribution over the legal moves
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and its policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for legal_action, score in zip(state.legal_actions(), scores):
            policies[legal_action] = score
        board = [state.pieces, state.enemy_pieces]
        history.append([board, policies, None])

        # Pick an action and move to the next state
        action = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for entry in history:
        entry[2], value = value, -value
    return history
def play(model, using_saved_state=False, saving_ontheway_state=False):
    """Run one self-play game and return its training records.

    Args:
        model: Model handed to ``pv_mcts_scores``.
        using_saved_state: When true, start from ``load_state()``; a falsy
            result falls back to a fresh ``State()``.
        saving_ontheway_state: When true, ``save_state(state)`` is called
            once, at the moment the history holds exactly 25 records.

    Returns:
        A list of ``[[pieces, enemy_pieces], policies, value]`` records.
    """
    # Training data
    history = []

    # Starting state: a saved one if requested and available, else a new game
    state = load_state() if using_saved_state else None
    if not state:
        state = State()

    starttime = time.time()
    print('')

    while not state.is_done():
        # Probability distribution over the legal moves
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Record the state together with its policy
        policies = [0] * DN_OUTPUT_SIZE
        for legal_action, score in zip(state.legal_actions(), scores):
            policies[legal_action] = score
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Progress output every 10 records, optional mid-game snapshot
        recorded = len(history)
        if recorded % 10 == 0:
            print("state len: ", recorded)
            print(state)
        if saving_ontheway_state and recorded == 25:
            save_state(state)

        # Sample the action and advance
        action = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(action)

    # Game over: report the result
    endtime = time.time()
    print("first player is ", "lose" if state.is_lose() else "win")
    print("first player num:", state.piece_count(state.pieces))
    print('elapsed time', endtime - starttime)
    print(state)

    # Fill in the value target, flipping the sign for each record
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def play(model):
    """Play one self-play game and return records with a 3-step lookahead.

    Each record is
    ``[[pieces, enemy_pieces], policies, value, values, action, lookahead]``
    where ``values`` is the second result of ``pv_mcts_scores``, ``action``
    is the move that was sampled, and ``lookahead`` is a list of three
    records: this record and the next two. Positions past the end of the
    game are padded with synthetic records built from the last stored board
    and an all-zero policy.
    """
    # Training data
    history = []

    # Initial state
    state = State()

    while not state.is_done():
        # Probability distribution over the legal moves, plus search values
        scores, values = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Policy target over the full action space
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy

        # Sample the action to play
        action = np.random.choice(state.legal_actions(), p=scores)

        # state, policy, value, search result, chosen move, following positions
        board = [state.pieces, state.enemy_pieces]
        history.append([board, policies, None, values, action, None])

        # Advance to the next state
        state = state.next(action)

    # Fill in the value target, flipping the sign for each record
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value

    # Keep the last recorded position for padding
    last_state = history[-1][0]
    last_policy = [0] * DN_OUTPUT_SIZE
    v0 = history[0][2]
    v1 = history[1][2]

    total = len(history)
    for i in range(total):
        rp = []
        for index in range(i, i + 3):
            if index < total:
                rp.append(history[index])
                continue
            # Past the end of the game: pad with a synthetic record whose
            # value follows the parity of the step.
            v = v0 if (index % 2) == 0 else v1
            a = randint(9)  # NOTE(review): randint is defined outside this block - confirm its range
            rp.append([last_state, last_policy, v, v, a, None])
        history[i][5] = rp
    return history
def turn_of_human(self, touch):
    """Handle a touch as the human player's move on the 3x3 board.

    Works on the module-level ``state``. A touch after the game has ended
    starts a new game; otherwise the touch position is mapped to a cell
    (160px per cell) and played if it is a legal action, after which the AI
    takes its turn.
    """
    global state

    # Game already over: start a new one
    if state.is_done():
        state = State()
        self.reset()
        return

    # Not the first player's turn
    if not state.is_first_player():
        return

    # Convert the touch position into an action
    col = int(touch.pos[0] / 160)
    row = int(touch.pos[1] / 160)
    if not (0 <= col <= 2 and 0 <= row <= 2):  # outside the board
        return
    action = col + row * 3

    # Not a legal move
    if action not in state.legal_actions():
        return

    # Advance to the next state
    state = state.next(action)

    # Draw the new piece
    self.draw_piece(action)

    # AI's turn
    self.turn_of_ai()
def predict(model: tf.keras.models.Model, state: State):
    """Run the model on ``state`` and return ``(policies, value)``.

    The input is built from ``[state.pieces, state.enemy_pieces]``: it is
    reshaped with the last entry of ``DN_INPUT_SHAPE`` as the leading axis,
    that axis is then moved to the end with ``transpose(1, 2, 0)``, and a
    batch axis of size 1 is added.

    ``policies`` holds the model's first output restricted to the legal
    actions, normalised to sum to 1 when that sum is positive. ``value`` is
    the scalar ``y[1][0][0]`` of the second output.
    """
    # DN_INPUT_SHAPE must be a list here: it is concatenated with lists.
    leading_last_axis = [DN_INPUT_SHAPE[-1]] + DN_INPUT_SHAPE[:-1]
    batched_shape = [1] + DN_INPUT_SHAPE

    x = np.array([state.pieces, state.enemy_pieces])
    x = x.reshape(leading_last_axis)
    x = x.transpose(1, 2, 0)
    x = x.reshape(batched_shape)

    y = model.predict(x, batch_size=1)

    # Keep only the legal actions, then normalise (skip when the sum is 0).
    legal = list(state.legal_actions())
    policies = y[0][0][legal]
    total = sum(policies)
    policies /= total if total > 0 else 1

    value = y[1][0][0]
    return policies, value
def play(model):
    """Play one self-play game and return the training data.

    Returns a list of ``[[pieces, enemy_pieces], policies, value]`` entries.
    ``policies`` has ``DN_OUTPUT_SIZE`` slots; the slots of the legal actions
    receive the scores from ``pv_mcts_scores``. ``value`` is set after the
    game: even-indexed entries get ``first_player_value(state)`` of the final
    state, odd-indexed entries its negation.
    """
    # Training data
    history = []

    # Initial state
    state = State()

    while not state.is_done():
        # Score every legal move
        # (model, game state, temperature parameter passed as SP_TEMPERATURE)
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Place each score at the index of its action
        policies = [0] * DN_OUTPUT_SIZE
        candidates = state.legal_actions()
        for candidate, score in zip(candidates, scores):
            policies[candidate] = score

        # Record ([own pieces, enemy pieces], policy, None placeholder for the value)
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Sample an action and move to the next state
        action = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(action)

    # Add the value to the training data, alternating the sign
    value = first_player_value(state)
    for entry in history[0::2]:
        entry[2] = value
    for entry in history[1::2]:
        entry[2] = -value
    return history
def play(model):
    """Play one self-play game and return the training records.

    Each record is ``[[pieces, enemy_pieces], policies, value]``.
    ``policies`` is a NumPy vector with one slot per entry of the action list
    stored in ``action_list.txt``; the slots of the legal actions hold the
    scores returned by ``pv_mcts_scores``. ``value`` is filled in after the
    game from ``first_player_value(state)``, with the sign flipped on every
    successive record.

    Raises:
        OSError: If ``action_list.txt`` cannot be opened.
    """
    history = []
    state = State()

    # The action table is constant for the whole game, so load it once
    # instead of re-reading the file on every move.
    # NOTE(review): pickle.load can execute arbitrary code; action_list.txt
    # must come from a trusted source.
    with open('action_list.txt', 'rb') as f:
        action_list = pickle.load(f)
    num_actions = len(action_list)

    while not state.is_done():
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        legal_actions = state.legal_actions()

        # Spread the scores of the legal actions over the full action space.
        policies = np.zeros(num_actions)
        for action_num, score in zip(legal_actions, scores):
            policies[action_num] = score
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        action_num = np.random.choice(legal_actions, p=scores)

        # Bug fix: the result of next() used to be discarded, so the loop
        # kept replaying the same position. Rebind the state like the other
        # self-play loops do. The None guard keeps an in-place next() working.
        next_state = state.next(action_num)
        if next_state is not None:
            state = next_state

    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def turn_of_human(self, touch):
    """Handle a touch as the human player's move on the 6x6 board.

    Works on the module-level ``state``. A touch after the game has ended
    starts a new game. Otherwise the touch is mapped to a cell (80px per
    cell); when the only legal action is 36 (pass) the pass is played
    regardless of where the touch landed. After the move the board is
    redrawn, the call blocks for one second, and the AI takes its turn.
    """
    global state

    # Game already over: start a new one
    if state.is_done():
        state = State()
        self.reset()
        return

    # Not the first player's turn
    if not state.is_first_player():
        return

    # Convert the touch position into an action
    col = int(touch.pos[0] / 80)
    row = int(touch.pos[1] / 80)
    if not (0 <= col <= 5 and 0 <= row <= 5):  # outside the board
        return
    action = col + row * 6

    # Forced pass, or reject anything that is not a legal move
    legal_actions = state.legal_actions()
    if legal_actions == [36]:
        action = 36  # pass
    elif action not in legal_actions:
        return

    # Advance to the next state
    state = state.next(action)

    # Redraw the pieces
    self.draw_piece()
    sleep(1)

    # AI's turn
    self.turn_of_ai()
# obs = convert_state_to_obs(state) # else: # pass if __name__ == "__main__": os.environ["OMP_NUM_THREADS"] = "1" with open("config.yaml") as f: args = yaml.safe_load(f) # print(args) # ここに実験用のコードを書く state = State() while True: print(state.legal_actions()) state = state.next(random_action(state)) # path = "models/10000.pth" # EvalHandyRL(100, path) # policies = obs_to_policy_to_use_game(agent, obs, state) # print(policies) # convert_state_to_obs(state) # test_predict() # test_cigeister() # 方策を持ってくる
class GameUI(tk.Frame):
    """Tkinter UI for playing simple shogi on a 3x4 board against an AI.

    The human moves only while ``state.is_first_player()`` is true; the AI
    move comes from the callable built by ``pv_mcts_action(model, 0.0)``.

    Canvas layout (240x400): the board occupies y in [40, 360) with 80px
    squares; the strips above and below it show the captured pieces.
    """

    # Initialization
    def __init__(self, master=None, model=None):
        """Create the window, load the piece images and draw the board.

        Args:
            master: Parent widget passed to ``tk.Frame``.
            model: Model passed to ``pv_mcts_action``.
        """
        tk.Frame.__init__(self, master)
        self.master.title('간이 장기')

        # Game state
        self.state = State()
        # Selection (-1: none, 0-11: board square, 12-14: captured piece)
        self.select = -1

        # Direction constants as (dx, dy)
        self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                    (0, 1), (-1, 1), (-1, 0), (-1, -1))

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)

        # Images per piece type: (normal, rotated, small, small rotated)
        self.images = [(None, None, None, None)]
        for i in range(1, 5):
            image = Image.open('piece{}.png'.format(i))
            self.images.append((
                ImageTk.PhotoImage(image),
                ImageTk.PhotoImage(image.rotate(180)),
                ImageTk.PhotoImage(image.resize((40, 40))),
                ImageTk.PhotoImage(image.resize((40, 40)).rotate(180))))

        # Canvas
        self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        # Initial draw
        self.on_draw()

    @staticmethod
    def _capture_list(pieces):
        """Return the captured piece types to display (at most two per type)."""
        captures = []
        for i in range(3):
            if pieces[12 + i] >= 2:
                captures.append(1 + i)
            if pieces[12 + i] >= 1:
                captures.append(1 + i)
        return captures

    def _find_direction(self, position_src, position_dst):
        """Return the index in ``self.dxy`` of the step src -> dst, or None."""
        dx = position_dst % 3 - position_src % 3
        dy = int(position_dst / 3) - int(position_src / 3)
        for i, (step_x, step_y) in enumerate(self.dxy):
            if step_x == dx and step_y == dy:
                return i
        return None

    # Human's turn
    def turn_of_human(self, event):
        """Handle a left click: select a piece, then move or drop it.

        The first click stores the selection; the second click is turned
        into an action. An action that is not legal just clears the
        selection.
        """
        # Game over: start a new game
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Captured piece types of the player to move
        captures = self._capture_list(self.state.pieces)

        # Selected position (0-11: board square, 12-14: captured piece).
        # The board covers y in [40, 360). y == 360 used to be accepted as a
        # board row, which produced a bogus index 12..14 and could raise
        # IndexError on captures[] at the next click.
        p = int(event.x / 80) + int((event.y - 40) / 80) * 3
        if 40 <= event.y < 360:
            select = p
        elif event.x < len(captures) * 40 and event.y >= 360:
            select = 12 + int(event.x / 40)
        else:
            return

        # First click: remember the selection
        if self.select < 0:
            self.select = select
            self.on_draw()
            return

        # Second click: convert selection + target into an action
        action = -1
        if select < 12:
            if self.select < 12:
                # Moving a piece on the board. A target that is not one
                # step away stays illegal instead of silently becoming
                # direction 0.
                direction = self._find_direction(self.select, p)
                if direction is not None:
                    action = self.state.position_to_action(p, direction)
            else:
                # Dropping a captured piece
                action = self.state.position_to_action(
                    p, 8 - 1 + captures[self.select - 12])

        # Not a legal move: clear the selection
        if not (action in self.state.legal_actions()):
            self.select = -1
            self.on_draw()
            return

        # Advance to the next state
        self.state = self.state.next(action)
        self.select = -1
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Let the AI choose and play a move, unless the game is over."""
        if self.state.is_done():
            return

        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    # Convert a move target into a move direction
    def position_to_direction(self, position_src, position_dst):
        """Return the ``self.dxy`` index of the step src -> dst.

        Returns 0 when no direction matches (kept for compatibility); use
        ``_find_direction`` to tell "not found" apart from direction 0.
        """
        direction = self._find_direction(position_src, position_dst)
        return 0 if direction is None else direction

    # Draw a piece
    def draw_piece(self, index, first_player, piece_type):
        """Draw ``piece_type`` on board square ``index`` (rotated for the second player)."""
        x = (index % 3) * 80
        y = int(index / 3) * 80 + 40
        index = 0 if first_player else 1
        self.c.create_image(x, y, image=self.images[piece_type][index],
                            anchor=tk.NW)

    # Draw the captured pieces
    def draw_capture(self, first_player, pieces):
        """Draw the captured pieces: bottom strip for the first player, top strip otherwise."""
        index, x, dx, y = (2, 0, 40, 360) if first_player else (3, 200, -40, 0)
        captures = self._capture_list(pieces)
        for i in range(len(captures)):
            self.c.create_image(x + dx * i, y,
                                image=self.images[captures[i]][index],
                                anchor=tk.NW)

    # Draw the selection cursor
    def draw_cursor(self, x, y, size):
        """Draw a red square outline of ``size`` pixels with top-left corner (x, y)."""
        self.c.create_line(x + 1, y + 1, x + size - 1, y + 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + size - 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + 1, x + 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + size - 1, y + 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')

    # Redraw the screen
    def on_draw(self):
        """Redraw the grid, the pieces, the captured pieces and the cursor."""
        # Grid
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 400, width=0.0, fill='#EDAA56')
        for i in range(1, 3):
            # Both ends use the same x so the line is vertical
            # (the start point used to be shifted by one pixel).
            self.c.create_line(i * 80, 40, i * 80, 360,
                               width=2.0, fill='#000000')
        for i in range(5):
            self.c.create_line(0, 40 + i * 80, 240, 40 + i * 80,
                               width=2.0, fill='#000000')

        # Pieces. The arrays are stored from the viewpoint of the player to
        # move, so one of the two index sequences is mirrored (11 - p).
        for p in range(12):
            p0, p1 = (p, 11 - p) if self.state.is_first_player() else (11 - p, p)
            if self.state.pieces[p0] != 0:
                self.draw_piece(p, self.state.is_first_player(),
                                self.state.pieces[p0])
            if self.state.enemy_pieces[p1] != 0:
                self.draw_piece(p, not self.state.is_first_player(),
                                self.state.enemy_pieces[p1])

        # Captured pieces
        self.draw_capture(self.state.is_first_player(), self.state.pieces)
        self.draw_capture(not self.state.is_first_player(),
                          self.state.enemy_pieces)

        # Selection cursor
        if 0 <= self.select and self.select < 12:
            self.draw_cursor(int(self.select % 3) * 80,
                             int(self.select / 3) * 80 + 40, 80)
        elif 12 <= self.select:
            self.draw_cursor((self.select - 12) * 40, 360, 40)
class GameUI(tk.Frame):
    """Tkinter tic-tac-toe board where the human plays first against ``mini_max_action``."""

    def __init__(self, master=None, model=None):
        """Build the 240x240 canvas and draw the empty board.

        ``model`` is accepted but not used by this class.
        """
        tk.Frame.__init__(self, master)
        self.master.title("三目並べ")

        self.state = State()
        self.next_action = mini_max_action

        canvas = tk.Canvas(self, width=240, height=240, highlightthickness=0)
        canvas.bind("<Button-1>", self.turn_of_human)
        canvas.pack()
        self.c = canvas

        self.on_draw()

    def turn_of_human(self, event):
        """Play the clicked cell for the human, then schedule the AI move."""
        # A click after the game has ended starts a new game.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        if not self.state.is_first_player():
            return

        # 80px per cell
        col = int(event.x / 80)
        row = int(event.y / 80)
        if not (0 <= col <= 2 and 0 <= row <= 2):
            return

        action = col + row * 3
        if action not in self.state.legal_actions():
            return

        self.state = self.state.next(action)
        self.on_draw()

        # Hand over to the AI after 1 ms.
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        """Play the AI's move unless the game is over."""
        if self.state.is_done():
            return
        self.state = self.state.next(self.next_action(self.state))
        self.on_draw()

    def draw_piece(self, index, first_player):
        """Draw a circle (first player) or a cross (second player) in cell ``index``."""
        left = (index % 3) * 80 + 10
        top = int(index / 3) * 80 + 10
        right = left + 60
        bottom = top + 60
        if first_player:
            self.c.create_oval(left, top, right, bottom,
                               width=2.0, outline="#FFFFFF")
            return
        self.c.create_line(left, top, right, bottom, width=2.0, fill="#5D5D5D")
        self.c.create_line(right, top, left, bottom, width=2.0, fill="#5D5D5D")

    def on_draw(self):
        """Redraw the background, the grid and every piece."""
        self.c.delete("all")
        self.c.create_rectangle(0, 0, 240, 240, width=0.0, fill="#00A0FF")
        for offset in (80, 160):
            self.c.create_line(offset, 0, offset, 240, width=2.0, fill="#0077BB")
        for offset in (80, 160):
            self.c.create_line(0, offset, 240, offset, width=2.0, fill="#0077BB")

        for cell in range(9):
            if self.state.pieces[cell] == 1:
                self.draw_piece(cell, self.state.is_first_player())
            if self.state.enemy_pieces[cell] == 1:
                self.draw_piece(cell, not self.state.is_first_player())
class GameUI(tk.Frame):
    """Tkinter UI for a 6x6 othello game against a PV-MCTS agent.

    Actions 0-35 are board cells (``x + y * 6``); action 36 is a pass.
    """

    # Initialization
    def __init__(self, master=None, model=None):
        """Create the game state, the AI move picker and the 240x240 canvas."""
        tk.Frame.__init__(self, master)
        self.master.title('오셀로')

        # Game state
        self.state = State()

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)

        # Canvas
        canvas = tk.Canvas(self, width=240, height=240, highlightthickness=0)
        canvas.bind('<Button-1>', self.turn_of_human)
        canvas.pack()
        self.c = canvas

        # Initial draw
        self.on_draw()

    # Human's turn
    def turn_of_human(self, event):
        """Play the clicked cell (or a forced pass) and schedule the AI move."""
        # Game over: start a new game
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Convert the click position into an action (40px per cell)
        col = int(event.x / 40)
        row = int(event.y / 40)
        if not (0 <= col <= 5 and 0 <= row <= 5):  # outside the board
            return
        action = col + row * 6

        # Forced pass, or reject anything that is not a legal move
        legal_actions = self.state.legal_actions()
        if legal_actions == [36]:
            action = 36  # pass
        elif action not in legal_actions:
            return

        # Advance to the next state
        self.state = self.state.next(action)
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Play the AI's move unless the game is over."""
        if self.state.is_done():
            return
        chosen = self.next_action(self.state)
        self.state = self.state.next(chosen)
        self.on_draw()

    # Draw a stone
    def draw_piece(self, index, first_player):
        """Draw a stone in cell ``index``: red for the first player, white otherwise."""
        left = (index % 6) * 40 + 5
        top = int(index / 6) * 40 + 5
        colour = '#C2272D' if first_player else '#FFFFFF'
        self.c.create_oval(left, top, left + 30, top + 30,
                           width=1.0, outline='#000000', fill=colour)

    # Redraw the screen
    def on_draw(self):
        """Redraw the background, the grid lines and every stone."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 240, width=0.0, fill='#C69C6C')
        for i in range(1, 8):
            pos = i * 40
            self.c.create_line(0, pos, 240, pos, width=1.0, fill='#000000')
            self.c.create_line(pos, 0, pos, 240, width=1.0, fill='#000000')

        for cell in range(36):
            if self.state.pieces[cell] == 1:
                self.draw_piece(cell, self.state.is_first_player())
            if self.state.enemy_pieces[cell] == 1:
                self.draw_piece(cell, not self.state.is_first_player())
class GameUI(tk.Frame):
    """Tkinter UI for playing simple shogi on a 3x4 board against an AI.

    The human moves only while ``state.is_first_player()`` is true; the AI
    move comes from the callable built by ``pv_mcts_action(model, 0.0)``.

    Canvas layout (240x400): the board occupies y in [40, 360) with 80px
    squares; the strips above and below it show the captured pieces.
    """

    # Initialization
    def __init__(self, master=None, model=None):
        """Create the window, load the piece images and draw the board.

        Args:
            master: Parent widget passed to ``tk.Frame``.
            model: Model passed to ``pv_mcts_action``.
        """
        tk.Frame.__init__(self, master)
        self.master.title('簡易将棋')

        # Game state
        self.state = State()
        # Selection (-1: none, 0-11: board square, 12-14: captured piece)
        self.select = -1

        # Direction constants as (dx, dy)
        self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                    (0, 1), (-1, 1), (-1, 0), (-1, -1))

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)

        # Images per piece type: (normal, rotated, small, small rotated)
        self.images = [(None, None, None, None)]
        for i in range(1, 5):
            image = Image.open('piece{}.png'.format(i))
            self.images.append((
                ImageTk.PhotoImage(image),
                ImageTk.PhotoImage(image.rotate(180)),
                ImageTk.PhotoImage(image.resize((40, 40))),
                ImageTk.PhotoImage(image.resize((40, 40)).rotate(180))))

        # Canvas
        self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        # Initial draw
        self.on_draw()

    @staticmethod
    def _capture_list(pieces):
        """Return the captured piece types to display (at most two per type)."""
        captures = []
        for i in range(3):
            if pieces[12 + i] >= 2:
                captures.append(1 + i)
            if pieces[12 + i] >= 1:
                captures.append(1 + i)
        return captures

    def _find_direction(self, position_src, position_dst):
        """Return the index in ``self.dxy`` of the step src -> dst, or None."""
        dx = position_dst % 3 - position_src % 3
        dy = int(position_dst / 3) - int(position_src / 3)
        for i, (step_x, step_y) in enumerate(self.dxy):
            if step_x == dx and step_y == dy:
                return i
        return None

    # Human's turn
    def turn_of_human(self, event):
        """Handle a left click: select a piece, then move or drop it.

        The first click stores the selection; the second click is turned
        into an action. An action that is not legal just clears the
        selection.
        """
        # Game over: start a new game
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Captured piece types of the player to move
        captures = self._capture_list(self.state.pieces)

        # Selected position (0-11: board square, 12-14: captured piece).
        # The board covers y in [40, 360). y == 360 used to be accepted as a
        # board row, which produced a bogus index 12..14 and could raise
        # IndexError on captures[] at the next click.
        p = int(event.x / 80) + int((event.y - 40) / 80) * 3
        if 40 <= event.y < 360:
            select = p
        elif event.x < len(captures) * 40 and event.y >= 360:
            select = 12 + int(event.x / 40)
        else:
            return

        # First click: remember the selection
        if self.select < 0:
            self.select = select
            self.on_draw()
            return

        # Second click: convert selection + target into an action
        action = -1
        if select < 12:
            if self.select < 12:
                # Moving a piece on the board. A target that is not one
                # step away stays illegal instead of silently becoming
                # direction 0.
                direction = self._find_direction(self.select, p)
                if direction is not None:
                    action = self.state.position_to_action(p, direction)
            else:
                # Dropping a captured piece
                action = self.state.position_to_action(
                    p, 8 - 1 + captures[self.select - 12])

        # Not a legal move: clear the selection
        if not (action in self.state.legal_actions()):
            self.select = -1
            self.on_draw()
            return

        # Advance to the next state
        self.state = self.state.next(action)
        self.select = -1
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Let the AI choose and play a move, unless the game is over."""
        if self.state.is_done():
            return

        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    # Convert a move target into a move direction
    def position_to_direction(self, position_src, position_dst):
        """Return the ``self.dxy`` index of the step src -> dst.

        Returns 0 when no direction matches (kept for compatibility); use
        ``_find_direction`` to tell "not found" apart from direction 0.
        """
        direction = self._find_direction(position_src, position_dst)
        return 0 if direction is None else direction

    # Draw a piece
    def draw_piece(self, index, first_player, piece_type):
        """Draw ``piece_type`` on board square ``index`` (rotated for the second player)."""
        x = (index % 3) * 80
        y = int(index / 3) * 80 + 40
        index = 0 if first_player else 1
        self.c.create_image(x, y, image=self.images[piece_type][index],
                            anchor=tk.NW)

    # Draw the captured pieces
    def draw_capture(self, first_player, pieces):
        """Draw the captured pieces: bottom strip for the first player, top strip otherwise."""
        index, x, dx, y = (2, 0, 40, 360) if first_player else (3, 200, -40, 0)
        captures = self._capture_list(pieces)
        for i in range(len(captures)):
            self.c.create_image(x + dx * i, y,
                                image=self.images[captures[i]][index],
                                anchor=tk.NW)

    # Draw the selection cursor
    def draw_cursor(self, x, y, size):
        """Draw a red square outline of ``size`` pixels with top-left corner (x, y)."""
        self.c.create_line(x + 1, y + 1, x + size - 1, y + 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + size - 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + 1, x + 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + size - 1, y + 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')

    # Redraw the screen
    def on_draw(self):
        """Redraw the grid, the pieces, the captured pieces and the cursor."""
        # Grid
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 400, width=0.0, fill='#EDAA56')
        for i in range(1, 3):
            # Both ends use the same x so the line is vertical
            # (the start point used to be shifted by one pixel).
            self.c.create_line(i * 80, 40, i * 80, 360,
                               width=2.0, fill='#000000')
        for i in range(5):
            self.c.create_line(0, 40 + i * 80, 240, 40 + i * 80,
                               width=2.0, fill='#000000')

        # Pieces. The arrays are stored from the viewpoint of the player to
        # move, so one of the two index sequences is mirrored (11 - p).
        for p in range(12):
            p0, p1 = (p, 11 - p) if self.state.is_first_player() else (11 - p, p)
            if self.state.pieces[p0] != 0:
                self.draw_piece(p, self.state.is_first_player(),
                                self.state.pieces[p0])
            if self.state.enemy_pieces[p1] != 0:
                self.draw_piece(p, not self.state.is_first_player(),
                                self.state.enemy_pieces[p1])

        # Captured pieces
        self.draw_capture(self.state.is_first_player(), self.state.pieces)
        self.draw_capture(not self.state.is_first_player(),
                          self.state.enemy_pieces)

        # Selection cursor
        if 0 <= self.select and self.select < 12:
            self.draw_cursor(int(self.select % 3) * 80,
                             int(self.select / 3) * 80 + 40, 80)
        elif 12 <= self.select:
            self.draw_cursor((self.select - 12) * 40, 360, 40)


# Run the game UI
f = GameUI(model=model)
f.pack()
f.mainloop()
class GameUI(tk.Frame):
    """Tkinter UI for a 15x15 board game against a PV-MCTS agent.

    The canvas is 675x675 with 45px cells; an action is the cell index
    ``x + y * 15``.
    """

    # Constructor
    def __init__(self, master=None, model=None):
        """Create the game state, the AI move picker and the canvas.

        Args:
            master: Parent widget passed to ``tk.Frame``.
            model: Model passed to ``pv_mcts_action``.
        """
        # Run the base Frame initialisation
        tk.Frame.__init__(self, master)
        self.master.title("グラフィックの描画")

        # Game state instance
        self.state = State()

        # AI action function (policy)
        self.next_action = pv_mcts_action(model, 0.0)

        # Board canvas
        self.c = tk.Canvas(self, width=675, height=675, highlightthickness=0)

        # Left click plays the human move
        self.c.bind("<Button-1>", self.turn_of_human)

        # Draw
        self.c.pack()
        self.on_draw()

    def turn_of_human(self, event):
        """Play the clicked cell for the human, then schedule the AI move."""
        # Game over: start a new game, redraw and return
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Clicked cell
        x = int(event.x / 45)
        y = int(event.y / 45)

        # Outside the board. Valid indices are 0..14; the old check allowed
        # 15, which would alias onto the first cell of the next row.
        if x < 0 or 14 < x or y < 0 or 14 < y:
            return

        # Action for the clicked cell
        action = x + y * 15
        print(action)

        # Ignore moves that are not legal
        if not (action in self.state.legal_actions()):
            return

        # Advance to the next state and redraw
        self.state = self.state.next(action)
        self.on_draw()

        # Give Tk 1 ms to draw before handing over to the AI
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        """Play the AI's move unless the game is over."""
        # Game over check
        if self.state.is_done():
            return

        # Next action for the current state
        action = self.next_action(self.state)
        print(action)

        # Advance to the next state and redraw
        self.state = self.state.next(action)
        self.on_draw()

    def draw_piece(self, index, first_player):
        """Draw a stone in cell ``index``: dark for the first player, white otherwise."""
        x = (index % 15) * 45 + 10
        y = int(index / 15) * 45 + 10
        if first_player:
            self.c.create_oval(x, y, x + 25, y + 25, width=0.0, fill="#333333")
        else:
            self.c.create_oval(x, y, x + 25, y + 25, width=0.0, fill="#FFFFFF")

    def on_draw(self):
        """Redraw the background, the grid, the centre label and every stone."""
        # Clear the board
        self.c.delete("all")

        # Grid
        self.c.create_rectangle(0, 0, 675, 675, width=0.0, fill="#DEB887")
        for r in range(15):
            self.c.create_line(0, r * 45, 720, r * 45, width=1.0, fill="#333333")
        for c in range(15):
            self.c.create_line(c * 45, 0, c * 45, 720, width=1.0, fill="#333333")
        self.c.create_text(337.5, 337.5, text="真ん中", font="courier 10",
                           anchor=tk.CENTER)

        # Stones for the current position
        for i in range(225):
            if self.state.pieces[i] == 1:
                # print(self.state.pieces)
                self.draw_piece(i, self.state.is_first_player())
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not self.state.is_first_player())
class GameUI(tk.Frame):
    """Tkinter UI for connect four (7 columns x 6 rows) against a PV-MCTS agent.

    An action is the clicked column index 0-6.
    """

    # Initialization
    def __init__(self, master=None, model=None):
        """Create the game state, the AI move picker and the 280x240 canvas."""
        tk.Frame.__init__(self, master)
        self.master.title('컨넥트4')

        # Game state
        self.state = State()

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)

        # Canvas
        canvas = tk.Canvas(self, width=280, height=240, highlightthickness=0)
        canvas.bind('<Button-1>', self.turn_of_human)
        canvas.pack()
        self.c = canvas

        # Initial draw
        self.on_draw()

    # Human's turn
    def turn_of_human(self, event):
        """Drop a stone into the clicked column and schedule the AI move."""
        # Game over: start a new game
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Convert the click position into a column (40px per column)
        column = int(event.x / 40)
        if not 0 <= column <= 6:  # outside the board
            return
        action = column

        # Not a legal move
        if action not in self.state.legal_actions():
            return

        # Advance to the next state
        self.state = self.state.next(action)
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Play the AI's move unless the game is over."""
        if self.state.is_done():
            return
        chosen = self.next_action(self.state)
        self.state = self.state.next(chosen)
        self.on_draw()

    # Draw a stone
    def draw_piece(self, index, first_player):
        """Draw a stone in cell ``index``: red for the first player, yellow otherwise."""
        left = (index % 7) * 40 + 5
        top = int(index / 7) * 40 + 5
        colour = '#FF0000' if first_player else '#FFFF00'
        self.c.create_oval(left, top, left + 30, top + 30,
                           width=1.0, fill=colour)

    # Redraw the screen
    def on_draw(self):
        """Redraw the background, the empty holes and every stone."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 280, 240, width=0.0, fill='#00A0FF')

        # Empty holes
        for cell in range(42):
            left = (cell % 7) * 40 + 5
            top = int(cell / 7) * 40 + 5
            self.c.create_oval(left, top, left + 30, top + 30,
                               width=1.0, fill='#FFFFFF')

        # Stones
        for cell in range(42):
            if self.state.pieces[cell] == 1:
                self.draw_piece(cell, self.state.is_first_player())
            if self.state.enemy_pieces[cell] == 1:
                self.draw_piece(cell, not self.state.is_first_player())
class GameUI(tk.Frame):
    """Tkinter tic-tac-toe board where the human plays first against a PV-MCTS agent."""

    # Initialization
    def __init__(self, master=None, model=None):
        """Create the game state, the AI move picker and the 240x240 canvas."""
        tk.Frame.__init__(self, master)
        self.master.title('三目並べ')

        # Game state
        self.state = State()

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)

        # Canvas
        canvas = tk.Canvas(self, width=240, height=240, highlightthickness=0)
        canvas.bind('<Button-1>', self.turn_of_human)
        canvas.pack()
        self.c = canvas

        # Initial draw
        self.on_draw()

    # Human's turn
    def turn_of_human(self, event):
        """Play the clicked cell for the human, then schedule the AI move."""
        # Game over: start a new game
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Convert the click position into an action (80px per cell)
        col = int(event.x / 80)
        row = int(event.y / 80)
        if not (0 <= col <= 2 and 0 <= row <= 2):  # outside the board
            return
        action = col + row * 3

        # Not a legal move
        if action not in self.state.legal_actions():
            return

        # Advance to the next state
        self.state = self.state.next(action)
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Play the AI's move unless the game is over."""
        if self.state.is_done():
            return
        chosen = self.next_action(self.state)
        self.state = self.state.next(chosen)
        self.on_draw()

    # Draw a stone
    def draw_piece(self, index, first_player):
        """Draw a circle (first player) or a cross (second player) in cell ``index``."""
        left = (index % 3) * 80 + 10
        top = int(index / 3) * 80 + 10
        right, bottom = left + 60, top + 60
        if first_player:
            self.c.create_oval(left, top, right, bottom,
                               width=2.0, outline='#FFFFFF')
            return
        self.c.create_line(left, top, right, bottom, width=2.0, fill='#5D5D5D')
        self.c.create_line(right, top, left, bottom, width=2.0, fill='#5D5D5D')

    # Redraw the screen
    def on_draw(self):
        """Redraw the background, the grid and every piece."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 240, width=0.0, fill='#00A0FF')
        for offset in (80, 160):
            self.c.create_line(offset, 0, offset, 240, width=2.0, fill='#0077BB')
        for offset in (80, 160):
            self.c.create_line(0, offset, 240, offset, width=2.0, fill='#0077BB')

        for cell in range(9):
            if self.state.pieces[cell] == 1:
                self.draw_piece(cell, self.state.is_first_player())
            if self.state.enemy_pieces[cell] == 1:
                self.draw_piece(cell, not self.state.is_first_player())
class GameUI(tk.Frame):
    """Tkinter UI for reversi against a PV-MCTS agent, with one-step undo.

    The board is ``BOARD_SIZE`` x ``BOARD_SIZE`` cells of 40px. The canvas is
    one cell wider than the board; a click in that extra column restores the
    state saved before the human's last move. Action ``ALL_PIECES_NUM`` is a
    pass.
    """

    # Initialization
    def __init__(self, master=None, model=None, ai_is_first=True):
        """Create the UI and, when ``ai_is_first`` is true, let the AI open.

        Args:
            master: Parent widget passed to ``tk.Frame``.
            model: Model passed to ``pv_mcts_action``.
            ai_is_first: When true the AI plays the first player's moves and
                the human plays the second player's.
        """
        self.ai_is_first = ai_is_first
        tk.Frame.__init__(self, master)
        self.master.title('リバーシ')

        # Game state, plus the state to return to on undo
        self.state = State()
        self.prev_state = None

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)
        # self.next_action = mcs_action

        # Canvas
        self.c = tk.Canvas(self, width=BOARD_SIZE * 40 + 40,
                           height=BOARD_SIZE * 40, highlightthickness=0)

        # The human moves second: let the AI open the game
        if self.ai_is_first:
            self.turn_of_ai()

        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        # Initial draw
        self.on_draw()

    # Human's turn
    def turn_of_human(self, event):
        """Handle a click: restart, undo, or play a move for the human."""
        # Game over: report the result and start a new game
        if self.state.is_done():
            print("first player is ", "lose" if self.state.is_lose() else "win")
            self.state = State()
            self.prev_state = None
            self.on_draw()
            # When the AI plays first it must also open the new game.
            # Without this the UI waited forever for a human move that the
            # turn check below rejects.
            if self.ai_is_first:
                self.master.after(1, self.turn_of_ai)
            return

        # Check whose turn it is
        if self.ai_is_first:
            is_human_turn = not self.state.is_first_player()
        else:
            is_human_turn = self.state.is_first_player()
        if not is_human_turn:
            return

        # Convert the click position into board coordinates
        x = int(event.x / 40)
        y = int(event.y / 40)
        is_back = x > BOARD_SIZE - 1
        print("x y", x, y)

        # Click in the extra column: undo to the saved state, if there is one
        if is_back and self.prev_state:
            print("check modoru")
            print("")
            self.state = self.prev_state
            self.prev_state = None
            self.on_draw()
            return

        if x < 0 or (BOARD_SIZE - 1) < x or y < 0 or (BOARD_SIZE - 1) < y:
            # Outside the board
            print("範囲外")
            return

        action = x + y * BOARD_SIZE
        print("human", action, get_coodicate(action))

        # Forced pass, or reject anything that is not a legal move
        legal_actions = self.state.legal_actions()
        if legal_actions == [ALL_PIECES_NUM]:
            action = ALL_PIECES_NUM  # pass
        if action != ALL_PIECES_NUM and not (action in legal_actions):
            return

        # Save the current state for undo, then advance
        self.prev_state = self.state
        self.state = self.state.next(action)
        print("check2")
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Play the AI's move, or report the result if the game is over."""
        # Game over
        if self.state.is_done():
            print("first player is ", "lose" if self.state.is_lose() else "win")
            return

        # Choose the action
        action = self.next_action(self.state)
        print(action, get_coodicate(action))

        # Advance to the next state
        self.state = self.state.next(action)
        self.on_draw()

    # Draw a stone
    def draw_piece(self, index, first_player):
        """Draw a stone in cell ``index``: black for the first player, white otherwise."""
        # A 30px stone is centred in a 40px cell with a 5px margin. The
        # margin used to be BOARD_SIZE - 1, which is only 5 on a 6x6 board.
        margin = (40 - 30) // 2
        x = (index % BOARD_SIZE) * 40 + margin
        y = int(index / BOARD_SIZE) * 40 + margin
        if first_player:
            self.c.create_oval(x, y, x + 30, y + 30, width=1.0,
                               outline='#000000', fill='#000000')
        else:
            self.c.create_oval(x, y, x + 30, y + 30, width=1.0,
                               outline='#000000', fill='#FFFFFF')

    # Redraw the screen
    def on_draw(self):
        """Redraw the background, the grid lines and every stone."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, BOARD_SIZE * 40, BOARD_SIZE * 40,
                                width=0.0, fill='#C69C6C')
        for i in range(1, BOARD_SIZE + 2 + 1):
            self.c.create_line(0, i * 40, BOARD_SIZE * 40, i * 40,
                               width=1.0, fill='#000000')
            self.c.create_line(i * 40, 0, i * 40, BOARD_SIZE * 40,
                               width=1.0, fill='#000000')
        for i in range(ALL_PIECES_NUM):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, self.state.is_first_player())
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not self.state.is_first_player())
class GameUI(tk.Frame):
    """Tkinter UI for shogi on a 9x9 board against a PV-MCTS agent.

    The human moves only while ``state.is_first_player()`` is true; the AI
    move comes from the callable built by ``pv_mcts_action(model, 0.0)``.

    Canvas layout (720x800): the board occupies y in [40, 760) with 80px
    squares; the strips above and below it show the captured pieces.
    """

    # Initialization
    def __init__(self, master=None, model=None):
        """Create the window, load the piece images and draw the board.

        Args:
            master: Parent widget passed to ``tk.Frame``.
            model: Model passed to ``pv_mcts_action``.
        """
        tk.Frame.__init__(self, master)
        self.master.title('簡易将棋')

        # Game state
        self.state = State()
        # Selection (-1: none, 0-80: board square, 81-88: captured piece)
        self.select = -1

        # Direction constants as (dx, dy); 74 entries. The index is the
        # direction id, and lookups use the first matching entry.
        self.dxy = (
            (0, -1), (1, -1), (1, 0), (1, 1),
            (0, 1), (-1, 1), (-1, 0), (-1, -1),
            (1, -2), (-1, -2),
            (1, -1), (2, -2), (3, -3), (4, -4),
            (5, -5), (6, -6), (7, -7), (8, -8),
            (-1, 1), (-2, 2), (-3, 3), (-4, 4),
            (-5, 5), (-6, 6), (-7, 7), (-8, 8),
            (1, 1), (2, 2), (3, 3), (4, 4),
            (5, 5), (6, 6), (7, 7), (8, 8),
            (-1, -1), (-2, -2), (-3, -3), (-4, -4),
            (-5, -5), (-6, -6), (-7, -7), (-8, -8),
            (1, 0), (2, 0), (3, 0), (4, 0),
            (5, 0), (6, 0), (7, 0), (8, 0),
            (-1, 0), (-2, 0), (-3, 0), (-4, 0),
            (-5, 0), (-6, 0), (-7, 0), (-8, 0),
            (0, 1), (0, 2), (0, 3), (0, 4),
            (0, 5), (0, 6), (0, 7), (0, 8),
            (0, -1), (0, -2), (0, -3), (0, -4),
            (0, -5), (0, -6), (0, -7), (0, -8))
        #self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1))

        # Function that picks the AI action with PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)

        # Images per piece type: (normal, rotated, small, small rotated)
        self.images = [(None, None, None, None)]
        for i in range(1, 19):
            image = Image.open('koma_gif/piece{}.gif'.format(i))
            self.images.append(
                (ImageTk.PhotoImage(image),
                 ImageTk.PhotoImage(image.rotate(180)),
                 ImageTk.PhotoImage(image.resize((40, 40))),
                 ImageTk.PhotoImage(image.resize((40, 40)).rotate(180))))

        # Canvas
        self.c = tk.Canvas(self, width=720, height=800, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        # Initial draw
        self.on_draw()

    @staticmethod
    def _capture_list(pieces):
        """Return the captured piece types to display (at most two per type)."""
        captures = []
        for i in range(8):
            if pieces[81 + i] >= 2:
                captures.append(1 + i)
            if pieces[81 + i] >= 1:
                captures.append(1 + i)
        return captures

    def _find_direction(self, position_src, position_dst):
        """Return the first index in ``self.dxy`` matching src -> dst, or None."""
        dx = position_dst % 9 - position_src % 9
        dy = int(position_dst / 9) - int(position_src / 9)
        for i, (step_x, step_y) in enumerate(self.dxy):
            if step_x == dx and step_y == dy:
                return i
        return None

    # Human's turn
    def turn_of_human(self, event):
        """Handle a left click: select a piece, then move or drop it.

        The first click stores the selection; the second click is turned
        into an action. An action that is not legal just clears the
        selection.
        """
        # Game over: start a new game
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Not the first player's turn
        if not self.state.is_first_player():
            return

        # Captured piece types of the player to move
        captures = self._capture_list(self.state.pieces)

        # Selected position (0-80: board square, 81-88: captured piece).
        # The board covers y in [40, 760). y == 760 used to be accepted as a
        # board row, which produced a bogus index >= 81 and could raise
        # IndexError on captures[] at the next click.
        p = int(event.x / 80) + int((event.y - 40) / 80) * 9
        if 40 <= event.y < 760:
            select = p
        elif event.x < len(captures) * 40 and event.y >= 760:
            select = 81 + int(event.x / 40)
        else:
            return

        # First click: remember the selection
        if self.select < 0:
            self.select = select
            self.on_draw()
            return

        # Second click: convert selection + target into an action
        action = -1
        if select < 81:
            if self.select < 81:
                # Moving a piece on the board. A displacement that is not
                # in the direction table stays illegal instead of silently
                # becoming direction 0.
                direction = self._find_direction(self.select, p)
                if direction is not None:
                    action = self.state.position_to_action(p, direction)
            else:
                # Dropping a captured piece
                action = self.state.position_to_action(
                    p, 74 - 1 + captures[self.select - 81])

        # Not a legal move: clear the selection
        if not (action in self.state.legal_actions()):
            self.select = -1
            self.on_draw()
            return

        # Advance to the next state
        self.state = self.state.next(action)
        self.select = -1
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        """Let the AI choose and play a move, unless the game is over."""
        if self.state.is_done():
            return

        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    # Convert a move target into a move direction
    def position_to_direction(self, position_src, position_dst):
        """Return the ``self.dxy`` index of the displacement src -> dst.

        Returns 0 when no direction matches (kept for compatibility); use
        ``_find_direction`` to tell "not found" apart from direction 0.
        """
        direction = self._find_direction(position_src, position_dst)
        return 0 if direction is None else direction

    # Draw a piece
    def draw_piece(self, index, first_player, piece_type):
        """Draw ``piece_type`` on board square ``index`` (rotated for the second player)."""
        x = (index % 9) * 80 + 20
        y = int(index / 9) * 80 + 40 + 20
        index = 0 if first_player else 1
        self.c.create_image(x, y, image=self.images[piece_type][index],
                            anchor=tk.NW)

    # Draw the captured pieces
    def draw_capture(self, first_player, pieces):
        """Draw the captured pieces: bottom strip for the first player, top strip otherwise."""
        index, x, dx, y = (2, 0, 40, 760) if first_player else (3, 680, -40, 0)
        captures = self._capture_list(pieces)
        for i in range(len(captures)):
            self.c.create_image(x + dx * i, y,
                                image=self.images[captures[i]][index],
                                anchor=tk.NW)

    # Draw the selection cursor
    def draw_cursor(self, x, y, size):
        """Draw a red square outline of ``size`` pixels with top-left corner (x, y)."""
        self.c.create_line(x + 1, y + 1, x + size - 1, y + 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + size - 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + 1, x + 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + size - 1, y + 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')

    # Redraw the screen
    def on_draw(self):
        """Redraw the grid, the pieces, the captured pieces and the cursor."""
        # Grid
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 720, 800, width=0.0, fill='#EDAA56')
        for i in range(1, 9):
            # Both ends use the same x so the line is vertical
            # (the start point used to be shifted by one pixel).
            self.c.create_line(i * 80, 40, i * 80, 760,
                               width=2.0, fill='#000000')
        for i in range(10):
            self.c.create_line(0, 40 + i * 80, 720, 40 + i * 80,
                               width=2.0, fill='#000000')

        # Pieces. The arrays are stored from the viewpoint of the player to
        # move, so one of the two index sequences is mirrored (80 - p).
        for p in range(81):
            p0, p1 = (p, 80 - p) if self.state.is_first_player() else (80 - p, p)
            if self.state.pieces[p0] != 0:
                self.draw_piece(p, self.state.is_first_player(),
                                self.state.pieces[p0])
            if self.state.enemy_pieces[p1] != 0:
                self.draw_piece(p, not self.state.is_first_player(),
                                self.state.enemy_pieces[p1])

        # Captured pieces
        self.draw_capture(self.state.is_first_player(), self.state.pieces)
        self.draw_capture(not self.state.is_first_player(),
                          self.state.enemy_pieces)

        # Selection cursor
        if 0 <= self.select and self.select < 81:
            self.draw_cursor(
                int(self.select % 9) * 80, int(self.select / 9) * 80 + 40, 80)
        elif 81 <= self.select:
            self.draw_cursor((self.select - 81) * 40, 760, 40)
class GameUI(tk.Frame):
    """Tkinter UI for a 3x4 simple-shogi board: human (first player) vs. AI.

    The canvas is 240x400 pixels: a 40 px strip at the top (y 0-39) where the
    opponent's captured pieces are drawn, the 3x4 board of 80 px squares
    (y 40-359), and a 40 px strip at the bottom (y 360-399) for the human's
    captured pieces.

    ``self.select`` holds the current selection: -1 for none, 0-11 for a
    board square, 12 and above for a slot in the human's captured-piece strip.
    """

    def __init__(self, master=None, model=None):
        """Create the initial game state, load piece images and build the canvas.

        Args:
            master: Parent Tk widget (passed to ``tk.Frame``).
            model: Model handed to ``pv_mcts_action`` to build the AI policy.
        """
        tk.Frame.__init__(self, master)
        self.master.title('簡易将棋')

        self.state = State()
        self.select = -1

        # (dx, dy) offsets for the eight movement directions, index 0 = up,
        # then clockwise.
        self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                    (0, 1), (-1, 1), (-1, 0), (-1, -1))

        # Temperature 0.0 is passed to pv_mcts_action for the AI's move choice.
        self.next_action = pv_mcts_action(model, 0.0)

        # images[piece_type] = (board image, board image rotated 180 deg,
        #                       40x40 capture image, 40x40 capture rotated).
        # Index 0 is a placeholder so piece types can be used directly.
        self.images = [(None, None, None, None)]
        for i in range(1, 5):
            image = Image.open('piece{}.png'.format(i))
            self.images.append((
                ImageTk.PhotoImage(image),
                ImageTk.PhotoImage(image.rotate(180)),
                ImageTk.PhotoImage(image.resize((40, 40))),
                ImageTk.PhotoImage(image.resize((40, 40)).rotate(180)),
            ))

        self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        self.on_draw()

    def turn_of_human(self, event):
        """Handle a mouse click: select a piece, or try to play a move.

        The first click stores the selection; the second click is converted
        into an action. An action that is not legal simply clears the
        selection. After a legal move the AI's reply is scheduled 1000 ms
        later.

        Args:
            event: Tk mouse event; only ``event.x`` and ``event.y`` are read.
        """
        # A click after the game has ended starts a new game.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Ignore clicks while it is not the first player's (human's) turn.
        if not self.state.is_first_player():
            return

        # Clicks outside the canvas width would wrap into a neighbouring row.
        if not 0 <= event.x < 240:
            return

        captures = self._capture_list(self.state.pieces)

        # Convert the click position into a selection index.
        p = int(event.x / 80) + int((event.y - 40) / 80) * 3
        if 40 <= event.y < 360:
            # Board rows occupy y 40..359. y == 360 must be excluded:
            # it would yield p in 12..14, which is a capture-slot index.
            select = p
        elif event.y >= 360 and event.x < len(captures) * 40:
            select = 12 + int(event.x / 40)
        else:
            return

        # First click: just remember what was selected.
        if self.select < 0:
            self.select = select
            self.on_draw()
            return

        # Second click: turn (selection, destination) into an action.
        action = -1
        if select < 12:
            if self.select < 12:
                # Moving a piece on the board. Only build an action when the
                # destination really is one step away; otherwise a bogus
                # direction could describe a legal move of a different piece.
                direction = self._find_direction(self.select, p)
                if direction is not None:
                    action = self.state.position_to_action(p, direction)
            else:
                # Dropping a captured piece onto the board.
                action = self.state.position_to_action(
                    p, 8 - 1 + captures[self.select - 12])

        # Illegal (or impossible) action: clear the selection.
        if action not in self.state.legal_actions():
            self.select = -1
            self.on_draw()
            return

        self.state = self.state.next(action)
        self.select = -1
        self.on_draw()

        # Let the AI answer after a one second pause.
        self.master.after(1000, self.turn_of_ai)

    def turn_of_ai(self):
        """Play the AI's move, unless the game is already over."""
        if self.state.is_done():
            return

        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    def position_to_direction(self, position_src, position_dst):
        """Return the index into ``self.dxy`` that leads from src to dst.

        Args:
            position_src: Board index (0-11) of the origin square.
            position_dst: Board index (0-11) of the destination square.

        Returns:
            The direction index 0-7, or 0 when the squares are not exactly
            one step apart (historical fallback, kept for compatibility).
        """
        direction = self._find_direction(position_src, position_dst)
        return 0 if direction is None else direction

    def _find_direction(self, position_src, position_dst):
        """Return the ``self.dxy`` index from src to dst, or None if no match."""
        dx = position_dst % 3 - position_src % 3
        dy = int(position_dst / 3) - int(position_src / 3)
        for i, (step_x, step_y) in enumerate(self.dxy):
            if step_x == dx and step_y == dy:
                return i
        return None

    @staticmethod
    def _capture_list(pieces):
        """List the captured piece types to display, in strip order.

        Entries 12-14 of ``pieces`` are read as per-type capture counts; each
        type (1-3) appears once per captured piece, at most twice.
        """
        captures = []
        for i in range(3):
            if pieces[12 + i] >= 2:
                captures.append(1 + i)
            if pieces[12 + i] >= 1:
                captures.append(1 + i)
        return captures

    def draw_piece(self, index, first_player, piece_type):
        """Draw one board piece.

        Args:
            index: Board square (0-11) in screen coordinates.
            first_player: True to use the upright image, False the rotated one.
            piece_type: Index into ``self.images``.
        """
        x = (index % 3) * 80
        y = int(index / 3) * 80 + 40
        index = 0 if first_player else 1
        self.c.create_image(x, y, image=self.images[piece_type][index],
                            anchor=tk.NW)

    def draw_capture(self, first_player, pieces):
        """Draw a player's captured pieces in that player's strip.

        The first player's strip runs left-to-right along the bottom; the
        other player's runs right-to-left along the top.

        Args:
            first_player: Whose strip to draw.
            pieces: Piece array whose entries 12-14 are the capture counts.
        """
        if first_player:
            index, x, dx, y = 2, 0, 40, 360
        else:
            index, x, dx, y = 3, 200, -40, 0
        for i, piece_type in enumerate(self._capture_list(pieces)):
            self.c.create_image(x + dx * i, y,
                                image=self.images[piece_type][index],
                                anchor=tk.NW)

    def draw_cursor(self, x, y, size):
        """Draw a red square outline of ``size`` pixels with top-left (x, y)."""
        left, top = x + 1, y + 1
        right, bottom = x + size - 1, y + size - 1
        self.c.create_line(left, top, right, top,
                           width=4.0, fill='#FF0000')
        self.c.create_line(left, bottom, right, bottom,
                           width=4.0, fill='#FF0000')
        self.c.create_line(left, top, left, bottom,
                           width=4.0, fill='#FF0000')
        self.c.create_line(right, top, right, bottom,
                           width=4.0, fill='#FF0000')

    def on_draw(self):
        """Redraw the whole canvas: grid, pieces, captures, selection cursor."""
        # Background and grid.
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 400, width=0.0, fill='#EDAA56')
        for i in range(1, 3):
            self.c.create_line(i * 80, 40, i * 80, 360,
                               width=2.0, fill='#000000')
        for i in range(5):
            self.c.create_line(0, 40 + i * 80, 240, 40 + i * 80,
                               width=2.0, fill='#000000')

        # Pieces. One of the two arrays is indexed mirrored (11 - p); which
        # one depends on whose turn it is, so the screen orientation stays
        # fixed regardless of the side to move.
        first = self.state.is_first_player()
        for p in range(12):
            p0, p1 = (p, 11 - p) if first else (11 - p, p)
            if self.state.pieces[p0] != 0:
                self.draw_piece(p, first, self.state.pieces[p0])
            if self.state.enemy_pieces[p1] != 0:
                self.draw_piece(p, not first, self.state.enemy_pieces[p1])

        # Captured pieces of both sides.
        self.draw_capture(first, self.state.pieces)
        self.draw_capture(not first, self.state.enemy_pieces)

        # Selection cursor: an 80 px box on the board, 40 px in the strip.
        if 0 <= self.select < 12:
            self.draw_cursor(int(self.select % 3) * 80,
                             int(self.select / 3) * 80 + 40, 80)
        elif 12 <= self.select:
            self.draw_cursor((self.select - 12) * 40, 360, 40)
class GameUI(tk.Frame):
    """Tkinter UI for Connect Four: human (first player) vs. PV-MCTS AI.

    The board is 7 columns by 6 rows of 40 px cells on a 280x240 canvas.
    Clicking anywhere in a column plays that column.
    """

    def __init__(self, master=None, model=None):
        """Create the game state, the AI policy and the canvas.

        Args:
            master: Parent Tk widget (passed to ``tk.Frame``).
            model: Model handed to ``pv_mcts_action`` to build the AI policy.
        """
        tk.Frame.__init__(self, master)
        self.master.title('コネクトフォー')

        # Game state.
        self.state = State()

        # Action-selection function built by pv_mcts_action (temperature 0.0).
        self.next_action = pv_mcts_action(model, 0.0)

        # Canvas; a left click is the human's move.
        self.c = tk.Canvas(self, width=280, height=240, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        self.on_draw()

    def turn_of_human(self, event):
        """Handle a mouse click as the human's move.

        Args:
            event: Tk mouse event; only ``event.x`` is read.
        """
        # A click after the game has ended starts a new game.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Ignore clicks while it is not the first player's (human's) turn.
        if not self.state.is_first_player():
            return

        # Convert the click to a column. Floor division (rather than
        # truncation toward zero) keeps negative x negative, so the range
        # check below really rejects clicks left of the board.
        x = int(event.x // 40)
        if x < 0 or 6 < x:
            return
        action = x

        # Ignore columns that are not legal moves.
        if action not in self.state.legal_actions():
            return

        self.state = self.state.next(action)
        self.on_draw()

        # Hand over to the AI as soon as the event loop is idle again.
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        """Play the AI's move, unless the game is already over."""
        if self.state.is_done():
            return

        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    def draw_piece(self, index, first_player):
        """Draw one disc in cell ``index`` (0-41, row-major, 7 per row).

        Args:
            index: Cell index.
            first_player: True draws a red disc, False a yellow one.
        """
        x = (index % 7) * 40 + 5
        y = int(index / 7) * 40 + 5
        colour = '#FF0000' if first_player else '#FFFF00'
        self.c.create_oval(x, y, x + 30, y + 30, width=1.0, fill=colour)

    def on_draw(self):
        """Redraw the whole board: background, empty holes, then discs."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 280, 240, width=0.0, fill='#00A0FF')

        # Empty holes.
        for i in range(42):
            x = (i % 7) * 40 + 5
            y = int(i / 7) * 40 + 5
            self.c.create_oval(x, y, x + 30, y + 30, width=1.0, fill='#FFFFFF')

        # Discs: `pieces` is drawn in the colour selected by
        # is_first_player(), `enemy_pieces` in the other colour.
        first = self.state.is_first_player()
        for i in range(42):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, first)
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not first)
class GameUI(tk.Frame):
    """Tkinter UI for 6x6 Reversi: human (first player) vs. PV-MCTS AI.

    The board is 6x6 cells of 40 px on a 240x240 canvas. Cell actions are
    numbered ``x + y * 6`` (0-35); action 36 is "pass".
    """

    def __init__(self, master=None, model=None):
        """Create the game state, the AI policy and the canvas.

        Args:
            master: Parent Tk widget (passed to ``tk.Frame``).
            model: Model handed to ``pv_mcts_action`` to build the AI policy.
        """
        tk.Frame.__init__(self, master)
        self.master.title('リバーシ')

        # Game state.
        self.state = State()

        # Action-selection function built by pv_mcts_action (temperature 0.0).
        self.next_action = pv_mcts_action(model, 0.0)

        # Canvas; a left click is the human's move.
        self.c = tk.Canvas(self, width=240, height=240, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        self.on_draw()

    def turn_of_human(self, event):
        """Handle a mouse click as the human's move (or forced pass).

        Args:
            event: Tk mouse event; ``event.x`` and ``event.y`` are read.
        """
        # A click after the game has ended starts a new game.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return

        # Ignore clicks while it is not the first player's (human's) turn.
        if not self.state.is_first_player():
            return

        # Convert the click to a cell. Floor division (rather than truncation
        # toward zero) keeps negative coordinates negative, so the range
        # check below really rejects clicks outside the board.
        x = int(event.x // 40)
        y = int(event.y // 40)
        if x < 0 or 5 < x or y < 0 or 5 < y:
            return
        action = x + y * 6

        legal_actions = self.state.legal_actions()
        if legal_actions == [36]:
            # The only legal move is "pass": any click on the board passes.
            action = 36
        elif action not in legal_actions:
            return

        self.state = self.state.next(action)
        self.on_draw()

        # Hand over to the AI as soon as the event loop is idle again.
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        """Play the AI's move, unless the game is already over."""
        if self.state.is_done():
            return

        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    def draw_piece(self, index, first_player):
        """Draw one stone in cell ``index`` (0-35, row-major, 6 per row).

        Args:
            index: Cell index.
            first_player: True draws a red (#C2272D) stone, False a white one.
        """
        x = (index % 6) * 40 + 5
        y = int(index / 6) * 40 + 5
        colour = '#C2272D' if first_player else '#FFFFFF'
        self.c.create_oval(x, y, x + 30, y + 30,
                           width=1.0, outline='#000000', fill=colour)

    def on_draw(self):
        """Redraw the whole board: background, grid lines, then stones."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 240, width=0.0, fill='#C69C6C')

        # A 6x6 board has five interior grid lines per axis (40..200 px);
        # anything further would lie outside the 240 px canvas.
        for i in range(1, 6):
            self.c.create_line(0, i * 40, 240, i * 40,
                               width=1.0, fill='#000000')
            self.c.create_line(i * 40, 0, i * 40, 240,
                               width=1.0, fill='#000000')

        # Stones: `pieces` is drawn in the colour selected by
        # is_first_player(), `enemy_pieces` in the other colour.
        first = self.state.is_first_player()
        for i in range(36):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, first)
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not first)
def _load_eval_net(net_class, path, device):
    """Build ``net_class()``, load its weights from ``path`` and prepare it for inference.

    The weights are mapped onto ``device`` while loading, so a checkpoint
    saved from a GPU can still be read on a CPU-only machine. The network is
    converted to double precision and switched to eval mode.
    """
    net = net_class()
    net.load_state_dict(torch.load(path, map_location=device))
    net = net.double().to(device)
    net.eval()
    return net


def _print_mcts_report(model, state):
    """Print ``state``, one ``action:score`` line per legal action, then the values."""
    print(state)
    score, values = pv_mcts_scores(model, state, EN_TEMPERATURE)
    moves = state.legal_actions()
    for i in range(len(moves)):
        print(str(moves[i]) + ":" + str(score[i]))
    print(values)


def evaluate_problem():
    """Print the best model's MCTS evaluation for a fixed set of test positions.

    Loads the three networks (``RepNet``, ``DynamicsNet``, ``PredictNet``)
    from ``./model/best_r.h5``, ``best_d.h5`` and ``best_p.h5`` onto the CPU
    and, for eight hand-picked positions, prints the position, the score that
    ``pv_mcts_scores`` assigns to each legal action, and the returned values.
    Positions are separated by a dashed line.
    """
    separator = "---------------------"

    # Load the best player's networks.
    device = torch.device('cpu')
    model = (
        _load_eval_net(RepNet, './model/best_r.h5', device),
        _load_eval_net(DynamicsNet, './model/best_d.h5', device),
        _load_eval_net(PredictNet, './model/best_p.h5', device),
    )

    # First series: the initial position, then the positions reached by
    # playing actions 2, 1, 4, 6 one after another.
    state = State()
    _print_mcts_report(model, state)
    for action in (2, 1, 4, 6):
        print(separator)
        state = state.next(action)
        _print_mcts_report(model, state)

    # Second series: three positions, each reached from a fresh game by a
    # fixed four-action opening.
    for opening in ((2, 0, 4, 1), (2, 0, 5, 1), (0, 6, 1, 7)):
        print(separator)
        state = State()
        for action in opening:
            state = state.next(action)
        _print_mcts_report(model, state)