def move(self, position, board, is_my_move=True):
    '''
    Returns the board after the given move.
    Set "is_my_move" to False to simulate the opponent's move instead of your own.
    '''
    board = copy.deepcopy(board)
    prediction = Kalah(board)
    if not is_my_move:
        prediction.board = reverse_board(board)
    _, free_turn = prediction.move(position)
    board = prediction.board
    if not is_my_move:
        board = reverse_board(board)
    return board, prediction.is_game_over(), free_turn
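# Illustrative usage sketch (not part of the original module): shows how a caller
# might probe a candidate move with the helper above. "agent" is a placeholder for
# whatever object owns move(); replace it with the real player instance.
def _example_probe_move(agent):
    initial_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
    # Simulate sowing from hole 2 for the current player; the caller's board is
    # untouched because move() deep-copies its input.
    next_board, game_over, free_turn = agent.move(2, initial_board, is_my_move=True)
    assert initial_board[2] == 4
    return next_board, game_over, free_turn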
def main():
    num_episodes = 10000
    # player = User(simulation_depth=6, number_of_simulation=1000)
    if runner.am_i_minmax:
        runner.user = Minimax()
    else:
        runner.user = User(simulation_depth=6, number_of_simulation=200)
    if runner.is_user_defined_opponent:
        module, name = runner.opponent_path.rsplit('.', 1)
        runner.opponent = getattr(importlib.import_module(module), name)(number_of_simulation=1000)
    else:
        runner.opponent = Minimax()

    for i_episode in range(num_episodes):
        # Initialize the environment and state
        print("New game for training!")
        initial_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
        new_game = Kalah(initial_board)
        if i_episode % 2 == 0:
            new_game.player = False
        if not runner.am_i_minmax:
            runner.user.initial_root(initial_board)
        if runner.is_user_defined_opponent:
            runner.opponent.initial_root(initial_board)

        num = 0
        loss_sum = 0
        for turn in count():
            # Select and perform an action
            current_board = copy.deepcopy(new_game.get_board())
            state = game_state_to_tensor(current_board)
            if new_game.player:
                # My turn: use the left model
                cur_policy = left_policy_net
                cur_target = left_target_net
                opt = optimizer_left
                while True:
                    action = select_action(current_board, cur_target)
                    next_position = action.item()
                    if new_game.get_board()[next_position] != 0 or new_game.is_game_over():
                        break
            else:
                # Opponent's turn: the right model is currently disabled, so the left model is reused
                cur_policy = left_policy_net
                cur_target = left_target_net
                opt = optimizer_left
                while True:
                    # action = torch.tensor([[runner.opponent.search(current_board)]], device=device)
                    action = select_action(current_board, cur_target)
                    next_position = action.item()
                    if new_game.get_board()[next_position] != 0 or new_game.is_game_over():
                        break

            _, free_turn = new_game.move(next_position)

            # Observe the new state
            next_board = copy.deepcopy(new_game.get_board())
            next_state = game_state_to_tensor(next_board)
            reward = evaluation(new_game.is_game_over(), next_board)

            # Store the transition in replay memory
            reward = torch.tensor([reward], device=device, dtype=torch.float)
            memory.push(state, action, next_state, reward)

            # Compute the loss and optimize
            loss = optimize_model(free_turn, cur_policy, cur_target, opt)
            loss_sum += loss

            if not runner.am_i_minmax:
                runner.user.update_root(next_position,
                                        copy.deepcopy(new_game.get_board()),
                                        copy.deepcopy(new_game.player))
            if runner.is_user_defined_opponent:
                runner.opponent.update_root(next_position,
                                            copy.deepcopy(new_game.get_board()),
                                            copy.deepcopy(not new_game.player))
            if new_game.is_game_over():
                num = turn + 1  # count() starts at 0, so turn + 1 moves were played
                break

        runner.score_board(i_episode, new_game.result())
        print(i_episode, 'game Average loss: ', loss_sum / num)
        print()

        # Update the target network
        if i_episode % TARGET_UPDATE == 1:
            left_target_net.load_state_dict(left_policy_net.state_dict())
            # right_target_net.load_state_dict(right_policy_net.state_dict())

        if i_episode % 50 == 49:
            # Save a checkpoint every 50 episodes
            torch.save(left_target_net.state_dict(), 'checkpoint_left.pth')
            runner.wins = 0
            runner.losses = 0
            runner.draws = 0
            # torch.save(right_target_net.state_dict(), 'checkpoint_right.pth')

    torch.save(left_target_net.state_dict(), 'dqn_cnn_left.pth')
    # torch.save(right_target_net.state_dict(), 'dqn_cnn_right.pth')
    print('Complete')
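# Entry-point guard (a small assumption about how this training script is invoked):
# it keeps the training loop from starting when the module is merely imported.
# Assumes runner, the networks, the optimizer, and the replay memory are constructed
# at module level before main() is called.
if __name__ == '__main__':
    main()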
def run_game(self, tree_visualization=True):
    for i in range(self.num_of_games):
        if self.am_i_minmax:
            self.user = Minimax()
        else:
            module, name = self.user_path.rsplit('.', 1)
            self.user = getattr(importlib.import_module(module),
                                name)(number_of_simulation=500, simulation_depth=6)
        if self.is_user_defined_opponent:
            module, name = self.opponent_path.rsplit('.', 1)
            self.opponent = getattr(importlib.import_module(module),
                                    name)(number_of_simulation=1000)
        else:
            self.opponent = Minimax()

        print("New game!")
        print("Initial board >>")

        # Initialization:
        initial_board = [4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 0]
        new_game = Kalah(initial_board)
        if i % 2 == 1:
            new_game.player = False
        new_game.show_board()
        if not self.am_i_minmax:
            self.user.initial_root(initial_board)
        if self.is_user_defined_opponent:
            self.opponent.initial_root(initial_board)

        turn = 0
        while not new_game.is_game_over():
            turn += 1
            # Pick a hole:
            if new_game.player:
                start_time = time.time()
                next_position = self.user.search(copy.deepcopy(new_game.get_board()))
                end_time = time.time()
                print('measured time: ', end_time - start_time)
                self.is_time_out(start_time, end_time)
            else:
                next_position = self.opponent.search(copy.deepcopy(new_game.get_board()))

            # Update:
            tmp_score, free_turn = new_game.move(next_position)

            # Print:
            if not self.am_i_minmax:
                print("winning rate:", self.user.print_winning_rate(next_position))
                if tree_visualization:
                    show_image(self.user.g.render(view=False), auto_close=False)
            if not self.am_i_minmax:
                self.user.update_root(next_position,
                                      copy.deepcopy(new_game.get_board()),
                                      copy.deepcopy(new_game.player))
            if self.is_user_defined_opponent:
                self.opponent.update_root(next_position,
                                          copy.deepcopy(new_game.get_board()),
                                          copy.deepcopy(not new_game.player))

        # End of a game, print the result:
        new_game.show_board()
        turn = 0
        self.score_board(i, new_game.result())
        del self.user
        del self.opponent
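# Illustrative wiring sketch (hypothetical, not part of the original file): the class
# that owns run_game() is not shown in this section, so "Runner" and its constructor
# signature below are placeholders; only the attribute names (num_of_games, am_i_minmax,
# user_path, is_user_defined_opponent, opponent_path) come from the method above.
#
#   runner = Runner(num_of_games=10,
#                   am_i_minmax=False,
#                   user_path='agents.mcts.User',       # placeholder dotted path
#                   is_user_defined_opponent=False,
#                   opponent_path=None)
#   runner.run_game(tree_visualization=False)           # skip graphviz pop-ups on headless machines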
class KalahUi:
    def __init__(self):
        self.game = Kalah(5, 5)
        self.maxSeeds = 50
        self.maxDigits = 2
        self.padding = ' ' * 12
        self.player1string = ' Player 1 '
        self.player2string = ' Player 2 '
        self.canContinue = True

    def interrupt(self):
        self.canContinue = False

    def formatFirstPlayer(self, player):
        invert = player % 2 == 1
        padding = ' ' * 12
        playerstring = f' Player {player + 1} '
        playerSeeds = [padding]
        playerSeeds += [
            f'[{str(seeds).rjust(self.maxDigits)}] '
            for seeds in self.game.getPlayerSlice(player, includeStore=False)
        ]
        playerSeeds += [playerstring]
        playerHouses = [padding]
        playerHouses += [
            f' {str(house).rjust(self.maxDigits)} '
            for house in range(1, self.game._houses + 1)
        ]
        playerHouses += [padding]
        return [
            ''.join(playerSeeds[::-1 if invert else 1]),
            ''.join(playerHouses[::-1 if invert else 1])
        ][::-1 if invert else 1]

    def formatStores(self):
        lead = ' ' * 4
        trail = ' ' * (12 - 6 - self.maxDigits)
        store1 = self.game.board[self.game.storeIndex(0)]
        store2 = self.game.board[self.game.storeIndex(1)]
        store1str = f'[{str(store1).rjust(self.maxDigits)}]'
        store2str = f'[{str(store2).rjust(self.maxDigits)}]'
        spaces = self.game._houses * (4 + self.maxDigits) * ' '
        return lead + store2str + trail + spaces + trail + store1str + lead

    def formatState(self):
        return f'\n> Next player: {self.game.nextPlayer + 1}. Select your move (1-{self.game._houses})\n'

    def formatField(self):
        return '\n'.join(
            self.formatFirstPlayer(1) + [self.formatStores()] +
            self.formatFirstPlayer(0))

    def run(self):
        while not self.game.gameEnded and self.canContinue:
            print(self.formatField())
            print(self.formatState())
            move = None
            while move is None:
                try:
                    inp = input()
                    if inp == 'q':
                        return
                    move = (int(inp) - 1) % self.game._houses
                    print(f"> Moving house {move + 1}")
                except ValueError:
                    print("> Not a valid move. Try again\n")
            self.game.move(move)

        print(self.formatField())
        print(self.formatState())
        winner = self.game.winningPlayer()
        if winner is not None:
            print(f"> Game has ended. Player {winner + 1} won!")
        else:
            print("> Game has ended. It's a tie!")
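# Minimal terminal wiring for the text UI above (a sketch; assumes this file is run
# directly and that Kalah is importable here). interrupt() is shown only to illustrate
# how the canContinue flag is meant to stop the loop when the session is cut short.
if __name__ == '__main__':
    ui = KalahUi()
    try:
        ui.run()  # reads moves from stdin until the game ends or the user types 'q'
    except KeyboardInterrupt:
        ui.interrupt()
        print('\n> Interrupted.')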