def run_single_game(self):
    """Play a single game of checkers between the configured players.

    Every turn is appended to the module-level ``log_file``.  Games that
    drag past 200 turns are abandoned and scored as unresolved (-1).
    Per-player thinking times, the final score, and the number of rounds
    played are accumulated into ``self.stats``.
    """
    board = CheckerBoard()
    turn = 0
    unresolved = False

    while not board.is_over():
        turn += 1
        log_file.write("#### Turn %3d\n" % turn)
        log_file.write(str(board))
        log_file.flush()
        if turn % 100 == 0:
            print("Over %d turns played" % turn)

        for player, agent in self.players.items():
            # The same side may move several times in a row (multi-jumps),
            # so keep querying this agent while it remains the active side.
            while not board.is_over() and board.active == player:
                print("Player %d is making a decision" % player)
                move_start = time.time()
                move = agent.best_move(board)
                elapsed = time.time() - move_start
                self.stats["thinking_time"][player].append(elapsed)
                board.update(move)

        # Safety valve: give up on games that never resolve.
        if turn > 200:
            unresolved = True
            break

    # -1 marks an aborted game; otherwise record the actual winner.
    self.stats["score"].append(-1 if unresolved else board.winner)
    self.stats["played_rounds"] += turn
class CheckersEnvironmentWrapper:
    """Reinforcement-learning environment wrapper around ``CheckerBoard``.

    The training agent always plays black; white is a uniformly random
    opponent.  Observations are the board's flat state vector; actions are
    integer indices into a fixed table of every possible move, built once
    at construction time.
    """

    def __init__(self):
        # Episode bookkeeping.
        self.actions = {}          # action index -> move object
        self.observation = []      # current flat state vector
        self.reward = 0
        self.done = False
        self.last_action_idx = 0

        # Initialize the board and derive the observation "shape".
        self.board = CheckerBoard()
        self.width = len(self.board.get_state_vector())
        self.height = 1

        # Reward parameters and episode counters.
        self.win_reward = 100
        self.defeat_reward = -100
        self.game_turns = 0
        self.score = 0
        # When True, intermediate rewards reflect pieces captured/lost.
        self.enable_capturing_reward = False

        # Enumerate the full (fixed) action space once.
        for idx, move in enumerate(self.board.get_all_moves()):
            self.actions[idx] = move
        print("total actions: ", len(self.actions))
        self.action_space_size = len(self.actions)

        self.reset()

    def update_game_info(self):
        """Refresh the cached observation from the board."""
        self.observation = self.board.get_state_vector()

    def restart_environment_episode(self):
        """Start a fresh game and return the initial observation."""
        self.board = CheckerBoard()
        self.update_game_info()
        return self.observation

    def _idx_to_action(self, action_idx):
        """Map an action index to its concrete move."""
        return self.actions[action_idx]

    def get_valid_idx_actions(self):
        """Return the indices of all moves legal in the current position."""
        possible_moves = self.board.get_legal_moves()
        # Membership test against the legal-move list preserves whatever
        # equality semantics the move objects define.
        return [idx for idx, action in self.actions.items()
                if action in possible_moves]

    def step(self, action_idx):
        """Apply the black (training) player's action and advance the game.

        After black's move, the random white opponent replies (unless the
        game ended or black must continue a multi-jump).  Returns the tuple
        ``(observation, reward, done)``.

        Raises:
            AssertionError: if it is not black's turn to move.
        """
        # A bare `assert` would be stripped under `python -O`; raise the
        # same exception type explicitly so the check always runs.
        if self.board.get_current_player() != self.board.BLACK_PLAYER:
            raise AssertionError("Training player should be black!")

        self.last_action_idx = action_idx
        action = self._idx_to_action(action_idx)

        # Piece counts before the move, for the capture-shaping reward.
        white_pieces_before = self.board.get_white_num(
        ) + self.board.get_white_kings_num()
        black_pieces_before = self.board.get_black_num(
        ) + self.board.get_black_kings_num()

        self.board.make_move(action)

        # If the turn passed to white (no forced extra jump for black),
        # let the random opponent answer.
        if self.board.get_current_player() == self.board.WHITE_PLAYER:
            if not self.board.is_over():
                self.opponent_move()
        self.update_game_info()

        white_pieces = self.board.get_white_num(
        ) + self.board.get_white_kings_num()
        white_kings_pieces = self.board.get_white_kings_num()
        black_pieces = self.board.get_black_num(
        ) + self.board.get_black_kings_num()
        black_kings_pieces = self.board.get_black_kings_num()

        if self.board.is_over():
            print("black: p. %d, k. %d, white: p. %d, k. %d" %
                  (black_pieces, black_kings_pieces,
                   white_pieces, white_kings_pieces))
            if self.board.get_winner() == self.board.BLACK_PLAYER:
                # black wins
                print("black wins")
                self.reward = self.win_reward
            else:
                print("white wins")
                self.reward = self.defeat_reward
        else:
            if self.enable_capturing_reward:
                # Reward = opponent pieces captured minus own pieces lost.
                captured_whites = white_pieces_before - white_pieces
                captured_black = black_pieces_before - black_pieces
                self.reward = captured_whites - captured_black
            else:
                self.reward = 0

        self.score += self.reward
        self.game_turns += 1
        self.done = self.board.is_over()
        return self.observation, self.reward, self.done

    def opponent_move(self):
        """Make one random move for the opponent, recursing on multi-jumps."""
        current_player = self.board.get_current_player()
        moves = self.board.get_legal_moves()
        action = random.choice(moves)
        self.board.make_move(action)
        # If the same side is still to move, the jump continues.
        if self.board.get_current_player() == current_player:
            self.opponent_move()

    def reset(self):
        """Reset all episode state and return ``(observation, reward, done)``."""
        self.restart_environment_episode()
        self.done = False
        self.reward = 0.0
        self.last_action_idx = 0
        self.game_turns = 0
        self.score = 0
        return self.observation, self.reward, self.done