Пример #1
0
    def run_single_game(self):
        """Play one game on a fresh ``CheckerBoard`` and record statistics.

        Every turn the board is written to the module-level ``log_file``;
        then each entry of ``self.players`` (player id -> agent) keeps
        moving for as long as it is the board's active player.  The time
        each ``agent.best_move`` call takes is appended to
        ``self.stats["thinking_time"][player]``.

        A game still undecided after more than 200 turns is abandoned and
        scored ``-1``; otherwise ``board.winner`` is appended to
        ``self.stats["score"]``.  The number of turns played is added to
        ``self.stats["played_rounds"]``.
        """
        board = CheckerBoard()
        turn = 0
        unresolved = False

        while not board.is_over():
            turn += 1

            log_file.write("#### Turn %3d\n" % turn)
            log_file.write(str(board))
            log_file.flush()

            if turn % 100 == 0:
                print("Over %d turns played" % turn)

            for player, agent in self.players.items():
                # A player may move several times in a row (the board keeps
                # it active), so loop until the turn passes or the game ends.
                while not board.is_over() and board.active == player:
                    print("Player %d is making a decision" % player)
                    start_time = time.time()
                    move = agent.best_move(board)
                    self.stats["thinking_time"][player].append(time.time() -
                                                               start_time)
                    board.update(move)

            if turn > 200:
                # Turn limit reached.  Only call the game unresolved when it
                # really is undecided: a game that finished on this very
                # turn has a genuine winner that must not be discarded.
                unresolved = not board.is_over()
                break

        self.stats["score"].append(board.winner if not unresolved else -1)
        self.stats["played_rounds"] += turn
Пример #2
0
class CheckersEnvironmentWrapper:
    """Episode-style environment wrapper around a ``CheckerBoard`` game.

    The learning agent plays black through :meth:`step`; white's replies are
    chosen uniformly at random from the board's legal moves.  Actions are
    addressed by integer index into ``self.actions``, which enumerates every
    move returned by ``board.get_all_moves()`` at construction time.
    """

    def __init__(self):
        # env initialization
        self.actions = {}
        self.observation = []
        self.reward = 0
        self.done = False
        self.last_action_idx = 0

        # initialize the board
        self.board = CheckerBoard()

        # Observation dimensions: a single row as long as the state vector.
        self.width = len(self.board.get_state_vector())
        self.height = 1

        # Terminal rewards handed out by step() when the game ends.
        self.win_reward = 100
        self.defeat_reward = -100

        self.game_turns = 0
        self.score = 0

        # When True, non-terminal steps are rewarded with the capture balance.
        self.enable_capturing_reward = False

        # Fixed index -> move table used to translate action indices.
        for idx, move in enumerate(self.board.get_all_moves()):
            self.actions[idx] = move

        print("total actions: ", len(self.actions))

        self.action_space_size = len(self.actions)

        self.reset()

    def update_game_info(self):
        """Refresh ``self.observation`` from the board's state vector."""
        self.observation = self.board.get_state_vector()

    def restart_environment_episode(self):
        """Replace the board with a fresh one and return the new observation."""
        self.board = CheckerBoard()

        self.update_game_info()

        return self.observation

    def _idx_to_action(self, action_idx):
        """Return the move stored under ``action_idx`` in the action table."""
        return self.actions[action_idx]

    def get_valid_idx_actions(self):
        """Return the indices of all table actions that are currently legal."""
        possible_moves = self.board.get_legal_moves()

        return [
            idx for idx, action in self.actions.items()
            if action in possible_moves
        ]

    def _piece_counts(self):
        """Return ``(black_total, black_kings, white_total, white_kings)``.

        Each total is the board's ``get_<colour>_num()`` plus
        ``get_<colour>_kings_num()`` for that side.
        """
        board = self.board
        black_kings = board.get_black_kings_num()
        white_kings = board.get_white_kings_num()
        return (board.get_black_num() + black_kings, black_kings,
                board.get_white_num() + white_kings, white_kings)

    def step(self, action_idx):
        """Play black's move ``action_idx`` and, if due, white's random reply.

        Returns:
            ``(observation, reward, done)``.  On a finished game the reward
            is ``win_reward`` when black is the winner and ``defeat_reward``
            otherwise.  On an unfinished game it is the capture balance
            (white pieces lost minus black pieces lost during this step)
            when ``enable_capturing_reward`` is set, else ``0``.

        Raises:
            AssertionError: if it is not black's turn.
            KeyError: if ``action_idx`` is not in the action table.
        """
        assert self.board.get_current_player(
        ) == self.board.BLACK_PLAYER, "Training player should be black!"

        self.last_action_idx = action_idx

        action = self.actions[action_idx]

        # Only the totals are needed for the capture balance.
        black_before, _, white_before, _ = self._piece_counts()

        self.board.make_move(action)

        # Black may keep the move (the board does not switch sides); white
        # only replies when the turn actually passed and the game goes on.
        white_to_move = (
            self.board.get_current_player() == self.board.WHITE_PLAYER)
        if white_to_move and not self.board.is_over():
            # make AI opponent move
            self.opponent_move()

        self.update_game_info()

        black_now, black_kings_now, white_now, white_kings_now = (
            self._piece_counts())

        self.done = self.board.is_over()

        if self.done:
            print("black: p. %d, k. %d, white: p. %d, k. %d" %
                  (black_now, black_kings_now, white_now, white_kings_now))

            if self.board.get_winner() == self.board.BLACK_PLAYER:
                # black wins
                print("black wins")
                self.reward = self.win_reward
            else:
                print("white wins")
                self.reward = self.defeat_reward
        elif self.enable_capturing_reward:
            captured_whites = white_before - white_now
            captured_black = black_before - black_now

            self.reward = captured_whites - captured_black
        else:
            self.reward = 0

        self.score += self.reward
        self.game_turns += 1

        return self.observation, self.reward, self.done

    def opponent_move(self):
        """Play random legal moves for the side to move until its turn ends.

        The opponent keeps moving while the board leaves it as the current
        player.  The loop also stops as soon as the game is over, so no move
        is ever drawn from a finished position.
        """
        current_player = self.board.get_current_player()

        while True:
            action = random.choice(self.board.get_legal_moves())
            self.board.make_move(action)

            if (self.board.is_over()
                    or self.board.get_current_player() != current_player):
                break

    def reset(self):
        """Start a new episode and return ``(observation, reward, done)``."""
        self.restart_environment_episode()
        self.done = False
        self.reward = 0.0
        self.last_action_idx = 0

        self.game_turns = 0
        self.score = 0

        return self.observation, self.reward, self.done