Пример #1
0
import gym_connect4
from agents.random_agent import RandomAgent

if __name__ == "__main__":
    # `import gym_connect4` at the top of the file does not bind the name
    # `gym`, so it has to be imported explicitly before calling `gym.make`.
    import gym

    # Build environment
    print("[.] Build Environment")
    env = gym.make('gym_connect4:connect4-v0')

    try:
        # Create random agent
        print("[.] Create Random Agent")
        agent = RandomAgent(env.action_space, env.observation_space)

        print(env.action_space.n)

        # Init environment
        done = False
        obs = env.reset()

        # Run game: keep feeding the agent's action to the environment until
        # the environment reports the episode as finished.
        print("[.] Running game")
        while not done:
            obs, reward, done, info = env.step(agent.get_action(obs))

        # Final render
        print("[+] Done.")
        print("Infos: ", info)
        print("Final board: ")
        env.render()
    finally:
        # Close environment even if the agent or the environment raised
        env.close()
Пример #2
0
class Connect4Env(Env):
    """Connect 4 environment in which the agent plays against a built-in opponent.

    The board is stored in ``self.state`` as an integer array of shape
    ``(nb_rows, nb_columns)``: ``1`` is a token of the acting agent, ``-1`` a
    token of the opponent and ``0`` an empty cell. Each call to :meth:`step`
    plays the agent's move and, if the game is not over, the opponent's reply.
    """

    def __init__(self):
        """Create an empty 6x7 board, the spaces, the rewards and the opponent."""
        # Board dimension
        self.nb_rows = 6
        self.nb_columns = 7
        self.done = False

        # nb_empty indicates the number of available cells per column
        self.nb_empty = [self.nb_rows] * self.nb_columns

        # Save the board state
        self.state = np.zeros((self.nb_rows, self.nb_columns), dtype=int)

        # Learn about spaces here: http://gym.openai.com/docs/#spaces
        self.action_space = spaces.Discrete(self.nb_columns)
        # The `np.int` alias was removed from NumPy; the builtin `int` is the
        # documented replacement and matches the dtype of `self.state`.
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(self.nb_rows,
                                                   self.nb_columns),
                                            dtype=int)

        # Tuple corresponding to the min and max possible rewards
        self.reward_range = (-10, 1)
        self.rewards = {
            "invalid": -10,
            "valid": 1 / 42,
            "won": 1,
            "lost": -1,
            "draw": 0,
        }

        # Render properties: board value -> character printed by render()
        self.render_tokens = {-1: 'x', 1: 'o', 0: ' '}

        # Random agent used as the opponent.
        # NOTE(review): the second argument is the board array itself, not
        # `self.observation_space`; confirm which one RandomAgent expects.
        self.opponent = RandomAgent(self.action_space, self.state)

        # StableBaselines throws error if these are not defined
        self.spec = None
        self.metadata = None

    def reset(self):
        """
        Reinitialize the environment to the initial state

        :return: state (a fresh, empty board)
        """
        self.state = np.zeros((self.nb_rows, self.nb_columns), dtype=int)
        self.nb_empty = [self.nb_rows] * self.nb_columns
        self.done = False
        return self.state

    def is_action_valid(self, action) -> bool:
        """ If the column still has an empty cell then the move is valid"""
        return self.nb_empty[action] != 0

    def render(self):
        """
        Print the current board to the console as an ASCII grid
        """
        separator = "+---" * self.nb_columns + '+'
        print(separator)
        for row in range(self.nb_rows):
            cells = ' | '.join(self.render_tokens[cell]
                               for cell in self.state[row])
            print('| ' + cells + ' |')
            print(separator)

    def check_line(self, align):
        """
        Look for four consecutive identical tokens in a sequence of cells

        :param align: iterable of board values (one row, column or diagonal)
        :return: (True, winning token) as soon as a run of four is found,
                 otherwise (False, 0)
        """
        no_token = 0
        token_play1 = 1
        token_play2 = -1
        count_play1, count_play2 = 0, 0
        for token in align:
            if token == no_token:
                # An empty cell breaks any run in progress
                count_play1, count_play2 = 0, 0
            elif token == token_play1:
                count_play1 += 1
                count_play2 = 0
            else:
                count_play2 += 1
                count_play1 = 0
            if count_play2 == 4:
                return True, token_play2
            if count_play1 == 4:
                return True, token_play1
        return False, no_token

    def is_boad_full(self) -> bool:
        """
        Check if the board is full (no empty cell left)
        :return: Bool
        """
        empty_slot_id = 0
        return empty_slot_id not in self.state

    def has_player_won(self) -> bool:
        """ Check if any row, column or diagonal holds four aligned tokens """
        # Diagonal offsets that yield at least four cells
        offsets = range(-self.nb_rows + 4, self.nb_columns - 3)

        # DIRECTION NORTH EAST (diagonals of the vertically flipped board)
        a1 = [self.state[::-1, :].diagonal(i) for i in offsets]
        # DIRECTION EAST
        a2 = [self.state[i, :] for i in range(self.nb_rows)]
        # DIRECTION SOUTH EAST
        a3 = [self.state.diagonal(i) for i in offsets]
        # DIRECTION SOUTH
        a4 = [self.state[:, j] for j in range(self.nb_columns)]

        aligns = a1 + a2 + a3 + a4
        return any(self.check_line(align)[0] for align in aligns)

    def emplace_token(self, column, token):
        """
        Drop a token in a column; it lands on the lowest free row

        :param column: index of the column to play
        :param token: board value to write (1 or -1 in this class)
        :raises ColumnIsFull: if the column has no empty cell left
        """
        # Check if column is already full
        if self.nb_empty[column] <= 0:
            raise ColumnIsFull

        # Add the token: after the decrement, nb_empty[column] is exactly the
        # row index of the lowest free cell in that column.
        self.nb_empty[column] -= 1
        row = self.nb_empty[column]
        self.state[row, column] = token

    def _end_episode(self, reward_key, label):
        """Flag the episode as finished and build the terminal step result."""
        self.done = True
        # `info` is a dict, as the step contract requires; the label is kept
        # as the key so that `label in info` still holds.
        return self.state, self.rewards[reward_key], True, {label: True}

    def step(self, action: int) -> (np.ndarray, float, bool, dict):
        """
        One can make a step on the environment and obtain its reaction:
        - the new state
        - the reward of the new state
        - should we continue the game?

        The agent's token is played first; if the game is not over the
        opponent then plays its own move. ``info`` is empty while the game
        continues and otherwise holds a single key naming the outcome
        ("Invalid Action", "Won", "Lost", "Draw" or
        "Opponent Invalid Action") mapped to ``True``.

        :param action: index of the column the agent plays
        :return: state, reward, game_over, info
        """
        # Let first agent make a move
        try:
            self.emplace_token(action, token=1)
        except ColumnIsFull:
            return self._end_episode("invalid", "Invalid Action")

        # Check win, then draw, after the agent's move
        if self.has_player_won():
            return self._end_episode("won", "Won")
        if self.is_boad_full():
            return self._end_episode("draw", "Draw")

        # Let the opponent agent make a move
        try:
            opp_action = self.opponent.get_action(self.state)
            self.emplace_token(opp_action, token=-1)
        except ColumnIsFull:
            # Opponent mistake, should not happen and we don't want to
            # reward our trained agent for this
            return self._end_episode("valid", "Opponent Invalid Action")

        # Check win, then draw, after the opponent's move
        if self.has_player_won():
            return self._end_episode("lost", "Lost")
        if self.is_boad_full():
            return self._end_episode("draw", "Draw")

        # Game continues
        self.done = False
        return self.state, self.rewards["valid"], False, {}