Exemplo n.º 1
0
class Breakout(AbstractGame):
    """Atari Breakout exposed through the ``AbstractGame`` interface.

    Wraps the Gym ``BreakoutDeterministic-v4`` environment, with observations
    resized to 84x84 and converted to grayscale (channel dimension kept).
    Every observation seen during the episode is stored so that a past state
    can be retrieved by its index.
    """

    def __init__(self, discount: float):
        """Build the wrapped environment and record the first observation.

        Args:
            discount: Forwarded unchanged to ``AbstractGame.__init__``.
        """
        super().__init__(discount)
        self.env = gym.make('BreakoutDeterministic-v4')
        self.env = ResizeObservation(self.env, shape=(84, 84))
        self.env = GrayScaleObservation(self.env, keep_dim=True)
        # One Action per discrete index of the environment's action space.
        self.actions = [Action(i) for i in range(self.env.action_space.n)]
        # History of observations; index 0 is the observation after reset.
        self.observations = [self.env.reset()]
        self.done = False

    @property
    def action_space_size(self) -> int:
        """Return the size of the action space."""
        return len(self.actions)

    def step(self, action) -> float:
        """Execute one step of the game conditioned by the given action.

        Args:
            action: Object whose ``index`` attribute is passed to the
                underlying environment's ``step``.

        Returns:
            The reward reported by the environment for this step.
        """
        # The fourth element of the step result (info) is not used.
        observation, reward, done, _ = self.env.step(action.index)
        self.observations.append(observation)
        self.done = done
        return reward

    def terminal(self) -> bool:
        """Return whether the game is finished."""
        return self.done

    def legal_actions(self) -> List[Action]:
        """Return the legal actions available at this instant.

        Every action is always considered legal; the internal list itself is
        returned, not a copy.
        """
        return self.actions

    def make_image(self, state_index: int):
        """Return the stored observation at position ``state_index``."""
        return self.observations[state_index]
Exemplo n.º 2
0
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)

    mem = Memory()

    episode_count = 50
    reward = 0
    done = False

    rewardPerEp = numpy.array([])
    ep = numpy.array([])
    interaction = numpy.array([])

    for i in range(episode_count):
        env.reset()
        actual_state, reward, done, info = env.step(env.action_space.sample())

        nbInteraction = 0
        sumReward = 0

        while True:
            nbInteraction += 1
            # Choose the action to do
            action = agent.act(actual_state, epsilon_start)

            # Calculate next state, reward and if the episode is finished or not
            next_state, reward, done, _ = env.step(action)

            sumReward += reward
            
            if test_mode == False: