Example #1
import gym
import numpy as np
from gym import spaces
# Assumed import: the Environment API used below (act, state, display_state,
# close_display, num_actions, state_shape) matches the MinAtar package.
from minatar import Environment


class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'Game Name'
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0,
                                            1.0,
                                            shape=self.game.state_shape(),
                                            dtype=np.float32)

    def step(self, action):
        # Advance the underlying game by one action; act() returns (reward, done).
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        # Re-create the underlying game with a fixed random seed.
        # Note: any kwargs passed to init() are not preserved here.
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        # 'rgb_array' returns the current state array; 'human' shows it in the
        # game's own viewer for self.display_time.
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        # Shut down the display window if one was opened by render().
        if self.game.visualized:
            self.game.close_display()
        return 0
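
A minimal usage sketch of this wrapper (the subclass name and the game id 'breakout' are assumptions for illustration; any game supported by the underlying Environment works): subclass BaseEnv to pin the game name, then drive it through the standard Gym API.

class BreakoutEnv(BaseEnv):
    def __init__(self, display_time=50, **kwargs):
        # Set the concrete game before building the underlying Environment.
        self.game_name = 'breakout'
        self.display_time = display_time
        self.init(**kwargs)

env = BreakoutEnv()
obs = env.reset()
done = False
episode_return = 0.0
while not done:
    # Sample a random action from the discrete action space.
    obs, reward, done, info = env.step(env.action_space.sample())
    episode_return += reward
print('episode return:', episode_return)
env.close()
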
Example #2
import random

import numpy

# Assumed setup: the environment below follows the MinAtar Environment API
# (num_actions, reset, act, state, display_state); the game name and the
# episode count are placeholders.
from minatar import Environment

NUM_EPISODES = 100
env = Environment('breakout')
num_actions = env.num_actions()

returns = []
e = 0

# Run NUM_EPISODES episodes with a uniformly random policy and record each return
while e < NUM_EPISODES:
    # Initialize the return for this episode
    G = 0.0

    # Initialize the environment
    env.reset()
    terminated = False

    # Obtain first state, unused by random agent, but included for illustration
    s = env.state()
    while not terminated:
        # Select an action uniformly at random
        action = random.randrange(num_actions)

        # Act according to the action and observe the transition and reward
        reward, terminated = env.act(action)

        # Obtain s_prime, unused by random agent, but included for illustration
        s_prime = env.state()

        # Optionally visualize the current state (slows the run).
        env.display_state(50)

        G += reward

    # Increment the episode counter
    e += 1

    # Store the return for each episode
    returns.append(G)

print("Avg Return: " + str(numpy.mean(returns)) + "+/-" +
      str(numpy.std(returns) / numpy.sqrt(NUM_EPISODES)))
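
The final line reports the mean episode return together with its standard error (the standard deviation of the returns divided by the square root of the number of episodes). The same computation, rewritten only for readability, looks like this:

mean_return = numpy.mean(returns)
std_error = numpy.std(returns) / numpy.sqrt(NUM_EPISODES)
print(f"Avg Return: {mean_return:.3f} +/- {std_error:.3f}")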