def main():
    with tf.Session() as sess:
        print('Creating environment...')
        env = TFBatchedEnv(sess, Pong(), 1)
        env = BatchedFrameStack(env)

        print('Creating model...')
        model = CNN(sess,
                    gym_space_distribution(env.action_space),
                    gym_space_vectorizer(env.observation_space))

        print('Creating roller...')
        roller = TruncatedRoller(env, model, 1)

        print('Initializing variables...')
        sess.run(tf.global_variables_initializer())

        if os.path.exists('params.pkl'):
            print('Loading parameters...')
            with open('params.pkl', 'rb') as in_file:
                params = pickle.load(in_file)
            for var, val in zip(tf.trainable_variables(), params):
                sess.run(tf.assign(var, val))
        else:
            print('Warning: parameter file does not exist!')

        print('Running agent...')
        viewer = SimpleImageViewer()
        while True:
            for obs in roller.rollouts()[0].step_observations:
                viewer.imshow(obs[..., -3:])
class SimpleRenderAgent(RL.Agent):
    def __init__(self, plotfig_getter=None, image_getter=None,
                 render_fn=None) -> None:
        self.render_fn = render_fn
        self.image_getter = image_getter
        self.plotfig_getter = plotfig_getter
        self.viewer = None

    def post_act(self):
        try:
            if self.render_fn is not None:
                self.render_fn()
            elif self.image_getter is not None:
                if self.viewer is None:
                    self.viewer = SimpleImageViewer()
                img = self.image_getter()
                self.viewer.imshow(img)
            elif self.plotfig_getter is not None:
                if self.viewer is None:
                    self.viewer = SimpleImageViewer()
                fig = self.plotfig_getter()  # type: Figure
                # np.fromstring is deprecated for binary data; frombuffer
                # reads the canvas bytes without the deprecation warning.
                data = np.frombuffer(fig.canvas.tostring_rgb(),
                                     dtype=np.uint8)
                img = data.reshape(fig.canvas.get_width_height()[::-1] +
                                   (3, ))
                self.viewer.imshow(img)
            else:
                self.env.render()
        except Exception:
            logging.getLogger(__name__).exception(
                f'{self.name}: Unable to render. Disabling agent!')
            self.disable()
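# Hedged usage sketch for SimpleRenderAgent. The surrounding framework is an
# assumption: RL.Agent subclasses are presumably attached to a runner that
# calls post_act() once per step and sets `self.env`/`self.name`. The
# `make_runner` helper and `env` below are hypothetical names used only for
# illustration.
#
#   agent = SimpleRenderAgent(
#       image_getter=lambda: env.render(mode='rgb_array'))
#   runner = make_runner(env, agents=[agent])  # hypothetical
#   runner.run()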
class PyBullet(EnvExt):
    def __init__(
            self,
            name: str = 'Hopper',
            add_timestep: bool = False,
            nosuffix: bool = False
    ) -> None:
        self.name = name
        try:
            import pybullet_envs  # noqa
        except ImportError:
            raise ImportError('pybullet is not installed')
        if not nosuffix:
            name += 'BulletEnv-v0'
        env = gym.make(name)
        if add_timestep:
            env = AddTimeStep(env)
        super().__init__(RewardMonitor(env))
        self.viewer = None
        self.spec.use_reward_monitor = True

    def render(self, mode: str = 'human') -> Optional[ndarray]:
        if mode == 'human':
            arr = self._env.render('rgb_array')
            if self.viewer is None:
                from gym.envs.classic_control.rendering import \
                    SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(arr)  # type: ignore
            return None
        else:
            return self._env.render(mode)
class Runner:
    def __init__(self, env, model, num_steps, discount_rate,
                 summary_frequency, performance_num_episodes,
                 summary_log_dir):
        self.env = env
        self.model = model
        self.discount_rate = discount_rate
        self.observation = env.reset()
        self.num_steps = num_steps
        self.stats_recorder = StatsRecorder(
            summary_frequency=summary_frequency,
            performance_num_episodes=performance_num_episodes,
            summary_log_dir=summary_log_dir,
            save=True)
        self.viewer = SimpleImageViewer()

    def render(self):
        columns = []
        for i in range(80):
            rows = []
            for j in range(80):
                if self.observation[i][j] == 1:
                    rows.append([255, 255, 255])
                else:
                    rows.append([0, 0, 0])
            columns.append(rows)
        self.viewer.imshow(np.asarray(columns, dtype=np.uint8))

    def run(self):
        observations = []
        rewards = []
        actions = []
        terminals = []
        values = []
        for _ in range(self.num_steps):
            action_index, value = self.model.predict([self.observation])
            observations.append(self.observation)
            action = action_with_index(action_index)
            values.append(value)
            self.observation, reward, terminal = self.env.step(action)
            self.stats_recorder.after_step(reward=reward, terminal=terminal)
            rewards.append(reward)
            actions.append(action_index)
            terminals.append(terminal)
            if terminal:
                self.observation = self.env.reset()
        if terminals[-1] == 0:
            next_value = self.model.predict_value([self.observation])[0]
            discounted_rewards = discount(rewards + [next_value],
                                          terminals + [False],
                                          self.discount_rate)[:-1]
        else:
            discounted_rewards = discount(rewards, terminals,
                                          self.discount_rate)
        self.model.train(observations, discounted_rewards, actions, values)
def main():
    global restart, action
    parser = argparse.ArgumentParser()
    parser.add_argument("--bot", type=int, default=0,
                        help="Number of bot cars_full in environment.")
    parser.add_argument("--track", type=int, default=0,
                        help="Track for agents cars_full in environment.")
    parser.add_argument("--discrete", type=int, default=1,
                        help="Apply discrete wrapper?")
    parser.add_argument("--sleep", type=float, default=None,
                        help="time in s between actions")
    parser.add_argument("--debug", action='store_true', default=False,
                        help="debug mode")
    parser.add_argument(
        "--env-settings",
        type=str,
        default='envs/gym_car_intersect_fixed/settings_sets/'
                'env_settings__basic_small_rotation.json',
        help="path to the environment settings JSON",
    )
    args = parser.parse_args()

    env = CarRacingHackatonContinuousFixed(args.env_settings)
    env = DiscreteWrapper(env)
    env.reset()
    time.sleep(3.0)

    viewer = SimpleImageViewer()
    viewer.imshow(env.get_true_picture())
    viewer.window.on_key_press = key_press
    viewer.window.on_key_release = key_release

    # while True:
    env.reset()
    total_reward = 0.0
    steps = 0
    restart = False
    while True:
        s = None
        done = None
        info = {}
        for _ in range(1):
            s, r, done, info = env.step(action)
            total_reward += r
            print("\naction " + str(action))
            print("step {} total_reward {:+0.2f}".format(steps,
                                                         total_reward))
            print(info)
            steps += 1
        viewer.imshow(env.get_true_picture())
        if done or restart or 'need_restart' in info.keys():
            print('restart')
            break
class BaseEnv(gym.Env, ABC):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 3
    }
    reward_range = (-float('inf'), float('inf'))

    def __init__(self):
        self.viewer = None
        self.seed()

    @abstractmethod
    def step(self, action):
        pass

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    @abstractmethod
    def reset(self):
        pass

    @abstractmethod
    def get_image(self):
        pass

    def render(self, mode='rgb_array', max_width=20):
        img = self.get_image()
        img = np.asarray(img).astype(np.uint8)
        img_height, img_width = img.shape[:2]
        ratio = max_width / img_width
        img = Image.fromarray(img).resize(
            [int(ratio * img_width), int(ratio * img_height)])
        img = np.asarray(img)
        if mode == 'rgb_array':
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            return img
        elif mode == 'human':
            from gym.envs.classic_control.rendering import SimpleImageViewer
            if self.viewer is None:
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
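# A minimal sketch of a concrete subclass of the BaseEnv above, assuming only
# the abstract interface it declares (step/reset/get_image). The 8x8 grid and
# the trivial "brightness counts steps" dynamics are invented for
# illustration; numpy is assumed imported as np, as elsewhere in this file.
class SolidColorEnv(BaseEnv):
    def __init__(self):
        super().__init__()
        self._t = 0

    def step(self, action):
        self._t += 1
        return self.get_image(), 0.0, self._t >= 10, {}

    def reset(self):
        self._t = 0
        return self.get_image()

    def get_image(self):
        # (H, W, 3) uint8 frame whose brightness encodes elapsed steps.
        return np.full((8, 8, 3), min(255, self._t * 25), dtype=np.uint8)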
def main():
    args = arg_parser().parse_args()
    conn = redis.StrictRedis(host=args.redis_host, port=args.redis_port)
    pubsub = conn.pubsub()
    pubsub.subscribe(args.channel + ':state:' + args.env_id)
    viewer = SimpleImageViewer()
    for msg in pubsub.listen():
        if msg['type'] != 'message':
            continue
        img = np.frombuffer(msg['data'][:3 * (args.obs_size**2)],
                            dtype='uint8')
        img = img.reshape([args.obs_size] * 2 + [3])
        viewer.imshow(img)
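# Hedged sketch of a matching publisher for the viewer loop above, assuming
# the same '<channel>:state:<env_id>' naming scheme and a raw uint8 RGB byte
# layout; the real producer is not part of the snippet, so this is an
# illustration only.
def publish_frame(conn, channel, env_id, frame):
    # frame: np.ndarray of shape (obs_size, obs_size, 3), dtype uint8
    conn.publish(channel + ':state:' + env_id, frame.tobytes())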
class SpecialWrapper(gym.Wrapper):
    metadata = {'render.modes': ['human', 'rgb_array', 'encoding']}

    def __init__(self, env, terminal_condition=None):
        super(SpecialWrapper, self).__init__(env)
        self.terminal_condition = terminal_condition
        # Needed by render(mode='encoding'); it was referenced but never
        # initialized in the original.
        self.viewer = None

    def reset(self):
        return self.env.reset()

    def step(self, action):
        observation, reward, terminal, info = self.env.step(action)
        if not terminal and self.terminal_condition is not None:
            terminal = self.terminal_condition.isterminal(
                reward, terminal, info)
        return observation, reward, terminal, info

    def render(self, mode='human', **kwargs):
        if mode == 'encoding':
            if 'encoder' not in kwargs:
                raise TypeError('Expected an encoder model `encoder`')
            if 'observation' not in kwargs:
                raise TypeError(
                    'Expected previous observation `observation`')
            encoder = kwargs['encoder']
            observation = kwargs['observation']
            encoding = encoder.predict(
                np.expand_dims(observation, axis=0))[0]
            encoding = (encoding - encoding.min()) / (encoding.max() -
                                                      encoding.min())
            image = np.repeat(np.expand_dims(encoding, axis=-1), 3, axis=-1)
            image = np.uint8(image * 255)
            image = cv2.resize(image, (210, 210),
                               interpolation=cv2.INTER_AREA)
            image = np.concatenate((observation, image), axis=1)
            if self.viewer is None:
                from gym.envs.classic_control.rendering import \
                    SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(image)
        else:
            return self.env.render(mode, **kwargs)
def test():
    with open(os.path.join("demo", "Pong.demo"), "rb") as f:
        dat = pickle.load(f)
    viewer = SimpleImageViewer()
    env = gym.make('PongNoFrameskip-v4')
    checkpoint = dat['checkpoints'][18]
    checkpoint_action_nr = dat['checkpoint_action_nr'][18]
    env.reset()
    env.unwrapped.restore_state(checkpoint)
    t = 0
    while True:
        print("t ", t)
        action = dat['actions'][checkpoint_action_nr + t]
        observation, reward, done, _ = env.step(action)
        viewer.imshow(observation)
        if reward != 0:
            print("*** reset ***")
            env.reset()
            break
        time.sleep(0.5)
        t += 1
def watch_random(env, frame_rate=60.0):
    """
    Watch a random agent play an environment.
    """
    init_state = env.reset(1)
    states = tf.placeholder(init_state.dtype, shape=init_state.get_shape())
    actions = tf.random_uniform(shape=[1], minval=0,
                                maxval=env.num_actions, dtype=tf.int32)
    new_states, rews, dones = env.step(states, actions)
    image = env.observe_visual(states)
    viewer = SimpleImageViewer()
    with tf.Session() as sess:
        cur_states = sess.run(init_state)
        while True:
            cur_states, cur_rews, cur_dones = sess.run(
                [new_states, rews, dones], feed_dict={states: cur_states})
            cur_image = sess.run(image, feed_dict={states: cur_states})
            viewer.imshow(cur_image[0])
            if cur_dones[0]:
                print('done with reward: %f' % cur_rews[0])
            time.sleep(1.0 / frame_rate)
class Water(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array'],
                'video.frames_per_second': 3}
    FIELD = [
        'M',  # 0 agent
        'S',  # 1 start
        'G',  # 2 goal
        'W',  # 3 water
        'N',  # 4 nothing
    ]
    # Cap on episode length, to keep episodes from iterating forever.
    MAX_STEPS = 5000

    def __init__(self):
        super().__init__()
        self.viewer = None
        self.radius = 5
        self.rotation = 10
        self.ellipce_r = 10
        self.ellipce_c = 12
        self.x_shape = 10 * self.radius
        self.y_shape = 10 * self.radius
        self.MAP_shape = (self.x_shape, self.y_shape)
        # set an action space
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.Box(
            low=0, high=len(self.FIELD), shape=self.MAP_shape)
        nrows, ncols = self.MAP_shape
        reward_range = [-1., 1.]
        self.reset()

    def reset(self):
        self.map = self.ellipse_map
        nrows, ncols = self.MAP_shape
        self.pos = self.find_pos('S')[0]
        # self.goal = self.find_pos('G')[0]
        self.done = False
        self.reward = 0
        self.steps = 0
        self.visited = []
        return self.observe()

    # depict the map
    def ellipse_map(self):
        self.x = np.ones((self.x_shape, self.y_shape), dtype=np.uint8)
        self.x[self.x == 1] = 4
        # Start
        self.x[(0, 0)] = 1
        self.x_a, self.y_a = ellipse(self.x_shape / 2, self.y_shape / 2,
                                     self.ellipce_r, self.ellipce_c,
                                     rotation=np.deg2rad(self.rotation))
        self.x[(self.x_a, self.y_a)] = 3
        return self.x

    def is_movable(self, pos):
        return ((0 <= pos[0] < self.x_shape) and
                (0 <= pos[1] < self.y_shape))

    # judge whether the agent has reached the goal
    def is_goal(self, show=False):
        nrows, ncols = self.MAP_shape
        if self.pos[0] == nrows - 1 and self.pos[1] == ncols - 1:
            if show:
                print("Goal")
            return True
        else:
            return False

    def is_done(self, show=False):
        # The original also tested `not self.is_movable`, but a bound method
        # is always truthy, so that term was a no-op and has been dropped.
        return self.is_goal(show) or self.steps > self.MAX_STEPS

    def observe(self):
        # copy the map and mark the agent's position on it
        observation = np.copy(self.map())
        observation[tuple(self.pos)] = self.FIELD.index('M')
        return observation

    def point_finder(self):
        flat_space = np.reshape(self.observe(), [-1, 1])
        # print(flat_space)
        point = np.where(flat_space == 0)
        return int(point[0])

    def trace(self):
        self.row, self.col = np.where(self.observe() == 0)
        self.visited.append((int(self.row), int(self.col)))
        return self.visited

    def get_reward(self, pos, moved):
        nrows, ncols = self.MAP_shape
        if moved:
            if self.map()[tuple(pos)] == self.FIELD.index('W'):
                self.reward -= 10
            elif self.map()[tuple(pos)] == self.FIELD.index('N'):
                self.reward -= 0.3
            else:
                self.reward -= 0.5
        # Goal
        if self.is_goal():
            self.reward += 15
        return self.reward

    def find_pos(self, field_type):
        return np.array(
            [np.where(self.map() == self.FIELD.index(field_type))])

    def step(self, action):
        nrows, ncols = self.MAP_shape
        if action == 0:
            next_pos = [x + y for (x, y) in zip(self.pos, [0, 1])]
        elif action == 1:
            next_pos = [x + y for (x, y) in zip(self.pos, [-1, 0])]
        elif action == 2:
            next_pos = [x + y for (x, y) in zip(self.pos, [1, 0])]
        elif action == 3:
            # Originally [-1, 0], a duplicate of action 1; [0, -1] completes
            # the four cardinal moves.
            next_pos = [x + y for (x, y) in zip(self.pos, [0, -1])]
        if self.is_movable(next_pos):
            self.pos = next_pos
            moved = True
        else:
            moved = False
        reward = self.get_reward(self.pos, moved)
        observation = self.observe()
        trace = self.trace()
        state = self.point_finder()
        done = self.is_done(True)
        return trace, state, reward, observation, done

    def show(self):
        # plt.grid('on')
        ims = []
        nrows, ncols = self.MAP_shape
        ax = plt.gca()
        fig = plt.figure()
        ax.set_xticks(np.arange(0.5, nrows, 1))
        ax.set_yticks(np.arange(0.5, ncols, 1))
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        canvas = np.copy(self.map())
        for row, col in self.visited:
            canvas[(row, col)] = self.FIELD.index('M')
        img1 = plt.imshow(canvas, interpolation="bilinear", cmap=cm.GnBu)
        ims.append([img1])
        img = plt.imshow(canvas, interpolation="bilinear", cmap=cm.GnBu,
                         animated=True)
        ani = animation.ArtistAnimation(fig, ims, interval=100, blit=True,
                                        repeat_delay=1000)
        plt.show()
        return

    @abstractmethod
    def get_image(self):
        pass

    def render(self, mode='human', max_width=500):
        img = self.get_image()
        img = np.asarray(img).astype(np.uint8)
        img_height, img_width = img.shape[:2]
        ratio = max_width / img_width
        # img = Image.fromarray(img).resize(
        #     [int(ratio * img_width), int(ratio * img_height)])
        img = np.asarray(img)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control.rendering import SimpleImageViewer
            if self.viewer is None:
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
class MultiagentVecEnv(ABC):
    def __init__(self, num_envs: int, num_agents: int, height: int,
                 width: int, dtype: torch.dtype, device: str):
        self.num_envs = num_envs
        self.num_agents = num_agents
        self.height = height
        self.width = width
        self.dtype = dtype
        self.device = device
        self.viewer = None
        self.render_args = {'num_rows': 1, 'num_cols': 1, 'size': 256}

        # This Tensor represents the location of each agent in each
        # environment. It should contain only one non-zero entry for each
        # sub array along dimension 0.
        self.agents = torch.zeros((num_envs * num_agents, 1, height, width),
                                  dtype=dtype, device=device,
                                  requires_grad=False)
        # This Tensor represents the current alive/dead state of each agent
        # in each environment.
        self.dones = torch.zeros(self.num_envs * self.num_agents,
                                 dtype=torch.uint8, device=device,
                                 requires_grad=False)
        # This tensor records whether a particular environment experienced
        # an exception in the most recent step, which is useful for
        # resetting environments that have errored.
        self.errors = torch.zeros(self.num_envs, dtype=torch.uint8,
                                  device=device, requires_grad=False)

    @abstractmethod
    def step(
        self,
        actions: Dict[str, torch.Tensor],
        return_observations: bool = False
    ) -> (Dict[str, torch.Tensor], Dict[str, torch.Tensor],
          Dict[str, torch.Tensor], dict):
        raise NotImplementedError

    @abstractmethod
    def reset(self, done: torch.Tensor = None,
              return_observations: bool = True
              ) -> Optional[Dict[str, torch.Tensor]]:
        raise NotImplementedError

    @abstractmethod
    def _get_env_images(self) -> torch.Tensor:
        """Gets RGB arrays for each environment.

        Returns:
            img: A Tensor of shape (num_envs, 3, height, width) and dtype
                torch.short, i.e. an RGB rendering of each environment.
        """
        raise NotImplementedError

    def render(self, mode: str = 'human',
               env: Optional[int] = None) -> Any:
        if self.viewer is None and mode == 'human':
            # Lazy import because this breaks EC2 instances that don't have
            # a screen/viewing device.
            from gym.envs.classic_control.rendering import SimpleImageViewer
            self.viewer = SimpleImageViewer(maxwidth=1080)

        img = self._get_env_images()
        img = build_render_rgb(img=img, num_envs=self.num_envs,
                               env_height=self.height,
                               env_width=self.width, env=env,
                               num_rows=self.render_args['num_rows'],
                               num_cols=self.render_args['num_cols'],
                               render_size=self.render_args['size'])

        if mode == 'human':
            self.viewer.imshow(img)
            return self.viewer.isopen
        elif mode == 'rgb_array':
            return img
        else:
            raise ValueError('Render mode not recognised.')

    @abstractmethod
    def check_consistency(self):
        raise NotImplementedError
class RetroEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def compute_step(self, image):
        reward = self.data.current_reward()
        done = self.data.is_done()
        return reward, done, self.data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path

    def __init__(self, game, state=retro.STATE_DEFAULT, scenario=None,
                 info=None, use_restricted_actions=retro.ACTIONS_FILTERED,
                 record=False):
        if not hasattr(self, 'spec'):
            self.spec = None
        self.img = None
        self.viewer = None
        self.gamename = game
        self.statename = state

        game_path = retro.get_game_path(game)
        rom_path = retro.get_romfile_path(game)
        metadata_path = os.path.join(game_path, 'metadata.json')
        if state == retro.STATE_NONE:
            self.initial_state = None
        elif state == retro.STATE_DEFAULT:
            self.initial_state = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_state' in metadata:
                    with gzip.open(
                            os.path.join(game_path,
                                         metadata['default_state']) +
                            '.state', 'rb') as fh:
                        self.initial_state = fh.read()
            except (IOError, json.JSONDecodeError):
                pass
        else:
            if not state.endswith('.state'):
                state += '.state'
            with gzip.open(os.path.join(game_path, state), 'rb') as fh:
                self.initial_state = fh.read()

        self.data = GameData()

        if info is None:
            info = 'data'
        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = os.path.join(game_path, info + '.json')

        if scenario is None:
            scenario = 'scenario'
        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = os.path.join(game_path, scenario + '.json')

        system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating
        # an emulator, ensure that unused ones are garbage-collected.
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        img = self.em.get_screen()

        core = retro.get_system_info(system)
        self.BUTTONS = core['buttons']
        self.NUM_BUTTONS = len(self.BUTTONS)
        self.BUTTON_COMBOS = self.data.valid_actions()

        try:
            assert self.data.load(
                info_path, scenario_path
            ), 'Failed to load info (%s) or scenario (%s)' % (info_path,
                                                              scenario_path)
        except Exception:
            del self.em
            raise

        if use_restricted_actions == retro.ACTIONS_DISCRETE:
            combos = 1
            for combo in self.BUTTON_COMBOS:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos)
        elif use_restricted_actions == retro.ACTIONS_MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.BUTTON_COMBOS
            ])
        else:
            self.action_space = gym.spaces.MultiBinary(self.NUM_BUTTONS)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8
        self.observation_space = gym.spaces.Box(low=0, high=255,
                                                shape=img.shape, **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def step(self, a):
        if self.img is None:
            raise RuntimeError('Please call env.reset() before env.step()')

        action = 0
        if self.use_restricted_actions == retro.ACTIONS_DISCRETE:
            for combo in self.BUTTON_COMBOS:
                current = a % len(combo)
                a //= len(combo)
                action |= combo[current]
        elif self.use_restricted_actions == retro.ACTIONS_MULTI_DISCRETE:
            for i in range(len(a)):
                buttons = self.BUTTON_COMBOS[i]
                action |= buttons[a[i]]
        else:
            for i in range(len(a)):
                action |= int(a[i]) << i
        if self.use_restricted_actions == retro.ACTIONS_FILTERED:
            action = self.data.filter_action(action)
        a = np.zeros([16], np.uint8)
        for i in range(16):
            a[i] = (action >> i) & 1
            if self.movie:
                self.movie.set_key(i, a[i])
        if self.movie:
            self.movie.step()
        self.em.set_button_mask(a)
        self.em.step()
        self.img = ob = self.em.get_screen()
        self.data.update_ram()
        rew, done, info = self.compute_step(ob)
        return ob, float(rew), bool(done), dict(info)

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        self.em.set_button_mask(np.zeros([16], np.uint8))
        self.em.step()
        if self.movie_path is not None:
            self.record_movie(
                os.path.join(
                    self.movie_path,
                    '%s-%s-%04d.bk2' % (self.gamename, self.statename,
                                        self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.img = ob = self.em.get_screen()
        self.data.reset()
        self.data.update_ram()
        return ob

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return
        if mode == "rgb_array":
            return self.em.get_screen() if self.img is None else self.img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import \
                    SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(self.img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
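# Hedged usage sketch for the (older-API) RetroEnv above. 'Airstriker-Genesis'
# is the game commonly bundled with gym-retro; any integrated game should
# work. This is an illustration, not code from the snippet's repository.
#
#   env = RetroEnv('Airstriker-Genesis')
#   obs = env.reset()
#   obs, rew, done, info = env.step(env.action_space.sample())
#   env.render()  # opens a SimpleImageViewer window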
class RetroEnv(gym.Env): """ Gym Retro environment class Provides a Gym interface to classic video games """ metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60.0 } def __init__(self, game, state=retro.State.DEFAULT, scenario=None, info=None, use_restricted_actions=retro.Actions.FILTERED, record=False, players=1, inttype=retro.data.Integrations.STABLE, obs_type=retro.Observations.IMAGE, naudio_samples=None, make_video=False, is_baseline=False): if not hasattr(self, 'spec'): self.spec = None self._obs_type = obs_type self.img = None self.ram = None self.viewer = None self.gamename = game self.statename = state self.initial_state = None self.players = players self.naudio_samples = naudio_samples self.audio_clip = [] self.make_video = make_video self.is_baseline = is_baseline metadata = {} rom_path = retro.data.get_romfile_path(game, inttype) metadata_path = retro.data.get_file_path(game, 'metadata.json', inttype) if state == retro.State.NONE: self.statename = None elif state == retro.State.DEFAULT: self.statename = None try: with open(metadata_path) as f: metadata = json.load(f) if 'default_player_state' in metadata and self.players <= len( metadata['default_player_state']): self.statename = metadata['default_player_state'][ self.players - 1] elif 'default_state' in metadata: self.statename = metadata['default_state'] else: self.statename = None except (IOError, json.JSONDecodeError): pass if self.statename: self.load_state(self.statename, inttype) self.data = retro.data.GameData() if info is None: info = 'data' if info.endswith('.json'): # assume it's a path info_path = info else: info_path = retro.data.get_file_path(game, info + '.json', inttype) if scenario is None: scenario = 'scenario' if scenario.endswith('.json'): # assume it's a path scenario_path = scenario else: scenario_path = retro.data.get_file_path(game, scenario + '.json', inttype) self.system = retro.get_romfile_system(rom_path) # We can't have more than one emulator per process. 
Before creating an # emulator, ensure that unused ones are garbage-collected gc.collect() self.em = retro.RetroEmulator(rom_path) self.em.configure_data(self.data) self.em.step() core = retro.get_system_info(self.system) self.buttons = core['buttons'] self.num_buttons = len(self.buttons) self.button_combos = self.data.valid_actions() try: assert self.data.load( info_path, scenario_path), 'Failed to load info (%s) or scenario (%s)' % ( info_path, scenario_path) except Exception: del self.em raise if use_restricted_actions == retro.Actions.DISCRETE: combos = 1 for combo in self.button_combos: combos *= len(combo) self.action_space = gym.spaces.Discrete(combos**players) elif use_restricted_actions == retro.Actions.MULTI_DISCRETE: self.action_space = gym.spaces.MultiDiscrete([ len(combos) if gym_version >= (0, 9, 6) else (0, len(combos) - 1) for combos in self.button_combos ] * players) else: self.action_space = gym.spaces.MultiBinary(self.num_buttons * players) kwargs = {} if gym_version >= (0, 9, 6): kwargs['dtype'] = np.uint8 if self._obs_type == retro.Observations.RAM: shape = self.get_ram().shape else: img = [self.get_screen(p) for p in range(players)] shape = img[0].shape self.observation_space = gym.spaces.Box(low=0, high=255, shape=shape, **kwargs) self.use_restricted_actions = use_restricted_actions self.movie = None self.movie_id = 0 self.movie_path = None if record is True: self.auto_record() elif record is not False: self.auto_record(record) self.seed() if gym_version < (0, 9, 6): self._seed = self.seed self._step = self.step self._reset = self.reset self._render = self.render self._close = self.close def _update_obs(self): if self._obs_type == retro.Observations.RAM: self.ram = self.get_ram() return self.ram elif self._obs_type == retro.Observations.IMAGE: self.img = self.get_screen() return self.img else: raise ValueError('Unrecognized observation type: {}'.format( self._obs_type)) def action_to_array(self, a): actions = [] for p in range(self.players): action = 0 if self.use_restricted_actions == retro.Actions.DISCRETE: for combo in self.button_combos: current = a % len(combo) a //= len(combo) action |= combo[current] elif self.use_restricted_actions == retro.Actions.MULTI_DISCRETE: ap = a[self.num_buttons * p:self.num_buttons * (p + 1)] for i in range(len(ap)): buttons = self.button_combos[i] action |= buttons[ap[i]] else: ap = a[self.num_buttons * p:self.num_buttons * (p + 1)] for i in range(len(ap)): action |= int(ap[i]) << i if self.use_restricted_actions == retro.Actions.FILTERED: action = self.data.filter_action(action) ap = np.zeros([self.num_buttons], np.uint8) for i in range(self.num_buttons): ap[i] = (action >> i) & 1 actions.append(ap) return actions def step(self, a): if self.img is None and self.ram is None: raise RuntimeError('Please call env.reset() before env.step()') for p, ap in enumerate(self.action_to_array(a)): if self.movie: for i in range(self.num_buttons): self.movie.set_key(i, ap[i], p) self.em.set_button_mask(ap, p) if self.movie: self.movie.step() self.em.step() self.data.update_ram() ob = self._update_obs() rew, done, info = self.compute_step() sample = self.em.get_audio() if self.naudio_samples is not None: info['audio'] = librosa.util.fix_length(sample.T, int(self.naudio_samples)).T if self.make_video: self.audio_clip.extend(sample) if self.make_video: baseline_str = 'b-' if self.is_baseline else '' cv2.imwrite( 'video_frames/' + baseline_str + self.gamename + '-' + str(self.n) + '.png', cv2.cvtColor(ob, cv2.COLOR_RGB2BGR)) self.n += 1 return ob, 
rew, bool(done), dict(info) def reset(self): if self.audio_clip: if self.make_video: baseline_str = 'b-' if self.is_baseline else '' path = baseline_str + self.gamename + '_audio.wav' numpy_audio = np.asarray(self.audio_clip) wv.write(path, int(self.em.get_audio_rate()), numpy_audio) # Combine all saved frames into video cmd1 = 'ffmpeg -y -r 60 -f image2 -i video_frames/' + baseline_str + self.gamename + '-%d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p ' + baseline_str + self.gamename + '_noaudio.mp4 -hide_banner -loglevel panic' # Add audio to video os.system(cmd1) cmd = "ffmpeg -y -i " + baseline_str + self.gamename + "_noaudio.mp4 -i " + path + " -y -c:v copy -c:a aac -strict experimental -hide_banner -loglevel panic " + baseline_str + self.gamename + '-' + str( self.n) + '.mp4' os.system(cmd) os.system('rm video_frames/' + baseline_str + self.gamename + '*.png') print( 'saved video to ', baseline_str + self.gamename + '-' + str(self.n) + '.mp4') sys.exit(0) self.audio_clip = [] self.n = 0 if self.initial_state: self.em.set_state(self.initial_state) for p in range(self.players): self.em.set_button_mask(np.zeros([self.num_buttons], np.uint8), p) self.em.step() if self.movie_path is not None: rel_statename = os.path.splitext(os.path.basename( self.statename))[0] self.record_movie( os.path.join( self.movie_path, '%s-%s-%06d.bk2' % (self.gamename, rel_statename, self.movie_id))) self.movie_id += 1 if self.movie: self.movie.step() self.data.reset() self.data.update_ram() return self._update_obs() def seed(self, seed=None): self.np_random, seed1 = seeding.np_random(seed) # Derive a random seed. This gets passed as a uint, but gets # checked as an int elsewhere, so we need to keep it below # 2**31. seed2 = seeding.hash_seed(seed1 + 1) % 2**31 return [seed1, seed2] def render(self, mode='human', close=False): if close: if self.viewer: self.viewer.close() return img = self.get_screen() if self.img is None else self.img if mode == "rgb_array": return img elif mode == "human": if self.viewer is None: from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = SimpleImageViewer() self.viewer.imshow(img) return self.viewer.isopen def close(self): if hasattr(self, 'em'): del self.em def get_action_meaning(self, act): actions = [] for p, action in enumerate(self.action_to_array(act)): actions.append([ self.buttons[i] for i in np.extract(action, np.arange(len(action))) ]) if self.players == 1: return actions[0] return actions def get_ram(self): blocks = [] for offset in sorted(self.data.memory.blocks): arr = np.frombuffer(self.data.memory.blocks[offset], dtype=np.uint8) blocks.append(arr) return np.concatenate(blocks) def get_screen(self, player=0): img = self.em.get_screen() x, y, w, h = self.data.crop_info(player) if not w or x + w > img.shape[1]: w = img.shape[1] else: w += x if not h or y + h > img.shape[0]: h = img.shape[0] else: h += y if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]: return img return img[y:h, x:w] def load_state(self, statename, inttype=retro.data.Integrations.DEFAULT): if not statename.endswith('.state'): statename += '.state' with gzip.open( retro.data.get_file_path(self.gamename, statename, inttype), 'rb') as fh: self.initial_state = fh.read() self.statename = statename def compute_step(self): if self.players > 1: reward = [self.data.current_reward(p) for p in range(self.players)] else: reward = self.data.current_reward() done = self.data.is_done() return reward, done, self.data.lookup_all() def record_movie(self, path): self.movie = 
retro.Movie(path, True, self.players) self.movie.configure(self.gamename, self.em) if self.initial_state: self.movie.set_state(self.initial_state) def stop_record(self): self.movie_path = None self.movie_id = 0 if self.movie: self.movie.close() self.movie = None def auto_record(self, path=None): if not path: path = os.getcwd() self.movie_path = path
class RetroEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def compute_step(self):
        if self.players > 1:
            reward = [self.data.current_reward(p)
                      for p in range(self.players)]
        else:
            reward = self.data.current_reward()
        done = self.data.is_done()
        return reward, done, self.data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True, self.players)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path

    def __init__(self, game, state=retro.State.DEFAULT, scenario=None,
                 info=None, use_restricted_actions=retro.Actions.FILTERED,
                 record=False, players=1,
                 inttype=retro.data.Integrations.STABLE):
        if not hasattr(self, 'spec'):
            self.spec = None
        self.img = None
        self.viewer = None
        self.gamename = game
        self.statename = state
        self.initial_state = None
        self.players = players

        metadata = {}
        rom_path = retro.data.get_romfile_path(game, inttype)
        metadata_path = retro.data.get_file_path(game, 'metadata.json',
                                                 inttype)

        if state == retro.State.NONE:
            self.statename = None
        elif state == retro.State.DEFAULT:
            self.statename = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_player_state' in metadata and \
                        self.players <= len(
                            metadata['default_player_state']):
                    self.statename = \
                        metadata['default_player_state'][self.players - 1]
                elif 'default_state' in metadata:
                    self.statename = metadata['default_state']
                else:
                    self.statename = None
            except (IOError, json.JSONDecodeError):
                pass

        if self.statename:
            if not self.statename.endswith('.state'):
                self.statename += '.state'
            with gzip.open(
                    retro.data.get_file_path(game, self.statename,
                                             inttype), 'rb') as fh:
                self.initial_state = fh.read()

        self.data = retro.data.GameData()

        if info is None:
            info = 'data'
        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = retro.data.get_file_path(game, info + '.json',
                                                 inttype)

        if scenario is None:
            scenario = 'scenario'
        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = retro.data.get_file_path(game,
                                                     scenario + '.json',
                                                     inttype)

        self.system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating
        # an emulator, ensure that unused ones are garbage-collected.
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        core = retro.get_system_info(self.system)
        self.buttons = core['buttons']
        self.num_buttons = len(self.buttons)
        self.button_combos = self.data.valid_actions()

        try:
            assert self.data.load(
                info_path, scenario_path
            ), 'Failed to load info (%s) or scenario (%s)' % (info_path,
                                                              scenario_path)
        except Exception:
            del self.em
            raise

        img = [self.get_screen(p) for p in range(players)]

        if use_restricted_actions == retro.Actions.DISCRETE:
            combos = 1
            for combo in self.button_combos:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos**players)
        elif use_restricted_actions == retro.Actions.MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.button_combos
            ] * players)
        else:
            self.action_space = gym.spaces.MultiBinary(self.num_buttons *
                                                       players)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8
        self.observation_space = gym.spaces.Box(low=0, high=255,
                                                shape=img[0].shape,
                                                **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def action_to_array(self, a):
        actions = []
        for p in range(self.players):
            action = 0
            if self.use_restricted_actions == retro.Actions.DISCRETE:
                for combo in self.button_combos:
                    current = a % len(combo)
                    a //= len(combo)
                    action |= combo[current]
            elif self.use_restricted_actions == \
                    retro.Actions.MULTI_DISCRETE:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    buttons = self.button_combos[i]
                    action |= buttons[ap[i]]
            else:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    action |= int(ap[i]) << i
            if self.use_restricted_actions == retro.Actions.FILTERED:
                action = self.data.filter_action(action)
            ap = np.zeros([self.num_buttons], np.uint8)
            for i in range(self.num_buttons):
                ap[i] = (action >> i) & 1
            actions.append(ap)
        return actions

    def step(self, a):
        if self.img is None:
            raise RuntimeError('Please call env.reset() before env.step()')
        for p, ap in enumerate(self.action_to_array(a)):
            if self.movie:
                for i in range(self.num_buttons):
                    self.movie.set_key(i, ap[i], p)
            self.em.set_button_mask(ap, p)
        if self.movie:
            self.movie.step()
        self.em.step()
        self.img = ob = self.get_screen()
        self.data.update_ram()
        rew, done, info = self.compute_step()
        return ob, rew, bool(done), dict(info)

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        for p in range(self.players):
            self.em.set_button_mask(
                np.zeros([self.num_buttons], np.uint8), p)
        self.em.step()
        if self.movie_path is not None:
            rel_statename = os.path.splitext(
                os.path.basename(self.statename))[0]
            self.record_movie(
                os.path.join(
                    self.movie_path,
                    '%s-%s-%06d.bk2' % (self.gamename, rel_statename,
                                        self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.img = ob = self.get_screen()
        self.data.reset()
        self.data.update_ram()
        return ob

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return
        if mode == "rgb_array":
            return self.get_screen() if self.img is None else self.img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import \
                    SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(self.img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

    def get_action_meaning(self, act):
        actions = []
        for p, action in enumerate(self.action_to_array(act)):
            actions.append([
                self.buttons[i]
                for i in np.extract(action, np.arange(len(action)))
            ])
        if self.players == 1:
            return actions[0]
        return actions

    def get_screen(self, player=0):
        img = self.em.get_screen()
        x, y, w, h = self.data.crop_info(player)
        if not w or x + w > img.shape[1]:
            w = img.shape[1]
        else:
            w += x
        if not h or y + h > img.shape[0]:
            h = img.shape[0]
        else:
            h += y
        if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]:
            return img
        return img[y:h, x:w]
class BaseEnv(gym.Env, ABC): """Base class for all mazelab environments. The subclass should implement at least the following: - :meth:`step` - :meth:`reset` - :meth:`get_image` """ metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 3 } def __init__(self, maze, motion): self.maze = maze self.motion = motion self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.maze.size, dtype=np.float32) self.action_space = spaces.Discrete(self.motion.size) self.viewer = None self.seed() @abstractmethod def step(self, action): pass def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] @abstractmethod def reset(self): pass @abstractmethod def get_image(self): pass def render(self, mode='human', max_width=500): img = self.get_image() img = np.asarray(img).astype(np.uint8) img_height, img_width = img.shape[:2] ratio = max_width / img_width img = Image.fromarray(img).resize( [int(ratio * img_width), int(ratio * img_height)]) img = np.asarray(img) if mode == 'rgb_array': return img elif mode == 'human': from gym.envs.classic_control.rendering import SimpleImageViewer if self.viewer is None: self.viewer = SimpleImageViewer() self.viewer.imshow(img) return self.viewer.isopen def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None
def _env_runner(
        worker: "RolloutWorker",
        base_env: BaseEnv,
        extra_batch_callback: Callable[[SampleBatchType], None],
        policies: Dict[PolicyID, Policy],
        policy_mapping_fn: Callable[[AgentID], PolicyID],
        rollout_fragment_length: int,
        horizon: int,
        preprocessors: Dict[PolicyID, Preprocessor],
        obs_filters: Dict[PolicyID, Filter],
        clip_rewards: bool,
        clip_actions: bool,
        multiple_episodes_in_batch: bool,
        callbacks: "DefaultCallbacks",
        tf_sess: Optional["tf.Session"],
        perf_stats: _PerfStats,
        soft_horizon: bool,
        no_done_at_end: bool,
        observation_fn: "ObservationFunction",
        sample_collector: Optional[SampleCollector] = None,
        render: bool = None,
) -> Iterable[SampleBatchType]:
    """This implements the common experience collection logic.

    Args:
        worker (RolloutWorker): Reference to the current rollout worker.
        base_env (BaseEnv): Env implementing BaseEnv.
        extra_batch_callback (fn): Function to send extra batch data to.
        policies (Dict[PolicyID, Policy]): Map of policy ids to Policy
            instances.
        policy_mapping_fn (func): Function that maps agent ids to policy
            ids. This is called when an agent first enters the environment.
            The agent is then "bound" to the returned policy for the
            episode.
        rollout_fragment_length (int): Number of episode steps before
            `SampleBatch` is yielded. Set to infinity to yield complete
            episodes.
        horizon (int): Horizon of the episode.
        preprocessors (dict): Map of policy id to preprocessor for the
            observations prior to filtering.
        obs_filters (dict): Map of policy id to filter used to process
            observations for the policy.
        clip_rewards (bool): Whether to clip rewards before postprocessing.
        multiple_episodes_in_batch (bool): Whether to pack multiple episodes
            into each batch. This guarantees batches will be exactly
            `rollout_fragment_length` in size.
        clip_actions (bool): Whether to clip actions to the space range.
        callbacks (DefaultCallbacks): User callbacks to run on episode
            events.
        tf_sess (Session|None): Optional tensorflow session to use for
            batching TF policy evaluations.
        perf_stats (_PerfStats): Record perf stats into this object.
        soft_horizon (bool): Calculate rewards but don't reset the
            environment when the horizon is hit.
        no_done_at_end (bool): Ignore the done=True at the end of the
            episode and instead record done=False.
        observation_fn (ObservationFunction): Optional multi-agent
            observation func to use for preprocessing observations.
        sample_collector (Optional[SampleCollector]): An optional
            SampleCollector object to use.
        render (bool): Whether to try to render the environment after each
            step.

    Yields:
        rollout (SampleBatch): Object containing state, action, reward,
            terminal condition, and other fields as dictated by `policy`.
    """

    # May be populated and used for image rendering.
    simple_image_viewer: Optional["SimpleImageViewer"] = None

    # Try to get Env's `max_episode_steps` prop. If it doesn't exist, ignore
    # error and continue with max_episode_steps=None.
    max_episode_steps = None
    try:
        max_episode_steps = \
            base_env.get_unwrapped()[0].spec.max_episode_steps
    except Exception:
        pass

    # Trainer has a given `horizon` setting.
    if horizon:
        # `horizon` is larger than env's limit.
        if max_episode_steps and horizon > max_episode_steps:
            # Try to override the env's own max-step setting with our
            # horizon. If this won't work, throw an error.
            try:
                base_env.get_unwrapped()[0].spec.max_episode_steps = horizon
                base_env.get_unwrapped()[0]._max_episode_steps = horizon
            except Exception:
                raise ValueError(
                    "Your `horizon` setting ({}) is larger than the Env's "
                    "own timestep limit ({}), which seems to be unsettable! "
                    "Try to increase the Env's built-in limit to be at "
                    "least as large as your wanted `horizon`.".format(
                        horizon, max_episode_steps))
    # Otherwise, set Trainer's horizon to env's max-steps.
    elif max_episode_steps:
        horizon = max_episode_steps
        logger.debug(
            "No episode horizon specified, setting it to Env's limit ({})."
            .format(max_episode_steps))
    # No horizon/max_episode_steps -> Episodes may be infinitely long.
    else:
        horizon = float("inf")
        logger.debug("No episode horizon specified, assuming inf.")

    # Pool of batch builders, which can be shared across episodes to pack
    # trajectory data.
    batch_builder_pool: List[MultiAgentSampleBatchBuilder] = []

    def get_batch_builder():
        if batch_builder_pool:
            return batch_builder_pool.pop()
        else:
            return None

    def new_episode(env_id):
        episode = MultiAgentEpisode(policies, policy_mapping_fn,
                                    get_batch_builder,
                                    extra_batch_callback, env_id=env_id)
        # Call each policy's Exploration.on_episode_start method.
        # type: Policy
        for p in policies.values():
            if getattr(p, "exploration", None) is not None:
                p.exploration.on_episode_start(
                    policy=p,
                    environment=base_env,
                    episode=episode,
                    tf_sess=getattr(p, "_sess", None))
        callbacks.on_episode_start(
            worker=worker,
            base_env=base_env,
            policies=policies,
            episode=episode,
            env_index=env_id,
        )
        return episode

    active_episodes: Dict[str, MultiAgentEpisode] = \
        NewEpisodeDefaultDict(new_episode)

    while True:
        perf_stats.iters += 1
        t0 = time.time()
        # Get observations from all ready agents.
        # type: MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, ...
        unfiltered_obs, rewards, dones, infos, off_policy_actions = \
            base_env.poll()
        perf_stats.env_wait_time += time.time() - t0

        if log_once("env_returns"):
            logger.info("Raw obs from env: {}".format(
                summarize(unfiltered_obs)))
            logger.info("Info return from env: {}".format(summarize(infos)))

        # Process observations and prepare for policy evaluation.
        t1 = time.time()
        # type: Set[EnvID], Dict[PolicyID, List[PolicyEvalData]],
        # List[Union[RolloutMetrics, SampleBatchType]]
        active_envs, to_eval, outputs = \
            _process_observations(
                worker=worker,
                base_env=base_env,
                policies=policies,
                active_episodes=active_episodes,
                unfiltered_obs=unfiltered_obs,
                rewards=rewards,
                dones=dones,
                infos=infos,
                horizon=horizon,
                preprocessors=preprocessors,
                obs_filters=obs_filters,
                multiple_episodes_in_batch=multiple_episodes_in_batch,
                callbacks=callbacks,
                soft_horizon=soft_horizon,
                no_done_at_end=no_done_at_end,
                observation_fn=observation_fn,
                sample_collector=sample_collector,
            )
        perf_stats.raw_obs_processing_time += time.time() - t1
        for o in outputs:
            yield o

        # Do batched policy eval (across vectorized envs).
        t2 = time.time()
        # type: Dict[PolicyID, Tuple[TensorStructType, StateBatch, dict]]
        eval_results = _do_policy_eval(
            to_eval=to_eval,
            policies=policies,
            sample_collector=sample_collector,
            active_episodes=active_episodes,
            tf_sess=tf_sess,
        )
        perf_stats.inference_time += time.time() - t2

        # Process results and update episode state.
        t3 = time.time()
        actions_to_send: Dict[EnvID, Dict[AgentID, EnvActionType]] = \
            _process_policy_eval_results(
                to_eval=to_eval,
                eval_results=eval_results,
                active_episodes=active_episodes,
                active_envs=active_envs,
                off_policy_actions=off_policy_actions,
                policies=policies,
                clip_actions=clip_actions,
            )
        perf_stats.action_processing_time += time.time() - t3

        # Return computed actions to ready envs. We also send to envs that
        # have taken off-policy actions; those envs are free to ignore the
        # action.
        t4 = time.time()
        base_env.send_actions(actions_to_send)
        perf_stats.env_wait_time += time.time() - t4

        # Try to render the env, if required.
        if render:
            t5 = time.time()
            # Render can either return an RGB image (uint8 [w x h x 3]
            # numpy array) or take care of rendering itself (returning
            # True).
            rendered = base_env.try_render()
            # Rendering returned an image -> Display it in a
            # SimpleImageViewer.
            if isinstance(rendered, np.ndarray) and \
                    len(rendered.shape) == 3:
                # ImageViewer not defined yet, try to create one.
                if simple_image_viewer is None:
                    try:
                        from gym.envs.classic_control.rendering import \
                            SimpleImageViewer
                        simple_image_viewer = SimpleImageViewer()
                    except (ImportError, ModuleNotFoundError):
                        render = False  # disable rendering
                        logger.warning(
                            "Could not import gym.envs.classic_control."
                            "rendering! Try `pip install gym[all]`.")
                if simple_image_viewer:
                    simple_image_viewer.imshow(rendered)
            perf_stats.env_render_time += time.time() - t5
class N64Env(gym.Env):
    """
    Nintendo 64 environment.

    We can't use the typical retro environment because the N64 uses dynamic
    memory addresses, so we have to read and interpret the RAM differently,
    which we handle in this class.
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60.0
    }

    def __init__(self, game, state=retro.State.DEFAULT, scenario=None,
                 info=None, use_restricted_actions=retro.Actions.FILTERED,
                 record=False, players=1,
                 inttype=retro.data.Integrations.STABLE,
                 obs_type=retro.Observations.IMAGE):
        if not hasattr(self, 'spec'):
            self.spec = None
        self._obs_type = obs_type
        self.img = None
        self.ram = None
        self.viewer = None
        self.gamename = game
        self.statename = state
        self.initial_state = None
        self.players = players

        if game != "SuperSmashBros-N64":
            raise NotImplementedError("Only ssb64 supported so far")
        self.ssb64_game_data = retro.data.SSB64GameData()

        metadata = {}
        rom_path = retro.data.get_romfile_path(game, inttype)
        metadata_path = retro.data.get_file_path(game, 'metadata.json',
                                                 inttype)

        if state == retro.State.NONE:
            self.statename = None
        elif state == retro.State.DEFAULT:
            self.statename = None
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                if 'default_player_state' in metadata and \
                        self.players <= len(
                            metadata['default_player_state']):
                    self.statename = \
                        metadata['default_player_state'][self.players - 1]
                elif 'default_state' in metadata:
                    self.statename = metadata['default_state']
                else:
                    self.statename = None
            except (IOError, json.JSONDecodeError):
                pass

        if self.statename:
            self.load_state(self.statename, inttype)

        self.data = retro.data.GameData()

        if info is None:
            info = 'data'
        if info.endswith('.json'):
            # assume it's a path
            info_path = info
        else:
            info_path = retro.data.get_file_path(game, info + '.json',
                                                 inttype)

        if scenario is None:
            scenario = 'scenario'
        if scenario.endswith('.json'):
            # assume it's a path
            scenario_path = scenario
        else:
            scenario_path = retro.data.get_file_path(game,
                                                     scenario + '.json',
                                                     inttype)

        self.system = retro.get_romfile_system(rom_path)

        # We can't have more than one emulator per process. Before creating
        # an emulator, ensure that unused ones are garbage-collected.
        gc.collect()
        self.em = retro.RetroEmulator(rom_path)
        self.em.configure_data(self.data)
        self.em.step()

        core = retro.get_system_info(self.system)
        self.buttons = core['buttons']
        self.num_buttons = len(self.buttons)

        try:
            assert self.data.load(
                info_path, scenario_path
            ), 'Failed to load info (%s) or scenario (%s)' % (info_path,
                                                              scenario_path)
        except Exception:
            del self.em
            raise

        self.button_combos = self.data.valid_actions()
        if use_restricted_actions == retro.Actions.DISCRETE:
            combos = 1
            for combo in self.button_combos:
                combos *= len(combo)
            self.action_space = gym.spaces.Discrete(combos**players)
        elif use_restricted_actions == retro.Actions.MULTI_DISCRETE:
            self.action_space = gym.spaces.MultiDiscrete([
                len(combos) if gym_version >= (0, 9, 6) else
                (0, len(combos) - 1) for combos in self.button_combos
            ] * players)
        else:
            self.action_space = gym.spaces.MultiBinary(self.num_buttons *
                                                       players)

        kwargs = {}
        if gym_version >= (0, 9, 6):
            kwargs['dtype'] = np.uint8
        if self._obs_type == retro.Observations.RAM:
            shape = self.get_ram().shape
        else:
            img = [self.get_screen(p) for p in range(players)]
            shape = img[0].shape
        self.observation_space = gym.spaces.Box(low=0, high=255,
                                                shape=shape, **kwargs)

        self.use_restricted_actions = use_restricted_actions
        self.movie = None
        self.movie_id = 0
        self.movie_path = None
        if record is True:
            self.auto_record()
        elif record is not False:
            self.auto_record(record)
        self.seed()
        if gym_version < (0, 9, 6):
            self._seed = self.seed
            self._step = self.step
            self._reset = self.reset
            self._render = self.render
            self._close = self.close

    def _update_obs(self):
        self.ram = self.get_ram()
        self.img = self.get_screen()
        if self._obs_type == retro.Observations.RAM:
            return self.ram
        elif self._obs_type == retro.Observations.IMAGE:
            return self.img
        else:
            raise ValueError(
                'Unrecognized observation type: {}'.format(self._obs_type))

    def action_to_array(self, a):
        actions = []
        for p in range(self.players):
            action = 0
            if self.use_restricted_actions == retro.Actions.DISCRETE:
                for combo in self.button_combos:
                    current = a % len(combo)
                    a //= len(combo)
                    action |= combo[current]
            elif self.use_restricted_actions == \
                    retro.Actions.MULTI_DISCRETE:
                # Is this entire thing just totally wrong? I think so.
                # Maybe I should submit a pull request.
                # ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                # for i in range(len(ap)):
                #     # I think this index should be modulo the number of
                #     # button_combos? It definitely goes beyond the length
                #     # of the list.
                #     buttons = self.button_combos[
                #         i % len(self.button_combos)]
                #     action |= buttons[ap[i]]
                num_combos = len(self.button_combos)
                ap = a[num_combos * p:num_combos * (p + 1)]
                for i in range(len(ap)):
                    buttons = self.button_combos[i]
                    action |= buttons[ap[i]]
            else:
                ap = a[self.num_buttons * p:self.num_buttons * (p + 1)]
                for i in range(len(ap)):
                    action |= int(ap[i]) << i
            if self.use_restricted_actions == retro.Actions.FILTERED:
                action = self.data.filter_action(action)
            ap = np.zeros([self.num_buttons], np.uint8)
            for i in range(self.num_buttons):
                ap[i] = (action >> i) & 1
            actions.append(ap)
        return actions

    def step(self, a):
        if self.img is None and self.ram is None:
            raise RuntimeError('Please call env.reset() before env.step()')
        for p, ap in enumerate(self.action_to_array(a)):
            if self.movie:
                for i in range(self.num_buttons):
                    self.movie.set_key(i, ap[i], p)
            self.em.set_button_mask(ap, p)
        if self.movie:
            self.movie.step()
        self.em.step()
        self.data.update_ram()
        ob = self._update_obs()
        rew, done, info = self.compute_step()
        return ob, rew, bool(done), dict(info)

    def reset(self):
        if self.initial_state:
            self.em.set_state(self.initial_state)
        for p in range(self.players):
            self.em.set_button_mask(
                np.zeros([self.num_buttons], np.uint8), p)
        self.em.step()
        if self.movie_path is not None:
            rel_statename = os.path.splitext(
                os.path.basename(self.statename))[0]
            self.record_movie(
                os.path.join(
                    self.movie_path,
                    '%s-%s-%06d.bk2' % (self.gamename, rel_statename,
                                        self.movie_id)))
            self.movie_id += 1
        if self.movie:
            self.movie.step()
        self.data.reset()
        self.ssb64_game_data.reset()
        self.data.update_ram()
        return self._update_obs()

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        return [seed1, seed2]

    def render(self, mode='human', close=False):
        if close:
            if self.viewer:
                self.viewer.close()
            return
        img = self.get_screen() if self.img is None else self.img
        if mode == "rgb_array":
            return img
        elif mode == "human":
            if self.viewer is None:
                from gym.envs.classic_control.rendering import \
                    SimpleImageViewer
                self.viewer = SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        if hasattr(self, 'em'):
            del self.em

    def get_action_meaning(self, act):
        actions = []
        for p, action in enumerate(self.action_to_array(act)):
            actions.append([
                self.buttons[i]
                for i in np.extract(action, np.arange(len(action)))
            ])
        if self.players == 1:
            return actions[0]
        return actions

    def get_ram(self):
        blocks = []
        for offset in sorted(self.data.memory.blocks):
            arr = np.frombuffer(self.data.memory.blocks[offset],
                                dtype=np.uint8)
            blocks.append(arr)
        return np.concatenate(blocks)

    def get_screen(self, player=0):
        img = self.em.get_screen()
        # OpenGL returns the image flipped and I'm not sure how to fix it
        # there.
        img = np.flipud(img)
        x, y, w, h = self.data.crop_info(player)
        if not w or x + w > img.shape[1]:
            w = img.shape[1]
        else:
            w += x
        if not h or y + h > img.shape[0]:
            h = img.shape[0]
        else:
            h += y
        if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]:
            return img
        return img[y:h, x:w]

    def load_state(self, statename,
                   inttype=retro.data.Integrations.DEFAULT):
        if not statename.endswith('.state'):
            statename += '.state'
        with gzip.open(
                retro.data.get_file_path(self.gamename, statename,
                                         inttype), 'rb') as fh:
            self.initial_state = fh.read()
        self.statename = statename

    def compute_step(self):
        """Specific to ssb64 for now."""
        self.ssb64_game_data.update(self.ram)
        if self.players > 1:
            # Make the reward a numpy array so that certain wrappers work
            # with it.
            reward = np.array([
                self.ssb64_game_data.current_reward(p)
                for p in range(self.players)
            ])
        else:
            reward = self.ssb64_game_data.current_reward()
        done = self.ssb64_game_data.is_done()
        return reward, done, self.ssb64_game_data.lookup_all()

    def record_movie(self, path):
        self.movie = retro.Movie(path, True, self.players)
        self.movie.configure(self.gamename, self.em)
        if self.initial_state:
            self.movie.set_state(self.initial_state)

    def stop_record(self):
        self.movie_path = None
        self.movie_id = 0
        if self.movie:
            self.movie.close()
            self.movie = None

    def auto_record(self, path=None):
        if not path:
            path = os.getcwd()
        self.movie_path = path
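# Hedged construction sketch for the N64Env above. Only the game name is
# confirmed by the class (anything else raises NotImplementedError); the
# 2-player choice and the action-space selection are illustrative.
#
#   env = N64Env('SuperSmashBros-N64', players=2,
#                use_restricted_actions=retro.Actions.MULTI_DISCRETE)
#   obs = env.reset()
#   obs, rew, done, info = env.step(env.action_space.sample())
#   env.render()  # opens a SimpleImageViewer window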
class ToyboxBaseEnv(AtariEnv, ABC): metadata = {'render.modes': ['human']} def __init__(self, toybox, game, frameskip=(2, 5), repeat_action_probability=0., grayscale=True, alpha=False, actions=None): assert (toybox.rstate) self.toybox = toybox # This is a workaround for issues with Gym wrappers # resetting state prematurely self.cached_state = None self.score = self.toybox.get_score() self.viewer = None # Required for compatibility with OpenAI Gym's Atari wrappers self.np_random = np_random self.ale = MockALE(toybox) utils.EzPickle.__init__(self, game, 'human', frameskip, repeat_action_probability) # By default, we don't need actions passed in: if actions is None: actions = toybox.get_legal_action_set() assert (actions is not None) self._action_set = actions self._obs_type = 'image' self._rgba = 1 if grayscale else 4 if alpha else 3 self._pixel_high = 255 self._height = self.toybox.get_height() self._width = self.toybox.get_width() self._dim = (self._height, self._width, self._rgba ) # * len(self.toybox.get_state())) self.reward_range = (0, float('inf')) self.action_space = spaces.Discrete(len(self._action_set)) self.observation_space = spaces.Box(low=0, high=self._pixel_high, shape=self._dim, dtype='uint8') def seed(self, seed=None): """ This mirrors the implementation in AtariEnv in openai/gym. """ self.np_random, seed1 = seeding.np_random(seed) # Derive a random seed. This gets passed as a uint, but gets # checked as an int elsewhere, so we need to keep it below # 2**31. # Toybox takes a uint seed, but we're copying the ALE seed for reasons above. # It's unclear who checks, so we stay safe here. seed2 = seeding.hash_seed(seed1 + 1) % 2**31 self.toybox.set_seed(seed2) # Start a new game to ensure that the seed gets used. self.toybox.new_game() return [seed1, seed2] # This is required to "trick" baselines into treating us as a regular Atari game # Implementation copied from baselines def get_action_meanings(self): #return [ACTION_MEANING[i] for i in self._action_set] return list(ACTION_MEANING.values()) # From OpenAI Gym Baselines # https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py def _get_obs(self): return self.toybox.get_state() def step(self, action_index): obs = None reward = None done = False info = {} # Sometimes the action_index is a numpy integer... #print('Action index and type', action_index, type(action_index)) assert (action_index < len(self._action_set)) assert (type(self._action_set) == list) self.toybox.apply_ale_action(self._action_set[action_index]) if self.ale.game_over(): print('GAME OVER') info['cached_state'] = self.toybox.to_state_json() obs = self._get_obs() # Compute the reward from the current score and reset the current score.
score = self.toybox.get_score() reward = max(score - self.score, 0) self.score = score # Check whether the episode is done # use "ale" semantics here done = self.ale.game_over() # Send back diagnostic information info['lives'] = self.toybox.get_lives() #info['frame'] = frame info['score'] = 0 if done else self.score return obs, reward, done, info def reset(self): self.cached_state = self.toybox.to_state_json() self.toybox.new_game() self.score = self.toybox.get_score() obs = self._get_obs() return obs def render(self, mode='human', close=False): if mode == 'human': # the following is copied from gym's AtariEnv if self.viewer is None: from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = SimpleImageViewer() self.viewer.imshow(self.toybox.get_rgb_frame()) return self.viewer.isopen elif mode == 'rgb_array': return self.toybox.get_rgb_frame() def close(self): if self.viewer is not None: self.viewer.close() del self.toybox self.toybox = None
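# ToyboxBaseEnv.step converts the game's cumulative score into a per-step reward with
# max(score - self.score, 0). A standalone illustration of that clipping, not from the
# original source: positive score increments become rewards, while a score reset (e.g.
# on life loss) yields 0 rather than a large negative reward.
def score_delta_reward(prev_score, score):
    return max(score - prev_score, 0)

assert score_delta_reward(100, 140) == 40   # score went up: reward the gain
assert score_delta_reward(140, 0) == 0      # score reset to zero: reward clipped to 0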
class SuperMarioKartEnv(RetroEnv): def __init__(self, game, state=retro.State.DEFAULT, scenario=None, sprite_buffer=20, **kwargs): RetroEnv.__init__(self, game, state, scenario, **kwargs) self.map = None self.sprite_buffer = sprite_buffer # defines visual area around kart; values 5-50 probably fine # TODO: update observation space here def get_screen(self, player=0): # Check the game mode game_mode_var = self.data.get_variable("game_mode") game_mode = self.data.memory.extract(game_mode_var["address"], game_mode_var["type"]) # Get kart direction direction = self.data.memory.extract(int("0x95", 16), "|u1") # convert the 0-255 heading byte to degrees direction = (direction / 255) * 360 if game_mode != 28: # return np.zeros((32, 32, 3)).astype("uint8") return np.zeros((128, 128, 3)).astype("uint8") # If we're in gameplay else: # Update the base map layer if necessary if self.map is None: self.map = self.read_map() # Make a copy and update the map with player position this_map = np.copy(self.map) # Get scaled player position on map player_position_east = self.data.memory.extract(8257672, "<u2") player_position_south = self.data.memory.extract(8257676, "<u2") player_position_east_relative = math.floor((player_position_east / 4100) * 128) player_position_south_relative = math.floor((player_position_south / 4100) * 128) # Normalize the physics scores to a [0, 255] greyscale map this_map = np.floor(((this_map - np.min(this_map)) / (np.max(this_map) - np.min(this_map))) * 255).astype("uint8") this_map = np.reshape(this_map, (128,128,1)) this_map = np.concatenate((this_map, this_map, this_map), axis=2) # Update the player position to be red this_map[player_position_south_relative, player_position_east_relative, 0] = 255 this_map[player_position_south_relative, player_position_east_relative, 1:3] = 0 # zero green and blue so the marker is pure red # add enemy karts positions = self.get_cpu_kart_pos() for player_num, position_dict in positions.items(): if position_dict["east"] == player_position_east and position_dict["south"] == player_position_south: continue else: p_south_rel = math.floor(((position_dict["south"] / 4100.0) * 128)) p_east_rel = math.floor(((position_dict["east"] / 4100.0) * 128)) this_map[p_south_rel, p_east_rel, 2] = 255 this_map[p_south_rel, p_east_rel, 0] = 0 this_map[p_south_rel, p_east_rel, 1] = 0 this_map = this_map.astype("uint8") # Rotate # Pad using sprite buffer # Original image generated from RAM is 128x128 a = np.concatenate((np.zeros((128, self.sprite_buffer, 3)), this_map), axis=1) a = np.concatenate((a, np.zeros((128, self.sprite_buffer, 3))), axis=1) a = np.concatenate((a, np.zeros((self.sprite_buffer, 128+self.sprite_buffer*2, 3))), axis=0) a = np.concatenate((np.zeros((self.sprite_buffer, 128+self.sprite_buffer*2, 3)), a), axis=0) a = a.astype("uint8") # Now need to account for padding smallmap = a[player_position_south_relative:player_position_south_relative + self.sprite_buffer*2, player_position_east_relative:player_position_east_relative+self.sprite_buffer*2, :] smallmap = rotate_image(smallmap, direction).astype("uint8") # Scale back to 128x128 dimensions CNN can handle dim = (128, 128) smallbigmap = cv2.resize(smallmap, dim, interpolation=cv2.INTER_AREA) return smallbigmap def read_map(self): # This base tile contains the first spritemap byte.
# We read them all into a 128x128 matrix to represent the overhead map base_tile_address = 8323072 base_physics_address = int("0xB00", 16) map = np.zeros((128,128)) # read every tile of the full 128x128 grid for x in range(128): for y in range(128): address = base_tile_address + (x + y * 128) tile = self.data.memory.extract(address, "|u1") # extract physics elements of each tile # physics = self.get_road_physics(self.data.memory.extract(base_physics_address+tile, "|u1")) physics = self.get_physics(self.data.memory.extract(base_physics_address+tile, "|u1")) map[x, y] = physics map = np.fliplr(map) map = np.rot90(map) return map def get_cpu_kart_pos(self): pos = {} for k in range(2,9): base = int("0xF00", 16) + int("0x100", 16) * k x = self.data.memory.extract(int("0x18", 16) + base, "<u2") * 4 y = self.data.memory.extract(int("0x1C", 16) + base, "<u2") * 4 pos[k] = {"east": x, "south": y} return pos def render(self, mode='human', close=False): # Mimics functionality of parent render method, but adds lowres overlay if close: if self.viewer: self.viewer.close() return # Get game and overlay screens game_img = RetroEnv.get_screen(self) game_img_shape = game_img.shape lowres_overhead = self.get_screen() lowres_shape = lowres_overhead.shape # Extend the image actual_game_image = np.concatenate((game_img,np.zeros((game_img_shape[0], lowres_shape[1], 3))), axis=1) actual_game_image[0:lowres_shape[0], game_img_shape[1]:game_img_shape[1] + lowres_shape[1], :] = lowres_overhead actual_game_image = actual_game_image.astype("uint8") # Scale scale_percent = 400 width = int(actual_game_image.shape[1] * scale_percent / 100) height = int(actual_game_image.shape[0] * scale_percent / 100) dim = (width, height) # resize image actual_game_image = cv2.resize(actual_game_image, dim, interpolation=cv2.INTER_AREA) if mode == "rgb_array": return actual_game_image elif mode == "human": if self.viewer is None: from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = SimpleImageViewer(maxwidth=width) self.viewer.imshow(actual_game_image) return self.viewer.isopen def get_road_physics(self, physics): if physics == int("0x40", 16): # --road return 1 elif physics == int("0x46",16): # --dirt road return 1 elif physics == int("0x42",16): # --ghost road return 1 elif physics == int("0x4E",16): # --light ghost road return 1 elif physics == int("0x50",16): # --wood bridge return 1 elif physics == int("0x1E",16): # --starting line return 1 elif physics == int("0x44",16): # --castle road return 1 elif physics == int("0x16",16): # --speed boost return 2 elif physics == int("0x10",16): # --jump pad return 1.5 elif physics == int("0x4C",16): # --choco road return 1 elif physics == int("0x4A",16): # --sand road return 1 else: return 0 def get_physics(self, physics): if physics == int("0x54",16): # --dirt return 0 elif physics == int("0x5A",16): # --lily pads/grass return 0 elif physics == int("0x5C",16): # --shallow water return 0 elif physics == int("0x58",16): # --snow return 0 elif physics == int("0x56",16): # --chocodirt return -0.5 elif physics == int("0x40",16): # --road return 1 elif physics == int("0x46",16): # --dirt road return 0.75 elif physics == int("0x52",16): # --loose dirt return 0.5 elif physics == int("0x42",16): # --ghost road return 1 elif physics == int("0x10",16): # --jump pad return 1.5 elif physics == int("0x4E",16): # --light ghost road return 1 elif physics == int("0x50",16): # --wood bridge return 1 elif physics == int("0x1E",16): # --starting line return 1 elif physics == int("0x44",16): # --castle road return 1
elif physics == int("0x16",16): # --speed boost return 2 elif physics == int("0x80",16): # --wall return -1.5 elif physics == int("0x26",16): # --oob grass return -1.5 elif physics == int("0x22",16): # --deep water return -1 elif physics == int("0x20",16): # --pit return -2 elif physics == int("0x82",16): # --ghost house border return -1.5 elif physics == int("0x24",16): # --lava return -2 elif physics == int("0x4C",16): # --choco road return 1 elif physics == int("0x12",16): # --choco bump return 0.75 elif physics == int("0x1C",16): # --choco bump return 0.75 elif physics == int("0x5E",16): # --mud return 0.5 elif physics == int("0x48",16): # --wet sand return 0.75 elif physics == int("0x4A",16): # --sand road return 1 elif physics == int("0x84",16): # --ice blocks return -1.5 elif physics == int("0x28",16): # --unsure return -1 elif physics == int("0x14",16): # --? box return 1.5 elif physics == int("0x1A",16): # --coin return 1.25 elif physics == int("0x18",16): # --oil spill return -0.75 else: raise(Exception("Unknown physics: {}".format(physics)))
class MultiGridEnv(gym.Env): def __init__( self, agents, grid_size=None, width=None, height=None, max_steps=100, see_through_walls=False, done_condition=None, seed=1337, ): if grid_size is not None: assert width == None and height == None width, height = grid_size, grid_size if done_condition is not None and done_condition not in ("any", "all"): raise ValueError("done_condition must be one of ['any', 'all', None].") self.done_condition = done_condition self.num_agents = len(agents) self.agents = agents self.action_space = gym.spaces.Tuple( tuple(gym.spaces.Discrete(len(agent.actions)) for agent in self.agents) ) self.observation_space = gym.spaces.Tuple( tuple( gym.spaces.Box( low=0, high=255, shape=(agent.view_size, agent.view_size, 3), dtype="uint8", ) for agent in self.agents ) ) self.reward_range = [(0, 1) for _ in range(len(self.agents))] self.window = None self.width = width self.height = height self.max_steps = max_steps self.see_through_walls = see_through_walls self.seed(seed=seed) self.reset() def seed(self, seed=1337): # Seed the random number generator self.np_random, _ = gym.utils.seeding.np_random(seed) return [seed] def _rand_int(self, low, high): """ Generate random integer in [low,high[ """ return self.np_random.randint(low, high) def _rand_float(self, low, high): """ Generate random float in [low,high[ """ return self.np_random.uniform(low, high) def _rand_bool(self): """ Generate random boolean value """ return self.np_random.randint(0, 2) == 0 def _rand_elem(self, iterable): """ Pick a random element in a list """ lst = list(iterable) idx = self._rand_int(0, len(lst)) return lst[idx] def reset(self): for agent in self.agents: agent.reset() self._gen_grid(self.width, self.height) for agent in self.agents: # Make sure _gen_grid initialized agent positions assert (agent.pos is not None) and (agent.dir is not None) # Make sure the agent doesn't overlap with an object start_cell = self.grid.get(*agent.pos) # assert start_cell is None or start_cell.can_overlap() assert start_cell is agent self.step_count = 0 obs = self.gen_obs() return obs def gen_obs_grid(self, agent): topX, topY, botX, botY = agent.get_view_exts() grid = self.grid.slice( topX, topY, agent.view_size, agent.view_size, rot_k=agent.dir + 1 ) # Process occluders and visibility # Note that this incurs some performance cost if not self.see_through_walls: vis_mask = grid.process_vis( agent_pos=(agent.view_size // 2, agent.view_size - 1) ) else: vis_mask = np.ones(shape=(grid.width, grid.height), dtype=np.bool) return grid, vis_mask def gen_agent_obs(self, agent): grid, vis_mask = self.gen_obs_grid(agent) return grid.render(tile_size=agent.view_tile_size) # ,highlight_mask=~vis_mask) def gen_obs(self): """ Generate the agent's view (partially observable, low-resolution encoding) """ # obs_list = [] # for agent in self.agents: # grid, vis_mask = self.gen_obs_grid(agent) # obs_list.append({ # 'image': grid.encode(vis_mask), # 'direction': agent.dir, # 'mission': agent.mission # }) return [self.gen_agent_obs(agent) for agent in self.agents] # return obs_list # def get_obs_render(self, obs, agent, tile_size=TILE_PIXELS//2): # grid, vis_mask = MultiGrid.decode(obs) def __str__(self): return self.grid.__str__() def step(self, actions): assert len(actions) == len(self.agents) rewards = np.zeros((len(self.agents,)), dtype=np.float) self.step_count += 1 wasteds = [] for agent_no, (agent, action) in enumerate(zip(self.agents, actions)): wasted = False if agent.active: cur_pos = agent.pos cur_cell = self.grid.get(*cur_pos) 
fwd_pos = agent.front_pos fwd_cell = self.grid.get(*fwd_pos) # Rotate left if action == agent.actions.left: agent.dir = (agent.dir - 1) % 4 # Rotate right elif action == agent.actions.right: agent.dir = (agent.dir + 1) % 4 # Move forward elif action == agent.actions.forward: # Under these conditions, the agent can move forward. if (fwd_cell is None) or fwd_cell.can_overlap(): # Move the agent to the forward cell agent.pos = fwd_pos if fwd_cell is None: self.grid.set(*fwd_pos, agent) elif fwd_cell.can_overlap(): fwd_cell.agent = agent if cur_cell == agent: self.grid.set(*cur_pos, None) else: cur_cell.agent = None else: wasted = True if isinstance(fwd_cell, Goal): # No extra wasting logic rewards[agent_no] += fwd_cell.reward agent.done = True fwd_cell.agent = None if isinstance(fwd_cell, Lava): agent.done = True # Pick up an object elif action == agent.actions.pickup: if fwd_cell and fwd_cell.can_pickup(): if agent.carrying is None: agent.carrying = fwd_cell agent.carrying.cur_pos = np.array([-1, -1]) self.grid.set(*fwd_pos, None) else: wasted = True # Drop an object elif action == agent.actions.drop: if not fwd_cell and agent.carrying: self.grid.set(*fwd_pos, agent.carrying) agent.carrying.cur_pos = fwd_pos agent.carrying = None else: wasted = True # Toggle/activate an object elif action == agent.actions.toggle: if fwd_cell: wasted = bool(fwd_cell.toggle(agent, fwd_pos)) else: wasted = True # Done action (not used by default) elif action == agent.actions.done: # dones[agent_no] = True wasted = True else: raise ValueError(f"Environment can't handle action {action}.") wasteds.append(wasted) done = np.array([agent.done for agent in self.agents], dtype=np.bool) if self.step_count >= self.max_steps: done[:] = True if self.done_condition is None: pass elif self.done_condition == "any": done = any(done) elif self.done_condition == "all": done = all(done) obs = [self.gen_agent_obs(agent) for agent in self.agents] wasteds = np.array(wasteds, dtype=np.bool) return obs, rewards, done, wasteds @property def agent_positions(self): return [ tuple(agent.pos) if agent.pos is not None else None for agent in self.agents ] def place_obj(self, obj, top=None, size=None, reject_fn=None, max_tries=math.inf): max_tries = int(max(1, min(max_tries, 1e5))) if top is None: top = (0, 0) else: top = (max(top[0], 0), max(top[1], 0)) if size is None: size = (self.grid.width, self.grid.height) agent_positions = self.agent_positions for try_no in range(max_tries): pos = ( self._rand_int(top[0], min(top[0] + size[0], self.grid.width)), self._rand_int(top[1], min(top[1] + size[1], self.grid.height)), ) if ( (self.grid.get(*pos) is None) and (pos not in agent_positions) and (reject_fn is None or (not reject_fn(pos))) ): break else: raise RecursionError("Rejection sampling failed in place_obj.") self.grid.set(*pos, obj) if obj is not None: obj.init_pos = pos obj.cur_pos = pos return pos def put_obj(self, obj, i, j): """ Put an object at a specific position in the grid """ self.grid.set(i, j, obj) obj.init_pos = (i, j) obj.cur_pos = (i, j) def place_agent(self, agent, top=None, size=None, rand_dir=True, max_tries=100): agent.pos = self.place_obj(agent, top=top, size=size, max_tries=max_tries) if rand_dir: agent.dir = self._rand_int(0, 4) return agent def place_agents(self, top=None, size=None, rand_dir=True, max_tries=100): for agent in self.agents: self.place_agent( agent, top=top, size=size, rand_dir=rand_dir, max_tries=max_tries ) if hasattr(self, "mission"): agent.mission = self.mission def render( self, mode="human", 
close=False, highlight=True, tile_size=TILE_PIXELS, show_agent_views=True, max_agents_per_col=3, ): """ Render the whole-grid human view """ if close: if self.window: self.window.close() return if mode == "human" and not self.window: from gym.envs.classic_control.rendering import SimpleImageViewer self.window = SimpleImageViewer() # self.window.show(block=False) # Compute which cells are visible to the agent highlight_mask = np.full((self.width, self.height), False, dtype=np.bool) for agent in self.agents: xlow, ylow, xhigh, yhigh = agent.get_view_exts() if agent.active: highlight_mask[ max(0, xlow) : min(self.grid.width, xhigh), max(0, ylow) : min(self.grid.height, yhigh), ] = True # Render the whole grid img = self.grid.render( tile_size, highlight_mask=highlight_mask if highlight else None ) rescale = lambda X, rescale_factor=2: np.kron( X, np.ones((rescale_factor, rescale_factor, 1)) ) if show_agent_views: agent_no = 0 cols = [] rescale_factor = None for col_no in range(len(self.agents) // (max_agents_per_col + 1) + 1): col_count = min(max_agents_per_col, len(self.agents) - agent_no) views = [] for row_no in range(col_count): tmp = self.gen_agent_obs(self.agents[agent_no]) if rescale_factor is None: rescale_factor = img.shape[0] // ( min(3, col_count) * tmp.shape[1] ) views.append(rescale(tmp, rescale_factor)) agent_no += 1 col_width = max([v.shape[1] for v in views]) img_col = np.zeros((img.shape[0], col_width, 3), dtype=np.uint8) for k, view in enumerate(views): start_x = (k * img.shape[0]) // len(views) start_y = 0 # (k*img.shape[1])//len(views) dx, dy = view.shape[:2] img_col[start_x : start_x + dx, start_y : start_y + dy, :] = view cols.append(img_col) img = np.concatenate((img, *cols), axis=1) if mode == "human": self.window.imshow(img) return img
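# MultiGridEnv.render upscales each agent's observation with np.kron before tiling the views
# next to the grid image. A standalone demonstration of that rescale trick, not from the
# original source: a Kronecker product with a block of ones repeats every pixel
# rescale_factor times along both spatial axes, i.e. nearest-neighbor upscaling.
import numpy as np

img = np.arange(12, dtype=np.uint8).reshape(2, 2, 3)    # tiny 2x2 RGB image
big = np.kron(img, np.ones((4, 4, 1), dtype=np.uint8))  # rescale_factor = 4 -> 8x8 RGB
assert big.shape == (8, 8, 3)
assert (big[:4, :4] == img[0, 0]).all()                 # each pixel becomes a 4x4 block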
class PyColabEnv(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], } def __init__(self, max_iterations, obs_type, default_reward, action_space, act_null_value=4, delay=30, resize_scale=8, crop_window=[5, 5]): """Create a `PyColabEnv` adapter to a `pycolab` game as a `gym.Env`. You can access the `pycolab.Engine` instance with `env.current_game`. Args: max_iterations: maximum number of steps. obs_type: type of observation to return. default_reward: default reward if reward is None returned by the `pycolab` game. action_space: the action `Space` of the environment. delay: renderer delay. resize_scale: number of pixels per observation pixel. Used only by the renderer. crop_window: dimensions of observation cropping. """ assert max_iterations > 0 assert isinstance(default_reward, numbers.Number) self._max_iterations = max_iterations self._default_reward = default_reward # At this point, the game would only want to access the random # property, although it is set to None initially. self.np_random = None self._colors = self.make_colors() test_game = self.make_game() test_game.the_plot.info = {} observations, _, _ = test_game.its_showtime() layers = list(observations.layers.keys()) not_ordered = list(set(layers) - set(test_game.z_order)) self._render_order = list(reversed(not_ordered + test_game.z_order)) # Create the observation space. self.obs_type = obs_type if self.obs_type == 'mask': self.observation_space = spaces.Box( 0., 1., [len(self.state_layer_chars)] + crop_window) # don't count empty space layer elif self.obs_type == 'rgb': self.observation_space = spaces.Box( 0., 255., [crop_window[0] * resize_scale, crop_window[1] * resize_scale ] + [3]) self.action_space = action_space self.act_null_value = act_null_value self.current_game = None self._croppers = [] self._state = None self._last_uncropped_observations = None self._empty_uncropped_board = None self._last_cropped_observations = None self._empty_cropped_board = None self._last_reward = None self._game_over = False self.viewer = None self.resize_scale = resize_scale self.delay = delay # Metrics self.visitation_frequency = {char: 0 for char in self.objects} self.first_visit_time = {char: 500 for char in self.objects} # Heatmaps self.episodes = 0 # number of episodes run (to determine when to save heatmaps) self.heatmap_save_freq = 3 # save heatmaps every 3 episodes self.heatmap = np.ones( (5, 5)) # stores counts each episode (5x5 is a placeholder) def pycolab_init(self, logdir, log_heatmaps): self.log_heatmaps = log_heatmaps root_path = os.path.abspath(__file__).split('/')[1:] root_path = root_path[:root_path.index('curiosity_baselines') + 1] self.heatmap_path = '/' + '/'.join(root_path) + '/' + '/'.join( logdir.split('/')[1:]) + '/heatmaps' if os.path.isdir(self.heatmap_path) == False and log_heatmaps == True: os.makedirs(self.heatmap_path) @abc.abstractmethod def make_game(self): """Function that creates a new pycolab game. Returns: pycolab.Engine. """ pass def make_colors(self): """Function that returns colors. Returns: Dictionary mapping key name to `tuple(R, G, B)`. """ return { 'P': (255., 255., 255.), 'a': (175., 255., 15.), 'b': (21., 0., 255.), 'c': (250., 0., 129.), 'd': (0., 250., 71.), 'e': (255., 0., 0.), 'f': (252., 28., 3.), 'g': (136., 3., 252.), 'h': (20., 145., 60.), '#': (61., 61., 61.), '@': (255., 255., 0.), ' ': (0., 0., 0.) } def _paint_board(self, layers, cropped=False): """Private method to paint layers to RGB. Args: layers: a dictionary mapping a character to the respective curtain.
cropped: whether or not this is being called to paint cropped or uncropped images. Returns: 3D np.array (np.uint32) representing the RGB of the observation layers. """ if not cropped: board_shape = self._last_uncropped_observations.board.shape else: board_shape = self._last_cropped_observations.board.shape board = np.zeros(list(board_shape) + [3], np.uint32) board_mask = np.zeros(list(board_shape) + [3], np.bool) for key in self._render_order: color = self._colors.get(key, (0, 0, 0)) color = np.reshape(color, [1, 1, -1]).astype(np.uint32) # Broadcast the layer to [H, W, C]. board_layer_mask = np.array(layers[key])[..., None] board_layer_mask = np.repeat(board_layer_mask, 3, axis=-1) # Update the board with the new layer. board = np.where(np.logical_not(board_mask), board_layer_mask * color, board) # Update the mask. board_mask = np.logical_or(board_layer_mask, board_mask) return board def _update_for_game_step(self, observations, reward): """Update internal state with data from an environment interaction.""" # disentangled one hot state if self.obs_type == 'mask': self._state = [] for char in self.state_layer_chars: if char != ' ': mask = observations.layers[char].astype(float) if char in self.objects and 1. in mask: self.visitation_frequency[char] += 1 self._state.append(mask) self._state = np.array(self._state) elif self.obs_type == 'rgb': rgb_img = self._paint_board(observations.layers, cropped=True).astype(float) self._state = self.resize(rgb_img) for char in self.state_layer_chars: if char != ' ': mask = observations.layers[char].astype(float) if char in self.objects and 1. in mask: self.visitation_frequency[char] += 1 # update heatmap metric if self.log_heatmaps == True: pr, pc = self.current_game.things['P'].position self.heatmap[pr, pc] += 1 self._last_reward = reward if reward is not None else \ self._default_reward self._game_over = self.current_game.game_over if self.current_game.the_plot.frame >= self._max_iterations: self._game_over = True def reset(self): """Start a new episode.""" self.current_game = self.make_game() for cropper in self._croppers: cropper.set_engine(self.current_game) self._colors = self.make_colors() self.current_game.the_plot.info = {} self._game_over = None self._last_observations = None self._last_reward = None observations, reward, _ = self.current_game.its_showtime() self._last_uncropped_observations = observations self._empty_uncropped_board = np.zeros_like( self._last_uncropped_observations.board) if len(self._croppers) > 0: observations = [ cropper.crop(observations) for cropper in self._croppers ][0] self._last_cropped_observations = observations self._empty_cropped_board = np.zeros_like( self._last_cropped_observations) # save and reset metrics self.visitation_frequency = {char: 0 for char in self.objects} if self.log_heatmaps == True and self.episodes % self.heatmap_save_freq == 0: np.save('{}/{}.npy'.format(self.heatmap_path, self.episodes), self.heatmap) heatmap_normed = self.heatmap / np.linalg.norm(self.heatmap) plt.imsave('{}/{}.png'.format(self.heatmap_path, self.episodes), heatmap_normed, cmap='afmhot', vmin=0.0, vmax=1.0) self.episodes += 1 self.heatmap = np.zeros(self._last_uncropped_observations.board.shape) # run update self._update_for_game_step(observations, reward) return self._state def step(self, action): """Apply action, step the world forward, and return observations. Args: action: the desired action to apply to the environment. Returns: state, reward, done, info. 
""" if self.current_game is None: logger.warn("Episode has already ended, call `reset` instead..") self._state = None reward = self._last_reward done = self._game_over return self._state, reward, done, {} # Execute the action in pycolab. self.current_game.the_plot.info = {} observations, reward, _ = self.current_game.play(action) self._last_uncropped_observations = observations self._empty_uncropped_board = np.zeros_like( self._last_uncropped_observations.board) # Crop and update if len(self._croppers) > 0: observations = [ cropper.crop(observations) for cropper in self._croppers ][0] self._last_cropped_observations = observations self._empty_cropped_board = np.zeros_like( self._last_cropped_observations.board) self._update_for_game_step(observations, reward) info = self.current_game.the_plot.info # Add custom metrics info['visitation_frequency'] = self.visitation_frequency info['first_time_visit'] = self.first_visit_time # Check the current status of the game. reward = self._last_reward done = self._game_over if self._game_over: self.current_game = None return self._state, reward, done, info def render(self, mode='rgb_array', close=False): """Render the board to an image viewer or an np.array. Args: mode: One of the following modes: - 'human': render to an image viewer. - 'rgb_array': render to an RGB np.array (np.uint8) Returns: 3D np.array (np.uint8) or a `viewer.isopen`. """ img = self._empty_uncropped_board if self._last_uncropped_observations: img = self._last_uncropped_observations.board layers = self._last_uncropped_observations.layers if self._colors: img = self._paint_board(layers, cropped=False) else: assert img is not None, '`board` must not be `None`.' img = self.resize(img) if mode == 'rgb_array': return img elif mode == 'human': if self.viewer is None: from gym.envs.classic_control.rendering import ( SimpleImageViewer) self.viewer = SimpleImageViewer() self.viewer.imshow(img) time.sleep(self.delay / 1e3) return self.viewer.isopen def resize(self, img): img = _repeat_axes(img, self.resize_scale, axis=[0, 1]) if len(img.shape) != 3: img = np.repeat(img[..., None], 3, axis=-1) return img.astype(np.uint8) def seed(self, seed=None): """Seeds the environment. Args: seed: seed of the random engine. Returns: [seed]. """ self.np_random, seed = seeding.np_random(seed) return [seed] def close(self): """Tears down the renderer.""" if self.viewer: self.viewer.close() self.viewer = None
class KrazyGridWorld: def __init__(self, screen_height, grid_squares_per_row=10, one_hot_obs=True, seed=42, task_seed=None, init_pos_seed=None, death_square_percentage=0.1, ice_sq_perc=0.05, num_goals=3, min_goal_distance=2, max_goal_distance=np.inf, num_steps_before_energy_needed=11, energy_replenish=8, energy_sq_perc=0.05, num_transporters=1, sparse_rewards=True, image_obs=True, use_local_obs=False): if task_seed is None: task_seed = seed if init_pos_seed is None: init_pos_seed = seed self.init_pos_rng = np.random.RandomState(init_pos_seed) self.task_rng = np.random.RandomState(task_seed) random.seed(task_seed) self.one_hot_obs = one_hot_obs self.image_obs = image_obs self.use_local_obs = use_local_obs self.screen_dim = (screen_height, screen_height) # width and height self.tile_types = TileTypes() self.agent = Agent( num_steps_until_energy_needed=num_steps_before_energy_needed, energy_replenish=energy_replenish) self.game_grid = GameGrid(grid_squares_per_row=grid_squares_per_row, tile_types=self.tile_types, agent=self.agent, task_rng=self.task_rng, death_sq_perc=death_square_percentage, energy_sq_perc=energy_sq_perc, ice_sq_perc=ice_sq_perc, num_goals=num_goals, min_goal_distance=min_goal_distance, max_goal_distance=max_goal_distance, num_transporters=num_transporters) self.num_goals_obtained = 0 self.sparse_reward = sparse_rewards self.reset_task() self.simple_image_viewer = None self.last_im_obs = None def reset(self, reset_agent_start_pos=False, reset_board=False, reset_colors=False, reset_dynamics=False): self.agent.dead = False self.agent.agent_position = copy.deepcopy( self.agent.agent_position_init) self.agent.num_steps_until_energy_needed = copy.deepcopy( self.agent.energy_init) self.num_goals_obtained = 0 self.game_grid.grid_np = copy.deepcopy(self.game_grid.game_grid_init) if reset_colors: self.tile_types.reset_colors() if reset_dynamics: self.agent.change_dynamics() if reset_board: self.reset_task() if reset_agent_start_pos: self.reset_agent_start_position() return self.get_obs() def reset_task(self): # reset the entire board and agent start position, generating a new MDP. self.game_grid.get_new_game_grid() self.reset_agent_start_position() def reset_agent_start_position(self): # keep the previous board but update the agent's starting position. # keeps the previous MDP but samples x_0. new_start = self.game_grid.get_one_non_agent_square() self.agent.agent_position = new_start self.agent.agent_position_init = new_start def get_obs(self): if self.image_obs: return self.get_img_obs() else: return None def step(self, a, render=False): if self.agent.dead is False: proposed_step = self.agent.try_step(a) if self.game_grid.is_position_legal(proposed_step): self.agent.agent_position = proposed_step self.check_dead() self.check_at_goal() self.check_at_energy() self.check_at_transporter() # handle sliding across ice squares while True: if self.check_at_ice_square() is False: break else: # don't take energy for going over ice.
self.agent.num_steps_until_energy_needed += 1 proposed_step_nu = self.agent.try_step(a) if self.game_grid.is_position_legal(proposed_step_nu): self.step(a) else: break if self.agent.num_steps_until_energy_needed < 1: self.agent.dead = True if render: self.render() return self.get_obs(), self.get_reward(), self.agent.dead, dict() def check_dead(self): agent_pos = self.agent.agent_position game_grid = self.game_grid.grid_np if game_grid[agent_pos[0], agent_pos[1]] == self.tile_types.death: self.agent.dead = True def check_at_goal(self): if self.game_grid.grid_np[ self.agent.agent_position[0], self.agent.agent_position[1]] == self.tile_types.goal: self.game_grid.grid_np[ self.agent.agent_position[0], self.agent.agent_position[1]] = self.tile_types.normal self.num_goals_obtained += 1 def check_at_energy(self): if self.game_grid.grid_np[ self.agent.agent_position[0], self.agent.agent_position[1]] == self.tile_types.energy: self.game_grid.grid_np[ self.agent.agent_position[0], self.agent.agent_position[1]] = self.tile_types.normal self.agent.give_energy() def check_at_transporter(self): transport_sq = None if self.game_grid.grid_np[ self.agent.agent_position[0], self.agent.agent_position[1]] == self.tile_types.transporter: for tr in self.game_grid.transporters: if self.agent.agent_position[0] == tr[0][ 0] and self.agent.agent_position[1] == tr[0][1]: transport_sq = tr[1] elif self.agent.agent_position[0] == tr[1][ 0] and self.agent.agent_position[1] == tr[1][1]: transport_sq = tr[0] if transport_sq is not None: self.agent.agent_position = [transport_sq[0], transport_sq[1]] def check_at_ice_square(self): if self.game_grid.grid_np[ self.agent.agent_position[0], self.agent.agent_position[1]] == self.tile_types.ice: return True return False def render(self): if self.simple_image_viewer is None: from gym.envs.classic_control.rendering import SimpleImageViewer self.simple_image_viewer = SimpleImageViewer() im_obs = self.get_img_obs() self.simple_image_viewer.imshow(im_obs) time.sleep(0.075) def get_state_obs(self): grid_np = copy.deepcopy(self.game_grid.grid_np) agent_p = self.agent.agent_position grid_np[agent_p[0], agent_p[1]] = self.tile_types.agent grid_np = grid_np.astype(np.uint8) #agent_p = np.array(self.agent.agent_position) if self.one_hot_obs: n_values = np.max(grid_np) + 1 grid_np = np.eye(n_values)[grid_np] #agent_p_temp = np.zeros((self.game_grid.grid_squares_per_row, self.game_grid.grid_squares_per_row, 1)) #agent_p_temp[agent_p[0], agent_p[1], :] = 1 if self.use_local_obs: neighbors = [] x, y = self.agent.agent_position for _i, _j in [(-1, -1), (0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0)]: i, j = (_i + x, _j + y) if 0 <= i < self.game_grid.grid_squares_per_row and 0 <= j < self.game_grid.grid_squares_per_row: neighbors.append([j, i]) else: neighbors.append(None) grid_np = np.array(neighbors) return grid_np.flatten() def get_img_obs(self): grid_np = copy.deepcopy(self.game_grid.grid_np) grid_np[self.agent.agent_position[0], self.agent.agent_position[1]] = self.tile_types.agent fake_img = np.zeros((self.game_grid.grid_squares_per_row, self.game_grid.grid_squares_per_row, 3)) for i in range(len(self.tile_types.all_tt)): is_grid_sq_color_i = grid_np == self.tile_types.all_tt[i] one_idxs = is_grid_sq_color_i.astype(int) one_idxs = np.tile(np.expand_dims(one_idxs, -1), 3) one_idxs = one_idxs * np.array(self.tile_types.colors[i].value) fake_img += one_idxs if self.use_local_obs: neighbors = [] x, y = self.agent.agent_position valid_idxs = np.zeros_like(fake_img) valid_idxs[x, y] 
= 1.0 for _i, _j in [(-1, -1), (0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0)]: i, j = (_i + x, _j + y) if 0 <= i < self.game_grid.grid_squares_per_row and 0 <= j < self.game_grid.grid_squares_per_row: #neighbors.append([j, i]) valid_idxs[i, j] = 1.0 else: neighbors.append(None) fake_img *= valid_idxs res = cv2.resize(fake_img, dsize=(256, 256), interpolation=cv2.INTER_NEAREST) res = res.astype(np.uint8) return res def get_reward(self): if self.sparse_reward: return 0 + self.num_goals_obtained else: rew = 0 for goal in self.game_grid.goal_squares: dist_1 = abs(goal[0] - self.agent.agent_position[0]) dist_2 = abs(goal[1] - self.agent.agent_position[1]) rew = rew + dist_1 + dist_2 rew = -1.0 * rew rew = rew + 3.0 * self.num_goals_obtained return rew def close(self): self.simple_image_viewer.close()
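# A short rollout sketch against KrazyGridWorld's constructor, reset, step, and close
# signatures shown above. This is not from the original source; the excerpt never shows the
# action encoding used by Agent.try_step, so the 4-action assumption (one per direction)
# is mine and may not match the real environment.
import numpy as np

kw = KrazyGridWorld(screen_height=256, grid_squares_per_row=10, num_goals=3, seed=42)
obs = kw.reset(reset_board=True, reset_agent_start_pos=True)
for _ in range(100):
    a = np.random.randint(4)              # assumed action space: up/down/left/right
    obs, reward, dead, info = kw.step(a)  # step returns (obs, reward, agent.dead, dict())
    if dead:
        obs = kw.reset()
kw.close()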
class PyColabEnv(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], } def __init__(self, max_iterations, obs_type, default_reward, action_space, act_null_value=4, delay=30, resize_scale=8, crop_window=[5, 5], visitable_states=0, color_palette=0, reward_switch=[], reward_config=dict(), switch_perturbations=[], dimensions=(19, 19)): """Create a `PyColabEnv` adapter to a `pycolab` game as a `gym.Env`. You can access the `pycolab.Engine` instance with `env.current_game`. Args: max_iterations: maximum number of steps. obs_type: type of observation to return. default_reward: default reward if reward is None returned by the `pycolab` game. action_space: the action `Space` of the environment. delay: renderer delay. resize_scale: number of pixels per observation pixel. Used only by the renderer. crop_window: dimensions of observation cropping. visitable_states: number of states the agent can visit. color_palette: which color palette to use for objects. reward_switch: list of objects or coords if the reward function switches. reward_config: list of objects and their associated rewards. switch_perturbations: color perturbations if a background switch is applied. dimensions: dimensions of the game board. """ assert max_iterations > 0 assert isinstance(default_reward, numbers.Number) self._max_iterations = max_iterations # Reward specs self._default_reward = default_reward self._switch = 0 self._reward_switch = reward_switch self._reward_target = None self._switch_perturbations = switch_perturbations self._reward_config = reward_config # At this point, the game would only want to access the random # property, although it is set to None initially. self.np_random = None self._color_palette = color_palette self._colors = self.make_colors() test_game = self.make_game(reward_config=self._reward_config) test_game.the_plot.info = {} observations, _, _ = test_game.its_showtime() layers = list(observations.layers.keys()) not_ordered = list(set(layers) - set(test_game.z_order)) self._render_order = list(reversed(not_ordered + test_game.z_order)) # Prepare observation space.
self.obs_type = obs_type self.height, self.width = dimensions self.crop_window = crop_window self.action_space = action_space if self.obs_type == 'mask': self.observation_space = spaces.Box( 0., 1., [len(self.state_layer_chars)] + self.crop_window) # don't count empty space layer elif self.obs_type == 'rgb': self.observation_space = spaces.Box( 0., 255., [self.crop_window[0] * 17, self.crop_window[1] * 17] + [3]) elif self.obs_type == 'rgb_full': if 84 % self.width == 0: self.observation_space = spaces.Box(0., 255., [84, 84] + [3]) else: self.observation_space = spaces.Box(0., 255., [85, 85] + [3]) self.act_null_value = act_null_value self.visitable_states = visitable_states self.current_game = None self._croppers = [] self._state = None self._last_uncropped_observations = None self._empty_uncropped_board = None self._last_cropped_observations = None self._empty_cropped_board = None self._last_reward = None self._game_over = False self.viewer = None self.delay = delay # Metrics self.visitation_frequency = {char: 0 for char in self.objects} self.first_visit_time = {char: 500 for char in self.objects} self.visitation_entropy = 0 self.num_obj_eps = {char: 0 for char in self.objects} self.coverage = 0 def heatmap_init(self, logdir, log_heatmaps): self.episodes = 0 # number of episodes run (to determine when to save heatmaps) self.heatmap_save_freq = 3 # save heatmaps every 3 episodes self.heatmap = np.zeros( (5, 5)) # stores counts each episode (5x5 is a placeholder) self.log_heatmaps = log_heatmaps root_path = os.path.abspath(__file__).split('/')[1:] root_path = root_path[:root_path.index('curiosity_baselines') + 1] self.heatmap_path = '/' + '/'.join(root_path) + '/' + '/'.join( logdir.split('/')[1:]) + '/heatmaps' self.startup = True if os.path.isdir(self.heatmap_path) == False and log_heatmaps == True: os.makedirs(self.heatmap_path) elif os.path.isdir(self.heatmap_path) == True: heatmaps = os.listdir(self.heatmap_path) if len(heatmaps) != 0: sorted_images = sorted(heatmaps, key=lambda img: int(img.split('.')[0])) last_episode = int(sorted_images[-1].split('.')[0]) self.episodes = last_episode def obs_init(self, resize_scale): self.resize_scale = resize_scale @abc.abstractmethod def make_game(self): """Function that creates a new pycolab game. Returns: pycolab.Engine. """ pass def make_colors(self): """Function that returns colors. Returns: Dictionary mapping key name to `tuple(R, G, B)`. """ if self._color_palette == 0: return { 'P': (255., 255., 255.), 'a': (175., 255., 15.), 'b': (21., 0., 255.), 'c': (255., 0., 0.), 'd': (19., 139., 67.), 'e': (250., 0., 129.), 'f': (114., 206., 227.), 'g': (136., 3., 252.), 'h': (245., 119., 34.), '#': (61., 61., 61.), '@': (90., 90., 90.), ' ': (0., 0., 0.), '.': (110., 35., 35.) } elif self._color_palette == 1: return { 'P': (255., 255., 255.), 'a': (136., 3., 252.), 'b': (21., 0., 255.), 'c': (255., 0., 0.), 'd': (19., 139., 67.), 'e': (150., 0., 129.), '#': (61., 61., 61.), '@': (90., 90., 90.), ' ': (0., 0., 0.), '.': (110., 35., 35.) } elif self._color_palette == 2: return { 'P': (255., 255., 255.), 'a': (255., 0., 0.), 'b': (255., 0., 0.), 'c': (255., 0., 0.), 'd': (255., 0., 0.), 'e': (255., 0., 0.), 'f': (255., 0., 0.), 'g': (255., 0., 0.), 'h': (255., 0., 0.), '#': (61., 61., 61.), '@': (90., 90., 90.), ' ': (0., 0., 0.), '.': (110., 35., 35.)
} elif self._color_palette == 3: return { 'P': (255., 255., 255.), 'a': (30., 60., 90.), 'b': (90., 60., 30.), 'c': (90., 30., 60.), 'd': (10., 100., 70.), 'e': (10., 10., 160.), 'f': (25., 130., 25.), 'g': (50., 40., 90.), 'h': (130., 25., 25.), '#': (61., 61., 61.), '@': (90., 90., 90.), ' ': (0., 0., 0.), '.': (110., 35., 35.) } def _check_visit(self, char): """Private method to check if the player has visited "char". A visit is when the character is within the 5x5 tile window around the player. """ pr, pc = self.current_game.things['P'].position cr, cc = self.current_game.things[char].position if (pr - 2) <= cr <= (pr + 2) and (pc - 2) <= cc <= (pc + 2): return True return False def _paint_board(self, layers, cropped=False): """Method to privately paint layers to RGB. Args: layers: a dictionary mapping a character to the respective curtain. cropped: whether or not this is being called to paint cropped or uncropped images. Returns: 3D np.array (np.uint32) representing the RGB of the observation layers. """ if not cropped: board_shape = self._last_uncropped_observations.board.shape else: board_shape = self._last_cropped_observations.board.shape board = np.zeros(list(board_shape) + [3], np.uint32) board_mask = np.zeros(list(board_shape) + [3], np.bool) for key in self._render_order: color = self._colors.get(key, (0, 0, 0)) color = np.reshape(color, [1, 1, -1]).astype(np.uint32) # Broadcast the layer to [H, W, C]. board_layer_mask = np.array(layers[key])[..., None] board_layer_mask = np.repeat(board_layer_mask, 3, axis=-1) # @ correspond to white noise or changing background perturbation = np.zeros(board_layer_mask.shape) if key == '@': if len(self._reward_switch) > 0: perturbation = self._switch_perturbations[self._switch] else: h, w = board_layer_mask.shape[:2] perturbation = np.random.randint(-15, 15, (h, w, 1)) # Update the board with the new layer. board = np.where(np.logical_not(board_mask), board_layer_mask * color + perturbation, board) # Update the mask. board_mask = np.logical_or(board_layer_mask, board_mask) return board def _update_for_game_step(self, observations, reward): """Update internal state with data from an environment interaction.""" # disentangled one hot state if self.obs_type == 'mask': self._state = [] for char in self.state_layer_chars: if char in self.objects: mask = observations.layers[char].astype(float) if char in self.objects and 1. 
in mask: self.visitation_frequency[char] += 1 self._state.append(mask) self._state = np.array(self._state) elif 'rgb' in self.obs_type: if self.obs_type == 'rgb': rgb_img = self._paint_board(observations.layers, cropped=True).astype(float) elif self.obs_type == 'rgb_full': rgb_img = self._paint_board(observations.layers, cropped=False).astype(float) self._state = self.resize(rgb_img) for char in self.state_layer_chars: if char in self.objects: mask = observations.layers[char].astype(float) if self._check_visit(char): self.visitation_frequency[char] += 1 # update heatmap metric if self.log_heatmaps == True: pr, pc = self.current_game.things['P'].position self.heatmap[pr, pc] += 1 self.visitation_entropy = entropy(self.heatmap.flatten(), base=self.visitable_states) self.coverage = np.count_nonzero( self.heatmap) / self.visitable_states # update reward self._last_reward = reward if reward is not None else self._default_reward self._game_over = self.current_game.game_over if self.current_game.the_plot.frame >= self._max_iterations: self._game_over = True def step(self, action): """Apply action, step the world forward, and return observations. Args: action: the desired action to apply to the environment. Returns: state, reward, done, info. """ if self.current_game is None: logger.warn("Episode has already ended, call `reset` instead..") self._state = None reward = self._last_reward done = self._game_over return self._state, reward, done, {} # Execute the action in pycolab. self.current_game.the_plot.info = {} observations, reward, _ = self.current_game.play(action) self._last_uncropped_observations = observations self._empty_uncropped_board = np.zeros_like( self._last_uncropped_observations.board) # Crop and update if len(self._croppers) > 0: observations = [ cropper.crop(observations) for cropper in self._croppers ][0] self._last_cropped_observations = observations self._empty_cropped_board = np.zeros_like( self._last_cropped_observations.board) self._update_for_game_step(observations, reward) info = self.current_game.the_plot.info # Add custom metrics info['visitation_frequency'] = self.visitation_frequency info['first_time_visit'] = self.first_visit_time info['visitation_entropy'] = self.visitation_entropy info['coverage'] = self.coverage info['episodes'] = self.episodes info['num_obj_eps'] = self.num_obj_eps for ob in self.objects: pushes = getattr(self.current_game.things[ob], 'pushes', None) if pushes is not None: info['controllable_interactions'] = pushes # Check the current status of the game. 
reward = self._last_reward done = self._game_over if self._game_over: self.current_game = None return self._state, reward, done, info def reset(self): """Start a new episode.""" if len(self._reward_switch) > 0: self._switch = np.random.randint(len(self._reward_switch)) self._reward_target = self._reward_switch[self._switch] self._reward_config = {char: 0.0 for char in self._reward_switch} self._reward_config[self._reward_switch[self._switch]] = 1.0 self.current_game = self.make_game(reward_config=self._reward_config) for cropper in self._croppers: cropper.set_engine(self.current_game) self._colors = self.make_colors() self.current_game.the_plot.info = {} self._game_over = None self._last_observations = None self._last_reward = None observations, reward, _ = self.current_game.its_showtime() self._last_uncropped_observations = observations self._empty_uncropped_board = np.zeros_like( self._last_uncropped_observations.board) if len(self._croppers) > 0: observations = [ cropper.crop(observations) for cropper in self._croppers ][0] self._last_cropped_observations = observations self._empty_cropped_board = np.zeros_like( self._last_cropped_observations) # save and reset metrics for char in self.objects: if self.visitation_frequency[char] > 0: self.num_obj_eps[char] += 1 self.visitation_frequency = {char: 0 for char in self.objects} if self.log_heatmaps == True and self.episodes % self.heatmap_save_freq == 0 and self.startup == False: np.save('{}/{}.npy'.format(self.heatmap_path, self.episodes), self.heatmap) heatmap_normed = self.heatmap / np.linalg.norm( self.heatmap) + 0.0000000000000000001 plt.imsave('{}/{}.png'.format(self.heatmap_path, self.episodes), heatmap_normed, cmap='afmhot', vmin=0.0, vmax=1.0) self.episodes += 1 self.startup = False self.heatmap = np.zeros(self._last_uncropped_observations.board.shape) # run update self._update_for_game_step(observations, reward) return self._state def render(self, mode='rgb_array', close=False): """Render the board to an image viewer or an np.array. Args: mode: One of the following modes: - 'human': render to an image viewer. - 'rgb_array': render to an RGB np.array (np.uint8) Returns: 3D np.array (np.uint8) or a `viewer.isopen`. """ img = self._empty_uncropped_board if self._last_uncropped_observations: img = self._last_uncropped_observations.board layers = self._last_uncropped_observations.layers if self._colors: img = self._paint_board(layers, cropped=False) else: assert img is not None, '`board` must not be `None`.' img = self.resize(img, scale=17) if mode == 'rgb_array': return img elif mode == 'human': if self.viewer is None: from gym.envs.classic_control.rendering import ( SimpleImageViewer) self.viewer = SimpleImageViewer() self.viewer.imshow(img) time.sleep(self.delay / 1e3) return self.viewer.isopen def resize(self, img, scale=None): if scale is None: img = _repeat_axes(img, self.resize_scale, axis=[0, 1]) else: img = _repeat_axes(img, scale, axis=[0, 1]) if len(img.shape) != 3: img = np.repeat(img[..., None], 3, axis=-1) return img.astype(np.uint8) def seed(self, seed=None): """Seeds the environment. Args: seed: seed of the random engine. Returns: [seed]. """ self.np_random, seed = seeding.np_random(seed) return [seed] def close(self): """Tears down the renderer.""" if self.viewer: self.viewer.close() self.viewer = None
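# This PyColabEnv variant tracks exploration with a visit-count heatmap, a normalized
# visitation entropy, and a coverage fraction, as in _update_for_game_step above. A
# standalone computation of those two metrics, not from the original source, assuming the
# `entropy` used by the class is scipy.stats.entropy: with base=visitable_states a uniform
# visit pattern scores 1.0, and coverage is the fraction of states touched at least once.
import numpy as np
from scipy.stats import entropy

visitable_states = 25
heatmap = np.zeros((5, 5))
heatmap[0, 0] = 10          # agent camped in one corner...
heatmap[0, 1] = 2           # ...with a brief excursion

visitation_entropy = entropy(heatmap.flatten(), base=visitable_states)
coverage = np.count_nonzero(heatmap) / visitable_states
print(visitation_entropy, coverage)   # low entropy, coverage 2/25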
class RetroEnv(gym.Env): """ Gym Retro environment class Provides a Gym interface to classic video games """ metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 60.0 } def __init__(self, game, state=retro.State.DEFAULT, scenario=None, info=None, use_restricted_actions=retro.Actions.FILTERED, record=False, players=1, inttype=retro.data.Integrations.STABLE, obs_type=retro.Observations.IMAGE, retro_run_id=None): if not hasattr(self, 'spec'): self.spec = None self._obs_type = obs_type self.shm = None self.img = None self.ram = None self.viewer = None self.gamename = game self.statename = state self.initial_state = None self.players = players metadata = {} rom_path = retro.data.get_romfile_path(game, inttype) self.disas = retro.dispel.ingest(rom_path) metadata_path = retro.data.get_file_path(game, 'metadata.json', inttype) if state == retro.State.NONE: self.statename = None elif state == retro.State.DEFAULT: self.statename = None try: with open(metadata_path) as f: metadata = json.load(f) if 'default_player_state' in metadata and self.players <= len( metadata['default_player_state']): self.statename = metadata['default_player_state'][ self.players - 1] elif 'default_state' in metadata: self.statename = metadata['default_state'] else: self.statename = None except (IOError, json.JSONDecodeError): pass if self.statename: self.load_state(self.statename, inttype) self.data = retro.data.GameData() if info is None: info = 'data' if info.endswith('.json'): # assume it's a path info_path = info else: info_path = retro.data.get_file_path(game, info + '.json', inttype) if scenario is None: scenario = 'scenario' if scenario.endswith('.json'): # assume it's a path scenario_path = scenario else: scenario_path = retro.data.get_file_path(game, scenario + '.json', inttype) self.system = retro.get_romfile_system(rom_path) # Set up the shm if we're using the SNES emulator self._init_shm(retro_run_id) # We can't have more than one emulator per process. 
Before creating an # emulator, ensure that unused ones are garbage-collected gc.collect() self.em = retro.RetroEmulator(rom_path) self.em.configure_data(self.data) self.em.step() core = retro.get_system_info(self.system) self.buttons = core['buttons'] self.num_buttons = len(self.buttons) try: assert self.data.load( info_path, scenario_path), 'Failed to load info (%s) or scenario (%s)' % ( info_path, scenario_path) except Exception: del self.em raise self.button_combos = self.data.valid_actions() if use_restricted_actions == retro.Actions.DISCRETE: combos = 1 for combo in self.button_combos: combos *= len(combo) self.action_space = gym.spaces.Discrete(combos**players) elif use_restricted_actions == retro.Actions.MULTI_DISCRETE: self.action_space = gym.spaces.MultiDiscrete([ len(combos) if gym_version >= (0, 9, 6) else (0, len(combos) - 1) for combos in self.button_combos ] * players) else: self.action_space = gym.spaces.MultiBinary(self.num_buttons * players) kwargs = {} if gym_version >= (0, 9, 6): kwargs['dtype'] = np.uint8 if self._obs_type == retro.Observations.RAM: shape = self.get_ram().shape else: img = [self.get_screen(p) for p in range(players)] shape = img[0].shape self.observation_space = gym.spaces.Box(low=0, high=255, shape=shape, **kwargs) self.use_restricted_actions = use_restricted_actions self.movie = None self.movie_id = 0 self.movie_path = None if record is True: self.auto_record() elif record is not False: self.auto_record(record) self.seed() if gym_version < (0, 9, 6): self._seed = self.seed self._step = self.step self._reset = self.reset self._render = self.render self._close = self.close def _init_shm(self, retro_run_id): if self.system != 'Snes': self.shm = None return ##### Set up the shared memory segment ################################################## # currently only supports Snes # Set the identifier that the SNES C code may use to create a shared memory segment if retro_run_id is None: self.retro_run_id = random.randint(1, 1 << 30) else: self.retro_run_id = retro_run_id os.environ['RETRO_RUN_ID'] = f"{self.retro_run_id}" self.shm_key = self.retro_run_id shm_size = VISITED_BUFFER_SIZE * WORD_SIZE # enough to hold 2^15 16-bit words try: self.shm = ipc.SharedMemory(self.shm_key, flags=ipc.IPC_CREX, mode=0o666, size=shm_size) except Exception as e: # FIXME tighten this except up shm = ipc.SharedMemory(self.shm_key, 0, 0) ipc.remove_shared_memory(shm.id) self.shm = ipc.SharedMemory(self.shm_key, flags=ipc.IPC_CREX, mode=0o666, size=shm_size) return def _update_obs(self): if self._obs_type == retro.Observations.RAM: self.ram = self.get_ram() return self.ram elif self._obs_type == retro.Observations.IMAGE: self.img = self.get_screen() return self.img else: raise ValueError('Unrecognized observation type: {}'.format( self._obs_type)) def _read_snes_shm(self): self.shm.attach() count = struct.unpack(f"<Q", self.shm.read(WORD_SIZE))[0] buf = self.shm.read((count + 1) * WORD_SIZE) self.shm.detach() g = MSG_FMT.iter_unpack(buf) _ = next(g) return [(addr | bank << 16, offset, bytes(bytecode)) for (addr, bank, offset, *bytecode) in g] def action_to_array(self, a): actions = [] for p in range(self.players): action = 0 if self.use_restricted_actions == retro.Actions.DISCRETE: for combo in self.button_combos: current = a % len(combo) a //= len(combo) action |= combo[current] elif self.use_restricted_actions == retro.Actions.MULTI_DISCRETE: ap = a[self.num_buttons * p:self.num_buttons * (p + 1)] for i in range(len(ap)): buttons = self.button_combos[i] action |= buttons[ap[i]] 
else: ap = a[self.num_buttons * p:self.num_buttons * (p + 1)] for i in range(len(ap)): action |= int(ap[i]) << i if self.use_restricted_actions == retro.Actions.FILTERED: action = self.data.filter_action(action) ap = np.zeros([self.num_buttons], np.uint8) for i in range(self.num_buttons): ap[i] = (action >> i) & 1 actions.append(ap) return actions def disassemble(self, address, offset=None, bytecode=None): bank = address >> 16 #(0x0F & (address >> 16)) | 0x80 addr = address & 0xFFFF if (bank, addr) in self.disas: trace = self.disas[(bank, addr)] return trace else: trace = retro.dispel.disas_code(code=bytecode[:offset], addr=address)[0] self.disas[(bank, addr)] = trace return trace def step(self, a): if self.img is None and self.ram is None: raise RuntimeError('Please call env.reset() before env.step()') for p, ap in enumerate(self.action_to_array(a)): if self.movie: for i in range(self.num_buttons): self.movie.set_key(i, ap[i], p) self.em.set_button_mask(ap, p) if self.movie: self.movie.step() self.em.step() self.data.update_ram() ob = self._update_obs() rew, done, info = self.compute_step() info = dict(info) if self.system == 'Snes' and 'NOTRACE' not in os.environ: #info['trace'] = [self.disassemble(pc, flag, inst) for (pc, flag, inst) in self._read_snes_shm()] info['trace'] = self._read_snes_shm() return ob, rew, bool(done), info def reset(self): if self.initial_state: self.em.set_state(self.initial_state) for p in range(self.players): self.em.set_button_mask(np.zeros([self.num_buttons], np.uint8), p) self.em.step() if self.movie_path is not None: rel_statename = os.path.splitext(os.path.basename( self.statename))[0] self.record_movie( os.path.join( self.movie_path, '%s-%s-%06d.bk2' % (self.gamename, rel_statename, self.movie_id))) self.movie_id += 1 if self.movie: self.movie.step() self.data.reset() self.data.update_ram() return self._update_obs() def seed(self, seed=None): self.np_random, seed1 = seeding.np_random(seed) # Derive a random seed. This gets passed as a uint, but gets # checked as an int elsewhere, so we need to keep it below # 2**31. 
seed2 = seeding.hash_seed(seed1 + 1) % 2**31 return [seed1, seed2] def render(self, mode='human', close=False): if close: if self.viewer: self.viewer.close() return img = self.get_screen() if self.img is None else self.img if mode == "rgb_array": return img elif mode == "human": if self.viewer is None: from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = SimpleImageViewer() self.viewer.imshow(img) return self.viewer.isopen def close(self): if hasattr(self, 'em'): del self.em def get_action_meaning(self, act): actions = [] for p, action in enumerate(self.action_to_array(act)): actions.append([ self.buttons[i] for i in np.extract(action, np.arange(len(action))) ]) if self.players == 1: return actions[0] return actions def get_ram(self): blocks = [] for offset in sorted(self.data.memory.blocks): arr = np.frombuffer(self.data.memory.blocks[offset], dtype=np.uint8) blocks.append(arr) return np.concatenate(blocks) def get_screen(self, player=0): img = self.em.get_screen() x, y, w, h = self.data.crop_info(player) if not w or x + w > img.shape[1]: w = img.shape[1] else: w += x if not h or y + h > img.shape[0]: h = img.shape[0] else: h += y if x == 0 and y == 0 and w == img.shape[1] and h == img.shape[0]: return img return img[y:h, x:w] def load_state(self, statename, inttype=retro.data.Integrations.DEFAULT): if not statename.endswith('.state'): statename += '.state' with gzip.open( retro.data.get_file_path(self.gamename, statename, inttype), 'rb') as fh: self.initial_state = fh.read() self.statename = statename def compute_step(self): if self.players > 1: reward = [self.data.current_reward(p) for p in range(self.players)] else: reward = self.data.current_reward() done = self.data.is_done() return reward, done, self.data.lookup_all() def record_movie(self, path): self.movie = retro.Movie(path, True, self.players) self.movie.configure(self.gamename, self.em) if self.initial_state: self.movie.set_state(self.initial_state) def stop_record(self): self.movie_path = None self.movie_id = 0 if self.movie: self.movie.close() self.movie = None def auto_record(self, path=None): if not path: path = os.getcwd() self.movie_path = path def __del__(self): if self.shm is not None: ipc.remove_shared_memory(self.shm.id)
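# A minimal usage sketch for the retro environment above: a random rollout
# through the standard gym-retro entry point. Assumptions for illustration:
# `retro.make` is used to construct the env (the stock gym-retro factory) and
# the integrated ROM id 'Airstriker-Genesis' (the game bundled with
# gym-retro) is available.
import retro

def _demo_retro_rollout(num_steps=1000):
    env = retro.make(game='Airstriker-Genesis')
    obs = env.reset()
    for _ in range(num_steps):
        # sample a random button mask from the action space and step the emulator
        obs, rew, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()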
class NESEnv(gym.Env, gym.utils.EzPickle): """An environment for playing NES games in OpenAI Gym using FCEUX.""" # meta-data about the environment metadata = {'render.modes': ['human', 'rgb_array']} # a pipe from the emulator (FCEUX) to client (self). use the PID of this # python process to ensure the pipe is unique _pipe_in_name = '/tmp/smb-pipe-in-{}'.format(os.getpid()) # a pipe from the client (self) to emulator (FCEUX). use the PID of this # python process to ensure the pipe is unique _pipe_out_name = '/tmp/smb-pipe-out-{}'.format(os.getpid()) def __init__( self, max_episode_steps: int, frame_skip: int = 4, fceux_args: tuple = ('--nogui', '--sound 0'), random_seed: int = 0, ) -> None: """ Initialize a new NES environment. Args: max_episode_steps: the max number of steps per episode. - pass math.inf to use no max_episode_steps limit frame_skip: the number of frames to skip between inputs fceux_args: arguments to pass to the FCEUX command random_seed: the random seed to start the environment with Returns: None """ # validate that fceux can be found in the path if spawn.find_executable('fceux', os.environ['PATH']) is None: msg = 'fceux not found in $PATH. is fceux installed?' raise DependencyNotFoundError(msg) gym.utils.EzPickle.__init__(self) self.max_episode_steps = max_episode_steps self.frame_skip = frame_skip self.fceux_args = fceux_args self.curr_seed = random_seed # setup the frame rate based on the frame skip rate self.metadata['video.frames_per_second'] = 60 / self.frame_skip self.viewer = None self.step_number = 0 # these store the pipe for communicating with the environment self.pipe_in = None self.pipe_out = None # variables for the ROM and FCEUX interface files self.rom_file_path = None self.lua_interface_path = None self.emulator_started = False # Setup the observation space self.observation_space = gym.spaces.Box(low=0, high=255, shape=(SCREEN_HEIGHT, SCREEN_WIDTH, 3), dtype=np.uint8) self.screen = self.observation_space.sample() # Setup the action space self.actions = [ 'U', # Up 'D', # Down 'L', # Left 'R', # Right 'UR', # Up + Right 'DR', # Down + Right 'URA', # Up + Right + A 'DRB', # Down + Right + B 'A', # A 'B', # B 'RB', # Right + B 'RA' # Right + A ] self.action_space = gym.spaces.Discrete(len(self.actions)) # MARK: FCEUX def _start_emulator(self) -> None: """Spawn an instance of FCEUX and pass parameters to it.""" # validate that the rom file and lua interface are defined if not self.rom_file_path: raise Exception('No rom file specified!') if not self.lua_interface_path: raise Exception("Must specify a lua interface file to get scores!") # setup the environment variables to pass to the emulator instance os.environ['frame_skip'] = str(self.frame_skip) os.environ['pipe_in_name'] = str(self._pipe_in_name) os.environ['pipe_out_name'] = str(self._pipe_out_name) # TODO: define and setup different reward schemes to initialize with # and activate them here using the environment key 'reward_scheme' # open up the pipes to the emulator.
self._open_pipes() # build the FCEUX command command = ' '.join([ 'fceux', *self.fceux_args, '--loadlua', self.lua_interface_path, self.rom_file_path, '&' ]) # open the FCEUX process proc = subprocess.Popen(command, shell=True) proc.communicate() # open the pipe files self.pipe_in = open(self._pipe_in_name, 'rb') self.pipe_out = open(self._pipe_out_name, 'w', 1) # make sure the emulator sends the ready message opcode, _ = self._read_from_pipe() assert 'ready' == opcode self.emulator_started = True def _joypad(self, button: str) -> None: """ Pass a joy-pad command to the emulator Args: button: the button (or combination) to press on the controller Returns: None """ self._write_to_pipe('joypad' + SEP + button) def _get_state(self) -> tuple: """ Parse a state message from the emulator and return it. Returns: a tuple of: - the screen from the emulator - the reward from the previous action - the terminal flag denoting if an episode has ended """ # read the state message from the pipe opcode, data = self._read_from_pipe() assert opcode == 'state' # The first two fields are `reward` and `done`; the trailing underscore is # the dummy '\n' at the end of each line reward, done, screen, _ = data reward = int(reward.decode('ascii')) done = bool(int(done.decode('ascii'))) # change the done flag to true if this step passes the episode length done = True if self.step_number > self.max_episode_steps else done # unpack the palette (P) values representing a frame from the data pvs = np.array(struct.unpack('B' * len(screen), screen)) # use the palette to convert the p values to RGB rgb = np.array(PALETTE[pvs - 20], dtype=np.uint8) # reshape the screen and assign it to self screen = rgb.reshape((SCREEN_HEIGHT, SCREEN_WIDTH, 3)) return screen, reward, done # MARK: Pipes def _open_pipes(self) -> None: """Open the communication path between self and the emulator""" # Open the inbound pipe if it doesn't exist yet if not os.path.exists(self._pipe_in_name): os.mkfifo(self._pipe_in_name) # Open the outbound pipe if it doesn't exist yet if not os.path.exists(self._pipe_out_name): os.mkfifo(self._pipe_out_name) def _write_to_pipe(self, message: str) -> None: """Write a message to the outbound pipe (emulator).""" # write the message to the pipe and flush it self.pipe_out.write(message + '\n') self.pipe_out.flush() def _read_from_pipe(self) -> tuple: """ Read a message from the pipe. Returns: a tuple of - the opcode - the data with the message (as another tuple) """ # Read a message from the pipe and separate along the delimiter 0xff message = self.pipe_in.readline().split(b'\xFF') # decode the opcode opcode = message[0].decode('ascii') # return the opcode and data tuple return opcode, message[1:] # MARK: OpenAI Gym API def step(self, action: int) -> tuple: """ Take a step using the given action. Args: action: the discrete action to perform.
will use the action in `self.actions` indexed by this value Returns: a tuple of: - the state as a result of the action - the reward achieved by taking the action - a flag denoting whether the episode has ended - a dictionary of additional information """ # look up the button string for this action and send it to the emulator self._joypad(self.actions[action]) # increment the frame counter self.step_number += 1 # get the screen, reward, and done flag from the emulator self.screen, reward, done = self._get_state() return self.screen, reward, done, {} def reset(self) -> np.ndarray: """Reset the emulator and return the initial state.""" if not self.emulator_started: self._start_emulator() # write the reset command to the emulator self._write_to_pipe('reset' + SEP) self.step_number = 0 # get a state from the emulator. ignore the `reward` and `done` flag self.screen, _, _ = self._get_state() return self.screen def render(self, mode: str = 'human'): """ Render the current screen using the given mode. Args: mode: the mode to render the screen using - 'human': render in a window using pyglet - 'rgb_array': render in the back-end and return a matrix Returns: None if mode is 'human' or a matrix if mode is 'rgb_array' """ if mode == 'human': if self.viewer is None: from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = SimpleImageViewer() self.viewer.imshow(self.screen) elif mode == 'rgb_array': return self.screen def close(self) -> None: """Close the emulator and shutdown FCEUX.""" self._write_to_pipe('close') self.pipe_in.close() self.pipe_out.close() self.emulator_started = False def seed(self, seed: int = None) -> list: """ Set the seed for this env's random number generator(s). Returns: A list of seeds used in this env's random number generators. there is only one "main" seed in this env """ self.curr_seed = gym.utils.seeding.hash_seed(seed) % 256 return [self.curr_seed]
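# A minimal sketch of the FIFO message framing that _read_from_pipe and
# _get_state above expect: one message per line, fields separated by the
# byte 0xFF, with the trailing '\n' as a dummy final field. The payload
# values below are hypothetical stand-ins for what the FCEUX lua script
# would actually emit.
def _parse_pipe_line(line: bytes) -> tuple:
    # split along the 0xFF delimiter, mirroring _read_from_pipe
    fields = line.split(b'\xFF')
    opcode = fields[0].decode('ascii')
    return opcode, fields[1:]

# a 'state' message carries: reward, done flag, raw screen bytes, dummy newline
example = b'state\xFF1\xFF0\xFFraw-screen-bytes\xFF\n'
opcode, (reward, done, screen, _) = _parse_pipe_line(example)
assert opcode == 'state' and int(reward) == 1 and not bool(int(done))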
class MagnetsEnv(Env): metadata = {'render.modes': ['human', 'rgb_array']} def __init__(self, G_const=1.0, acceleration=30.0, time_step=0.01, time_limit=10, friction=10.0, seed=None, boundary_less=-1, boundary_greater=1, num_agents=3): ''' constants ''' self.G_const = G_const self.acceleration = acceleration self.time_step = time_step self.time_limit = time_limit self.friction = friction if (seed is None): self.seed = int(time.time()) else: self.seed = seed self.boundary_less = boundary_less self.boundary_greater = boundary_greater self.num_agents = num_agents self.action_space = MultiDiscrete([[0, 8] for _ in range(num_agents)]) # It's unclear what low and high here should be. Set them to 0 so # that if anyone tries to use them, it is more likely that obviously # wrong things happen. self.observation_space = Box(low=0, high=0, shape=(4*(num_agents+1),)) ''' variables that change with time ''' self.state = State(num_agents, seed) self.spec = None self.viewer = None def _reset(self): self.seed += 1 self.state = State(self.num_agents, self.seed) return self.state.to_array() def _step(self, action): ''' evolve the state ''' if not isinstance(action, Iterable): # if we didn't get a list of actions action = self._action_scal2vec(action) pos_inc = self.state.target_state.vel * self.time_step self.state.target_state.pos += pos_inc total_acc = np.zeros(2) for i in range(self.num_agents): diff_i = self.state.target_state.pos - self.state.agent_states[i].pos dist_square = (diff_i[0] * diff_i[0]) + (diff_i[1] * diff_i[1]) total_acc += (self.G_const / dist_square) *\ (diff_i / math.sqrt(dist_square)) self.state.agent_states[i].pos += (self.state.agent_states[i].vel * self.time_step) agent_dist = self.state.agent_states[i].pos[0] ** 2 +\ self.state.agent_states[i].pos[1] ** 2 if (agent_dist > 2): self.state.agent_states[i].pos /= agent_dist self.state.agent_states[i].pos *= 2 self.state.target_state.vel += (total_acc * self.time_step) ''' update velocities of agents based on acceleration ''' for i in range(self.num_agents): ''' acceleration has constant magnitude and one of 8 directions ''' acc_dir = np.zeros(2) if (action[i] != 8): acc_dir = np.asarray([math.cos((action[i] * math.pi) / 4), math.sin((action[i] * math.pi) / 4)]) vel_inc = self.acceleration * acc_dir * self.time_step vel_dec = self.friction * self.state.agent_states[i].vel *\ self.time_step self.state.agent_states[i].vel += (vel_inc - vel_dec) ''' check whether the game has ended so we can return ''' if (not self.state.in_box()): return self.state.to_array(), 0, True, {"Msg": "Game over"} return self.state.to_array(), 1, False, {"Msg": "Game not over"} def print_state(self): self.state.print_state() def _render_object(self, draw, obj, color): obj_x = int(((obj.pos[0] - ENV_LOWER) / ENV_SIDE)*RENDER_WIDTH) obj_y = int(((obj.pos[1] - ENV_LOWER) / ENV_SIDE)*RENDER_HEIGHT) draw.arc( [obj_x - RENDER_AGENT_SIZE/2, obj_y - RENDER_AGENT_SIZE/2, obj_x + RENDER_AGENT_SIZE/2, obj_y + RENDER_AGENT_SIZE/2], 0, 360, fill=color ) def _render_objective(self, draw, color): BOUND_X = (BOUND_SIDE/ENV_SIDE)*RENDER_WIDTH/2 BOUND_Y = (BOUND_SIDE/ENV_SIDE)*RENDER_HEIGHT/2 draw.rectangle( [RENDER_WIDTH/2 + BOUND_X, RENDER_HEIGHT/2 + BOUND_Y, RENDER_WIDTH/2 - BOUND_X, RENDER_HEIGHT/2 - BOUND_Y], outline=color ) def _render_bounds(self, draw, color): draw.arc([0, 0, RENDER_WIDTH, RENDER_HEIGHT], 0, 360, fill=color) def _render(self, mode='human', close=False): # PIL's Image.new takes (width, height) img = Image.new('RGB', (RENDER_WIDTH, RENDER_HEIGHT), WHITE) draw = ImageDraw.Draw(img) for i in
range(self.num_agents): agent = self.state.agent_states[i] self._render_object(draw, agent, RED) self._render_object(draw, self.state.target_state, BLUE) self._render_objective(draw, GREEN) self._render_bounds(draw, BLACK) del draw if mode == 'human': if (self.viewer is None): # don't import SimpleImageViewer by default because even importing # it requires a display from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = SimpleImageViewer() self.viewer.imshow(np.asarray(img)) elif mode == 'rgb_array': return np.asarray(img)
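# A minimal random-agent sketch for MagnetsEnv, assuming the legacy gym API
# this class targets (where Env.reset/Env.step dispatch to _reset/_step).
import numpy as np

def _demo_magnets_episode():
    env = MagnetsEnv(num_agents=3)
    obs = env.reset()
    done = False
    while not done:
        # each agent accelerates in one of 8 directions, or coasts (action 8)
        action = np.random.randint(0, 9, size=env.num_agents)
        obs, reward, done, info = env.step(action)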
class GridWorld(gym.Env): metadata = {'render.modes': ['human', 'rgb_array']} def __init__(self, file_name="map1.txt", fail_rate=0.0, terminal_reward=1.0, move_reward=0.0, bump_reward=-0.5, bomb_reward=-1.0): self.viewer = SimpleImageViewer() self.n = None self.m = None self.bombs = [] self.walls = [] self.goals = [] self.start = None this_file_path = os.path.dirname(os.path.realpath(__file__)) file_name = os.path.join(this_file_path, file_name) with open(file_name, "r") as f: for i, row in enumerate(f): row = row.rstrip('\r\n') if self.n is not None and len(row) != self.n: raise ValueError( "Map's rows are not of the same dimension...") self.n = len(row) for j, col in enumerate(row): if col == "x" and self.start is None: self.start = self.n * i + j elif col == "x" and self.start is not None: raise ValueError( "There is more than one starting position in the map..." ) elif col == "G": self.goals.append(self.n * i + j) elif col == "B": self.bombs.append(self.n * i + j) elif col == "1": self.walls.append(self.n * i + j) self.m = i + 1 if len(self.goals) == 0: raise ValueError("At least one goal needs to be specified...") self.n_states = self.n * self.m self.n_actions = 4 self.fail_rate = fail_rate self.state = self.start self.terminal_reward = terminal_reward self.move_reward = move_reward self.bump_reward = bump_reward self.bomb_reward = bomb_reward self.action_space = spaces.Discrete(4) self.observation_space = spaces.Discrete(self.n_states) self.done = False def step(self, action): assert self.action_space.contains(action) if self.state in self.goals or np.random.rand() < self.fail_rate: return self.state, 0.0, self.done, {} else: new_state = self.take_action(action) reward = self.get_reward(new_state) self.state = new_state return self.state, reward, self.done, {} def reset(self): self.done = False self.state = self.start return self.state def render(self, mode='human', close=False): if close: if self.viewer is not None: self.viewer.close() self.viewer = None return # build the RGB grid once for both render modes (uint8 so 255 does not overflow) grid = np.multiply(np.ones((self.n_states, 3), dtype=np.uint8), np.array([0, 255, 0], dtype=np.uint8)) for g in self.goals: grid[g] = np.array([255, 0, 0]) for b in self.bombs: grid[b] = np.array([255, 255, 0]) for w in self.walls: grid[w] = np.array([0, 0, 0]) grid[self.state] = np.array([0, 0, 255]) grid = grid.reshape(self.m, self.n, 3) if mode == 'human': if self.viewer is None: # the viewer may have been closed; recreate it self.viewer = SimpleImageViewer() self.viewer.imshow(grid) return self.viewer.isopen elif mode == "rgb_array": return grid else: return def take_action(self, action): row = self.state // self.n col = self.state % self.n if action == DOWN and (row + 1) * self.n + col not in self.walls: row = min(row + 1, self.m - 1) elif action == UP and (row - 1) * self.n + col not in self.walls: row = max(0, row - 1) elif action == RIGHT and row * self.n + col + 1 not in self.walls: col = min(col + 1, self.n - 1) elif action == LEFT and row * self.n + col - 1 not in self.walls: col = max(0, col - 1) new_state = row * self.n + col return new_state def get_reward(self, new_state): if new_state in self.goals: self.done = True return self.terminal_reward elif new_state in self.bombs: return self.bomb_reward elif new_state == self.state: return self.bump_reward return self.move_reward
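# An illustrative map file for GridWorld and the loop that consumes it. The
# format: one character per cell, all rows the same length; 'x' marks the
# unique start, 'G' a goal, 'B' a bomb, '1' a wall, anything else an open
# cell. GridWorld resolves file_name relative to its own module directory,
# so the demo map is written there; the name 'demo_map.txt' is a
# hypothetical choice for illustration.
def _demo_gridworld():
    import os
    map_text = "x0001\n0B001\n00010\n0001G\n"
    map_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'demo_map.txt')
    with open(map_path, 'w') as f:
        f.write(map_text)
    env = GridWorld(file_name='demo_map.txt')
    state = env.reset()
    done = False
    while not done:
        # wander randomly until a goal cell sets done
        state, reward, done, info = env.step(env.action_space.sample())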
class Runner: def __init__(self, env, model, batch_size, timesteps, discount_rate, summary_frequency, performance_num_episodes, summary_log_dir): self.env = env self.model = model self.timesteps = timesteps self.discount_rate = discount_rate self.observation = env.reset() self.batch_size = batch_size self.stats_recorder = StatsRecorder( summary_frequency=summary_frequency, performance_num_episodes=performance_num_episodes, summary_log_dir=summary_log_dir, save=True) self.viewer = SimpleImageViewer() def render(self): columns = [] for i in range(80): rows = [] for j in range(80): if self.observation[i][j] == 1: rows.append([255, 255, 255]) else: rows.append([0, 0, 0]) columns.append(rows) self.viewer.imshow(np.asarray(columns, dtype=np.uint8)) def run(self): observations = [] rewards = [] actions = [] terminals = [] for t in range(self.timesteps + 1): action_index = self.model.predict_action([self.observation]) observations.append(self.observation) action = action_with_index(action_index) self.observation, reward, terminal = self.env.step(action) self.stats_recorder.after_step(reward=reward, terminal=terminal) rewards.append(reward) actions.append(action_index) terminals.append(terminal) if len(rewards) == self.batch_size: discounted_rewards = discount(rewards, terminals, self.discount_rate) self.model.train(observations, discounted_rewards, actions) observations = [] rewards = [] actions = [] terminals = [] if terminal: self.observation = self.env.reset() if t % self.stats_recorder.summary_frequency == 0: self.model.save(0)
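# A minimal sketch of the `discount` helper both Runner classes above rely
# on. Assumption: it computes discounted returns by scanning the rewards
# backwards and resetting the running return at terminal steps, which
# matches how the callers bootstrap by appending a value with a final
# `False` terminal and then dropping the last entry.
def discount(rewards, terminals, discount_rate):
    returns = []
    running = 0.0
    for reward, terminal in zip(reversed(rewards), reversed(terminals)):
        if terminal:
            # episode boundary: nothing after this step contributes
            running = 0.0
        running = reward + discount_rate * running
        returns.append(running)
    returns.reverse()
    return returns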
class TetrisEnv(gym.Env, gym.utils.EzPickle): """An environment for playing Tetris in OpenAI Gym.""" # meta-data about the environment for OpenAI Gym utilities (like Monitor) metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 30, } def __init__(self, max_steps: int, random_state: int = None) -> None: """ Initialize a new Tetris environment. Args: max_steps: the max number of steps per episode. random_state: the random seed to start the environment with Returns: None """ gym.utils.EzPickle.__init__(self) self.max_steps = max_steps self.viewer = None self.step_number = 0 # Setup the observation space as RGB game frames self.observation_space = gym.spaces.Box(low=0, high=255, shape=(SCREEN_HEIGHT, SCREEN_WIDTH, 3), dtype=np.uint8) # Setup the action space, the game defines 12 legal actions self.action_space = gym.spaces.Discrete(12) # setup the game self.game = Tetris() self.seed(random_state) @property def screen(self) -> np.ndarray: """Return the screen of the game""" return self.game.screen def reset(self) -> np.ndarray: """Reset the emulator and return the initial state.""" self.game.reset() # reset the step count self.step_number = 0 # return the initial screen from the game return self.game.screen def step(self, action: int) -> tuple: """ Take a step using the given action. Args: action: the discrete action to perform. the action is passed directly to the underlying Tetris game Returns: a tuple of: - the state as a result of the action - the reward achieved by taking the action - a flag denoting whether the episode has ended - a dictionary of extra information """ state, reward, done, info = self.game.step(action) self.step_number += 1 # if this step has passed the max number, set the episode to done if self.step_number >= self.max_steps: done = True return state, reward, done, info def render(self, mode: str = 'human'): """ Render the current screen using the given mode. Args: mode: the mode to render the screen using - 'human': render in a window using pyglet - 'rgb_array': render in the back-end and return a matrix Returns: None if mode is 'human' or a matrix if mode is 'rgb_array' """ # if the mode is RGB, return the screen as a NumPy array if mode == 'rgb_array': return self.game.screen # if the mode is human, create a viewer and display the screen elif mode == 'human': from pyglet.window import Window from gym.envs.classic_control.rendering import SimpleImageViewer if self.viewer is None: self.viewer = SimpleImageViewer() self.viewer.window = Window( width=SCREEN_WIDTH, height=SCREEN_HEIGHT, caption=self.spec.id, ) self.viewer.imshow(self.game.screen) return self.viewer.isopen # otherwise the render mode is not supported, raise an error else: raise ValueError('unsupported render mode: {}'.format(repr(mode))) def close(self) -> None: """Close the emulator.""" # delete the existing game if there is one if isinstance(self.game, Tetris): del self.game if self.viewer is not None: self.viewer.close() del self.viewer def seed(self, random_state: int = None) -> list: """ Set the seed for this env's random number generator(s).
Args: random_state: the seed to set the random generator to Returns: A list of seeds used in this env's random number generators """ random.seed(random_state) self.curr_seed = random_state return [self.curr_seed] def get_keys_to_action(self) -> dict: """Return the dictionary of keyboard keys to actions.""" # Map of in game directives to their associated keyboard value down = ord('s') left = ord('a') right = ord('d') rot_l = ord('q') rot_r = ord('e') # A mapping of pressed key combinations to discrete actions keys_to_action = { (): 0, (left, ): 1, (right, ): 2, (down, ): 3, (rot_l, ): 4, (rot_r, ): 5, tuple(sorted(( left, down, ))): 6, tuple(sorted(( right, down, ))): 7, tuple(sorted(( left, rot_l, ))): 8, tuple(sorted(( right, rot_l, ))): 9, tuple(sorted(( left, rot_r, ))): 10, tuple(sorted(( right, rot_r, ))): 11, } return keys_to_action
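# A hedged usage sketch: get_keys_to_action plugs directly into gym's
# interactive play utility (gym.utils.play.play in the gym releases this
# code targets). The environment id 'Tetris-v0' is a hypothetical
# registration; TetrisEnv's human render assumes a registered spec as well.
import gym
from gym.utils.play import play

def _demo_play_tetris():
    env = gym.make('Tetris-v0')
    # drive the env from the keyboard using the mapping defined above
    play(env, keys_to_action=env.unwrapped.get_keys_to_action(), fps=30)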