def __init__(self, env, rank=0):
    Wrapper.__init__(self, env=env)
    self.env = env
    self.rank = rank
    self.rewards = []
    self.current_metadata = {}  # extra info that gets injected into each log entry
    self.summaries_dict = {'reward': 0, 'episode_length': 0}
def test(env: gym.Wrapper, model: tf.keras.Model, log_dir: Path) -> None:
    """Test the DQN on Pong.

    Args:
        env: The Atari Pong environment
        model: The trained model to evaluate
        log_dir: Path where to save the video
    """
    env = Monitor(
        env,
        log_dir,
        force=True,  # overwrite existing videos
        video_callable=lambda count: True,  # force save this episode
    )
    state = Deque[tf.Tensor](maxlen=STATE_FRAMES)
    state.append(preprocess(env.reset()))  # initial state
    print("Starting testing...")
    while True:
        if len(state) < STATE_FRAMES:
            initial = None
            action = env.action_space.sample()
        else:
            initial = tf.stack(state, axis=-1)
            action = choose(model, initial, 0)  # choose greedily
        state_new, _, done, _ = env.step(action)
        state_new = preprocess(state_new)
        state.append(state_new)
        if done:
            break
    print("Testing done")
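# Hedged usage sketch for `test` above, assuming the module's helpers
# (Monitor, preprocess, choose, STATE_FRAMES) are in scope; the env id
# "PongNoFrameskip-v4" and the checkpoint path "pong_dqn.h5" are illustrative
# assumptions, not taken from the original code.
import gym
import tensorflow as tf
from pathlib import Path

env = gym.make("PongNoFrameskip-v4")
model = tf.keras.models.load_model("pong_dqn.h5")
test(env, model, Path("videos"))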
def __init__(self, env, num_skills, meta_agent):
    Wrapper.__init__(self, env)
    self.num_skills = num_skills
    self.meta_agent = meta_agent
    # Each skill equally likely to be chosen
    self.prior_probability_of_skill = 1.0 / self.num_skills
    self._max_episode_steps = self.env._max_episode_steps
def __init__(self, env, HIRO_agent, max_sub_policy_timesteps):
    Wrapper.__init__(self, env)
    self.env = env
    self.meta_agent = HIRO_agent
    self.max_sub_policy_timesteps = max_sub_policy_timesteps
    self.track_intrinsic_rewards = []
def run_agent(player: MarioPlayer, env: Wrapper, record: bool, vids_path: str, index):
    if record:
        rec_output_path = os.path.join(vids_path, "{name}.mp4".format(name=index))
        rec = monitor.video_recorder.VideoRecorder(env, path=rec_output_path)
    state = env.reset()
    done = False
    for step in range(steps_limit):
        if done:
            break
        action = player.act(state)
        state, reward, done, info = env.step(action)
        env.render()
        if record:
            rec.capture_frame()
        player.update_info(info)
        player.update_reward(reward)
        if info['flag_get']:  # reaching the flag ends the run
            done = True
    if record:
        rec.close()
    player.calculate_fitness()
    outcome = player.get_run_info()
    outcome['index'] = index
    return outcome
def __init__(self, env, memory, lock, args):
    GymWrapper.__init__(self, env)
    self.memory = memory
    self.lock = lock  # Lock for memory access
    self.skipframes = args['skip']
    self.environment_name = args['environment_name']
    self.logdir = args['logdir']
    self.current_i = 0
def __init__(self, env, stack_size=4):
    Wrapper.__init__(self, env=env)
    self.stack_size = stack_size
    self.observation_space = spaces.Box(
        low=0, high=255, shape=(84, 84, self.stack_size), dtype=np.uint8)
    self.state = None
def __init__(self, env, num_states, num_skills, regularisation_weight, visitations_decay):
    Wrapper.__init__(self, env)
    self.num_skills = num_skills
    self.num_states = num_states
    self.state_visitations = [
        [0 for _ in range(num_states)] for _ in range(num_skills)]
    self.regularisation_weight = regularisation_weight
    self.visitations_decay = visitations_decay
def __init__(self, env, logdir, info_keywords=()):
    """A monitor wrapper for Gym environments, used to record the episode
    reward, length, time and other data.

    :param env: (Gym environment) The environment
    :param logdir: (str) the location to save tensorboard logs
    :param info_keywords: (tuple) extra information to log, taken from the
        info dict returned by environment.step
    """
    Wrapper.__init__(self, env=env)
    self.writer = FileWriter(logdir)
    self.info_keywords = info_keywords
    self.episode_info = dict()
    self.total_steps = 0
def __init__(
    self,
    env: gym.Env,
    callback: Callback,
):
    """Initialize.

    :param env: Gym environment to wrap
    :param callback: a callback object
    """
    Wrapper.__init__(self, env)
    self._callback = callback
def __init__(self, env, n_skills, total_timesteps=None, batch_size=64, hidden_dim=128, lr=1e-3):
    """
    Args:
        env (gym.Env): environment to wrap
        n_skills (int): number of skills
        total_timesteps (int): same parameter as the algorithm. If not None,
            DIAYN is in "training" mode and a progress bar is shown during
            training; if None, no progress bar is shown.
        batch_size (int): batch size for discriminator updates
        hidden_dim (int): dimension of the latent space
        lr (float): learning rate
    """
    Wrapper.__init__(self, env)
    self.n_skills = n_skills
    self.hidden_dim = hidden_dim
    self.state_size = env.observation_space.shape[0]
    self.lr = lr
    self.batch_size = batch_size
    self.probability_of_skill = 1 / self.n_skills
    self.discriminator = nn.Sequential(
        nn.Linear(self.state_size, self.hidden_dim),
        nn.ReLU(),
        nn.Linear(self.hidden_dim, self.hidden_dim),
        nn.ReLU(),
        nn.Linear(self.hidden_dim, self.n_skills))
    self.discriminator_optimizer = optim.Adam(
        self.discriminator.parameters(), lr=self.lr)
    self.discriminator_optimizer.zero_grad()
    # Set up the environment
    self.env.observation_space.shape = (self.state_size + self.n_skills, )
    # Init skill and "loggers"
    self.skill = np.random.randint(self.n_skills)
    self.training_mode = total_timesteps is not None
    self.pbar = tqdm(total=total_timesteps, disable=not self.training_mode)
    self.pbar.set_postfix_str("Ready to train !")
    self.current_experiment_number = [0]
    self.discriminator_losses = []
    self.current_step = 0
    if self.training_mode:
        self.buffer = Buffer(total_timesteps, self.state_size + 1)  # state + current skill
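# Hedged usage sketch for the DIAYN wrapper above, assuming the enclosing class
# is named DIAYN; the env id and hyper-parameter values are illustrative only.
import gym

train_env = DIAYN(gym.make("Pendulum-v0"), n_skills=10, total_timesteps=100_000)  # training mode, shows a progress bar
eval_env = DIAYN(gym.make("Pendulum-v0"), n_skills=10)  # total_timesteps=None: no progress bar, no buffer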
def fix_visual_game(env: gym.Wrapper) -> gym.Wrapper:
    w, h, channels = env.observation_space.shape
    setattr(env, 'env_name', env.unwrapped.spec.id)
    setattr(env, 'state_num', (channels, w, h))
    setattr(env, 'state_dim', 3)
    setattr(env, 'action_dim', env.action_space.n)
    setattr(env, 'if_discrete', isinstance(env.action_space, gym.spaces.Discrete))

    target_reward = getattr(env, 'target_reward', None)
    target_reward_default = getattr(env.spec, 'reward_threshold', None)
    if target_reward is None:
        target_reward = target_reward_default
    if target_reward is None:
        target_reward = 2 ** 16
    setattr(env, 'target_reward', target_reward)

    def convert_image_shape(img: np.ndarray) -> np.ndarray:
        (w, h, channels) = img.shape
        return img.reshape((channels, w, h))

    def fix_step(env_step):
        def step(action):
            observation, reward, terminal, info = env_step(action)
            return convert_image_shape(observation), reward, terminal, info
        return step

    env.step = fix_step(env.step)
    return env
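# Hedged usage sketch for fix_visual_game above; the env id "Breakout-v4" is an
# illustrative assumption. The wrapped step() returns observations reshaped to
# the (channels, width, height) layout.
import gym

env = fix_visual_game(gym.make("Breakout-v4"))
env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print(obs.shape)  # e.g. (3, 210, 160) instead of (210, 160, 3)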
def __init__(self, length, markovian=True, can_stay=False):
    layout = np.zeros(shape=(3, length + 1), dtype=int)
    layout[:, 0] = 1
    layout[1, :] = 1
    layout[:, -1] = 1
    entries = [(0, 0), (2, 0)]
    exits = [(0, length)]
    traps = [(2, length)]
    MazeClass = MarkovianMaze if markovian else NonMarkovianMaze
    maze = MazeClass(layout, entries, exits, traps, can_stay, -1.0, -1.0, length + 1, -1)
    Wrapper.__init__(self, maze)
def __init__(self, env, start_obss, end_obss):
    """Create an environment tailored to train a single (missing) skill.

    Trajectories are initialized in a start_obss state and terminated (and
    reward is generated) upon reaching an end_obss state.

    :param env: AsaEnv environment to wrap. The environment is cloned to
        preserve the integrity of the original env.
    :param start_obss: Tensor of experienced starting observations (where the
        skill should initiate)
    :param end_obss: Tensor of experienced ending observations (where the
        skill should terminate)
    """
    Serializable.quick_init(self, locals())
    # This clones the base env along with all its wrappers
    Wrapper.__init__(self, AsaEnv.clone_wrapped(env))
    if start_obss.shape != end_obss.shape:
        raise ValueError(
            'start_obss ({}) and end_obss ({}) must be of same shape'.format(
                start_obss.shape, end_obss.shape))
    self._end_obss = end_obss.reshape((end_obss.shape[0], -1))
    self._start_obss = start_obss.reshape((start_obss.shape[0], -1))
    self.current_obs_idx = None
def __init__(self, env, k, axis=None):
    """Stack the k last observations.

    If axis is None, create a new leading dimension and stack along it;
    otherwise, concatenate observations along the given axis.
    """
    Wrapper.__init__(self, env)
    self.k = k
    self.axis = axis
    self.obs = deque([], maxlen=k)
    shp = list(env.observation_space.shape)
    dim = len(shp)
    if axis:
        assert axis < dim, "Axis {} is out of bounds for observations of dimension {}".format(axis, dim)
        self.stack = lambda x: np.concatenate(list(x), axis=axis)
        shp[axis] *= k
        self.observation_space = spaces.Box(low=0, high=255, shape=tuple(shp))
    else:
        self.stack = lambda x: np.stack(list(x), axis=0)
        self.observation_space = spaces.Box(low=0, high=255, shape=(k,) + tuple(shp))
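# Hedged usage sketch for the observation-stacking wrapper above, assuming the
# enclosing class is named FrameStack; the env id is illustrative only.
import gym

stacked = FrameStack(gym.make("Breakout-v4"), k=4)              # new leading axis -> (4, 210, 160, 3)
channelwise = FrameStack(gym.make("Breakout-v4"), k=4, axis=2)  # concatenate on axis 2 -> (210, 160, 12)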
def _test(id: int, env: gym.Wrapper, model: TD3Network, render: bool = False,
          recording_path=None, save_video=False):
    episode_rewards = []
    action_repeats = []
    state = env.reset()
    done = False
    episode_images = []
    while not done:
        # get action
        state = torch.FloatTensor(state).unsqueeze(0)
        action = model.actor(state)
        repeat_q = model.critic_1(state, action)
        repeat_idx = repeat_q.argmax(1).item()
        action = action.data.cpu().numpy()[0]
        repeat = model.action_repeats[repeat_idx]
        action_repeats.append(repeat)
        for _ in range(repeat):
            if render:
                if save_video:
                    img = env.render(mode='rgb_array')
                    episode_images.append(img)
                else:
                    env.render(mode='human')
            # step
            state, reward, done, info = env.step(action)
            episode_rewards.append(reward)
            if done:
                break
    if render and save_video:
        write_gif(episode_images, action_repeats, episode_rewards,
                  os.path.join(recording_path, 'ep_{}.gif'.format(id)))
    return sum(episode_rewards), action_repeats
def __init__(self, env, num_stack, use_lazy_frame=False, lz4_compress=False):
    Wrapper.__init__(self, env)
    BaseWrapper.__init__(self)
    self.num_stack = num_stack
    self.lz4_compress = lz4_compress
    self.use_lazy_frame = use_lazy_frame
    self.frames = deque(maxlen=num_stack)
    low = np.repeat(self.observation_space.low[np.newaxis, ...], num_stack, axis=0)
    high = np.repeat(self.observation_space.high[np.newaxis, ...], num_stack, axis=0)
    self.observation_space = Box(low=low, high=high, dtype=self.observation_space.dtype)
def __init__(self, env):
    Wrapper.__init__(self, env)
    self.game_over = False
def __init__(self, env):
    Wrapper.__init__(self, env)
    MultiAgentEnv.__init__(self, getattr_unwrapped(env, 'num_agents'))
def __init__(self, env):
    Wrapper.__init__(self, env)
    self.action_space = gym.spaces.Tuple((self.action_space, ))
    self.observation_space = gym.spaces.Tuple((self.observation_space, ))
    MultiAgentEnv.__init__(self, num_agents=1)
def __init__(self, env, HIRO_agent, max_sub_policy_timesteps):
    Wrapper.__init__(self, env)
    self.env = env
    self.meta_agent = HIRO_agent
    self.max_sub_policy_timesteps = max_sub_policy_timesteps
def __init__(self, env, HIRO_agent):
    Wrapper.__init__(self, env)
    self.env = env
    self.HIRO_agent = HIRO_agent
    self.action_space = self.observation_space
def __init__(self, env, lower_level_agent, timesteps_to_give_up_control_for, num_skills):
    Wrapper.__init__(self, env)
    self.action_space = spaces.Discrete(num_skills)
    self.lower_level_agent = lower_level_agent
    self.timesteps_to_give_up_control_for = timesteps_to_give_up_control_for
def __init__(self, env, warm_up_examples=0):
    Wrapper.__init__(self, env)
    self.warm_up_examples = warm_up_examples
    self.warm_up_action = 0
    self.observation_space = Box(low=0, high=255, shape=(210, 160, 3), dtype=np.uint8)
def __init__(self, env):
    Wrapper.__init__(self, env)
    self.frame_stack = deque(maxlen=4)
def __init__(self,
             env: Union[EnvDataset, PolicyEnv] = None,
             dataset: Union[EnvDataset, PolicyEnv] = None,
             batch_size: int = None,
             num_workers: int = None,
             **kwargs):
    assert not (env is None and dataset is None), (
        "One of the `dataset` or `env` arguments must be passed.")
    assert not (env is not None and dataset is not None), (
        "Only one of the `dataset` and `env` arguments can be used.")

    if not isinstance(env, IterableDataset):
        raise RuntimeError(
            f"The env {env} isn't an iterable dataset! (You can use the "
            f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
            f"from a gym environment.)")

    if isinstance(env.unwrapped, VectorEnv):
        if batch_size is not None and batch_size != env.num_envs:
            logger.warning(UserWarning(
                f"The provided batch size {batch_size} will be ignored, since "
                f"the provided env is vectorized with a batch_size of "
                f"{env.unwrapped.num_envs}."))
        batch_size = env.num_envs

    if isinstance(env.unwrapped, BatchedVectorEnv):
        num_workers = env.n_workers
    elif isinstance(env.unwrapped, AsyncVectorEnv):
        num_workers = env.num_envs
    else:
        num_workers = 0

    self.env = env
    # TODO: We could also perhaps let those parameters through to the
    # constructor of DataLoader, because in __iter__ we're not using the
    # DataLoader iterator anyway! This would have the benefit that the
    # batch_size and num_workers attributes would reflect the actual state
    # of the iterator, and things like pytorch-lightning would stop warning
    # us that the num_workers is too low.
    super().__init__(
        dataset=self.env,
        # The batch size is None, because the VecEnv takes care of
        # doing the batching for us.
        batch_size=batch_size,
        num_workers=num_workers,
        # collate_fn=None,
        **kwargs,
    )
    Wrapper.__init__(self, env=self.env)
    assert not isinstance(self.env, GymDataLoader), "Something very wrong is happening."

    # self.max_epochs: int = max_epochs
    self.observation_space: gym.Space = self.env.observation_space
    self.action_space: gym.Space = self.env.action_space
    self.reward_space: gym.Space

    if isinstance(env.unwrapped, VectorEnv):
        env: VectorEnv
        batch_size = env.num_envs
        # TODO: Overwriting the action space to be the 'batched' version of
        # the single action space, rather than a Tuple(Discrete, ...) as is
        # done in the gym.vector.VectorEnv.
        self.action_space = batch_space(env.single_action_space, batch_size)

    if not hasattr(self.env, "reward_space"):
        self.reward_space = spaces.Box(
            low=self.env.reward_range[0],
            high=self.env.reward_range[1],
            shape=(),
        )
        if isinstance(self.env.unwrapped, VectorEnv):
            # Same here, we use a 'batched' space rather than Tuple.
            self.reward_space = batch_space(self.reward_space, batch_size)
def __init__(self, env):
    Wrapper.__init__(self, env)
    if len(env.unwrapped.get_action_meanings()) < 3:
        raise ValueError('Expected an action space of at least 3!')
def __init__(self, env, num_skills, timesteps_before_changing_skill, skills_agent):
    Wrapper.__init__(self, env)
    self.action_space = spaces.Discrete(num_skills)
    self.timesteps_before_changing_skill = timesteps_before_changing_skill
    self.skills_agent = skills_agent
def __init__(self, env, rank=0):
    Wrapper.__init__(self, env=env)
    self.rank = rank
    self.rewards = []
    self.current_metadata = {}
    self.info = {'reward': 0, 'episode_length': 0}
def __init__(self, env, max_n_noops):
    Wrapper.__init__(self, env)
    self.max_n_noops = max_n_noops