def make_environment(env_name, start_index, end_index, seed):
  """Creates the environment.

  Args:
    env_name: name of the environment
    start_index: first index of the observation to use in the goal.
    end_index: final index of the observation to use in the goal. The goal
      is then obs[start_index:end_index].
    seed: random seed.

  Returns:
    env: the environment
    obs_dim: integer specifying the size of the observations, before the
      start_index/end_index is applied.
  """
  np.random.seed(seed)
  gym_env, obs_dim, max_episode_steps = env_utils.load(env_name)
  # Positions of the goal coordinates inside the concatenated
  # [observation, goal] vector: offset the goal-relevant observation
  # indices by obs_dim.  (obs_to_goal_1d selects which observation
  # dimensions make up the goal — defined elsewhere in the project.)
  goal_indices = obs_dim + obs_to_goal_1d(np.arange(obs_dim), start_index, end_index)
  # Keep the full observation plus the selected goal coordinates.
  indices = np.concatenate([np.arange(obs_dim), goal_indices])
  env = gym_wrapper.GymWrapper(gym_env)
  env = step_limit.StepLimitWrapper(env, step_limit=max_episode_steps)
  env = ObservationFilterWrapper(env, indices)
  # NOTE(review): only ant_* tasks get a canonical action spec — presumably
  # because their raw action bounds are non-canonical; confirm with env_utils.
  if env_name.startswith('ant_'):
    env = canonical_spec.CanonicalSpecWrapper(env)
  return env, obs_dim
def test_gym_cartpole(self):
  """Checks CartPole spec conversion and a single reset/step cycle."""
  env = gym_wrapper.GymWrapper(gym.make('CartPole-v0'))

  # Observation spec: a bounded float32 vector of length 4.
  obs_spec: specs.BoundedArray = env.observation_spec()
  self.assertEqual(type(obs_spec), specs.BoundedArray)
  self.assertEqual(obs_spec.shape, (4,))
  self.assertEqual(obs_spec.minimum.shape, (4,))
  self.assertEqual(obs_spec.maximum.shape, (4,))
  self.assertEqual(obs_spec.dtype, np.dtype('float32'))

  # Action spec: two discrete actions {0, 1} as int64.
  act_spec: specs.BoundedArray = env.action_spec()
  self.assertEqual(type(act_spec), specs.DiscreteArray)
  self.assertEqual(act_spec.shape, ())
  self.assertEqual(act_spec.minimum, 0)
  self.assertEqual(act_spec.maximum, 1)
  self.assertEqual(act_spec.num_values, 2)
  self.assertEqual(act_spec.dtype, np.dtype('int64'))

  # Stepping: reset yields a FIRST timestep, a step yields reward 1.0.
  timestep = env.reset()
  self.assertTrue(timestep.first())
  timestep = env.step(1)
  self.assertEqual(timestep.reward, 1.0)
  self.assertEqual(timestep.observation.shape, (4,))
  env.close()
def test_early_truncation(self):
  """Time-limit truncation must keep the discount at 1.0 (not terminal)."""
  # Pendulum has no early termination condition, so the episode can only
  # end via truncation.  Recent gym releases removed 'Pendulum-v0' in
  # favour of 'Pendulum-v1'; try the new id first and fall back for
  # older installs.
  try:
    gym_env = gym.make('Pendulum-v1')
  except Exception:  # Older gym versions only register v0.
    gym_env = gym.make('Pendulum-v0')
  env = gym_wrapper.GymWrapper(gym_env)
  ts = env.reset()
  while not ts.last():
    ts = env.step(env.action_spec().generate_value())
  # A truncated (not terminated) episode keeps the full discount.
  self.assertEqual(ts.discount, 1.0)
  env.close()
def _build_environment(name, n_actions=3, max_steps=500):
  """Builds a fully-observable, flat-image minigrid env plus its spec.

  Args:
    name: gym registry id of the environment to create.
    n_actions: value written over the raw env's action-space size.
    max_steps: value written over the raw env's episode step limit.

  Returns:
    A (environment, environment_spec) tuple.
  """
  base_env = gym.make(name)
  # Patch the raw gym env in place before wrapping it.
  base_env.action_space.n = n_actions
  base_env.max_steps = max_steps
  wrapped = CustomSinglePrecisionWrapper(
      gym_wrapper.GymWrapper(ImgFlatObsWrapper(FullyObsWrapper(base_env))))
  return wrapped, specs.make_environment_spec(wrapped)
def test_early_truncation(self):
  """Time-limit truncation keeps discount 1.0 and yields a scalar reward."""
  # Pendulum has no early termination condition. Recent versions of gym force
  # to use v1. We try both in case an earlier version is installed.
  try:
    gym_env = gym.make('Pendulum-v1')
  except Exception:
    # Was a bare `except:`, which would also swallow KeyboardInterrupt and
    # SystemExit; Exception covers any gym.make failure for a missing id.
    gym_env = gym.make('Pendulum-v0')
  env = gym_wrapper.GymWrapper(gym_env)
  ts = env.reset()
  while not ts.last():
    ts = env.step(env.action_spec().generate_value())
  # Truncation (not termination) must preserve the full discount.
  self.assertEqual(ts.discount, 1.0)
  self.assertTrue(np.isscalar(ts.reward))
  env.close()
def test_basic(self):
  """EnvInfoObserver accumulates the env's info dict over an episode."""
  env = gym_wrapper.GymWrapper(GymEnvWithInfo())
  observer = env_info.EnvInfoObserver()

  first_step = env.reset()
  observer.observe_first(env, first_step)

  num_steps = 20
  for _ in range(num_steps):
    # A fresh zero action each step, matching the env's 3-dim action space.
    action = np.zeros((3,))
    step = env.step(action)
    observer.observe(env, step, action)

  metrics = observer.get_metrics()
  self.assertLen(metrics, 3)
  np.testing.assert_equal(metrics['found_checkpoint'], 2)
  np.testing.assert_equal(metrics['picked_up_an_apple'], 1)
  np.testing.assert_equal(metrics['survival_bonus'], num_steps)
def make_environment(task,
                     end_on_success,
                     max_episode_steps,
                     distance_fn,
                     goal_image,
                     baseline_distance=None,
                     eval_mode=False,
                     logdir=None,
                     counter=None,
                     record_every=100,
                     num_episodes_to_record=3):
  """Create the environment and its wrappers.

  Args:
    task: gym registry id of the task to build.
    end_on_success: if True, wrap with EndOnSuccessWrapper so episodes end
      when the task's success condition fires.
    max_episode_steps: step limit applied by StepLimitWrapper and also passed
      to the distance-model wrapper.
    distance_fn: distance model; its history_length decides whether frames
      are stacked.
    goal_image: goal passed to GoalConditionedWrapper — presumably an image
      observation of the goal state; confirm against the wrapper.
    baseline_distance: optional baseline forwarded to DistanceModelWrapper.
    eval_mode: flag forwarded to the recording and visible-state wrappers.
    logdir: if set, episodes are recorded under this directory.
    counter: counter object forwarded to RecordEpisodesWrapper.
    record_every: record one batch of episodes every this many episodes.
    num_episodes_to_record: number of episodes recorded per batch.

  Returns:
    The fully wrapped environment, in single precision.
  """
  env = gym.make(task)
  env = gym_wrapper.GymWrapper(env)
  if end_on_success:
    env = env_wrappers.EndOnSuccessWrapper(env)
  env = wrappers.StepLimitWrapper(env, max_episode_steps)
  env = env_wrappers.ReshapeImageWrapper(env)
  # Stack frames only when the distance model consumes more than one frame.
  if distance_fn.history_length > 1:
    env = wrappers.FrameStackingWrapper(env, distance_fn.history_length)
  env = env_wrappers.GoalConditionedWrapper(env, goal_image)
  # Reward shaping: mixes the learned-distance reward with the environment
  # reward using globally-configured weights.
  env = env_wrappers.DistanceModelWrapper(
      env,
      distance_fn,
      max_episode_steps,
      baseline_distance,
      distance_reward_weight=FLAGS.distance_reward_weight,
      environment_reward_weight=FLAGS.environment_reward_weight)
  if FLAGS.use_true_distance:
    env = env_wrappers.RewardWrapper(env)
  # Episode recording is enabled only when a log directory is provided.
  if logdir:
    env = env_wrappers.RecordEpisodesWrapper(
        env,
        counter,
        logdir,
        record_every=record_every,
        num_to_record=num_episodes_to_record,
        eval_mode=eval_mode)
  env = env_wrappers.VisibleStateWrapper(env, eval_mode)
  return single_precision.SinglePrecisionWrapper(env)
from acme.utils import loggers
from acme.wrappers import gym_wrapper
from agents.dqn_agent import DQNAgent
from networks.models import Models
from tensorflow.python.client import device_lib

# Print the devices (CPU/GPU) TensorFlow can see before training starts.
print(device_lib.list_local_devices())


def render(env):
  """Return an RGB frame from the underlying (unwrapped) gym environment."""
  return env.environment.render(mode='rgb_array')


# LunarLander wrapped for the dm_env interface, then cast to single precision.
environment = gym_wrapper.GymWrapper(gym.make('LunarLander-v2'))
environment = wrappers.SinglePrecisionWrapper(environment)
environment_spec = specs.make_environment_spec(environment)

# Q-network: 3 hidden layers of 300 units, observations in, one value per
# discrete action out.
model = Models.sequential_model(
    input_shape=environment_spec.observations.shape,
    num_outputs=environment_spec.actions.num_values,
    hidden_layers=3,
    layer_size=300)

agent = DQNAgent(environment_spec=environment_spec, network=model)
# NOTE(review): this logger is created but never passed to EnvironmentLoop
# below — the loop will use its own default logger; confirm intent.
logger = loggers.TerminalLogger(time_delta=10.)
loop = acme.EnvironmentLoop(environment=environment, actor=agent)
# Runs forever (no episode budget given).
loop.run()
from acme.utils import loggers
import gym
import dm_env
import matplotlib.pyplot as plt
import numpy as np
import reverb
import sonnet as snt
import tensorflow as tf
from IPython.display import clear_output

# Clear any prior cell output — this script is meant to run in IPython/Colab.
clear_output()

# Load Environment:
env = gym_wrapper.GymWrapper(gym.make('MountainCarContinuous-v0'))
env = wrappers.SinglePrecisionWrapper(env)
# NOTE(review): renders one frame at import time and discards the result —
# possibly a leftover warm-up call for the renderer; confirm it is needed.
env.environment.render(mode='rgb_array')


def render(env):
  """Return an RGB frame from the underlying (unwrapped) gym environment."""
  return env.environment.render(mode='rgb_array')


environment_spec = specs.make_environment_spec(env)

# Create D4PG Agent:
# Get total number of action dimensions from action spec