def create_d4rl_env_and_dataset(task_name, batch_size):
    """Build a d4rl environment together with its offline training dataset.

    Args:
      task_name: Name of the d4rl task, passed to `gym.make`.
      batch_size: Mini batch size used when batching the dataset.

    Returns:
      A tuple `(env, dataset)`: the gym environment wrapped in
      `wrappers.GymWrapper`, and a shuffled, endlessly repeating, batched
      dataset built from `Inputs(data=(states, actions, rewards, discounts,
      next_states))`.
    """

    def as_float32(values):
        # Every field is materialised as a float32 numpy array.
        return np.array(values, dtype=np.float32)

    env = wrappers.GymWrapper(gym.make(task_name))
    raw = d4rl.qlearning_dataset(env)

    states = as_float32(raw['observations'])
    actions = as_float32(raw['actions'])
    rewards = as_float32(raw['rewards'])
    # The discount is the negated terminal flag: 0.0 on terminal transitions,
    # 1.0 everywhere else.
    discounts = as_float32(np.logical_not(raw['terminals']))
    next_states = as_float32(raw['next_observations'])

    transitions = Inputs(
        data=(states, actions, rewards, discounts, next_states))
    num_transitions = states.shape[0]

    dataset = tf_data.Dataset.from_tensor_slices(transitions)
    dataset = dataset.cache()
    # The shuffle buffer covers every transition, and the order is redrawn on
    # each pass over the data.
    dataset = dataset.shuffle(num_transitions, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(tf_data.experimental.AUTOTUNE)
    return env, dataset
def make_environment(suite: str, task: str) -> dm_env.Environment:
    """Makes the requested continuous control environment.

    Args:
      suite: One of 'gym' or 'control'.
      task: Task to load. If `suite` is 'control', the task must be formatted as
        f'{domain_name}:{task_name}'.

    Returns:
      An environment satisfying the dm_env interface expected by Acme agents.

    Raises:
      ValueError: If `suite` is not in `_VALID_TASK_SUITES`, or if `suite` is
        'control' and `task` is not of the form 'domain_name:task_name'.
    """
    if suite not in _VALID_TASK_SUITES:
        raise ValueError(
            f'Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}'
        )

    if suite == 'gym':
        env = gym.make(task)
        # Make sure the environment obeys the dm_env.Environment interface.
        env = wrappers.GymWrapper(env)

    elif suite == 'control':
        # Load dm_suite lazily so that a Mujoco license is not required when the
        # control suite is not being used.
        from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top

        # Validate the format explicitly: a bare tuple-unpack of the split would
        # fail with an opaque "not enough / too many values to unpack" message.
        parts = task.split(':')
        if len(parts) != 2:
            raise ValueError(
                f'Invalid control task: {task!r}. Expected the format '
                "'domain_name:task_name'."
            )
        domain_name, task_name = parts
        env = dm_suite.load(domain_name, task_name)
        env = wrappers.ConcatObservationWrapper(env)

    # Wrap the environment so the expected continuous action spec is [-1, 1].
    # Note: this is a no-op on 'control' tasks.
    env = wrappers.CanonicalSpecWrapper(env, clip=True)
    env = wrappers.SinglePrecisionWrapper(env)
    return env
def make_environment_atari(env_name, seed):
    """Create an Atari gym environment exposed through the dm_env interface.

    Args:
      env_name: Environment id passed to `gym.make`.
      seed: Seed forwarded to `AtariWrapper`.

    Returns:
      The gym environment wrapped by `AtariWrapper` and then by
      `wrappers.GymWrapper`.
    """
    atari_env = AtariWrapper(
        gym.make(env_name),
        partial_observation_wrapper=QuadrantObservationWrapper,
        partial_percentage=1.0,
        seed=seed,
    )
    return wrappers.GymWrapper(atari_env)
def make_environment(
    name: str,
    seed: Optional[int] = None,
) -> dm_env.Environment:
    """Create a seeded gym environment wrapped for the dm_env interface.

    Args:
      name: Environment id passed to `gym.make`.
      seed: Value passed to the environment's `seed` method; `None` is passed
        through unchanged.

    Returns:
      The gym environment wrapped in `wrappers.GymWrapper`.
    """
    gym_env = gym.make(name)
    # Seeding is applied to the raw gym environment, before it is wrapped.
    gym_env.seed(seed)
    return wrappers.GymWrapper(gym_env)
def make_gym_environment(
    task_name: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
    """Creates an OpenAI Gym environment.

    Args:
      task_name: Environment id passed to `gym.make`.

    Returns:
      The gym environment wrapped in `wrappers.GymWrapper` and then in
      `wrappers.SinglePrecisionWrapper`.
    """
    # GymWrapper adapts the gym environment to the dm_env.Environment interface.
    dm_environment = wrappers.GymWrapper(gym.make(task_name))
    return wrappers.SinglePrecisionWrapper(dm_environment)
def create_d4rl_env(
    task_name,
):
    """Create the environment for the d4rl task.

    Args:
      task_name: Name of d4rl task, passed to `gym.make`.

    Returns:
      The gym environment wrapped in `wrappers.GymWrapper`.
    """
    return wrappers.GymWrapper(gym.make(task_name))
def make_environment(
    task: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
    """Creates an OpenAI Gym environment.

    Args:
      task: Environment id passed to `gym.make`.

    Returns:
      The gym environment wrapped, in order, by `GymWrapper`,
      `CanonicalSpecWrapper` (with `clip=True`) and `SinglePrecisionWrapper`.
    """
    # Adapt the raw gym environment to the dm_env.Environment interface.
    dm_environment = wrappers.GymWrapper(gym.make(task))
    # Clip the action returned by the agent to the environment spec.
    clipped = wrappers.CanonicalSpecWrapper(dm_environment, clip=True)
    return wrappers.SinglePrecisionWrapper(clipped)
def make_environment(evaluation: bool = False,
                     task: str = 'HalfCheetah-v3') -> dm_env.Environment:
    """Creates an OpenAI Gym environment.

    Args:
      evaluation: Accepted but unused; the same environment is built either way.
      task: Environment id passed to `gym.make`.

    Returns:
      The gym environment wrapped, in order, by `GymWrapper`,
      `CanonicalSpecWrapper` (with `clip=True`) and `SinglePrecisionWrapper`.
    """
    # The flag is deliberately discarded; it has no effect on construction.
    del evaluation

    # Adapt the raw gym environment to the dm_env.Environment interface.
    dm_environment = wrappers.GymWrapper(gym.make(task))
    # Clip the action returned by the agent to the environment spec.
    clipped = wrappers.CanonicalSpecWrapper(dm_environment, clip=True)
    return wrappers.SinglePrecisionWrapper(clipped)
def make_single_agent_env(scenario: str, render=False):
    """Build a single-agent race environment behind the dm_env interface.

    Args:
      scenario: Scenario spec passed to `SingleAgentScenario.from_spec`.
      render: Forwarded to `from_spec` as `rendering`.

    Returns:
      A `VectorizedSingleAgentRaceEnv` holding the one scenario, wrapped via
      `wrap_env` with 'single_agent_wrappers.yml', then by `GymWrapper` and
      `SinglePrecisionWrapper`.
    """
    agent_scenario = SingleAgentScenario.from_spec(scenario, rendering=render)
    race_env = wrap_env(
        env=VectorizedSingleAgentRaceEnv(scenarios=[agent_scenario]),
        wrapper_configs='single_agent_wrappers.yml',
    )
    dm_environment = wrappers.GymWrapper(environment=race_env)
    return wrappers.SinglePrecisionWrapper(dm_environment)


# Disabled multi-agent counterpart, kept for reference:
#
# def make_multi_agent_env(scenario: str, render=False, test=False):
#     scenario = MultiAgentScenario.from_spec(scenario, rendering=render)
#     env = VectorizedMultiAgentRaceEnv(scenarios=[scenario])
#     if test:
#         env = wrap_env(env=env, wrapper_configs='multi_agent_test_wrappers.yml')
#     else:
#         env = wrap_env(env=env, wrapper_configs='multi_agent_wrappers.yml')
#
#     env = MultiAgentGymWrapper(environment=env)
#     env = wrappers.SinglePrecisionWrapper(env)
#     return env
def make_environment(task, evaluation = False):
    """Creates an OpenAI Gym environment.

    Args:
      task: Environment id passed to `gym.make`.
      evaluation: When truthy, the finished environment is additionally wrapped
        in `env_wrappers.SuccessRewardWrapper` with `success_threshold=1.`.

    Returns:
      The gym environment wrapped, in order, by `AdroitSparseRewardWrapper`,
      `GymWrapper`, `CanonicalSpecWrapper` (with `clip=True`) and
      `SinglePrecisionWrapper`, plus `SuccessRewardWrapper` for evaluation.
    """
    sparse_env = env_wrappers.AdroitSparseRewardWrapper(gym.make(task))
    # Adapt the gym environment to the dm_env.Environment interface.
    dm_environment = wrappers.GymWrapper(sparse_env)
    # Clip the action returned by the agent to the environment spec.
    dm_environment = wrappers.CanonicalSpecWrapper(dm_environment, clip=True)
    dm_environment = wrappers.SinglePrecisionWrapper(dm_environment)

    if not evaluation:
        return dm_environment
    return env_wrappers.SuccessRewardWrapper(dm_environment,
                                             success_threshold=1.)
from env.RSEnv import RSEnv
from env.TestRSEnv import TestRSEnv

from acme import environment_loop
from acme import specs
from acme import wrappers
from acme.agents.tf import d4pg
from acme.tf import networks
from acme.tf import utils as tf2_utils
from acme.utils import loggers

import numpy as np
import sonnet as snt

import gym

# Build the RSEnv environment, adapt it to the dm_env interface with
# GymWrapper, and make sure it outputs single-precision floats.
environment = wrappers.SinglePrecisionWrapper(wrappers.GymWrapper(RSEnv()))

# The environment spec is derived from the fully wrapped environment.
environment_spec = specs.make_environment_spec(environment)

#@title Build agent networks

# BUILDING A D4PG AGENT

# Total number of action dimensions: the product of the action spec's shape.
num_dimensions = np.prod(environment_spec.actions.shape, dtype=int)

# The shared observation network is a state-less operation (batch_concat).
observation_network = tf2_utils.batch_concat