def test_different_first_observation_if_different_seed(self):
    """Differently-seeded environments should not start with identical observations."""
    first = soccer.load(team_size=2, random_state=1).reset()
    second = soccer.load(team_size=2, random_state=2).reset()
    # assertSameObservation is expected to raise; if it does not, the two
    # seeds produced the same initial state, which is a failure.
    try:
        self.assertSameObservation(first.observation, second.observation)
    except AssertionError:
        pass
    else:
        self.fail("Observations are unexpectedly identical.")
def test_same_first_observation_if_same_seed(self, disable_walker_contacts):
    """Two environments loaded with the same seed must start identically."""
    seed = 42
    # Build two independent environments with identical configuration.
    episodes = [
        soccer.load(
            team_size=2,
            random_state=seed,
            disable_walker_contacts=disable_walker_contacts).reset()
        for _ in range(2)
    ]
    self.assertSameObservation(episodes[0].observation,
                               episodes[1].observation)
def __init__(self, team_size, time_limit):
    """Bridge the dm_control soccer environment to the Ray MultiAgentEnv interface.

    :param team_size: number of agents on each of the two teams
    :param time_limit: time limit of the env (1 second = 40 timesteps)
    """
    self.team_size = team_size
    self.agent_num = 2 * team_size
    self.n = self.agent_num
    self.env = dm_soccer.load(team_size=team_size, time_limit=time_limit)

    # Action space: every agent shares one identical 3-dim continuous Box.
    action_box = spaces.Box(-1.0, 1.0, shape=[3], dtype=np.float32)
    self.action_space = [action_box] * self.agent_num

    # Observation space: flatten every per-agent feature into one vector.
    self.feature_space_length = 0
    for feature_spec in self.env.observation_spec()[0].values():
        self.feature_space_length += np.prod(feature_spec.shape)
    obs_box = spaces.Box(low=-np.inf,
                         high=np.inf,
                         shape=(self.feature_space_length, ),
                         dtype=np.float32)
    self.observation_space = [obs_box] * self.n
    self.info = {}
def __init__(self, batch_size=None, **kwargs):
    """Wrap a dm_control soccer environment; requires 'team_size' and 'time_limit' in kwargs."""
    # super().__init__(batch_size, **kwargs)
    self.base_env = dm_soccer.load(kwargs["team_size"], kwargs["time_limit"])
    self.n_agents = len(self.base_env.action_spec())
    # Per-agent spaces: a fixed 154-dim observation and a 3-dim action,
    # replicated once per agent (all agents share the same Box objects).
    obs_box = Box(low=-10000, high=10000, shape=(154, ))
    act_box = Box(low=-1, high=1, shape=(3, ))
    self.observation_space = [obs_box] * self.n_agents
    self.action_space = [act_box] * self.n_agents
def __init__(self, env_name, ALGO):
    """A base template for all environment wrappers.

    :param env_name: name of the environment (currently unused; the
        2-vs-2 soccer task is always loaded — TODO confirm intent)
    :param ALGO: algorithm identifier, stored for later use
    """
    from dm_control.locomotion import soccer as dm_soccer
    import gym  # NOTE(review): unused here — presumably needed by subclasses; confirm
    self.env = dm_soccer.load(team_size=2, time_limit=10.)
    # Bug fix: the original referenced an undefined name `env`; the
    # action spec must come from the environment just created.
    self.action_specs = self.env.action_spec()
    self.ALGO = ALGO
def test_load_env(self, team_size, disable_walker_contacts):
    """Smoke-test a full episode driven by uniformly random actions."""
    env = soccer.load(team_size=team_size,
                      time_limit=2.,
                      disable_walker_contacts=disable_walker_contacts)
    action_specs = env.action_spec()
    random_state = np.random.RandomState(0)

    time_step = env.reset()
    while not time_step.last():
        # One random action per player, drawn within each spec's bounds.
        actions = [
            random_state.uniform(spec.minimum, spec.maximum, size=spec.shape)
            for spec in action_specs
        ]
        time_step = env.step(actions)
        for i in range(len(action_specs)):
            logging.info(
                "Player %d: reward = %s, discount = %s, observations = %s.",
                i, time_step.reward[i], time_step.discount,
                time_step.observation[i])
def __init__(self,
             team_size=2,
             time_limit=45,
             disable_walker_contacts=True,
             team_num=2,
             render_name="human"):
    """Gym-style wrapper around the dm_control soccer task.

    :param team_size: number of walkers per team
    :param time_limit: episode time limit passed to dm_soccer.load
    :param disable_walker_contacts: forwarded to dm_soccer.load
    :param team_num: number of teams (the underlying task uses 2)
    :param render_name: render mode name used to build the render-mode list
    """
    self.team_size = team_size
    self.team_num = team_num
    self.env = dm_soccer.load(self.team_size, time_limit,
                              disable_walker_contacts)

    # Every walker shares an identical 3-dim continuous action Box,
    # grouped per team, then per match.
    ac_sp_i = spaces.Box(low=-1.0, high=1.0, shape=(3, ), dtype=np.float32)
    self.action_space = spaces.Tuple(
        [spaces.Tuple(tuple([ac_sp_i] * self.team_size))] * self.team_num)

    self.timestep = None

    # Observation space: one 1-D Box per observation feature, keyed by
    # feature name.  (The original also pre-assigned a NoneSpace placeholder
    # that was unconditionally overwritten here; removed as dead code.)
    odict_sp = {}
    odict = self.env.observation_spec()
    for key in odict[0]:
        # Bug fix: `np.int` was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin int is the documented replacement.
        odict_sp[key] = spaces.Box(
            -np.inf, np.inf, shape=(int(np.prod(odict[0][key].shape)), ))
    self.observation_space = spaces.Tuple(
        [spaces.Tuple([spaces.Dict(odict_sp)] * self.team_size)] *
        self.team_num)

    # render
    render_mode_list = self.create_render_mode(render_name,
                                               show=False,
                                               return_pixel=True)
    if render_mode_list is not None:
        self.metadata['render.modes'] = list(render_mode_list.keys())
        self.viewer = {key: None for key in render_mode_list.keys()}
    else:
        self.metadata['render.modes'] = []
    self.render_mode_list = render_mode_list

    # set seed
    self._seed()
def make_env():
    """Build a Gym-compatible 2-vs-2 soccer environment with 10-second episodes."""
    soccer_env = dm_soccer.load(team_size=2, time_limit=10.)
    wrapped = dmc2gym.DmControlWrapper('', '', env=soccer_env)
    return GymEnvWrapper(wrapped)
def test_same_first_observation_if_same_seed(self):
    """Two environments loaded with the same seed start identically."""
    seed = 42
    first = soccer.load(team_size=2, random_state=seed).reset()
    second = soccer.load(team_size=2, random_state=seed).reset()
    self.assertSameObservation(first.observation, second.observation)
import numpy as np
from dm_control.locomotion import soccer as dm_soccer

# Load the 2-vs-2 soccer environment with episodes of 10 seconds:
env = dm_soccer.load(team_size=2, time_limit=10.)

# Retrieves action_specs for all 4 players.
action_specs = env.action_spec()

# Step through the environment for one episode with random actions.
time_step = env.reset()
while not time_step.last():
    # One uniformly random action per player, drawn within the spec bounds
    # (comprehension replaces the manual append loop).
    actions = [
        np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
        for spec in action_specs
    ]
    time_step = env.step(actions)

    # Idiom fix: iterate the per-player reward/observation pairs directly
    # instead of indexing via range(len(...)).  Output is unchanged.
    for i, (reward, observation) in enumerate(
            zip(time_step.reward, time_step.observation)):
        print(
            "Player {}: reward = {}, discount = {}, observations = {}.".format(
                i, reward, time_step.discount, observation))