def custom_env(env_config):
    '''Create an env stub for policy training.

    Only the action_space and observation_space attributes need to be
    defined; no other env functions are needed (e.g. step(), reset(), etc.).
    '''
    env = gym.Env()
    env.action_space = agent_action_space
    env.observation_space = agent_obs_space
    return env
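# Illustrative usage sketch (an assumption, not part of the original source):
# custom_env() relies on module-level `agent_action_space` and `agent_obs_space`;
# the concrete spaces below are hypothetical placeholders.
import gym
import numpy as np

agent_action_space = gym.spaces.Discrete(4)
agent_obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(8,), dtype=np.float32)

stub = custom_env({})
assert stub.action_space.n == 4
assert stub.observation_space.shape == (8,)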
def setUp(self) -> None:
    self.obs_idx = 0
    self.dummy_env = gym.Env()
    self.dummy_env.observation_space = gym.spaces.Box(
        low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)
    self.obs_buffer = ObservationBufferWrapper(self.dummy_env,
                                               obs_buffer_depth=3)
    self.visualize_buffer()
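# Side note (assumption): ObservationBufferWrapper is project-specific and its
# behaviour beyond the constructor is not shown here. If it keeps a rolling
# buffer of the last N observations, recent gym versions offer a comparable
# built-in wrapper for real environments:
import gym

env = gym.make("CartPole-v1")
stacked = gym.wrappers.FrameStack(env, num_stack=3)  # observations become a window of the last 3 frames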
def test_load_cell_raises_exception_on_non_loadable_cell(self):
    save_function = mock.MagicMock()
    load_function = mock.MagicMock()
    core_env = gym.Env()
    env = save_loadable.SaveLoadableWrapper(core_env, save_function,
                                            load_function)
    test_cell = base_returning_info.BaseReturningInfo()
    policy = load_policy.LoadPolicy(env)
    with self.assertRaisesRegex(
            TypeError, "LoadableCellInfo.*got.*BaseReturningInfo"):
        policy.return_to_cell(test_cell)
def test(rank, params, shared_model):
    torch.manual_seed(params.seed + rank)  # desynchronizing the test agent
    # gym.Env() cannot be constructed from an env name; gym.make() is the
    # standard factory (video recording would need an extra wrapper).
    env = gym.make(params.env_name)
    env.seed(params.seed + rank)  # desynchronizing the environment
    model = AC_netwok(env.observation_space.shape[0], env.action_space)  # creating one model
    model.eval()  # putting the model in "eval" mode because it won't be trained
    state = env.reset()  # getting the input images as numpy arrays
    state = torch.from_numpy(state)  # converting them into torch tensors
    reward_sum = 0  # initializing the sum of rewards to 0
    done = True  # initializing done to True
    start_time = time.time()  # getting the starting time to measure the computation time
    actions = deque(maxlen=100)  # cf https://pymotw.com/2/collections/deque.html
    episode_length = 0  # initializing the episode length to 0
    while True:  # repeat
        episode_length += 1  # incrementing the episode length by one
        if done:  # synchronizing with the shared model (same as train.py)
            model.load_state_dict(shared_model.state_dict())
            cx = Variable(torch.zeros(1, 256), volatile=True)
            hx = Variable(torch.zeros(1, 256), volatile=True)
        else:
            cx = Variable(cx.data, volatile=True)
            hx = Variable(hx.data, volatile=True)
        value, action_value, (hx, cx) = model(
            (Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
        prob = F.softmax(action_value, dim=1)
        action = prob.max(1)[1].data.numpy()  # the test agent does not explore, it directly plays the best action
        state, reward, done, _ = env.step(action[0, 0])  # done = done or episode_length >= params.max_episode_length
        reward_sum += reward
        if done:  # printing the results at the end of each part
            print("Time {}, episode reward {}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                reward_sum, episode_length))
            reward_sum = 0  # reinitializing the sum of rewards
            episode_length = 0  # reinitializing the episode length
            actions.clear()  # reinitializing the actions
            state = env.reset()  # reinitializing the environment
            time.sleep(60)  # taking a one-minute break to let the other agents practice (if the game is done)
        state = torch.from_numpy(state)  # new state and we continue
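# Note (assumption): the `actions` deque above is created and cleared but never
# appended to in this snippet. In similar A3C test loops it is commonly used to
# detect an agent stuck repeating one action, along the lines of:
#
#     actions.append(action[0, 0])
#     if actions.count(actions[0]) == actions.maxlen:
#         done = True  # force a reset when the last 100 actions are identical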
def test_gym_loads_cell_using_saved_snapshot(self):
    save_function = mock.MagicMock()
    load_function = mock.MagicMock()
    core_env = gym.Env()
    env = save_loadable.SaveLoadableWrapper(core_env, save_function,
                                            load_function)
    cell = loadable_cell_info.LoadableCellInfo(snapshot_data="snapshot_data")
    policy = load_policy.LoadPolicy(env)
    policy.return_to_cell(cell)
    load_function.assert_called_once_with(core_env, "snapshot_data")
import logging

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import subprocess
# See user manual https://usermanual.wiki/Document/pybullet20quickstart20guide.479068914.pdf
import pybullet as p
import pybullet_data
from pkg_resources import parse_version
from servorobots.components.dc_motor import GearedDcMotor
from servorobots.components.dc_motor import TimestampInput

logger = logging.getLogger(__name__)

gym.Env()  # creates and immediately discards a bare Env instance (no effect)


class qt:
    @staticmethod
    def q_mult(q1, q2):
        # Hamilton product of two quaternions in (x, y, z, w) order.
        x1, y1, z1, w1 = q1
        x2, y2, z2, w2 = q2
        w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
        x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
        y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
        z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
        return x, y, z, w

    @staticmethod
    def q_conjugate(q):
        x, y, z, w = q
        return (-x, -y, -z, w)
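# Illustrative usage (not from the original source): rotating a vector v by a
# quaternion q via q * v * conjugate(q), using the (x, y, z, w) convention the
# helpers above expect.
import math

q = (0.0, 0.0, math.sin(math.pi / 4), math.cos(math.pi / 4))  # 90-degree rotation about z
v = (1.0, 0.0, 0.0, 0.0)  # pure quaternion encoding the vector (1, 0, 0)
rotated = qt.q_mult(qt.q_mult(q, v), qt.q_conjugate(q))
# rotated[:3] is approximately (0, 1, 0)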
if __name__ == "__main__":
    action_space = gym.spaces.Dict({
        "attack": gym.spaces.Discrete(2),
        "back": gym.spaces.Discrete(2),
        "forward": gym.spaces.Discrete(2),
        "camera": gym.spaces.Box(low=-180, high=180, shape=(2,), dtype=np.float32)
    })
    action_space = translate_action_space(action_space)
    print(repr(action_space))

    env = gym.Env()
    env.action_space = action_space
    env2 = DictToMultiDiscreteActionWrapper(env)
    try:
        env2.step(env2.action_space.sample())
    except NotImplementedError:
        pass

    env = gym.Env()
    env.action_space = action_space
    env2 = DictToDiscreteActionWrapper(env)
    try:
        env2.step(env2.action_space.sample())
    except NotImplementedError:
        pass
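# Background sketch (assumption, not the project's wrappers used above):
# flattening a Dict action space into a MultiDiscrete one generally means
# listing each discrete sub-space's cardinality; a continuous Box component
# such as "camera" would first have to be discretized into bins.
import gym

dict_space = gym.spaces.Dict({
    "attack": gym.spaces.Discrete(2),
    "back": gym.spaces.Discrete(2),
    "forward": gym.spaces.Discrete(2),
})
multi = gym.spaces.MultiDiscrete(
    [space.n for space in dict_space.spaces.values()])
print(multi.sample())  # e.g. array([1, 0, 1])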
import gym

# gym.Env is the abstract base class and cannot be constructed from an env id;
# registered environments are created with gym.make() (note the version suffix).
env = gym.make("CartPole-v1")
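# Minimal interaction sketch with the classic gym API (pre-0.26 four-tuple step
# signature), added for illustration; it is not part of the original snippet.
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())  # random policy
    total_reward += reward
print("episode return:", total_reward)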