Example #1
 def space_step(self, action_e):
     action = action_e[(0, 0)]  # single body
     if self.done:  # space envs run continually without a central reset signal
         return self.space_reset()
     if not self.is_discrete:
         action = np.array([action])
     state, reward, done, _info = self.u_env.step(action)
     reward = guard_reward(reward)
     reward *= self.reward_scale
     if util.to_render():
         self.u_env.render()
     self.done = done = done or self.clock.get('t') > self.max_timestep
     if isinstance(self.u_env.observation_space,
                   gym.spaces.discrete.Discrete):
         # one-hot encode discrete observations before fanning them out per body
         state = util.to_one_hot(state, self.u_env.observation_space.n)
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(
         ENV_DATA_NAMES, e=self.e)
     for ab, body in util.ndenumerate_nonan(self.body_e):
         reward_e[ab] = reward
         state_e[ab] = state
         done_e[ab] = done
     logger.debug(
         f'Env {self.e} step reward_e: {reward_e}, state_e: {state_e}, done_e: {done_e}'
     )
     return reward_e, state_e, done_e
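Usage sketch (not from the project): space_step above reads its action from an (agent, body)-indexed collection, so a single-body call could look like the following, assuming `env` is an already-initialized instance and that `action_e` may be any mapping indexable by the coordinate (0, 0).

 # illustrative only: single-body action_e keyed by the (agent, body) pair (0, 0)
 action_e = {(0, 0): env.u_env.action_space.sample()}
 reward_e, state_e, done_e = env.space_step(action_e)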
Example #2
File: unity.py    Project: c-w-m/slm-lab
 def __init__(self, name):
     worker_id = int(f'{os.getpid()}{int(ps.unique_id())}'[-4:])
     super().__init__(get_env_path(name),
                      worker_id,
                      no_graphics=not util.to_render(),
                      multiagent=True)
     self.num_envs = self.number_agents
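The worker_id one-liner above packs the process pid and a pydash unique_id counter into a string and keeps the last four digits, presumably so each Unity worker gets a distinct small id. A step-by-step sketch of that expression (the values shown are made up):

 import os
 import pydash as ps

 pid = os.getpid()                     # e.g. 48213
 uid = int(ps.unique_id())             # pydash counter, e.g. 7
 worker_id = int(f'{pid}{uid}'[-4:])   # last 4 digits, e.g. 2137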
Example #3
 def reset(self):
     _reward = np.nan
     state = self.u_env.reset()
     self.done = done = False
     if util.to_render():
         self.u_env.render()
     logger.debug(f'Env {self.e} reset reward: {_reward}, state: {state}, done: {done}')
     return _reward, state, done
Example #4
 def space_reset(self):
     _reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
     for ab, body in util.ndenumerate_nonan(self.body_e):
         state = self.u_env.reset()
         state_e[ab] = state
         done_e[ab] = self.done = False
     if util.to_render():
         self.u_env.render()
     logger.debug(f'Env {self.e} reset reward_e: {_reward_e}, state_e: {state_e}, done_e: {done_e}')
     return _reward_e, state_e, done_e
Example #5
 def step(self, action):
     if not self.is_discrete:  # guard for continuous
         action = np.array([action])
     state, reward, done, _info = self.u_env.step(action)
     if util.to_render():
         self.u_env.render()
     self.done = done = done or self.clock.get('t') > self.max_timestep
     logger.debug(
         f'Env {self.e} step reward: {reward}, state: {state}, done: {done}'
     )
     return reward, state, done
Example #6
 def step(self, action):
     if not self.is_discrete:  # guard for continuous
         action = np.array([action])
     state, reward, done, _info = self.u_env.step(action)
     reward = guard_reward(reward)
     reward *= self.reward_scale
     if util.to_render():
         self.u_env.render()
     if self.max_t is not None:
         done = done or self.clock.t > self.max_t
     self.done = done
     logger.debug(f'Env {self.e} step reward: {reward}, state: {state}, done: {done}')
     return reward, state, done
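guard_reward itself is not shown in these snippets; a plausible reading, given that it is applied right before reward scaling, is that it coerces array-like rewards from quirky envs into plain scalars. A hypothetical sketch, not the project's actual implementation:

 import numpy as np

 def guard_reward(reward):
     '''Hypothetical: coerce 0-d or single-element array rewards to a Python scalar.'''
     if np.isscalar(reward):
         return reward
     return np.asarray(reward).item()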
Example #7
 def reset(self):
     _reward = np.nan
     state = self.u_env.reset()
     self.done = done = False
     if util.to_render():
         self.u_env.render()
     logger.debug(
         f'Env {self.e} reset reward: {_reward}, state: {state}, done: {done}'
     )
     if isinstance(self.u_env.observation_space,
                   gym.spaces.discrete.Discrete):
         state = util.to_one_hot(state, self.u_env.observation_space.n)
     return _reward, state, done
Example #8
 def space_reset(self):
     _reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(
         ENV_DATA_NAMES, e=self.e)
     for ab, body in util.ndenumerate_nonan(self.body_e):
         state = self.u_env.reset()
         if isinstance(self.u_env.observation_space,
                       gym.spaces.discrete.Discrete):
             # one-hot encode discrete observations before storing the per-body state
             state = util.to_one_hot(state, self.u_env.observation_space.n)
         state_e[ab] = state
         done_e[ab] = self.done = False
     if util.to_render():
         self.u_env.render()
     logger.debug(
         f'Env {self.e} reset reward_e: {_reward_e}, state_e: {state_e}, done_e: {done_e}'
     )
     return _reward_e, state_e, done_e
Example #9
 def step(self, action):
     if not self.is_discrete:  # guard for continuous
         action = np.array([action])
     state, reward, done, _info = self.u_env.step(action)
     reward = guard_reward(reward)
     reward *= self.reward_scale
     if util.to_render():
         self.u_env.render()
     self.done = done = done or self.clock.get('t') > self.max_timestep
     logger.debug(
         f'Env {self.e} step reward: {reward}, state: {state}, done: {done}'
     )
     if isinstance(self.u_env.observation_space,
                   gym.spaces.discrete.Discrete):
         state = util.to_one_hot(state, self.u_env.observation_space.n)
     return reward, state, done
Example #10
 def __init__(self, spec):
     self.done = False
     self.env_spec = spec['env'][0]  # idx 0 for single-env
     # set default
     util.set_attr(
         self,
         dict(
             log_frequency=None,  # default to log at epi done
             frame_op=None,
             frame_op_len=None,
             normalize_state=False,
             reward_scale=None,
             num_envs=None,
         ))
     util.set_attr(self, spec['meta'], [
         'log_frequency',
         'eval_frequency',
     ])
     util.set_attr(self, self.env_spec, [
         'name',
         'frame_op',
         'frame_op_len',
         'normalize_state',
         'reward_scale',
         'num_envs',
         'max_t',
         'max_frame',
     ])
     seq_len = ps.get(spec, 'agent.0.net.seq_len')
     if seq_len is not None:  # infer if using RNN
         self.frame_op = 'stack'
         self.frame_op_len = seq_len
     if util.in_eval_lab_modes():  # use singleton for eval
         self.num_envs = 1
         self.log_frequency = None
     if spec['meta']['distributed'] != False:  # divide max_frame for distributed
         self.max_frame = int(self.max_frame / spec['meta']['max_session'])
     self.is_venv = (self.num_envs is not None and self.num_envs > 1)
     if self.is_venv:
         assert self.log_frequency is not None, 'Specify log_frequency when using venv'
     # tick with a multiple of num_envs to properly count frames
     self.clock_speed = 1 * (self.num_envs or 1)
     self.clock = Clock(self.max_frame, self.clock_speed)
     self.to_render = util.to_render()
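For reference, a minimal spec dict covering just the keys this __init__ reads might look as follows; this is an assumption-laden sketch, and real SLM-Lab spec files carry many more fields.

 spec = {
     'env': [{
         'name': 'CartPole-v0',
         'num_envs': 4,
         'max_t': None,
         'max_frame': 10000,
     }],
     'meta': {
         'distributed': False,
         'eval_frequency': 500,
         'log_frequency': 500,
         'max_session': 1,
     },
     'agent': [{'net': {}}],  # no seq_len, so no RNN frame stacking is inferred
 }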
Example #11
File: base.py    Project: mattgrcia/SLM-Lab
 def __init__(self, spec):
     self.env_spec = spec['env'][0]  # idx 0 for single-env
     # set default
     util.set_attr(
         self,
         dict(
             eval_frequency=10000,
             log_frequency=10000,
             frame_op=None,
             frame_op_len=None,
             image_downsize=(84, 84),
             normalize_state=False,
             reward_scale=None,
             num_envs=1,
         ))
     util.set_attr(self, spec['meta'], [
         'eval_frequency',
         'log_frequency',
     ])
     util.set_attr(self, self.env_spec, [
         'name',
         'frame_op',
         'frame_op_len',
         'image_downsize',
         'normalize_state',
         'reward_scale',
         'num_envs',
         'max_t',
         'max_frame',
     ])
     # override if env is for eval
     if util.in_eval_lab_modes():
         self.num_envs = ps.get(spec, 'meta.rigorous_eval')
     self.to_render = util.to_render()
     self._infer_frame_attr(spec)
     self._infer_venv_attr()
     self._set_clock()
     self.done = False
     self.total_reward = np.nan
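Putting the single-env pieces together, a toy run loop over the reset()/step() API shown in the earlier examples could look like this; `OpenAIEnv` and the random policy are illustrative assumptions, not a prescribed entry point.

 env = OpenAIEnv(spec)  # hypothetical: whichever Env subclass the spec selects
 _reward, state, done = env.reset()
 while not done:
     action = env.u_env.action_space.sample()  # random policy, for illustration only
     reward, state, done = env.step(action)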