class SpaceSession(Session):
    '''Session for multi-agent/env setting'''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        self.data = None

        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.aeb_space.init_body_space()
        util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
        util.try_set_cuda_id(self.spec, self.info_space)
        assert not ps.is_dict(global_nets), f'multi agent global_nets must be a list of dicts, got {global_nets}'
        self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent_space, env_space):
        '''Save for each agent, env pair if the episode is at a checkpoint'''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                env = body.env
                super(SpaceSession, self).save_if_ckpt(agent, env)

    def run_all_episodes(self):
        '''
        Continually run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done.
        '''
        all_done = self.aeb_space.tick('epi')
        reward_space, state_space, done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not all_done:
            all_done = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)
            self.save_if_ckpt(self.agent_space, self.env_space)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
# Variant of the SpaceSession above: uses util.set_logger and try_ckpt instead of
# set_session_logger and save_if_ckpt, and tracks an eval_proc handle for a
# run_online_eval process that is awaited before the session closes.
class SpaceSession(Session):
    '''Session for multi-agent/env setting'''

    def __init__(self, spec, info_space, global_nets=None):
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None
        self.eval_proc = None  # reference to the run_online_eval process

        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.aeb_space.init_body_space()
        util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
        util.try_set_cuda_id(self.spec, self.info_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent_space, env_space):
        '''Try to checkpoint each agent, env pair and run_online_eval at the start, every save_freq, and at the end'''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                env = body.env
                super(SpaceSession, self).try_ckpt(agent, env)

    def run_all_episodes(self):
        '''
        Continually run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done.
        '''
        all_done = self.aeb_space.tick('epi')
        reward_space, state_space, done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not all_done:
            self.try_ckpt(self.agent_space, self.env_space)
            all_done = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)
        self.try_ckpt(self.agent_space, self.env_space)
        if self.eval_proc is not None:  # wait for final eval before closing
            util.run_cmd_wait(self.eval_proc)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
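

# Usage sketch (illustrative, not from this excerpt): how a SpaceSession might be
# driven end to end. The module paths, spec file/name, and InfoSpace setup below are
# assumptions based on a typical SLM Lab layout and may differ across versions.
if __name__ == '__main__':
    from slm_lab.experiment.monitor import InfoSpace  # assumed location of InfoSpace
    from slm_lab.spec import spec_util  # assumed spec loader

    spec = spec_util.get('demo.json', 'dqn_cartpole')  # hypothetical spec file and name
    info_space = InfoSpace()
    info_space.tick('trial')  # assumed: advance coordinates so info_space.get('session') resolves
    info_space.tick('session')

    session = SpaceSession(spec, info_space)
    session_data = session.run()  # run all episodes, analyze, close, and return session data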