Exemplo n.º 1
0
class SpaceSession(Session):
    '''
    Session for multi-agent/env setting.

    Builds an AEBSpace, an EnvSpace and an AgentSpace from the spec, steps them
    together until `aeb_space.tick()` reports that everything is done, then
    analyzes the session and closes the spaces.
    '''

    def __init__(self, spec, info_space, global_nets=None):
        '''
        Construct the spaces that make up this session.

        spec: experiment spec; passed to the logger setup, the cuda setup and the space constructors.
        info_space: supplies the session index (`get('session')`) and the random seed.
        global_nets: forwarded to AgentSpace; must not be a dict (see the assertion below).

        NOTE(review): the parent `Session.__init__` is not invoked here; presumably
        intentional because this class builds its own spaces - confirm.
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_session_logger(self.spec, self.info_space, logger)
        self.data = None  # filled in by run() with the analysis result

        # the env space is built before the body space is initialized, and
        # seeding happens only once the env space exists since it is passed in
        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.aeb_space.init_body_space()
        util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
        util.try_set_cuda_id(self.spec, self.info_space)
        assert not ps.is_dict(
            global_nets
        ), f'multi agent global_nets must be a list of dicts, got {global_nets}'
        self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def save_if_ckpt(self, agent_space, env_space):
        '''
        Save for agent, env if episode is at checkpoint.

        Delegates to the parent `save_if_ckpt` once per (agent, body.env) pair,
        for every body in each agent's `nanflat_body_a`.
        `env_space` is accepted for interface compatibility but is not read here.
        '''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                super().save_if_ckpt(agent, body.env)

    def run_all_episodes(self):
        '''
        Continually run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done.
        '''
        all_done = self.aeb_space.tick('epi')
        # only the initial states are needed to prime the agents
        _, state_space, _ = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not all_done:
            all_done = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(
                action_space)
            self.agent_space.update(action_space, reward_space, state_space,
                                    done_space)
            # checkpoint after every update, including the final one
            self.save_if_ckpt(self.agent_space, self.env_space)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        '''
        Run all episodes, analyze the session, close it, and return the analysis result.

        The result is also stored on `self.data`.
        '''
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
Exemplo n.º 2
0
class SpaceSession(Session):
    '''
    Session for multi-agent/env setting.

    Builds an AEBSpace, an EnvSpace and an AgentSpace from the spec, steps them
    together until `aeb_space.tick()` reports that everything is done, then
    analyzes the session and closes the spaces.
    '''

    def __init__(self, spec, info_space, global_nets=None):
        '''
        Construct the spaces that make up this session.

        spec: experiment spec; passed to the logger setup, the cuda setup and the space constructors.
        info_space: supplies the session index (`get('session')`) and the random seed.
        global_nets: forwarded unchanged to AgentSpace.

        NOTE(review): the parent `Session.__init__` is not invoked here; presumably
        intentional because this class builds its own spaces - confirm.
        '''
        self.spec = spec
        self.info_space = info_space
        self.index = self.info_space.get('session')
        util.set_logger(self.spec, self.info_space, logger, 'session')
        self.data = None  # filled in by run() with the analysis result
        # reference run_online_eval process; never assigned in this class,
        # presumably set by the parent try_ckpt - TODO confirm
        self.eval_proc = None

        # the env space is built before the body space is initialized, and
        # seeding happens only once the env space exists since it is passed in
        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.aeb_space.init_body_space()
        util.set_rand_seed(self.info_space.get_random_seed(), self.env_space)
        util.try_set_cuda_id(self.spec, self.info_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space, global_nets)

        logger.info(util.self_desc(self))
        logger.info(f'Initialized session {self.index}')

    def try_ckpt(self, agent_space, env_space):
        '''
        Try to checkpoint agent and run_online_eval at the start, save_freq, and the end.

        Delegates to the parent `try_ckpt` once per (agent, body.env) pair,
        for every body in each agent's `nanflat_body_a`.
        `env_space` is accepted for interface compatibility but is not read here.
        '''
        for agent in agent_space.agents:
            for body in agent.nanflat_body_a:
                super().try_ckpt(agent, body.env)

    def run_all_episodes(self):
        '''
        Continually run all episodes, where each env can step and reset at its own clock_speed and timeline.
        Will terminate when all envs are done.
        '''
        all_done = self.aeb_space.tick('epi')
        # only the initial states are needed to prime the agents
        _, state_space, _ = self.env_space.reset()
        self.agent_space.reset(state_space)
        while not all_done:
            # checkpoint before stepping, so the first call happens before any update
            self.try_ckpt(self.agent_space, self.env_space)
            all_done = self.aeb_space.tick()
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(
                action_space)
            self.agent_space.update(action_space, reward_space, state_space,
                                    done_space)
        # one more checkpoint after the loop to cover the final update
        self.try_ckpt(self.agent_space, self.env_space)
        if self.eval_proc is not None:  # wait for final eval before closing
            util.run_cmd_wait(self.eval_proc)

    def close(self):
        '''
        Close session and clean up.
        Save agent, close env.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done and closed.')

    def run(self):
        '''
        Run all episodes, analyze the session, close it, and return the analysis result.

        The result is also stored on `self.data`.
        '''
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data