示例#1
0
class Session:
    '''
    Smallest runnable unit of the RL system.

    Built from a spec, a session constructs the agent and environment spaces,
    drives the act/step/update loop until the clocks report the end,
    analyzes the outcome and returns the resulting session data from `run`.
    '''
    def __init__(self, spec, info_space=None):
        '''
        Wire up the spaces and seed the random number generators.

        @param spec: experiment spec; the session stores its own deep copy in self.spec.
        @param info_space: optional InfoSpace; a new one is created when a falsy value is passed.
        '''
        if not info_space:
            info_space = InfoSpace()
        # invoked with the caller's spec, before the private copy is taken
        init_thread_vars(spec, info_space, unit='session')
        self.spec = deepcopy(spec)
        self.info_space = info_space
        self.coor, self.index = self.info_space.get_coor_idx(self)

        # seed = 100 * trial index (0 when the trial is unset) + session index
        trial_index = info_space.get('trial') or 0
        self.random_seed = 100 * trial_index + self.index
        seeders = (torch.cuda.manual_seed_all, torch.manual_seed, np.random.seed)
        for seed_fn in seeders:
            seed_fn(self.random_seed)

        self.data = None  # filled in by run()
        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space)
        logger.info(util.self_desc(self))
        # body space is initialized only after both env and agent spaces exist
        self.aeb_space.init_body_space()
        self.aeb_space.post_body_init()
        logger.info(f'Initialized session {self.index}')

    def close(self):
        '''
        Shut the session down: close the agent space first, then the env space,
        and log that the session is done.
        Whatever saving/cleanup happens is delegated to those two close() calls.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Drive the interaction loop until aeb_space.tick_clocks reports the end of the session.
        Each iteration: agents act on the latest state, envs step on that action,
        then agents are updated with the resulting transition.
        '''
        _, state_space, _ = self.env_space.reset()
        # return value (nan action at t=0 for bookkeeping in data_space) is not needed here
        self.agent_space.reset(state_space)
        # clocks are ticked before every step; a truthy result ends the session
        while not self.aeb_space.tick_clocks(self):
            action_space = self.agent_space.act(state_space)
            step_result = self.env_space.step(action_space)
            reward_space, state_space, done_space = step_result
            self.agent_space.update(
                action_space, reward_space, state_space, done_space)

    def run(self):
        '''
        Run every episode, analyze the session, close it, and return the analysis result
        (also stored on self.data).
        '''
        self.run_all_episodes()
        session_data = analysis.analyze_session(self)  # session fitness
        self.data = session_data
        self.close()
        return self.data
示例#2
0
class Session:
    '''
    One instantiated RL system.

    From a spec, the session creates the agent space and the environment space,
    runs them against each other until the clocks end the session,
    then analyzes the run and returns the session data.
    '''

    def __init__(self, spec, info_space=None):
        '''
        Build the AEB, env and agent spaces and seed torch / numpy.

        @param spec: experiment spec; deep-copied into self.spec.
        @param info_space: optional InfoSpace; replaced by a fresh InfoSpace() when falsy.
        '''
        info_space = info_space if info_space else InfoSpace()
        # receives the original spec object, not the deep copy made below
        init_thread_vars(spec, info_space, unit='session')
        self.spec = deepcopy(spec)
        self.info_space = info_space
        coor_idx = self.info_space.get_coor_idx(self)
        self.coor, self.index = coor_idx
        # a missing/falsy trial counts as trial 0 in the seed formula
        trial = info_space.get('trial')
        if not trial:
            trial = 0
        self.random_seed = 100 * trial + self.index
        seed = self.random_seed
        torch.cuda.manual_seed_all(seed)
        torch.manual_seed(seed)
        np.random.seed(seed)
        self.data = None  # set by run()
        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space)
        logger.info(util.self_desc(self))
        # bodies are set up once both spaces have been attached to the AEB space
        self.aeb_space.init_body_space()
        self.aeb_space.post_body_init()
        logger.info(f'Initialized session {self.index}')

    def close(self):
        '''
        Close the agent space, then the env space, and log the end of the session.
        Any saving or cleanup is performed by those close() methods.
        '''
        for space in (self.agent_space, self.env_space):
            space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Loop act -> step -> update until aeb_space.tick_clocks(self) returns a truthy value.
        The clocks are ticked at the top of every iteration, before the agents act.
        '''
        initial = self.env_space.reset()
        _reward, state_space, _done = initial
        # reset's return (nan action at t=0 for bookkeeping in data_space) is discarded
        self.agent_space.reset(state_space)
        session_over = self.aeb_space.tick_clocks(self)
        while not session_over:
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(action_space)
            self.agent_space.update(action_space, reward_space, state_space, done_space)
            session_over = self.aeb_space.tick_clocks(self)

    def run(self):
        '''
        Run all episodes, store the analysis in self.data, close the session and return self.data.
        '''
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        result = self.data
        return result
示例#3
0
class Session:
    '''
    The base unit of instantiated RL system.
    Given a spec,
    session creates agent(s) and environment(s),
    run the RL system and collect data, e.g. fitness metrics, till it ends,
    then return the session data.
    '''
    def __init__(self, spec, info_space=None):
        '''
        Build the AEB, env and agent spaces for this session.

        @param spec: experiment spec; stored as-is on self.spec (not copied).
        @param info_space: optional InfoSpace. When None, a new InfoSpace is created for this session.
        '''
        # An `InfoSpace()` default in the signature is evaluated once, at definition time,
        # and would be shared (and mutated via tick) by every Session built without an
        # explicit info_space. Create it per call instead.
        if info_space is None:
            info_space = InfoSpace()
        self.spec = spec
        # only advance the 'session' axis when it has not been set yet
        if info_space.get('session') is None:
            info_space.tick('session')
        self.info_space = info_space
        self.coor, self.index = self.info_space.get_coor_idx(self)
        # TODO option to set rand_seed. also set np random seed
        # records torch's current seed; nothing is seeded here
        self.torch_rand_seed = torch.initial_seed()
        self.data = None  # filled in by run()
        self.aeb_space = AEBSpace(self.spec, self.info_space)
        self.env_space = EnvSpace(self.spec, self.aeb_space)
        self.agent_space = AgentSpace(self.spec, self.aeb_space)
        logger.info(util.self_desc(self))
        self.aeb_space.init_body_space()
        self.aeb_space.post_body_init()

    def close(self):
        '''
        Close session and clean up:
        close the agent space, then the env space, and log that the session is done.
        Any saving is delegated to those close() calls.
        '''
        self.agent_space.close()
        self.env_space.close()
        logger.info('Session done, closing.')

    def run_all_episodes(self):
        '''
        Run the act -> step -> update loop until aeb_space.tick_clocks() returns a truthy value.
        The clocks are ticked at the top of every iteration, before the agents act.
        '''
        _reward_space, state_space, _done_space = self.env_space.reset()
        self.agent_space.reset(state_space)
        while True:
            end_session = self.aeb_space.tick_clocks()
            if end_session:
                break
            action_space = self.agent_space.act(state_space)
            reward_space, state_space, done_space = self.env_space.step(
                action_space)
            self.agent_space.update(action_space, reward_space, state_space,
                                    done_space)

    def run(self):
        '''
        Run all episodes, store the analysis result in self.data, close the session,
        and return self.data.
        '''
        self.run_all_episodes()
        self.data = analysis.analyze_session(self)  # session fitness
        self.close()
        return self.data
示例#4
0
def test_prioritized_replay_memory(request):
    '''
    Build the spaces for the 'base_prioritized_replay_memory' spec from base.json
    and return a tuple: the memory of the first agent's first body, followed by request.param.

    @param request: object exposing a tuple `param` (presumably the pytest fixture request - confirm against the decorator, which is not visible here).
    '''
    spec = util.override_test_spec(
        spec_util.get('base.json', 'base_prioritized_replay_memory'))
    aeb_space = AEBSpace(spec, InfoSpace())
    # both spaces are constructed against the AEB space before the bodies are initialized;
    # the env space itself is not used afterwards
    env_space = EnvSpace(spec, aeb_space)
    agent_space = AgentSpace(spec, aeb_space)
    aeb_space.init_body_space()
    aeb_space.post_body_init()
    first_agent = agent_space.agents[0]
    memory = first_agent.nanflat_body_a[0].memory
    return (memory, ) + request.param
示例#5
0
def test_prioritized_replay_memory(request):
    '''
    Set up agent and env spaces from the 'base_prioritized_replay_memory' entry of base.json,
    then return (memory, *request.param), where memory belongs to the first body of the first agent.

    @param request: object whose `param` attribute is a tuple appended to the result
        (likely the pytest fixture request - the decorator is not visible here).
    '''
    raw_spec = spec_util.get('base.json', 'base_prioritized_replay_memory')
    test_spec = util.override_test_spec(raw_spec)
    info = InfoSpace()
    mem_aeb_space = AEBSpace(test_spec, info)
    # env space is built before the agent space and kept alive for the body initialization
    env_space = EnvSpace(test_spec, mem_aeb_space)
    agent_space = AgentSpace(test_spec, mem_aeb_space)
    mem_aeb_space.init_body_space()
    mem_aeb_space.post_body_init()
    first_body = agent_space.agents[0].nanflat_body_a[0]
    head = (first_body.memory, )
    return head + request.param