def gen_avg_return(agent, env):
    '''
    Evaluate an agent on an env and return the result of gen_return(agent, env).

    The rollout runs inside the 'eval' lab-mode context with torch gradient
    tracking disabled. agent.algorithm.update() is called once inside the
    context and once more after leaving it.
    '''
    with util.ctx_lab_mode('eval'):
        # updating while under the eval context sets explore_var etc. to end_val
        agent.algorithm.update()
        with torch.no_grad():
            eval_return = gen_return(agent, env)
    # the eval context has exited; a plain update restores the variables
    agent.algorithm.update()
    return eval_return
def gen_avg_return(agent, env, num_eval=NUM_EVAL):
    '''
    Evaluate an agent on an env num_eval times and return the mean return.

    Each rollout is produced by gen_return(agent, env) inside the 'eval'
    lab-mode context with torch gradient tracking disabled.
    agent.algorithm.update() is called once inside the context and once more
    after leaving it. The result is np.mean over the collected returns.
    '''
    with util.ctx_lab_mode('eval'):
        # updating while under the eval context sets explore_var etc. to end_val
        agent.algorithm.update()
        with torch.no_grad():
            episode_returns = []
            for _ in range(num_eval):
                episode_returns.append(gen_return(agent, env))
    # the eval context has exited; a plain update restores the variables
    agent.algorithm.update()
    return np.mean(episode_returns)
def __init__(self, spec, global_nets=None):
    '''
    Set up a session from its spec.

    Stores the spec and the session index read from spec['meta']['session'],
    applies the random seed, CUDA id and logger settings via util, saves the
    spec, then builds the agent/env pair and a second env created under the
    'eval' lab-mode context.
    '''
    self.spec = spec
    self.index = spec['meta']['session']

    # global setup driven by the spec, done before any agent/env is built
    util.set_random_seed(spec)
    util.set_cuda_id(spec)
    util.set_logger(spec, logger, 'session')
    spec_util.save(spec, unit='session')

    agent, env = make_agent_env(spec, global_nets)
    self.agent = agent
    self.env = env
    with util.ctx_lab_mode('eval'):
        # separate env instance used for eval
        self.eval_env = make_env(spec)
    logger.info(util.self_desc(self))
def run_eval_episode(self):
    '''
    Run one episode on self.eval_env under the 'eval' lab-mode context.

    Rewards returned by eval_env.step are summed over the episode; the sum is
    passed to agent.body.eval_update together with the eval env, and the eval
    summary is then logged through agent.body.log_summary.
    '''
    eval_env = self.eval_env
    agent = self.agent
    with util.ctx_lab_mode('eval'):
        # updating while under the eval context sets explore_var etc. to end_val
        agent.algorithm.update()
        eval_env.clock.tick('epi')
        logger.info(f'Running eval episode for trial {self.info_space.get("trial")} session {self.index}')
        episode_reward = 0
        # the reward returned by reset is not used
        _, state, done = eval_env.reset()
        while not done:
            eval_env.clock.tick('t')
            reward, state, done = eval_env.step(agent.act(state))
            episode_reward += reward
    # the eval context has exited; a plain update restores the variables
    agent.algorithm.update()
    # record the episode total in body.eval_df, then log it
    agent.body.eval_update(eval_env, episode_reward)
    agent.body.log_summary(body_df_kind='eval')
def __init__(self, spec, info_space, global_nets=None):
    '''
    Set up a session from its spec and info_space.

    Stores both arguments and the session index read from
    info_space.get('session'), configures the logger, builds the training env
    and an eval env (the latter under the 'eval' lab-mode context), seeds each
    env, sets the CUDA id, and constructs the singleton Body and Agent.
    '''
    self.spec = spec
    self.info_space = info_space
    self.index = info_space.get('session')
    util.set_logger(spec, info_space, logger, 'session')
    self.data = None

    # singleton env; a seed is requested from info_space for this env
    self.env = make_env(spec)
    util.set_rand_seed(info_space.get_random_seed(), self.env)
    with util.ctx_lab_mode('eval'):
        # eval env gets its own get_random_seed() call, as in the training env
        self.eval_env = make_env(spec)
        util.set_rand_seed(info_space.get_random_seed(), self.eval_env)
    util.try_set_cuda_id(spec, info_space)

    # singleton agent wrapping a body bound to the training env
    agent_body = Body(self.env, spec['agent'])
    self.agent = Agent(spec, info_space, body=agent_body, global_nets=global_nets)

    # needed to use lab's data analysis framework
    enable_aeb_space(self)
    logger.info(util.self_desc(self))
    logger.info(f'Initialized session {self.index}')