def train_agent_async(outdir, processes, make_env,
                      profile=False,
                      steps=8 * 10 ** 7,
                      eval_frequency=10 ** 6,
                      eval_n_runs=10,
                      gamma=0.99,
                      max_episode_len=None,
                      step_offset=0,
                      successful_score=None,
                      eval_explorer=None,
                      agent=None,
                      make_agent=None,
                      logger=None):
    """Train agent asynchronously.

    Either `agent` or `make_agent` must be specified.

    Args:
        agent (Agent): Agent to train.
        make_agent (callable): (process_idx) -> Agent.
        processes (int): Number of processes.
        make_env (callable): (process_idx, test) -> env.
        profile (bool): Profile if set True.
        steps (int): Number of global time steps for training.
    """
    logger = logger or logging.getLogger(__name__)

    # Prevent numpy from using multiple threads
    os.environ['OMP_NUM_THREADS'] = '1'

    counter = mp.Value('l', 0)
    training_done = mp.Value('b', False)  # bool

    if agent is None:
        assert make_agent is not None
        agent = make_agent(0)

    # Link the agent's shared parameters so that all worker processes
    # update the same underlying arrays.
    shared_objects = extract_shared_objects_from_agent(agent)
    set_shared_objects(agent, shared_objects)

    evaluator = AsyncEvaluator(
        n_runs=eval_n_runs,
        eval_frequency=eval_frequency,
        outdir=outdir,
        max_episode_len=max_episode_len,
        step_offset=step_offset,
        explorer=eval_explorer,
        logger=logger)

    def run_func(process_idx):
        # Executed once in each worker process.
        random_seed.set_random_seed(process_idx)

        env = make_env(process_idx, test=False)
        eval_env = make_env(process_idx, test=True)

        if make_agent is not None:
            local_agent = make_agent(process_idx)
            set_shared_objects(local_agent, shared_objects)
        else:
            local_agent = agent
        local_agent.process_idx = process_idx

        def f():
            train_loop(
                process_idx=process_idx,
                counter=counter,
                agent=local_agent,
                env=env,
                steps=steps,
                outdir=outdir,
                max_episode_len=max_episode_len,
                evaluator=evaluator,
                successful_score=successful_score,
                training_done=training_done,
                eval_env=eval_env,
                logger=logger)

        if profile:
            import cProfile
            cProfile.runctx('f()', globals(), locals(),
                            'profile-{}.out'.format(os.getpid()))
        else:
            f()

    async_.run_async(processes, run_func)

    return agent
def train_agent_async(outdir, processes, make_env,
                      profile=False,
                      steps=8 * 10 ** 7,
                      eval_interval=10 ** 6,
                      eval_n_runs=10,
                      max_episode_len=None,
                      step_offset=0,
                      successful_score=None,
                      agent=None,
                      make_agent=None,
                      global_step_hooks=[],
                      save_best_so_far_agent=True,
                      logger=None,
                      ):
    """Train agent asynchronously using multiprocessing.

    Either `agent` or `make_agent` must be specified.

    Args:
        outdir (str): Path to the directory to output things.
        processes (int): Number of processes.
        make_env (callable): (process_idx, test) -> Environment.
        profile (bool): Profile if set True.
        steps (int): Number of global time steps for training.
        eval_interval (int): Interval of evaluation. If set to None, the
            agent will not be evaluated at all.
        eval_n_runs (int): Number of runs for each time of evaluation.
        max_episode_len (int): Maximum episode length.
        step_offset (int): Time step from which training starts.
        successful_score (float): Finish training if the mean score is
            greater than or equal to this value, if not None.
        agent (Agent): Agent to train.
        make_agent (callable): (process_idx) -> Agent.
        global_step_hooks (list): List of callable objects that accept
            (env, agent, step) as arguments. They are called every global
            step. See chainerrl.experiments.hooks.
        save_best_so_far_agent (bool): If set to True, after each
            evaluation, if the score (= mean return of evaluation
            episodes) exceeds the best-so-far score, the current agent
            is saved.
        logger (logging.Logger): Logger used in this function.

    Returns:
        Trained agent.
    """
    logger = logger or logging.getLogger(__name__)

    # Prevent numpy from using multiple threads
    os.environ['OMP_NUM_THREADS'] = '1'

    counter = mp.Value('l', 0)
    episodes_counter = mp.Value('l', 0)
    training_done = mp.Value('b', False)  # bool

    if agent is None:
        assert make_agent is not None
        agent = make_agent(0)

    # Link the agent's shared parameters so that all worker processes
    # update the same underlying arrays.
    shared_objects = extract_shared_objects_from_agent(agent)
    set_shared_objects(agent, shared_objects)

    if eval_interval is None:
        evaluator = None
    else:
        evaluator = AsyncEvaluator(
            n_runs=eval_n_runs,
            eval_interval=eval_interval,
            outdir=outdir,
            max_episode_len=max_episode_len,
            step_offset=step_offset,
            save_best_so_far_agent=save_best_so_far_agent,
            logger=logger,
        )

    def run_func(process_idx):
        # Executed once in each worker process.
        random_seed.set_random_seed(process_idx)

        env = make_env(process_idx, test=False)
        if evaluator is None:
            eval_env = env
        else:
            eval_env = make_env(process_idx, test=True)

        if make_agent is not None:
            local_agent = make_agent(process_idx)
            set_shared_objects(local_agent, shared_objects)
        else:
            local_agent = agent
        local_agent.process_idx = process_idx

        def f():
            train_loop(
                process_idx=process_idx,
                counter=counter,
                episodes_counter=episodes_counter,
                agent=local_agent,
                env=env,
                steps=steps,
                outdir=outdir,
                max_episode_len=max_episode_len,
                evaluator=evaluator,
                successful_score=successful_score,
                training_done=training_done,
                eval_env=eval_env,
                global_step_hooks=global_step_hooks,
                logger=logger)

        if profile:
            import cProfile
            cProfile.runctx('f()', globals(), locals(),
                            'profile-{}.out'.format(os.getpid()))
        else:
            f()

        env.close()
        if eval_env is not env:
            eval_env.close()

    async_.run_async(processes, run_func)

    return agent
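# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of this module). It shows how the
# function above is typically driven: `make_env` returns one independent
# environment per worker process, and `make_agent` returns one agent per
# worker whose parameters are shared across processes. It assumes OpenAI
# Gym is installed; `build_my_agent` is a hypothetical placeholder for
# whatever ChainerRL agent constructor (e.g. an A3C agent) you actually use
# and must be replaced with real construction code.

if __name__ == '__main__':
    import gym

    def make_env(process_idx, test):
        # Each worker gets its own environment instance; `test` can be used
        # to return a differently configured environment for evaluation.
        return gym.make('CartPole-v0')

    def make_agent(process_idx):
        # Hypothetical placeholder: construct and return a ChainerRL agent
        # whose parameters will be linked to the shared objects above.
        return build_my_agent()

    trained_agent = train_agent_async(
        outdir='results',
        processes=4,
        make_env=make_env,
        make_agent=make_agent,
        steps=10 ** 6,
        eval_interval=10 ** 5,
        eval_n_runs=5,
    )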