def add_new_iteration_strategy_model(self, owner, adv_net_state_dict, cfr_iter):
    # Wrap the adversary net's weights in an IterationStrategy that acts on the inference device
    iter_strat = IterationStrategy(t_prof=self._t_prof,
                                   env_bldr=self._env_bldr,
                                   owner=owner,
                                   device=self._t_prof.device_inference,
                                   cfr_iter=cfr_iter)
    iter_strat.load_net_state_dict(
        self._ray.state_dict_to_torch(adv_net_state_dict,
                                      device=self._t_prof.device_inference))

    # Append to this player's strategy buffer
    self._strategy_buffers[iter_strat.owner].add(iteration_strat=iter_strat)

    # Optionally export each net to disk as "<cfr_iter>_P<owner>.pkl"
    if self._t_prof.export_each_net:
        path = ospj(self._t_prof.path_strategy_nets, self._t_prof.name)
        file_util.create_dir_if_not_exist(path)
        file_util.do_pickle(obj=iter_strat.state_dict(),
                            path=path,
                            file_name=str(iter_strat.cfr_iteration) + "_P" + str(iter_strat.owner) + ".pkl")

    if self._t_prof.log_verbose:
        # Log the Chief's memory usage once per iteration (only on owner 1's call to avoid duplicates)
        if owner == 1:
            process = psutil.Process(os.getpid())
            self.add_scalar(self._exp_mem_usage, "Debug/Memory Usage/Chief", cfr_iter,
                            process.memory_info().rss)
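# ----------------------------------------------------------------------------------
# Illustrative sketch (not part of the class above): reading back a net that was
# exported by add_new_iteration_strategy_model(). This assumes file_util.do_pickle
# writes the object verbatim to os.path.join(path, file_name) as a standard pickle;
# load_exported_strategy_state() itself is a hypothetical helper, not PokerRL API.

import os
import pickle


def load_exported_strategy_state(t_prof, cfr_iter, owner):
    # Mirrors the "<cfr_iter>_P<owner>.pkl" naming used during export
    path = os.path.join(t_prof.path_strategy_nets, t_prof.name)
    file_name = str(cfr_iter) + "_P" + str(owner) + ".pkl"
    with open(os.path.join(path, file_name), "rb") as f:
        return pickle.load(f)  # the exported IterationStrategy state_dict
# ----------------------------------------------------------------------------------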
def __init__(self, t_prof, eval_methods, chief_cls, eval_agent_cls, n_iterations=None, iteration_to_import=None,
             name_to_import=None):
    """
    Args:
        t_prof (TrainingProfile)

        eval_methods (dict):                     dict of {evaluator1_name: frequency, ...}
                                                 Currently supported evaluators are "br", "h2h", "lbr", and "rlbr"

        chief_cls (ChiefBase subclass):          class, not instance

        eval_agent_cls (EvalAgentBase subclass): class, not instance

        n_iterations (int):                      number of iterations to run. If None, runs forever

        iteration_to_import (int):               step/iteration to import

        name_to_import (str):                    name of the run to import
    """
    super().__init__(t_prof=t_prof)

    if self._t_prof.CLUSTER:
        self._ray.init_cluster(redis_address=t_prof.redis_head_adr)
    else:
        self._ray.init_local()

    # Persist the TrainingProfile so the run can be re-imported later
    file_util.do_pickle(obj=t_prof, file_name=t_prof.name, path=t_prof.path_trainingprofiles)

    self.n_iterations = n_iterations
    self._step_to_import = iteration_to_import
    self._name_to_import = name_to_import

    # Pick the distributed (ray actor) or local implementations of all evaluator workers
    if self._t_prof.DISTRIBUTED:
        from PokerRL.eval.lbr.DistLBRMaster import DistLBRMaster as LBRMaster
        from PokerRL.eval.rl_br.DistRLBRMaster import DistRLBRMaster as RLBRMaster
        from PokerRL.eval.rl_br.workers.ps.Dist_RLBR_ParameterServer import \
            Dist_RLBR_ParameterServer as RLBRParameterServer
        from PokerRL.eval.rl_br.workers.la.Dist_RLBR_LearnerActor import Dist_RLBR_LearnerActor as RLBRLearnerActor
        from PokerRL.eval.lbr.DistLBRWorker import DistLBRWorker as LBRWorker
        from PokerRL.eval.br.DistBRMaster import DistBRMaster as BRMaster
        from PokerRL.eval.head_to_head.DistHead2HeadMaster import DistHead2HeadMaster as Head2HeadMaster

    else:
        from PokerRL.eval.lbr.LocalLBRMaster import LocalLBRMaster as LBRMaster
        from PokerRL.eval.rl_br.LocalRLBRMaster import LocalRLBRMaster as RLBRMaster
        from PokerRL.eval.rl_br.workers.ps.Local_RLBR_ParameterServer import \
            Local_RLBR_ParameterServer as RLBRParameterServer
        from PokerRL.eval.rl_br.workers.la.Local_RLBR_LearnerActor import \
            Local_RLBR_LearnerActor as RLBRLearnerActor
        from PokerRL.eval.lbr.LocalLBRWorker import LocalLBRWorker as LBRWorker
        from PokerRL.eval.br.LocalBRMaster import LocalBRMaster as BRMaster
        from PokerRL.eval.head_to_head.LocalHead2HeadMaster import LocalHead2HeadMaster as Head2HeadMaster

    # Safety measure to avoid overwriting the logs of the run being reloaded
    if name_to_import is not None and iteration_to_import is not None and name_to_import == t_prof.name:
        t_prof.name += "_"

    print("Creating Chief...")
    self.chief_handle = self._ray.create_worker(chief_cls, t_prof)

    self.eval_masters = {}
    if "br" in eval_methods:
        print("Creating BR Evaluator...")
        self.eval_masters["br"] = (
            self._ray.create_worker(BRMaster, t_prof, self.chief_handle, eval_agent_cls),
            eval_methods["br"]  # freq
        )

    if "h2h" in eval_methods:
        print("Creating Head-to-Head Mode Evaluator...")
        self.eval_masters["h2h"] = (
            self._ray.create_worker(Head2HeadMaster, t_prof, self.chief_handle, eval_agent_cls),
            eval_methods["h2h"]  # freq
        )

    if "lbr" in eval_methods:
        print("Creating LBR Evaluator...")
        self._lbr_workers = [
            self._ray.create_worker(LBRWorker, t_prof, self.chief_handle, eval_agent_cls)
            for _ in range(self._t_prof.module_args["lbr"].n_workers)
        ]
        self.eval_masters["lbr"] = (
            self._ray.create_worker(LBRMaster, t_prof, self.chief_handle),
            eval_methods["lbr"]  # freq
        )
        self._ray.wait([
            self._ray.remote(self.eval_masters["lbr"][0].set_worker_handles,
                             *self._lbr_workers)
        ])

    if "rlbr" in eval_methods:
        print("Creating RL-BR Evaluator...")
        self._rlbr_ps = self._ray.create_worker(RLBRParameterServer, t_prof, self.chief_handle)
        self._rlbr_las = [
            self._ray.create_worker(RLBRLearnerActor, t_prof, self.chief_handle, eval_agent_cls)
            for _ in range(self._t_prof.module_args["rlbr"].n_las)
        ]
        self.eval_masters["rlbr"] = (
            self._ray.create_worker(RLBRMaster, t_prof, self.chief_handle, eval_agent_cls),
            eval_methods["rlbr"]  # freq
        )
        self._ray.wait([
            self._ray.remote(self.eval_masters["rlbr"][0].set_learner_actors, *self._rlbr_las),
        ])
        self._ray.wait([
            self._ray.remote(self.eval_masters["rlbr"][0].set_param_server, self._rlbr_ps),
        ])

    # Logging backend (TensorBoard via Crayon)
    self.crayon = CrayonWrapper(name=t_prof.name,
                                chief_handle=self.chief_handle,
                                path_log_storage=self._t_prof.path_log_storage,
                                crayon_server_address=t_prof.local_crayon_server_docker_address,
                                runs_distributed=t_prof.DISTRIBUTED,
                                runs_cluster=t_prof.CLUSTER)
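# ----------------------------------------------------------------------------------
# Illustrative sketch (not part of the class above): how the eval_methods dict
# drives this constructor. Keys name evaluators ("br", "h2h", "lbr", "rlbr") and
# values are evaluation frequencies in iterations. MyTrainingProfile, MyChief,
# MyEvalAgent, and the Driver name itself are hypothetical placeholders for
# concrete subclasses.
#
#   t_prof = MyTrainingProfile(name="example_run", ...)
#   driver = Driver(t_prof=t_prof,
#                   eval_methods={"br": 20, "h2h": 10},  # BR every 20 iters, H2H every 10
#                   chief_cls=MyChief,
#                   eval_agent_cls=MyEvalAgent,
#                   n_iterations=500)  # or None to run until interrupted
# ----------------------------------------------------------------------------------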
def store_to_disk(self, path, file_name):
    """Pickles this object's state_dict to ``path/file_name``."""
    do_pickle(obj=self.state_dict(), path=path, file_name=file_name)
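# ----------------------------------------------------------------------------------
# Illustrative usage (not part of the class above), mirroring the per-iteration
# export in add_new_iteration_strategy_model(); the object, directory, and file
# name are hypothetical placeholders.
#
#   iter_strat.store_to_disk(path="/tmp/strategy_nets/example_run",
#                            file_name="120_P0.pkl")
# ----------------------------------------------------------------------------------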