def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _train_update_target: int,
        _train_save: int,
        _process_core: int = None,
        _save_path: str = './save',
        _use_cmd: bool = True,
):
    """Set up the asynchronous epoch trainer.

    Stores the agent/env pair, creates a multiprocessing pool used to
    sample games in parallel, and initializes the epoch and training
    counters plus the train/update/save schedules.
    """
    # agent is put into training mode right away
    self.agent: AgentAbstract = _agent
    self.agent.training()
    self.env: EnvAbstract = _env

    # worker pool for parallel game sampling; 'spawn' gives each worker
    # a fresh interpreter. NOTE(review): the pool is never closed by
    # this class — consider close()/join() on shutdown.
    self.mp = mp.get_context('spawn')
    self.process_core = _process_core
    self.pool = self.mp.Pool(self.process_core)

    # progress counters
    self.epoch = 0        # games played so far
    self.train_times = 0  # train() rounds completed so far

    # schedule parameters
    self.epoch_max = _epoch_max
    self.epoch_train = _epoch_train
    self.train_update_target = _train_update_target
    self.train_save = _train_save

    # reward history (one list per training round) and save location
    self.total_reward_buf = []
    self.save_path = _save_path

    # optional interactive shell polled from the training loop
    self.use_cmd = _use_cmd
    if self.use_cmd:
        self.shell = TrainShell(self)
def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _epoch_update_target: int,
        _epoch_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
):
    """Set up the epoch-scheduled trainer.

    All of train/updateTarget/save are triggered on epoch counts
    (every ``_epoch_train`` / ``_epoch_update_target`` / ``_epoch_save``
    games respectively).
    """
    # agent is switched into training mode immediately
    self.agent: AgentAbstract = _agent
    self.agent.training()
    self.env = _env

    # epoch counter and epoch-based schedules
    self.epoch = 0
    self.epoch_max = _epoch_max
    self.epoch_train = _epoch_train
    self.epoch_update_target = _epoch_update_target
    self.epoch_save = _epoch_save

    # reward history and model save location
    self.total_reward_buf = []
    self.save_path = _save_path

    # optional interactive shell polled while training
    self.use_cmd = _use_cmd
    if self.use_cmd:
        self.shell = TrainShell(self)
class TrainEpoch:
    """Epoch-driven trainer.

    Plays one game per loop iteration and triggers train(),
    updateTargetFunc() and save() on epoch-count schedules.
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _epoch_update_target: int,
            _epoch_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """Store the agent/env pair and the epoch-based schedules."""
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode
        self.env = _env

        # epoch counter and schedules
        self.epoch = 0
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.epoch_update_target = _epoch_update_target
        self.epoch_save = _epoch_save

        # reward history and save location
        self.total_reward_buf = []
        self.save_path = _save_path

        # optional interactive shell polled from run()
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Main loop: play games until ``epoch_max`` is reached."""
        pending_rewards = []  # rewards since the last train() call
        while self.epoch < self.epoch_max:
            logger.info('Start new game: {}'.format(self.epoch))
            self.agent.startNewGame()
            self.epoch += 1

            # step the agent until the game reports it is finished
            in_game = True
            while in_game:
                in_game = self.agent.step()
            pending_rewards.append(self.env.total_reward)

            if self.epoch % self.epoch_train == 0:
                self.agent.train()
                self.total_reward_buf.append(pending_rewards)
                pending_rewards = []
            if self.epoch % self.epoch_update_target == 0:
                self.agent.updateTargetFunc()
            if self.epoch % self.epoch_save == 0:
                self.agent.save(self.epoch, 0, self.save_path)

            if self.use_cmd:
                # non-blocking poll of stdin; a newline opens the shell
                readable, _, _ = select([sys.stdin], [], [], 0.0)
                if readable:
                    sys.stdin.readline()  # consume the triggering line
                    self.shell.cmdloop()
def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _train_update_target: int,
        _train_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
):
    """Set up the epoch trainer.

    train() runs every ``_epoch_train`` games; updateTargetFunc() and
    save() run every ``_train_update_target`` / ``_train_save``
    completed training rounds.
    """
    # agent is switched into training mode immediately
    self.agent: AgentAbstract = _agent
    self.agent.training()  # set to training mode
    self.env = _env

    # progress counters
    self.epoch = 0       # games played so far
    self.train_time = 0  # train() rounds completed so far

    # schedule parameters
    self.epoch_max = _epoch_max
    self.epoch_train = _epoch_train
    self.train_update_target = _train_update_target
    self.train_save = _train_save

    # reward history and save location
    self.total_reward_buf = []
    self.save_path = _save_path

    # optional interactive shell polled while training
    self.use_cmd = _use_cmd
    if self.use_cmd:
        self.shell = TrainShell(self)
def __init__(
        self,
        _agent: AgentAbstract,
        _epoch_max: int,
        _step_init: int,
        _step_train: int,
        _step_update_target: int,
        _step_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
):
    """
    one threading trainer

    :param _agent: agent object
    :param _epoch_max: how much games to play
    :param _step_init: how much steps to start train()
    :param _step_train: how much steps between train()
    :param _step_update_target: how much steps between updateTargetFunc()
    :param _step_save: how much steps between save()
    :param _save_path: directory where models are saved
    :param _use_cmd: whether to enable the interactive cmd shell
    """
    # agent is switched into training mode immediately
    self.agent: AgentAbstract = _agent
    self.agent.training()  # set to training mode

    # step-based schedules
    self.epoch_max = _epoch_max
    self.step_init = _step_init
    self.step_train = _step_train
    self.step_update_target = _step_update_target
    self.step_save = _step_save

    # progress counters
    self.epoch = 0       # the games has been taken
    self.step_local = 0  # the steps has been taken in current game
    self.step_total = 0  # the total steps has been taken

    self.save_path = _save_path

    # optional interactive shell polled while training
    self.use_cmd = _use_cmd
    if self.use_cmd:
        self.shell = TrainShell(self)
class AsynTrainEpoch:
    """
    Asyn train for epoch algorithm like PPO

    _agent: the agent to be trained
    _env: the env object
    _epoch_max: the max epoch to play games
    _epoch_train: train model after how many games
    _train_update_target: update target after how many training
    _train_save: save model after how many training
    _process_core: how many cores to play games simultaneously
    _save_path: where to save models
    _use_cmd: whether to enable cmdtool while training
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _process_core: int = None,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        # agent is put into training mode right away
        self.agent: AgentAbstract = _agent
        self.agent.training()
        self.env: EnvAbstract = _env

        # worker pool for parallel game sampling ('spawn' context)
        self.mp = mp.get_context('spawn')
        self.process_core = _process_core
        self.pool = self.mp.Pool(self.process_core)

        # progress counters and schedules
        self.epoch = 0        # games played so far
        self.train_times = 0  # train() rounds completed so far
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # reward history and save location
        self.total_reward_buf = []
        self.save_path = _save_path

        # optional interactive shell polled from run()
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    @staticmethod
    def loop_env(_agent: AgentAbstract, _env: EnvAbstract, _epoch_num: int):
        """Play one full game in a worker; return (dataset, total_reward)."""
        logger.info('Start new game: {}'.format(_epoch_num))
        _agent.startNewGame()
        in_game = True
        while in_game:
            in_game = _agent.step()
        data = _agent.getDataset(_agent.replay.pull())
        return data, _env.total_reward

    @staticmethod
    def merge_dataset_reward(_ret_list):
        """Concatenate per-game datasets column-wise; collect rewards."""
        datasets, rewards = zip(*_ret_list)
        merged = [
            np.concatenate([ds[i] for ds in datasets])
            for i in range(len(datasets[0]))
        ]
        return merged, list(rewards)

    def run(self):
        """Sample ``epoch_train`` games in parallel, then train on them."""
        while self.epoch < self.epoch_max:
            # fan out one game per epoch index to the worker pool
            jobs = [
                (self.agent, self.env, epoch_idx)
                for epoch_idx in range(self.epoch,
                                       self.epoch + self.epoch_train)
            ]
            ret_list = self.pool.starmap(AsynTrainEpoch.loop_env, jobs)
            self.epoch += self.epoch_train

            # merge the sampled data and run one training round
            dataset, rewards = AsynTrainEpoch.merge_dataset_reward(ret_list)
            self.agent.train(dataset)
            self.total_reward_buf.append(rewards)
            self.train_times += 1

            if self.train_times % self.train_update_target == 0:
                self.agent.updateTargetFunc()
            if self.train_times % self.train_save == 0:
                self.agent.save(self.epoch, 0, self.save_path)

            if self.use_cmd:
                # non-blocking poll of stdin; a newline opens the shell
                readable, _, _ = select([sys.stdin], [], [], 0.0)
                if readable:
                    sys.stdin.readline()  # consume the triggering line
                    self.shell.cmdloop()
class Train:
    """Single-threaded step-driven trainer.

    Plays games sequentially; train/updateTarget/save all fire on
    total-step-count schedules once ``step_init`` steps have passed.
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _epoch_max: int,
            _step_init: int,
            _step_train: int,
            _step_update_target: int,
            _step_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """
        one threading trainer

        :param _agent: agent object
        :param _epoch_max: how much games to play
        :param _step_init: how much steps to start train()
        :param _step_train: how much steps between train()
        :param _step_update_target: how much steps between updateTargetFunc()
        :param _step_save: how much steps between save()
        :param _save_path: directory where models are saved
        :param _use_cmd: whether to enable the interactive cmd shell
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        # step-based schedules
        self.epoch_max = _epoch_max
        self.step_init = _step_init
        self.step_train = _step_train
        self.step_update_target = _step_update_target
        self.step_save = _step_save

        # progress counters
        self.epoch = 0       # the games has been taken
        self.step_local = 0  # the steps has been taken in current game
        self.step_total = 0  # the total steps has been taken

        self.save_path = _save_path

        # optional interactive shell polled while stepping
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Main loop: step the agent, training on the step schedule."""
        while self.epoch < self.epoch_max:
            logger.info('Start new game: {}'.format(self.epoch))
            self.agent.startNewGame()
            self.epoch += 1
            self.step_local = 0  # reset local steps

            while True:
                # NOTE: counters and schedule checks also run for the
                # final (game-ending) step, matching the step() contract.
                alive = self.agent.step()
                self.step_local += 1
                self.step_total += 1

                # schedules only apply after the warm-up period
                if self.step_total > self.step_init:
                    if self.step_total % self.step_train == 0:
                        self.agent.train()
                    if self.step_total % self.step_update_target == 0:
                        self.agent.updateTargetFunc()
                    if self.step_total % self.step_save == 0:
                        self.agent.save(
                            self.epoch, self.step_local, self.save_path)

                if self.use_cmd:
                    # non-blocking poll of stdin; newline opens the shell
                    readable, _, _ = select([sys.stdin], [], [], 0.0)
                    if readable:
                        sys.stdin.readline()  # consume the trigger line
                        self.shell.cmdloop()

                if not alive:
                    break
class AsynTrainEpoch:
    """
    Asyn train for epoch algorithm like PPO

    _agent: the agent to be trained
    _env: the env object
    _epoch_max: the max epoch to play games
    _epoch_train: train model after how many games
    _train_update_target: update target after how many training
    _train_save: save model after how many training
    _process_core: how many cores to play games simultaneously
    _save_path: where to save models
    _use_cmd: whether to enable cmdtool while training
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _process_core: int = None,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        # agent goes straight into training mode
        self.agent: AgentAbstract = _agent
        self.agent.training()
        self.env: EnvAbstract = _env

        # multiprocessing pool used to sample games in parallel
        self.mp = mp.get_context('spawn')
        self.process_core = _process_core
        self.pool = self.mp.Pool(self.process_core)

        # counters: games played, train() rounds completed
        self.epoch = 0
        self.train_times = 0

        # schedules
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # reward history and save location
        self.total_reward_buf = []
        self.save_path = _save_path

        # optional interactive shell polled from run()
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    @staticmethod
    def loop_env(
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_num: int
    ):
        """Worker body: play one game; return (dataset, total_reward)."""
        logger.info('Start new game: {}'.format(_epoch_num))
        _agent.startNewGame()
        while _agent.step():
            pass
        return (
            _agent.getDataset(_agent.replay.pull()),
            _env.total_reward,
        )

    @staticmethod
    def merge_dataset_reward(_ret_list):
        """Concatenate the per-game datasets element-wise; keep rewards."""
        dataset_list = [result[0] for result in _ret_list]
        reward_list = [result[1] for result in _ret_list]
        # every dataset is a tuple of arrays; concatenate position-wise
        merged = [
            np.concatenate([ds[idx] for ds in dataset_list])
            for idx in range(len(dataset_list[0]))
        ]
        return merged, reward_list

    def run(self):
        """Sample ``epoch_train`` games per round, then train on them."""
        while self.epoch < self.epoch_max:
            # one starmap job per game in this round
            epoch_range = range(self.epoch, self.epoch + self.epoch_train)
            ret_list = self.pool.starmap(
                AsynTrainEpoch.loop_env,
                [(self.agent, self.env, num) for num in epoch_range],
            )
            self.epoch += self.epoch_train

            # merge the sampled data and run one training round
            dataset, rewards = AsynTrainEpoch.merge_dataset_reward(ret_list)
            self.agent.train(dataset)
            self.total_reward_buf.append(rewards)
            self.train_times += 1

            if self.train_times % self.train_update_target == 0:
                self.agent.updateTargetFunc()
            if self.train_times % self.train_save == 0:
                self.agent.save(self.epoch, 0, self.save_path)

            if self.use_cmd:
                # non-blocking stdin poll; a newline opens the shell
                readable, _, _ = select([sys.stdin], [], [], 0.0)
                if readable:
                    sys.stdin.readline()  # consume the triggering line
                    self.shell.cmdloop()
class TrainEpoch:
    """Batched epoch trainer.

    Collects ``epoch_train`` games per round, then runs one train();
    updateTargetFunc() and save() fire on train-round schedules.
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """Store the agent/env pair, counters and schedules."""
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode
        self.env = _env

        # counters: games played, train() rounds completed
        self.epoch = 0
        self.train_time = 0

        # schedules
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # reward history and save location
        self.total_reward_buf = []
        self.save_path = _save_path

        # optional interactive shell polled between games
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Alternate data collection (a batch of games) with training."""
        pending_rewards = []  # rewards gathered since the last train()
        while self.epoch < self.epoch_max:
            logger.info('Start new game: {}'.format(self.epoch))

            # collect one batch of games
            for _ in range(self.epoch_train):
                self.agent.startNewGame()
                self.epoch += 1
                # step until game finishes
                in_game = True
                while in_game:
                    in_game = self.agent.step()
                pending_rewards.append(self.env.total_reward)

                if self.use_cmd:
                    # non-blocking stdin poll; a newline opens the shell
                    readable, _, _ = select([sys.stdin], [], [], 0.0)
                    if readable:
                        sys.stdin.readline()  # consume the trigger line
                        self.shell.cmdloop()

            # one training round on the collected batch
            self.agent.train()
            self.train_time += 1
            self.total_reward_buf.append(pending_rewards)
            pending_rewards = []

            if self.train_time % self.train_update_target == 0:
                self.agent.updateTargetFunc()
            if self.train_time % self.train_save == 0:
                self.agent.save(self.epoch, 0, self.save_path)
class Train:
    """Step-driven single-thread trainer (duplicate variant).

    train/updateTarget/save all fire on total-step schedules after a
    ``step_init`` warm-up period.
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _epoch_max: int,
            _step_init: int,
            _step_train: int,
            _step_update_target: int,
            _step_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """
        one threading trainer

        :param _agent: agent object
        :param _epoch_max: how much games to play
        :param _step_init: how much steps to start train()
        :param _step_train: how much steps between train()
        :param _step_update_target: how much steps between updateTargetFunc()
        :param _step_save: how much steps between save()
        :param _save_path: directory where models are saved
        :param _use_cmd: whether to enable the interactive cmd shell
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        # step-based schedules
        self.epoch_max = _epoch_max
        self.step_init = _step_init
        self.step_train = _step_train
        self.step_update_target = _step_update_target
        self.step_save = _step_save

        # progress counters
        self.epoch = 0       # the games has been taken
        self.step_local = 0  # the steps has been taken in current game
        self.step_total = 0  # the total steps has been taken

        self.save_path = _save_path

        # optional interactive shell polled while stepping
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Main loop: step the agent, firing the step-count schedules."""
        while self.epoch < self.epoch_max:
            logger.info('Start new game: {}'.format(self.epoch))
            self.agent.startNewGame()
            self.epoch += 1
            self.step_local = 0  # reset local steps

            game_running = True
            while game_running:
                # counters and schedule checks also run for the final
                # (game-ending) step, as step()'s return is read first
                game_running = self.agent.step()
                self.step_local += 1
                self.step_total += 1

                # schedules only apply once warm-up is complete
                if self.step_total > self.step_init:
                    if self.step_total % self.step_train == 0:
                        self.agent.train()
                    if self.step_total % self.step_update_target == 0:
                        self.agent.updateTargetFunc()
                    if self.step_total % self.step_save == 0:
                        self.agent.save(
                            self.epoch, self.step_local, self.save_path)

                if self.use_cmd:
                    # non-blocking stdin poll; newline opens the shell
                    readable, _, _ = select([sys.stdin], [], [], 0.0)
                    if readable:
                        sys.stdin.readline()  # consume the trigger line
                        self.shell.cmdloop()