Example #1
0
    def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _train_update_target: int,
        _train_save: int,
        _process_core: 'int | None' = None,
        _save_path: str = './save',
        _use_cmd: bool = True,
    ):
        """
        Trainer that samples games in a process pool and trains in rounds.

        :param _agent: agent to train (switched to training mode here)
        :param _env: environment the agent plays in
        :param _epoch_max: total number of games to play
        :param _epoch_train: train the model after this many games
        :param _train_update_target: update the target after this many
            training rounds
        :param _train_save: save the model after this many training rounds
        :param _process_core: number of pool worker processes; None lets
            multiprocessing pick its default (presumably the cpu count)
        :param _save_path: directory where models are saved
        :param _use_cmd: enable the interactive command shell
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set agent to training mode
        self.env: EnvAbstract = _env

        # multiprocessing for sampling
        # 'spawn' start method: each worker gets a fresh interpreter
        self.mp = mp.get_context('spawn')
        self.process_core = _process_core
        self.pool = self.mp.Pool(self.process_core)

        # training control
        self.epoch = 0        # games played so far
        self.train_times = 0  # training rounds completed so far
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # one sub-list of per-game total rewards per training round
        self.total_reward_buf = []

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)
Example #2
0
    def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _epoch_update_target: int,
        _epoch_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
    ):
        """Store the training schedule and switch the agent to training mode."""
        # agent & environment
        self.agent: AgentAbstract = _agent
        self.agent.training()  # training mode
        self.env = _env

        # where checkpoints are written
        self.save_path = _save_path

        # epoch bookkeeping and schedule
        self.epoch = 0
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.epoch_update_target = _epoch_update_target
        self.epoch_save = _epoch_save

        # per-training-round reward history
        self.total_reward_buf = []

        # optional interactive shell
        self.use_cmd = _use_cmd
        if _use_cmd:
            self.shell = TrainShell(self)
Example #3
0
class TrainEpoch:
    """
    Epoch-based trainer: plays whole games one at a time and trains the
    agent every ``_epoch_train`` games.

    :param _agent: agent to train (switched to training mode here)
    :param _env: environment the agent plays in
    :param _epoch_max: total number of games to play
    :param _epoch_train: call ``agent.train()`` every this many games
    :param _epoch_update_target: call ``agent.updateTargetFunc()`` every
        this many games
    :param _epoch_save: call ``agent.save()`` every this many games
    :param _save_path: directory where models are saved
    :param _use_cmd: enable the interactive command shell between games
    """

    def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _epoch_update_target: int,
        _epoch_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
    ):
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        self.env = _env

        self.epoch = 0  # games played so far
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.epoch_update_target = _epoch_update_target
        self.epoch_save = _epoch_save

        # one sub-list of per-game total rewards per training round
        self.total_reward_buf = []

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Play games until ``epoch_max``, training/saving periodically."""
        tmp_reward_buf = []
        while self.epoch < self.epoch_max:
            # lazy %-style args: formatting is skipped if INFO is disabled
            logger.info('Start new game: %s', self.epoch)

            self.agent.startNewGame()
            self.epoch += 1

            # step until game finishes
            while self.agent.step():
                pass
            tmp_reward_buf.append(self.env.total_reward)

            if not self.epoch % self.epoch_train:
                self.agent.train()
                self.total_reward_buf.append(tmp_reward_buf)
                tmp_reward_buf = []
            if not self.epoch % self.epoch_update_target:
                self.agent.updateTargetFunc()
            if not self.epoch % self.epoch_save:
                self.agent.save(self.epoch, 0, self.save_path)

            # drop into the interactive shell if the user pressed Enter
            if self.use_cmd:
                rlist, _, _ = select([sys.stdin], [], [], 0.0)
                if rlist:
                    sys.stdin.readline()
                    self.shell.cmdloop()
Example #4
0
    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _process_core: int = None,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """Set up the agent, the sampling pool and the training counters."""
        self.agent: AgentAbstract = _agent
        self.agent.training()  # training mode
        self.env: EnvAbstract = _env

        # checkpoint location
        self.save_path = _save_path

        # pool of worker processes for game sampling ('spawn' context)
        self.mp = mp.get_context('spawn')
        self.process_core = _process_core
        self.pool = self.mp.Pool(self.process_core)

        # counters and training schedule
        self.epoch = 0
        self.train_times = 0
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        self.total_reward_buf = []  # rewards, grouped per training round

        # optional interactive shell
        self.use_cmd = _use_cmd
        if _use_cmd:
            self.shell = TrainShell(self)
Example #5
0
    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """Keep the training schedule and put the agent into training mode."""
        self.agent: AgentAbstract = _agent
        self.agent.training()  # switch agent to training mode
        self.env = _env

        # where checkpoints go
        self.save_path = _save_path

        # schedule: train every epoch_train games; update the target /
        # save every train_update_target / train_save training rounds
        self.epoch = 0
        self.train_time = 0
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        self.total_reward_buf = []  # per-round lists of game rewards

        # optional interactive shell
        self.use_cmd = _use_cmd
        if _use_cmd:
            self.shell = TrainShell(self)
Example #6
0
    def __init__(
        self,
        _agent: AgentAbstract,
        _epoch_max: int,
        _step_init: int,
        _step_train: int,
        _step_update_target: int,
        _step_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
    ):
        """
        Single-threaded trainer driven by step counts.

        :param _agent: agent object
        :param _epoch_max: number of games to play
        :param _step_init: total steps before training starts
        :param _step_train: steps between train() calls
        :param _step_update_target: steps between updateTargetFunc() calls
        :param _step_save: steps between save() calls
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        # step thresholds
        self.step_init = _step_init
        self.step_train = _step_train
        self.step_update_target = _step_update_target
        self.step_save = _step_save

        # progress counters
        self.epoch = 0       # games finished so far
        self.step_local = 0  # steps taken in the current game
        self.step_total = 0  # steps taken over all games

        self.epoch_max = _epoch_max

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if _use_cmd:
            self.shell = TrainShell(self)
Example #7
0
    def __init__(
            self, _agent: AgentAbstract,
            _epoch_max: int,
            _step_init: int,
            _step_train: int,
            _step_update_target: int,
            _step_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """
        Step-driven trainer for a single thread.

        :param _agent: agent object
        :param _epoch_max: how many games to play in total
        :param _step_init: warm-up steps before train() is first called
        :param _step_train: interval (in steps) between train() calls
        :param _step_update_target: interval between updateTargetFunc()
        :param _step_save: interval between save()
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # put the agent in training mode

        # persistence & shell configuration
        self.save_path = _save_path
        self.use_cmd = _use_cmd

        # game / step counters, all starting from zero
        self.epoch = 0       # games completed
        self.step_local = 0  # steps in the current game
        self.step_total = 0  # steps across all games

        # limits and periodic-action intervals
        self.epoch_max = _epoch_max
        self.step_init = _step_init
        self.step_train = _step_train
        self.step_update_target = _step_update_target
        self.step_save = _step_save

        if _use_cmd:
            self.shell = TrainShell(self)
Example #8
0
class AsynTrainEpoch:
    """
    Asyn train for epoch algorithm like PPO.

    Games are sampled in parallel in a process pool; every
    ``_epoch_train`` games the per-game datasets are merged and the
    agent is trained once on the merged batch.

    :param _agent: the agent to be trained
    :param _env: the env object
    :param _epoch_max: the max epoch to play games
    :param _epoch_train: train model after how many games
    :param _train_update_target: update target after how many trainings
    :param _train_save: save model after how many trainings
    :param _process_core: how many cores to play games simultaneously;
        None lets multiprocessing choose its default
    :param _save_path: where to save models
    :param _use_cmd: whether to enable cmdtool while training
    """
    def __init__(
        self,
        _agent: AgentAbstract,
        _env: EnvAbstract,
        _epoch_max: int,
        _epoch_train: int,
        _train_update_target: int,
        _train_save: int,
        _process_core: 'int | None' = None,
        _save_path: str = './save',
        _use_cmd: bool = True,
    ):
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode
        self.env: EnvAbstract = _env

        # multiprocessing for sampling; 'spawn' gives each worker a
        # fresh interpreter
        self.mp = mp.get_context('spawn')
        self.process_core = _process_core
        self.pool = self.mp.Pool(self.process_core)

        # training control
        self.epoch = 0        # games played so far
        self.train_times = 0  # training rounds completed so far
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # one sub-list of per-game total rewards per training round
        self.total_reward_buf = []

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    @staticmethod
    def loop_env(_agent: AgentAbstract, _env: EnvAbstract, _epoch_num: int):
        """Play one full game and return ``(dataset, total_reward)``."""
        # lazy %-style args: formatting is skipped if INFO is disabled
        logger.info('Start new game: %s', _epoch_num)

        _agent.startNewGame()
        while _agent.step():  # step until the game finishes
            pass

        return (_agent.getDataset(_agent.replay.pull()), _env.total_reward)

    @staticmethod
    def merge_dataset_reward(_ret_list):
        """
        Merge per-game results produced by ``loop_env``.

        :param _ret_list: list of ``(dataset_tuple, total_reward)`` pairs
        :return: (list of column-wise concatenated arrays, list of rewards)
        """
        dataset_list = [d[0] for d in _ret_list]
        # concatenate the i-th array of every game into one batch column
        dataset = [
            np.concatenate([tmp[i] for tmp in dataset_list])
            for i in range(len(dataset_list[0]))
        ]
        return dataset, [d[1] for d in _ret_list]

    def run(self):
        """Sample and train until ``epoch_max`` games have been played."""
        while self.epoch < self.epoch_max:
            # multiprocessing to get dataset
            ret_list = self.pool.starmap(
                AsynTrainEpoch.loop_env,
                [(self.agent, self.env, tmp)
                 for tmp in range(self.epoch, self.epoch + self.epoch_train)])
            self.epoch += self.epoch_train

            # train model on the merged batch
            dataset, rewards = AsynTrainEpoch.merge_dataset_reward(ret_list)
            self.agent.train(dataset)
            self.total_reward_buf.append(rewards)

            self.train_times += 1
            if not self.train_times % self.train_update_target:
                self.agent.updateTargetFunc()
            if not self.train_times % self.train_save:
                self.agent.save(self.epoch, 0, self.save_path)

            # drop into the interactive shell if the user pressed Enter
            if self.use_cmd:
                rlist, _, _ = select([sys.stdin], [], [], 0.0)
                if rlist:
                    sys.stdin.readline()
                    self.shell.cmdloop()
Example #9
0
class Train:
    def __init__(
        self,
        _agent: AgentAbstract,
        _epoch_max: int,
        _step_init: int,
        _step_train: int,
        _step_update_target: int,
        _step_save: int,
        _save_path: str = './save',
        _use_cmd: bool = True,
    ):
        """
        Single-threaded step-based trainer.

        :param _agent: agent object
        :param _epoch_max: how many games to play
        :param _step_init: how many total steps before train() may start
        :param _step_train: how many steps between train()
        :param _step_update_target: how many steps between updateTargetFunc()
        :param _step_save: how many steps between save()
        :param _save_path: directory where models are saved
        :param _use_cmd: enable the interactive command shell
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        self.epoch_max = _epoch_max

        self.step_init = _step_init
        self.step_train = _step_train
        self.step_update_target = _step_update_target
        self.step_save = _step_save

        self.epoch = 0       # games played so far
        self.step_local = 0  # steps taken in the current game
        self.step_total = 0  # steps taken over all games

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Play games until ``epoch_max``, training every ``step_train`` steps."""
        while self.epoch < self.epoch_max:
            # lazy %-style args: formatting is skipped if INFO is disabled
            logger.info('Start new game: %s', self.epoch)

            self.agent.startNewGame()
            self.epoch += 1
            self.step_local = 0  # reset local steps

            in_game = True
            while in_game:
                in_game = self.agent.step()
                self.step_local += 1
                self.step_total += 1

                # warm-up finished: periodic train / target update / save
                if self.step_total > self.step_init:
                    if not self.step_total % self.step_train:
                        self.agent.train()
                    if not self.step_total % self.step_update_target:
                        self.agent.updateTargetFunc()
                    if not self.step_total % self.step_save:
                        self.agent.save(self.epoch, self.step_local,
                                        self.save_path)

                # drop into the interactive shell if the user pressed Enter
                if self.use_cmd:
                    rlist, _, _ = select([sys.stdin], [], [], 0.0)
                    if rlist:
                        sys.stdin.readline()
                        self.shell.cmdloop()
Example #10
0
class AsynTrainEpoch:
    """
    Asyn train for epoch algorithm like PPO.

    Games are sampled in parallel in a process pool; every
    ``_epoch_train`` games the per-game datasets are merged and the
    agent is trained once on the merged batch.

    :param _agent: the agent to be trained
    :param _env: the env object
    :param _epoch_max: the max epoch to play games
    :param _epoch_train: train model after how many games
    :param _train_update_target: update target after how many trainings
    :param _train_save: save model after how many trainings
    :param _process_core: how many cores to play games simultaneously;
        None lets multiprocessing choose its default
    :param _save_path: where to save models
    :param _use_cmd: whether to enable cmdtool while training
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _process_core: 'int | None' = None,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode
        self.env: EnvAbstract = _env

        # multiprocessing for sampling; 'spawn' gives each worker a
        # fresh interpreter
        self.mp = mp.get_context('spawn')
        self.process_core = _process_core
        self.pool = self.mp.Pool(self.process_core)

        # training control
        self.epoch = 0        # games played so far
        self.train_times = 0  # training rounds completed so far
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # one sub-list of per-game total rewards per training round
        self.total_reward_buf = []

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    @staticmethod
    def loop_env(
            _agent: AgentAbstract, _env: EnvAbstract, _epoch_num: int
    ):
        """Play one full game and return ``(dataset, total_reward)``."""
        # lazy %-style args: formatting is skipped if INFO is disabled
        logger.info('Start new game: %s', _epoch_num)

        _agent.startNewGame()
        while _agent.step():  # step until the game finishes
            pass

        return (
            _agent.getDataset(_agent.replay.pull()),
            _env.total_reward
        )

    @staticmethod
    def merge_dataset_reward(_ret_list):
        """
        Merge per-game results produced by ``loop_env``.

        :param _ret_list: list of ``(dataset_tuple, total_reward)`` pairs
        :return: (list of column-wise concatenated arrays, list of rewards)
        """
        dataset_list = [d[0] for d in _ret_list]
        # concatenate the i-th array of every game into one batch column
        dataset = [
            np.concatenate([tmp[i] for tmp in dataset_list])
            for i in range(len(dataset_list[0]))
        ]
        return dataset, [d[1] for d in _ret_list]

    def run(self):
        """Sample and train until ``epoch_max`` games have been played."""
        while self.epoch < self.epoch_max:
            # multiprocessing to get dataset
            ret_list = self.pool.starmap(
                AsynTrainEpoch.loop_env,
                [(self.agent, self.env, tmp) for tmp in range(
                    self.epoch, self.epoch + self.epoch_train
                )]
            )
            self.epoch += self.epoch_train

            # train model on the merged batch
            dataset, rewards = AsynTrainEpoch.merge_dataset_reward(ret_list)
            self.agent.train(dataset)
            self.total_reward_buf.append(rewards)

            self.train_times += 1
            if not self.train_times % self.train_update_target:
                self.agent.updateTargetFunc()
            if not self.train_times % self.train_save:
                self.agent.save(
                    self.epoch, 0, self.save_path
                )

            # drop into the interactive shell if the user pressed Enter
            if self.use_cmd:
                rlist, _, _ = select([sys.stdin], [], [], 0.0)
                if rlist:
                    sys.stdin.readline()
                    self.shell.cmdloop()
Example #11
0
class TrainEpoch:
    """
    Epoch-based trainer: collects ``_epoch_train`` full games per round,
    then trains the agent once on the accumulated experience.

    :param _agent: agent to train (switched to training mode here)
    :param _env: environment the agent plays in
    :param _epoch_max: total number of games to play
    :param _epoch_train: train the model after this many games
    :param _train_update_target: update the target after this many
        training rounds
    :param _train_save: save the model after this many training rounds
    :param _save_path: directory where models are saved
    :param _use_cmd: enable the interactive command shell between games
    """

    def __init__(
            self,
            _agent: AgentAbstract,
            _env: EnvAbstract,
            _epoch_max: int,
            _epoch_train: int,
            _train_update_target: int,
            _train_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        self.env = _env

        self.epoch = 0       # games played so far
        self.train_time = 0  # training rounds completed so far
        self.epoch_max = _epoch_max
        self.epoch_train = _epoch_train
        self.train_update_target = _train_update_target
        self.train_save = _train_save

        # one sub-list of per-game total rewards per training round
        self.total_reward_buf = []

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Play and train in rounds until ``epoch_max`` games are done."""
        tmp_reward_buf = []
        while self.epoch < self.epoch_max:
            # lazy %-style args: formatting is skipped if INFO is disabled
            logger.info('Start new game: %s', self.epoch)
            # collect data
            for _ in range(self.epoch_train):
                self.agent.startNewGame()
                self.epoch += 1

                # step until game finishes
                while self.agent.step():
                    pass
                tmp_reward_buf.append(self.env.total_reward)

                # drop into the interactive shell if the user pressed Enter
                if self.use_cmd:
                    rlist, _, _ = select([sys.stdin], [], [], 0.0)
                    if rlist:
                        sys.stdin.readline()
                        self.shell.cmdloop()

            # train model
            self.agent.train()
            self.train_time += 1
            self.total_reward_buf.append(tmp_reward_buf)
            tmp_reward_buf = []

            if not self.train_time % self.train_update_target:
                self.agent.updateTargetFunc()
            if not self.train_time % self.train_save:
                self.agent.save(self.epoch, 0, self.save_path)
Example #12
0
class Train:
    def __init__(
            self, _agent: AgentAbstract,
            _epoch_max: int,
            _step_init: int,
            _step_train: int,
            _step_update_target: int,
            _step_save: int,
            _save_path: str = './save',
            _use_cmd: bool = True,
    ):
        """
        Single-threaded step-based trainer.

        :param _agent: agent object
        :param _epoch_max: how many games to play
        :param _step_init: how many total steps before train() may start
        :param _step_train: how many steps between train()
        :param _step_update_target: how many steps between updateTargetFunc()
        :param _step_save: how many steps between save()
        :param _save_path: directory where models are saved
        :param _use_cmd: enable the interactive command shell
        """
        self.agent: AgentAbstract = _agent
        self.agent.training()  # set to training mode

        self.epoch_max = _epoch_max

        self.step_init = _step_init
        self.step_train = _step_train
        self.step_update_target = _step_update_target
        self.step_save = _step_save

        self.epoch = 0       # games played so far
        self.step_local = 0  # steps taken in the current game
        self.step_total = 0  # steps taken over all games

        self.save_path = _save_path
        self.use_cmd = _use_cmd
        if self.use_cmd:
            self.shell = TrainShell(self)

    def run(self):
        """Play games until ``epoch_max``, training every ``step_train`` steps."""
        while self.epoch < self.epoch_max:
            # lazy %-style args: formatting is skipped if INFO is disabled
            logger.info('Start new game: %s', self.epoch)

            self.agent.startNewGame()
            self.epoch += 1
            self.step_local = 0  # reset local steps

            in_game = True
            while in_game:
                in_game = self.agent.step()
                self.step_local += 1
                self.step_total += 1

                # warm-up finished: periodic train / target update / save
                if self.step_total > self.step_init:
                    if not self.step_total % self.step_train:
                        self.agent.train()
                    if not self.step_total % self.step_update_target:
                        self.agent.updateTargetFunc()
                    if not self.step_total % self.step_save:
                        self.agent.save(
                            self.epoch, self.step_local, self.save_path
                        )

                # drop into the interactive shell if the user pressed Enter
                if self.use_cmd:
                    rlist, _, _ = select([sys.stdin], [], [], 0.0)
                    if rlist:
                        sys.stdin.readline()
                        self.shell.cmdloop()