Example #1
    def __init__(self, config=default()):
        super().__init__(config)
        # One transition record: (state, action, reward, next_state, done).
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        logger.info(describe(self))
        self.clear()
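The Experience namedtuple above defines the layout of one stored transition. As a rough illustration, a replay-buffer-style class built around the same namedtuple could append and sample transitions as sketched below (the SimpleReplayBuffer class and its memory/add/sample members are hypothetical, not taken from the snippet above):

import random
from collections import deque, namedtuple

Experience = namedtuple(
    "Experience",
    field_names=["state", "action", "reward", "next_state", "done"])


class SimpleReplayBuffer:
    """Minimal sketch of a uniform replay buffer."""

    def __init__(self, buffer_size=100000):
        # Oldest transitions are dropped automatically once the deque is full.
        self.memory = deque(maxlen=buffer_size)

    def add(self, state, action, reward, next_state, done):
        # Wrap one transition in the namedtuple and store it.
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self, batch_size=64):
        # Uniformly sample a mini-batch of stored transitions.
        return random.sample(self.memory, k=batch_size)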
Example #2
    def __init__(self,
                 config,
                 input_state_size,
                 output_action_size,
                 fc1_units=400,
                 fc2_units=300):
        super(DDPG_Networks, self).__init__(config, input_state_size,
                                            output_action_size)

        self.actor_local = LowDimActor(config, self.input_state_size,
                                       self.output_action_size, fc1_units,
                                       fc2_units)
        self.actor_target = LowDimActor(config, self.input_state_size,
                                        self.output_action_size, fc1_units,
                                        fc2_units)

        self.critic_local = LowDimCritic(config, self.input_state_size,
                                         self.output_action_size, fc1_units,
                                         fc2_units)
        self.critic_target = LowDimCritic(config, self.input_state_size,
                                          self.output_action_size, fc1_units,
                                          fc2_units)

        logger.warn('DDPG_Networks is initialized!')
        logger.info(utils.describe(self))
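In DDPG the local networks are trained directly, while each paired target network is moved toward its local counterpart with a soft (Polyak) update. A minimal sketch of that update, assuming the networks are torch.nn.Module instances and tau is a small blending factor (neither appears in the snippet above):

import torch


def soft_update(local_model: torch.nn.Module, target_model: torch.nn.Module, tau: float = 1e-3):
    # theta_target <- tau * theta_local + (1 - tau) * theta_target
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)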
Example #3
        def _test(self, interim_test: bool = False):
            """Common test routine."""

            if interim_test:
                test_num = self._configs.glob.interim_test_nums
            else:
                test_num = self._configs.glob.num_episodes

            for i_episode in range(test_num):
                state = self.env.reset()
                done = False
                score = 0
                step = 0

                while not done:
                    self.env.render()

                    action = self.select_action(state)
                    next_state, reward, done = self.step(action)

                    state = next_state
                    score += reward
                    step += 1

                logger.info(
                    "test %d\tstep: %d\ttotal score: %d" % (i_episode, step, score)
                )

                logger.log_scalar("Test score", score)
Example #4
    def __init__(self, config):
        super(GymEnv, self).__init__(config)

        self._env = gym.make(self.name)
        self._set_attr_from_u_env(self._env)
        self._env.seed(self.seed)

        logger.info(utils.describe(self))
Example #5
        def save_params(self, params: dict, n_episode: int):
            """Save model and optimizer parameters."""
            if not os.path.exists("./saved"):
                os.mkdir("./saved")

            save_name = self.env_name + "_" + self.hparams.name

            path = os.path.join("./saved", save_name + "_ep_" + str(n_episode) + ".pt")
            torch.save(params, path)

            logger.info("Saved the model and optimizer to", path)
Example #6
    def load_params(self, path: str):
        """Load model and optimizer parameters."""
        if not os.path.exists(path):
            logger.fatal("the input path does not exist. ->", path)
            return

        params = torch.load(path)
        self.actor.load_state_dict(params["actor_state_dict"])
        self.actor_target.load_state_dict(params["actor_target_state_dict"])
        self.critic.load_state_dict(params["critic_state_dict"])
        self.critic_target.load_state_dict(params["critic_target_state_dict"])
        self.actor_optimizer.load_state_dict(params["actor_optim_state_dict"])
        self.critic_optimizer.load_state_dict(params["critic_optim_state_dict"])
        logger.info("loaded the model and optimizer from", path)
Example #7
    def __init__(self, env: BaseEnv, models: tuple, optims: tuple,
                 noise: OUNoise, configs: Configs = default()):

        super(DDPGAgent, self).__init__(env, configs)

        self.actor, self.actor_target, self.critic, self.critic_target = models
        self.actor_optimizer, self.critic_optimizer = optims
        self.curr_state = np.zeros((1,))
        self.noise = noise
        self.total_step = 0
        self.episode_step = 0
        self.i_episode = 0

        if configs.glob.load_from is not None and os.path.exists(configs.glob.load_from):
            self.load_params(configs.glob.load_from)

        self._initialize()

        logger.info(describe(self))
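The noise argument is an OUNoise instance used for exploration on continuous actions. For reference, an Ornstein-Uhlenbeck noise process usually looks like the sketch below (this class and its constructor signature are illustrative assumptions, not the library's actual OUNoise API):

import numpy as np


class SimpleOUNoise:
    """Temporally correlated noise for continuous-action exploration."""

    def __init__(self, size: int, mu: float = 0.0, theta: float = 0.15, sigma: float = 0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Restart the process at its mean, e.g. at the beginning of an episode.
        self.state = np.copy(self.mu)

    def sample(self) -> np.ndarray:
        # dx = theta * (mu - x) + sigma * N(0, 1), with the time step folded into the constants.
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.standard_normal(len(self.state))
        self.state = self.state + dx
        return self.state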
Example #8
    def write_log(self, i: int, loss: np.ndarray, score: int, avg_score: float):
        """Write log about loss and score."""
        total_loss = loss.sum()

        logger.info(
            "episode %d:\t episode step: %d | total step: %d | total score: %d |\t"
            "total loss: %f | actor_loss: %.3f | critic_loss: %.3f\n"
            % (
                i,
                self.episode_step,
                self.total_step,
                score,
                total_loss,
                loss[0],  # actor loss
                loss[1],  # critic loss
            )
        )

        if self._configs.glob.log:
            logger.log_scalar("scores/score", score, i)
            logger.log_scalar("scores/avg_score", avg_score, i)
            logger.log_scalar("losses/total_loss", total_loss, i)
            logger.log_scalar("losses/actor_loss", loss[0], i)
            logger.log_scalar("losses/critic_loss", loss[1], i)
Example #9
from kayddrl.configs.default import default
from kayddrl.demo.ddpg_test import run
from kayddrl.envs import make_env
from kayddrl.utils.logging import logger
from kayddrl.utils.utils import set_global_seeds

if __name__ == '__main__':
    cfg = default()
    set_global_seeds(cfg.glob.seed)
    env = make_env(cfg.env)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    logger.info(env.action_space.low, env.action_space.high,
                env.observation_space.shape[0])
    run(env, cfg, state_dim, action_dim)