    def __init__(self, seed, cfg: Configuration, num_agents=1):
        """Initialize an Agent object.

        Params
        ======
            seed (int): random seed
            cfg (Configuration): configuration
            num_agents (int): number of agents
        """
        super(DdpgAgent, self).__init__(cfg)

        # note: the num_agents argument is not used here; the value
        # comes from the environment config
        self.num_agents = cfg.get_current_exp_cfg().environment_cfg.num_agents

        self.state_size = cfg.get_current_exp_cfg().agent_cfg.state_size
        self.action_size = cfg.get_current_exp_cfg().agent_cfg.action_size

        # Actor Network (w/ Target Network)
        # Critic Network (w/ Target Network)
        # note: `device` is assumed to be defined at module level (a torch.device)
        self.actor_current_model, self.actor_target_model, self.critic_current_model, self.critic_target_model = ModelFactory.create(
            seed, device, cfg)

        self.actor_optimizer = optim.Adam(
            self.actor_current_model.parameters(),
            lr=self.reinforcement_learning_cfg.ddpg_cfg.lr_actor)

        self.critic_optimizer = optim.Adam(
            self.critic_current_model.parameters(),
            lr=self.reinforcement_learning_cfg.ddpg_cfg.lr_critic,
            weight_decay=self.reinforcement_learning_cfg.ddpg_cfg.weight_decay)

        # Noise process
        self.noise = OUNoiseStandardNormal(self.action_size, seed)

        # Replay Memory
        if self.replay_memory_cfg.prioritized_replay:
            self.memory = PrioritizedReplayBuffer(
                int(self.replay_memory_cfg.buffer_size),
                alpha=self.replay_memory_cfg.prioritized_replay_alpha)
            prioritized_replay_beta_iters = self.prioritized_replay_beta_iters
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = self.trainer_cfg.max_steps
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=self.replay_memory_cfg.prioritized_replay_beta0,
                final_p=1.0)
        else:
            self.memory = ReplayBuffer(self.replay_memory_cfg.buffer_size)
            self.beta_schedule = None

        # Initialize time step counter (for prioritized memory replay)
        self.step_counter = 0
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.step_update_counter = 0
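
# `LinearSchedule` above follows the interface popularized by OpenAI Baselines.
# A minimal sketch of that schedule, assuming a `value(t)` accessor (the exact
# implementation in this repo is not shown here):
class LinearSchedule:
    def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
        self.schedule_timesteps = schedule_timesteps
        self.final_p = final_p
        self.initial_p = initial_p

    def value(self, t):
        # Linearly interpolate from initial_p to final_p, then hold final_p.
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)

# Typical use in the agent: beta = self.beta_schedule.value(self.step_counter)
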
    def __init__(self, cfg: Configuration):

        # training parameters
        self.trainer_cfg = cfg.get_current_exp_cfg().trainer_cfg

        # agent parameters
        self.agent_cfg = cfg.get_current_exp_cfg().agent_cfg

        # replay memory parameters
        self.replay_memory_cfg = cfg.get_current_exp_cfg().replay_memory_cfg
        self.prioritized_replay_beta_iters = None

        # reinforcement learning parameters
        self.reinforcement_learning_cfg = (
            cfg.get_current_exp_cfg().reinforcement_learning_cfg)

        self.frames_queue = deque(maxlen=self.agent_cfg.num_frames)
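
# The frames queue above gives fixed-length frame stacking for free: once the
# deque is full, each append evicts the oldest frame. A sketch of how such a
# queue is commonly consumed (the stacking step is an assumption, not code
# taken from this repo):
import numpy as np
from collections import deque

num_frames = 4
frames_queue = deque(maxlen=num_frames)

first_frame = np.zeros((84, 84), dtype=np.float32)
for _ in range(num_frames):
    frames_queue.append(first_frame)  # pad with the first observation

frames_queue.append(np.ones((84, 84), dtype=np.float32))  # oldest frame drops out

state = np.stack(frames_queue, axis=0)  # (num_frames, H, W) input for the network
print(state.shape)  # (4, 84, 84)
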
    def create(config: Configuration) -> IEnvironment:

        env_name = config.get_current_exp_cfg().gym_id

        env_type = config.get_current_exp_cfg().environment_cfg.env_type

        if env_type == 'gym':
            env = GymStandardEnv(name=env_name)
        elif env_type == 'atari_gym':
            env = GymAtariEnv(name=env_name)
        elif env_type == 'spaceinvaders_atari_gym':
            env = GymAtariSpaceInvadersEnv(name=env_name)
        elif env_type == 'unity':
            env = UnityEnv(name=env_name)
        elif env_type == 'unity-multiple':
            env = UnityMultipleEnv(name=env_name)
        else:
            raise Exception(
                "Environment type '{}' not supported".format(env_type))

        return env
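
# The if/elif ladder can also be written as a dictionary dispatch, which keeps
# the supported environment types in one place. A sketch assuming the same
# constructors are in scope:
_ENV_TYPES = {
    'gym': GymStandardEnv,
    'atari_gym': GymAtariEnv,
    'spaceinvaders_atari_gym': GymAtariSpaceInvadersEnv,
    'unity': UnityEnv,
    'unity-multiple': UnityMultipleEnv,
}

def create(config: Configuration) -> IEnvironment:
    exp_cfg = config.get_current_exp_cfg()
    env_type = exp_cfg.environment_cfg.env_type
    if env_type not in _ENV_TYPES:
        raise Exception("Environment type '{}' not supported".format(env_type))
    return _ENV_TYPES[env_type](name=exp_cfg.gym_id)
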
Example #4
    def __init__(self, cfg: Configuration, session_id, path_models='models'):

        super(MasterTrainer, self).__init__(cfg, session_id)

        reinforcement_learning_cfg = (
            self.cfg.get_current_exp_cfg().reinforcement_learning_cfg)

        if reinforcement_learning_cfg.algorithm_type.startswith('ddpg'):
            self.eps = reinforcement_learning_cfg.ddpg_cfg.epsilon_start  # initialize epsilon
            self.eps_end = reinforcement_learning_cfg.ddpg_cfg.epsilon_end
            self.eps_decay = reinforcement_learning_cfg.ddpg_cfg.epsilon_decay
        else:
            self.eps = reinforcement_learning_cfg.dqn_cfg.epsilon_start  # initialize epsilon
            self.eps_end = reinforcement_learning_cfg.dqn_cfg.epsilon_end
            self.eps_decay = reinforcement_learning_cfg.dqn_cfg.epsilon_decay
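
# epsilon_start / epsilon_end / epsilon_decay typically drive a multiplicative
# decay applied once per episode; the exact schedule used by this trainer is
# an assumption here:
eps, eps_end, eps_decay = 1.0, 0.01, 0.995
for episode in range(3):
    # ... run one episode with epsilon-greedy exploration ...
    eps = max(eps_end, eps_decay * eps)  # 0.995 -> 0.990 -> 0.985 (approx.)
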
    def create(cfg: Configuration, session_id) -> ITrainer:

        algorithm_type = (
            cfg.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type)

        # both algorithm families are currently served by MasterTrainer
        if algorithm_type.startswith('dqn'):
            trainer = MasterTrainer(cfg=cfg, session_id=session_id)
        elif algorithm_type.startswith('ddpg'):
            trainer = MasterTrainer(cfg=cfg, session_id=session_id)
        else:
            raise Exception(
                "Trainer for algorithm type '{}' not supported".format(
                    algorithm_type))

        return trainer
Example #6
    def create(config: Configuration, seed=0) -> IAgent:

        algorithm_type = (
            config.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type)

        if algorithm_type.startswith('dqn'):
            agent = DqnAgent(seed=seed, cfg=config)
        elif algorithm_type.startswith('ddpg'):
            agent = DdpgAgent(seed=seed, cfg=config)
        else:
            raise Exception(
                "Agent for algorithm type '{}' not supported".format(
                    algorithm_type))

        return agent
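
# Taken together, the factories wire up a full experiment. A hypothetical
# end-to-end sketch; the holder class names (EnvironmentFactory, AgentFactory,
# TrainerFactory) are assumptions, since the snippets above only show the
# create() methods:
cfg = Configuration(test_flag=True)
session_id = Experiment(cfg).get_session_id()

env = EnvironmentFactory.create(cfg)
agent = AgentFactory.create(cfg, seed=0)
trainer = TrainerFactory.create(cfg, session_id)
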
    def test_record_recordsParameters_multipleSavesOverwrites(self):

        config = Configuration(test_flag=True)
        session_id = Experiment(config).get_session_id()
        experiments_path = config.get_app_experiments_path(train_mode=False)
        model = 'model123'

        header = ['episode', 'step', 'action', 'reward']

        recorder = Recorder(header=header,
                            experiments_path=experiments_path,
                            session_id=session_id,
                            model=model,
                            configuration=config.get_current_exp_cfg())

        header_result = recorder.get_header()

        # element-wise check; note that zip stops at the shorter sequence
        assert all([a == b for a, b in zip(header, header_result)])

        parameters1 = [1, 1, 0, 0]
        parameters2 = [1, 2, 1, 0]
        parameters3 = [1, 3, 0, 10]

        # episode 1
        recorder.record(parameters1)
        recorder.record(parameters2)
        recorder.record(parameters3)

        recorder.save()

        df = recorder.get_dataframe()
        (config, log) = recorder.load()

        assert df.shape[0] == log.shape[0]

        # episode 2
        recorder.record(parameters1)
        recorder.record(parameters2)
        recorder.record(parameters3)

        recorder.save()

        df = recorder.get_dataframe()
        (config, log) = recorder.load()

        assert df.shape[0] == log.shape[0]
        assert config['session_id'] == session_id
        assert config['model'] == model
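
# The test above pins down the Recorder contract: get_header() echoes the
# header, record() appends a row, save() persists, and load() returns a
# (config, log) pair, with repeated saves overwriting the same files. A
# minimal pandas-based sketch satisfying that contract; the file layout and
# names are assumptions, not the repo's actual implementation:
import json
import os
import pandas as pd


class Recorder:
    def __init__(self, header, experiments_path, session_id, model,
                 configuration):
        self.header = list(header)
        self.rows = []
        self.dir = os.path.join(experiments_path, str(session_id))
        os.makedirs(self.dir, exist_ok=True)
        # session_id is assumed to be JSON-serializable (e.g. a string)
        self.meta = {'session_id': session_id, 'model': model,
                     'configuration': repr(configuration)}

    def get_header(self):
        return self.header

    def record(self, parameters):
        self.rows.append(list(parameters))

    def get_dataframe(self):
        return pd.DataFrame(self.rows, columns=self.header)

    def save(self):
        # overwrites on every call, matching multipleSavesOverwrites
        self.get_dataframe().to_csv(
            os.path.join(self.dir, 'log.csv'), index=False)
        with open(os.path.join(self.dir, 'config.json'), 'w') as f:
            json.dump(self.meta, f)

    def load(self):
        log = pd.read_csv(os.path.join(self.dir, 'log.csv'))
        with open(os.path.join(self.dir, 'config.json')) as f:
            config = json.load(f)
        return config, log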