Example #1
    def __init__(self, env, agent_params):
        super(ACAgent, self).__init__()

        self.env = env
        self.agent_params = agent_params
        self.num_critic_updates_per_agent_update = agent_params[
            'num_critic_updates_per_agent_update']
        self.num_actor_updates_per_agent_update = agent_params[
            'num_actor_updates_per_agent_update']
        self.device = agent_params['device']

        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params[
            'standardize_advantages']

        self.actor = MLPPolicyAC(
            self.agent_params['ac_dim'],
            self.agent_params['ob_dim'],
            self.agent_params['n_layers'],
            self.agent_params['size'],
            self.agent_params['device'],
            discrete=self.agent_params['discrete'],
            learning_rate=self.agent_params['learning_rate'],
        )
        # the critic's value estimates are used to compute the advantage for the actor update
        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()
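The example above only shows __init__. Below is a minimal sketch of how the two update-count hyperparameters and standardize_advantages are typically consumed by a train() method; the names estimate_advantage, critic.update and actor.update (and their signatures) are assumptions, not part of the example.

    def train(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        # sketch only: several critic fits per agent update stabilise the bootstrapped targets
        for _ in range(self.num_critic_updates_per_agent_update):
            critic_loss = self.critic.update(ob_no, next_ob_no, re_n, terminal_n)

        # advantage estimate A(s, a) = r + gamma * V(s') - V(s), optionally standardised
        advantages = self.estimate_advantage(ob_no, next_ob_no, re_n, terminal_n)
        if self.standardize_advantages:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        # several policy-gradient steps on the same advantage estimates
        for _ in range(self.num_actor_updates_per_agent_update):
            actor_loss = self.actor.update(ob_no, ac_na, advantages)

        return {'Critic_Loss': critic_loss, 'Actor_Loss': actor_loss}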
Example #2
    def __init__(self, env, agent_params):
        super().__init__()

        self.env = env
        self.agent_params = agent_params

        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params['standardize_advantages']
        self.n_drivers = self.agent_params['n_drivers']

        self.actor = MLPPolicyAC(
            self.agent_params['ac_dim'],
            self.agent_params['ob_dim'],
            self.agent_params['n_layers'],
            self.agent_params['size_ac'],
            self.agent_params['shared_exp'],
            self.agent_params['shared_exp_lambda'],
            self.agent_params['is_city'],
            self.agent_params['learning_rate'],
            self.agent_params['n_drivers']
        )

        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()
Example #3
    def __init__(self, env, agent_params, **kwargs):
        super(ACAgent, self).__init__()

        self.env = env
        self.agent_params = agent_params
        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params['standardize_advantages']

        # actor/policy
        if self.agent_params['discrete']:
            self.actor = DiscreteMLPPolicy(self.agent_params['ac_dim'],
                                           self.agent_params['ob_dim'],
                                           self.agent_params['n_layers'],
                                           self.agent_params['size'])
        else:
            self.actor = ContinuousMLPPolicy(self.agent_params['ac_dim'],
                                             self.agent_params['ob_dim'],
                                             self.agent_params['n_layers'],
                                             self.agent_params['size'])
        self.policy_optimizer = tf.keras.optimizers.Adam(learning_rate=self.agent_params['learning_rate'])

        self.critic = BootstrappedContinuousCritic(self.agent_params)
        self.critic_loss = tf.keras.losses.MeanSquaredError()
        self.critic_optimizer = tf.keras.optimizers.Adam(learning_rate=self.agent_params['learning_rate'])
        self.critic.nn_critic.compile(optimizer=self.critic_optimizer, loss=self.critic_loss)

        self.replay_buffer = ReplayBuffer()
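In the example above the critic network is compiled with an Adam optimizer and MSE loss, so it can be fit directly on bootstrapped targets with Keras. A minimal sketch follows; the target formula and the calls self.critic.nn_critic(next_obs) / .fit(...) are assumptions about how the compiled model would be used, not code from the example (tf refers to the module-level import tensorflow as tf already assumed above).

    def update_critic(self, obs, next_obs, rewards, terminals):
        # sketch only: one-step bootstrapped TD target r + gamma * V(s') * (1 - done)
        v_next = tf.squeeze(self.critic.nn_critic(next_obs), axis=-1)
        targets = rewards + self.gamma * v_next * (1.0 - terminals)
        # the compile() call above makes a plain Keras fit() sufficient here
        history = self.critic.nn_critic.fit(obs, targets, epochs=1, verbose=0)
        return history.history['loss'][-1]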
Example #4
    def __init__(self, env, agent_params):
        super(ACAgent, self).__init__()

        self.env = env
        self.agent_params = agent_params

        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params['standardize_advantages']

        self.actor = MLPPolicyAC(
            self.agent_params['ac_dim'],
            self.agent_params['ob_dim'],
            self.agent_params['n_layers'],
            self.agent_params['size'],
            self.agent_params['discrete'],
            self.agent_params['learning_rate'],
        )
        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()
Example #5
    def __init__(self, env, agent_params):
        super(ACAgent, self).__init__()

        self.env = env
        self.agent_params = agent_params

        self.gamma = self.agent_params["gamma"]
        self.standardize_advantages = self.agent_params["standardize_advantages"]

        self.actor = MLPPolicyAC(
            self.agent_params["ac_dim"],
            self.agent_params["ob_dim"],
            self.agent_params["n_layers"],
            self.agent_params["size"],
            self.agent_params["discrete"],
            self.agent_params["learning_rate"],
        )
        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()
Example #6
    def __init__(self, env, agent_params):
        super(ACAgent, self).__init__()

        self.env = env
        self.agent_params = agent_params

        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params[
            'standardize_advantages']
        self.gae = self.agent_params['gae']
        self.gae_lambda = self.agent_params['gae_lambda']
        self.ppo = self.agent_params['ppo']

        self.actor = MLPPolicyAC(
            self.agent_params['ac_dim'],
            self.agent_params['ob_dim'],
            self.agent_params['n_layers'],
            self.agent_params['size'],
            self.agent_params['discrete'],
            self.agent_params['learning_rate'],
            self.agent_params['clip_eps'],
        )

        # PPO keeps a frozen snapshot of the policy to form probability ratios
        if self.ppo:
            self.old_actor = MLPPolicyAC(
                self.agent_params['ac_dim'],
                self.agent_params['ob_dim'],
                self.agent_params['n_layers'],
                self.agent_params['size'],
                self.agent_params['discrete'],
                self.agent_params['learning_rate'],
                self.agent_params['clip_eps'],
            )
            self.old_actor.load_state_dict(self.actor.state_dict())

        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()
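The ppo branch above keeps a frozen copy of the policy (old_actor) so the clipped surrogate objective can compare new and old action probabilities. A minimal PyTorch sketch of that objective is below; get_log_prob, self.actor.optimizer and a module-level import torch are assumptions, and the old_actor snapshot would typically be refreshed with load_state_dict (as in __init__) after all actor updates for a batch.

    def ppo_actor_update(self, obs, acs, advantages):
        # probability ratio pi_new(a|s) / pi_old(a|s)
        log_prob_new = self.actor.get_log_prob(obs, acs)
        with torch.no_grad():
            log_prob_old = self.old_actor.get_log_prob(obs, acs)
        ratio = torch.exp(log_prob_new - log_prob_old)

        # clipped surrogate objective (maximised, hence the negated mean)
        clip_eps = self.agent_params['clip_eps']
        surrogate = torch.min(ratio * advantages,
                              torch.clamp(ratio, 1 - clip_eps, 1 + clip_eps) * advantages)
        loss = -surrogate.mean()

        self.actor.optimizer.zero_grad()
        loss.backward()
        self.actor.optimizer.step()
        return loss.item()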