Example #1
    def _init_alg(self):
        """
        Initialize the algorithm based on what algorithm is specified.
        """
        # init storage for actor and critic models
        self.actors = []
        self.actor_targets = []
        self.critics = []
        self.critic_targets = []

        # create all models separately for each agent instance
        for _ in range(self.num_instances):
            actor = ActorNetwork(self.state_size, self.action_size)
            target_actor = ActorNetwork(self.state_size, self.action_size)
            target_actor = utils.copy_weights(actor, target_actor)

            critic = CriticNetwork(self.state_size, self.action_size)
            target_critic = CriticNetwork(self.state_size, self.action_size)
            target_critic = utils.copy_weights(critic, target_critic)

            self.actors.append(actor)
            self.actor_targets.append(target_actor)

            self.critics.append(critic)
            self.critic_targets.append(target_critic)

        # initialize the replay buffer
        self.memory = ReplayBuffer(self.buffer_size,
                                   self.batch_size,
                                   seed=self.seed)
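
The examples construct `ActorNetwork` and `CriticNetwork` objects from `state_size` and `action_size`, but the class definitions are not part of the listing. Below is a minimal sketch of what such networks could look like, assuming plain PyTorch MLPs; the hidden sizes, the tanh-bounded actor output, the state-action critic input, and the omission of the `inter_dims`/`use_batch_norm` options seen in Example #2 are all assumptions, not the project's actual architecture.

import torch
import torch.nn as nn
import torch.nn.functional as F


class ActorNetwork(nn.Module):
    """Deterministic policy: state -> action (sketch, assumed architecture)."""

    def __init__(self, state_size, action_size, hidden_dims=(256, 128)):
        super().__init__()
        self.fc1 = nn.Linear(state_size, hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.out = nn.Linear(hidden_dims[1], action_size)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        return torch.tanh(self.out(x))  # actions bounded to [-1, 1]


class CriticNetwork(nn.Module):
    """Q-function: (state, action) -> scalar value (sketch, assumed architecture)."""

    def __init__(self, state_size, action_size, hidden_dims=(256, 128)):
        super().__init__()
        self.fc1 = nn.Linear(state_size + action_size, hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.out = nn.Linear(hidden_dims[1], 1)

    def forward(self, state, action):
        x = F.relu(self.fc1(torch.cat([state, action], dim=-1)))
        x = F.relu(self.fc2(x))
        return self.out(x)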
Example #2
    def _init_alg(self):
        """
        Initialize the algorithm based on what algorithm is specified.
        """
        # initialize the actor and critics separately
        self.actor = ActorNetwork(self.state_size, self.action_size,
                                  self.actor_inter_dims,
                                  use_batch_norm=self.use_batch_norm
                                  ).to(self.device)
        self.actor_target = ActorNetwork(self.state_size, self.action_size,
                                         self.actor_inter_dims,
                                         use_batch_norm=self.use_batch_norm
                                         ).to(self.device)
        self.actor_target = utils.copy_weights(self.actor, self.actor_target)

        self.critic = CriticNetwork(self.state_size, self.action_size,
                                    self.critic_inter_dims
                                    ).to(self.device)
        self.critic_target = CriticNetwork(self.state_size, self.action_size,
                                           self.critic_inter_dims
                                           ).to(self.device)
        self.critic_target = utils.copy_weights(self.critic, self.critic_target)

        # initialize optimizers
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=self.actor_alpha)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=self.critic_alpha,
                                           eps=1e-4)

        # initialize the replay buffer
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size,
                                   seed=self.seed)
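
Only the `ReplayBuffer(buffer_size, batch_size, seed=...)` constructor appears in these examples. The following is a minimal sketch of the kind of buffer implied, with assumed `add`, `sample`, and `__len__` methods and uniform-random minibatch sampling; the real class may store and sample transitions differently.

import random
from collections import deque


class ReplayBuffer:
    """Fixed-size store of experience tuples (sketch, assumed interface)."""

    def __init__(self, buffer_size, batch_size, seed=0):
        self.memory = deque(maxlen=buffer_size)  # oldest entries are dropped
        self.batch_size = batch_size
        self.rng = random.Random(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self):
        # uniform random minibatch of stored transitions
        return self.rng.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)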
Example #3
    def step(self):
        """
        Update the agent's state and take a step through the learning process
        to reflect that experiences have been acquired and/or learned from.
        """
        # update actor target network
        self.actor_target = utils.copy_weights(self.actor, self.actor_target,
                                               self.tau)

        # update critic target network
        self.critic_target = utils.copy_weights(self.critic,
                                                self.critic_target, self.tau)
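
Each example relies on a `utils.copy_weights(source, target, tau=...)` helper that returns the updated target network, but its body is not shown. Below is a minimal sketch under the usual Polyak-averaging convention, where an assumed default of `tau=1.0` reduces to the hard copy used at initialization; the actual helper's default value and internals may differ.

def copy_weights(source, target, tau=1.0):
    """Blend source weights into target (sketch).

    tau=1.0 performs a hard copy; 0 < tau < 1 performs the soft update
    target <- tau * source + (1 - tau) * target.
    """
    for src_param, tgt_param in zip(source.parameters(), target.parameters()):
        tgt_param.data.copy_(tau * src_param.data
                             + (1.0 - tau) * tgt_param.data)
    return target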
Example #4
    def step(self):
        """
        Update the agent's state and take a step through the learning process
        to reflect that experiences have been acquired and/or learned from.
        """
        # store a copy of the current policy as the previous policy
        self.prev_policy = utils.copy_weights(self.policy, self.prev_policy)

        # decay epsilon for random noise
        self.epsilon = np.max([self.epsilon * self.epsilon_decay,
                               self.epsilon_min])

        # decay the action variance so actions are chosen more selectively
        self.action_variances = self.set_action_variances(self.variance_decay)

        self.t = -1
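
The epsilon update above is a geometric decay clamped from below at `epsilon_min`. A tiny standalone illustration with assumed example values:

epsilon, epsilon_decay, epsilon_min = 1.0, 0.9, 0.5  # assumed example values

for step in range(8):
    epsilon = max(epsilon * epsilon_decay, epsilon_min)
    print(f"step {step}: epsilon = {epsilon:.3f}")
# epsilon shrinks by a constant factor each step and never falls below epsilon_min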