def _init_alg(self):
    """
    Initialize the algorithm based on what algorithm is specified.
    """
    # init storage for actor and critic models
    self.actors = []
    self.actor_targets = []
    self.critics = []
    self.critic_targets = []

    # create all models separately for each agent instance
    for _ in range(self.num_instances):
        actor = ActorNetwork(self.state_size, self.action_size)
        target_actor = ActorNetwork(self.state_size, self.action_size)
        target_actor = utils.copy_weights(actor, target_actor)

        critic = CriticNetwork(self.state_size, self.action_size)
        target_critic = CriticNetwork(self.state_size, self.action_size)
        target_critic = utils.copy_weights(critic, target_critic)

        self.actors.append(actor)
        self.actor_targets.append(target_actor)
        self.critics.append(critic)
        self.critic_targets.append(target_critic)

    # initialize the replay buffer
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size, seed=self.seed)
def _init_alg(self):
    """
    Initialize the algorithm based on what algorithm is specified.
    """
    # initialize the actor and critic networks (and their targets) separately
    self.actor = ActorNetwork(self.state_size, self.action_size,
                              self.actor_inter_dims,
                              use_batch_norm=self.use_batch_norm).to(self.device)
    self.actor_target = ActorNetwork(self.state_size, self.action_size,
                                     self.actor_inter_dims,
                                     use_batch_norm=self.use_batch_norm).to(self.device)
    self.actor_target = utils.copy_weights(self.actor, self.actor_target)

    self.critic = CriticNetwork(self.state_size, self.action_size,
                                self.critic_inter_dims).to(self.device)
    self.critic_target = CriticNetwork(self.state_size, self.action_size,
                                       self.critic_inter_dims).to(self.device)
    self.critic_target = utils.copy_weights(self.critic, self.critic_target)

    # initialize optimizers
    self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.actor_alpha)
    self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                       lr=self.critic_alpha, eps=1e-4)

    # initialize the replay buffer
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size, seed=self.seed)
def step(self):
    """
    Update state of the agent and take a step through the learning process
    to reflect that experiences have been acquired and/or learned from.
    """
    # soft-update the actor target network toward the local actor
    self.actor_target = utils.copy_weights(self.actor, self.actor_target, self.tau)

    # soft-update the critic target network toward the local critic
    self.critic_target = utils.copy_weights(self.critic, self.critic_target, self.tau)
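# The `utils.copy_weights` helper is not shown in this listing. Below is a minimal
# sketch of what it is assumed to do, inferred only from how it is called above:
# with no tau it performs a hard copy of the source network's weights into the target,
# and with a tau in (0, 1) it performs a Polyak (soft) update,
#     target = tau * source + (1 - tau) * target.
# The name and signature follow the calls above; the body itself is an assumption.
def copy_weights(source, target, tau=None):
    """Hard-copy (tau=None) or soft-update (0 < tau < 1) weights from source into target."""
    for target_param, source_param in zip(target.parameters(), source.parameters()):
        if tau is None:
            # hard copy: overwrite the target parameters entirely
            target_param.data.copy_(source_param.data)
        else:
            # Polyak averaging: move the target a small step toward the source
            target_param.data.copy_(tau * source_param.data +
                                    (1.0 - tau) * target_param.data)
    return target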
def step(self):
    """
    Update state of the agent and take a step through the learning process
    to reflect that experiences have been acquired and/or learned from.
    """
    # snapshot the current policy weights into the previous-policy network
    self.prev_policy = utils.copy_weights(self.policy, self.prev_policy)

    # decay epsilon for random noise
    self.epsilon = np.max([self.epsilon * self.epsilon_decay, self.epsilon_min])

    # decay the action variances so actions are chosen more selectively
    self.action_variances = self.set_action_variances(self.variance_decay)

    # reset the internal step counter
    self.t = -1