def train(self, batches=5000, batch_size=100):
    """Train the actor by supervised regression on buffered (state, action) pairs.

    Args:
        batches: number of optimisation steps to run.
        batch_size: samples drawn from the replay buffer per step
            (defaults to the previously hard-coded value of 100).

    Returns:
        np.ndarray of per-batch MSE loss values, length ``batches``.
    """
    losses = np.zeros(batches)
    criterion = nn.MSELoss()
    optimiser = optim.SGD(self.actor.parameters(), lr=0.001)

    for i in range(batches):
        x, u = self.buffer.sample(batch_size)
        state = torch.FloatTensor(x)
        action = torch.FloatTensor(u)

        optimiser.zero_grad()
        outputs = self.actor(state)
        # Only the first output dimension is regressed against the target action.
        actor_loss = criterion(outputs[:, 0], action)
        actor_loss.backward()
        optimiser.step()

        # .item() detaches the scalar from the graph; storing the raw tensor
        # would keep every batch's autograd graph alive (memory leak) and
        # the original print interpolated the tensor repr, not the value.
        losses[i] = actor_loss.item()
        if i % 500 == 0:
            print(f"Batch: {i}: Loss: {actor_loss.item()}")

    lib.plot(losses, 100)
    self.save()
    return losses
def print_update(self, plot_reward=True):
    """Print a one-line progress summary and optionally plot the reward history.

    Silently returns until at least 5 rewards have been recorded so the
    mean is meaningful.

    Args:
        plot_reward: when True, also plot the recorded rewards on figure 2.
    """
    if self.ptr < 5:
        return

    mean = np.mean(self.rewards[0:self.ptr])
    # Bug fix: ``self.rewards`` is evidently a pre-allocated buffer filled
    # only up to ``self.ptr`` (the mean above slices to ``ptr``), so
    # ``rewards[-1]`` read an unwritten trailing slot rather than the most
    # recent score. Use the last valid entry instead.
    score = self.rewards[self.ptr - 1]
    print(f"Run: {self.t_counter} --> Score: {score:.2f} --> Mean: {mean:.2f} ")

    if plot_reward:
        lib.plot(self.rewards[0:self.ptr], figure_n=2)