    def __init__(self, size):
        self.size = size
        # Bounded FIFO buffer: the oldest transitions are dropped once it is full
        self.memory = deque(maxlen=self.size)

    def update(self, SARS):
        # Store one (state, action, reward, next_state, done) transition
        self.memory.append(SARS)

    def sample(self, batch_size):
        # Draw a random mini-batch and transpose it into per-field tuples
        return zip(*random.sample(self.memory, batch_size))


# Replay buffer, online network and target network
# (memory_size, batch_size, index_action and the DQN class are assumed to be defined elsewhere in the file)
r_memory = ReplayMemory(memory_size)
agent = DQN(12, 12, 16)
target = DQN(12, 12, 16)
target.load_state_dict(agent.state_dict())  # start the target network as a copy of the agent
optimizer = Adam(agent.parameters())


def update_target():
    # Skip the update until the buffer holds at least one full batch
    if len(r_memory.memory) < batch_size:
        return
    observation, action, reward, observation_next, done = r_memory.sample(
        batch_size)
    observations = torch.cat(observation)
    observation_next = torch.cat(observation_next)
    actions = index_action(torch.LongTensor(action))
    rewards = torch.LongTensor(reward)
    done = torch.FloatTensor(done)
    q_values = agent(observations)              # Q(s, .) from the online network
    p_q_values_next = agent(observation_next)   # Q(s', .) from the online network
    q_values_next = target(observation_next)    # Q(s', .) from the target network
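    # NOTE: the lines that actually build the loss are missing at this point in
    # the source. Evaluating both agent(observation_next) and
    # target(observation_next) above suggests a Double-DQN style target, so the
    # sketch below assumes that, plus a discount factor `gamma` defined
    # elsewhere; it is an illustrative reconstruction, not the original code.
    q_value = q_values.gather(1, actions.view(-1, 1)).squeeze(1)
    best_next_action = p_q_values_next.max(1)[1].view(-1, 1)  # online net picks the action
    q_value_next = q_values_next.gather(1, best_next_action).squeeze(1).detach()  # target net scores it
    expected_q_value = rewards.float() + gamma * q_value_next * (1.0 - done)
    loss = nn.functional.smooth_l1_loss(q_value, expected_q_value)
    optimizer.zero_grad()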
    # Backpropagate the TD error and take one optimizer step on the online network
    loss.backward()
    optimizer.step()
    return loss.item()  # replaces the old loss.data[0] idiom, which fails on PyTorch >= 0.4


def save_checkpoint(state, filename):
    torch.save(state, filename)


# def load_model():
#     torch.load(


if __name__ == '__main__':
    model = DQN()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Load data: column 0 holds the input arrays, column 3 the labels.
    # allow_pickle is needed because the array stores nested object arrays.
    data = np.load('training_data.npy', allow_pickle=True)
    X_train = np.stack(data[:, 0], axis=0).reshape(
        (len(data), 1, len(data[0][0]), len(data[0][0][0])))
    Y_train = data[:, 3]

    # Training loop
    for epoch in range(100):
        # Randomize and batch the training data
        batchsize = 8
        # Shuffle inputs and labels with the same permutation each epoch so they
        # stay aligned; shuffling X_train and Y_train independently would
        # scramble the input/label pairing.
        permutation = np.random.permutation(len(X_train))
        X_train = X_train[permutation]
        Y_train = Y_train[permutation]