# Example #1
    def __init__(self, size):
        """Create a bounded replay buffer holding at most ``size`` transitions.

        Once full, appending a new transition silently evicts the oldest one
        (handled by ``deque(maxlen=...)``).
        """
        self.size = size
        self.memory = deque(maxlen=size)

    def update(self, SARS):
        """Store one (state, action, reward, next-state, ...) transition.

        The underlying deque drops its oldest entry automatically when the
        buffer is at capacity.
        """
        self.memory.append(SARS)

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly without replacement.

        Returns the batch transposed: one sequence per transition field
        (states, actions, rewards, ...), ready for tuple unpacking.
        """
        batch = random.sample(self.memory, batch_size)
        return zip(*batch)


# Experience-replay buffer shared by the training code below.
r_memory = ReplayMemory(memory_size)
# Online ("agent") and target networks with identical architecture.
# NOTE(review): the (12, 12, 16) constructor arguments are presumably
# layer sizes -- confirm against the DQN definition (not in this chunk).
agent = DQN(12, 12, 16)
target = DQN(12, 12, 16)
# Initialise the target network as an exact copy of the online network.
target.load_state_dict(agent.state_dict())
optimizer = Adam(agent.parameters())


def update_target():
    """Sample a minibatch from replay memory and build the Q-value tensors.

    NOTE(review): despite the name, this performs (part of) a training step
    rather than a target-network sync; the loss computation and backward
    pass appear truncated in this chunk.
    """
    # Not enough stored experience for a full batch yet -- skip.
    if len(r_memory.memory) < batch_size:
        return
    # One sequence per transition field: S, A, R, S', done.
    observation, action, reward, observation_next, done = r_memory.sample(
        batch_size)
    observations = torch.cat(observation)
    observation_next = torch.cat(observation_next)
    actions = index_action(torch.LongTensor(action))
    # NOTE(review): LongTensor truncates fractional rewards; FloatTensor is
    # the usual choice -- confirm rewards are always integral.
    rewards = torch.LongTensor(reward)
    done = torch.FloatTensor(done)
    # Q(s, .) from the online network for the current states.
    q_values = agent(observations)
    # Online-network values at s' (Double-DQN style action selection?) --
    # TODO confirm against the missing loss code.
    p_q_values_next = agent(observation_next)
    # Target-network values at s' for the bootstrapped target.
    q_values_next = target(observation_next)
# Example #2
    loss.backward()
    optimizer.step()

    return loss.data[0]


def save_checkpoint(state, filename):
    """Serialize ``state`` (e.g. model/optimizer state dicts) to ``filename``."""
    with open(filename, "wb") as fh:
        torch.save(state, fh)


# TODO: implement load_model(filename): checkpoint = torch.load(filename),
# then restore with model.load_state_dict(checkpoint).

if __name__ == '__main__':
    model = DQN()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Load data
    # Column 0 holds per-sample 2-D grids, column 3 the training labels
    # (schema inferred from the indexing below -- TODO confirm).
    data = np.load('training_data.npy')
    # Stack the grids and add a singleton channel axis:
    # (N, 1, H, W), matching a Conv2d-style input.
    X_train = np.stack(data[:, 0], axis=0).reshape(
        (len(data), 1, len(data[0][0]), len(data[0][0][0])))
    Y_train = data[:, 3]

    # Training loop
    for epoch in range(100):
        # Randomize and batch training data
        batchsize = 8
        # Randomly shuffle each epoch
        # BUG(review): shuffling X_train and Y_train independently destroys
        # the pairing between each sample and its label.  Build one shared
        # permutation (np.random.permutation(len(X_train))) and index both
        # arrays with it instead.
        np.random.shuffle(X_train)
        np.random.shuffle(Y_train)