# Shared imports assumed by the excerpts below; the project-local layers and
# learner (Embedding_Layer, Attention_Model, Double_Attention_Model, RNN_Model,
# Dueling_DDQN_Learner, Replay_Buffer) are assumed to be importable from the
# project's own modules.
from itertools import chain

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
    def _init_agents(self):
        # parameter sharing: the embedding plus spatial and temporal attention
        # layers are shared across agents, each with a matching target copy
        self.embedding = Embedding_Layer(self.input_dim,
                                         self.hidden_dim).to(self.device)
        self.attention = Attention_Model(self.hidden_dim).to(self.device)
        self.temporal_attention = Attention_Model(self.hidden_dim).to(
            self.device)
        self.embedding_target = Embedding_Layer(
            self.input_dim, self.hidden_dim).to(self.device)
        self.attention_target = Attention_Model(self.hidden_dim).to(
            self.device)
        self.temporal_attention_target = Attention_Model(self.hidden_dim).to(
            self.device)
        Dueling_DDQN_Learner.copy_network(self.embedding,
                                          self.embedding_target)
        Dueling_DDQN_Learner.copy_network(self.attention,
                                          self.attention_target)
        Dueling_DDQN_Learner.copy_network(self.temporal_attention,
                                          self.temporal_attention_target)
        for i in range(self.num_agents):
            q_network = Double_Attention_Model(self.input_dim, self.output_dim,
                                               self.hidden_dim).to(self.device)
            q_network_target = Double_Attention_Model(
                self.input_dim, self.output_dim,
                self.hidden_dim).to(self.device)
            q_network.set_layer_para(self.embedding, self.attention,
                                     self.temporal_attention)
            q_network_target.set_layer_para(self.embedding_target,
                                            self.attention_target,
                                            self.temporal_attention_target)
            self.agents[i].set_q_network(q_network, q_network_target)
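
# A minimal, self-contained sketch (not the project's code) of the parameter
# sharing that set_layer_para relies on: assigning the same nn.Module instance
# into several q-networks makes them literally share weights, so one optimizer
# step updates all of them. TinyQNet and the sizes below are illustrative.
class TinyQNet(nn.Module):
    def __init__(self, shared_embedding, out_dim):
        super().__init__()
        self.embedding = shared_embedding  # shared module, not a copy
        self.head = nn.Linear(shared_embedding.out_features, out_dim)

    def forward(self, x):
        return self.head(torch.relu(self.embedding(x)))


def _demo_shared_layers():
    shared = nn.Linear(8, 16)  # stands in for Embedding_Layer
    net_a, net_b = TinyQNet(shared, 4), TinyQNet(shared, 4)
    # both networks expose the very same weight tensor for the shared layer
    assert net_a.embedding.weight is net_b.embedding.weight
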


    def __init__(self, input_dim, output_dim, hidden_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim)
        self.rnn = RNN_Model(self.hidden_dim, self.hidden_dim)
        self.attention = Attention_Model(self.hidden_dim)
        self.relu = nn.ReLU()
        self.linear_out = nn.Linear(self.hidden_dim, self.output_dim)
        self.hidden, self.cell = None, None
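
# A minimal sketch of carrying LSTM state across single-step calls, which is
# presumably what RNN_Model does with the (hidden, cell) pair kept above;
# torch.nn.LSTM and the sizes below are assumptions, not the project's API.
def _demo_recurrent_state():
    lstm = nn.LSTM(input_size=16, hidden_size=16, batch_first=True)
    h = torch.zeros(1, 1, 16)  # (num_layers, batch, hidden_size)
    c = torch.zeros(1, 1, 16)
    for _ in range(3):  # three consecutive environment steps
        x = torch.randn(1, 1, 16)  # one observation embedding per step
        out, (h, c) = lstm(x, (h, c))  # recurrent state persists across steps
    return out
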
class LSTM_Attention_Agents(Attention_Agents):
    def __init__(self, config, num_agents, input_dim, hidden_dim, output_dim,
                 neighbor_map, node_name):
        super().__init__(config, num_agents, input_dim, hidden_dim, output_dim,
                         neighbor_map, node_name)
        self._init_agents()
        self.hidden = np.zeros((self.num_agents, self.hidden_dim))
        self.hidden_target = np.zeros((self.num_agents, self.hidden_dim))
        self.cell = np.zeros((self.num_agents, self.hidden_dim))
        self.cell_target = np.zeros((self.num_agents, self.hidden_dim))

    def _init_agents(self):
        # parameter sharing
        self.embedding = Embedding_Layer(self.input_dim,
                                         self.hidden_dim).to(self.device)
        self.rnn = RNN_Model(self.hidden_dim, self.num_agents).to(self.device)
        self.attention = Attention_Model(self.hidden_dim).to(self.device)
        self.embedding_target = Embedding_Layer(
            self.input_dim, self.hidden_dim).to(self.device)
        self.rnn_target = RNN_Model(self.hidden_dim,
                                    self.num_agents).to(self.device)
        self.attention_target = Attention_Model(self.hidden_dim).to(
            self.device)
        Dueling_DDQN_Learner.copy_network(self.embedding,
                                          self.embedding_target)
        Dueling_DDQN_Learner.copy_network(self.rnn, self.rnn_target)
        Dueling_DDQN_Learner.copy_network(self.attention,
                                          self.attention_target)
        self.share_para = list(
            chain(self.embedding.parameters(), self.attention.parameters(),
                  self.rnn.parameters()))
        self.all_para = chain(self.embedding.parameters(),
                              self.attention.parameters(),
                              self.rnn.parameters())
        # build the per-agent learners and collect their parameters as well
        for i in range(self.num_agents):
            self.agents.append(Dueling_DDQN_Learner(self.config))
            self.all_para = chain(self.all_para,
                                  self.agents[i].get_q_network().parameters())
        # materialize the chain: it is a one-shot iterator, and learn() reuses
        # self.all_para for gradient clipping after the optimizer consumes it
        self.all_para = list(self.all_para)
        self.share_optimizer = optim.RMSprop(self.all_para,
                                             lr=self.lr,
                                             weight_decay=1e-4)

    def _get_embedding(self, state):
        state_embedding = self.embedding(state)
        batch_size = state.shape[0]
        if batch_size == 1:
            # get hidden state to store
            self.hidden, self.cell = self.rnn.get_hidden_state()
            self.hidden_target, self.cell_target = self.rnn_target.get_hidden_state(
            )
            state_hidden, _ = self.rnn(state_embedding)
        else:
            state_hidden, _ = self.rnn(state_embedding, self.hidden, self.cell)
        state_attention = self.attention(state_hidden, self.adj)
        return state_attention

    def _get_embedding_target(self, state):
        state_embedding_target = self.embedding_target(state)
        batch_size = state.shape[0]
        if batch_size == 1:
            state_hidden_target, _ = self.rnn_target(state_embedding_target)
        else:
            state_hidden_target, _ = self.rnn_target(state_embedding_target,
                                                     self.hidden_target,
                                                     self.cell_target)
        state_attention_target = self.attention_target(state_hidden_target,
                                                       self.adj)
        return state_attention_target

    def _update_sharing_target_network(self):
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.embedding, self.embedding_target, self.tau)
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.rnn, self.rnn_target, self.tau)
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.attention, self.attention_target, self.tau)

    def store_experience(self, states, actions, rewards, next_states,
                         is_dones):
        hidden = np.stack(
            (self.hidden, self.hidden_target, self.cell, self.cell_target),
            axis=1)
        self.buffer.store_experience(states, actions, rewards, next_states,
                                     is_dones, hidden)

    def sample_experience(self):
        states, actions, rewards, next_states, is_dones, hidden = self.buffer.sample_experience(
        )
        # restore the hidden/cell states stored with this batch
        self.hidden = hidden[:, 0]
        self.hidden_target = hidden[:, 1]
        self.cell = hidden[:, 2]
        self.cell_target = hidden[:, 3]
        return states, actions, rewards, next_states, is_dones

    def get_share_para(self):
        dic1 = dict(self.embedding.named_parameters())
        dic2 = dict(self.rnn.named_parameters())
        dic3 = dict(dic1, **dic2)
        dic4 = dict(self.attention.named_parameters())
        return dict(dic3, **dic4)
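
# A small numpy-only check of the hidden-state layout used above:
# store_experience stacks (hidden, hidden_target, cell, cell_target) along
# axis=1 and sample_experience recovers them as hidden[:, 0] .. hidden[:, 3].
# The shapes below are illustrative.
def _demo_hidden_packing(num_agents=4, hidden_dim=8):
    h, h_t, c, c_t = (np.random.rand(num_agents, hidden_dim) for _ in range(4))
    packed = np.stack((h, h_t, c, c_t), axis=1)  # (num_agents, 4, hidden_dim)
    assert packed.shape == (num_agents, 4, hidden_dim)
    assert np.allclose(packed[:, 0], h) and np.allclose(packed[:, 3], c_t)
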
class Basic_Agents:
    def __init__(self, config, num_agents, input_dim, hidden_dim, output_dim):

        self.num_agents = num_agents
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.config = config

        # replay buffer parameters
        self.batch_size = config['batch_size']
        self.buffer_size = config['buffer_size']
        self.buffer = Replay_Buffer(self.buffer_size, self.batch_size)

        self.lr = config['lr']
        self.tau = config['tau']
        self.agents = []

        self.update_step = config['update_step']
        self.curr_step = 0
        self._init_agents()

    def _init_agents(self):
        self.embedding = Embedding_Layer(self.input_dim,
                                         self.hidden_dim).to(self.device)
        self.embedding_target = Embedding_Layer(
            self.input_dim, self.hidden_dim).to(self.device)
        Dueling_DDQN_Learner.copy_network(self.embedding,
                                          self.embedding_target)

        self.share_para = list(self.embedding.parameters())
        self.all_para = self.embedding.parameters()
        # build the per-agent learners and collect their parameters as well
        for i in range(self.num_agents):
            self.agents.append(Dueling_DDQN_Learner(self.config))
            self.all_para = chain(self.all_para,
                                  self.agents[i].get_q_network().parameters())
        # materialize the chain: it is a one-shot iterator, and learn() reuses
        # self.all_para for gradient clipping after the optimizer consumes it
        self.all_para = list(self.all_para)
        self.share_optimizer = optim.RMSprop(self.all_para,
                                             lr=self.lr,
                                             weight_decay=1e-4)

    def get_agent(self, i):
        return self.agents[i]

    def step(self, state, test=False):
        state_embedding = self._get_embedding(state)
        action = []
        for i in range(self.num_agents):
            action.append(self.agents[i].step(state_embedding[:, i], test))
        action = np.asarray(action)
        self.curr_step += 1
        return action

    def learn(self):
        # run several gradient updates per call
        for _ in range(self.update_step):
            states, actions, rewards, next_states, is_dones = self.sample_experience(
            )
            actions = torch.from_numpy(actions).long().to(self.device)
            rewards = torch.from_numpy(rewards).float().to(self.device)
            is_dones = torch.from_numpy(is_dones).float().to(self.device)
            states_embedding = self._get_embedding(states)
            next_states_embedding = self._get_embedding(next_states)
            next_states_embedding_target = self._get_embedding_target(
                next_states)
            total_loss = 0
            for i in range(self.num_agents):
                actions_values_current = self.agents[
                    i].cal_current_actions_value(
                        next_states_embedding[:, i],
                        next_states_embedding_target[:, i], rewards[:, i],
                        is_dones)
                actions_values_expected = self.agents[
                    i].cal_expected_actions_value(states_embedding[:, i],
                                                  actions[:, i])
                loss = F.mse_loss(actions_values_expected,
                                  actions_values_current)
                total_loss += loss
            # backpropagation
            self.share_optimizer.zero_grad()
            total_loss.backward()
            # self._scale_shared_grads()
            torch.nn.utils.clip_grad_value_(self.all_para, 1)
            self.share_optimizer.step()
            for i in range(self.num_agents):
                # update each agent's target network
                Dueling_DDQN_Learner.soft_update_of_target_network(
                    self.agents[i].q_network_current,
                    self.agents[i].q_network_target, self.tau)
            self._update_sharing_target_network()
            # self.share_optimizer.zero_grad()

    def get_share_para(self):
        return dict(self.embedding.named_parameters())

    def store_experience(self, states, actions, rewards, next_states,
                         is_dones):
        self.buffer.store_experience(states, actions, rewards, next_states,
                                     is_dones)

    def sample_experience(self):
        states, actions, rewards, next_states, is_dones = self.buffer.sample_experience(
        )
        return states, actions, rewards, next_states, is_dones

    def _get_embedding(self, state):
        return self.embedding(state)

    def _get_embedding_target(self, state):
        return self.embedding_target(state)

    def _update_sharing_target_network(self):
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.embedding, self.embedding_target, self.tau)

    def get_attention_score(self, i):
        return -1

    def _scale_shared_grads(self):
        """
        Scale gradients for parameters that are shared since they accumulate
        gradients from the critic loss function multiple times
        """
        for p in self.share_para:
            p.grad.data.mul_(1. / self.num_agents)

    def save_model(self, path):
        share_model_name = path + '/share_model.pkl'
        torch.save(self.embedding.state_dict(), share_model_name)
        for i in range(self.num_agents):
            unique_model_name = path + '/q_network_%d.pkl' % i
            torch.save(self.agents[i].q_network_current.state_dict(),
                       unique_model_name)

    def load_model(self, path):
        share_model_name = path + '/share_model.pkl'
        self.embedding.load_state_dict(
            torch.load(share_model_name, map_location=self.device))
        for i in range(self.num_agents):
            unique_model_name = path + '/q_network_%d.pkl' % i
            self.agents[i].q_network_current.load_state_dict(
                torch.load(unique_model_name, map_location=self.device))
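
# A minimal sketch of the soft target update used throughout this file, i.e.
# what Dueling_DDQN_Learner.soft_update_of_target_network is assumed to
# compute: target <- tau * online + (1 - tau) * target. This is the common
# convention, not a quote of the project's implementation.
def _demo_soft_update(tau=0.01):
    online, target = nn.Linear(4, 4), nn.Linear(4, 4)
    target.load_state_dict(online.state_dict())  # start from identical weights
    with torch.no_grad():
        for p, p_t in zip(online.parameters(), target.parameters()):
            p_t.mul_(1.0 - tau).add_(tau * p)
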
class Attention_Model(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.pre_train = False
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # if embedding_layer:
        #     self.embedding = embedding_layer
        #     self.attention = attention_layer
        #     self.linear1 = nn.Linear(self.input_dim, self.output_dim)
        # else:
        self.embedding = Embedding_Layer(self.input_dim, self.hidden_dim)
        self.attention = Attention_Model(self.hidden_dim)
        self.relu = nn.ReLU()
        self.linear1 = nn.Linear(self.hidden_dim, self.output_dim)

        self.attention_score = None
        # for para in self.attention.parameters():
        #     para.requires_grad = False

    '''
    input:
    state: [batch, lane_num+1, feature_dim, neighbors_num]
    output:
    [batch, feature_out]
    '''

    def forward(self, state):
        agent_state, neighbors_state = state[:, :, :, 0], state[:, :, :, 0:]
        neighbors_state = np.transpose(neighbors_state, (0, 3, 1, 2))
        batch_size = state.shape[0]
        neighbors_num = state.shape[-1]
        agent_embedding = self.embedding(agent_state)
        # the neighbor-attention path (converting the states to tensors,
        # embedding each neighbor and attending over them) is disabled in this
        # variant; only the agent's own embedding is used below
        out = self.relu(agent_embedding)
        out = self.linear1(out)
        return out

    def change_mode(self):
        self.pre_train = False
        for para in self.embedding.parameters():
            para.requires_grad = False
        for para in self.attention.parameters():
            para.requires_grad = True

    def set_layer_para(self, embedding_layer=None, attention_layer=None):
        if embedding_layer is not None:
            self.embedding = embedding_layer
        if attention_layer is not None:
            self.attention = attention_layer

    def get_attention_score(self):
        att = self.attention_score.cpu().detach().numpy()
        idx = np.nonzero(att)
        att = att[idx]
        return att
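
# A tiny shape check of the slicing convention used in forward() above: the
# last axis of the state indexes the intersection itself plus its neighbors,
# and slot 0 is read as the agent's own observation. Sizes are illustrative.
def _demo_state_slicing(batch=2, lanes_plus_1=9, feat=3, neighbors=5):
    state = np.random.rand(batch, lanes_plus_1, feat, neighbors)
    agent_state = state[:, :, :, 0]  # (batch, lane_num+1, feature_dim)
    neighbors_state = np.transpose(state[:, :, :, 0:], (0, 3, 1, 2))
    assert agent_state.shape == (batch, lanes_plus_1, feat)
    assert neighbors_state.shape == (batch, neighbors, lanes_plus_1, feat)
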
class Double_Attention_Agents(Basic_Agents):
    def __init__(self, config, num_agents, input_dim, hidden_dim, output_dim,
                 seq_len, neighbor_map, node_name):
        super().__init__(config, num_agents, input_dim, hidden_dim, output_dim)
        self.adj = self._get_adj(neighbor_map, node_name)
        # self.n_heads = config['n_heads']

    def _init_agents(self):
        # parameter sharing
        self.n_heads = self.config['n_heads']
        self.embedding = Embedding_Layer(self.input_dim,
                                         self.hidden_dim).to(self.device)
        self.attention = Double_Attention_Model(self.hidden_dim,
                                                self.n_heads).to(self.device)
        self.embedding_target = Embedding_Layer(
            self.input_dim, self.hidden_dim).to(self.device)
        self.attention_target = Double_Attention_Model(
            self.hidden_dim, self.n_heads).to(self.device)
        Dueling_DDQN_Learner.copy_network(self.embedding,
                                          self.embedding_target)
        Dueling_DDQN_Learner.copy_network(self.attention,
                                          self.attention_target)
        self.share_para = list(
            chain(self.embedding.parameters(), self.attention.parameters()))
        self.all_para = chain(self.embedding.parameters(),
                              self.attention.parameters())
        # build the per-agent learners and collect their parameters as well
        for i in range(self.num_agents):
            self.agents.append(Dueling_DDQN_Learner(self.config))
            self.all_para = chain(self.all_para,
                                  self.agents[i].get_q_network().parameters())
        # materialize the chain: it is a one-shot iterator, and learn() reuses
        # self.all_para for gradient clipping after the optimizer consumes it
        self.all_para = list(self.all_para)
        self.share_optimizer = optim.RMSprop(self.all_para,
                                             lr=self.lr,
                                             weight_decay=1e-4)

    def _get_embedding(self, state):
        if len(state.shape) == 4:
            state = np.expand_dims(state, axis=0)
        state_embedding = self.embedding(state)
        state_attention = self.attention(state_embedding, self.adj)
        return state_attention

    def _get_embedding_target(self, state):
        state_embedding_target = self.embedding_target(state)
        state_attention_target = self.attention_target(state_embedding_target,
                                                       self.adj)
        return state_attention_target

    def _update_sharing_target_network(self):
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.embedding, self.embedding_target, self.tau)
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.attention, self.attention_target, self.tau)

    def store_experience(self, states, actions, rewards, next_states,
                         is_dones):
        states = np.expand_dims(states, axis=0)
        next_states = np.expand_dims(next_states, axis=0)
        self.buffer.store_experience(states, actions, rewards, next_states,
                                     is_dones)

    def _get_adj(self, neighbor_map, node_name):
        adj = np.zeros((self.num_agents, self.num_agents), dtype=bool)
        for i, node in enumerate(node_name):
            adj[i][i] = True
            for neighbor in neighbor_map[node]:
                idx = node_name.index(neighbor)
                adj[i][idx] = True
        return adj

    def get_share_para(self):
        dic1 = dict(self.embedding.named_parameters())
        dic2 = dict(self.attention.named_parameters())
        return dict(dic1, **dic2)

    def get_attention_score(self, i):
        return self.attention.get_attention_score(i)
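
# A small illustration of _get_adj: given a neighbor map and an ordered list
# of node names, it builds a boolean adjacency matrix with self-loops. The
# node names below are made up for the example.
def _demo_get_adj():
    node_name = ['nt1', 'nt2', 'nt3']
    neighbor_map = {'nt1': ['nt2'], 'nt2': ['nt1', 'nt3'], 'nt3': ['nt2']}
    adj = np.zeros((len(node_name), len(node_name)), dtype=bool)
    for i, node in enumerate(node_name):
        adj[i][i] = True  # every node attends to itself
        for neighbor in neighbor_map[node]:
            adj[i][node_name.index(neighbor)] = True
    return adj  # True where a node may attend to another node
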
class Attention_Agents(Basic_Agents):
    def __init__(self, config, num_agents, input_dim, hidden_dim, output_dim,
                 neighbor_map, node_name):
        super().__init__(config, num_agents, input_dim, hidden_dim, output_dim)
        # self.q_network = Attention_Model(self.input_dim, self.output_dim, self.hidden_dim).to(self.device)
        # self.q_network_target = Attention_Model(self.input_dim, self.output_dim, self.hidden_dim).to(self.device)
        self._init_agents()

        self.adj = self._get_adj(neighbor_map, node_name)

    def _init_agents(self):
        # parameter sharing
        self.n_heads = self.config['n_heads']
        self.embedding = Embedding_Layer(self.input_dim,
                                         self.hidden_dim).to(self.device)
        self.attention = Attention_Model(self.hidden_dim,
                                         self.n_heads).to(self.device)
        self.embedding_target = Embedding_Layer(
            self.input_dim, self.hidden_dim).to(self.device)
        self.attention_target = Attention_Model(self.hidden_dim,
                                                self.n_heads).to(self.device)
        Dueling_DDQN_Learner.copy_network(self.embedding,
                                          self.embedding_target)
        Dueling_DDQN_Learner.copy_network(self.attention,
                                          self.attention_target)
        self.share_para = list(
            chain(self.embedding.parameters(), self.attention.parameters()))
        self.all_para = chain(self.embedding.parameters(),
                              self.attention.parameters())
        # build the per-agent learners and collect their parameters as well
        for i in range(self.num_agents):
            self.agents.append(Dueling_DDQN_Learner(self.config))
            self.all_para = chain(self.all_para,
                                  self.agents[i].get_q_network().parameters())
        # materialize the chain: it is a one-shot iterator, and learn() reuses
        # self.all_para for gradient clipping after the optimizer consumes it
        self.all_para = list(self.all_para)
        self.share_optimizer = optim.RMSprop(self.all_para,
                                             lr=self.lr,
                                             weight_decay=1e-4)

    def _get_embedding(self, state):
        state_embedding = self.embedding(state)
        state_attention = self.attention(state_embedding, self.adj)
        return state_attention

    def _get_embedding_target(self, state):
        state_embedding_target = self.embedding_target(state)
        state_attention_target = self.attention_target(state_embedding_target,
                                                       self.adj)
        return state_attention_target

    def _update_sharing_target_network(self):
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.embedding, self.embedding_target, self.tau)
        Dueling_DDQN_Learner.soft_update_of_target_network(
            self.attention, self.attention_target, self.tau)

    def _get_adj(self, neighbor_map, node_name):
        adj = np.zeros((self.num_agents, self.num_agents), dtype=bool)
        for i, node in enumerate(node_name):
            adj[i][i] = True
            for neighbor in neighbor_map[node]:
                idx = node_name.index(neighbor)
                # idx = int(neighbor[2:] - 1)
                adj[i][idx] = True
        return adj

    def get_attention_score(self, i):
        att = self.attention.get_attention_score(i, self.adj)
        return att

    def get_share_para(self):
        dic1 = dict(self.embedding.named_parameters())
        dic2 = dict(self.attention.named_parameters())
        return dict(dic1, **dic2)
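
# A hedged end-to-end sketch of how these agent containers appear to be driven,
# based only on the methods defined above. The config keys are the ones this
# file reads; `env`, its API, and all numeric values are assumptions and the
# real learner may require additional config entries.
def _demo_training_loop(env, num_agents, input_dim, hidden_dim, output_dim,
                        neighbor_map, node_name, total_steps=1000):
    config = {
        'batch_size': 32,
        'buffer_size': 10000,
        'lr': 1e-3,
        'tau': 0.01,
        'update_step': 5,
        'n_heads': 4,
    }
    agents = Attention_Agents(config, num_agents, input_dim, hidden_dim,
                              output_dim, neighbor_map, node_name)
    states = env.reset()
    for _ in range(total_steps):
        actions = agents.step(states)
        next_states, rewards, is_dones = env.step(actions)
        agents.store_experience(states, actions, rewards, next_states,
                                is_dones)
        agents.learn()
        states = next_states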