def __init__(self, args):
    """Initialize the QTRAN learner.

    Builds the per-agent RNN (eval + target), the joint action-value
    network (eval + target) and the state-value network V, optionally
    loads saved parameters, and syncs the target nets with the eval nets.

    Args:
        args: experiment configuration carrying n_actions, n_agents,
            state_shape, obs_shape, last_action, reuse_network, cuda,
            model_dir, alg, map, optimizer and lr.
    """
    self.n_actions = args.n_actions
    self.n_agents = args.n_agents
    self.state_shape = args.state_shape
    self.obs_shape = args.obs_shape
    self.args = args

    # Decide the RNN input dimension from the configuration.
    rnn_input_shape = self.obs_shape
    if args.last_action:
        # One-hot vector of the agent's previous action.
        rnn_input_shape += self.n_actions
    if args.reuse_network:
        # One-hot agent id, needed when all agents share one network.
        rnn_input_shape += self.n_agents

    # Networks.
    self.eval_rnn = RNN(rnn_input_shape, args)  # per-agent action-selection network
    self.target_rnn = RNN(rnn_input_shape, args)
    self.eval_joint_q = QtranQBase(args)  # Joint action-value network
    self.target_joint_q = QtranQBase(args)
    self.v = QtranV(args)
    if self.args.cuda:
        self.eval_rnn.cuda()
        self.target_rnn.cuda()
        self.eval_joint_q.cuda()
        self.target_joint_q.cuda()
        self.v.cuda()

    self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
    # Load a previously saved model if one exists.
    if os.path.exists(self.model_dir + '/rnn_net_params.pkl'):
        path_rnn = self.model_dir + '/rnn_net_params.pkl'
        path_joint_q = self.model_dir + '/joint_q_params.pkl'
        path_v = self.model_dir + '/v_params.pkl'
        # map_location makes loading robust when the checkpoint was
        # saved on a different device than the one we run on now.
        map_location = 'cuda' if self.args.cuda else 'cpu'
        self.eval_rnn.load_state_dict(
            torch.load(path_rnn, map_location=map_location))
        self.eval_joint_q.load_state_dict(
            torch.load(path_joint_q, map_location=map_location))
        self.v.load_state_dict(
            torch.load(path_v, map_location=map_location))
        print('Successfully load the model: {}, {} and {}'.format(
            path_rnn, path_joint_q, path_v))

    # Target nets start as exact copies of the eval nets.
    self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
    self.target_joint_q.load_state_dict(self.eval_joint_q.state_dict())

    self.eval_parameters = list(self.eval_joint_q.parameters()) + \
        list(self.v.parameters()) + \
        list(self.eval_rnn.parameters())
    if args.optimizer == "RMS":
        self.optimizer = torch.optim.RMSprop(self.eval_parameters, lr=args.lr)

    # During execution, keep one eval_hidden per agent.
    # During learning, keep one eval_hidden/target_hidden per agent per episode.
    self.eval_hidden = None
    self.target_hidden = None
def __init__(self, args):
    """Initialize the QMIX learner.

    Builds the per-agent RNN (eval + target) and the QMIX mixing
    network (eval + target), optionally loads saved parameters, and
    syncs the target nets with the eval nets.

    Args:
        args: experiment configuration carrying n_actions, n_agents,
            state_shape, obs_shape, last_action, reuse_network, cuda,
            model_dir, alg, map, optimizer and lr.
    """
    self.n_actions = args.n_actions
    self.n_agents = args.n_agents
    self.state_shape = args.state_shape
    self.obs_shape = args.obs_shape

    # Decide the RNN input dimension from the configuration.
    input_shape = self.obs_shape
    if args.last_action:
        # One-hot vector of the agent's previous action.
        input_shape += self.n_actions
    if args.reuse_network:
        # One-hot agent id, needed when all agents share one network.
        input_shape += self.n_agents

    # Networks.
    self.eval_rnn = RNN(input_shape, args)  # per-agent action-selection network
    self.target_rnn = RNN(input_shape, args)
    self.eval_qmix_net = QMixNet(args)  # mixes the agents' Q-values into a joint Q
    self.target_qmix_net = QMixNet(args)
    self.args = args
    if self.args.cuda:
        self.eval_rnn.cuda()
        self.target_rnn.cuda()
        self.eval_qmix_net.cuda()
        self.target_qmix_net.cuda()

    self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
    # Load a previously saved model if one exists.
    if os.path.exists(self.model_dir + '/rnn_net_params.pkl'):
        path_rnn = self.model_dir + '/rnn_net_params.pkl'
        path_qmix = self.model_dir + '/qmix_net_params.pkl'
        # map_location makes loading robust when the checkpoint was
        # saved on a different device than the one we run on now.
        map_location = 'cuda' if self.args.cuda else 'cpu'
        self.eval_rnn.load_state_dict(
            torch.load(path_rnn, map_location=map_location))
        self.eval_qmix_net.load_state_dict(
            torch.load(path_qmix, map_location=map_location))
        print('Successfully load the model: {} and {}'.format(
            path_rnn, path_qmix))

    # Target nets start as exact copies of the eval nets.
    self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
    self.target_qmix_net.load_state_dict(self.eval_qmix_net.state_dict())

    self.eval_parameters = list(self.eval_qmix_net.parameters()) + list(
        self.eval_rnn.parameters())
    if args.optimizer == "RMS":
        self.optimizer = torch.optim.RMSprop(self.eval_parameters, lr=args.lr)

    # During execution, keep one eval_hidden per agent.
    # During learning, keep one eval_hidden/target_hidden per agent per episode.
    self.eval_hidden = None
    self.target_hidden = None
def __init__(self, args):
    """Initialize the VDN learner.

    Builds the per-agent RNN (eval + target) and the VDN summing
    network (eval + target), optionally loads saved parameters, and
    syncs the target nets with the eval nets.

    Args:
        args: experiment configuration carrying n_actions, n_agents,
            state_shape, obs_shape, last_action, reuse_network,
            model_dir, optimizer and lr.
    """
    self.n_actions = args.n_actions
    self.n_agents = args.n_agents
    self.state_shape = args.state_shape
    self.obs_shape = args.obs_shape

    # Decide the RNN input dimension from the configuration.
    input_shape = self.obs_shape
    if args.last_action:
        # One-hot vector of the agent's previous action.
        input_shape += self.n_actions
    if args.reuse_network:
        # One-hot agent id, needed when all agents share one network.
        input_shape += self.n_agents

    # Networks.
    self.eval_rnn = RNN(input_shape, args)  # per-agent action-selection network
    self.target_rnn = RNN(input_shape, args)
    self.eval_vdn_net = VDNNet()  # sums the agents' Q-values into a joint Q
    self.target_vdn_net = VDNNet()

    # Load a previously saved model if one exists.  Checking file
    # existence (instead of only `model_dir != ''`) avoids a
    # FileNotFoundError when the directory has no checkpoint yet,
    # and matches how the sibling learners guard their loading.
    path_rnn = args.model_dir + '/rnn_net_params.pkl'
    path_vdn = args.model_dir + '/vdn_net_params.pkl'
    if args.model_dir != '' and os.path.exists(path_rnn):
        # map_location makes loading robust when the checkpoint was
        # saved on a different device than the one we run on now.
        map_location = 'cuda' if getattr(args, 'cuda', False) else 'cpu'
        self.eval_rnn.load_state_dict(
            torch.load(path_rnn, map_location=map_location))
        self.eval_vdn_net.load_state_dict(
            torch.load(path_vdn, map_location=map_location))
        print('Successfully load the rnn model {} and the vdn model {}'.
              format(path_rnn, path_vdn))

    # Target nets start as exact copies of the eval nets.
    self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
    self.target_vdn_net.load_state_dict(self.eval_vdn_net.state_dict())

    self.eval_parameters = list(self.eval_vdn_net.parameters()) + list(
        self.eval_rnn.parameters())
    if args.optimizer == "RMS":
        self.optimizer = torch.optim.RMSprop(self.eval_parameters, lr=args.lr)
    self.args = args

    # During execution, keep one eval_hidden per agent.
    # During learning, keep one eval_hidden/target_hidden per agent per episode.
    self.eval_hidden = None
    self.target_hidden = None
def __init__(self, args):
    """Initialize the COMA learner.

    Builds the per-agent actor RNN and the centralized critic
    (eval + target), optionally loads saved parameters, and syncs the
    target critic with the eval critic.

    Args:
        args: experiment configuration carrying n_actions, n_agents,
            state_shape, obs_shape, last_action, reuse_network, cuda,
            model_dir, alg, map, optimizer, lr_actor and lr_critic.
    """
    self.n_actions = args.n_actions
    self.n_agents = args.n_agents
    self.state_shape = args.state_shape
    self.obs_shape = args.obs_shape
    # Actor input dimension — same layout as the VDN/QMIX RNN input,
    # so the same network structure is reused.
    actor_input_shape = self.obs_shape
    critic_input_shape = self._get_critic_input_shape()  # critic input dimension
    # Decide the actor RNN input dimension from the configuration.
    if args.last_action:
        # One-hot vector of the agent's previous action.
        actor_input_shape += self.n_actions
    if args.reuse_network:
        # One-hot agent id, needed when all agents share one network.
        actor_input_shape += self.n_agents
    self.args = args

    # Networks.
    # Actor: outputs per-action logits for the current agent; action
    # selection applies softmax on top of these outputs.
    self.eval_rnn = RNN(actor_input_shape, args)
    # Critic: joint Q-values over the current agent's actions, combined
    # with the actor's probabilities to compute the advantage.
    self.eval_critic = ComaCritic(critic_input_shape, self.args)
    self.target_critic = ComaCritic(critic_input_shape, self.args)
    if self.args.cuda:
        self.eval_rnn.cuda()
        self.eval_critic.cuda()
        self.target_critic.cuda()

    self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
    # Load a previously saved model if one exists.
    if os.path.exists(self.model_dir + '/rnn_params.pkl'):
        path_rnn = self.model_dir + '/rnn_params.pkl'
        path_coma = self.model_dir + '/critic_params.pkl'
        # map_location makes loading robust when the checkpoint was
        # saved on a different device than the one we run on now.
        map_location = 'cuda' if self.args.cuda else 'cpu'
        self.eval_rnn.load_state_dict(
            torch.load(path_rnn, map_location=map_location))
        self.eval_critic.load_state_dict(
            torch.load(path_coma, map_location=map_location))
        print('Successfully load the model: {} and {}'.format(
            path_rnn, path_coma))

    # Target critic starts as an exact copy of the eval critic.
    self.target_critic.load_state_dict(self.eval_critic.state_dict())

    self.rnn_parameters = list(self.eval_rnn.parameters())
    self.critic_parameters = list(self.eval_critic.parameters())
    if args.optimizer == "RMS":
        self.critic_optimizer = torch.optim.RMSprop(self.critic_parameters,
                                                    lr=args.lr_critic)
        self.rnn_optimizer = torch.optim.RMSprop(self.rnn_parameters,
                                                 lr=args.lr_actor)
    # (The duplicate `self.args = args` assignment that followed here
    # was redundant — args is already stored above.)

    # During execution, keep one eval_hidden per agent.
    # During learning, keep one eval_hidden per agent per episode.
    self.eval_hidden = None
embeddings = np.random.uniform(-1, 1, [vocab_size, embedding_dim]) for k in data: if k[0] in vocab.word2id: embeddings[vocab.word2id[k[0]]] = list(map(float, k[1:])) weights = embeddings with open('vocab.pkl', 'wb') as f: pickle.dump(vocab, f) print('dictionary dump') # # Build models encoder = vgg.vgg16() decoder = RNN(embedding_dim=embedding_dim, hidden_dim=hidden_dim, vocab_size=vocab_size, num_layers=1, weights=weights) # Loss and optimizer criterion = nn.CrossEntropyLoss() params = list(encoder.parameters()) + list(decoder.parameters()) optimizer = torch.optim.Adam(params, lr=learning_rate) # Train models num_epochs = 100 save_iter = 10 for epoch in range(num_epochs): loss_list = [] for _, data in enumerate(dataloader, 0):