Example #1
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        self.args = args

        rnn_input_shape = self.obs_shape

        # Determine the RNN input dimension from the arguments
        if args.last_action:
            rnn_input_shape += self.n_actions  # one-hot vector of this agent's previous action
        if args.reuse_network:
            rnn_input_shape += self.n_agents

        # Neural networks
        self.eval_rnn = RNN(rnn_input_shape, args)  # network each agent uses to choose actions
        self.target_rnn = RNN(rnn_input_shape, args)

        self.eval_joint_q = QtranQBase(args)  # Joint action-value network
        self.target_joint_q = QtranQBase(args)

        self.v = QtranV(args)

        if self.args.cuda:
            self.eval_rnn.cuda()
            self.target_rnn.cuda()
            self.eval_joint_q.cuda()
            self.target_joint_q.cuda()
            self.v.cuda()

        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # Load the model if a saved one exists
        if os.path.exists(self.model_dir + '/rnn_net_params.pkl'):
            path_rnn = self.model_dir + '/rnn_net_params.pkl'
            path_joint_q = self.model_dir + '/joint_q_params.pkl'
            path_v = self.model_dir + '/v_params.pkl'
            self.eval_rnn.load_state_dict(torch.load(path_rnn))
            self.eval_joint_q.load_state_dict(torch.load(path_joint_q))
            self.v.load_state_dict(torch.load(path_v))
            print('Successfully loaded the model: {}, {} and {}'.format(
                path_rnn, path_joint_q, path_v))

        # Initialize the target nets with the same parameters as the eval nets
        self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
        self.target_joint_q.load_state_dict(self.eval_joint_q.state_dict())

        self.eval_parameters = list(self.eval_joint_q.parameters()) + \
                               list(self.v.parameters()) + \
                               list(self.eval_rnn.parameters())
        if args.optimizer == "RMS":
            self.optimizer = torch.optim.RMSprop(self.eval_parameters,
                                                 lr=args.lr)

        # During execution, one eval_hidden is maintained for each agent.
        # During learning, an eval_hidden and a target_hidden are maintained
        # for each agent in every episode.
        self.eval_hidden = None
        self.target_hidden = None
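
The snippet stores eval_hidden and target_hidden but never shows how they are (re)initialized. A minimal sketch of the usual per-episode helper, assuming the hidden size is exposed as args.rnn_hidden_dim (a name not visible above):

    def init_hidden(self, episode_num):
        # One zeroed RNN hidden state per agent for each episode in the batch.
        self.eval_hidden = torch.zeros((episode_num, self.n_agents,
                                        self.args.rnn_hidden_dim))
        self.target_hidden = torch.zeros((episode_num, self.n_agents,
                                          self.args.rnn_hidden_dim))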
Example #2
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        input_shape = self.obs_shape
        # Determine the RNN input dimension from the arguments
        if args.last_action:
            input_shape += self.n_actions
        if args.reuse_network:
            input_shape += self.n_agents

        # Neural networks
        self.eval_rnn = RNN(input_shape, args)  # network each agent uses to choose actions
        self.target_rnn = RNN(input_shape, args)
        self.eval_qmix_net = QMixNet(args)  # mixing network that combines the agents' Q values
        self.target_qmix_net = QMixNet(args)
        self.args = args
        if self.args.cuda:
            self.eval_rnn.cuda()
            self.target_rnn.cuda()
            self.eval_qmix_net.cuda()
            self.target_qmix_net.cuda()
        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # Load the model if a saved one exists
        if os.path.exists(self.model_dir + '/rnn_net_params.pkl'):
            path_rnn = self.model_dir + '/rnn_net_params.pkl'
            path_qmix = self.model_dir + '/qmix_net_params.pkl'
            self.eval_rnn.load_state_dict(torch.load(path_rnn))
            self.eval_qmix_net.load_state_dict(torch.load(path_qmix))
            print('Successfully loaded the model: {} and {}'.format(
                path_rnn, path_qmix))

        # Initialize the target nets with the same parameters as the eval nets
        self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
        self.target_qmix_net.load_state_dict(self.eval_qmix_net.state_dict())

        self.eval_parameters = list(self.eval_qmix_net.parameters()) + list(
            self.eval_rnn.parameters())
        if args.optimizer == "RMS":
            self.optimizer = torch.optim.RMSprop(self.eval_parameters,
                                                 lr=args.lr)

        # During execution, one eval_hidden is maintained for each agent.
        # During learning, an eval_hidden and a target_hidden are maintained
        # for each agent in every episode.
        self.eval_hidden = None
        self.target_hidden = None
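
QMixNet itself is not shown above. For orientation, here is a minimal sketch of a QMIX-style monotonic mixer, assuming a layer width named args.qmix_hidden_dim (an assumption, like every name here that does not appear in the snippet): hypernetworks conditioned on the global state emit the mixing weights, and taking their absolute value keeps Q_tot monotonic in every agent's Q value.

import torch
import torch.nn as nn

class QMixNet(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.args = args
        # Hypernetworks: map the global state to the mixer's weights and biases.
        self.hyper_w1 = nn.Linear(args.state_shape, args.n_agents * args.qmix_hidden_dim)
        self.hyper_b1 = nn.Linear(args.state_shape, args.qmix_hidden_dim)
        self.hyper_w2 = nn.Linear(args.state_shape, args.qmix_hidden_dim)
        self.hyper_b2 = nn.Linear(args.state_shape, 1)

    def forward(self, q_values, states):
        # q_values: (batch, n_agents); states: (batch, state_shape)
        w1 = torch.abs(self.hyper_w1(states)).view(-1, self.args.n_agents,
                                                   self.args.qmix_hidden_dim)
        b1 = self.hyper_b1(states).view(-1, 1, self.args.qmix_hidden_dim)
        hidden = torch.relu(torch.bmm(q_values.unsqueeze(1), w1) + b1)
        w2 = torch.abs(self.hyper_w2(states)).view(-1, self.args.qmix_hidden_dim, 1)
        b2 = self.hyper_b2(states).view(-1, 1, 1)
        return (torch.bmm(hidden, w2) + b2).view(-1, 1)  # Q_tot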
Example #3
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        input_shape = self.obs_shape
        # Determine the RNN input dimension from the arguments
        if args.last_action:
            input_shape += self.n_actions
        if args.reuse_network:
            input_shape += self.n_agents

        # Neural networks
        self.eval_rnn = RNN(input_shape, args)  # network each agent uses to choose actions
        self.target_rnn = RNN(input_shape, args)
        self.eval_vdn_net = VDNNet()  # network that sums the agents' Q values
        self.target_vdn_net = VDNNet()

        # Load the model if a saved one exists
        if args.model_dir != '':
            path_rnn = args.model_dir + '/rnn_net_params.pkl'
            path_vdn = args.model_dir + '/vdn_net_params.pkl'
            self.eval_rnn.load_state_dict(torch.load(path_rnn))
            self.eval_vdn_net.load_state_dict(torch.load(path_vdn))
            print('Successfully loaded the rnn model {} and the vdn model {}'.
                  format(path_rnn, path_vdn))

        # Initialize the target nets with the same parameters as the eval nets
        self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
        self.target_vdn_net.load_state_dict(self.eval_vdn_net.state_dict())

        self.eval_parameters = list(self.eval_vdn_net.parameters()) + list(
            self.eval_rnn.parameters())
        if args.optimizer == "RMS":
            self.optimizer = torch.optim.RMSprop(self.eval_parameters,
                                                 lr=args.lr)
        self.args = args

        # During execution, one eval_hidden is maintained for each agent.
        # During learning, an eval_hidden and a target_hidden are maintained
        # for each agent in every episode.
        self.eval_hidden = None
        self.target_hidden = None
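
VDNNet is the simplest of these mixers: per the VDN paper, the joint value is just the sum of the per-agent Q values, so a minimal sketch consistent with the comment above is only a few lines:

import torch
import torch.nn as nn

class VDNNet(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, q_values):
        # q_values: (..., n_agents) -> joint Q, summing over the agent dim.
        return torch.sum(q_values, dim=-1, keepdim=True)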
Example #4
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        actor_input_shape = self.obs_shape  # actor input dimension; same as the RNN input in vdn/qmix, so the same network structure is reused
        critic_input_shape = self._get_critic_input_shape()  # critic input dimension
        # Determine the RNN input dimension from the arguments
        if args.last_action:
            actor_input_shape += self.n_actions
        if args.reuse_network:
            actor_input_shape += self.n_agents
        self.args = args

        # Neural networks
        # Network each agent uses to choose actions. It outputs a score for every
        # action of the current agent; a softmax still has to be applied before
        # these scores are used as probabilities to pick an action.
        self.eval_rnn = RNN(actor_input_shape, args)

        # Produces the joint Q value for each action the current agent can execute;
        # these Q values are then combined with the actor's output probabilities to
        # compute the advantage.
        self.eval_critic = ComaCritic(critic_input_shape, self.args)
        self.target_critic = ComaCritic(critic_input_shape, self.args)

        if self.args.cuda:
            self.eval_rnn.cuda()
            self.eval_critic.cuda()
            self.target_critic.cuda()

        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # Load the model if a saved one exists
        if os.path.exists(self.model_dir + '/rnn_params.pkl'):
            path_rnn = self.model_dir + '/rnn_params.pkl'
            path_coma = self.model_dir + '/critic_params.pkl'
            self.eval_rnn.load_state_dict(torch.load(path_rnn))
            self.eval_critic.load_state_dict(torch.load(path_coma))
            print('Successfully loaded the model: {} and {}'.format(
                path_rnn, path_coma))

        # Initialize the target critic with the same parameters as the eval critic
        self.target_critic.load_state_dict(self.eval_critic.state_dict())

        self.rnn_parameters = list(self.eval_rnn.parameters())
        self.critic_parameters = list(self.eval_critic.parameters())

        if args.optimizer == "RMS":
            self.critic_optimizer = torch.optim.RMSprop(self.critic_parameters,
                                                        lr=args.lr_critic)
            self.rnn_optimizer = torch.optim.RMSprop(self.rnn_parameters,
                                                     lr=args.lr_actor)

        # During execution, one eval_hidden is maintained for each agent.
        # During learning, an eval_hidden is maintained for each agent in every episode.
        self.eval_hidden = None
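
The comment on eval_critic alludes to COMA's counterfactual baseline: the advantage of the chosen action is its joint Q value minus the actor-probability-weighted average of the Q values over all of the agent's actions. A small illustrative helper (tensor names and shapes are assumptions, not part of the snippet):

import torch

def coma_advantage(q_eval, pi, action):
    # q_eval: (batch, n_actions) joint Q for each of this agent's actions
    # pi:     (batch, n_actions) the actor's action probabilities
    # action: (batch,) long tensor with the index of the action taken
    q_taken = torch.gather(q_eval, dim=1, index=action.unsqueeze(1))
    baseline = (q_eval * pi).sum(dim=1, keepdim=True)  # counterfactual baseline
    return (q_taken - baseline).detach()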
Example #5
# Assumed context from the surrounding script (not shown here): `data` holds
# pretrained word-vector rows (a word followed by its float components), and
# vocab, vocab_size, embedding_dim, hidden_dim, learning_rate and dataloader
# are defined earlier; `vgg` and `RNN` are local modules.
import pickle

import numpy as np
import torch
import torch.nn as nn

embeddings = np.random.uniform(-1, 1, [vocab_size, embedding_dim])
for k in data:
    if k[0] in vocab.word2id:
        embeddings[vocab.word2id[k[0]]] = list(map(float, k[1:]))

weights = embeddings
with open('vocab.pkl', 'wb') as f:
    pickle.dump(vocab, f)
    print('Vocabulary dumped to vocab.pkl')

# Build models
encoder = vgg.vgg16()
decoder = RNN(embedding_dim=embedding_dim,
              hidden_dim=hidden_dim,
              vocab_size=vocab_size,
              num_layers=1,
              weights=weights)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = torch.optim.Adam(params, lr=learning_rate)

# Train models
num_epochs = 100
save_iter = 10
for epoch in range(num_epochs):

    loss_list = []
    for batch in dataloader:
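        # The original example is truncated here. A minimal sketch of the usual
        # loop body, assuming each batch yields (images, captions) and that the
        # decoder takes (features, captions) and returns logits shaped
        # [batch, seq_len, vocab_size] -- both assumptions, not shown above.
        images, captions = batch
        features = encoder(images)
        outputs = decoder(features, captions)
        loss = criterion(outputs.reshape(-1, vocab_size), captions.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_list.append(loss.item())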