Example #1
0
File: qmixAgent.py  Project: cu-rie/sc2rl
    def get_action(self, hist_graph, curr_graph, tag2unit_dict):
        """Select SC2 actions for the units in the current observation.

        Temporarily pops 'node_feature' from both graphs, queries the brain
        for per-unit actions, converts them into SC2 unit commands via the
        tag lookups returned in ``info_dict``, then restores the popped
        features so the caller's graphs are left unchanged.
        """
        assert isinstance(
            curr_graph,
            dgl.DGLGraph), "get action is designed to work on a single graph!"

        n_steps = hist_graph.batch_size
        hist_feat = hist_graph.ndata.pop('node_feature')
        curr_feat = curr_graph.ndata.pop('node_feature')
        max_num_enemy = get_largest_number_of_enemy_nodes([curr_graph])

        nn_actions, info_dict = self.brain.get_action(
            n_steps, hist_graph, hist_feat, curr_graph, curr_feat,
            max_num_enemy)

        # Map the network's ally/enemy indices back to concrete SC2 units.
        sc2_actions = nn_action_to_sc2_action(
            nn_actions=nn_actions,
            ally_tags=info_dict['ally_tags'],
            enemy_tags=info_dict['enemy_tags'],
            tag2unit_dict=tag2unit_dict)

        # Restore the features that were popped above.
        hist_graph.ndata['node_feature'] = hist_feat
        curr_graph.ndata['node_feature'] = curr_feat
        return nn_actions, sc2_actions, info_dict
Example #2
0
    ac_module = ActorCriticModule(node_input_dim=26)
    # ac_module = ActorCriticModule(node_input_dim=26)

    done_cnt = 0
    i = 0

    # Main rollout loop: observe -> pick actions -> compute loss -> step env.
    while True:
        # print("=========={} th iter ============".format(i))
        cur_state = env.observe()
        # for tag, unit in cur_state['tag2unit_dict'].items():
        #     print("TAG: {} | NAME: {} | Health: {} | POS: {} ".format(tag, unit.name, unit.health, unit.position))

        # Observation graph; detach node features before the forward pass.
        g = cur_state['g']
        node_feature = g.ndata.pop('node_feature')

        num_enemy = get_largest_number_of_enemy_nodes([g])
        nn_actions, info_dict = ac_module.get_action(g, node_feature, num_enemy)

        # Translate network outputs into concrete SC2 unit commands.
        tag2unit_dict = cur_state['tag2unit_dict']
        ally_tags = info_dict['ally_tags']
        enemy_tags = info_dict['enemy_tags']

        sc2_actions = nn_action_to_sc2_action(nn_actions=nn_actions,
                                              ally_tags=ally_tags,
                                              enemy_tags=enemy_tags,
                                              tag2unit_dict=tag2unit_dict)

        # NOTE(review): forward pass for the training loss -- presumably the
        # backward/optimizer step happens later in this (truncated) loop; confirm.
        loss = ac_module(g, node_feature, num_enemy)

        next_state, reward, done = env.step(action=sc2_actions)
Example #3
0
File: qmixAgent.py  Project: cu-rie/sc2rl
    def fit(self, device='cpu'):
        """Sample a batch from the replay buffer and run one brain.fit step.

        The prefix 'c' indicates *current* time-stamp inputs and 'n'
        indicates *next* time-stamp inputs.

        Expected specs (bs = batch_size, nt = hist_num_time_steps):
          'h_graph' = list of graph lists [[g_(0,0), ..., g_(0,nt)],
                                           [g_(1,0), ..., g_(1,nt)],
                                           ..., [g_(bs,0), ..., g_(bs,nt)]]
          'graph'   = list of graphs [g_(0), g_(1), ..., g_(bs)]

        :param device: torch device spec; anything other than 'cpu' moves
            the batch there before fitting (e.g. 'cuda', 'cuda:1').
        :return: the dict returned by self.brain.fit.
        """
        fit_conf = self.conf.fit_conf

        batch_size = fit_conf['batch_size']
        hist_num_time_steps = fit_conf['hist_num_time_steps']

        c_h_graph, c_graph, actions, rewards, n_h_graph, n_graph, dones = self.buffer.sample(
            batch_size)

        c_maximum_num_enemy = get_largest_number_of_enemy_nodes(c_graph)
        n_maximum_num_enemy = get_largest_number_of_enemy_nodes(n_graph)

        # batching graphs: flatten the per-sample history lists first
        list_c_h_graph = [g for L in c_h_graph for g in L]
        list_n_h_graph = [g for L in n_h_graph for g in L]

        c_hist_graph = dgl.batch(list_c_h_graph)
        n_hist_graph = dgl.batch(list_n_h_graph)

        c_curr_graph = dgl.batch(c_graph)
        n_curr_graph = dgl.batch(n_graph)

        # casting actions to one torch tensor
        actions = torch.cat(actions).long()

        # prepare rewards
        rewards = torch.Tensor(rewards)

        # preparing dones
        dones = torch.Tensor(dones)

        if device != 'cpu':
            # BUGFIX: use the requested device instead of hard-coding 'cuda'
            # (so 'cuda:1' etc. work), and keep the values returned by .to().
            target = torch.device(device)

            def _graph_to(g):
                # DGLGraph.to() mutates in place (returning None) in old DGL
                # releases but returns a new graph in DGL >= 0.5; the old code
                # discarded the return value, silently leaving the graphs on
                # CPU under newer DGL. Handle both behaviors.
                moved = g.to(target)
                return g if moved is None else moved

            c_hist_graph = _graph_to(c_hist_graph)
            n_hist_graph = _graph_to(n_hist_graph)
            c_curr_graph = _graph_to(c_curr_graph)
            n_curr_graph = _graph_to(n_curr_graph)
            actions = actions.to(target)
            rewards = rewards.to(target)
            dones = dones.to(target)

        c_hist_feature = c_hist_graph.ndata.pop('node_feature')
        c_curr_feature = c_curr_graph.ndata.pop('node_feature')

        n_hist_feature = n_hist_graph.ndata.pop('node_feature')
        n_curr_feature = n_curr_graph.ndata.pop('node_feature')

        fit_return_dict = self.brain.fit(
            num_time_steps=hist_num_time_steps,
            c_hist_graph=c_hist_graph,
            c_hist_feature=c_hist_feature,
            c_curr_graph=c_curr_graph,
            c_curr_feature=c_curr_feature,
            c_maximum_num_enemy=c_maximum_num_enemy,
            n_hist_graph=n_hist_graph,
            n_hist_feature=n_hist_feature,
            n_curr_graph=n_curr_graph,
            n_curr_feature=n_curr_feature,
            n_maximum_num_enemy=n_maximum_num_enemy,
            actions=actions,
            rewards=rewards,
            dones=dones)

        return fit_return_dict
    def fit(self):
        """Sample a batch from the replay buffer and run one brain.fit step.

        The prefix 'c' marks *current* time-stamp inputs, 'n' marks *next*
        time-stamp inputs.

        Expected specs (bs = batch_size, nt = hist_num_time_steps):
          'h_graph' = list of graph lists [[g_(0,0), ..., g_(0,nt)],
                                           [g_(1,0), ..., g_(1,nt)],
                                           ..., [g_(bs,0), ..., g_(bs,nt)]]
          'graph'   = list of graphs [g_(0), g_(1), ..., g_(bs)]
        """
        fit_conf = self.conf.fit_conf
        batch_size = fit_conf['batch_size']
        num_hist_steps = fit_conf['hist_num_time_steps']

        (c_h_graph, c_graph, actions, rewards,
         n_h_graph, n_graph, dones) = self.buffer.sample(batch_size)

        c_maximum_num_enemy = get_largest_number_of_enemy_nodes(c_graph)
        n_maximum_num_enemy = get_largest_number_of_enemy_nodes(n_graph)

        # cast the list of per-sample action tensors into one long tensor
        actions = torch.cat(actions).long()

        # rewards/dones are per-sample scalars; expand each one to every
        # allied unit of the matching current graph ('c_graph' is a list)
        ally_counts = torch.Tensor([
            len(get_filtered_node_index_by_type(graph, NODE_ALLY))
            for graph in c_graph
        ]).long()

        rewards = torch.Tensor(rewards).repeat_interleave(ally_counts, dim=0)
        dones = torch.Tensor(dones).repeat_interleave(ally_counts, dim=0)

        # flatten the nested history lists, then batch everything
        c_hist_graph = dgl.batch([g for sample in c_h_graph for g in sample])
        n_hist_graph = dgl.batch([g for sample in n_h_graph for g in sample])
        c_curr_graph = dgl.batch(c_graph)
        n_curr_graph = dgl.batch(n_graph)

        c_hist_feature = c_hist_graph.ndata.pop('node_feature')
        c_curr_feature = c_curr_graph.ndata.pop('node_feature')
        n_hist_feature = n_hist_graph.ndata.pop('node_feature')
        n_curr_feature = n_curr_graph.ndata.pop('node_feature')

        return self.brain.fit(
            num_time_steps=num_hist_steps,
            c_hist_graph=c_hist_graph,
            c_hist_feature=c_hist_feature,
            c_curr_graph=c_curr_graph,
            c_curr_feature=c_curr_feature,
            c_maximum_num_enemy=c_maximum_num_enemy,
            n_hist_graph=n_hist_graph,
            n_hist_feature=n_hist_feature,
            n_curr_graph=n_curr_graph,
            n_curr_feature=n_curr_feature,
            n_maximum_num_enemy=n_maximum_num_enemy,
            actions=actions,
            rewards=rewards,
            dones=dones)