示例#1
0
from sc2rl.rl.agents.MAAC.MultiStepActorCriticAgent import MultiStepActorCriticAgent, MultiStepActorCriticAgentConfig
from sc2rl.rl.brains.MAAC.MultiStepActorCriticBrain import MultiStepActorCriticBrainConfig
from sc2rl.rl.networks.MultiStepInputGraphNetwork import MultiStepInputGraphNetworkConfig

from sc2rl.memory.n_step_memory import NstepInputMemoryConfig
from sc2rl.runners.RunnerManager import RunnerConfig, RunnerManager

# Script entry point: builds a MultiStepActorCriticAgent from default-constructed
# configs; the statements that follow pass map_name on to a RunnerConfig.
# NOTE(review): `great_victor_with_kill_bonus` and `process_game_state_to_dgl`,
# referenced by the RunnerConfig call right after this section, are not among the
# imports visible at the top of this file — confirm they are imported, otherwise
# that call raises NameError.
if __name__ == "__main__":

    # Map name handed to RunnerConfig as `map_name`.
    map_name = "training_scenario_1"

    # All four configs are created with their constructor defaults.
    agent_conf = MultiStepActorCriticAgentConfig()
    network_conf = MultiStepInputGraphNetworkConfig()
    brain_conf = MultiStepActorCriticBrainConfig()
    buffer_conf = NstepInputMemoryConfig()
    use_attention = False
    # Not read anywhere in the visible part of the script; presumably consumed
    # by the runner setup further down — TODO confirm.
    num_runners = 5
    num_samples = 10

    # Read the 'spec' and 'N' entries back from the memory config's dict.
    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

    # Positional order is (agent, network, brain, buffer) config;
    # use_attention is passed by keyword.
    agent = MultiStepActorCriticAgent(agent_conf,
                                      network_conf,
                                      brain_conf,
                                      buffer_conf,
                                      use_attention=use_attention)

    config = RunnerConfig(map_name=map_name, reward_func=great_victor_with_kill_bonus,
                          state_proc_func=process_game_state_to_dgl,
示例#2
0
                'num_neurons': num_neurons,
                'num_relations': num_relations
            },
            curr_enc_conf={
                'spectral_norm': spectral_norm,
                'num_layers': enc_gnn_num_layer,
                'model_dim': node_input_dim,
                'use_concat': use_concat_input_gnn,
                'num_neurons': num_neurons,
                'num_relations': num_relations
            })
    # Attach gnn_conf to the Q-network config. gnn_conf is presumably the result
    # of the call that ends just above — its start is outside this view.
    qnet_conf.gnn_conf = gnn_conf

    # Memory config overrides: 'N' is taken from num_hist_time_steps, and the
    # same `gamma` value is also passed to the brain config that follows.
    buffer_conf = NstepInputMemoryConfig(memory_conf={
        'use_return': True,
        'N': num_hist_time_steps,
        'gamma': gamma
    })
    brain_conf = HierarchicalQmixBrainConfig(brain_conf={
        'use_double_q':
        use_double_q,
        'gamma':
        gamma,
        'eps':
        eps_init,
        'eps_gamma':
        eps_gamma,
        'use_mixer_hidden':
        use_mixer_hidden
    },
                                             fit_conf={
示例#3
0
    # Experiment label; not read anywhere in the visible lines.
    exp_name = '[S4] scheduler'

    # Q-network config receives the spectral_norm flag via qnet_actor_conf;
    # the actor config is created with its defaults.
    qnet_conf = MultiStepInputQnetConfig(
        qnet_actor_conf={'spectral_norm': spectral_norm})
    actor_conf = MultiStepInputActorConfig()
    # Pick the network config: the use_attention variant is default-constructed,
    # the graph variant gets spectral_norm in both hist_enc_conf and curr_enc_conf.
    if use_attention:
        gnn_conf = MultiStepInputNetworkConfig()
    else:
        gnn_conf = MultiStepInputGraphNetworkConfig(
            hist_enc_conf={'spectral_norm': spectral_norm},
            curr_enc_conf={'spectral_norm': spectral_norm})

    # The very same gnn_conf object is assigned to both configs (shared, not copied).
    qnet_conf.gnn_conf = gnn_conf
    actor_conf.gnn_conf = gnn_conf

    buffer_conf = NstepInputMemoryConfig(memory_conf={'use_return': True})
    brain_conf = QmixActorCriticBrainConfig(brain_conf={'use_double_q': True})

    # Read the 'spec' and 'N' entries back from the memory config's dict.
    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

    # run_device is fixed to 'cpu'; fit_device is 'cuda' only when torch reports
    # CUDA as available, otherwise 'cpu'.
    run_device = 'cpu'
    fit_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # NOTE(review): with use_attention truthy, the configs above are still built
    # and then this branch raises unconditionally, so that path never gets past
    # this point. mixer_gnn_conf is only defined on the non-attention path.
    if use_attention:
        raise NotImplementedError
    else:
        mixer_gnn_conf = RelationalGraphNetworkConfig(
            gnn_conf={'spectral_norm': spectral_norm})
    mixer_ff_conf = FeedForwardConfig(
        mlp_conf={'spectral_norm': spectral_norm})