from sc2rl.rl.agents.MAAC.MultiStepActorCriticAgent import MultiStepActorCriticAgent, MultiStepActorCriticAgentConfig from sc2rl.rl.brains.MAAC.MultiStepActorCriticBrain import MultiStepActorCriticBrainConfig from sc2rl.rl.networks.MultiStepInputGraphNetwork import MultiStepInputGraphNetworkConfig from sc2rl.memory.n_step_memory import NstepInputMemoryConfig from sc2rl.runners.RunnerManager import RunnerConfig, RunnerManager if __name__ == "__main__": map_name = "training_scenario_1" agent_conf = MultiStepActorCriticAgentConfig() network_conf = MultiStepInputGraphNetworkConfig() brain_conf = MultiStepActorCriticBrainConfig() buffer_conf = NstepInputMemoryConfig() use_attention = False num_runners = 5 num_samples = 10 sample_spec = buffer_conf.memory_conf['spec'] num_hist_steps = buffer_conf.memory_conf['N'] agent = MultiStepActorCriticAgent(agent_conf, network_conf, brain_conf, buffer_conf, use_attention=use_attention) config = RunnerConfig(map_name=map_name, reward_func=great_victor_with_kill_bonus, state_proc_func=process_game_state_to_dgl,
'num_neurons': num_neurons, 'num_relations': num_relations }, curr_enc_conf={ 'spectral_norm': spectral_norm, 'num_layers': enc_gnn_num_layer, 'model_dim': node_input_dim, 'use_concat': use_concat_input_gnn, 'num_neurons': num_neurons, 'num_relations': num_relations }) qnet_conf.gnn_conf = gnn_conf buffer_conf = NstepInputMemoryConfig(memory_conf={ 'use_return': True, 'N': num_hist_time_steps, 'gamma': gamma }) brain_conf = HierarchicalQmixBrainConfig(brain_conf={ 'use_double_q': use_double_q, 'gamma': gamma, 'eps': eps_init, 'eps_gamma': eps_gamma, 'use_mixer_hidden': use_mixer_hidden }, fit_conf={
# Configuration for the '[S4] scheduler' experiment.
#
# NOTE(review): the original indentation of this section was lost; the
# statements following each if/else are assumed to sit outside the branch --
# confirm against the upstream script.
# Relies on `spectral_norm` and `use_attention` being defined earlier.
exp_name = '[S4] scheduler'

# Q-network and actor configs; spectral_norm is passed via qnet_actor_conf.
qnet_conf = MultiStepInputQnetConfig(
    qnet_actor_conf={'spectral_norm': spectral_norm},
)
actor_conf = MultiStepInputActorConfig()

# Pick the encoder config: the attention variant takes defaults, the graph
# variant receives spectral_norm for both its history and current encoders.
gnn_conf = (
    MultiStepInputNetworkConfig()
    if use_attention
    else MultiStepInputGraphNetworkConfig(
        hist_enc_conf={'spectral_norm': spectral_norm},
        curr_enc_conf={'spectral_norm': spectral_norm},
    )
)

# The same encoder config object is attached to both the Q-network and actor.
qnet_conf.gnn_conf = gnn_conf
actor_conf.gnn_conf = gnn_conf

buffer_conf = NstepInputMemoryConfig(memory_conf={'use_return': True})
brain_conf = QmixActorCriticBrainConfig(brain_conf={'use_double_q': True})

# Read back the 'spec' and 'N' entries from the buffer config.
sample_spec = buffer_conf.memory_conf['spec']
num_hist_steps = buffer_conf.memory_conf['N']

# Device names: 'cpu' for running; 'cuda' for fitting when available.
run_device = 'cpu'
if torch.cuda.is_available():
    fit_device = 'cuda'
else:
    fit_device = 'cpu'

# No mixer configuration exists for the attention variant.
if use_attention:
    raise NotImplementedError

mixer_gnn_conf = RelationalGraphNetworkConfig(
    gnn_conf={'spectral_norm': spectral_norm},
)
mixer_ff_conf = FeedForwardConfig(
    mlp_conf={'spectral_norm': spectral_norm},
)