scenarios = [
    'simple_adversary',
    'simple_crypto',
    'simple_push',
    'simple_tag',
    'simple_world_comm'
]
TEST_ONLY = False
if TEST_ONLY:
    arglist.is_training = False

for scenario_name in scenarios:
    arglist.actor_learning_rate = 1e-2
    arglist.critic_learning_rate = 1e-2
    for cnt in range(10):
        # scenario_name = 'simple_spread'
        env = make_env(scenario_name, discrete_action=True)

        # Seed the environment and every RNG in play so each of the
        # ten runs per scenario is reproducible.
        seed = cnt + 12345678
        env.seed(seed)
        torch.cuda.empty_cache()
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        # Adversary scenarios list the adversaries first, so index 0 is
        # an adversary and index -1 a good agent.
        dim_obs_own = env.observation_space[-1].shape[0]
        dim_obs_adv = env.observation_space[0].shape[0]
        dim_action_own = env.action_space[-1].n
        dim_action_adv = env.action_space[0].n
        action_type = 'Discrete'
        # num_adv & adv action dims
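# Hedged sketch, not from the original file: the truncated comment above asks
# for num_adv and the adversary action dims.  Assuming the MPE convention that
# each agent object carries an `adversary` flag and that env.agents follows the
# same ordering as env.action_space (adversaries first), they could be
# gathered with a helper like this:

def adversary_dims(env):
    """Return (num_adv, per-adversary action dims); attribute names are assumptions."""
    num_adv = sum(getattr(agent, 'adversary', False) for agent in env.agents)
    return num_adv, [space.n for space in env.action_space[:num_adv]]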
if __name__ == '__main__':
    from rls.model.ac_network_model_multi_gumbel import ActorNetwork, CriticNetwork
    from rls.agent.multiagent.model_ddpg_gumbel_fix import Trainer
    from experiments.scenarios import make_env
    import os

    arglist.actor_learning_rate = 1e-2
    arglist.critic_learning_rate = 1e-2

    # Pin device enumeration to the PCI bus order, then run on GPU 1 only.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'

    cnt = 11
    # scenario_name = 'simple_spread'
    scenario_name = 'simple_speaker_listener'
    env = make_env(scenario_name, benchmark=False, discrete_action=True)

    # Seed the environment and every RNG in play for a reproducible run.
    seed = cnt + 12345678
    env.seed(seed)
    torch.cuda.empty_cache()
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    dim_obs = env.observation_space[0].shape[0]
    if hasattr(env.action_space[0], 'high'):
        # MultiDiscrete spaces expose per-dimension upper bounds; `high + 1`
        # gives the number of choices along each action dimension.
        dim_action = env.action_space[0].high + 1
        dim_action = dim_action.tolist()
        action_type = 'MultiDiscrete'
    else:
        dim_action = env.action_space[0].n
        action_type = 'Discrete'
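    # Hedged illustration, not part of the original script: the *_gumbel
    # modules imported above presumably rely on PyTorch's straight-through
    # Gumbel-Softmax so that sampling a discrete action stays differentiable.
    # With hard=True the forward pass yields a one-hot action while gradients
    # follow the soft relaxation; `probe_logits` is a made-up placeholder
    # here, not a real network output.
    if action_type == 'Discrete':
        probe_logits = torch.zeros(1, dim_action, requires_grad=True)
        one_hot_action = torch.nn.functional.gumbel_softmax(probe_logits, tau=1.0, hard=True)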
    'simple_spread',
    'simple_reference',
    'simple_speaker_listener',
    'fullobs_collect_treasure',
    'multi_speaker_listener'
]
TEST_ONLY = False
scenario_name = 'simple_spread'

# Scale simple_spread up to 6, 9, and 12 agents, five seeded runs each.
for n_agent in [6, 9, 12]:
    arglist.actor_learning_rate = 1e-2
    arglist.critic_learning_rate = 1e-2
    for cnt in range(5):
        # scenario_name = 'simple_spread'
        env = make_env(scenario_name, n=n_agent, benchmark=False,
                       discrete_action=True, local_observation=True)
        seed = cnt + 12345678
        # print(env.observation_space)
        env.seed(seed)
        torch.cuda.empty_cache()
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        dim_obs = env.observation_space[0].shape[0]
        if hasattr(env.action_space[0], 'high'):
            dim_action = env.action_space[0].high + 1
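        # Hedged aside, not in the original script: the five seeding calls
        # above recur verbatim in every variant of this runner.  A
        # module-level helper such as
        #
        #     def seed_everything(env, seed):
        #         env.seed(seed)
        #         np.random.seed(seed)
        #         torch.manual_seed(seed)
        #         torch.cuda.manual_seed_all(seed)
        #
        # would keep the copies from drifting apart.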