def train_fixed(seed, agent_setting, game_name="ma_softq"): set_seed(seed) suffix = f"fixed_play1/{game_name}/{agent_setting}/{seed}" set_logger(suffix) batch_size = 512 training_steps = 2000 exploration_steps = 100 max_replay_buffer_size = 1e5 hidden_layer_sizes = (128, 128) max_path_length = 1 agent_num = 2 env = DifferentialGame(game_name, agent_num) agents = [] agent_types = agent_setting.split("_") assert len(agent_types) == agent_num for i, agent_type in enumerate(agent_types): agents.append( get_agent_by_type( agent_type, i, env, hidden_layer_sizes=hidden_layer_sizes, max_replay_buffer_size=max_replay_buffer_size, ) ) sampler = MASampler( agent_num, batch_size=batch_size, max_path_length=max_path_length ) sampler.initialize(env, agents) trainer = MATrainer( env=env, agents=agents, sampler=sampler, steps=training_steps, exploration_steps=exploration_steps, training_interval=1, extra_experiences=["annealing", "recent_experiences"], batch_size=batch_size, ) trainer.run()
def train_fixed(seed, agent_setting, game_name="ma_softq"):
    set_seed(seed)
    suffix = f"fixed_play/{game_name}/{agent_setting}/{seed}"
    set_logger(suffix)

    # Hyperparameters for the three-agent particle environment.
    batch_size = 1024
    training_steps = 25 * 60000
    exploration_steps = 2000
    max_replay_buffer_size = 1e5
    hidden_layer_sizes = (100, 100)
    max_path_length = 25
    agent_num = 3

    env = make_particle_env(game_name)

    # One agent per underscore-separated type in agent_setting.
    agents = []
    agent_types = agent_setting.split("_")
    assert len(agent_types) == agent_num
    for i, agent_type in enumerate(agent_types):
        agents.append(
            get_agent_by_type(
                agent_type,
                i,
                env,
                hidden_layer_sizes=hidden_layer_sizes,
                max_replay_buffer_size=max_replay_buffer_size,
            )
        )

    sampler = MASampler(
        agent_num, batch_size=batch_size, max_path_length=max_path_length
    )
    sampler.initialize(env, agents)

    trainer = MATrainer(
        env=env,
        agents=agents,
        sampler=sampler,
        steps=training_steps,
        exploration_steps=exploration_steps,
        training_interval=10,
        extra_experiences=["annealing", "recent_experiences"],
        batch_size=batch_size,
    )
    trainer.run()
def train_fixed(seed, agent_setting, env_configs, fully_centralized):
    set_seed(seed)
    scenario = env_configs["scenario"]
    suffix = f"fixed_play/{scenario}/{agent_setting}/{seed}"
    set_logger(suffix)

    # Hyperparameters for the highway driving scenarios.
    batch_size = 50
    training_steps = 25 * 60000
    exploration_steps = 100
    max_replay_buffer_size = 1e5
    hidden_layer_sizes = (100, 100)
    max_path_length = 25
    agent_num = env_configs["n_agents"]

    raw_env = ListHiWayEnv(env_configs)
    env = Wrapper(env=raw_env, action_space=raw_env.action_space)

    if fully_centralized:
        # A single centralized agent controls the whole environment.
        agent = get_agent_by_type(
            agent_setting,
            0,
            env,
            hidden_layer_sizes=hidden_layer_sizes,
            max_replay_buffer_size=max_replay_buffer_size,
        )
        sampler = SingleSampler(batch_size=batch_size, max_path_length=max_path_length)
        sampler.initialize(env, agent)
        extra_experiences = ["target_actions"]
        trainer = SATrainer(
            env=env,
            agent=agent,
            sampler=sampler,
            steps=training_steps,
            exploration_steps=exploration_steps,
            training_interval=10,
            extra_experiences=extra_experiences,
            batch_size=batch_size,
        )
    else:
        # One decentralized agent of the same type per learning agent.
        agents = []
        for i in range(agent_num):
            agents.append(
                get_agent_by_type(
                    agent_setting,
                    i,
                    env,
                    hidden_layer_sizes=hidden_layer_sizes,
                    max_replay_buffer_size=max_replay_buffer_size,
                )
            )
        sampler = MASampler(
            agent_num, batch_size=batch_size, max_path_length=max_path_length
        )
        sampler.initialize(env, agents)
        extra_experiences = ["annealing", "recent_experiences", "target_actions"]
        trainer = MATrainer(
            env=env,
            agents=agents,
            sampler=sampler,
            steps=training_steps,
            exploration_steps=exploration_steps,
            training_interval=10,
            extra_experiences=extra_experiences,
            batch_size=batch_size,
        )

    trainer.run()
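# Hypothetical configuration sketch (assumption, not from the original
# listing): train_fixed above only reads the "scenario" and "n_agents" keys
# directly; any further keys that ListHiWayEnv expects are omitted here, and
# the scenario name and agent-setting token are placeholders.
env_configs = {
    "scenario": "straight",  # placeholder scenario name
    "n_agents": 4,           # number of learning agents
}
train_fixed(seed=0, agent_setting="ddpg", env_configs=env_configs,
            fully_centralized=False)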
# The enclosing function is omitted in the original listing; the name and
# parameters below are assumed so the fragment reads as a complete script.
# game_name and agent_num are used by the fragment but not defined in it.
def train_maddpg_fixed(game_name, agent_num):
    batch_size = 128
    training_steps = 10000
    exploration_steps = 1000
    hidden_layer_sizes = (10, 10)
    max_replay_buffer_size = 1e5

    env = DifferentialGame(game_name, agent_num)

    # One MADDPG agent per player in the differential game.
    agents = []
    for i in range(agent_num):
        agent = get_maddpg_agent(
            env,
            i,
            hidden_layer_sizes=hidden_layer_sizes,
            max_replay_buffer_size=max_replay_buffer_size,
        )
        agents.append(agent)

    sampler = MASampler(agent_num)
    sampler.initialize(env, agents)

    trainer = MATrainer(
        env=env,
        agents=agents,
        sampler=sampler,
        steps=training_steps,
        exploration_steps=exploration_steps,
        extra_experiences=["target_actions"],
    )
    trainer.run()