Example #1
def train_fixed(seed, agent_setting, game_name="ma_softq"):
    set_seed(seed)

    suffix = f"fixed_play1/{game_name}/{agent_setting}/{seed}"

    set_logger(suffix)

    batch_size = 512
    training_steps = 2000
    exploration_steps = 100
    max_replay_buffer_size = 1e5
    hidden_layer_sizes = (128, 128)
    max_path_length = 1

    agent_num = 2
    env = DifferentialGame(game_name, agent_num)

    agents = []
    # agent_setting encodes one agent type per agent, separated by underscores,
    # so it must contain exactly agent_num entries.
    agent_types = agent_setting.split("_")
    assert len(agent_types) == agent_num
    for i, agent_type in enumerate(agent_types):
        agents.append(
            get_agent_by_type(
                agent_type,
                i,
                env,
                hidden_layer_sizes=hidden_layer_sizes,
                max_replay_buffer_size=max_replay_buffer_size,
            )
        )

    sampler = MASampler(
        agent_num, batch_size=batch_size, max_path_length=max_path_length
    )
    sampler.initialize(env, agents)

    trainer = MATrainer(
        env=env,
        agents=agents,
        sampler=sampler,
        steps=training_steps,
        exploration_steps=exploration_steps,
        training_interval=1,
        extra_experiences=["annealing", "recent_experiences"],
        batch_size=batch_size,
    )

    trainer.run()
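A hedged invocation sketch for the function above: agent_setting must contain exactly two underscore-separated agent types (one per agent), but the concrete type names accepted by get_agent_by_type are not shown here, so the value below is a placeholder.

# Hypothetical call; "ddpg_ddpg" stands in for two agent-type identifiers
# that get_agent_by_type actually recognizes in this codebase.
train_fixed(seed=0, agent_setting="ddpg_ddpg", game_name="ma_softq")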
Example #2
def train_fixed(seed, agent_setting, game_name='ma_softq'):
    set_seed(seed)
    suffix = f'fixed_play/{game_name}/{agent_setting}/{seed}'

    set_logger(suffix)

    batch_size = 1024
    training_steps = 25 * 60000  # 60000 episodes of max_path_length (25) steps
    exploration_steps = 2000
    max_replay_buffer_size = 1e5
    hidden_layer_sizes = (100, 100)
    max_path_length = 25

    agent_num = 3
    env = make_particle_env(game_name)
    agents = []
    agent_types = agent_setting.split('_')
    assert len(agent_types) == agent_num
    for i, agent_type in enumerate(agent_types):
        agents.append(
            get_agent_by_type(agent_type,
                              i,
                              env,
                              hidden_layer_sizes=hidden_layer_sizes,
                              max_replay_buffer_size=max_replay_buffer_size))

    sampler = MASampler(agent_num,
                        batch_size=batch_size,
                        max_path_length=max_path_length)
    sampler.initialize(env, agents)

    trainer = MATrainer(env=env,
                        agents=agents,
                        sampler=sampler,
                        steps=training_steps,
                        exploration_steps=exploration_steps,
                        training_interval=10,
                        extra_experiences=['annealing', 'recent_experiences'],
                        batch_size=batch_size)

    trainer.run()
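A hedged invocation sketch for the particle-environment variant above: agent_setting now needs three underscore-separated agent types, and game_name must be a scenario that make_particle_env understands. Both values below are placeholders, not taken from the snippet.

# Hypothetical call; the scenario name and the three agent-type identifiers
# are assumptions, substitute whatever make_particle_env and get_agent_by_type accept.
train_fixed(seed=0, agent_setting="maddpg_maddpg_maddpg", game_name="simple_spread")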
Example #3
def train_fixed(seed, agent_setting, env_configs, fully_centralized):
    set_seed(seed)
    scenario = env_configs["scenario"]
    suffix = f"fixed_play/{scenario}/{agent_setting}/{seed}"

    set_logger(suffix)

    batch_size = 50
    training_steps = 25 * 60000
    exploration_steps = 100
    max_replay_buffer_size = 1e5
    hidden_layer_sizes = (100, 100)
    max_path_length = 25

    agent_num = env_configs["n_agents"]
    raw_env = ListHiWayEnv(env_configs)
    env = Wrapper(env=raw_env, action_space=raw_env.action_space)

    if fully_centralized:
        # Fully centralized: a single agent is trained on the whole
        # multi-agent environment by the single-agent trainer below.
        agent = get_agent_by_type(
            agent_setting,
            0,
            env,
            hidden_layer_sizes=hidden_layer_sizes,
            max_replay_buffer_size=max_replay_buffer_size,
        )

        sampler = SingleSampler(batch_size=batch_size,
                                max_path_length=max_path_length)
        sampler.initialize(env, agent)
        extra_experiences = ["target_actions"]
        trainer = SATrainer(
            env=env,
            agent=agent,
            sampler=sampler,
            steps=training_steps,
            exploration_steps=exploration_steps,
            training_interval=10,
            extra_experiences=extra_experiences,
            batch_size=batch_size,
        )
    else:
        # Decentralized: one agent per index, trained jointly by MATrainer.
        agents = []
        for i in range(agent_num):
            agents.append(
                get_agent_by_type(
                    agent_setting,
                    i,
                    env,
                    hidden_layer_sizes=hidden_layer_sizes,
                    max_replay_buffer_size=max_replay_buffer_size,
                ))

        sampler = MASampler(agent_num,
                            batch_size=batch_size,
                            max_path_length=max_path_length)
        sampler.initialize(env, agents)
        extra_experiences = [
            "annealing", "recent_experiences", "target_actions"
        ]
        trainer = MATrainer(
            env=env,
            agents=agents,
            sampler=sampler,
            steps=training_steps,
            exploration_steps=exploration_steps,
            training_interval=10,
            extra_experiences=extra_experiences,
            batch_size=batch_size,
        )

    trainer.run()
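A hedged invocation sketch for the highway variant: train_fixed itself only reads the "scenario" and "n_agents" keys from env_configs, so the dictionary below is a minimal stand-in; a real ListHiWayEnv configuration will need whatever additional keys that environment requires.

# Hypothetical configuration and call; "merge" and "maddpg" are placeholders
# for a real scenario name and an agent type that get_agent_by_type recognizes.
env_configs = {"scenario": "merge", "n_agents": 3}
train_fixed(seed=0, agent_setting="maddpg", env_configs=env_configs,
            fully_centralized=False)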
Example #4
# game_name and agent_num are not defined in this snippet; the values below
# are assumed examples (matching Example #1's differential game).
game_name = "ma_softq"
agent_num = 2

batch_size = 128
training_steps = 10000
exploration_steps = 1000
hidden_layer_sizes = (10, 10)
max_replay_buffer_size = 1e5

env = DifferentialGame(game_name, agent_num)
agents = []
for i in range(agent_num):
    agent = get_maddpg_agent(
        env,
        i,
        hidden_layer_sizes=hidden_layer_sizes,
        max_replay_buffer_size=max_replay_buffer_size,
    )
    agents.append(agent)

sampler = MASampler(agent_num)  # relies on MASampler's default batch_size and max_path_length
sampler.initialize(env, agents)

trainer = MATrainer(
    env=env,
    agents=agents,
    sampler=sampler,
    steps=training_steps,
    exploration_steps=exploration_steps,
    extra_experiences=["target_actions"],
)

trainer.run()