Exemplo n.º 1
0
    def create_agent(alg_name):
        """
        Instantiate the agent class that corresponds to ``alg_name``.

        Everything except ``alg_name`` (``env``, ``agent_idx``, the
        hyperparameters and ``_run``) is not defined in this function and is
        resolved from the surrounding scope at call time.

        :param alg_name: one of 'maddpg', 'matd3', 'mad3pg' or 'masac'
        :returns: the newly constructed agent
        :raises RuntimeError: if ``alg_name`` is none of the names above
        """
        # Step 1: pick the class and the keyword arguments only it receives.
        if alg_name == 'maddpg':
            agent_cls = MADDPGAgent
            extra_kwargs = {}
        elif alg_name == 'matd3':
            agent_cls = MATD3Agent
            extra_kwargs = {
                'policy_update_freq': policy_update_rate,
                'target_policy_smoothing_eps': critic_action_noise_stddev,
            }
        elif alg_name == 'mad3pg':
            agent_cls = MAD3PGAgent
            extra_kwargs = {
                'num_atoms': num_atoms,
                'min_val': min_val,
                'max_val': max_val,
            }
        elif alg_name == 'masac':
            agent_cls = MASACAgent
            extra_kwargs = {
                'entropy_coeff': entropy_coeff,
                'policy_update_freq': policy_update_rate,
            }
        else:
            raise RuntimeError('Invalid Class')

        # Step 2: every algorithm shares the same leading arguments; the
        # algorithm-specific keywords are slotted in just before ``_run``.
        return agent_cls(
            env.observation_space,
            env.action_space,
            agent_idx,
            batch_size,
            buff_size,
            lr,
            num_layers,
            num_units,
            gamma,
            tau,
            priori_replay,
            alpha=alpha,
            max_step=num_episodes * max_episode_len,
            initial_beta=beta,
            **extra_kwargs,
            _run=_run)
Exemplo n.º 2
0
def get_agents(_run, env, num_adversaries, good_policy, adv_policy, lr,
               batch_size, buff_size, num_units, num_layers, gamma, tau,
               priori_replay, alpha, num_episodes, max_episode_len, beta,
               policy_update_rate, critic_action_noise_stddev, entropy_coeff,
               num_atoms, min_val, max_val) -> List[AbstractAgent]:
    """
    This function generates the agents for the environment. The parameters are meant to be filled
    by sacred, and are therefore documented in the configuration function train_config.

    Agents with index below ``num_adversaries`` are built with ``adv_policy``;
    all remaining agents are built with ``good_policy``. Each policy name must
    be one of 'maddpg', 'matd3', 'mad3pg' or 'masac'.

    :param env: must expose ``observation_space``, ``action_space``,
                ``n_good_agents`` and ``n``
    :returns List[AbstractAgent] returns a list of instantiated agents
    :raises RuntimeError: if an agent has to be built for a policy name that is
                          not one of the four listed above
    """

    def build_agent(policy, agent_idx):
        """Construct a single agent of the algorithm named by ``policy``."""
        # The branches only choose the class and its algorithm-specific
        # keyword arguments; nothing is looked up for an unknown policy.
        if policy == 'maddpg':
            agent_cls = MADDPGAgent
            extra_kwargs = {}
        elif policy == 'matd3':
            agent_cls = MATD3Agent
            extra_kwargs = {
                'policy_update_freq': policy_update_rate,
                'target_policy_smoothing_eps': critic_action_noise_stddev,
            }
        elif policy == 'mad3pg':
            agent_cls = MAD3PGAgent
            extra_kwargs = {
                'num_atoms': num_atoms,
                'min_val': min_val,
                'max_val': max_val,
            }
        elif policy == 'masac':
            agent_cls = MASACAgent
            extra_kwargs = {
                'entropy_coeff': entropy_coeff,
                'policy_update_freq': policy_update_rate,
            }
        else:
            raise RuntimeError('Invalid Class')

        return agent_cls(env.observation_space,
                         env.action_space,
                         agent_idx,
                         batch_size,
                         buff_size,
                         lr,
                         num_layers,
                         num_units,
                         gamma,
                         tau,
                         priori_replay,
                         alpha=alpha,
                         max_step=num_episodes * max_episode_len,
                         initial_beta=beta,
                         **extra_kwargs,
                         _run=_run)

    # Adversaries: the policy is selected by adv_policy only (previously the
    # 'masac' branch was wrongly keyed on good_policy).
    agents = [
        build_agent(adv_policy, agent_idx)
        for agent_idx in range(num_adversaries)
    ]

    # Good agents: the policy is selected by good_policy only (previously the
    # 'mad3pg' branch was wrongly keyed on adv_policy).
    # NOTE(review): the two ranges below are kept exactly as they were; between
    # them index ``num_adversaries + env.n_good_agents - 1`` gets no agent.
    # Confirm that this gap is intended.
    agents.extend(
        build_agent(good_policy, agent_idx)
        for agent_idx in range(num_adversaries,
                               num_adversaries + env.n_good_agents - 1))
    agents.extend(
        build_agent(good_policy, agent_idx)
        for agent_idx in range(num_adversaries + env.n_good_agents, env.n))

    print('Using good policy {} and adv policy {}'.format(
        good_policy, adv_policy))
    return agents
Exemplo n.º 3
0
def create_agent(alg_name, index: int, env: MultiAgentEnv, exp):
    """
    Build one agent of the algorithm named by ``alg_name``.

    Hyperparameters are read as attributes of ``exp.config``; ``exp`` itself
    is handed to the agent constructor as ``_run``.

    :param alg_name: one of 'maddpg', 'matd3', 'mad3pg' or 'masac'
    :param index: forwarded to the agent as its index argument
    :param env: supplies ``observation_space`` and ``action_space``
    :param exp: object exposing the hyperparameters through ``exp.config``
    :returns: the newly constructed agent
    :raises RuntimeError: if ``alg_name`` is none of the names above
    """
    settings = exp.config

    # Choose the class plus the (constructor keyword, config attribute) pairs
    # that only this algorithm needs.
    if alg_name == 'maddpg':
        agent_cls = MADDPGAgent
        extra_fields = ()
    elif alg_name == 'matd3':
        agent_cls = MATD3Agent
        extra_fields = (
            ('policy_update_freq', 'policy_update_rate'),
            ('target_policy_smoothing_eps', 'critic_action_noise_stddev'),
        )
    elif alg_name == 'mad3pg':
        agent_cls = MAD3PGAgent
        extra_fields = (
            ('num_atoms', 'num_atoms'),
            ('min_val', 'min_val'),
            ('max_val', 'max_val'),
        )
    elif alg_name == 'masac':
        agent_cls = MASACAgent
        extra_fields = (
            ('entropy_coeff', 'entropy_coeff'),
            ('policy_update_freq', 'policy_update_rate'),
        )
    else:
        raise RuntimeError(f'Invalid Class - {alg_name} is unknown')

    # Arguments common to every algorithm, in constructor order.
    positional = (
        env.observation_space,
        env.action_space,
        index,
        settings.batch_size,
        settings.buff_size,
        settings.lr,
        settings.num_layers,
        settings.num_units,
        settings.gamma,
        settings.tau,
        settings.priori_replay,
    )
    keyword = {
        'alpha': settings.alpha,
        'max_step': settings.num_episodes * settings.max_episode_len,
        'initial_beta': settings.beta,
    }
    # Algorithm-specific settings are read after the shared ones and placed
    # before ``_run``, matching the keyword order of a hand-written call.
    for kwarg_name, config_attr in extra_fields:
        keyword[kwarg_name] = getattr(settings, config_attr)
    keyword['_run'] = exp

    return agent_cls(*positional, **keyword)