from typing import List


def get_agents(_run, env, num_adversaries, good_policy, adv_policy, lr,
               batch_size, buff_size, num_units, num_layers, gamma, tau,
               priori_replay, alpha, num_episodes, max_episode_len, beta,
               policy_update_rate, critic_action_noise_stddev, entropy_coeff,
               num_atoms, min_val, max_val) -> List[AbstractAgent]:
    """
    Generates the agents for the environment. The parameters are meant to be
    filled by sacred and are therefore documented in the configuration
    function train_config.

    :returns List[AbstractAgent]: a list of instantiated agents
    """
    def create_agent(alg_name, agent_idx):
        """Instantiates a single agent of the given algorithm.

        Closes over the hyperparameters of get_agents, so every algorithm
        receives the same shared configuration.
        """
        if alg_name == 'maddpg':
            return MADDPGAgent(
                env.observation_space, env.action_space, agent_idx,
                batch_size, buff_size, lr, num_layers, num_units, gamma, tau,
                priori_replay, alpha=alpha,
                max_step=num_episodes * max_episode_len, initial_beta=beta,
                _run=_run)
        if alg_name == 'matd3':
            return MATD3Agent(
                env.observation_space, env.action_space, agent_idx,
                batch_size, buff_size, lr, num_layers, num_units, gamma, tau,
                priori_replay, alpha=alpha,
                max_step=num_episodes * max_episode_len, initial_beta=beta,
                policy_update_freq=policy_update_rate,
                target_policy_smoothing_eps=critic_action_noise_stddev,
                _run=_run)
        if alg_name == 'mad3pg':
            return MAD3PGAgent(
                env.observation_space, env.action_space, agent_idx,
                batch_size, buff_size, lr, num_layers, num_units, gamma, tau,
                priori_replay, alpha=alpha,
                max_step=num_episodes * max_episode_len, initial_beta=beta,
                num_atoms=num_atoms, min_val=min_val, max_val=max_val,
                _run=_run)
        if alg_name == 'masac':
            return MASACAgent(
                env.observation_space, env.action_space, agent_idx,
                batch_size, buff_size, lr, num_layers, num_units, gamma, tau,
                priori_replay, alpha=alpha,
                max_step=num_episodes * max_episode_len, initial_beta=beta,
                entropy_coeff=entropy_coeff,
                policy_update_freq=policy_update_rate, _run=_run)
        raise RuntimeError(f'Invalid Class - {alg_name} is unknown')

    # The first num_adversaries agents are dispatched on adv_policy; every
    # remaining agent (indices num_adversaries .. env.n - 1) on good_policy.
    agents = [create_agent(adv_policy, agent_idx)
              for agent_idx in range(num_adversaries)]
    agents += [create_agent(good_policy, agent_idx)
               for agent_idx in range(num_adversaries, env.n)]
    print('Using good policy {} and adv policy {}'.format(good_policy,
                                                          adv_policy))
    return agents
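# Usage sketch (illustrative, not part of the original module): building the
# agent population for a mixed scenario. `make_env` is a hypothetical factory
# returning a MultiAgentEnv, and every hyperparameter value below is a
# placeholder choice, not the repository's defaults. Passing _run=None assumes
# the agents tolerate running without a sacred run object.
def _example_build_agents():
    env = make_env('simple_tag')  # hypothetical scenario factory
    agents = get_agents(
        _run=None, env=env, num_adversaries=3,
        good_policy='maddpg', adv_policy='matd3',
        lr=1e-2, batch_size=1024, buff_size=int(1e6),
        num_units=64, num_layers=2, gamma=0.95, tau=0.01,
        priori_replay=False, alpha=0.6,
        num_episodes=60000, max_episode_len=25, beta=0.5,
        policy_update_rate=2, critic_action_noise_stddev=0.2,
        entropy_coeff=0.05, num_atoms=51, min_val=-150.0, max_val=150.0)
    assert len(agents) == env.n  # one agent per environment slot
    return agents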
def create_agent(alg_name, index: int, env: MultiAgentEnv, exp):
    """Instantiates a single agent of the given algorithm, reading all
    hyperparameters from the configuration of the given sacred experiment."""
    conf = exp.config
    if alg_name == 'maddpg':
        return MADDPGAgent(
            env.observation_space, env.action_space, index, conf.batch_size,
            conf.buff_size, conf.lr, conf.num_layers, conf.num_units,
            conf.gamma, conf.tau, conf.priori_replay, alpha=conf.alpha,
            max_step=conf.num_episodes * conf.max_episode_len,
            initial_beta=conf.beta, _run=exp)
    if alg_name == 'matd3':
        return MATD3Agent(
            env.observation_space, env.action_space, index, conf.batch_size,
            conf.buff_size, conf.lr, conf.num_layers, conf.num_units,
            conf.gamma, conf.tau, conf.priori_replay, alpha=conf.alpha,
            max_step=conf.num_episodes * conf.max_episode_len,
            initial_beta=conf.beta,
            policy_update_freq=conf.policy_update_rate,
            target_policy_smoothing_eps=conf.critic_action_noise_stddev,
            _run=exp)
    if alg_name == 'mad3pg':
        return MAD3PGAgent(
            env.observation_space, env.action_space, index, conf.batch_size,
            conf.buff_size, conf.lr, conf.num_layers, conf.num_units,
            conf.gamma, conf.tau, conf.priori_replay, alpha=conf.alpha,
            max_step=conf.num_episodes * conf.max_episode_len,
            initial_beta=conf.beta, num_atoms=conf.num_atoms,
            min_val=conf.min_val, max_val=conf.max_val, _run=exp)
    if alg_name == 'masac':
        return MASACAgent(
            env.observation_space, env.action_space, index, conf.batch_size,
            conf.buff_size, conf.lr, conf.num_layers, conf.num_units,
            conf.gamma, conf.tau, conf.priori_replay, alpha=conf.alpha,
            max_step=conf.num_episodes * conf.max_episode_len,
            initial_beta=conf.beta, entropy_coeff=conf.entropy_coeff,
            policy_update_freq=conf.policy_update_rate, _run=exp)
    raise RuntimeError(f'Invalid Class - {alg_name} is unknown')
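# Usage sketch (illustrative): a hypothetical helper that rebuilds the full
# agent list of a finished sacred run, e.g. before loading checkpoints for
# evaluation. It assumes the experiment config exposes good_policy, adv_policy
# and num_adversaries with attribute access, matching the `conf = exp.config`
# pattern above.
def restore_agents(env: MultiAgentEnv, exp) -> List[AbstractAgent]:
    conf = exp.config
    return [create_agent(conf.adv_policy if idx < conf.num_adversaries
                         else conf.good_policy, idx, env, exp)
            for idx in range(env.n)]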