def make_env(args):
    """Build the particle environment named by ``args.scenario_name``.

    The scenario script is loaded, its world is created and wrapped in a
    ``MultiAgentEnv``. Shape information read from the environment is then
    written back onto ``args``.

    Args:
        args: namespace providing ``scenario_name`` and ``num_adversaries``.
            It gains ``n_players``, ``n_agents``, ``obs_shape``,
            ``action_shape``, ``high_action`` and ``low_action``.

    Returns:
        The tuple ``(env, args)``.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario = scenarios.load(args.scenario_name + ".py").Scenario()
    world = scenario.make_world()
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                        scenario.observation)

    # Every player in the environment, enemies included.
    args.n_players = env.n
    # Players that are actually controlled; enemies are left out because
    # training both sides would require different algorithms.
    args.n_agents = env.n - args.num_adversaries
    # One observation dimension per controlled agent.
    args.obs_shape = [
        env.observation_space[idx].shape[0] for idx in range(args.n_agents)
    ]
    # One action dimension per controlled agent.
    per_player_actions = [space.n for space in env.action_space]
    args.action_shape = per_player_actions[:args.n_agents]
    args.high_action = 1
    args.low_action = -1
    return env, args
def make_env(scenario_name, arglist, benchmark=False, agent_size=0.1):
    """Create a ``MultiAgentEnv`` for the given scenario.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        arglist: object providing ``num_agents``, forwarded to
            ``scenario.make_world``.
        benchmark: when true, ``scenario.benchmark_data`` is passed to the
            environment as well.
        agent_size: forwarded to ``scenario.make_world``.

    Returns:
        The constructed environment.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world(num_agents=arglist.num_agents,
                                agent_size=agent_size)

    # Use the scenario's done function when it has one. Only a missing
    # attribute triggers the fallback; any other error is a real failure
    # and must not be silently swallowed.
    try:
        done_cb = scenario.done
    except AttributeError:
        def done_cb(agent, world):
            return False

    # create multiagent environment
    if benchmark:
        return MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                             scenario.observation, scenario.benchmark_data,
                             done_cb)
    return MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                         scenario.observation, done_callback=done_cb)
def __init__(self, n_agents, seed=None, state_last_action=False,
             obs_last_action=False, obs_use_simple_scheme=False):
    """Set up the wrapped ``simple_spread`` particle environment.

    Args:
        n_agents: number of agents; also passed to ``SpreadScenario``.
        seed: stored as ``_seed`` and passed to ``SpreadScenario``.
        state_last_action: stored on the instance as given.
        obs_last_action: stored on the instance as given.
        obs_use_simple_scheme: stored on the instance as given.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    self._seed = seed

    scenario_module = scenarios.load("simple_spread.py")
    self.scenario = scenario_module.SpreadScenario(n_agents, seed)
    self.world = self.scenario.make_world()
    self.env = MultiAgentEnv(
        self.world,
        self.scenario.reset_world,
        self.scenario.reward,
        self.scenario.observation,
    )
    self.env.discrete_action_input = True

    self.n_agents = n_agents
    # Two actions per position dimension of the world, plus one.
    self.n_actions = self.world.dim_p * 2 + 1
    self.episode_limit = 25

    self.state_last_action = state_last_action
    self.obs_last_action = obs_last_action
    self.obs_use_simple_scheme = obs_use_simple_scheme

    self._episode_steps = 0
    # One zero vector of length n_actions per agent.
    self.last_action = [
        np.zeros(self.n_actions) for _ in range(self.n_agents)
    ]

    super(SpreadEnv, self).__init__()
def make_env(scenario_name, benchmark=False):
    '''
    Create a MultiAgentEnv for the named scenario.

    The result can be used much like a gym environment: call env.reset()
    and env.step(), and env.render() to view it on screen.

    Input:
        scenario_name   :   name of the scenario from ./scenarios/ to load
                            (without the .py extension)
        benchmark       :   whether to produce benchmarking data
                            (usually only done during evaluation)

    Some useful env properties (see environment.py):
        .observation_space  :   the observation space for each agent
        .action_space       :   the action space for each agent
        .n                  :   the number of agents
    '''
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()

    # Positional callbacks; benchmark data is appended only on request.
    callbacks = [scenario.reset_world, scenario.reward, scenario.observation]
    if benchmark:
        callbacks.append(scenario.benchmark_data)
    return MultiAgentEnv(world, *callbacks)
def make_env(env_name='love_chase'):
    """Return a ``MultiAgentEnv`` for ``env_name`` with discrete action input.

    Args:
        env_name: scenario script name without the ``.py`` extension.
    """
    scenario_file = env_name + '.py'
    scenario = scenarios.load(scenario_file).Scenario()
    world = scenario.make_world()
    env = MultiAgentEnv(
        world,
        scenario.reset_world,
        scenario.reward,
        scenario.observation,
    )
    env.discrete_action_input = True
    return env
def create_env(scenario_name, scenario_parameters=None):
    """
    Creates a multi-agent environment based on the scenario name and parameters

    :param scenario_name: string (e.g. 'simple_tag')
    :param scenario_parameters: dictionary of keyword arguments for
        ``scenario.make_world`` (e.g. {'num_agents': 2, 'num_adversaries': 1});
        ``None`` (the default) means no extra arguments
    :return: a MultiAgentEnv representing the scenario created with the
        specified parameters
    """
    # A fresh dict per call avoids sharing one mutable default object
    # between invocations.
    if scenario_parameters is None:
        scenario_parameters = {}

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world(**scenario_parameters)
    env = MultiAgentEnv(world,
                        reset_callback=scenario.reset_world,
                        reward_callback=scenario.reward,
                        observation_callback=scenario.observation,
                        discrete_action_input=True,
                        discrete_action_space=True)
    env.observation_structures = calculate_observation_structure(
        scenario_name, env)

    # Rewards are currently shared; flip this flag to experiment with
    # unshared reward.
    env.shared_reward = True
    env.episode_limit = 25

    # Make sure every agent carries an explicit ``adversary`` flag.
    for a in env.agents:
        a.adversary = getattr(a, 'adversary', False)
    return env
def make_env(args):
    """Create the scenario environment and record its dimensions on ``args``.

    Args:
        args: namespace with ``scenario_name`` and ``num_adversaries``; it is
            updated in place with ``n_players``, ``n_agents``, ``obs_shape``,
            ``action_shape``, ``high_action`` and ``low_action``.

    Returns:
        ``(env, args)``.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    # Load the scenario script and let it build its world.
    scenario = scenarios.load(args.scenario_name + ".py").Scenario()
    world = scenario.make_world()

    # Create the multi-agent environment.
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                        scenario.observation)

    # Number of all players, enemies included.
    args.n_players = env.n
    # Number of players that need to be controlled. The enemies could be
    # controlled too, but if both sides learn, different algorithms are needed.
    args.n_agents = env.n - args.num_adversaries

    # Each entry is the observation dimension of one controlled agent.
    obs_dims = []
    for agent_idx in range(args.n_agents):
        obs_dims.append(env.observation_space[agent_idx].shape[0])
    args.obs_shape = obs_dims

    # Each entry is the action dimension of one controlled agent.
    all_action_dims = [space.n for space in env.action_space]
    args.action_shape = all_action_dims[:args.n_agents]

    args.high_action = 1
    args.low_action = -1
    return env, args
def _make_env():
    """Load the multi-agent particle environment.

    Adapted from:
    https://github.com/openai/maddpg/blob/master/experiments/train.py

    Reads ``args`` and ``i_worker`` from the enclosing scope. The
    environment is seeded with ``i_worker``.
    """
    # The third-party checkout must be on the expected branch.
    check_github(path="./thirdparty/multiagent-particle-envs",
                 branch_name="predator_prey")

    # Build the scenario and its world.
    scenario = scenarios.load(args.env_name + ".py").Scenario()
    world = scenario.make_world(n_prey=args.n_prey,
                                n_predator=args.n_predator)
    episode_done = scenario.done_callback

    env = MultiAgentEnv(
        world,
        reset_callback=scenario.reset_world,
        reward_callback=scenario.reward,
        observation_callback=scenario.observation,
        done_callback=episode_done,
    )

    print("i_worker:", i_worker)
    env.seed(i_worker)

    # Sanity checks on the environment configuration.
    assert env.discrete_action_space is False, "For cont. action, this flag must be False"
    assert env.shared_reward is False, "For predator-prey, this must be False"
    return env
def make_env(scenario_name, arglist, benchmark=False):
    """Create a ``MultiAgentEnv`` whose done signal is delegated to the world.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        arglist: accepted for interface compatibility; not read here.
        benchmark: when true, ``scenario.benchmark_data`` is passed as well.

    Returns:
        The constructed environment.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()

    def done_callback(agent, world):
        # Worlds without the hook never report completion. The attribute
        # name is looked up on the world object and is kept exactly as
        # the worlds spell it.
        if not hasattr(world, 'is_scenareo_over'):
            return False
        return world.is_scenareo_over(agent, world)

    positional = [world, scenario.reset_world, scenario.reward,
                  scenario.observation]
    if benchmark:
        positional.append(scenario.benchmark_data)
    return MultiAgentEnv(*positional, done_callback=done_callback)
def make_env_vec(scenario_name, arglist, benchmark=False):
    """Create the ``environment_vec`` flavour of ``MultiAgentEnv``.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        arglist: accepted for interface compatibility; not read here.
        benchmark: when true, ``scenario.benchmark_data`` is passed as well.

    Returns:
        The constructed environment, configured with the scenario's ``seed``
        callback and ``world_radius`` as camera range.
    """
    from multiagent.environment_vec import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()

    positional = [world, scenario.reset_world, scenario.reward,
                  scenario.observation]
    if benchmark:
        positional.append(scenario.benchmark_data)

    return MultiAgentEnv(*positional,
                         seed_callback=scenario.seed,
                         cam_range=scenario.world_radius)
def make_env(args):
    """Create the environment, store its sizes on ``args`` and print a summary.

    Args:
        args: namespace with ``scenario_name`` and ``num_adversaries``; gains
            ``num_agents``, ``obs_dim_arr``, ``act_dim`` and ``num_friends``.

    Returns:
        ``(env, args)``.
    """
    # Scenario -> world -> environment.
    scenario = scenarios.load(args.scenario_name + ".py").Scenario()
    world = scenario.make_world()
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                        scenario.observation)

    # Record the environment's dimensions on args.
    args.num_agents = env.n
    args.obs_dim_arr = [
        env.observation_space[idx].shape[0] for idx in range(args.num_agents)
    ]
    # The action dimension is read from the first agent's action space.
    args.act_dim = env.action_space[0].n
    args.num_friends = args.num_agents - args.num_adversaries

    # Human-readable summary framed by two separator rules.
    separator = '-' * 100
    print(separator)
    print(f'num of agent:\t\t {env.n}')
    print(f'num of friends:\t\t {args.num_friends}')
    print(f'obs dim:\t\t {args.obs_dim_arr}')
    print(f'action dim:\t\t {args.act_dim}')
    print(f'len action space:\t {len(env.action_space)}')
    print(f'len obs space:\t\t {len(env.observation_space)}')
    print(separator)
    return env, args
def make_env(self, env_id, seed, logger_dir=None, reward_scale=1.0,
             mpi_rank=0, subrank=0, info_keywords=()):
    """
    Create a wrapped, monitored gym.Env for safety.

    The 'MultiAgent-v0' gym environment is built from the scenario script
    ``<env_id>.py``, seeded, and wrapped in ``Monitor`` and
    ``ClipActionsWrapper``. A ``RewardScaler`` is added when
    ``reward_scale`` differs from 1.0.
    """
    scenario = scenarios.load('{}.py'.format(env_id)).Scenario()
    world = scenario.make_world()

    env = gym.make(
        'MultiAgent-v0',
        world=world,
        reset_callback=scenario.reset_world,
        reward_callback=scenario.reward,
        observation_callback=scenario.observation,
        info_callback=None,
        done_callback=scenario.done,
        shared_viewer=True,
    )

    # Offset the seed per sub-worker; a missing seed is passed on as None.
    if seed is None:
        env.seed(None)
    else:
        env.seed(seed + subrank)

    # The monitor path is only built when a logger directory was given.
    monitor_path = logger_dir and os.path.join(
        logger_dir, str(mpi_rank) + '.' + str(subrank))
    env = Monitor(env, monitor_path, allow_early_resets=True,
                  info_keywords=info_keywords)
    env = ClipActionsWrapper(env)

    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_env(scenario_name, benchmark=False, rank=-1, seed=0):
    """Create a named ``MultiAgentEnv`` tagged with a rank and a seed value.

    Args:
        scenario_name: scenario script name without the ``.py`` extension;
            also passed to the environment as ``name``.
        benchmark: when true, ``scenario.benchmark_data`` is passed as well.
        rank: stored on the environment as ``ID``.
        seed: base seed; ``seed + rank`` is stored on the environment.

    Returns:
        The constructed environment.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()

    callbacks = [scenario.reset_world, scenario.reward, scenario.observation]
    if benchmark:
        callbacks.append(scenario.benchmark_data)
    env = MultiAgentEnv(world, *callbacks, name=scenario_name)

    # NOTE(review): this assigns an int to ``env.seed``; if MultiAgentEnv
    # defines a ``seed()`` method it is shadowed on this instance. Confirm
    # that this is intended.
    env.seed = (seed + rank)
    env.ID = rank
    env.name = scenario_name
    return env
def make_env(env_name, scenario_name, arglist, benchmark=False):
    """Create either a particle or a multi-agent MuJoCo environment.

    Args:
        env_name: ``"particle"`` or ``"multiagent_mujoco"``.
        scenario_name: particle scenario script name without ``.py``
            (used only for ``"particle"``).
        arglist: configuration object. The particle branch reads
            ``partial_obs``; the MuJoCo branch reads ``scenario``,
            ``agent_obsk``, ``k_categories``, ``env_version``,
            ``agent_conf``, ``obs_add_global_pos`` and ``max_episode_len``.
        benchmark: particle only; pass ``scenario.benchmark_data`` as well.

    Returns:
        The constructed environment.

    Raises:
        ValueError: if ``env_name`` is not one of the supported names.
    """
    if env_name == "particle":
        from multiagent.environment import MultiAgentEnv
        import multiagent.scenarios as scenarios

        # load scenario from script
        scenario = scenarios.load(scenario_name + ".py").Scenario()

        # create world; partially observable worlds receive the arguments
        if not arglist.partial_obs:
            world = scenario.make_world()
        else:
            world = scenario.make_world(args=arglist)

        # create multiagent environment
        if benchmark:
            # NOTE(review): with benchmark=True the full-observation
            # callback is not passed even when partial_obs is set --
            # confirm this is intended.
            env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                                scenario.observation, scenario.benchmark_data)
        elif not arglist.partial_obs:
            env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                                scenario.observation)
        else:
            env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                                scenario.observation,
                                scenario.full_observation)
    elif env_name == "multiagent_mujoco":
        from envs.multiagent_mujoco.mujoco_multi import MujocoMulti

        kwargs = {
            "scenario": arglist.scenario,
            "agent_obsk": arglist.agent_obsk,
            "k_categories": arglist.k_categories,
            "env_version": arglist.env_version,
            "agent_conf": arglist.agent_conf,
            "obs_add_global_pos": arglist.obs_add_global_pos,
            "episode_limit": arglist.max_episode_len,
        }
        env = MujocoMulti(env_args=kwargs)
        # Fill the placeholder; previously the literal "{}" was printed
        # followed by the action space.
        print("ENV TOTAL ACTION SPACE: {}".format(env.action_space))
    else:
        # Without this branch an unknown name reached ``return env`` and
        # failed with an opaque UnboundLocalError.
        raise ValueError(
            "Unknown env_name {!r}; expected 'particle' or "
            "'multiagent_mujoco'".format(env_name))
    return env
def make_env(scenario_name, benchmark=False):
    """Return a ``MultiAgentEnv`` for the named scenario.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        benchmark: accepted for interface compatibility; not used here.
    """
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    return MultiAgentEnv(
        scenario.make_world(),
        scenario.reset_world,
        scenario.reward,
        scenario.observation,
    )
def make_env(scenario_name, benchmark=False):
    """Create a ``MultiAgentEnv`` from a built-in or a local scenario script.

    Names listed in ``scenario_names`` are loaded through
    ``scenarios.load``; any other name is loaded from a ``.py`` file located
    next to this module.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        benchmark: when true, ``scenario.benchmark_data`` is passed as well.

    Returns:
        The constructed environment.
    """
    # Local imports keep this block self-contained. ``imp`` (used here
    # previously) was removed in Python 3.12.
    import importlib.util
    import sys

    # NOTE(review): "simplecrypto" has no underscore, unlike the other
    # names -- verify it matches the actual scenario file name.
    scenario_names = [
        "simple", "simple_adversary", "simplecrypto", "simple_push",
        "simple_reference", "simple_speaker_listener", "simple_spread",
        "simple_tag", "simple_world_comm"
    ]

    def load(name):
        """Import the script ``name`` from this module's directory."""
        pathname = osp.join(osp.dirname(__file__), name)
        # Same module name ('') and sys.modules registration that
        # ``imp.load_source('', pathname)`` used.
        spec = importlib.util.spec_from_file_location('', pathname)
        module = importlib.util.module_from_spec(spec)
        sys.modules[''] = module
        spec.loader.exec_module(module)
        return module

    # load scenario from script
    if scenario_name in scenario_names:
        scenario = scenarios.load(scenario_name + ".py").Scenario()
    else:
        scenario = load(scenario_name + ".py").Scenario()

    # create world
    world = scenario.make_world()

    # create multiagent environment
    if benchmark:
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation, scenario.benchmark_data)
    else:
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation)
    return env
def create_environment(self):
    """
    Build the particle environment selected by ``self.arglist``.

    The scenario variant is chosen by the first flag that is set among
    ``partial_observable``, ``observation_noise`` and ``environment_noise``;
    with none set, the plain scenario is used.

    :return: environment (gym interface), env_name, task_name, n_agents,
        observation_sizes, action_sizes, discrete_actions
    """
    opts = self.arglist

    # Pick the script suffix and scenario class for the requested variant.
    if opts.partial_observable:
        suffix, class_name = "_partial_observable.py", "POScenario"
    elif opts.observation_noise:
        suffix, class_name = "_observation_noise.py", "ONScenario"
    elif opts.environment_noise:
        suffix, class_name = "_env_noise.py", "ENScenario"
    else:
        suffix, class_name = ".py", "Scenario"
    scenario_module = scenarios.load(opts.scenario + suffix)
    scenario = getattr(scenario_module, class_name)()

    world = scenario.make_world()
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                        scenario.observation)

    env_name = "mape"
    task_name = "mape_" + self.arglist.scenario
    n_agents = env.n

    print("Observation spaces: ",
          [env.observation_space[i] for i in range(n_agents)])
    print("Action spaces: ", [env.action_space[i] for i in range(n_agents)])

    observation_sizes = self.extract_sizes(env.observation_space)
    action_sizes = self.extract_sizes(env.action_space)
    discrete_actions = True

    return (
        env,
        env_name,
        task_name,
        n_agents,
        observation_sizes,
        action_sizes,
        discrete_actions,
    )
def makeMultiAgentEnv():
    """Return a ``MultiAgentEnv`` built from the ``simple_tag`` scenario."""
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    tag_scenario = scenarios.load("simple_tag.py").Scenario()
    tag_world = tag_scenario.make_world()
    return MultiAgentEnv(
        tag_world,
        tag_scenario.reset_world,
        tag_scenario.reward,
        tag_scenario.observation,
    )
def main():
    """Run random policies in the custom no-communication scenario.

    Parses command-line arguments into agent, game and training
    configurations, builds the environment, then for every epoch resets it
    and steps it for ``agent_config.time_horizon`` steps, calling
    ``agent.make_step`` with each new observation batch.
    """
    cli_args = vars(get_parser().parse_args())
    agent_config = configs.get_agent_config(cli_args)
    game_config = configs.get_game_config(cli_args)
    training_config = configs.get_training_config(cli_args)
    print(f'\n training_conf: \t{training_config}')
    print(f'\n game conf: \t\t{game_config}')
    print(f'\n agent conf: \t\t{agent_config}\n')

    # An agent composed of modules (processing, goal_predicting,
    # word_counting, action).
    agent = MyAgent(agent_config)

    scenario = scenarios.load('custom/custom_no_comm.py').Scenario()
    scenario.setup(num_agents=2, num_landmarks=3)
    world = scenario.make_world()
    env = MultiAgentEnv(
        world,
        scenario.reset_world,
        scenario.reward,
        scenario.observation,
        info_callback=None,
        shared_viewer=False,
    )
    env.render()
    sleep(1)

    # One random policy per agent in the environment.
    policies = [RandomPolicy(env, idx) for idx in range(env.n)]

    for _epoch in range(training_config.num_epochs):
        # Randomly place the agent(s).
        obs_n = env.reset()
        for _step in range(agent_config.time_horizon):
            # Query every policy with its own agent's observation.
            act_n = [
                policy.action(obs_n[idx])
                for idx, policy in enumerate(policies)
            ]
            # Advance the environment; only the observations are used.
            obs_n, _reward_n, _done_n, _info = env.step(act_n)

            # All agents make their step here.
            agent.make_step(game_config.batch_size,
                            num_agents=env.n,
                            num_entities=len(env.world.entities),
                            observations=obs_n)
            env.render()
    print('done')
def make_env(scenario_name, benchmark=False):
    """Create the environment through the scenario's own ``get_env`` factory.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        benchmark: accepted for interface compatibility; not used here.

    Returns:
        Whatever ``scenario.get_env`` returns for the freshly made world.
    """
    scenario_module = scenarios.load(scenario_name + ".py")
    scenario = scenario_module.Scenario()
    world = scenario.make_world()
    return scenario.get_env(
        world,
        scenario.reset_world,
        scenario.reward,
        scenario.observation,
        done_callback=scenario.done,
    )
def make_multiagent_env(scenario_name, benchmark=False):
    """Return a ``MultiAgentEnv`` for the named scenario.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        benchmark: when true, ``scenario.benchmark_data`` is passed as well.
    """
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()

    env_args = [world, scenario.reset_world, scenario.reward,
                scenario.observation]
    if benchmark:
        env_args.append(scenario.benchmark_data)
    return MultiAgentEnv(*env_args)
def make_env(scenario_name):
    """Create the ``env_CTA_sim`` environment around a scenario's ``World``.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.

    Returns:
        A ``MultiAgentEnv`` wrapping the world after ``make_world()`` has
        been called on it.
    """
    from multiagent.env_CTA_sim import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario_module = scenarios.load(scenario_name + ".py")
    # The scenario module provides the world object itself; make_world()
    # is called on it and its return value is not used.
    world = scenario_module.World()
    world.make_world()

    return MultiAgentEnv(world)
def make_env(scenario_name, arglist, benchmark=False):
    """Return a ``MultiAgentEnv`` for the named scenario.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        arglist: accepted for interface compatibility; not read here.
        benchmark: accepted for interface compatibility; not used here.
    """
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    scenario_cls = scenarios.load(scenario_name + ".py").Scenario
    scenario = scenario_cls()
    world = scenario.make_world()
    return MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                         scenario.observation)
def _make_env(scenario_name):
    """Return a ``MultiAgentEnv`` built from ``<scenario_name>.py``."""
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    script = scenario_name + ".py"
    scenario = scenarios.load(script).Scenario()
    world = scenario.make_world()
    env_args = (
        world,
        scenario.reset_world,
        scenario.reward,
        scenario.observation,
    )
    return MultiAgentEnv(*env_args)
def make_env(scenario_name, benchmark=False, use_discrete_action=False,
             use_max_speed=False, world_params=None):
    '''
    Creates a MultiAgentEnv object as env. This can be used similar to a gym
    environment by calling env.reset() and env.step().
    Use env.render() to view the environment on the screen.

    Input:
        scenario_name       :   name of the scenario from ./scenarios/ to
                                load (without the .py extension)
        benchmark           :   whether you want to produce benchmarking data
                                (usually only done during evaluation)
        use_discrete_action :   forwarded to the env as ``discrete_action``
        use_max_speed       :   forwarded to the env as ``use_max_speed``
        world_params        :   dict of keyword arguments for
                                scenario.make_world(), or None for none

    Raises:
        TypeError if world_params is neither None nor a dict.

    Some useful env properties (see environment.py):
        .observation_space  :   Returns the observation space for each agent
        .action_space       :   Returns the action space for each agent
        .n                  :   Returns the number of Agents
        .agent_types        :   'adversary'/'agent' label per agent (set here)
    '''
    # Validate up front with a real exception: an ``assert`` disappears
    # under ``python -O`` and an exact type() comparison rejects dict
    # subclasses.
    if world_params is None:
        world_params = {}
    elif not isinstance(world_params, dict):
        raise TypeError(
            "world_params must be a dict or None, got "
            + type(world_params).__name__)

    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world(**world_params)

    # create multiagent environment; benchmarking only adds info_callback
    env_kwargs = dict(
        reset_callback=scenario.reset_world,
        reward_callback=scenario.reward,
        observation_callback=scenario.observation,
        done_callback=scenario.check_if_done,
        post_step_callback=scenario.post_step,
        discrete_action=use_discrete_action,
        use_max_speed=use_max_speed,
    )
    if benchmark:
        env_kwargs['info_callback'] = scenario.benchmark_data
    env = MultiAgentEnv(world, **env_kwargs)

    # Label agents by role only when every agent carries the flag.
    if all(hasattr(a, 'adversary') for a in env.agents):
        env.agent_types = ['adversary' if a.adversary else 'agent'
                           for a in env.agents]
    else:
        env.agent_types = ['agent' for _ in env.agents]
    return env
def make_env(scenario_name, benchmark=False, discrete_action=False):
    '''
    Creates a MultiAgentEnv object as env. This can be used similar to a gym
    environment by calling env.reset() and env.step().
    Use env.render() to view the environment on the screen.

    Input:
        scenario_name   :   name of the scenario to load (without the .py
                            extension); multiagent.scenarios is tried first,
                            then envs.mpe_scenarios
        benchmark       :   whether you want to produce benchmarking data
                            (usually only done during evaluation)
        discrete_action :   forwarded to the env as ``discrete_action``

    Some useful env properties (see environment.py):
        .observation_space  :   Returns the observation space for each agent
        .action_space       :   Returns the action space for each agent
        .n                  :   Returns the number of Agents
    '''
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as old_scenarios
    import envs.mpe_scenarios as new_scenarios

    # Load scenario from script. Any ordinary failure in the old package
    # falls back to the new one, but KeyboardInterrupt / SystemExit are no
    # longer swallowed as they were by the bare ``except``.
    script = scenario_name + ".py"
    try:
        scenario = old_scenarios.load(script).Scenario()
    except Exception:
        scenario = new_scenarios.load(script).Scenario()

    # Create world
    world = scenario.make_world()

    # Create multiagent environment; post_step is optional on scenarios
    # and benchmarking only adds info_callback.
    env_kwargs = dict(
        reset_callback=scenario.reset_world,
        reward_callback=scenario.reward,
        observation_callback=scenario.observation,
        post_step_callback=getattr(scenario, 'post_step', None),
        discrete_action=discrete_action,
        done_callback=scenario.game_done,
    )
    if benchmark:
        env_kwargs['info_callback'] = scenario.benchmark_data
    return MultiAgentEnv(world, **env_kwargs)
def make_env(scenario_name, benchmark=False):
    """
    Create the environment from a scenario script.

    ``scenario.benchmark_data`` is passed to the environment only when
    ``benchmark`` is true.
    """
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world()
    if not benchmark:
        return MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                             scenario.observation)
    return MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                         scenario.observation, scenario.benchmark_data)
def create_env(self, num_agents):
    """Create the ``new_env`` environment and reset it once.

    Args:
        num_agents: passed to the scenario together with ``self.max_edge``.

    Returns:
        ``(env, first_obs)`` where ``first_obs`` is the first entry of the
        observations returned by ``env.reset()``.
    """
    scenario_cls = scenarios.load('new_env.py').Scenario
    scenario = scenario_cls(num_agents, self.max_edge)
    world = scenario.make_world()
    env = MultiAgentEnv(
        world,
        scenario.reset_world,
        scenario.reward,
        scenario.observation,
        info_callback=None,
        shared_viewer=False,
    )
    initial_obs = env.reset()
    return env, initial_obs[0]
def make_env(scenario_name, arglist, benchmark=False, mat_scene = -1):
    """Create a ``MatlabMultiAgentEnv`` and return it with its scenario.

    Args:
        scenario_name: scenario script name without the ``.py`` extension.
        arglist: object whose ``scenario`` attribute selects the
            ``MATLAB_Scenario`` class when it equals
            'matlab_simple_spread_assigned'.
        benchmark: when true, ``scenario.benchmark_data`` is passed as well.
        mat_scene: forwarded to ``scenario.make_world``.

    Returns:
        ``(env, scenario)``.
    """
    from multiagent.environment import MultiAgentEnv  # noqa: F401 (kept from original)
    from multiagent.environment import MatlabMultiAgentEnv
    import multiagent.scenarios as scenarios

    # Decide the scenario class, then load the script.
    use_matlab_scenario = arglist.scenario == 'matlab_simple_spread_assigned'
    scenario_module = scenarios.load(scenario_name + ".py")
    if use_matlab_scenario:
        scenario = scenario_module.MATLAB_Scenario()
    else:
        scenario = scenario_module.Scenario()

    world = scenario.make_world(mat_scene = mat_scene)

    # The Matlab environment class is used in both modes.
    env_args = [world, scenario.reset_world, scenario.reward,
                scenario.observation]
    if benchmark:
        env_args.append(scenario.benchmark_data)
    env = MatlabMultiAgentEnv(*env_args)
    return env, scenario
def make_env():
    """Return a ``MultiAgentEnv`` built from the ``simple_spread`` scenario."""
    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    spread = scenarios.load("simple_spread.py").Scenario()
    callbacks = (spread.reset_world, spread.reward, spread.observation)
    # Attribute lookups above happen before make_world() is called here, but
    # they only fetch bound methods; the world is still created first from
    # the environment's point of view.
    world = spread.make_world()
    return MultiAgentEnv(world, *callbacks)
import os,sys sys.path.insert(1, os.path.join(sys.path[0], '..')) import argparse from multiagent.environment import MultiAgentEnv from multiagent.policy import InteractivePolicy import multiagent.scenarios as scenarios if __name__ == '__main__': # parse arguments parser = argparse.ArgumentParser(description=None) parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.') args = parser.parse_args() # load scenario from script scenario = scenarios.load(args.scenario).Scenario() # create world world = scenario.make_world() # create multiagent environment env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False) # render call to create viewer window (necessary only for interactive policies) env.render() # create interactive policies for each agent policies = [InteractivePolicy(env,i) for i in range(env.n)] # execution loop obs_n = env.reset() while True: # query for action from each agent's policy act_n = [] for i, policy in enumerate(policies): act_n.append(policy.action(obs_n[i]))