def make_env(scenario_name, benchmark=False):
    '''
    Creates a PersonalAgentEnv object as env. This can be used similarly to a
    gym environment by calling env.reset() and env.step().
    Use env.render() to view the environment on the screen.

    Input:
        scenario_name : name of the scenario from ./scenarios/ to be loaded
                        (without the .py extension)
        benchmark     : whether you want to produce benchmarking data
                        (usually only done during evaluation)

    Some useful env properties (see environment.py):
        .observation_space : the observation space for each agent
        .action_space      : the action space for each agent
        .n                 : the number of agents
    '''
    from particles.environment import PersonalAgentEnv
    import particles.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world()
    # create multiagent environment; the benchmark variant additionally
    # collects benchmarking data via scenario.benchmark_data
    if benchmark:
        env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                               scenario.observation, scenario.benchmark_data)
    else:
        env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                               scenario.observation)
    return env
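# A minimal usage sketch for make_env (hypothetical scenario name, random
# actions); assumes the gym-style interface described in the docstring above,
# with env.step() taking and returning per-agent lists.
env = make_env('simple_spread')  # 'simple_spread' is a hypothetical name
obs_n = env.reset()
for _ in range(25):
    # one random action per agent, sampled from that agent's action space
    act_n = [env.action_space[i].sample() for i in range(env.n)]
    obs_n, reward_n, done_n, info_n = env.step(act_n)
    env.render()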
# optionally restrict the scenario to a user-specified subset of agents
if args.specific_agents != '':
    specific_agents = args.specific_agents.split(' ')
else:
    specific_agents = None

scenario = scenarios.load(args.scenario).Scenario(
    kind=args.personalization, num_agents=args.num_agents, seed=args.seed,
    load_agents=load_agents, save_agents=None,
    specific_agents=specific_agents)
# create world
world = scenario.make_world()
world.episode_len = args.episode_len
env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                       scenario.observation, info_callback=None,
                       done_callback=scenario.done, shared_viewer=True)
env.discrete_action_input = True
env.render()
# one policy per agent; restore the trained weights for the learning agent
policies = [Reinforce(i, env.observation_space[i].shape[0],
                      env.action_space[0].n) for i in range(env.n)]
policies[0].load_state_dict(torch.load(
    './trained_models/' + args.trained_model))
eps = np.finfo(np.float32).eps.item()
obs_n = env.reset()
running_reward = 10
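# A sketch of one evaluation episode using the objects built above. It
# assumes Reinforce.forward() returns action probabilities over the discrete
# action space (as in the standard PyTorch REINFORCE example); adapt the
# action-selection line to the actual Reinforce interface.
with torch.no_grad():
    obs_n = env.reset()
    ep_reward = 0
    for t in range(args.episode_len):
        act_n = []
        for i, policy in enumerate(policies):
            probs = policy(torch.as_tensor(obs_n[i], dtype=torch.float32))
            act_n.append(int(probs.argmax()))  # greedy action for evaluation
        obs_n, reward_n, done_n, _ = env.step(act_n)
        ep_reward += sum(reward_n)
        env.render()
        if all(done_n):
            break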
# fine-tune on the support agents named on the command line
support_agents = args.specific_agents.split(' ')
scenario = scenarios.load(args.scenario).Scenario(
    kind=args.personalization, num_agents=args.num_agents, seed=args.seed,
    load_agents=load_agents, specific_agents=support_agents)
world = scenario.make_world()
world.episode_len = args.episode_len
env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                       scenario.observation, info_callback=None,
                       done_callback=scenario.done, shared_viewer=True)
env.discrete_action_input = True
env.seed(args.seed)
policies = [model(i, env.observation_space[i].shape[0],
                  env.action_space[0].n) for i in range(env.n)]
# only the learning agent's policy (policies[0]) is optimized
if args.optimizer == 'Adam':
    optimizer = optim.Adam(policies[0].parameters(), lr=args.lr)
elif args.optimizer == 'SGD':
    optimizer = optim.SGD(policies[0].parameters(), lr=args.lr)
else:
    raise NotImplementedError
scenario.sample_task = True  # start off True
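# A sketch of the per-episode REINFORCE update driven by the optimizer built
# above. It assumes policies[0] accumulates saved_log_probs and rewards over
# the episode (as in the standard PyTorch REINFORCE example, matching the eps
# constant defined earlier); gamma is a hypothetical discount-factor argument.
def finish_episode(policy, optimizer, gamma, eps):
    R = 0
    returns = []
    # compute discounted returns, iterating backwards over the rewards
    for r in reversed(policy.rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    # normalize returns for variance reduction (eps avoids division by zero)
    returns = (returns - returns.mean()) / (returns.std() + eps)
    loss = torch.stack(
        [-log_prob * R for log_prob, R in zip(policy.saved_log_probs, returns)]
    ).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_log_probs[:]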
# load scenario from script
scenario = scenarios.load(args.scenario).Scenario(
    kind=args.personalization, num_agents=args.num_agents, seed=args.seed,
    load_agents=None, save_agents=None, specific_agents=None)
# create world
world = scenario.make_world()
world.episode_len = args.episode_len
# create multiagent environment
env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                       scenario.observation, info_callback=None,
                       done_callback=scenario.done, shared_viewer=True)
# render call to create viewer window (necessary only for interactive policies)
env.render()
# create interactive policies for each agent
policies = [InteractivePolicy(env, i) for i in range(env.n)]
# execution loop
for n in range(100):
    t = 0
    obs_n = env.reset()
    ep_reward = 0
    while t < args.episode_len:
        # query for action from each agent's policy (assumes the
        # InteractivePolicy.action(obs) interface)
        act_n = [policy.action(obs_n[i]) for i, policy in enumerate(policies)]
        # step environment, accumulate reward, and redraw
        obs_n, reward_n, done_n, _ = env.step(act_n)
        ep_reward += sum(reward_n)
        env.render()
        t += 1