from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios
import torch
import numpy as np
from agent import DDPGAgent
from maddpg import MADDPG
from utils import MultiAgentReplayBuffer


def make_env(scenario_name, benchmark=False):
    """Build a MultiAgentEnv for the named particle-env scenario.

    Args:
        scenario_name: scenario module name, without the ".py" suffix.
        benchmark: when True, also pass the scenario's ``benchmark_data``
            callback to the environment.

    Returns:
        A configured ``MultiAgentEnv`` instance.
    """
    # Load the scenario class from its script and construct its world.
    scn = scenarios.load(scenario_name + ".py").Scenario()
    world = scn.make_world()
    # Both branches pass the same leading callbacks; benchmark mode
    # simply appends one more.
    callbacks = [scn.reset_world, scn.reward, scn.observation]
    if benchmark:
        callbacks.append(scn.benchmark_data)
    return MultiAgentEnv(world, *callbacks)


# Script entry: build the env, hand it to the multi-agent controller, train.
env = make_env(scenario_name="simple_spread")
ma_controller = MADDPG(env, 1000000)
ma_controller.run(500, 300, 32)
self.reset()  # NOTE(review): tail of __init__ — the enclosing MADDPG class header is outside this chunk; confirm placement

def reset(self):
    """Reset the environment and zero the per-episode step counter."""
    self.states = self.env.reset()
    self.step = 0

def get_actions(self, states):
    """Query every agent's policy for its action on its own observation.

    Args:
        states: per-agent observations, indexable by agent id.

    Returns:
        List of actions, one per agent, in agent-id order.
    """
    return [self.agents[i].get_action(states[i])
            for i in range(self.num_agents)]

def run(self, max_steps=200):
    """Roll the current policies out in the environment, rendering each frame.

    Stops (and resets the environment) as soon as every agent reports
    done, or when the episode reaches ``max_steps - 1`` steps.

    Args:
        max_steps: episode length cap. Defaults to 200, the value that
            was previously hard-coded (the leftover ``cfg.max_episode_len``
            comment suggested it was always meant to be configurable).
    """
    for _ in range(max_steps):
        actions = self.get_actions(self.states)
        next_states, rewards, dones, _ = self.env.step(actions)
        self.env.render()
        if all(dones) or self.step == max_steps - 1:
            self.reset()
            break
        # Fix: removed the dead `dones = [0 ...]` re-assignment that was
        # here — the value was never read after this point.
        self.states = next_states
        self.step += 1


if __name__ == '__main__':
    maddpg = MADDPG()
    maddpg.run()