def gen_config(**kwargs):
    scenario_path = Path(kwargs["scenario"]).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(kwargs["config_file"], mode=kwargs.get("mode", "training"))
    agents = {agent_id: AgentSpec(**config["agent"]) for agent_id in agent_ids}

    config["env_config"].update(
        {
            "seed": 42,
            "scenarios": [str(scenario_path)],
            "headless": kwargs["headless"],
            "agent_specs": agents,
        }
    )

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if kwargs["paradigm"] == "centralized":
        # Group all agents into a single super-agent with tuple obs/act spaces.
        config["env_config"].update(
            {
                "obs_space": gym.spaces.Tuple([obs_space] * agent_missions_count),
                "act_space": gym.spaces.Tuple([act_space] * agent_missions_count),
                "groups": {"group": agent_ids},
            }
        )
        tune_config.update(config["policy"][-1])
    else:
        # One policy per agent; each agent gets its own copy of the policy config.
        policies = {}
        for k in agents:
            policies[k] = config["policy"][:-1] + (
                {**config["policy"][-1], "agent_id": k},
            )
        tune_config.update(
            {
                "multiagent": {
                    "policies": policies,
                    "policy_mapping_fn": lambda agent_id: agent_id,
                }
            }
        )

    return config
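# For reference (inferred from how config["policy"] is indexed above, together
# with RLlib's documented multi-agent spec): each entry built in the
# decentralized branch has the standard 4-tuple shape RLlib expects under
# "multiagent"/"policies". For a hypothetical "AGENT-0":
#
#     policies["AGENT-0"] == (
#         policy_cls_or_None,              # config["policy"][0]
#         obs_space,                       # config["policy"][1]
#         act_space,                       # config["policy"][2]
#         {..., "agent_id": "AGENT-0"},    # config["policy"][-1] plus the agent id
#     )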
def main(
    scenario,
    config_file,
    checkpoint,
    num_steps=1000,
    num_episodes=10,
    paradigm="decentralized",
    headless=False,
):
    scenario_path = Path(scenario).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(config_file, mode="evaluate")
    agents = {
        agent_id: AgentSpec(
            **config["agent"], interface=AgentInterface(**config["interface"])
        )
        for agent_id in agent_ids
    }

    config["env_config"].update(
        {
            "seed": 42,
            "scenarios": [str(scenario_path)],
            "headless": headless,
            "agent_specs": agents,
        }
    )

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if paradigm == "centralized":
        config["env_config"].update(
            {
                "obs_space": gym.spaces.Tuple([obs_space] * agent_missions_count),
                "act_space": gym.spaces.Tuple([act_space] * agent_missions_count),
                "groups": {"group": agent_ids},
            }
        )
        tune_config.update(config["policy"][-1])
    else:
        policies = {}
        for k in agents:
            policies[k] = config["policy"][:-1] + (
                {**config["policy"][-1], "agent_id": k},
            )
        tune_config.update(
            {
                "multiagent": {
                    "policies": policies,
                    "policy_mapping_fn": lambda agent_id: agent_id,
                }
            }
        )

    ray.init()

    trainer_cls = config["trainer"]
    trainer_config = {"env_config": config["env_config"]}
    if paradigm != "centralized":
        trainer_config.update({"multiagent": tune_config["multiagent"]})
    else:
        # The grouped (centralized) paradigm carries its settings under "model".
        trainer_config.update({"model": tune_config["model"]})

    trainer = trainer_cls(env=tune_config["env"], config=trainer_config)

    trainer.restore(checkpoint)
    rollout(trainer, None, num_steps, num_episodes)
    trainer.stop()
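# Illustrative call only (the real CLI wrapper for this evaluation entrypoint
# is defined elsewhere in the repository); every path below is a placeholder:
#
#     main(
#         scenario="scenarios/loop",
#         config_file="configs/baseline.yaml",
#         checkpoint="results/run/checkpoint_100/checkpoint-100",
#         num_episodes=10,
#         headless=True,
#     )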
def __init__(self, load_path, algorithm, policy_name, yaml_path):
    load_path = str(load_path)
    if algorithm == "ppo":
        from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy as LoadPolicy
        # DEFAULT_CONFIG imported here as well so pconfig below is defined for
        # every branch, not only a2c/maac/maddpg/mfac/networked_pg.
        from ray.rllib.agents.ppo import DEFAULT_CONFIG
    elif algorithm == "a2c":
        from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy as LoadPolicy
        from ray.rllib.agents.a3c import DEFAULT_CONFIG
    elif algorithm == "pg":
        from ray.rllib.agents.pg.pg_tf_policy import PGTFPolicy as LoadPolicy
        from ray.rllib.agents.pg import DEFAULT_CONFIG
    elif algorithm == "dqn":
        from ray.rllib.agents.dqn import DQNTFPolicy as LoadPolicy
        from ray.rllib.agents.dqn import DEFAULT_CONFIG
    elif algorithm == "maac":
        from benchmark.agents.maac.tf_policy import CA2CTFPolicy as LoadPolicy
        from benchmark.agents.maac.tf_policy import DEFAULT_CONFIG
    elif algorithm == "maddpg":
        from benchmark.agents.maddpg.tf_policy import MADDPG2TFPolicy as LoadPolicy
        from benchmark.agents.maddpg.tf_policy import DEFAULT_CONFIG
    elif algorithm == "mfac":
        from benchmark.agents.mfac.tf_policy import MFACTFPolicy as LoadPolicy
        from benchmark.agents.mfac.tf_policy import DEFAULT_CONFIG
    elif algorithm == "networked_pg":
        from benchmark.agents.networked_pg.tf_policy import (
            NetworkedPG as LoadPolicy,
        )
        from benchmark.agents.networked_pg.tf_policy import (
            PG_DEFAULT_CONFIG as DEFAULT_CONFIG,
        )
    else:
        raise ValueError(f"Unsupported algorithm: {algorithm}")

    yaml_path = BASE_DIR / yaml_path
    load_path = BASE_DIR / f"log/results/run/{load_path}"

    config = load_config(yaml_path)
    observation_space = config["policy"][1]
    action_space = config["policy"][2]

    pconfig = DEFAULT_CONFIG
    pconfig["model"].update(config["policy"][-1].get("model", {}))
    pconfig["agent_id"] = policy_name

    self._prep = ModelCatalog.get_preprocessor_for_space(observation_space)
    self._sess = tf.Session(graph=tf.get_default_graph())

    with tf.name_scope(policy_name):
        # Observation space needs to be flattened before being passed to the policy.
        flat_obs_space = self._prep.observation_space
        policy = LoadPolicy(flat_obs_space, action_space, pconfig)
        self._sess.run(tf.global_variables_initializer())
        with open(load_path, "rb") as f:
            objs = pickle.load(f)
        objs = pickle.loads(objs["worker"])
        state = objs["state"]
        weights = state[policy_name]
        policy.set_weights(weights)

    # for op in tf.get_default_graph().get_operations():
    #     print(str(op.name))

    # These tensor names were found by inspecting the trained model.
    if algorithm == "ppo":
        # CRUCIAL FOR SAFETY:
        # We use Tensor("split") instead of Tensor("add") to force
        # PPO to be deterministic.
        self._input_node = self._sess.graph.get_tensor_by_name(
            f"{policy_name}/observation:0"
        )
        self._output_node = self._sess.graph.get_tensor_by_name(
            f"{policy_name}/split:0"
        )
    elif algorithm == "dqn":
        self._input_node = self._sess.graph.get_tensor_by_name(
            f"{policy_name}/observations:0"
        )
        self._output_node = tf.argmax(
            self._sess.graph.get_tensor_by_name(
                f"{policy_name}/value_out/BiasAdd:0"
            ),
            axis=1,
        )
    elif algorithm == "maac":
        self._input_node = self._sess.graph.get_tensor_by_name(
            f"{policy_name}/policy-inputs:0"
        )
        self._output_node = tf.argmax(
            self._sess.graph.get_tensor_by_name(
                f"{policy_name}/logits_out/BiasAdd:0"
            ),
            axis=1,
        )
    elif algorithm == "maddpg":
        self._input_node = self._sess.graph.get_tensor_by_name(
            f"{policy_name}/obs_2:0"
        )
        self._output_node = tf.argmax(
            self._sess.graph.get_tensor_by_name(
                f"{policy_name}/actor/AGENT_2_actor_RelaxedOneHotCategorical_1/sample/AGENT_2_actor_exp/forward/Exp:0"
            )
        )
    else:
        self._input_node = self._sess.graph.get_tensor_by_name(
            f"{policy_name}/observations:0"
        )
        self._output_node = tf.argmax(
            self._sess.graph.get_tensor_by_name(f"{policy_name}/fc_out/BiasAdd:0"),
            axis=1,
        )
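# Minimal sketch (an assumption, not code recovered from the original class):
# how the tensors wired up in __init__ would typically be used at inference
# time. It relies only on the attributes set above
# (_prep, _sess, _input_node, _output_node).
def act(self, obs):
    # Flatten the raw observation with the same preprocessor the policy was
    # trained with, then run the restored graph on a batch of one.
    flat_obs = self._prep.transform(obs)
    res = self._sess.run(
        self._output_node, feed_dict={self._input_node: [flat_obs]}
    )
    # The output node already encodes the action (argmax for the discrete
    # algorithms, the deterministic "split" tensor for PPO).
    return res[0]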
def main(
    scenario,
    config_file,
    log_dir,
    restore_path=None,
    num_workers=1,
    horizon=1000,
    paradigm="decentralized",
    headless=False,
    cluster=False,
):
    if cluster:
        # ray.init(address="auto", redis_password="******", memory=500 * 1024 * 1024)
        ray.init(memory=500 * 1024 * 1024)
        print(
            "--------------- Ray startup ------------\n{}".format(
                ray.state.cluster_resources()
            )
        )

    scenario_path = Path(scenario).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(config_file)
    agents = {
        agent_id: AgentSpec(
            **config["agent"], interface=AgentInterface(**config["interface"])
        )
        for agent_id in agent_ids
    }

    config["env_config"].update(
        {
            "seed": 42,
            "scenarios": [str(scenario_path)],
            "headless": headless,
            "agent_specs": agents,
        }
    )

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if paradigm == "centralized":
        config["env_config"].update(
            {
                "obs_space": Tuple([obs_space] * agent_missions_count),
                "act_space": Tuple([act_space] * agent_missions_count),
                "groups": {"group": agent_ids},
            }
        )
        tune_config.update(config["policy"][-1])
    else:
        policies = {}
        for k in agents:
            policies[k] = config["policy"][:-1] + (
                {**config["policy"][-1], "agent_id": k},
            )
        tune_config.update(
            {
                "multiagent": {
                    "policies": policies,
                    "policy_mapping_fn": lambda agent_id: agent_id,
                },
            }
        )

    tune_config.update(
        {
            "env_config": config["env_config"],
            "callbacks": SimpleCallbacks,
            "num_workers": num_workers,
            "horizon": horizon,
        }
    )

    experiment_name = EXPERIMENT_NAME.format(
        scenario=scenario_path.stem,
        n_agent=len(agents),
    )

    log_dir = Path(log_dir).expanduser().absolute() / RUN_NAME
    log_dir.mkdir(parents=True, exist_ok=True)

    if restore_path is not None:
        restore_path = Path(restore_path).expanduser()
        print(f"Loading model from {restore_path}")

    # run experiments
    config["run"].update(
        {
            "run_or_experiment": config["trainer"],
            "name": experiment_name,
            "local_dir": str(log_dir),
            "restore": restore_path,
        }
    )
    analysis = tune.run(**config["run"])

    print(analysis.dataframe().head())
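# Illustrative driver call for the training entrypoint above (placeholders
# only; the actual CLI wrapper lives elsewhere in the repository):
#
#     main(
#         scenario="scenarios/loop",
#         config_file="configs/baseline.yaml",
#         log_dir="~/smarts_results",
#         num_workers=4,
#         paradigm="decentralized",
#         headless=True,
#     )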