def get_a3c_train(name, pols, env, logdir):
    config = {
        "multiagent": {
            "policies": pols,
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": [name],
        },
        # Disable filters, otherwise we would need to synchronize those
        # as well to the DQN agent.
        "observation_filter": "NoFilter",
        "callbacks": {
            "on_train_result": on_episode_end
        }
    }
    return A3CTrainer(env=env, config=config,
                      logger_creator=lambda _: UnifiedLogger(config, logdir))
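# A minimal sketch of the two callables get_a3c_train assumes are in scope;
# both bodies are illustrative assumptions, not taken from the original.
def policy_mapping_fn(agent_id):
    # Assumed convention: each agent id maps to the policy of the same name.
    return agent_id

def on_episode_end(info):
    # Wired under the "on_train_result" key above, so RLlib calls it with
    # the training-result dict; tagging the result is just an example.
    info["result"]["callback_ok"] = True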
register_env("4x4grid", lambda _: SumoEnvironment(
    net_file='nets/4x4-Lucas/4x4.net.xml',
    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
    out_csv_name='outputs/4x4grid/a3c-4x4grid',
    use_gui=False,
    num_seconds=80000,
    time_to_load_vehicles=120,
    max_depart_delay=0,
    phases=[
        traci.trafficlight.Phase(35, "GGGrrr"),  # north-south
        traci.trafficlight.Phase(2, "yyyrrr"),
        traci.trafficlight.Phase(35, "rrrGGG"),  # west-east
        traci.trafficlight.Phase(2, "rrryyy")
    ]))

trainer = A3CTrainer(env="4x4grid", config={
    "multiagent": {
        "policy_graphs": {
            '0': (A3CTFPolicy,
                  spaces.Box(low=np.zeros(10), high=np.ones(10)),
                  spaces.Discrete(2), {})
        },
        # Traffic lights are always controlled by this policy.
        "policy_mapping_fn": lambda id: '0'
    },
    "lr": 0.0001,
})

while True:
    print(trainer.train())  # distributed training step
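# The bare while-loop above trains forever and never saves anything. A common
# alternative (an addition here, not part of the original) trains for a fixed
# number of iterations and checkpoints periodically:
for i in range(1000):
    result = trainer.train()
    if i % 50 == 0:
        checkpoint = trainer.save()  # returns the checkpoint path
        print("checkpoint saved at", checkpoint)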
elif train_algo == "A3C":
    print("Training algorithm: A3C")
    trainer = A3CTrainer(
        env=env_title,
        config={
            "num_workers": num_workers,
            "num_cpus_per_worker": num_cpus_per_worker,
            "num_gpus": num_gpus,
            "num_gpus_per_worker": num_gpus_per_worker,
            "model": nw_model,
            "multiagent": {
                "policy_graphs": policy_graphs,
                "policy_mapping_fn": policy_mapping_fn,
                "policies_to_train": ["agent_policy{}".format(i) for i in range(n_agents)],
            },
            "callbacks": {
                "on_episode_start": tune.function(on_episode_start),
                "on_episode_step": tune.function(on_episode_step),
                "on_episode_end": tune.function(on_episode_end),
            },
            "log_level": "ERROR",
        })
else:
    print("Unknown training algorithm")
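# Hypothetical construction of the policy_graphs dict this config expects:
# one entry per agent, where None lets the trainer pick its default policy
# class (obs_space and act_space are assumed to be defined elsewhere).
policy_graphs = {
    "agent_policy{}".format(i): (None, obs_space, act_space, {})
    for i in range(n_agents)
}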
"model": { "custom_model": "yaniv_mask", "fcnet_hiddens": [512, 512], }, "num_envs_per_worker": 1, "num_cpus_per_worker": args.cpus_per_worker, "num_cpus_for_driver": args.cpus_for_driver, "num_workers": 1, "evaluation_num_workers": args.num_workers, "evaluation_num_episodes": args.eval_num, "evaluation_interval": 1, } ray.init(include_dashboard=False, local_mode=False) trainer = A3CTrainer(env="yaniv", config=config) # models_path = "/scratch/student/models" models_path = "/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models" models = os.listdir(models_path) results = {} def make_update_env_fn(env_conf): def update_env_conf(env): env.config.update(env_conf) env.game.configure(env.config) def update_env_fn(worker): worker.foreach_env(update_env_conf) return update_env_fn
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--eval-num", type=int, default=5)
    parser.add_argument("--eval-every", type=int, default=1)
    parser.add_argument("--num-workers", type=int, default=1)
    parser.add_argument("--cpus-per-worker", type=float, default=0.5)
    parser.add_argument("--cpus-for-driver", type=float, default=0.5)
    parser.add_argument("--address", type=str, default=None)
    parser.add_argument(
        "--model-path",
        type=str,
        default="/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models",
    )
    parser.add_argument("--opponent", type=str, default="intermediate")
    args = parser.parse_args()

    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    if args.opponent == "intermediate":
        stepfn = intermediate_rule_step
    elif args.opponent == "novice":
        stepfn = novice_rule_step
    else:
        raise ValueError("opponent not defined: {}".format(args.opponent))

    env_config = {
        "end_after_n_deck_replacements": 0,
        "end_after_n_steps": 130,
        "early_end_reward": 0,
        "use_scaled_negative_reward": True,
        "use_scaled_positive_reward": True,
        "max_negative_reward": -1,
        "negative_score_cutoff": 30,
        "single_step": False,
        "step_reward": 0,
        "use_unkown_cards_in_state": False,
        "use_dead_cards_in_state": True,
        "observation_scheme": 1,
        "n_players": 2,
        "state_n_players": 2,
        "player_step_fn": {"player_1": stepfn},
    }

    env = YanivEnv(env_config)
    obs_space = env.observation_space
    act_space = env.action_space

    config = {
        "callbacks": YanivCallbacks,
        "num_gpus": 1,
        "env": "yaniv",
        "env_config": env_config,
        "framework": "torch",
        "multiagent": {
            "policies": {
                "policy_1": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": ["policy_1"],
        },
        "model": {
            "custom_model": "yaniv_mask",
            "fcnet_hiddens": [512, 512],
        },
        "num_envs_per_worker": 1,
        "num_cpus_per_worker": args.cpus_per_worker,
        "num_cpus_for_driver": args.cpus_for_driver,
        "num_workers": 1,
        "evaluation_num_workers": args.num_workers,
        "evaluation_num_episodes": args.eval_num,
        "evaluation_interval": 1,
    }

    ray.init(include_dashboard=False, address=args.address)
    trainer = A3CTrainer(env="yaniv", config=config)

    # models_path = "/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models"
    # models_path = "/scratch/student/models"
    models_path = args.model_path
    models = os.listdir(models_path)

    results = []
    for model in tqdm(sorted(models)):
        if not model.startswith("model"):
            # Unexpected file in the models directory; skip it.
            print("idk", model)
            continue

        model_num = int(model[6:-4])
        if model_num % args.eval_every != 0:
            continue

        path = os.path.join(models_path, model)
        with open(path, "rb") as f:
            policy = pickle.load(f)

        trainer.get_policy("policy_1").set_state(policy)
        metrics = trainer._evaluate()
        metrics["evaluation"].pop("hist_stats")

        stats = {
            k: v
            for k, v in metrics["evaluation"]["custom_metrics"].items()
            if k.endswith("mean")
        }
        stats["model_number"] = model_num

        tqdm.write(
            "model: {: <6}: win_mean: {}, episodes: {}".format(
                model_num,
                stats["player_0_win_mean"],
                metrics["evaluation"]["episodes_this_iter"],
            )
        )
        results.append(stats)

    with open("{}_vs_models_{}.json".format(args.opponent, args.eval_num), "w") as f:
        json.dump(results, f, indent=4)
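# policy_mapping_fn is referenced above but not defined in this excerpt.
# A plausible sketch for this two-player setup (an assumption): every
# RLlib-controlled agent uses the single trained policy, while player_1
# is driven by the rule-based player_step_fn inside the env.
def policy_mapping_fn(agent_id):
    return "policy_1"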
    return post_batch


def get_policy_class_modified(config):
    if config["use_pytorch"]:
        raise NotImplementedError()
    else:
        return ANA3CTFPolicy


ANA3CTFPolicy = A3CTFPolicy.with_updates(
    name="ANA3CTFPolicy",
    postprocess_fn=modified_postprocess)

ANA3CTrainer = A3CTrainer.with_updates(
    name="ANA3C",
    default_policy=ANA3CTFPolicy,
    get_policy_class=get_policy_class_modified)


if __name__ == '__main__':
    import yaml
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", type=str, default="")
    parser.add_argument("--num-samples", type=int, default=5)
    args = parser.parse_args()

    if args.file:
        with open(args.file, "r") as f:
            config = yaml.safe_load(f)
    else:
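# The __main__ block above is truncated after the `else:`; assuming it ends
# with a config dict either way, a Tune launch of the updated trainer might
# look like this (illustrative, not from the original):
from ray import tune
tune.run(ANA3CTrainer, config=config, num_samples=args.num_samples)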
register_env("2way-single-intersection", lambda _: SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml', route_file='nets/2way-single-intersection/single-intersection-gen.rou.xml', out_csv_name='outputs/2way-single-intersection/a3c-contexts', use_gui=False, num_seconds=100000, time_to_load_vehicles=120, max_depart_delay=0, phases=[ traci.trafficlight.Phase(32, "GGrrrrGGrrrr"), traci.trafficlight.Phase(2, "yyrrrryyrrrr"), traci.trafficlight.Phase(32, "rrGrrrrrGrrr"), traci.trafficlight.Phase(2, "rryrrrrryrrr"), traci.trafficlight.Phase(32, "rrrGGrrrrGGr"), traci.trafficlight.Phase(2, "rrryyrrrryyr"), traci.trafficlight.Phase(32, "rrrrrGrrrrrG"), traci.trafficlight.Phase(2, "rrrrryrrrrry") ])) trainer = A3CTrainer(env="2way-single-intersection", config={ "multiagent": { "policy_graphs": { '0': (A3CTFPolicy, spaces.Box(low=np.zeros(21), high=np.ones(21)), spaces.Discrete(4), {}) }, "policy_mapping_fn": policy_mapping # Traffic lights are always controlled by this policy }, "lr": 0.0001, }) while True: result = trainer.train() print(pretty_print(result))
#         'vf_share_layers': 'true',
#         'num_gpus': 0,
#         'lr': 2.5e-4,
#         'log_level': 'DEBUG',
#         'simple_optimizer': args.simple,
#         'multiagent': {
#             'policies': policies,
#             'policy_mapping_fn': tune.function(
#                 lambda agent_id: random.choice(["policy_0"])),
#         },
#     },
# )

trainer = A3CTrainer(
    env="rcrsgymrllib",
    config={
        "multiagent": {
            "policies": policies,
            # random.choice over a one-element list: every agent is
            # always mapped to "policy_0".
            "policy_mapping_fn": tune.function(
                lambda agent_id: random.choice(["policy_0"])
            ),
        },
        "lr": 0.0001,
    })

while True:
    result = trainer.train()
    print(pretty_print(result))
"""Experimental pipeline-based impl; run this with --run='A3C_pl'""" from ray.rllib.agents.a3c.a3c import A3CTrainer from ray.rllib.utils.experimental_dsl import (AsyncGradients, ApplyGradients, StandardMetricsReporting) def training_pipeline(workers, config): # For A3C, compute policy gradients remotely on the rollout workers. grads = AsyncGradients(workers) # Apply the gradients as they arrive. We set update_all to False so that # only the worker sending the gradient is updated with new weights. train_op = grads.for_each(ApplyGradients(workers, update_all=False)) return StandardMetricsReporting(train_op, workers, config) A3CPipeline = A3CTrainer.with_updates(training_pipeline=training_pipeline)
import ray
from ray.rllib.agents.a3c.a3c import A3CTrainer
from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.rllib.env import PettingZooEnv
from ray.tune.registry import register_env
from gym import spaces
import numpy as np
import sumo_rl
import traci

if __name__ == '__main__':
    ray.init()

    register_env("4x4grid", lambda _: PettingZooEnv(sumo_rl.env(
        net_file='nets/4x4-Lucas/4x4.net.xml',
        route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
        out_csv_name='outputs/4x4grid/a3c',
        use_gui=False,
        num_seconds=80000)))

    trainer = A3CTrainer(env="4x4grid", config={
        "multiagent": {
            "policies": {
                '0': (A3CTFPolicy,
                      spaces.Box(low=np.zeros(11), high=np.ones(11)),
                      spaces.Discrete(2), {})
            },
            # Traffic lights are always controlled by this policy.
            "policy_mapping_fn": (lambda id: '0')
        },
        "lr": 0.001,
        "no_done_at_end": True
    })

    while True:
        print(trainer.train())  # distributed training step
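# A bounded alternative to the while-loop above, stopping on a reward
# threshold; the iteration cap and threshold are assumptions, not from the
# original (SUMO waiting-time rewards are typically negative):
for _ in range(200):
    result = trainer.train()
    if result["episode_reward_mean"] > -100:  # assumed target
        break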