Example #1
def get_a3c_train(name, pols, env, logdir):
    config = {
        "multiagent": {
            "policies": pols,
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": [name],
        },
        # disable filters, otherwise we would need to synchronize those
        # as well to the DQN agent
        "observation_filter": "NoFilter",
        "callbacks": {
            "on_train_result": on_episode_end
        }
    }
    return A3CTrainer(env=env,
                      config=config,
                      logger_creator=lambda _: UnifiedLogger(config, logdir))
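A minimal usage sketch for the helper above, assuming policy_mapping_fn and on_episode_end are defined in the same module; the space shapes, policy names, environment name and log directory below are placeholders, not taken from the original project:

import numpy as np
from gym import spaces

obs_space = spaces.Box(low=np.zeros(4), high=np.ones(4))
act_space = spaces.Discrete(2)

# Old-style RLlib multiagent policies dict: (policy_cls_or_None, obs_space, act_space, config).
pols = {
    "a3c_policy": (None, obs_space, act_space, {}),  # trained by this trainer
    "other_policy": (None, obs_space, act_space, {}),  # a second policy left untrained here
}

a3c = get_a3c_train("a3c_policy", pols, "some_registered_env", "/tmp/a3c_logs")
for _ in range(5):
    print(a3c.train())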
Example #2
            net_file='nets/4x4-Lucas/4x4.net.xml',
            route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
            out_csv_name='outputs/4x4grid/a3c-4x4grid',
            use_gui=False,
            num_seconds=80000,
            time_to_load_vehicles=120,
            max_depart_delay=0,
            phases=[
                traci.trafficlight.Phase(35, "GGGrrr"),  # north-south
                traci.trafficlight.Phase(2, "yyyrrr"),
                traci.trafficlight.Phase(35, "rrrGGG"),  # west-east
                traci.trafficlight.Phase(2, "rrryyy")
            ]))

    trainer = A3CTrainer(
        env="4x4grid",
        config={
            "multiagent": {
                "policy_graphs": {
                    '0': (A3CTFPolicy,
                          spaces.Box(low=np.zeros(10), high=np.ones(10)),
                          spaces.Discrete(2), {})
                },
                # Traffic lights are always controlled by this policy
                "policy_mapping_fn": lambda id: '0'
            },
            "lr": 0.0001,
        })
    while True:
        print(trainer.train())  # distributed training step
Example #3
elif train_algo == "A3C":
  
  # A3C
  print("Training algorithm: A3C ")
  
  trainer = A3CTrainer(
              env=env_title,
              config={
                "num_workers": num_workers,
                "num_cpus_per_worker": num_cpus_per_worker,
                "num_gpus": num_gpus,
                "num_gpus_per_worker": num_gpus_per_worker,
                "model": nw_model,
                "multiagent": {
                  "policy_graphs": policy_graphs,
                  "policy_mapping_fn": policy_mapping_fn,
                  "policies_to_train": ["agent_policy{}".format(i) for i in range(n_agents)],
                },
                "callbacks": {
                  "on_episode_start": tune.function(on_episode_start),
                  "on_episode_step": tune.function(on_episode_step),
                  "on_episode_end": tune.function(on_episode_end),
                },
                "log_level": "ERROR",
              })

else:
  print("Unknown training algorithm")


Example #4
        "model": {
            "custom_model": "yaniv_mask",
            "fcnet_hiddens": [512, 512],
        },
        "num_envs_per_worker": 1,
        "num_cpus_per_worker": args.cpus_per_worker,
        "num_cpus_for_driver": args.cpus_for_driver,
        "num_workers": 1,
        "evaluation_num_workers": args.num_workers,
        "evaluation_num_episodes": args.eval_num,
        "evaluation_interval": 1,
    }

    ray.init(include_dashboard=False, local_mode=False)

    trainer = A3CTrainer(env="yaniv", config=config)
    # models_path = "/scratch/student/models"
    models_path = "/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models"
    models = os.listdir(models_path)

    results = {}

    def make_update_env_fn(env_conf):
        def update_env_conf(env):
            env.config.update(env_conf)
            env.game.configure(env.config)

        def update_env_fn(worker):
            worker.foreach_env(update_env_conf)

        return update_env_fn
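A hypothetical way this helper could be applied, assuming the trainer created above and a rule-based step function from this project are in scope; the attribute and key names below are assumptions based on the RLlib version used in these snippets and are not shown in the original excerpt:

    # Hypothetical: push an updated env config to every evaluation env,
    # then re-run evaluation against the new opponent.
    new_conf = {"player_step_fn": {"player_1": novice_rule_step}}
    trainer.evaluation_workers.foreach_worker(make_update_env_fn(new_conf))
    metrics = trainer._evaluate()
    print(metrics["evaluation"]["custom_metrics"])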
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--eval-num", type=int, default=5)
    parser.add_argument("--eval-every", type=int, default=1)
    parser.add_argument("--num-workers", type=int, default=1)
    parser.add_argument("--cpus-per-worker", type=float, default=0.5)
    parser.add_argument("--cpus-for-driver", type=float, default=0.5)
    parser.add_argument("--address", type=str, default=None)
    parser.add_argument(
        "--model-path",
        type=str,
        default="/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models",
    )
    parser.add_argument("--opponent", type=str, default="intermediate")
    args = parser.parse_args()

    register_env("yaniv", lambda config: YanivEnv(config))
    ModelCatalog.register_custom_model("yaniv_mask", YanivActionMaskModel)

    if args.opponent == "intermediate":
        stepfn = intermediate_rule_step
    elif args.opponent == "novice":
        stepfn = novice_rule_step
    else:
        raise ValueError("opponent not defined: {}".format(args.opponent))

    env_config = {
        "end_after_n_deck_replacements": 0,
        "end_after_n_steps": 130,
        "early_end_reward": 0,
        "use_scaled_negative_reward": True,
        "use_scaled_positive_reward": True,
        "max_negative_reward": -1,
        "negative_score_cutoff": 30,
        "single_step": False,
        "step_reward": 0,
        "use_unkown_cards_in_state": False,
        "use_dead_cards_in_state": True,
        "observation_scheme": 1,
        "n_players": 2,
        "state_n_players": 2,
        "player_step_fn": {"player_1": stepfn},
    }

    env = YanivEnv(env_config)
    obs_space = env.observation_space
    act_space = env.action_space

    config = {
        "callbacks": YanivCallbacks,
        "num_gpus": 1,
        "env": "yaniv",
        "env_config": env_config,
        "framework": "torch",
        "multiagent": {
            "policies": {
                "policy_1": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": ["policy_1"],
        },
        "model": {
            "custom_model": "yaniv_mask",
            "fcnet_hiddens": [512, 512],
        },
        "num_envs_per_worker": 1,
        "num_cpus_per_worker": args.cpus_per_worker,
        "num_cpus_for_driver": args.cpus_for_driver,
        "num_workers": 1,
        "evaluation_num_workers": args.num_workers,
        "evaluation_num_episodes": args.eval_num,
        "evaluation_interval": 1,
    }

    ray.init(include_dashboard=False, address=args.address)
    trainer = A3CTrainer(env="yaniv", config=config)

    # models_path = "/home/jippo/ray_results/YanivTrainer_2021-05-02_16-44-14/YanivTrainer_yaniv_3ee8a_00000_0_2021-05-02_16-44-14/models"
    # models_path = "/scratch/student/models"
    models_path = args.model_path
    models = os.listdir(models_path)

    results = []

    for model in tqdm(sorted(models)):
        if not model.startswith("model"):
            print("idk", model)
            continue

        model_num = int(model[6:-4])

        if model_num % args.eval_every != 0:
            continue

        path = os.path.join(models_path, model)
        with open(path, "rb") as f:
            policy = pickle.load(f)

        trainer.get_policy("policy_1").set_state(policy)
        metrics = trainer._evaluate()
        metrics["evaluation"].pop("hist_stats")

        stats = {
            k: v
            for k, v in metrics["evaluation"]["custom_metrics"].items()
            if k.endswith("mean")
        }
        stats["model_number"] = model_num
        tqdm.write(
            "model: {: <6}: win_mean: {}, episodes: {}".format(
                model_num,
                stats["player_0_win_mean"],
                metrics["evaluation"]["episodes_this_iter"],
            )
        )
        results.append(stats)

    with open("{}_vs_models_{}.json".format(args.opponent, args.eval_num), "w") as f:
        json.dump(results, f, indent=4)
Example #6
    return post_batch


def get_policy_class_modified(config):
    if config["use_pytorch"]:
        raise NotImplementedError()
    else:
        return ANA3CTFPolicy


ANA3CTFPolicy = A3CTFPolicy.with_updates(name="ANA3CTFPolicy",
                                         postprocess_fn=modified_postprocess)

ANA3CTrainer = A3CTrainer.with_updates(
    name="ANA3C",
    default_policy=ANA3CTFPolicy,
    get_policy_class=get_policy_class_modified)

if __name__ == '__main__':
    import yaml
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", type=str, default="")
    parser.add_argument("--num-samples", type=int, default=5)
    args = parser.parse_args()

    if args.file:
        with open(args.file, "r") as f:
            config = yaml.safe_load(f)
    else:
    register_env("2way-single-intersection", lambda _: SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml',
                                                    route_file='nets/2way-single-intersection/single-intersection-gen.rou.xml',
                                                    out_csv_name='outputs/2way-single-intersection/a3c-contexts',
                                                    use_gui=False,
                                                    num_seconds=100000,
                                                    time_to_load_vehicles=120,
                                                    max_depart_delay=0,
                                                    phases=[
                                                        traci.trafficlight.Phase(32, "GGrrrrGGrrrr"),  
                                                        traci.trafficlight.Phase(2, "yyrrrryyrrrr"),
                                                        traci.trafficlight.Phase(32, "rrGrrrrrGrrr"),   
                                                        traci.trafficlight.Phase(2, "rryrrrrryrrr"),
                                                        traci.trafficlight.Phase(32, "rrrGGrrrrGGr"),   
                                                        traci.trafficlight.Phase(2, "rrryyrrrryyr"),
                                                        traci.trafficlight.Phase(32, "rrrrrGrrrrrG"), 
                                                        traci.trafficlight.Phase(2, "rrrrryrrrrry")
                                                        ]))

    trainer = A3CTrainer(env="2way-single-intersection", config={
        "multiagent": {
            "policy_graphs": {
                '0': (A3CTFPolicy, spaces.Box(low=np.zeros(21), high=np.ones(21)), spaces.Discrete(4), {})
            },
            "policy_mapping_fn": policy_mapping  # Traffic lights are always controlled by this policy
        },
        "lr": 0.0001,
    })
    
    while True:
        result = trainer.train()
        print(pretty_print(result))
Example #8
    #       'vf_share_layers': 'true',
    #       'num_gpus': 0,
    #       'lr': 2.5e-4,
    #       'log_level': 'DEBUG',
    #       'simple_optimizer': args.simple,
    #       'multiagent': {
    #           'policies': policies,
    #           'policy_mapping_fn': tune.function(
    #               lambda agent_id: random.choice(["policy_0"])),
    #       },
    #   },
    #  )

    trainer = A3CTrainer(
        env="rcrsgymrllib",
        config={
            "multiagent": {
                "policies":
                policies,
                "policy_mapping_fn":
                tune.function(
                    lambda agent_id: random.choice(["policy_0"])
                ),  # Traffic lights are always controlled by this policy
            },
            "lr": 0.0001,
        })

    while True:
        result = trainer.train()
        print(pretty_print(result))
Example #9
"""Experimental pipeline-based impl; run this with --run='A3C_pl'"""

from ray.rllib.agents.a3c.a3c import A3CTrainer
from ray.rllib.utils.experimental_dsl import (AsyncGradients, ApplyGradients,
                                              StandardMetricsReporting)


def training_pipeline(workers, config):
    # For A3C, compute policy gradients remotely on the rollout workers.
    grads = AsyncGradients(workers)

    # Apply the gradients as they arrive. We set update_all to False so that
    # only the worker sending the gradient is updated with new weights.
    train_op = grads.for_each(ApplyGradients(workers, update_all=False))

    return StandardMetricsReporting(train_op, workers, config)


A3CPipeline = A3CTrainer.with_updates(training_pipeline=training_pipeline)
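If the 'A3C_pl' name is not already known to the trainer registry, one minimal way to try this pipeline variant is to register it with Tune manually; the registration call, CartPole environment and stopping criterion below are assumptions, not part of the original module:

from ray import tune
from ray.tune.registry import register_trainable

register_trainable("A3C_pl", A3CPipeline)  # hypothetical manual registration
tune.run("A3C_pl",
         stop={"training_iteration": 3},
         config={"env": "CartPole-v0", "num_workers": 2})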
Example #10
import ray
from ray.rllib.agents.a3c.a3c import A3CTrainer
from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.rllib.env import PettingZooEnv
from ray.tune.registry import register_env
from gym import spaces
import numpy as np
import sumo_rl
import traci


if __name__ == '__main__':
    ray.init()

    register_env("4x4grid", lambda _: PettingZooEnv(sumo_rl.env(net_file='nets/4x4-Lucas/4x4.net.xml',
                                                    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                                                    out_csv_name='outputs/4x4grid/a3c',
                                                    use_gui=False,
                                                    num_seconds=80000)))

    trainer = A3CTrainer(env="4x4grid", config={
        "multiagent": {
            "policies": {
                '0': (A3CTFPolicy, spaces.Box(low=np.zeros(11), high=np.ones(11)), spaces.Discrete(2), {})
            },
            "policy_mapping_fn": (lambda id: '0')  # Traffic lights are always controlled by this policy
        },
        "lr": 0.001,
        "no_done_at_end": True
    })
    while True:
        print(trainer.train())  # distributed training step