def test_a2c_exec_impl(ray_start_regular):
    config = {"min_iter_time_s": 0}
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        assert isinstance(trainer.train(), dict)
        check_compute_single_action(trainer)
        trainer.stop()

def game_eval():
    env = PomFFA()
    obs = env.reset()

    model_path = "/home/subill/ray_results/A2C_pom_2020-03-04_16-49-377fgvw_gr/checkpoint_601/checkpoint-601"
    config = a3c.DEFAULT_CONFIG.copy()
    config["num_gpus"] = 1
    config["num_workers"] = 1
    config["eager"] = False
    config["use_pytorch"] = True
    config["env_config"] = {"is_training": False}
    config["model"] = model_config

    trainer = a3c.A2CTrainer(env="pom", config=config)
    trainer.restore(model_path)

    for i in range(500):
        env.render()
        actions = trainer.compute_action(obs)
        print(actions)
        obs, reward, done, _ = env.step(actions)
        if done:
            break
        time.sleep(0.5)
    env.render()
    time.sleep(10)

def test_a2c_exec_impl(self):
    config = a3c.A2CConfig().reporting(min_time_s_per_reporting=0)
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        results = trainer.train()
        check_train_results(results)
        print(results)
        check_compute_single_action(trainer)
        trainer.stop()

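For comparison with the legacy dict-style configs in the surrounding snippets, here is a minimal standalone sketch of the same builder-API pattern, assuming a Ray version around 1.13 where A2CConfig exposes rollouts(), reporting(), and build(); the worker count is illustrative:

import ray
import ray.rllib.agents.a3c as a3c

ray.init()

# Fluent config-object API instead of a raw config dict.
config = (
    a3c.A2CConfig()
    .rollouts(num_rollout_workers=2)  # illustrative worker count
    .reporting(min_time_s_per_reporting=0)
)
trainer = config.build(env="CartPole-v0")
print(trainer.train())
trainer.stop()
ray.shutdown()
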
def test_a2c_exec_impl_microbatch(ray_start_regular):
    config = {
        "min_iter_time_s": 0,
        "microbatch_size": 10,
    }
    for _ in framework_iterator(config, ("tf", "torch")):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        assert isinstance(trainer.train(), dict)
        check_compute_action(trainer)

def test_a2c_exec_impl(ray_start_regular):
    config = {"min_time_s_per_reporting": 0}
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        results = trainer.train()
        check_train_results(results)
        print(results)
        check_compute_single_action(trainer)
        trainer.stop()

def test_a2c_exec_impl_microbatch(ray_start_regular):
    config = {
        "min_iter_time_s": 0,
        "microbatch_size": 10,
    }
    for _ in framework_iterator(config):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        results = trainer.train()
        check_train_results(results)
        print(results)
        check_compute_single_action(trainer)
        trainer.stop()

def test_a2c_compilation(self):
    """Test whether an A2CTrainer can be built with both frameworks."""
    config = a3c.a2c.A2C_DEFAULT_CONFIG.copy()
    config["num_workers"] = 2
    config["num_envs_per_worker"] = 2

    num_iterations = 1

    # Test against all frameworks.
    for _ in framework_iterator(config):
        for env in ["PongDeterministic-v0"]:
            trainer = a3c.A2CTrainer(config=config, env=env)
            for i in range(num_iterations):
                results = trainer.train()
                print(results)
            check_compute_single_action(trainer)
            trainer.stop()

def game_train():
    config = a3c.DEFAULT_CONFIG.copy()
    config["num_gpus"] = 1
    config["num_workers"] = 6
    config["eager"] = False
    config["use_pytorch"] = True
    config["model"] = model_config
    print(config)

    trainer = a3c.A2CTrainer(env="pom", config=config)
    # Can optionally call trainer.restore(path) to load a checkpoint.
    for i in range(10000):
        result = trainer.train()
        print(pretty_print(result))
        if i % 200 == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)

def get_rl_agent(agent_name, config, env_to_agent):
    if agent_name == A2C:
        import ray.rllib.agents.a3c as a2c
        agent = a2c.A2CTrainer(config=config, env=env_to_agent)
    elif agent_name == A3C:
        import ray.rllib.agents.a3c as a3c
        agent = a3c.A3CTrainer(config=config, env=env_to_agent)
    elif agent_name == BC:
        import ray.rllib.agents.marwil as bc
        agent = bc.BCTrainer(config=config, env=env_to_agent)
    elif agent_name == DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.DQNTrainer(config=config, env=env_to_agent)
    elif agent_name == APEX_DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.ApexTrainer(config=config, env=env_to_agent)
    elif agent_name == IMPALA:
        import ray.rllib.agents.impala as impala
        agent = impala.ImpalaTrainer(config=config, env=env_to_agent)
    elif agent_name == MARWIL:
        import ray.rllib.agents.marwil as marwil
        agent = marwil.MARWILTrainer(config=config, env=env_to_agent)
    elif agent_name == PG:
        import ray.rllib.agents.pg as pg
        agent = pg.PGTrainer(config=config, env=env_to_agent)
    elif agent_name == PPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.PPOTrainer(config=config, env=env_to_agent)
    elif agent_name == APPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.APPOTrainer(config=config, env=env_to_agent)
    elif agent_name == SAC:
        import ray.rllib.agents.sac as sac
        agent = sac.SACTrainer(config=config, env=env_to_agent)
    elif agent_name == LIN_UCB:
        import ray.rllib.contrib.bandits.agents.lin_ucb as lin_ucb
        agent = lin_ucb.LinUCBTrainer(config=config, env=env_to_agent)
    elif agent_name == LIN_TS:
        import ray.rllib.contrib.bandits.agents.lin_ts as lin_ts
        agent = lin_ts.LinTSTrainer(config=config, env=env_to_agent)
    else:
        raise ValueError("Invalid agent name: {}".format(agent_name))
    return agent

def get_rllib_agent(agent_name, env_name, env, env_to_agent):
    config = get_config(env_name, env, 1) if is_rllib_agent(agent_name) else {}
    if agent_name == RLLIB_A2C:
        import ray.rllib.agents.a3c as a2c
        agent = a2c.A2CTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_A3C:
        import ray.rllib.agents.a3c as a3c
        agent = a3c.A3CTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_BC:
        import ray.rllib.agents.marwil as bc
        agent = bc.BCTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.DQNTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_APEX_DQN:
        import ray.rllib.agents.dqn as dqn
        agent = dqn.ApexTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_IMPALA:
        import ray.rllib.agents.impala as impala
        agent = impala.ImpalaTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_MARWIL:
        import ray.rllib.agents.marwil as marwil
        agent = marwil.MARWILTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_PG:
        import ray.rllib.agents.pg as pg
        agent = pg.PGTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_PPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.PPOTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_APPO:
        import ray.rllib.agents.ppo as ppo
        agent = ppo.APPOTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_SAC:
        import ray.rllib.agents.sac as sac
        agent = sac.SACTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_LIN_UCB:
        import ray.rllib.contrib.bandits.agents.lin_ucb as lin_ucb
        agent = lin_ucb.LinUCBTrainer(config=config, env=env_to_agent)
    elif agent_name == RLLIB_LIN_TS:
        import ray.rllib.contrib.bandits.agents.lin_ts as lin_ts
        agent = lin_ts.LinTSTrainer(config=config, env=env_to_agent)
    else:
        # Guard against returning an unbound name when no branch matches.
        raise ValueError("Unknown agent name: {}".format(agent_name))
    return agent

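A hedged usage sketch for the two factory functions above. The agent-name constants (e.g. A2C) and the helpers get_config/is_rllib_agent come from the original projects; the config and environment name below are illustrative:

import ray

ray.init()

# A2C is the agent-name constant assumed from the factory above;
# the config and env name are placeholders for illustration.
agent = get_rl_agent(A2C, config={"num_workers": 1}, env_to_agent="CartPole-v0")
for _ in range(5):
    result = agent.train()
    print(result["episode_reward_mean"])
ray.shutdown()
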
def test_a2c_fake_multi_gpu_learning(self):
    """Test whether A2CTrainer can learn CartPole w/ faked multi-GPU."""
    config = copy.deepcopy(a3c.a2c.A2C_DEFAULT_CONFIG)
    # Fake GPU setup.
    config["num_gpus"] = 2
    config["_fake_gpus"] = True
    config["framework"] = "tf"
    # Mimic tuned_example for A2C CartPole.
    config["lr"] = 0.001

    trainer = a3c.A2CTrainer(config=config, env="CartPole-v0")
    num_iterations = 100
    learnt = False
    for i in range(num_iterations):
        results = trainer.train()
        print("reward={}".format(results["episode_reward_mean"]))
        if results["episode_reward_mean"] > 100.0:
            learnt = True
            break
    assert learnt, "A2C multi-GPU (with fake-GPUs) did not learn CartPole!"
    trainer.stop()

def create_agent(args):
    """Create an RLlib agent (A2C or DQN) from parsed arguments.

    Args:
        args (argparse.Namespace): argparse arguments.

    Returns:
        agent: the configured RLlib trainer.
    """
    # A2C (lives in the a3c module)
    # ------------------------------------------
    if args.agent == "A2C":
        # Custom configuration.
        config = a3c.DEFAULT_CONFIG.copy()
        config["framework"] = "torch"
        config["lr"] = 5e-4
        config["num_gpus"] = 1
        config["num_workers"] = 1
        config["train_batch_size"] = 128
        config["use_critic"] = True
        config["use_gae"] = False
        # Custom model.
        config["model"]["fcnet_activation"] = "tanh"
        config["model"]["fcnet_hiddens"] = [64, 64, 64]
        # Agent creation.
        agent = a3c.A2CTrainer(env=GymEnv, config=config)
    # DQN
    # ------------------------------------------
    elif args.agent == "DQN":
        # Custom configuration.
        config = dqn.DEFAULT_CONFIG.copy()
        config["double_q"] = False
        config["dueling"] = False
        config["framework"] = "torch"
        config["lr"] = 5e-3
        config["num_gpus"] = 1
        config["num_workers"] = 1
        config["train_batch_size"] = 128
        # Custom model.
        config["model"]["fcnet_activation"] = "tanh"
        config["model"]["fcnet_hiddens"] = [128, 128, 128]
        # Agent creation.
        agent = dqn.DQNTrainer(env=GymEnv, config=config)

    # Optionally load a checkpoint.
    if args.checkpoint:
        agent.restore(args.checkpoint)

    # Print the model (Keras summary() prints as a side effect).
    if args.verbose > 0:
        model = agent.get_policy().model
        if config["framework"] == "tf":
            model.base_model.summary()
        elif config["framework"] == "torch":
            print(model)

    return agent

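A minimal sketch of the argparse setup that create_agent appears to expect, with flag names inferred from the attributes it reads (args.agent, args.checkpoint, args.verbose); the defaults are illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--agent", choices=["A2C", "DQN"], default="A2C")
parser.add_argument("--checkpoint", type=str, default=None)
parser.add_argument("--verbose", type=int, default=0)
args = parser.parse_args()

agent = create_agent(args)
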
# config["sample_batch_size"]=256 config["gamma"] = 0.995 # config["entropy_coeff"] = 0.01 config['env_config'] = { # "num_rigid": 0, "reward": { "version": "v3" }, } config["model"] = { "custom_model": "my_model", "custom_options": {}, # extra options to pass to your model } trainer = a3c.A2CTrainer(env="my_env", config=config) # trainer = ppo.PPOTrainer(env="my_env", config=config) # trainer = dqn.DQNTrainer(env="my_env", config=config) policy = trainer.get_policy() print(policy.model.base_model.summary()) model_path = "/home/charlieqiu818_gmail_com/ray_results/A2C_my_env_2020-04-12_21-36-42vs43fldq/checkpoint_7005/checkpoint-7005" trainer.restore(model_path) for i in range(10000): result = trainer.train() print(pretty_print(result)) if i % 50 == 0: checkpoint = trainer.save() print("checkpoint saved at", checkpoint)
ray.shutdown(True)
ray.init(num_gpus=1, temp_dir='/home/baitianxiang/ray_tmp')

config = {
    'env_config': env_config,
    'num_workers': 2,
    'log_level': 'ERROR',
    'framework': 'tf',
    'model': model_config,
}

register_env('DirectCnnEnv-v0', lambda env_config: DirectCnnEnv(env_config))
a2c_trainer = a3c.A2CTrainer(config=config, env='DirectCnnEnv-v0')

policy = a2c_trainer.get_policy()
cur_model = policy.model.base_model
cur_model.summary()

for i in tqdm(range(1000)):
    result = a2c_trainer.train()
    print(f"{result['episode_reward_max']:.4f} | "
          f"{result['episode_reward_mean']:.4f} | "
          f"{result['episode_reward_min']:.4f}")
    if i % 10 == 0:
        checkpoint = a2c_trainer.save()
        print("checkpoint saved at", checkpoint)