def test_ray(self):
    """
    Integration test for ray/rllib + flow
    """
    # Test 1: test_two_level_ray
    config = ppo.DEFAULT_CONFIG.copy()
    num_workers = 1
    ray.init(num_cpus=num_workers, redirect_output=False)
    config["num_workers"] = num_workers
    config["timesteps_per_batch"] = min(HORIZON * num_workers, 128)
    config["num_sgd_iter"] = 1
    config["model"].update({"fcnet_hiddens": [3, 3]})
    config["gamma"] = 0.999
    config["min_steps_per_task"] = HORIZON
    config["horizon"] = HORIZON
    config["sgd_batchsize"] = 4

    additional_env_params = {"target_velocity": 8,
                             "scenario_type": LoopScenario}
    additional_net_params = {"length": 260, "lanes": 1,
                             "speed_limit": 30, "resolution": 40}
    vehicle_params = [
        dict(veh_id="rl", num_vehicles=1,
             acceleration_controller=(RLController, {}),
             routing_controller=(ContinuousRouter, {})),
        dict(veh_id="idm", num_vehicles=21,
             acceleration_controller=(IDMController, {}),
             routing_controller=(ContinuousRouter, {})),
    ]

    flow_params = dict(
        sumo=dict(sim_step=0.1, no_step_log=False),
        env=dict(horizon=HORIZON, additional_params=additional_env_params),
        net=dict(no_internal_links=False,
                 additional_params=additional_net_params),
        veh=vehicle_params,
        initial=dict(spacing="uniform", bunching=30, min_gap=0))

    flow_env_name = "WaveAttenuationPOEnv"
    create_env, env_name = make_create_env(flow_env_name, flow_params, 0)

    # Register as rllib env
    registry.register_env(env_name, create_env)

    alg = ppo.PPOAgent(env=env_name, registry=registry.get_registry(),
                       config=config)
    for i in range(1):
        alg.train()
        checkpoint_path = alg.save()
        # Check that a checkpoint file was actually written.
        self.assertTrue(os.path.exists("%s.index" % checkpoint_path))

    # Test 2: test_two_level_ray
    # Integration test for two-level fcnet policy
    # FIXME(cathywu) ray restart currently not supported, so need to tie
    # integration tests together for the time being.
    # reload(ppo)
    # reload(registry)
    config = ppo.DEFAULT_CONFIG.copy()
    num_workers = 1
    # ray.init(num_cpus=num_workers, redirect_output=True)
    config["num_workers"] = num_workers
    config["timesteps_per_batch"] = min(HORIZON * num_workers, 128)
    config["num_sgd_iter"] = 1
    config["gamma"] = 0.999
    config["min_steps_per_task"] = HORIZON
    config["horizon"] = HORIZON
    config["sgd_batchsize"] = 4
    config["model"].update({"fcnet_hiddens": [5, 3]})
    options = {"num_subpolicies": 2,
               "fn_choose_subpolicy": fn_choose_subpolicy,
               "hierarchical_fcnet_hiddens": [[3, 3]] * 2}
    config["model"].update({"custom_options": options})
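# The test above assumes a module-level HORIZON constant and a
# fn_choose_subpolicy callable that the two-level fcnet uses to pick a
# subpolicy per observation. A minimal hypothetical sketch of those fixtures
# (an assumption, not the actual flow definitions):
import tensorflow as tf

HORIZON = 100  # assumption: any short episode length keeps the test fast


def fn_choose_subpolicy(inputs):
    # Illustrative rule only: route each sample to subpolicy 0 or 1
    # based on the sign of its first observation component.
    return tf.cast(inputs[:, 0] > 0, tf.int32)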
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save-checkpoint-dir', help='Checkpoint dir',
                        default=None)
    parser.add_argument('--load-checkpoint',
                        help='Path to existing checkpoint created by _save',
                        default=None)
    parser.add_argument('--local', action='store_true',
                        help='Use retro_contest.local.make')
    args = parser.parse_args()

    env_name = 'sonic_env'
    # Note: the hyperparameters below have been tuned for Sonic.
    if args.local:
        game = 'SonicTheHedgehog-Genesis'
        state = 'GreenHillZone.Act1'
        register_env(env_name,
                     lambda config: sonic_on_ray.make_local(game, state))
    else:
        register_env(env_name, lambda config: sonic_on_ray.make())

    ray.init()

    config = ppo.DEFAULT_CONFIG.copy()
    config.update({
        'timesteps_per_batch': 40000,
        'min_steps_per_task': 100,
        'num_workers': 32,
        'gamma': 0.99,
        'lambda': 0.95,
        'clip_param': 0.1,
        'num_sgd_iter': 30,
        'sgd_batchsize': 4096,
        'sgd_stepsize': 5e-5,
        'use_gae': True,
        'horizon': 4000,
        'devices': ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3',
                    '/gpu:4', '/gpu:5', '/gpu:6', '/gpu:7'],
        'tf_session_args': {
            'gpu_options': {
                'allow_growth': True,
            },
        },
    })

    alg = ppo.PPOAgent(config=config, env=env_name)
    print('Created a PPO agent')

    if args.load_checkpoint is not None:
        print('Trying to restore from checkpoint', args.load_checkpoint)
        alg.restore(args.load_checkpoint)
        print('Restored state from checkpoint:', args.load_checkpoint)

    # for i in range(10):
    while True:
        try:
            print('Starting to train')
            result = alg.train()
            print('result = {}'.format(result))
        except gre.GymRemoteError as e:
            print('exception', e)
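# The script above assumes a sonic_on_ray helper module. A rough sketch of
# what its two factories might wrap, based on the retro contest APIs (an
# assumption, not the actual sonic_on_ray implementation; the 'tmp/sock'
# directory is likewise assumed):
def make_local(game, state):
    # Local emulator-backed env for development outside the contest.
    from retro_contest.local import make
    return make(game=game, state=state)


def make(socket_dir='tmp/sock'):
    # Contest-evaluation env served over gym-remote's socket protocol.
    from gym_remote.client import RemoteEnv
    return RemoteEnv(socket_dir)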
"num_subpolicies": 2, "fn_choose_subpolicy": fn_choose_subpolicy, "hierarchical_fcnet_hiddens": [[32, 32]] * 2 } config["model"].update({"custom_options": options}) flow_env_name = "TwoLoopsMergePOEnv" exp_tag = "merge_two_level_policy_example" this_file = os.path.basename(__file__)[:-3] # filename without '.py' flow_params["flowenv"] = flow_env_name flow_params["exp_tag"] = exp_tag flow_params["module"] = os.path.basename(__file__)[:-3] config['model']['custom_options'].update({ 'flowenv': flow_env_name, 'exp_tag': exp_tag, 'module': this_file }) create_env, env_name = make_create_env(flow_env_name, flow_params, version=0, exp_tag=exp_tag) # Register as rllib env register_rllib_env(env_name, create_env) alg = ppo.PPOAgent(env=env_name, registry=get_registry(), config=config) for i in range(2): alg.train() if i % 20 == 0: alg.save() # save checkpoint
config.update({
    'num_sgd_iter': 30,
    'sgd_batchsize': 4096,
    'sgd_stepsize': 5e-5,
    'use_gae': True,
    'horizon': 4000,
    'devices': ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3',
                '/gpu:4', '/gpu:5', '/gpu:6', '/gpu:7'],
    'tf_session_args': {
        'gpu_options': {
            'allow_growth': True,
        },
    },
})

alg = ppo.PPOAgent(config=config, env=env_name)

for i in range(1000):
    result = alg.train()
    print('result = {}'.format(result))
    if i % 10 == 0:
        checkpoint = alg.save()
        print('checkpoint saved at', checkpoint)
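# Illustrative alternative to hard-coding eight device strings: build the
# list from a count that matches the machine (num_gpus is an assumed
# variable, not something the script defines).
num_gpus = 8
config['devices'] = ['/gpu:{}'.format(i) for i in range(num_gpus)]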
def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["timesteps_per_batch"] = 10
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = False
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {"multiagent_obs_shapes": [2, 2],
               "multiagent_act_shapes": [1, 1],
               "multiagent_shared_model": False,
               "multiagent_fcnet_hiddens": [[32, 32]] * 2}
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()
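# Sanity check (illustrative): instantiate the env once directly to confirm
# that multiagent_obs_shapes/multiagent_act_shapes above match the spaces the
# environment actually exposes.
env = create_env(env_config=None)
print(env.observation_space, env.action_space)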
ray.init(redis_address=args.redis_address)


def _check_and_update(config, json):
    for k in json.keys():
        if k not in config:
            raise Exception(
                "Unknown model config `{}`, all model configs: {}".format(
                    k, config.keys()))
    config.update(json)


env_name = args.env
if args.alg == "PPO":
    config = ppo.DEFAULT_CONFIG.copy()
    _check_and_update(config, json_config)
    alg = ppo.PPOAgent(env_name, config, upload_dir=args.upload_dir)
elif args.alg == "ES":
    config = es.DEFAULT_CONFIG.copy()
    _check_and_update(config, json_config)
    alg = es.ESAgent(env_name, config, upload_dir=args.upload_dir)
elif args.alg == "DQN":
    config = dqn.DEFAULT_CONFIG.copy()
    _check_and_update(config, json_config)
    alg = dqn.DQNAgent(env_name, config, upload_dir=args.upload_dir)
elif args.alg == "A3C":
    config = a3c.DEFAULT_CONFIG.copy()
    _check_and_update(config, json_config)
    alg = a3c.A3CAgent(env_name, config, upload_dir=args.upload_dir)
else:
    assert False, ("Unknown algorithm, check --alg argument. Valid "
                   "choices are PPO, ES, DQN and A3C.")
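# json_config is parsed from user input elsewhere in the script, and
# _check_and_update rejects any key missing from the algorithm's
# DEFAULT_CONFIG. A hypothetical example of valid overrides:
import json

json_config = json.loads('{"num_workers": 4, "gamma": 0.99}')
# whereas '{"gama": 0.99}' would raise the "Unknown model config" exception.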
config = ppo.DEFAULT_CONFIG.copy()
config["lambda"] = 0.98
config["gamma"] = 0.99
config["kl_coeff"] = 0.05
config["kl_target"] = 0.02
config["sgd_batchsize"] = 8192
config["num_sgd_iter"] = 20
config["sgd_stepsize"] = 1e-4
config["model"] = {"fcnet_hiddens": [4, 4]}
config["timesteps_per_batch"] = 80000
config["devices"] = ["/cpu:0", "/cpu:1", "/cpu:2", "/cpu:3",
                     "/cpu:4", "/cpu:5", "/cpu:6", "/cpu:7"]
config["observation_filter"] = "NoFilter"
config["tf_session_args"] = {
    "device_count": {"CPU": 8},
    "log_device_placement": False,
    "allow_soft_placement": True,
}
config["num_workers"] = 32

alg = ppo.PPOAgent("SimpleSummarization-v0", config)

for i in range(1000):
    result = alg.train()
    print("current status: {}".format(result))
    if i % 5 == 0:
        print("checkpoint path: {}".format(alg.save()))
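# This fragment assumes ray.init() was already called earlier in its file; a
# standalone run would need something like the following before constructing
# the agent (num_cpus=8 is an assumption matching the /cpu:N list above):
import ray

ray.init(num_cpus=8)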
output_path = join(args.output,
                   datetime.today().strftime('%Y-%m-%d-%H-%M-%S'))
logging_path = join(output_path, 'log.txt')
if not isdir(output_path):
    makedirs(output_path)
if isfile('usernames'):
    remove('usernames')

env_creator_name = "PokeBattleEnv-v0"
register_env(env_creator_name,
             lambda config: PokeBattleEnv(
                 ShowdownSimulator(self_play=False,
                                   logging_file=logging_path)))

ray.init()
config = ppo.DEFAULT_CONFIG.copy()
config['num_workers'] = args.workers
config['timesteps_per_batch'] = args.batch_steps
config['horizon'] = 500
config['min_steps_per_task'] = 1
config['gamma'] = 1
config['model']['fcnet_hiddens'] = [2000, 500, 100]

agent = ppo.PPOAgent(config=config, env=env_creator_name,
                     registry=get_registry())
if args.restore is not None:
    agent.restore(args.restore)

for i in range(args.iterations):
    result = agent.train()
    print(f"result: {result}")
    if i % args.save_iterations == 0:
        agent.save(checkpoint_dir=output_path)
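# The fragment above relies on an args namespace built elsewhere; a
# hypothetical parser matching the attributes it uses:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--output', default='results', help='output directory')
parser.add_argument('--workers', type=int, default=4)
parser.add_argument('--batch-steps', type=int, default=4000)
parser.add_argument('--iterations', type=int, default=100)
parser.add_argument('--save-iterations', type=int, default=10)
parser.add_argument('--restore', default=None,
                    help='checkpoint path to restore from')
args = parser.parse_args()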