config["timesteps_per_batch"] = HORIZON * N_ROLLOUTS config["gamma"] = 0.999 # discount rate config["model"].update({"fcnet_hiddens": [32, 32, 32]}) config["use_gae"] = True config["lambda"] = 0.97 config["sgd_batchsize"] = min(16 * 1024, config["timesteps_per_batch"]) config["kl_target"] = 0.02 config["num_sgd_iter"] = 10 config["horizon"] = HORIZON # save the flow params for replay flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4) config['env_config']['flow_params'] = flow_json create_env, env_name = make_create_env(params=flow_params, version=0) # Register as rllib env register_env(env_name, create_env) trials = run_experiments({ "highway_stabilize": { "run": "PPO", "env": env_name, "config": { **config }, "checkpoint_freq": 5, "max_failures": 999, "stop": { "training_iteration": 200,
args = parser.parse_args()

# Drop a single trailing slash so path joins below stay clean.
result_dir = args.result_dir[:-1] if args.result_dir[-1] == '/' \
    else args.result_dir

config = get_rllib_config(result_dir)

# A single CPU suffices for rendering/replay; force one rollout worker.
ray.init(num_cpus=1)
config["num_workers"] = 1

flow_params = get_flow_params(config)

# Create and register a gym+rllib env under the same name used in training.
create_env, env_name = make_create_env(
    params=flow_params, version=0, sumo_binary="sumo")
register_env(env_name, create_env)

# Rebuild the trained agent and load the requested checkpoint.
# NOTE(review): `_restore` is a private RLlib API kept for compatibility
# with the RLlib version this script targets.
agent_cls = get_agent_class(args.run)
agent = agent_cls(env=env_name, registry=get_registry(), config=config)
checkpoint = result_dir + '/checkpoint-' + args.checkpoint_num
agent._restore(checkpoint)

# Recreate the scenario from the pickled parameters.
exp_tag = flow_params["exp_tag"]
net_params = flow_params['net']
vehicles = flow_params['veh']
initial_config = flow_params['initial']

# Dynamically look up the scenario class by its recorded name.
module = __import__("flow.scenarios", fromlist=[flow_params["scenario"]])
scenario_class = getattr(module, flow_params["scenario"])