def testGymPreprocessors(self):
    """Standard gym envs resolve to the expected default preprocessors.

    CartPole-v0 (Box observations) needs no preprocessing; FrozenLake-v0
    (Discrete observations) is one-hot encoded.
    """
    # Bare `assert` is stripped under `python -O` and gives no useful
    # failure message; use unittest assertions (consistent with the other
    # tests in this suite).
    p1 = ModelCatalog.get_preprocessor(
        get_registry(), gym.make("CartPole-v0"))
    self.assertEqual(type(p1), NoPreprocessor)
    p2 = ModelCatalog.get_preprocessor(
        get_registry(), gym.make("FrozenLake-v0"))
    self.assertEqual(type(p2), OneHotPreprocessor)
def testGymPreprocessors(self):
    """Standard gym envs resolve to the expected default preprocessors."""
    cases = [
        ("CartPole-v0", NoPreprocessor),
        ("FrozenLake-v0", OneHotPreprocessor),
    ]
    for env_name, expected_cls in cases:
        preprocessor = ModelCatalog.get_preprocessor(
            get_registry(), gym.make(env_name))
        self.assertEqual(type(preprocessor), expected_cls)
def test_custom_preprocessor(self):
    """Custom preprocessors registered under a name are resolved from the
    model options; without the option, the default preprocessor is used."""
    ray.init(object_store_memory=1000 * 1024 * 1024)
    ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
    ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
    env = gym.make("CartPole-v0")
    for name, expected_cls in (("foo", CustomPreprocessor),
                               ("bar", CustomPreprocessor2)):
        prep = ModelCatalog.get_preprocessor(
            env, {"custom_preprocessor": name})
        # Compare class reprs rather than the classes themselves.
        self.assertEqual(str(type(prep)), str(expected_cls))
    default_prep = ModelCatalog.get_preprocessor(env)
    self.assertEqual(type(default_prep), NoPreprocessor)
def testCustomPreprocessor(self):
    """Custom preprocessors registered under a name are resolved from the
    model options; without the option, the default preprocessor is used."""
    ray.init()
    ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
    ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
    env = gym.make("CartPole-v0")
    for name, expected_cls in (("foo", CustomPreprocessor),
                               ("bar", CustomPreprocessor2)):
        prep = ModelCatalog.get_preprocessor(
            env, {"custom_preprocessor": name})
        # Compare class reprs rather than the classes themselves.
        self.assertEqual(str(type(prep)), str(expected_cls))
    default_prep = ModelCatalog.get_preprocessor(env)
    self.assertEqual(type(default_prep), NoPreprocessor)
def testCustomPreprocessor(self):
    """Custom preprocessors are resolved via the registry; without the
    option, the default preprocessor is used."""
    ray.init()
    ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
    ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
    env = gym.make("CartPole-v0")
    for name, expected_cls in (("foo", CustomPreprocessor),
                               ("bar", CustomPreprocessor2)):
        prep = ModelCatalog.get_preprocessor(
            get_registry(), env, {"custom_preprocessor": name})
        # Compare class reprs rather than the classes themselves.
        self.assertEqual(str(type(prep)), str(expected_cls))
    default_prep = ModelCatalog.get_preprocessor(get_registry(), env)
    self.assertEqual(type(default_prep), NoPreprocessor)
def testCustomPreprocessor(self):
    """Custom preprocessors are resolved via the registry; without the
    option, the default preprocessor is used.
    """
    # Bare `assert` is stripped under `python -O` and gives no useful
    # failure message; use unittest assertions instead.
    ray.init()
    ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
    ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
    env = gym.make("CartPole-v0")
    p1 = ModelCatalog.get_preprocessor(
        get_registry(), env, {"custom_preprocessor": "foo"})
    self.assertEqual(type(p1), CustomPreprocessor)
    p2 = ModelCatalog.get_preprocessor(
        get_registry(), env, {"custom_preprocessor": "bar"})
    self.assertEqual(type(p2), CustomPreprocessor2)
    p3 = ModelCatalog.get_preprocessor(get_registry(), env)
    self.assertEqual(type(p3), NoPreprocessor)
def _init(self):
    """Set up the ES trainer: policy, optimizer, shared noise table, workers."""
    policy_params = {"ac_noise_std": 0.01}
    env = self.env_creator()
    preprocessor = ModelCatalog.get_preprocessor(
        env.spec.id, env.observation_space.shape)
    preprocessor_shape = preprocessor.transform_shape(
        env.observation_space.shape)
    # The TF session must exist before the policy builds its graph.
    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(env.observation_space,
                                         env.action_space, preprocessor,
                                         **policy_params)
    tf_util.initialize()
    self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
    # Running statistics over observations; eps presumably avoids a
    # degenerate std at startup — see utils.RunningStat.
    self.ob_stat = utils.RunningStat(preprocessor_shape, eps=1e-2)

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_id = create_shared_noise.remote()
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    print("Creating actors.")
    # Workers receive the noise table's object id so they share one copy.
    self.workers = [
        Worker.remote(self.config, policy_params, self.env_creator,
                      noise_id)
        for _ in range(self.config["num_workers"])
    ]

    # Progress counters used for reporting.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
def _init(self):
    """Set up the ES trainer: policy, optimizer, shared noise table, workers."""
    policy_params = {"action_noise_std": 0.01}
    env = self.env_creator(self.config["env_config"])
    preprocessor = ModelCatalog.get_preprocessor(self.registry, env)
    # The TF session must exist before the policy builds its graph.
    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(self.registry, self.sess,
                                         env.action_space, preprocessor,
                                         self.config["observation_filter"],
                                         **policy_params)
    self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_id = create_shared_noise.remote(self.config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    print("Creating actors.")
    # Workers receive the noise table's object id so they share one copy.
    self.workers = [
        Worker.remote(self.registry, self.config, policy_params,
                      self.env_creator, noise_id)
        for _ in range(self.config["num_workers"])
    ]

    # Progress counters used for reporting.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
def __init__(self, env_id, env=None, options=None):
    """Wrap `env` so observations are run through an RLlib preprocessor.

    Args:
        env_id (str): Id of the gym environment (used for preprocessor
            lookup).
        env (gym.Env): The environment to wrap.
        options (dict): Options passed through to the preprocessor.
            Defaults to an empty dict.
    """
    # `options=dict()` as a default is evaluated once at definition time
    # and shared across calls; use the None sentinel instead.
    if options is None:
        options = {}
    super(RLLibPreprocessing, self).__init__(env)
    self.preprocessor = ModelCatalog.get_preprocessor(
        env_id, env.observation_space.shape, options)
    self._process_shape = self.preprocessor.transform_shape(
        env.observation_space.shape)
    # Transformed observations are reported in [-1, 1].
    self.observation_space = Box(-1.0, 1.0, self._process_shape)
def __init__(self, config, policy_params, env_creator, noise,
             min_task_runtime=0.2):
    # ES rollout worker: owns its own env, preprocessor, TF session and
    # policy replica.
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.policy_params = policy_params
    # `noise` is the raw shared-noise array; wrap it for indexed access.
    self.noise = SharedNoiseTable(noise)
    self.env = env_creator()
    self.preprocessor = ModelCatalog.get_preprocessor(
        self.env.spec.id, self.env.observation_space.shape)
    self.preprocessor_shape = self.preprocessor.transform_shape(
        self.env.observation_space.shape)
    # The TF session must exist before the policy builds its graph.
    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(self.env.observation_space,
                                         self.env.action_space,
                                         self.preprocessor, **policy_params)
    tf_util.initialize()
    # Per-worker RNG (seeded from OS entropy).
    self.rs = np.random.RandomState()
    # The policy's observation-stat requirement must agree with the
    # config flag, otherwise rollouts and updates would disagree.
    assert (
        self.policy.needs_ob_stat == (self.config["calc_obstat_prob"] != 0))
def __init__(self, name, batchsize):
    """Hold `batchsize` independent copies of the gym env `name`."""
    self.envs = [gym.make(name) for _ in range(batchsize)]
    first_env = self.envs[0]
    # All copies share identical spaces; expose the first one's.
    self.observation_space = first_env.observation_space
    self.action_space = first_env.action_space
    self.batchsize = batchsize
    self.preprocessor = ModelCatalog.get_preprocessor(
        name, first_env.observation_space.shape)
def __init__(self, env_creator, batchsize, options):
    """Hold `batchsize` independent env copies made by `env_creator`."""
    self.envs = [env_creator() for _ in range(batchsize)]
    first_env = self.envs[0]
    # All copies share identical spaces; expose the first one's.
    self.observation_space = first_env.observation_space
    self.action_space = first_env.action_space
    self.batchsize = batchsize
    self.preprocessor = ModelCatalog.get_preprocessor(
        first_env, options["model"])
    self.extra_frameskip = options.get("extra_frameskip", 1)
    assert self.extra_frameskip >= 1
def __init__(self, name, batchsize, options):
    """Hold `batchsize` independent copies of the gym env `name`."""
    self.envs = [gym.make(name) for _ in range(batchsize)]
    first_env = self.envs[0]
    # All copies share identical spaces; expose the first one's.
    self.observation_space = first_env.observation_space
    self.action_space = first_env.action_space
    self.batchsize = batchsize
    self.preprocessor = ModelCatalog.get_preprocessor(
        name, first_env.observation_space.shape, options["model"])
    self.extra_frameskip = options.get("extra_frameskip", 1)
    assert self.extra_frameskip >= 1
def get_preprocessor_as_wrapper(env, options=None):
    """Returns a preprocessor as a gym observation wrapper.

    Args:
        env (gym.Env): The gym environment to wrap.
        options (dict): Options to pass to the preprocessor. Defaults to
            an empty dict.

    Returns:
        wrapper (gym.ObservationWrapper): Preprocessor in wrapper form.
    """
    # `options={}` as a default is evaluated once at definition time and
    # shared across calls; use the None sentinel instead.
    if options is None:
        options = {}
    preprocessor = ModelCatalog.get_preprocessor(env, options)
    return _RLlibPreprocessorWrapper(env, preprocessor)
def testTuplePreprocessor(self):
    """Tuple observations are flattened: Discrete(5) one-hot (5 floats)
    followed by the Box's 3 values gives an 8-vector."""
    ray.init()

    class TupleEnv(object):
        def __init__(self):
            self.observation_space = Tuple(
                [Discrete(5), Box(0, 1, shape=(3,), dtype=np.float32)])

    prep = ModelCatalog.get_preprocessor(TupleEnv())
    self.assertEqual(prep.shape, (8,))
    expected = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0]
    self.assertEqual(list(prep.transform((0, [1, 2, 3]))), expected)
def test_tuple_preprocessor(self):
    """Tuple observations are flattened: Discrete(5) one-hot (5 floats)
    followed by the Box's 3 values gives an 8-vector."""
    ray.init(object_store_memory=1000 * 1024 * 1024)

    class TupleEnv:
        def __init__(self):
            self.observation_space = Tuple(
                [Discrete(5), Box(0, 5, shape=(3, ), dtype=np.float32)])

    prep = ModelCatalog.get_preprocessor(TupleEnv())
    self.assertEqual(prep.shape, (8, ))
    expected = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0]
    self.assertEqual(
        list(prep.transform((0, np.array([1, 2, 3])))), expected)
def testTuplePreprocessor(self):
    """Tuple observations are flattened: Discrete(5) one-hot (5 floats)
    followed by the Box's 3 values gives an 8-vector."""
    ray.init()

    class TupleEnv(object):
        def __init__(self):
            self.observation_space = Tuple(
                [Discrete(5), Box(0, 1, shape=(3,), dtype=np.float32)])

    prep = ModelCatalog.get_preprocessor(get_registry(), TupleEnv())
    self.assertEqual(prep.shape, (8,))
    expected = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0]
    self.assertEqual(list(prep.transform((0, [1, 2, 3]))), expected)
def __init__(self, registry, config, policy_params, env_creator, noise,
             min_task_runtime=0.2):
    # ES rollout worker: owns its own env, preprocessor, TF session and
    # policy replica.
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.policy_params = policy_params
    # `noise` is the raw shared-noise array; wrap it for indexed access.
    self.noise = SharedNoiseTable(noise)
    self.env = env_creator(config["env_config"])
    self.preprocessor = ModelCatalog.get_preprocessor(registry, self.env)
    # The TF session must exist before the policy builds its graph.
    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(
        registry, self.sess, self.env.action_space, self.preprocessor,
        config["observation_filter"], **policy_params)
def __init__(self, env_name, config, upload_dir=None):
    """Evolution Strategies driver: builds policy, optimizer, shared
    noise table and rollout workers."""
    config.update({"alg": "EvolutionStrategies"})
    Algorithm.__init__(self, env_name, config, upload_dir=upload_dir)
    policy_params = {"ac_noise_std": 0.01}
    env = gym.make(env_name)
    preprocessor = ModelCatalog.get_preprocessor(
        env_name, env.observation_space.shape)
    preprocessor_shape = preprocessor.transform_shape(
        env.observation_space.shape)
    # The TF session must exist before the policy builds its graph.
    utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(env.observation_space,
                                         env.action_space, preprocessor,
                                         **policy_params)
    tf_util.initialize()
    self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
    # Running statistics over observations; eps presumably avoids a
    # degenerate std at startup — see utils.RunningStat.
    self.ob_stat = utils.RunningStat(preprocessor_shape, eps=1e-2)

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_id = create_shared_noise.remote()
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    print("Creating actors.")
    # Workers receive the noise table's object id so they share one copy.
    self.workers = [
        Worker.remote(config, policy_params, env_name, noise_id)
        for _ in range(config["num_workers"])
    ]

    # Progress counters used for reporting.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
    self.iteration = 0
def visualizer_rllib(args):
    """Replay a trained RLlib agent on a Flow scenario and print stats.

    Loads the run config and pickled params from ``args.result_dir``,
    rebuilds the scenario/env, restores the agent from the requested
    checkpoint, runs ``args.num_rollouts`` episodes, and prints per-round
    and aggregate return, speed and outflow statistics. Optionally
    converts the emission file to csv and saves a rendered movie.
    """
    # Strip a single trailing slash so path concatenation below works.
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    # NOTE(review): if this fails, `pkl` stays undefined; the multiagent
    # branch below would then raise NameError — confirm intended.
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    # A --run argument that disagrees with the stored one is a hard error.
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = False
    sim_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
    elif args.render_mode == 'no_render':
        sim_params.render = False
    # --save_render overrides the render mode chosen above.
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    single_agent_envs = [env for env in dir(flow.envs)
                         if not env.startswith('__')]

    if flow_params['env_name'] in single_agent_envs:
        env_loc = 'flow.envs'
    else:
        env_loc = 'flow.multiagent_envs'

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__(env_loc, fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    _env = env_class(
        env_params=env_params,
        sim_params=sim_params,
        scenario=scenario,
        simulator=flow_params['simulator']
    )
    # Wrap the env so observations go through the RLlib preprocessor.
    _prep = ModelCatalog.get_preprocessor(_env, options={})
    env = _RLlibPreprocessorWrapper(_env, _prep)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            # NOTE(review): `vehicles` here shadows flow_params['veh']
            # bound above — confirm that is intended.
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        if multiagent:
            # NOTE(review): prints `ret` (the whole-round dict), not `rew`
            # — looks like a bug; confirm which value was intended.
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(
            np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~')+'/flow_rendering')
        # Most recent rendering directory (timestamp-named) is used.
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def test_preprocessor():
    """A preprocessor registered for an env id is returned for that env."""
    ModelCatalog.register_preprocessor("FakeEnv-v0", FakePreprocessor)
    fake_env = FakeEnv()
    resolved = ModelCatalog.get_preprocessor(fake_env)
    # Exact class match is intended (not isinstance).
    assert type(resolved) == FakePreprocessor