def __init__(self, registry, env_creator, config, logdir, is_remote):
    self.registry = registry
    self.config = config
    self.logdir = logdir
    self.env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    if is_remote:
        config_proto = tf.ConfigProto()
    else:
        config_proto = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=config_proto)
    self.kl_coeff_val = self.config["kl_coeff"]
    self.kl_target = self.config["kl_target"]

    # Defines the training inputs:
    # The coefficient of the KL penalty.
    self.kl_coeff = tf.placeholder(name="newkl", shape=(), dtype=tf.float32)
    # The input observations.
    self.observations = tf.placeholder(
        tf.float32, shape=(None,) + self.env.observation_space.shape)
    # Targets of the value function.
    self.value_targets = tf.placeholder(tf.float32, shape=(None,))
    # Advantage values in the policy gradient estimator.
    self.advantages = tf.placeholder(tf.float32, shape=(None,))

    action_space = self.env.action_space
    self.actions = ModelCatalog.get_action_placeholder(action_space)
    self.distribution_class, self.logit_dim = ModelCatalog.get_action_dist(
        action_space, config["model"])
    # Log probabilities from the policy before the policy update.
    self.prev_logits = tf.placeholder(
        tf.float32, shape=(None, self.logit_dim))
    # Value function predictions before the policy update.
    self.prev_vf_preds = tf.placeholder(tf.float32, shape=(None,))

    self.inputs = [
        ("obs", self.observations),
        ("value_targets", self.value_targets),
        ("advantages", self.advantages),
        ("actions", self.actions),
        ("logprobs", self.prev_logits),
        ("vf_preds", self.prev_vf_preds)]
    self.common_policy = self.build_tf_loss([ph for _, ph in self.inputs])

    # References to the model weights
    self.variables = ray.experimental.TensorFlowVariables(
        self.common_policy.loss, self.sess)
    self.obs_filter = get_filter(
        config["observation_filter"], self.env.observation_space.shape)
    self.rew_filter = MeanStdFilter((), clip=5.0)
    self.filters = {
        "obs_filter": self.obs_filter,
        "rew_filter": self.rew_filter
    }
    self.sampler = SyncSampler(
        self.env, self.common_policy, self.obs_filter,
        self.config["horizon"], self.config["horizon"])

def __init__(self, registry, env_creator, config, logdir,
             start_sampler=True):
    env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    self.env = env
    policy_cls = get_policy_cls(config)
    # TODO(rliaw): should change this to be just env.observation_space
    self.policy = policy_cls(
        registry, env.observation_space.shape, env.action_space, config)
    self.config = config

    # Technically not needed when not remote
    self.obs_filter = get_filter(
        config["observation_filter"], env.observation_space.shape)
    self.rew_filter = get_filter(config["reward_filter"], ())
    self.filters = {
        "obs_filter": self.obs_filter,
        "rew_filter": self.rew_filter
    }
    self.sampler = AsyncSampler(
        env, self.policy, self.obs_filter, config["batch_size"])
    # "async" is a plain attribute name here (pre-Python 3.7 code).
    if start_sampler and self.sampler.async:
        self.sampler.start()
    self.logdir = logdir

def __init__(self, env_creator, config, logdir):
    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_creator(config["env_config"]), config["model"])
    self.dataset = ExperienceDataset(config["dataset_path"])
    self.policy = BCPolicy(env.observation_space, env.action_space, config)
    self.config = config
    self.logdir = logdir
    self.metrics_queue = queue.Queue()

def run(args, parser):
    def create_environment(env_config):
        # This import must happen inside the method so that worker
        # processes import this code.
        import roboschool
        return gym.make(args.env)

    if not args.config:
        # Load configuration from file.
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training
        # by default.
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()
    register_env(args.env, create_environment)

    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    if args.algorithm == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))
    env = wrappers.Monitor(
        env, OUTPUT_DIR, force=True,
        video_callable=lambda episode_id: True)

    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))

def run(args, parser):
    config = args.config
    if not config:
        # Load configuration from file.
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.json")
        if not os.path.exists(config_path):
            config_path = os.path.join(config_dir, "../params.json")
        if not os.path.exists(config_path):
            raise ValueError(
                "Could not find params.json in either the checkpoint dir or "
                "its parent directory.")
        with open(config_path) as f:
            config = json.load(f)
        if "num_workers" in config:
            config["num_workers"] = min(2, config["num_workers"])

    if not args.env:
        if not config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if hasattr(agent, "local_evaluator"):
        env = agent.local_evaluator.env
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))

    if args.out is not None:
        rollouts = []
    steps = 0
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            if not args.no_render:
                env.render()
            if args.out is not None:
                rollout.append([state, action, next_state, reward, done])
            steps += 1
            state = next_state
        if args.out is not None:
            rollouts.append(rollout)
        print("Episode reward", reward_total)

    if args.out is not None:
        pickle.dump(rollouts, open(args.out, "wb"))

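# When --out is given, run() above pickles the rollouts as a list of
# episodes, each a list of [state, action, next_state, reward, done]
# transitions. A minimal sketch of reading such a file back; the file
# name "rollouts.pkl" is illustrative, not from the source.
import pickle

with open("rollouts.pkl", "rb") as f:
    rollouts = pickle.load(f)

for i, rollout in enumerate(rollouts):
    # reward is the fourth element of each transition
    episode_reward = sum(transition[3] for transition in rollout)
    print("Episode {}: {} steps, reward {}".format(
        i, len(rollout), episode_reward))
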
def __init__(self, registry, env_creator, config, logdir):
    env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    self.dataset = ExperienceDataset(config["dataset_path"])
    # TODO(rliaw): should change this to be just env.observation_space
    self.policy = BCPolicy(
        registry, env.observation_space.shape, env.action_space, config)
    self.config = config
    self.logdir = logdir
    self.metrics_queue = queue.Queue()

def wrap_dqn(env, options): """Apply a common set of wrappers for DQN.""" is_atari = hasattr(env.unwrapped, "ale") # Override atari default to use the deepmind wrappers. # TODO(ekl) this logic should be pushed to the catalog. if is_atari and not options.get("custom_preprocessor"): return wrap_deepmind(env, dim=options.get("dim", 84)) return ModelCatalog.get_preprocessor_as_wrapper(env, options)
def wrap_dqn(registry, env, options, random_starts):
    """Apply a common set of wrappers for DQN."""
    is_atari = hasattr(env.unwrapped, "ale")
    # Override atari default to use the deepmind wrappers.
    # TODO(ekl) this logic should be pushed to the catalog.
    if is_atari and "custom_preprocessor" not in options:
        return wrap_deepmind(env, random_starts=random_starts)
    return ModelCatalog.get_preprocessor_as_wrapper(registry, env, options)

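# A minimal usage sketch for the registry variant of wrap_dqn above.
# Assumptions: the env id "PongNoFrameskip-v4" is an illustrative choice;
# the registry comes from tune.registry.get_registry(), as suggested by
# the evaluator docstring later in this section.
import gym
from ray.tune.registry import get_registry

env = gym.make("PongNoFrameskip-v4")  # any Atari env exposes env.unwrapped.ale
options = {}                          # no "custom_preprocessor" key set
env = wrap_dqn(get_registry(), env, options, random_starts=True)
obs = env.reset()                     # deepmind-preprocessed observation
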
def run(args, parser):
    if not args.config:
        # Load configuration from file.
        config_dir = os.path.dirname(args.checkpoint)
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    cls = get_agent_class(args.run)
    agent = cls(env=args.env, config=args.config)
    agent.restore(args.checkpoint)
    num_steps = int(args.steps)

    if args.run == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))
    if args.out is not None:
        rollouts = []
    steps = 0
    while steps < (num_steps or steps + 1):
        if args.out is not None:
            rollout = []
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done and steps < (num_steps or steps + 1):
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            if not args.no_render:
                env.render()
            if args.out is not None:
                rollout.append([state, action, next_state, reward, done])
            steps += 1
            state = next_state
        if args.out is not None:
            rollouts.append(rollout)
        print("Episode reward", reward_total)
    if args.out is not None:
        pickle.dump(rollouts, open(args.out, "wb"))

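# The run() helpers in this section all read the same attributes off
# `args`: checkpoint, run (or algorithm), env, steps, out, no_render and
# config. A hedged sketch of an argparse parser that would produce such
# an object; the flag names are inferred from those attributes and are
# not confirmed by the source.
import argparse
import json

def create_parser():
    parser = argparse.ArgumentParser(
        description="Roll out a trained agent from a checkpoint.")
    parser.add_argument("checkpoint", help="Checkpoint to restore the agent from.")
    parser.add_argument("--run", required=True,
                        help="Algorithm used to train, e.g. PPO or DQN.")
    parser.add_argument("--env", help="Env name; falls back to params.json.")
    parser.add_argument("--steps", default=10000,
                        help="Total number of env steps to roll out.")
    parser.add_argument("--out", default=None,
                        help="Optional pickle file for the saved rollouts.")
    parser.add_argument("--no-render", dest="no_render", action="store_true",
                        help="Skip env.render() during the rollout.")
    parser.add_argument("--config", default=None, type=json.loads,
                        help="Optional agent config as a JSON string.")
    return parser
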
def __init__(self, registry, env_creator, config, logdir,
             start_sampler=True):
    self.env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    self.config = config
    self.policy = SharedTorchPolicy(config)
    # Technically not needed when not remote
    self.filter = MyNoFilter()
    # Observation sampler
    self.sampler = MyAsyncSampler(
        self.env, self.policy, self.filter, config["batch_size"])
    # Misc; "async" is a plain attribute name here (pre-Python 3.7 code).
    if start_sampler and self.sampler.async:
        self.sampler.start()
    self.logdir = logdir

def wrap_dqn(registry, env, options): """Apply a common set of wrappers for DQN.""" is_atari = hasattr(env.unwrapped, "ale") if is_atari: env = EpisodicLifeEnv(env) env = NoopResetEnv(env, noop_max=30) if 'NoFrameskip' in env.spec.id: env = MaxAndSkipEnv(env, skip=4) if 'FIRE' in env.unwrapped.get_action_meanings(): env = FireResetEnv(env) env = ModelCatalog.get_preprocessor_as_wrapper(registry, env, options) if is_atari: env = FrameStack(env, 4) env = ClippedRewardsWrapper(env) return env
def wrap_dqn(env, options): """Apply a common set of wrappers for DQN.""" is_atari = (env.observation_space.shape == ModelCatalog.ATARI_OBS_SHAPE) if is_atari: env = EpisodicLifeEnv(env) env = NoopResetEnv(env, noop_max=30) if 'NoFrameskip' in env.spec.id: env = MaxAndSkipEnv(env, skip=4) if 'FIRE' in env.unwrapped.get_action_meanings(): env = FireResetEnv(env) env = ModelCatalog.get_preprocessor_as_wrapper(env, options) if is_atari: env = FrameStack(env, 4) env = ClippedRewardsWrapper(env) return env
def __init__(
        self, registry, env_creator, config, logdir, start_sampler=True):
    env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    self.env = env
    policy_cls = get_policy_cls(config)
    # TODO(rliaw): should change this to be just env.observation_space
    self.policy = policy_cls(
        registry, env.observation_space.shape, env.action_space, config)
    self.config = config
    # Technically not needed when not remote
    self.obs_filter = get_filter(
        config["observation_filter"], env.observation_space.shape)
    self.rew_filter = get_filter(config["reward_filter"], ())
    self.filters = {"obs_filter": self.obs_filter,
                    "rew_filter": self.rew_filter}
    self.sampler = AsyncSampler(env, self.policy, self.obs_filter,
                                config["batch_size"])
    if start_sampler and self.sampler.async:
        self.sampler.start()
    self.logdir = logdir

def __init__(self, registry, env_creator, config, worker_index):
    env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    self.env = env
    self.config = config

    if isinstance(env.action_space, Discrete):
        raise UnsupportedSpaceException(
            "Action space {} is not supported for DDPG.".format(
                env.action_space))

    tf_config = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=tf_config)
    self.ddpg_graph = models.DDPGGraph(registry, env, config)

    # Initialize the parameters and copy them to the target network.
    self.sess.run(tf.global_variables_initializer())
    self.ddpg_graph.copy_target(self.sess)
    self.global_timestep = 0
    self.local_timestep = 0
    nb_actions = env.action_space.shape[-1]
    stddev = config["exploration_noise"]
    self.exploration_noise = OUNoise(
        mu=np.zeros(nb_actions),
        sigma=float(stddev) * np.ones(nb_actions))
    self.action_range = (-1., 1.)

    # Note that this encompasses both the Q and target network.
    self.variables = ray.experimental.TensorFlowVariables(
        tf.group(self.ddpg_graph.td_error, self.ddpg_graph.action_lost),
        self.sess)

    self.max_action = env.action_space.high
    self.episode_rewards = [0.0]
    self.episode_lengths = [0.0]
    self.saved_mean_reward = None
    self.obs = self.env.reset()

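# OUNoise above is an Ornstein-Uhlenbeck process, used in DDPG for
# temporally correlated exploration noise. Its definition is not part of
# this section; below is a minimal sketch consistent with the mu/sigma
# constructor arguments used above. The theta and dt defaults are
# assumptions, not values taken from the source.
import numpy as np

class OUNoise(object):
    """Ornstein-Uhlenbeck process: dx = theta * (mu - x) * dt + sigma * dW."""

    def __init__(self, mu, sigma, theta=0.15, dt=1e-2):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.reset()

    def reset(self):
        # Restart the process from its mean.
        self.x = np.copy(self.mu)

    def sample(self):
        # Euler-Maruyama step of the OU stochastic differential equation.
        self.x = (self.x
                  + self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt)
                  * np.random.normal(size=self.mu.shape))
        return self.x
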
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False
    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = False
    elif args.render_mode == 'no-render':
        sumo_params.render = False
    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])
    scenario = scenario_class(
        name=exp_tag, vehicles=vehicles, net_params=net_params,
        initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = 6000  # consider changing this to 6000
        env_params.horizon = 6000

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(env_class(
        env_params=env_params, sumo_params=sumo_params, scenario=scenario))

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []
    final_outflows = []
    mean_speed = []
    for i in range(1):  # args.num_rollouts
        vel = []
        state = env.reset()
        done = False
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            vel.append(vehicles.get_speed(
                vehicles.get_ids())[0])  # this is the overall mean speed
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                print(type(state), state)
                action = agent.compute_action(state)
                print(type(action), action)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        # mean_speed.append(np.mean(vel))  # note this line
        print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(
            np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(vel)
    plt.show()

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie

if not args.config.get("env"): parser.error("the following arguments are required: --env") args.env = args.config.get("env") ray.init() cls = get_agent_class(args.run) agent = cls(env=args.env, config=args.config) agent.restore(args.checkpoint) num_steps = int(args.steps) if args.run == "DQN": env = gym.make(args.env) env = wrap_dqn(env, args.config.get("model", {})) else: env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env)) if args.out is not None: rollouts = [] steps = 0 while steps < (num_steps or steps + 1): if args.out is not None: rollout = [] state = env.reset() done = False reward_total = 0.0 while not done and steps < (num_steps or steps + 1): action = agent.compute_action(state) next_state, reward, done, _ = env.step(action) reward_total += reward if not args.no_render: env.render()
def __init__(self, registry, env_creator, config, logdir, is_remote):
    self.registry = registry
    self.is_remote = is_remote
    if is_remote:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        devices = ["/cpu:0"]
    else:
        devices = config["devices"]
    self.devices = devices
    self.config = config
    self.logdir = logdir
    self.env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    if is_remote:
        config_proto = tf.ConfigProto()
    else:
        config_proto = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=config_proto)
    if config["tf_debug_inf_or_nan"] and not is_remote:
        self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)
        self.sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

    # Defines the training inputs:
    # The coefficient of the KL penalty.
    self.kl_coeff = tf.placeholder(name="newkl", shape=(), dtype=tf.float32)
    # The input observations.
    self.observations = tf.placeholder(
        tf.float32, shape=(None,) + self.env.observation_space.shape)
    # Targets of the value function.
    self.value_targets = tf.placeholder(tf.float32, shape=(None,))
    # Advantage values in the policy gradient estimator.
    self.advantages = tf.placeholder(tf.float32, shape=(None,))

    action_space = self.env.action_space
    # TODO(rliaw): pull this into model_catalog
    if isinstance(action_space, gym.spaces.Box):
        self.actions = tf.placeholder(
            tf.float32, shape=(None, action_space.shape[0]))
    elif isinstance(action_space, gym.spaces.Discrete):
        self.actions = tf.placeholder(tf.int64, shape=(None,))
    else:
        raise NotImplementedError(
            "action space " + str(type(action_space)) +
            " currently not supported")
    self.distribution_class, self.logit_dim = ModelCatalog.get_action_dist(
        action_space)
    # Log probabilities from the policy before the policy update.
    self.prev_logits = tf.placeholder(
        tf.float32, shape=(None, self.logit_dim))
    # Value function predictions before the policy update.
    self.prev_vf_preds = tf.placeholder(tf.float32, shape=(None,))

    assert config["sgd_batchsize"] % len(devices) == 0, \
        "Batch size must be evenly divisible by devices"
    if is_remote:
        self.batch_size = config["rollout_batchsize"]
        self.per_device_batch_size = config["rollout_batchsize"]
    else:
        self.batch_size = config["sgd_batchsize"]
        self.per_device_batch_size = int(self.batch_size / len(devices))

    def build_loss(obs, vtargets, advs, acts, plog, pvf_preds):
        return ProximalPolicyLoss(
            self.env.observation_space, self.env.action_space,
            obs, vtargets, advs, acts, plog, pvf_preds, self.logit_dim,
            self.kl_coeff, self.distribution_class, self.config,
            self.sess, self.registry)

    self.par_opt = LocalSyncParallelOptimizer(
        tf.train.AdamOptimizer(self.config["sgd_stepsize"]),
        self.devices,
        [self.observations, self.value_targets, self.advantages,
         self.actions, self.prev_logits, self.prev_vf_preds],
        self.per_device_batch_size,
        build_loss,
        self.logdir)

    # Metric ops
    with tf.name_scope("test_outputs"):
        policies = self.par_opt.get_device_losses()
        self.mean_loss = tf.reduce_mean(
            tf.stack(values=[policy.loss for policy in policies]), 0)
        self.mean_policy_loss = tf.reduce_mean(
            tf.stack(values=[policy.mean_policy_loss for policy in policies]),
            0)
        self.mean_vf_loss = tf.reduce_mean(
            tf.stack(values=[policy.mean_vf_loss for policy in policies]), 0)
        self.mean_kl = tf.reduce_mean(
            tf.stack(values=[policy.mean_kl for policy in policies]), 0)
        self.mean_entropy = tf.reduce_mean(
            tf.stack(values=[policy.mean_entropy for policy in policies]), 0)

    # References to the model weights
    self.common_policy = self.par_opt.get_common_loss()
    self.variables = ray.experimental.TensorFlowVariables(
        self.common_policy.loss, self.sess)
    self.obs_filter = get_filter(
        config["observation_filter"], self.env.observation_space.shape)
    self.rew_filter = MeanStdFilter((), clip=5.0)
    self.filters = {
        "obs_filter": self.obs_filter,
        "rew_filter": self.rew_filter
    }
    self.sampler = SyncSampler(
        self.env, self.common_policy, self.obs_filter,
        self.config["horizon"], self.config["horizon"])
    self.sess.run(tf.global_variables_initializer())

def __init__(self, registry, env_creator, config, logdir, is_remote):
    self.registry = registry
    self.is_remote = is_remote
    if is_remote:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        devices = ["/cpu:0"]
    else:
        devices = config["devices"]
    self.devices = devices
    self.config = config
    self.logdir = logdir
    self.env = ModelCatalog.get_preprocessor_as_wrapper(
        registry, env_creator(config["env_config"]), config["model"])
    if is_remote:
        config_proto = tf.ConfigProto()
    else:
        config_proto = tf.ConfigProto(**config["tf_session_args"])
    self.sess = tf.Session(config=config_proto)
    if config["tf_debug_inf_or_nan"] and not is_remote:
        self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)
        self.sess.add_tensor_filter(
            "has_inf_or_nan", tf_debug.has_inf_or_nan)

    # Defines the training inputs:
    # The coefficient of the KL penalty.
    self.kl_coeff = tf.placeholder(
        name="newkl", shape=(), dtype=tf.float32)
    # The input observations.
    self.observations = tf.placeholder(
        tf.float32, shape=(None,) + self.env.observation_space.shape)
    # Targets of the value function.
    self.value_targets = tf.placeholder(tf.float32, shape=(None,))
    # Advantage values in the policy gradient estimator.
    self.advantages = tf.placeholder(tf.float32, shape=(None,))

    action_space = self.env.action_space
    self.actions = ModelCatalog.get_action_placeholder(action_space)
    self.distribution_class, self.logit_dim = ModelCatalog.get_action_dist(
        action_space)
    # Log probabilities from the policy before the policy update.
    self.prev_logits = tf.placeholder(
        tf.float32, shape=(None, self.logit_dim))
    # Value function predictions before the policy update.
    self.prev_vf_preds = tf.placeholder(tf.float32, shape=(None,))

    if is_remote:
        self.batch_size = config["rollout_batchsize"]
        self.per_device_batch_size = config["rollout_batchsize"]
    else:
        self.batch_size = int(
            config["sgd_batchsize"] / len(devices)) * len(devices)
        assert self.batch_size % len(devices) == 0
        self.per_device_batch_size = int(self.batch_size / len(devices))

    def build_loss(obs, vtargets, advs, acts, plog, pvf_preds):
        return ProximalPolicyLoss(
            self.env.observation_space, self.env.action_space,
            obs, vtargets, advs, acts, plog, pvf_preds, self.logit_dim,
            self.kl_coeff, self.distribution_class, self.config,
            self.sess, self.registry)

    self.par_opt = LocalSyncParallelOptimizer(
        tf.train.AdamOptimizer(self.config["sgd_stepsize"]),
        self.devices,
        [self.observations, self.value_targets, self.advantages,
         self.actions, self.prev_logits, self.prev_vf_preds],
        self.per_device_batch_size,
        build_loss,
        self.logdir)

    # Metric ops
    with tf.name_scope("test_outputs"):
        policies = self.par_opt.get_device_losses()
        self.mean_loss = tf.reduce_mean(
            tf.stack(values=[
                policy.loss for policy in policies]), 0)
        self.mean_policy_loss = tf.reduce_mean(
            tf.stack(values=[
                policy.mean_policy_loss for policy in policies]), 0)
        self.mean_vf_loss = tf.reduce_mean(
            tf.stack(values=[
                policy.mean_vf_loss for policy in policies]), 0)
        self.mean_kl = tf.reduce_mean(
            tf.stack(values=[
                policy.mean_kl for policy in policies]), 0)
        self.mean_entropy = tf.reduce_mean(
            tf.stack(values=[
                policy.mean_entropy for policy in policies]), 0)

    # References to the model weights
    self.common_policy = self.par_opt.get_common_loss()
    self.variables = ray.experimental.TensorFlowVariables(
        self.common_policy.loss, self.sess)
    self.obs_filter = get_filter(
        config["observation_filter"], self.env.observation_space.shape)
    self.rew_filter = MeanStdFilter((), clip=5.0)
    self.filters = {"obs_filter": self.obs_filter,
                    "rew_filter": self.rew_filter}
    self.sampler = SyncSampler(
        self.env, self.common_policy, self.obs_filter,
        self.config["horizon"], self.config["horizon"])
    self.sess.run(tf.global_variables_initializer())

def wrap(env):
    env = ModelCatalog.get_preprocessor_as_wrapper(env, model_config)
    if monitor_path:
        env = _monitor(env, monitor_path)
    return env

def __init__(self,
             env_creator,
             policy_graph,
             tf_session_creator=None,
             batch_steps=100,
             batch_mode="truncate_episodes",
             preprocessor_pref="rllib",
             sample_async=False,
             compress_observations=False,
             observation_filter="NoFilter",
             registry=None,
             env_config=None,
             model_config=None,
             policy_config=None):
    """Initialize a policy evaluator.

    Arguments:
        env_creator (func): Function that returns a gym.Env given an env
            config dict.
        policy_graph (class): A class implementing rllib.PolicyGraph or
            rllib.TFPolicyGraph.
        tf_session_creator (func): A function that returns a TF session.
            This is optional and only useful with TFPolicyGraph.
        batch_steps (int): The target number of env transitions to include
            in each sample batch returned from this evaluator.
        batch_mode (str): One of the following choices:
            complete_episodes: each batch will be at least batch_steps
                in size, and will include one or more complete episodes.
            truncate_episodes: each batch will be around batch_steps in
                size, and include transitions from one episode only.
            pack_episodes: each batch will be exactly batch_steps in
                size, and may include transitions from multiple episodes.
        preprocessor_pref (str): Whether to prefer RLlib preprocessors
            ("rllib") or deepmind ("deepmind") when applicable.
        sample_async (bool): Whether to compute samples asynchronously in
            the background, which improves throughput but can cause
            samples to be slightly off-policy.
        compress_observations (bool): If true, compress the observations
            returned.
        observation_filter (str): Name of observation filter to use.
        registry (tune.Registry): User-registered objects. Pass in the
            value from tune.registry.get_registry() if you're having
            trouble resolving things like custom envs.
        env_config (dict): Config to pass to the env creator.
        model_config (dict): Config to use when creating the policy model.
        policy_config (dict): Config to pass to the policy.
    """
    registry = registry or get_registry()
    env_config = env_config or {}
    policy_config = policy_config or {}
    model_config = model_config or {}

    assert batch_mode in [
        "complete_episodes", "truncate_episodes", "pack_episodes"
    ]
    self.env_creator = env_creator
    self.policy_graph = policy_graph
    self.batch_steps = batch_steps
    self.batch_mode = batch_mode
    self.compress_observations = compress_observations

    self.env = env_creator(env_config)
    is_atari = hasattr(self.env.unwrapped, "ale")
    if is_atari and "custom_preprocessor" not in model_config and \
            preprocessor_pref == "deepmind":
        self.env = wrap_deepmind(self.env, dim=model_config.get("dim", 80))
    else:
        self.env = ModelCatalog.get_preprocessor_as_wrapper(
            registry, self.env, model_config)
    self.vectorized = hasattr(self.env, "vector_reset")
    self.policy_map = {}

    if issubclass(policy_graph, TFPolicyGraph):
        with tf.Graph().as_default():
            if tf_session_creator:
                self.sess = tf_session_creator()
            else:
                self.sess = tf.Session(config=tf.ConfigProto(
                    gpu_options=tf.GPUOptions(allow_growth=True)))
            with self.sess.as_default():
                policy = policy_graph(
                    self.env.observation_space, self.env.action_space,
                    registry, policy_config)
    else:
        policy = policy_graph(
            self.env.observation_space, self.env.action_space,
            registry, policy_config)
    self.policy_map = {"default": policy}

    self.obs_filter = get_filter(
        observation_filter, self.env.observation_space.shape)
    self.filters = {"obs_filter": self.obs_filter}

    if self.vectorized:
        raise NotImplementedError("Vector envs not yet supported")
    else:
        if batch_mode not in [
            "pack_episodes", "truncate_episodes", "complete_episodes"
        ]:
            raise NotImplementedError("Batch mode not yet supported")
        pack = batch_mode == "pack_episodes"
        if batch_mode == "complete_episodes":
            batch_steps = 999999
        if sample_async:
            self.sampler = AsyncSampler(
                self.env, self.policy_map["default"], self.obs_filter,
                batch_steps, pack=pack)
            self.sampler.start()
        else:
            self.sampler = SyncSampler(
                self.env, self.policy_map["default"], self.obs_filter,
                batch_steps, pack=pack)

def wrap(env):
    return ModelCatalog.get_preprocessor_as_wrapper(env, model_config)

def __init__(self, env_creator, config, is_ext_train=False):
    self.local_steps = 0
    self.config = config
    self.summarize = config.get("summarize")

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_creator(self.config["env_config"]), self.config["model"])

    if is_ext_train:
        train_dataset = input_fn(
            self.config["inverse_model"]["ext_train_file_path"])
        valid_dataset = input_fn(
            self.config["inverse_model"]["ext_valid_file_path"])
        iterator = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        next_element = iterator.get_next()
        self.x = next_element[0]
        self.ac = next_element[1]
        self.training_init_op = iterator.make_initializer(train_dataset)
        self.validation_init_op = iterator.make_initializer(valid_dataset)
    else:
        self.x = tf.placeholder(
            tf.float32,
            shape=[None,
                   numpy.prod([2] + list(env.observation_space.shape))])
        if isinstance(env.action_space, gym.spaces.Box):
            self.ac = tf.placeholder(
                tf.float32, [None] + list(env.action_space.shape), name="ac")
        elif isinstance(env.action_space, gym.spaces.Discrete):
            self.ac = tf.placeholder(tf.int64, [None], name="ac")
        else:
            raise NotImplementedError(
                "action space " + str(type(env.action_space)) +
                " currently not supported")

    # Setup graph
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        env.action_space, self.config["model"])
    self._model = FullyConnectedNetwork(self.x, logit_dim, {})
    self.logits = self._model.outputs
    self.curr_dist = dist_class(self.logits)
    self.sample = self.curr_dist.sample()
    self.var_list = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)

    # Setup loss
    log_prob = self.curr_dist.logp(self.ac)
    self.pi_loss = -tf.reduce_sum(log_prob)
    self.loss = self.pi_loss
    self.optimizer = tf.train.AdamOptimizer(
        self.config["lr"]).minimize(self.loss)

    # Setup similarity -> cosine similarity
    normalize_sample = tf.nn.l2_normalize(self.sample, 1)
    normalize_ac = tf.nn.l2_normalize(self.ac, 1)
    self.similarity = 1 - tf.losses.cosine_distance(
        normalize_sample, normalize_ac, dim=1)

    # Initialize
    self.initialize()

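# The `similarity` op above is one minus the cosine distance between the
# L2-normalized sampled actions and the dataset actions. A numpy
# sanity-check of the same per-example quantity (illustrative only;
# tf.losses.cosine_distance additionally averages over the batch):
import numpy as np

def cosine_similarity(a, b):
    a = a / np.linalg.norm(a, axis=1, keepdims=True)
    b = b / np.linalg.norm(b, axis=1, keepdims=True)
    return np.sum(a * b, axis=1)

print(cosine_similarity(np.array([[1.0, 0.0]]), np.array([[1.0, 0.0]])))  # [1.]
print(cosine_similarity(np.array([[1.0, 0.0]]), np.array([[0.0, 1.0]])))  # [0.]
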
if not args.config.get("env"): parser.error("the following arguments are required: --env") args.env = args.config.get("env") ray.init() cls = get_agent_class(args.run) agent = cls(env=args.env, config=args.config) agent.restore(args.checkpoint) num_steps = int(args.steps) if args.run == "DQN": env = gym.make(args.env) env = wrap_dqn(get_registry(), env, args.config.get("model", {})) else: env = ModelCatalog.get_preprocessor_as_wrapper(get_registry(), gym.make(args.env)) if args.out is not None: rollouts = [] steps = 0 while steps < (num_steps or steps + 1): if args.out is not None: rollout = [] state = env.reset() done = False reward_total = 0.0 while not done and steps < (num_steps or steps + 1): action = agent.compute_action(state) next_state, reward, done, _ = env.step(action) reward_total += reward if not args.no_render: env.render()
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False
    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = False
    elif args.render_mode == 'no-render':
        sumo_params.render = False
    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])
    scenario = scenario_class(
        name=exp_tag, vehicles=vehicles, net_params=net_params,
        initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params, sumo_params=sumo_params,
                  scenario=scenario))

    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator, FormatStrFormatter

    fig = plt.figure()
    h = np.linspace(0, 60, 100)
    Deltav = np.linspace(-6, 12, 100)
    Headway, DELTAV = np.meshgrid(h, Deltav)
    # fix v=20m/s
    xn, yn = Headway.shape
    geta = np.array(Headway)
    for xk in range(xn):
        for yk in range(yn):
            # input state: Headway[xk, yk] and DELTAV[xk, yk]
            geta[xk, yk] = agent.compute_action(
                np.array([3.8 / 30, DELTAV[xk, yk] / 30,
                          Headway[xk, yk] / 260]))

    surf = plt.contourf(DELTAV, Headway, geta, 20, cmap=cm.coolwarm)
    plt.colorbar()
    # C = plt.contour(DELTAV, Headway, geta, 20, colors='black')
    # plt.clabel(C, inline=True, fontsize=10)
    plt.show()

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie

def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 1

    flow_params = get_flow_params(config)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])
    scenario = scenario_class(
        name=exp_tag, vehicles=vehicles, net_params=net_params,
        initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    if args.evaluate:
        env_params.evaluate = True
    sumo_params = flow_params['sumo']
    if args.no_render:
        sumo_params.render = False
    else:
        sumo_params.render = True
    sumo_params.emission_path = './test_time_rollout/'

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params, sumo_params=sumo_params,
                  scenario=scenario))

    # Run the environment in the presence of the pre-trained RL agent for
    # the requested number of time steps / rollouts
    rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            ret += reward
            if done:
                break
        rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        print('Round {}, Return: {}'.format(i, ret))
    print('Average, std return: {}, {}'.format(np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

def run(args, parser):
    if not args.config:
        # Load configuration from file.
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training
        # by default.
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init(webui_host="127.0.0.1")

    agent_env_config = {"env_name": args.env}
    register_env("unity_env",
                 lambda config: UnityEnvWrapper(agent_env_config))

    if ray.__version__ >= "0.6.5":
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class

    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 0
    config["num_gpus"] = 0
    agent = cls(env="unity_env", config=config)

    # Delete unnecessary logs.
    env_name = args.env.split('.')[0]
    files = glob(
        "/opt/ml/input/data/train/{}_Data/Logs/*.csv".format(env_name),
        recursive=True)
    for file in files:
        os.remove(file)

    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    env_config = {"env_name": args.env}
    if ray.__version__ >= "0.6.5":
        env = UnityEnvWrapper(env_config)
    else:
        from ray.rllib.agents.dqn.common.wrappers import wrap_dqn
        if args.algorithm == "DQN":
            env = UnityEnvWrapper(env_config)
            env = wrap_dqn(env, args.config.get("model", {}))
        else:
            env = ModelCatalog.get_preprocessor_as_wrapper(
                UnityEnvWrapper(env_config))
    env = wrappers.Monitor(
        env, OUTPUT_DIR, force=True,
        video_callable=lambda episode_id: True)

    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))

def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = False
    sim_params.emission_path = './test_time_rollout/'

    # prepare for rendering
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])
    scenario = scenario_class(
        name=exp_tag, vehicles=vehicles, net_params=net_params,
        initial_config=initial_config)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    single_agent_envs = [
        env for env in dir(flow.envs) if not env.startswith('__')
    ]
    if flow_params['env_name'] in single_agent_envs:
        env_loc = 'flow.envs'
    else:
        env_loc = 'flow.multiagent_envs'

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__(env_loc, fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params, sim_params=sim_params,
                  scenario=scenario))

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(
            np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += " && cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)

def create_and_wrap(env_creator, options):
    env = env_creator()
    env = ModelCatalog.get_preprocessor_as_wrapper(env, options)
    # env = RayEnv(env)
    env = Diagnostic(env)
    return env