def _init(self):
    """Build the session, policy, optimizer, shared noise table and workers."""
    cfg = self.config
    policy_kwargs = {"action_noise_std": 0.01}

    # A local env instance is built only to derive the action space and
    # preprocessor for the driver-side policy.
    sample_env = self.env_creator(cfg["env_config"])
    obs_preprocessor = ModelCatalog.get_preprocessor(self.registry, sample_env)

    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(
        self.registry, self.sess, sample_env.action_space, obs_preprocessor,
        cfg["observation_filter"], **policy_kwargs)
    self.optimizer = optimizers.Adam(self.policy, cfg["stepsize"])

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_handle = create_shared_noise.remote(cfg["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_handle))

    # Create the actors.
    print("Creating actors.")
    self.workers = [
        Worker.remote(self.registry, cfg, policy_kwargs,
                      self.env_creator, noise_handle)
        for _ in range(cfg["num_workers"])
    ]

    # Progress counters and wall-clock start time.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
def __init__(self, config, policy_params, env_creator, noise,
             min_task_runtime=0.2):
    """Per-actor state: its own env, policy, and a view on the noise table."""
    self.config = config
    self.policy_params = policy_params
    self.min_task_runtime = min_task_runtime
    self.noise = SharedNoiseTable(noise)

    self.env = env_creator()
    obs_shape = self.env.observation_space.shape
    self.preprocessor = ModelCatalog.get_preprocessor(
        self.env.spec.id, obs_shape)
    self.preprocessor_shape = self.preprocessor.transform_shape(obs_shape)

    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(
        self.env.observation_space, self.env.action_space,
        self.preprocessor, **policy_params)
    tf_util.initialize()

    # Independent RNG per worker for sampling perturbations.
    self.rs = np.random.RandomState()

    # The policy must track observation statistics exactly when the
    # config enables obstat collection.
    assert self.policy.needs_ob_stat == (
        self.config["calc_obstat_prob"] != 0)
def _init(self):
    """Build the session, policy, optimizer, shared noise table and workers."""
    cfg = self.config
    policy_kwargs = {"ac_noise_std": 0.01}

    # A throwaway env provides the spaces and preprocessor shape needed
    # to construct the driver-side policy and running obs statistics.
    sample_env = self.env_creator()
    obs_shape = sample_env.observation_space.shape
    obs_preprocessor = ModelCatalog.get_preprocessor(
        sample_env.spec.id, obs_shape)
    processed_shape = obs_preprocessor.transform_shape(obs_shape)

    self.sess = utils.make_session(single_threaded=False)
    self.policy = policies.GenericPolicy(
        sample_env.observation_space, sample_env.action_space,
        obs_preprocessor, **policy_kwargs)
    tf_util.initialize()
    self.optimizer = optimizers.Adam(self.policy, cfg["stepsize"])
    self.ob_stat = utils.RunningStat(processed_shape, eps=1e-2)

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_handle = create_shared_noise.remote()
    self.noise = SharedNoiseTable(ray.get(noise_handle))

    # Create the actors.
    print("Creating actors.")
    self.workers = [
        Worker.remote(cfg, policy_kwargs, self.env_creator, noise_handle)
        for _ in range(cfg["num_workers"])
    ]

    # Progress counters and wall-clock start time.
    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.tstart = time.time()
def _init(self):
    """Initialize the ARS driver: hyperparameters, shared noise table,
    worker actors, driver-side policy and SGD optimizer.
    """
    env = self.env_creator(self.config["env_config"])
    from ray.rllib import models
    preprocessor = models.ModelCatalog.get_preprocessor(self.registry, env)

    # Hyperparameters pulled from the config.
    self.timesteps = 0
    self.num_deltas = self.config["num_deltas"]
    self.deltas_used = self.config["deltas_used"]
    self.step_size = self.config["sgd_stepsize"]
    self.delta_std = self.config["delta_std"]
    seed = self.config["seed"]
    self.shift = self.config["shift"]
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')

    # Create the shared noise table.
    print("Creating shared noise table.")
    noise_id = create_shared_noise.remote()
    # seed + 3 decorrelates the driver's delta stream from the workers'.
    self.deltas = SharedNoiseTable(ray.get(noise_id), seed=seed + 3)

    # Create the actors.
    print("Creating actors.")
    self.num_workers = self.config["num_workers"]
    self.workers = [
        Worker.remote(self.registry, self.config, self.env_creator,
                      seed + 7 * i,  # distinct seed per worker
                      deltas=noise_id,
                      rollout_length=env.spec.max_episode_steps,
                      delta_std=self.delta_std)
        for i in range(self.num_workers)
    ]

    self.episodes_so_far = 0
    self.timesteps_so_far = 0
    self.sess = utils.make_session(single_threaded=False)

    # Initialize the policy. BUGFIX: the branch convention now matches
    # Worker.__init__ (Linear only when explicitly requested, MLP
    # otherwise); previously the driver defaulted to Linear while the
    # workers defaulted to MLP, so an unrecognized config['policy']
    # value produced mismatched policy classes.
    if self.config['policy'] == 'Linear':
        self.policy = LinearPolicy(self.registry, self.sess,
                                   env.action_space, preprocessor,
                                   self.config["observation_filter"])
    else:
        self.policy = MLPPolicy(self.registry, self.sess,
                                env.action_space, preprocessor,
                                self.config["observation_filter"])
    self.w_policy = self.policy.get_weights()

    # Initialize the optimization algorithm (reuse the stored step size).
    self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
    print("Initialization of ARS complete.")
def __init__(self, registry, config, policy_params, env_creator, noise,
             min_task_runtime=0.2):
    """Per-actor state: its own env, session, policy, and a view on the
    shared noise table."""
    self.config = config
    self.policy_params = policy_params
    self.min_task_runtime = min_task_runtime
    self.noise = SharedNoiseTable(noise)

    self.env = env_creator(config["env_config"])
    self.preprocessor = ModelCatalog.get_preprocessor(registry, self.env)

    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.GenericPolicy(
        registry, self.sess, self.env.action_space, self.preprocessor,
        config["observation_filter"], **policy_params)
def __init__(self, registry, config, env_creator, env_seed, deltas=None,
             rollout_length=1000, delta_std=0.02):
    """ARS worker actor: owns a seeded env, a preprocessor, a view of the
    shared noise (deltas) table, and its own policy/session.
    """
    # Initialize OpenAI environment for each worker, seeded per worker.
    self.env = env_creator(config["env_config"])
    self.env.seed(env_seed)

    from ray.rllib import models
    self.preprocessor = models.ModelCatalog.get_preprocessor(
        registry, self.env)

    # Each worker gets access to the shared noise table with an
    # independent random stream for sampling from it.
    # (env_seed + 7 decorrelates it from the env's own seed.)
    self.deltas = SharedNoiseTable(deltas, env_seed + 7)

    # BUGFIX: removed a duplicated `from ray.rllib import models` /
    # `get_preprocessor` pair that rebuilt self.preprocessor a second
    # time, clobbering the first instance.

    self.delta_std = delta_std
    self.rollout_length = rollout_length

    self.sess = utils.make_session(single_threaded=True)
    # Same branch convention as the driver: Linear only when explicitly
    # requested, MLP otherwise.
    if config['policy'] == 'Linear':
        self.policy = LinearPolicy(registry, self.sess,
                                   self.env.action_space,
                                   self.preprocessor,
                                   config["observation_filter"])
    else:
        self.policy = MLPPolicy(registry, self.sess,
                                self.env.action_space,
                                self.preprocessor,
                                config["observation_filter"])