def __init__(self, config, policy_params, env_name, noise, min_task_runtime=0.2):
    """Initialize a rollout worker: shared noise view, env, TF session, policy.

    Args:
        config: experiment configuration object; ``calc_obstat_prob`` is read
            here to cross-check the policy's observation-stat requirement.
        policy_params: keyword arguments forwarded to ``policies.MujocoPolicy``.
        env_name: environment id handed to ``gym.make``.
        noise: shared noise buffer, wrapped locally in a ``SharedNoiseTable``.
        min_task_runtime: presumably the minimum seconds a task should keep
            running before returning (default 0.2) — confirm against caller.
    """
    # Plain attribute copies of the constructor arguments.
    self.min_task_runtime = min_task_runtime
    self.config = config
    self.policy_params = policy_params

    # Per-worker view onto the shared noise buffer.
    self.noise = SharedNoiseTable(noise)

    # Each worker owns its environment and a single-threaded TF session.
    self.env = gym.make(env_name)
    self.sess = utils.make_session(single_threaded=True)
    self.policy = policies.MujocoPolicy(
        self.env.observation_space, self.env.action_space, **policy_params)
    tf_util.initialize()

    # Independent, unseeded RNG per worker.
    self.rs = np.random.RandomState()

    # The policy must need observation stats exactly when the config says
    # observation statistics are being sampled (calc_obstat_prob != 0).
    assert self.policy.needs_ob_stat == (self.config.calc_obstat_prob != 0)
} # Create the shared noise table. print("Creating shared noise table.") noise_array = create_shared_noise() noise = SharedNoiseTable(noise_array) # Create the workers. print("Creating workers.") workers = [ Worker(config, policy_params, env_name, noise_array) for _ in range(num_workers) ] env = gym.make(env_name) sess = utils.make_session(single_threaded=False) policy = policies.MujocoPolicy(env.observation_space, env.action_space, **policy_params) tf_util.initialize() optimizer = optimizers.Adam(policy, stepsize) ob_stat = utils.RunningStat(env.observation_space.shape, eps=1e-2) episodes_so_far = 0 timesteps_so_far = 0 tstart = time.time() while True: step_tstart = time.time() theta = policy.get_trainable_flat() assert theta.dtype == np.float32