def __init__(self, name, env, dynamics_model, reward_model=None, discount=1, use_cem=False,
             n_candidates=1024, horizon=10, num_cem_iters=8, percent_elites=0.05,
             use_reward_model=False):
    self.dynamics_model = dynamics_model
    self.reward_model = reward_model
    self.discount = discount
    self.n_candidates = n_candidates
    self.horizon = horizon
    self.use_cem = use_cem
    self.num_cem_iters = num_cem_iters
    self.percent_elites = percent_elites
    self.env = env
    self.use_reward_model = use_reward_model
    self._hidden_state = None

    # unwrap nested wrappers until we reach the base environment
    self.unwrapped_env = env
    while hasattr(self.unwrapped_env, 'wrapped_env'):
        self.unwrapped_env = self.unwrapped_env.wrapped_env

    # make sure that the env has a reward function if no learned reward model is used
    if not self.use_reward_model:
        assert hasattr(self.unwrapped_env, 'reward'), "env must have a reward function"

    Serializable.quick_init(self, locals())
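# Standalone sketch of the unwrapping pattern used above (the helper name `_unwrap_env`
# is hypothetical, not part of the original class): follow `wrapped_env` attributes
# until the base environment is reached; that base object is what must expose
# `reward` when no learned reward model is used.
def _unwrap_env(env):
    base = env
    while hasattr(base, 'wrapped_env'):
        base = base.wrapped_env
    return base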
def __init__(
        self,
        env,
        policy,
        num_rollouts,
        max_path_length,
        n_parallel=1,
        vae=None,
):
    Serializable.quick_init(self, locals())
    super(Sampler, self).__init__(env, policy, n_parallel, max_path_length)

    self.total_samples = num_rollouts * max_path_length
    self.n_parallel = n_parallel
    self.total_timesteps_sampled = 0
    self.vae = vae

    # setup vectorized environment
    if self.n_parallel > 1:
        self.vec_env = ParallelEnvExecutor(env, n_parallel, num_rollouts, self.max_path_length)
    else:
        self.vec_env = IterativeEnvExecutor(env, num_rollouts, self.max_path_length)
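# Hypothetical usage sketch (`MyEnv`, `MyPolicy`, and `obtain_samples` are assumptions,
# not names confirmed by this file): with n_parallel=1 rollouts are stepped serially
# through IterativeEnvExecutor, while n_parallel>1 splits them across worker processes
# via ParallelEnvExecutor.
# sampler = Sampler(env=MyEnv(), policy=MyPolicy(), num_rollouts=16,
#                   max_path_length=200, n_parallel=4)
# paths = sampler.obtain_samples()  # assumed sampler API from the base class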
def __init__(self, advance_curriculum_func, start_index=None, **kwargs):
    """
    :param advance_curriculum_func: Either 'one_hot' or 'smooth', depending on whether each level of the
        curriculum should be a single environment or a distribution over past environments
    :param start_index: index of the curriculum level to start on
    :param kwargs: arguments for the environment
    """
    Serializable.quick_init(self, locals())
    self.advance_curriculum_func = advance_curriculum_func
    # List of all the levels. There are more levels that were omitted because they are very similar to
    # the current ones (e.g. more Level_GoToLocal variants with different sizes and numbers of
    # distractors), as well as some harder levels with multiple instructions chained together.
    self.levels_list = [
        Level_GoToRedBallNoDists(**kwargs),  # 0
        Level_GoToRedBallGrey(**kwargs),  # 1
        Level_GoToRedBall(**kwargs),  # 2
        Level_GoToObjS4(**kwargs),  # 3
        Level_GoToObjS6(**kwargs),  # 4
        # Level_GoToObj(**kwargs),  # 5
        # Level_GoToLocalS5N2(**kwargs),  # 6
        # Level_GoToLocalS6N3(**kwargs),  # 7
        # Level_GoToLocalS7N4(**kwargs),  # 8
        # Level_GoToLocalS8N7(**kwargs),  # 9
        # Level_GoToLocal(**kwargs),  # 10
        # Level_PickupLocalS5N2(**kwargs),  # 11
        # Level_PickupLocalS6N3(**kwargs),  # 12
        # Level_PickupLocalS7N4(**kwargs),  # 13
        # Level_PickupLocalS8N7(**kwargs),  # 14
        # Level_PickupLocal(**kwargs),  # 15
        # Level_PutNextLocalS5N3(**kwargs),  # 16
        # Level_PutNextLocalS6N4(**kwargs),  # 17
        # Level_PutNextLocal(**kwargs),  # 18
        # Level_OpenLocalS5N3(**kwargs),  # 19
        # Level_OpenLocalS6N4(**kwargs),  # 20
        # Level_OpenLocal(**kwargs),  # 21
        # Level_GoToObjMazeOpen(**kwargs),  # 22
        # Level_GoToOpen(**kwargs),  # 23
        # Level_GoToObjMazeS4R2(**kwargs),  # 24
        # Level_GoToObjMazeS5(**kwargs),  # 25
        # Level_GoToObjMaze(**kwargs),  # 26
        # Level_Open(**kwargs),  # 27
        # Level_GoTo(**kwargs),  # 28
        # Level_Pickup(**kwargs),  # 29
        # Level_Unlock(**kwargs),  # 30
        # Level_GoToImpUnlock(**kwargs),  # 31
        # Level_PutNext(**kwargs),  # 32
        # Level_UnblockPickup(**kwargs),  # 33
    ]
    # If start_index isn't specified, start from the beginning of the curriculum.
    if start_index is None:
        start_index = 0
    self.distribution = np.zeros(len(self.levels_list))
    self.distribution[start_index] = 1
    self._wrapped_env = self.levels_list[start_index]
    self.index = start_index
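# Minimal runnable sketch of the one-hot start distribution built above, assuming a
# five-level curriculum and start_index=2: all probability mass sits on a single level.
# A 'smooth' advance_curriculum_func would instead spread mass over past levels.
import numpy as np
distribution = np.zeros(5)
distribution[2] = 1.0
assert distribution.sum() == 1.0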
def __init__(self,
             env,
             scale_reward=1.,
             normalize_obs=False,
             normalize_reward=False,
             obs_alpha=0.001,
             reward_alpha=0.001,
             normalization_scale=10.,
             ):
    Serializable.quick_init(self, locals())
    self._wrapped_env = env
def __init__(self, latent_dim, img_size=(64, 64), channels=3, lr=1e-4, step=0, batch_size=32):
    """
    VAE Class

    Args:
        latent_dim (int): dimension of the latent space
        img_size (tuple (int, int)): size of the image
        channels (int): number of channels [3 for rgb, 1 for grayscale]
        lr (float): learning rate
        step (int): initial training step
        batch_size (int): batch size
    """
    Serializable.quick_init(self, locals())
    self.latent_dim = latent_dim
    self.img_size = img_size
    self.n_channels = channels
    self.do = img_size[0] * img_size[1] * channels  # flattened output dimension
    self.batch_shape = [-1, img_size[0], img_size[1], channels]
    self.lr = lr
    self.batch_size = batch_size
    self._assign_ops = None
    self._assign_phs = None

    self.writer = tf.summary.FileWriter(logger.get_dir())

    with tf.variable_scope('vae', reuse=tf.AUTO_REUSE):
        self.initialize_placeholders()
        self.initialize_objective()
        self.global_step = step

        # decoder head for sampling images directly from latent vectors
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            self.z = tf.placeholder(tf.float32, [None, self.latent_dim])
            self.decoder = self.decode_sym(self.z).probs

        # collect all trainable variables under this scope, keyed by their unscoped names
        current_scope = tf.get_default_graph().get_name_scope()
        trainable_policy_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
        self.vae_params = OrderedDict([(remove_scope_from_name(var.name, current_scope), var)
                                       for var in trainable_policy_vars])
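# Hedged decoding sketch (assumes an active default TF session and a constructed `vae`
# instance of the class above): feed latent samples into the placeholder `vae.z` and
# evaluate the cached `vae.decoder` tensor, which holds the `.probs` of `decode_sym`.
# z = np.random.randn(8, vae.latent_dim).astype(np.float32)
# imgs = tf.get_default_session().run(vae.decoder, feed_dict={vae.z: z})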
def __init__(self, env, vae=None, use_img=True, img_size=(64, 64, 3), latent_dim=None, time_steps=4):
    Serializable.quick_init(self, locals())
    assert len(img_size) == 3
    self._wrapped_env = env
    self._vae = vae
    self._use_img = use_img
    self._img_size = img_size
    self._num_chan = img_size[-1]
    self._latent_dim = latent_dim
    self._time_steps = time_steps
def __init__(self,
             env,
             scale_reward=1.,
             normalize_obs=False,
             normalize_reward=False,
             obs_alpha=0.001,
             reward_alpha=0.001,
             normalization_scale=10.,
             ceil_reward=False,
             ):
    self.ceil_reward = ceil_reward
    Serializable.quick_init(self, locals())
    self._wrapped_env = env
    # size the previous-action buffer to match the action space
    if isinstance(self._wrapped_env.action_space, Discrete):
        size = self._wrapped_env.action_space.n
    else:
        size = self._wrapped_env.action_space.shape
    self.prev_action = np.zeros(size)
    self.prev_reward = [0]
    self.prev_done = [0]
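# Standalone sketch of the sizing branch above (`prev_action_buffer` is a hypothetical
# helper, not part of the original class): Discrete spaces get a zero vector of length
# n (one slot per action), while Box-like spaces reuse the action shape directly.
from gym.spaces import Discrete
import numpy as np

def prev_action_buffer(action_space):
    if isinstance(action_space, Discrete):
        size = action_space.n          # one-hot-sized buffer for discrete actions
    else:
        size = action_space.shape      # continuous: match the action shape
    return np.zeros(size)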
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
def __getstate__(self):
    d = Serializable.__getstate__(self)
    return d
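# Hedged round-trip sketch: because __getstate__/__setstate__ simply defer to
# Serializable, pickling an instance re-invokes __init__ with the constructor
# arguments captured by Serializable.quick_init.
# import pickle
# restored = pickle.loads(pickle.dumps(obj))  # `obj` is any instance of this class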
def __setstate__(self, state):
    Serializable.__setstate__(self, state['init_args'])
    self.policy = state['policy']
def __getstate__(self):
    state = dict()
    state['init_args'] = Serializable.__getstate__(self)
    # dump the wrapped policy's state as well
    state['policy'] = self.policy.__getstate__()
    return state
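# Hedged sketch of the nesting pattern above: the outer object stores its own
# constructor arguments under 'init_args' and delegates to the wrapped policy's
# __getstate__ for 'policy', so both layers survive a pickle round-trip.
# import pickle
# state = obj.__getstate__()               # {'init_args': ..., 'policy': ...}
# clone = pickle.loads(pickle.dumps(obj))  # restores outer object, then its policy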
def __setstate__(self, state):
    # LayersPowered.__setstate__(self, state)
    Serializable.__setstate__(self, state['init_args'])
def __getstate__(self):
    # state = LayersPowered.__getstate__(self)
    state = dict()
    state['init_args'] = Serializable.__getstate__(self)
    return state
def __setstate__(self, state):
    Serializable.__setstate__(self, state['init_args'])
    tf.get_default_session().run(tf.global_variables_initializer())
    self.set_params(state['network_params'])
def __getstate__(self):
    state = {
        'init_args': Serializable.__getstate__(self),
        'network_params': self.get_param_values()
    }
    return state
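# Hedged sketch of the restore path defined above (assumes an active default TF
# session and a pickled `serialized` blob): unpickling re-runs __init__ via
# Serializable (rebuilding the graph), then __setstate__ re-initializes all variables
# and overwrites them with the saved parameters via set_params.
# import pickle
# with tf.Session().as_default():
#     model = pickle.loads(serialized)  # triggers __setstate__ -> set_params(...)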