Example #1
    def __init__(self,
                 name,
                 env,
                 dynamics_model,
                 reward_model=None,
                 discount=1,
                 use_cem=False,
                 n_candidates=1024,
                 horizon=10,
                 num_cem_iters=8,
                 percent_elites=0.05,
                 use_reward_model=False):
        self.dynamics_model = dynamics_model
        self.reward_model = reward_model
        self.discount = discount
        self.n_candidates = n_candidates
        self.horizon = horizon
        self.use_cem = use_cem
        self.num_cem_iters = num_cem_iters
        self.percent_elites = percent_elites
        self.env = env
        self.use_reward_model = use_reward_model
        self._hidden_state = None

        self.unwrapped_env = env
        while hasattr(self.unwrapped_env, 'wrapped_env'):
            self.unwrapped_env = self.unwrapped_env.wrapped_env

        # make sure that the env exposes a reward function
        if not self.use_reward_model:
            assert hasattr(self.unwrapped_env,
                           'reward'), "env must have a reward function"

        Serializable.quick_init(self, locals())
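Every example in this file funnels the constructor arguments through Serializable.quick_init(self, locals()), which records them so the object can later be pickled and rebuilt by re-running __init__. The snippet below is a minimal, self-contained sketch of that idea using a toy MiniSerializable class; it illustrates the pattern only and is not the actual rllab/meta-mb Serializable implementation (all names here are invented for the sketch).

import inspect
import pickle

class MiniSerializable:
    """Toy illustration of the quick_init pattern (not the real Serializable)."""

    def quick_init(self, locals_):
        # Record the constructor arguments by name, skipping 'self'.
        sig = inspect.signature(type(self).__init__)
        self.__init_kwargs = {name: locals_[name]
                              for name in sig.parameters if name != 'self'}

    def __getstate__(self):
        # Persist only the constructor arguments.
        return {'init_kwargs': self.__init_kwargs}

    def __setstate__(self, state):
        # Rebuild the object by re-running __init__ with the stored arguments.
        self.__init__(**state['init_kwargs'])


class ToyController(MiniSerializable):
    def __init__(self, horizon=10, discount=1.0):
        self.quick_init(locals())
        self.horizon = horizon
        self.discount = discount


controller = ToyController(horizon=5)
clone = pickle.loads(pickle.dumps(controller))
assert clone.horizon == 5 and clone.discount == 1.0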
Example #2
    def __init__(
        self,
        env,
        policy,
        num_rollouts,
        max_path_length,
        n_parallel=1,
        vae=None,
    ):
        Serializable.quick_init(self, locals())
        super(Sampler, self).__init__(env, policy, n_parallel, max_path_length)

        self.total_samples = num_rollouts * max_path_length
        self.n_parallel = n_parallel
        self.total_timesteps_sampled = 0
        self.vae = vae

        # setup vectorized environment

        if self.n_parallel > 1:
            self.vec_env = ParallelEnvExecutor(env, n_parallel, num_rollouts,
                                               self.max_path_length)
        else:
            self.vec_env = IterativeEnvExecutor(env, num_rollouts,
                                                self.max_path_length)
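ParallelEnvExecutor and IterativeEnvExecutor are project-specific classes whose exact interfaces are not shown here. As a rough, self-contained sketch of what the iterative (single-process) case typically does, the toy executor below steps several copies of an environment sequentially and auto-resets each copy when it finishes or exceeds max_path_length; all names are illustrative assumptions, not the project's API.

import copy
import numpy as np

class ToyEnv:
    """Trivial stand-in environment with a gym-like reset/step interface."""
    def __init__(self, horizon=5):
        self.horizon = horizon
        self.t = 0

    def reset(self):
        self.t = 0
        return np.array([0.0])

    def step(self, action):
        self.t += 1
        obs = np.array([float(self.t)])
        reward = float(action)
        done = self.t >= self.horizon
        return obs, reward, done, {}


class ToyIterativeExecutor:
    """Steps num_envs copies of an env one after another in a single process."""
    def __init__(self, env, num_envs, max_path_length):
        self.envs = [copy.deepcopy(env) for _ in range(num_envs)]
        self.max_path_length = max_path_length
        self.ts = np.zeros(num_envs, dtype=int)

    def reset(self):
        self.ts[:] = 0
        return [e.reset() for e in self.envs]

    def step(self, actions):
        results = []
        for i, (e, a) in enumerate(zip(self.envs, actions)):
            obs, rew, done, info = e.step(a)
            self.ts[i] += 1
            if done or self.ts[i] >= self.max_path_length:
                obs = e.reset()  # auto-reset finished environments
                self.ts[i] = 0
                done = True
            results.append((obs, rew, done, info))
        obs_list, reward_list, done_list, info_list = map(list, zip(*results))
        return obs_list, reward_list, done_list, info_list


executor = ToyIterativeExecutor(ToyEnv(), num_envs=3, max_path_length=4)
obses = executor.reset()
obses, rewards, dones, infos = executor.step([1.0, 1.0, 1.0])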
Example #3
    def __init__(self, advance_curriculum_func, start_index=None, **kwargs):
        """

        :param advance_curriculum_func: Either 'one_hot' or 'smooth' depending on whether you want each level of the
        curriculum to be a single environment or a distribution over past environments
        :param start_index: what index of the curriculum to start on
        :param kwargs: arguments for the environment
        """
        Serializable.quick_init(self, locals())
        self.advance_curriculum_func = advance_curriculum_func
        # List of all the levels.  There are more levels available: some were omitted because they are very
        # similar to the ones included (e.g. additional Level_GoToLocal variants with different sizes and
        # numbers of distractors), along with some harder levels that chain multiple instructions together.
        self.levels_list = [
            Level_GoToRedBallNoDists(**kwargs),  # 0
            Level_GoToRedBallGrey(**kwargs),  # 1
            Level_GoToRedBall(**kwargs),  # 2
            Level_GoToObjS4(**kwargs),  # 3
            Level_GoToObjS6(**kwargs),  # 4
            # Level_GoToObj(**kwargs),  # 5
            # Level_GoToLocalS5N2(**kwargs),  # 6
            # Level_GoToLocalS6N3(**kwargs),  # 7
            # Level_GoToLocalS7N4(**kwargs),  # 8
            # Level_GoToLocalS8N7(**kwargs),  # 9
            # Level_GoToLocal(**kwargs),  # 10
            # Level_PickupLocalS5N2(**kwargs),  # 11
            # Level_PickupLocalS6N3(**kwargs),  # 12
            # Level_PickupLocalS7N4(**kwargs),  # 13
            # Level_PickupLocalS8N7(**kwargs),  # 14
            # Level_PickupLocal(**kwargs), #  15
            # Level_PutNextLocalS5N3(**kwargs), # 16
            # Level_PutNextLocalS6N4(**kwargs),  # 17
            # Level_PutNextLocal(**kwargs),  # 18
            # Level_OpenLocalS5N3(**kwargs),  # 19
            # Level_OpenLocalS6N4(**kwargs),  # 20
            # Level_OpenLocal(**kwargs),  # 21
            # Level_GoToObjMazeOpen(**kwargs),  # 22
            # Level_GoToOpen(**kwargs),  # 23
            # Level_GoToObjMazeS4R2(**kwargs),  # 24
            # Level_GoToObjMazeS5(**kwargs),  # 25
            # Level_GoToObjMaze(**kwargs),  # 26
            # Level_Open(**kwargs),  # 27
            # Level_GoTo(**kwargs),  # 28
            # Level_Pickup(**kwargs),  # 29
            # Level_Unlock(**kwargs),  # 30
            # Level_GoToImpUnlock(**kwargs),  # 31
            # Level_PutNext(**kwargs),  # 32
            # Level_UnblockPickup(**kwargs),  # 33
        ]
        # If a start index isn't specified, start from the beginning of the curriculum.
        if start_index is None:
            start_index = 0
        self.distribution = np.zeros((len(self.levels_list)))
        self.distribution[start_index] = 1
        self._wrapped_env = self.levels_list[start_index]
        self.index = start_index
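The curriculum keeps a probability distribution over levels, initialized as a one-hot vector on start_index. The sketch below shows how such a distribution could be sampled and advanced; the 'one_hot' and 'smooth' update rules are assumptions for illustration, since advance_curriculum_func's actual behavior is not part of this snippet.

import numpy as np

def sample_level_index(distribution, rng=np.random):
    # Pick the level to instantiate for the next episode.
    return rng.choice(len(distribution), p=distribution)

def advance_one_hot(distribution):
    # Move all probability mass to the next level (assumed 'one_hot' behavior).
    index = int(np.argmax(distribution))
    new = np.zeros_like(distribution)
    new[min(index + 1, len(distribution) - 1)] = 1.0
    return new

def advance_smooth(distribution, past_weight=0.1):
    # Keep some mass on already-mastered levels (assumed 'smooth' behavior).
    index = int(np.argmax(distribution))
    new_index = min(index + 1, len(distribution) - 1)
    new = np.zeros_like(distribution)
    new[:new_index] = past_weight / max(new_index, 1)
    new[new_index] = 1.0 - past_weight if new_index > 0 else 1.0
    return new

dist = np.zeros(5)
dist[0] = 1.0
dist = advance_smooth(dist)     # e.g. [0.1, 0.9, 0.0, 0.0, 0.0]
level = sample_level_index(dist)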
Example #4
    def __init__(self,
                 env,
                 scale_reward=1.,
                 normalize_obs=False,
                 normalize_reward=False,
                 obs_alpha=0.001,
                 reward_alpha=0.001,
                 normalization_scale=10.,
                 ):
        Serializable.quick_init(self, locals())

        self._wrapped_env = env
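obs_alpha and reward_alpha are commonly used as step sizes for running estimates of observation and reward statistics. The sketch below shows that exponential-moving-average normalization in isolation; it is a plausible reading of what the wrapper does with these parameters, not its verified internals.

import numpy as np

class RunningNormalizer:
    """Exponential-moving-average normalizer, as commonly used for observations."""
    def __init__(self, shape, alpha=0.001, eps=1e-8):
        self.alpha = alpha
        self.eps = eps
        self.mean = np.zeros(shape)
        self.var = np.ones(shape)

    def update(self, x):
        # Move the running mean/variance a small step towards the new sample.
        self.mean = (1 - self.alpha) * self.mean + self.alpha * x
        self.var = (1 - self.alpha) * self.var + self.alpha * np.square(x - self.mean)

    def normalize(self, x):
        return (x - self.mean) / (np.sqrt(self.var) + self.eps)


norm = RunningNormalizer(shape=(3,), alpha=0.001)
obs = np.array([1.0, -2.0, 0.5])
norm.update(obs)
normalized_obs = norm.normalize(obs)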
Example #5
    def __init__(self,
                 latent_dim,
                 img_size=(64, 64),
                 channels=3,
                 lr=1e-4,
                 step=0,
                 batch_size=32):
        """
        VAE Class

        Args:
            latent_dim (int): dimension of the latent space
            img_size (tuple (int, int)): height and width of the input image
            channels (int): number of channels (3 for RGB, 1 for grayscale)
            lr (float): learning rate
            step (int): initial training step
            batch_size (int): batch size
        """
        Serializable.quick_init(self, locals())

        self.latent_dim = latent_dim
        self.img_size = img_size
        self.n_channels = channels
        self.do = img_size[0] * img_size[1] * channels  # flattened image dimensionality (H * W * C)
        self.batch_shape = [-1, img_size[0], img_size[1], channels]
        self.lr = lr
        self.batch_size = batch_size

        self._assign_ops = None
        self._assign_phs = None

        self.writer = tf.summary.FileWriter(logger.get_dir())
        with tf.variable_scope('vae', reuse=tf.AUTO_REUSE):

            self.initialize_placeholders()
            self.initialize_objective()
            self.global_step = step

            with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
                self.z = tf.placeholder(tf.float32, [None, self.latent_dim])
                self.decoder = self.decode_sym(self.z).probs

            current_scope = tf.get_default_graph().get_name_scope()
            trainable_policy_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.vae_params = OrderedDict([
                (remove_scope_from_name(var.name, current_scope), var)
                for var in trainable_policy_vars
            ])
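The final lines collect the trainable variables under the current scope into an OrderedDict keyed by their names with the enclosing scope stripped. The helper below is a toy version of that name-stripping step (not the project's actual remove_scope_from_name), using plain strings in place of tf.Variable objects.

from collections import OrderedDict

def strip_scope(var_name, scope):
    """Toy scope-stripping helper: 'vae/decoder/kernel:0' -> 'decoder/kernel'."""
    name = var_name
    if scope and name.startswith(scope + '/'):
        name = name[len(scope) + 1:]
    return name.split(':')[0]  # drop the trailing ':0' output suffix

names = ['vae/encoder/kernel:0', 'vae/decoder/kernel:0']
params = OrderedDict((strip_scope(n, 'vae'), n) for n in names)
# OrderedDict([('encoder/kernel', 'vae/encoder/kernel:0'),
#              ('decoder/kernel', 'vae/decoder/kernel:0')])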
Example #6
    def __init__(self,
                 env,
                 vae=None,
                 use_img=True,
                 img_size=(64, 64, 3),
                 latent_dim=None,
                 time_steps=4):

        Serializable.quick_init(self, locals())
        assert len(img_size) == 3

        self._wrapped_env = env
        self._vae = vae
        self._use_img = use_img
        self._img_size = img_size
        self._num_chan = img_size[-1]
        self._latent_dim = latent_dim
        self._time_steps = time_steps
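With use_img and a VAE, a wrapper like this typically encodes each frame to a latent vector and keeps the last time_steps latents as the observation. The sketch below shows only that frame-stacking bookkeeping with a deque; it is an assumption about how _time_steps is used, and the class name is invented.

from collections import deque
import numpy as np

class LatentFrameStacker:
    """Keeps the most recent `time_steps` latent vectors and returns them stacked."""
    def __init__(self, latent_dim, time_steps=4):
        self.latent_dim = latent_dim
        self.time_steps = time_steps
        self.buffer = deque(maxlen=time_steps)

    def reset(self, first_latent):
        # Fill the history with the first latent so the stacked shape is constant.
        self.buffer.clear()
        for _ in range(self.time_steps):
            self.buffer.append(first_latent)
        return self.stacked()

    def push(self, latent):
        self.buffer.append(latent)
        return self.stacked()

    def stacked(self):
        return np.concatenate(list(self.buffer), axis=-1)


stacker = LatentFrameStacker(latent_dim=8, time_steps=4)
obs = stacker.reset(np.zeros(8))  # shape (32,)
obs = stacker.push(np.ones(8))    # newest latent appended, oldest dropped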
Example #7
    def __init__(self,
                 env,
                 scale_reward=1.,
                 normalize_obs=False,
                 normalize_reward=False,
                 obs_alpha=0.001,
                 reward_alpha=0.001,
                 normalization_scale=10.,
                 ceil_reward=False,
                 ):
        self.ceil_reward = ceil_reward
        Serializable.quick_init(self, locals())

        self._wrapped_env = env
        if isinstance(self._wrapped_env.action_space, Discrete):
            size = self._wrapped_env.action_space.n
        else:
            size = self._wrapped_env.action_space.shape
        self.prev_action = np.zeros(size)
        self.prev_reward = [0]
        self.prev_done = [0]
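Keeping prev_action (sized for one-hot actions in Discrete spaces), prev_reward, and prev_done suggests the wrapper augments each observation with the previous transition, as in RL^2-style setups. The helper below sketches that concatenation; it is an assumed usage, not code taken from the wrapper itself.

import numpy as np

def augment_observation(obs, prev_action, prev_reward, prev_done):
    """Concatenate the previous action/reward/done flags onto the observation."""
    return np.concatenate([
        np.asarray(obs, dtype=np.float32).ravel(),
        np.asarray(prev_action, dtype=np.float32).ravel(),
        np.asarray(prev_reward, dtype=np.float32).ravel(),
        np.asarray(prev_done, dtype=np.float32).ravel(),
    ])

obs = np.array([0.1, -0.2])
augmented = augment_observation(obs, prev_action=np.zeros(3),
                                prev_reward=[0], prev_done=[0])
# augmented.shape == (7,)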
Example #8
    def __setstate__(self, d):
        Serializable.__setstate__(self, d)
Example #9
    def __getstate__(self):
        d = Serializable.__getstate__(self)
        return d
Example #10
    def __setstate__(self, state):
        Serializable.__setstate__(self, state['init_args'])
        self.policy = state['policy']
Example #11
    def __getstate__(self):
        state = dict()
        state['init_args'] = Serializable.__getstate__(self)
        # dump the policy's state separately
        state['policy'] = self.policy.__getstate__()
        return state
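Examples #10 and #11 split the pickled state into the Serializable constructor arguments ('init_args') and the policy's own state ('policy'). The toy classes below show how pickle drives this __getstate__/__setstate__ handshake end to end; they omit the Serializable init_args part and hard-code the policy reconstruction purely for illustration.

import pickle

class ToyPolicy:
    def __init__(self):
        self.weights = [1.0, 2.0]

    def __getstate__(self):
        return {'weights': self.weights}

    def __setstate__(self, state):
        self.weights = state['weights']


class ToyAgent:
    def __init__(self, policy):
        self.policy = policy

    def __getstate__(self):
        # Store the sub-object's state under its own key, as in the examples above.
        return {'policy': self.policy.__getstate__()}

    def __setstate__(self, state):
        self.policy = ToyPolicy()
        self.policy.__setstate__(state['policy'])


agent = ToyAgent(ToyPolicy())
restored = pickle.loads(pickle.dumps(agent))
assert restored.policy.weights == [1.0, 2.0]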
Example #12
    def __setstate__(self, state):
        # LayersPowered.__setstate__(self, state)
        Serializable.__setstate__(self, state['init_args'])
Example #13
    def __getstate__(self):
        # state = LayersPowered.__getstate__(self)
        state = dict()
        state['init_args'] = Serializable.__getstate__(self)
        return state
Example #14
    def __setstate__(self, state):
        Serializable.__setstate__(self, state['init_args'])
        tf.get_default_session().run(tf.global_variables_initializer())
        self.set_params(state['network_params'])
Example #15
    def __getstate__(self):
        state = {
            'init_args': Serializable.__getstate__(self),
            'network_params': self.get_param_values()
        }
        return state
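Examples #14 and #15 additionally snapshot the network weights with get_param_values and, on restore, re-initialize the TensorFlow variables before calling set_params. The sketch below mimics that get/set round trip with plain NumPy arrays instead of TF variables; ToyNetwork and its methods are stand-ins, not the real API.

import numpy as np

class ToyNetwork:
    """Toy stand-in for a parameterized model with get/set param-value helpers."""
    def __init__(self):
        self.params = {'w': np.zeros((2, 2)), 'b': np.zeros(2)}

    def get_param_values(self):
        # Return a copy of every parameter array, keyed by name.
        return {k: v.copy() for k, v in self.params.items()}

    def set_params(self, values):
        # Overwrite parameters in place from a name -> array mapping.
        for k, v in values.items():
            self.params[k][...] = v


net = ToyNetwork()
net.params['w'][...] = 1.0
snapshot = net.get_param_values()  # what __getstate__ would store as 'network_params'

fresh = ToyNetwork()               # analogous to re-initializing variables on load
fresh.set_params(snapshot)         # analogous to self.set_params(state['network_params'])
assert np.allclose(fresh.params['w'], 1.0)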