Example #1
    def __init__(self, simulated=False):
        """
        :param simulated: bool
                if the environment is for real robot or simulation
        """
        Serializable.quick_init(self, locals())

        # Dedicated, reproducibly seeded RNG for this environment.
        self._rng = np.random.RandomState(get_seed())

        self._initial_setup()
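
The constructor seeds a per-environment RandomState for reproducibility. A minimal, self-contained sketch of the same seeding pattern; get_seed here is a hypothetical stand-in for the framework's seed accessor, not part of the snippet above:

import numpy as np

def get_seed():
    # Hypothetical stand-in for the framework's global seed helper.
    return 42

rng = np.random.RandomState(get_seed())
print(rng.uniform(-1.0, 1.0, size=3))  # identical output on every run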
Example #2
    def populate_task(self, env, policy, scope=None):
        logger.log("Populating workers...")
        if singleton_pool.n_parallel > 1:
            # Broadcast pickled copies of the env and policy so each
            # worker process can rebuild its own instances.
            singleton_pool.run_each(
                _worker_populate_task,
                [(pickle.dumps(env), pickle.dumps(policy), scope)] *
                singleton_pool.n_parallel)
        else:
            # Single-process case: share the objects directly and avoid
            # unnecessary copying.
            g = parallel_sampler._get_scoped_g(singleton_pool.G, scope)
            g.env = env
            g.policy = policy
        parallel_sampler.set_seed(ext.get_seed())
        logger.log("Populated")
Example #3
    def _build_graph(self, from_latent_input, from_obs_input):
        action_dim = self.action_space.flat_dim

        with self._variable_scope:

            with tf.variable_scope("concat_latent_obs"):
                latent_obs_input = tf.concat(
                    [from_latent_input, from_obs_input], axis=-1)

            with tf.variable_scope("dist_params"):
                if self._std_share_network:
                    # mean and std networks share an MLP
                    b = np.concatenate([
                        np.zeros(action_dim),
                        np.full(action_dim, self._init_std_param)
                    ], axis=0)
                    b = tf.constant_initializer(b)
                    mean_std_network = mlp(
                        with_input=latent_obs_input,
                        output_dim=action_dim * 2,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        # hidden_w_init=tf.orthogonal_initializer(1.0),
                        # output_w_init=tf.orthogonal_initializer(1.0),
                        output_b_init=b,
                        name="mean_std_network")
                    with tf.variable_scope("mean_network"):
                        mean_network = mean_std_network[..., :action_dim]
                    with tf.variable_scope("std_network"):
                        std_network = mean_std_network[..., action_dim:]

                else:
                    # separate MLPs for mean and std networks
                    # mean network
                    mean_network = mlp(
                        with_input=latent_obs_input,
                        output_dim=action_dim,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        name="mean_network")

                    # std network
                    if self._adaptive_std:
                        b = tf.constant_initializer(self._init_std_param)
                        std_network = mlp(
                            with_input=latent_obs_input,
                            output_dim=action_dim,
                            hidden_sizes=self._std_hidden_sizes,
                            hidden_nonlinearity=self._std_hidden_nonlinearity,
                            output_nonlinearity=self._output_nonlinearity,
                            output_b_init=b,
                            name="std_network")
                    else:
                        p = tf.constant_initializer(self._init_std_param)
                        std_network = parameter(with_input=latent_obs_input,
                                                length=action_dim,
                                                initializer=p,
                                                trainable=self._learn_std,
                                                name="std_network")

                mean_var = mean_network
                std_param_var = std_network

                with tf.variable_scope("std_limits"):
                    if self._min_std_param:
                        std_param_var = tf.maximum(std_param_var,
                                                   self._min_std_param)
                    if self._max_std_param:
                        std_param_var = tf.minimum(std_param_var,
                                                   self._max_std_param)

            with tf.variable_scope("std_parameterization"):
                # build std_var with std parameterization
                if self._std_parameterization == "exp":
                    std_var = tf.exp(std_param_var)
                elif self._std_parameterization == "softplus":
                    std_var = tf.log(1. + tf.exp(std_param_var))
                else:
                    raise NotImplementedError

            dist = tf.contrib.distributions.MultivariateNormalDiag(
                mean_var, std_var)

            action_var = dist.sample(seed=ext.get_seed())

            return action_var, mean_var, std_param_var, dist
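
Both std parameterizations used above keep the standard deviation positive. A quick NumPy check of the arithmetic, independent of TensorFlow (illustration only):

import numpy as np

std_param = np.array([-2.0, 0.0, 2.0])
print(np.exp(std_param))             # 'exp': [0.135, 1.0, 7.389]
print(np.log1p(np.exp(std_param)))   # 'softplus': [0.127, 0.693, 2.127]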
Example #4
    def _build(self, state_input):
        action_dim = self._output_dim

        with tf.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    output_b_init=b,
                    name='mean_std_network')
                with tf.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.variable_scope('std_network'):
                    std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    name='mean_network')

                # std network
                if self._adaptive_std:
                    b = tf.constant_initializer(self._init_std_param)
                    std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_b_init=b,
                        name='std_network')
                else:
                    p = tf.constant_initializer(self._init_std_param)
                    std_network = parameter(state_input,
                                            length=action_dim,
                                            initializer=p,
                                            trainable=self._learn_std,
                                            name='std_network')

        mean_var = mean_network
        log_std_var = std_network

        with tf.variable_scope('std_parameterization'):
            # Keep log_std_var in log-space under either parameterization.
            if self._std_parameterization == 'exp':
                pass  # the std network already outputs the log-std
            elif self._std_parameterization == 'softplus':
                softplus_std_var = tf.log(1. + tf.exp(log_std_var))
                log_std_var = tf.log(softplus_std_var)
            else:
                raise NotImplementedError

        with tf.variable_scope('std_limits'):
            # Explicit None checks so a limit of 0. still applies.
            if self._min_std_param is not None:
                log_std_var = tf.maximum(log_std_var, self._min_std_param)
            if self._max_std_param is not None:
                log_std_var = tf.minimum(log_std_var, self._max_std_param)

        distribution = tfp.distributions.MultivariateNormalDiag(
            mean_var, tf.exp(log_std_var))

        action_var = distribution.sample(seed=ext.get_seed())

        return action_var, log_std_var, distribution
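
The returned distribution is a standard TensorFlow Probability object, so the usual sample/log_prob API applies. A minimal usage sketch, assuming tfp is installed; the loc/scale values are made up for illustration:

import numpy as np
import tensorflow_probability as tfp

dist = tfp.distributions.MultivariateNormalDiag(
    loc=np.zeros(2, dtype=np.float32),
    scale_diag=np.exp(np.array([-1.0, 0.5], dtype=np.float32)))
sample = dist.sample(seed=7)      # one action-shaped draw
log_prob = dist.log_prob(sample)  # its log-density under the policy

In TF1 graph mode these are symbolic tensors evaluated in a session; in eager mode they are concrete values.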
Example #5
    def _build(self, state_input):
        action_dim = self._output_dim

        with tf.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    output_b_init=b,
                    name='mean_std_network')
                with tf.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.variable_scope('std_network'):
                    std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    name='mean_network')

                # std network
                if self._adaptive_std:
                    b = tf.constant_initializer(self._init_std_param)
                    std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_b_init=b,
                        name='std_network')
                else:
                    p = tf.constant_initializer(self._init_std_param)
                    std_network = parameter(state_input,
                                            length=action_dim,
                                            initializer=p,
                                            trainable=self._learn_std,
                                            name='std_network')

        mean_var = mean_network
        std_param_var = std_network

        with tf.variable_scope('std_parameterization'):
            # Transform the raw std output per the chosen parameterization.
            if self._std_parameterization == 'exp':
                pass  # std_param_var is used as-is (interpreted as log-std)
            elif self._std_parameterization == 'softplus':
                std_param_var = tf.log(1. + tf.exp(std_param_var))
            else:
                raise NotImplementedError

        with tf.variable_scope('std_limits'):
            # Clamp sequentially so both limits can apply, and make sure
            # std_var is defined even when no limits are set.
            std_var = std_param_var
            if self._min_std_param is not None:
                std_var = tf.maximum(std_var, self._min_std_param)
            if self._max_std_param is not None:
                std_var = tf.minimum(std_var, self._max_std_param)

        dist = DiagonalGaussian(action_dim)

        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                               seed=ext.get_seed())
        action_var = rnd * tf.exp(std_var) + mean_var

        return action_var, mean_var, std_var, std_param_var, dist
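
The sampling at the end is the reparameterization trick: noise is drawn once and then shifted and scaled by the distribution parameters, which keeps the sample differentiable with respect to mean_var and std_var. The same computation in plain NumPy (illustration only):

import numpy as np

rng = np.random.RandomState(0)
mean = np.array([0.5, -0.5])
log_std = np.array([-1.0, 0.0])
noise = rng.standard_normal(mean.shape)
action = noise * np.exp(log_std) + mean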