Example #1
    def _build(self,
               obs_input,
               step_obs_input,
               step_hidden,
               step_cell,
               name=None):
        return_var = tf.compat.v1.get_variable(
            'return_var', (), initializer=tf.constant_initializer(0.5))
        mean = log_std = tf.fill(
            (tf.shape(obs_input)[0], tf.shape(obs_input)[1], self.output_dim),
            return_var)
        step_mean = step_log_std = tf.fill(
            (tf.shape(step_obs_input)[0], self.output_dim), return_var)

        hidden_init_var = tf.compat.v1.get_variable(
            name='initial_hidden',
            shape=(self.hidden_dim, ),
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.float32)
        cell_init_var = tf.compat.v1.get_variable(
            name='initial_cell',
            shape=(self.hidden_dim, ),
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.float32)

        dist = DiagonalGaussian(self.output_dim)
        # sample = 0.5 * 0.5 + 0.5 = 0.75
        return (mean, step_mean, log_std, step_log_std, step_hidden, step_cell,
                hidden_init_var, cell_init_var, dist)
Example #2
    def _build(self, state_input, step_input, hidden_input, name=None):
        action_dim = self._output_dim

        with tf.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=self.
                    _hidden_state_init_trainable,
                    output_nonlinearity_layer=self.
                    _mean_std_output_nonlinearity_layer)
                with tf.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=self.
                    _hidden_state_init_trainable,
                    output_nonlinearity_layer=self.
                    _mean_output_nonlinearity_layer)
                log_std_var = parameter(state_input,
                                        length=action_dim,
                                        initializer=tf.constant_initializer(
                                            self._init_std_param),
                                        trainable=self._learn_std,
                                        name='log_std_param')
                step_log_std_var = parameter(
                    step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='step_log_std_param')

        dist = DiagonalGaussian(self._output_dim)
        rnd = tf.random.normal(shape=step_mean_var.get_shape().as_list()[1:])
        action_var = rnd * tf.exp(step_log_std_var) + step_mean_var

        return (action_var, mean_var, step_mean_var, log_std_var,
                step_log_std_var, step_hidden, hidden_init_var, dist)
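
Example #2 draws actions with the reparameterization trick: a unit-Gaussian sample is scaled by exp(log_std) and shifted by the mean. Below is a minimal numpy sketch of the same computation (the helper name is illustrative, not part of the library):

import numpy as np

def sample_diag_gaussian(mean, log_std, rng):
    # Reparameterized sample: mean + exp(log_std) * eps, with eps ~ N(0, I).
    eps = rng.standard_normal(mean.shape)
    return mean + np.exp(log_std) * eps

rng = np.random.default_rng(0)
mean = np.zeros(3)
log_std = np.full(3, np.log(0.5))  # std = 0.5 in every dimension
action = sample_diag_gaussian(mean, log_std, rng)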
Example #3
    def _build(self, obs_input, name=None):
        return_var = tf.get_variable('return_var', (),
                                     initializer=tf.constant_initializer(0.5))
        mean = tf.fill((tf.shape(obs_input)[0], self.output_dim), return_var)
        log_std = tf.fill((tf.shape(obs_input)[0], self.output_dim), 0.5)
        action = mean + log_std * 0.5
        dist = DiagonalGaussian(self.output_dim)
        # action will be 0.5 + 0.5 * 0.5 = 0.75
        return action, mean, log_std, log_std, dist
Example #4
    def _build(self,
               obs_input,
               step_obs_input,
               step_hidden,
               step_cell,
               name=None):
        """Build model given input placeholder(s).

        Args:
            obs_input (tf.Tensor): Place holder for entire time-series
                inputs.
            step_obs_input (tf.Tensor): Place holder for step inputs.
            step_hidden (tf.Tensor): Place holder for step hidden state.
            step_cell (tf.Tensor): Place holder for step cell state.
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Return:
            tf.Tensor: Entire time-series means.
            tf.Tensor: Step mean.
            tf.Tensor: Entire time-series log std.
            tf.Tensor: Step log std.
            tf.Tensor: Step hidden state.
            tf.Tensor: Step cell state.
            tf.Tensor: Initial hidden state.
            tf.Tensor: Initial cell state.
            garage.distributions.DiagonalGaussian: Distribution.

        """
        del name
        return_var = tf.compat.v1.get_variable(
            'return_var', (), initializer=tf.constant_initializer(0.5))
        mean = log_std = tf.fill(
            (tf.shape(obs_input)[0], tf.shape(obs_input)[1], self.output_dim),
            return_var)
        step_mean = step_log_std = tf.fill(
            (tf.shape(step_obs_input)[0], self.output_dim), return_var)

        hidden_init_var = tf.compat.v1.get_variable(
            name='initial_hidden',
            shape=(self.hidden_dim, ),
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.float32)
        cell_init_var = tf.compat.v1.get_variable(
            name='initial_cell',
            shape=(self.hidden_dim, ),
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.float32)

        dist = DiagonalGaussian(self.output_dim)
        # sample = 0.5 * 0.5 + 0.5 = 0.75
        return (mean, step_mean, log_std, step_log_std, step_hidden, step_cell,
                hidden_init_var, cell_init_var, dist)
Example #5
    def _build(self, obs_input, name=None):
        """Build model.

        Args:
            obs_input (tf.Tensor): Entire time-series observation input.
            name (str): Inner model name, which is also the variable scope
                of the inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Mean.
            tf.Tensor: Log of standard deviation.
            garage.distributions.DiagonalGaussian: Distribution.

        """
        del name
        return_var = tf.compat.v1.get_variable(
            'return_var', (), initializer=tf.constant_initializer(0.5))
        mean = tf.fill((tf.shape(obs_input)[0], self.output_dim), return_var)
        log_std = tf.fill((tf.shape(obs_input)[0], self.output_dim),
                          np.log(0.5))
        dist = DiagonalGaussian(self.output_dim)
        # action will be 0.5 + 0.5 * 0.5 = 0.75
        return mean, log_std, dist
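
The comment's arithmetic checks out: the mean is 0.5, the std is exp(log 0.5) = 0.5, and with the noise draw fixed at 0.5 the sampled action is 0.75. A one-line verification (the fixed noise value is an assumption taken from the comment):

import numpy as np

mean, log_std, eps = 0.5, np.log(0.5), 0.5
action = mean + np.exp(log_std) * eps
assert np.isclose(action, 0.75)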
Example #6
    def __init__(self,
                 input_shape,
                 output_dim,
                 name="GaussianMLPRegressor",
                 mean_network=None,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.tanh,
                 optimizer=None,
                 optimizer_args=None,
                 use_trust_region=True,
                 max_kl_step=0.01,
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 std_nonlinearity=None,
                 normalize_inputs=True,
                 normalize_outputs=True,
                 subsample_factor=1.0):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean
         network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the
         mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param max_kl_step: KL divergence constraint for each iteration.
        :param learn_std: Whether to learn the standard deviations. Only
         effective if adaptive_std is False. If adaptive_std is True, this
         parameter is ignored, and the weights for the std network are always
         learned.
        :param adaptive_std: Whether to make the std a function of the states.
        :param std_share_network: Whether to use the same network as the mean.
        :param std_hidden_sizes: Number of hidden units of each layer of the
         std network. Only used if `std_share_network` is False. It defaults to
         the same architecture as the mean.
        :param std_nonlinearity: Non-linearity used for each layer of the std
         network. Only used if `std_share_network` is False. It defaults to the
         same non-linearity as the mean.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        self._mean_network_name = "mean_network"
        self._std_network_name = "std_network"

        with tf.variable_scope(name):
            if optimizer_args is None:
                optimizer_args = dict()

            if optimizer is None:
                if use_trust_region:
                    optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
                else:
                    optimizer = LbfgsOptimizer(**optimizer_args)
            else:
                optimizer = optimizer(**optimizer_args)

            self._optimizer = optimizer
            self._subsample_factor = subsample_factor

            if mean_network is None:
                if std_share_network:
                    mean_network = MLP(
                        name="mean_network",
                        input_shape=input_shape,
                        output_dim=2 * output_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=None,
                    )
                    l_mean = L.SliceLayer(
                        mean_network.output_layer,
                        slice(output_dim),
                        name="mean_slice",
                    )
                else:
                    mean_network = MLP(
                        name="mean_network",
                        input_shape=input_shape,
                        output_dim=output_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=None,
                    )
                    l_mean = mean_network.output_layer

            if adaptive_std:
                l_log_std = MLP(
                    name="log_std_network",
                    input_shape=input_shape,
                    input_var=mean_network.input_layer.input_var,
                    output_dim=output_dim,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_nonlinearity,
                    output_nonlinearity=None,
                ).output_layer
            elif std_share_network:
                l_log_std = L.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim, 2 * output_dim),
                    name="log_std_slice",
                )
            else:
                l_log_std = L.ParamLayer(
                    mean_network.input_layer,
                    num_units=output_dim,
                    param=tf.constant_initializer(np.log(init_std)),
                    name="output_log_std",
                    trainable=learn_std,
                )

            LayersPowered.__init__(self, [l_mean, l_log_std])

            xs_var = mean_network.input_layer.input_var
            ys_var = tf.placeholder(dtype=tf.float32,
                                    name="ys",
                                    shape=(None, output_dim))
            old_means_var = tf.placeholder(dtype=tf.float32,
                                           name="old_means",
                                           shape=(None, output_dim))
            old_log_stds_var = tf.placeholder(dtype=tf.float32,
                                              name="old_log_stds",
                                              shape=(None, output_dim))

            x_mean_var = tf.Variable(
                np.zeros((1, ) + input_shape, dtype=np.float32),
                name="x_mean",
            )
            x_std_var = tf.Variable(
                np.ones((1, ) + input_shape, dtype=np.float32),
                name="x_std",
            )
            y_mean_var = tf.Variable(
                np.zeros((1, output_dim), dtype=np.float32),
                name="y_mean",
            )
            y_std_var = tf.Variable(
                np.ones((1, output_dim), dtype=np.float32),
                name="y_std",
            )

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var
            normalized_ys_var = (ys_var - y_mean_var) / y_std_var

            with tf.name_scope(self._mean_network_name,
                               values=[normalized_xs_var]):
                normalized_means_var = L.get_output(
                    l_mean, {mean_network.input_layer: normalized_xs_var})
            with tf.name_scope(self._std_network_name,
                               values=[normalized_xs_var]):
                normalized_log_stds_var = L.get_output(
                    l_log_std, {mean_network.input_layer: normalized_xs_var})

            means_var = normalized_means_var * y_std_var + y_mean_var
            log_stds_var = normalized_log_stds_var + tf.log(y_std_var)

            normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
            normalized_old_log_stds_var = old_log_stds_var - tf.log(y_std_var)

            dist = self._dist = DiagonalGaussian(output_dim)

            normalized_dist_info_vars = dict(mean=normalized_means_var,
                                             log_std=normalized_log_stds_var)

            mean_kl = tf.reduce_mean(
                dist.kl_sym(
                    dict(mean=normalized_old_means_var,
                         log_std=normalized_old_log_stds_var),
                    normalized_dist_info_vars,
                ))

            loss = -tf.reduce_mean(
                dist.log_likelihood_sym(normalized_ys_var,
                                        normalized_dist_info_vars))

            self._f_predict = tensor_utils.compile_function([xs_var],
                                                            means_var)
            self._f_pdists = tensor_utils.compile_function(
                [xs_var], [means_var, log_stds_var])
            self._l_mean = l_mean
            self._l_log_std = l_log_std

            optimizer_args = dict(
                loss=loss,
                target=self,
                network_outputs=[
                    normalized_means_var, normalized_log_stds_var
                ],
            )

            if use_trust_region:
                optimizer_args["leq_constraint"] = (mean_kl, max_kl_step)
                optimizer_args["inputs"] = [
                    xs_var, ys_var, old_means_var, old_log_stds_var
                ]
            else:
                optimizer_args["inputs"] = [xs_var, ys_var]

            self._optimizer.update_opt(**optimizer_args)

            self._use_trust_region = use_trust_region
            self._name = name

            self._normalize_inputs = normalize_inputs
            self._normalize_outputs = normalize_outputs
            self._mean_network = mean_network
            self._x_mean_var = x_mean_var
            self._x_std_var = x_std_var
            self._y_mean_var = y_mean_var
            self._y_std_var = y_std_var

            # Optionally create assign operations for normalization
            if self._normalize_inputs:
                self._x_mean_var_ph = tf.placeholder(
                    shape=(1, ) + input_shape,
                    dtype=tf.float32,
                )
                self._x_std_var_ph = tf.placeholder(
                    shape=(1, ) + input_shape,
                    dtype=tf.float32,
                )
                self._assign_x_mean = tf.assign(self._x_mean_var,
                                                self._x_mean_var_ph)
                self._assign_x_std = tf.assign(self._x_std_var,
                                               self._x_std_var_ph)
            if self._normalize_outputs:
                self._y_mean_var_ph = tf.placeholder(
                    shape=(1, output_dim),
                    dtype=tf.float32,
                )
                self._y_std_var_ph = tf.placeholder(
                    shape=(1, output_dim),
                    dtype=tf.float32,
                )
                self._assign_y_mean = tf.assign(self._y_mean_var,
                                                self._y_mean_var_ph)
                self._assign_y_std = tf.assign(self._y_std_var,
                                               self._y_std_var_ph)
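
Example #6 fits the regressor on normalized targets and maps the predicted distribution back to the original scale: the mean is denormalized affinely, and the log std is shifted by log(y_std), since scaling a Gaussian by a factor adds the log of that factor to its log standard deviation. A minimal numpy check of that identity (values are illustrative):

import numpy as np

rng = np.random.default_rng(0)
y_mean, y_std = 2.0, 3.0
norm_mean, norm_log_std = 0.4, np.log(0.7)  # prediction in normalized space

# Denormalize the way the regressor does.
mean = norm_mean * y_std + y_mean
log_std = norm_log_std + np.log(y_std)

# Sampling in normalized space and rescaling gives the same distribution.
z = norm_mean + np.exp(norm_log_std) * rng.standard_normal(100_000)
y = z * y_std + y_mean
assert np.isclose(y.mean(), mean, atol=0.05)
assert np.isclose(np.log(y.std()), log_std, atol=0.05)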
Example #7
    def _build(self, state_input):
        action_dim = self._output_dim

        with tf.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.log(1. + tf.exp(std_param))

        with tf.variable_scope('std_limits'):
            if self._min_std_param is not None:
                log_std_var = tf.maximum(log_std_var, self._min_std_param)
            if self._max_std_param is not None:
                log_std_var = tf.minimum(log_std_var, self._max_std_param)

        dist = DiagonalGaussian(self._output_dim)

        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                               seed=deterministic.get_seed())
        action_var = rnd * tf.exp(log_std_var) + mean_var

        return action_var, mean_var, log_std_var, std_param, dist
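
Example #7 (like the constructors below) supports two std parameterizations: 'exp' stores log std directly, while 'softplus' stores a pre-activation x with std = log(1 + exp(x)). To make the initial std come out at init_std, each branch inverts its own mapping when choosing the bias, which is why later examples use np.log(init_std) for 'exp' and np.log(np.exp(init_std) - 1) for 'softplus'. A quick check:

import numpy as np

init_std = 1.0

# 'exp': std = exp(p), so the initial parameter is log(init_std).
p_exp = np.log(init_std)
assert np.isclose(np.exp(p_exp), init_std)

# 'softplus': std = log(1 + exp(p)), so invert the softplus instead.
p_soft = np.log(np.exp(init_std) - 1)
assert np.isclose(np.log1p(np.exp(p_soft)), init_std)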
Example #8
    def __init__(self,
                 env_spec,
                 name=None,
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parametrization='exp'):
        """
        :param env_spec:
        :param hidden_sizes: list of sizes for the fully-connected hidden
        layers
        :param learn_std: Is std trainable?
        :param init_std: Initial std
        :param adaptive_std:
        :param std_share_network:
        :param std_hidden_sizes: list of sizes for the fully-connected layers
         for std
        :param min_std: minimum threshold for the std, used to avoid
         numerical issues
        :param std_hidden_nonlinearity:
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parametrization: how the std should be parametrized. There
         are a few options:
            - exp: the logarithm of the std will be stored, and an
             exponential transformation applied to recover the std
            - softplus: the std will be computed as log(1+exp(x))
        :return:
        """
        Serializable.quick_init(self, locals())
        assert isinstance(env_spec.action_space, Box)
        self.name = name
        self._mean_network_name = "mean_network"
        self._std_network_name = "std_network"

        with tf.variable_scope(name, "GaussianMLPPolicy"):

            obs_dim = env_spec.observation_space.flat_dim
            action_dim = env_spec.action_space.flat_dim

            # create network
            if mean_network is None:
                if std_share_network:
                    if std_parametrization == "exp":
                        init_std_param = np.log(init_std)
                    elif std_parametrization == "softplus":
                        init_std_param = np.log(np.exp(init_std) - 1)
                    else:
                        raise NotImplementedError
                    init_b = tf.constant_initializer(init_std_param)
                    with tf.variable_scope(self._mean_network_name):
                        mean_network = MLP(
                            name="mlp",
                            input_shape=(obs_dim, ),
                            output_dim=2 * action_dim,
                            hidden_sizes=hidden_sizes,
                            hidden_nonlinearity=hidden_nonlinearity,
                            output_nonlinearity=output_nonlinearity,
                            output_b_init=init_b,
                        )
                        l_mean = L.SliceLayer(
                            mean_network.output_layer,
                            slice(action_dim),
                            name="mean_slice",
                        )
                else:
                    mean_network = MLP(
                        name=self._mean_network_name,
                        input_shape=(obs_dim, ),
                        output_dim=action_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity,
                    )
                    l_mean = mean_network.output_layer
            self._mean_network = mean_network

            obs_var = mean_network.input_layer.input_var

            if std_network is not None:
                l_std_param = std_network.output_layer
            else:
                if adaptive_std:
                    std_network = MLP(
                        name=self._std_network_name,
                        input_shape=(obs_dim, ),
                        input_layer=mean_network.input_layer,
                        output_dim=action_dim,
                        hidden_sizes=std_hidden_sizes,
                        hidden_nonlinearity=std_hidden_nonlinearity,
                        output_nonlinearity=None,
                    )
                    l_std_param = std_network.output_layer
                elif std_share_network:
                    with tf.variable_scope(self._std_network_name):
                        l_std_param = L.SliceLayer(
                            mean_network.output_layer,
                            slice(action_dim, 2 * action_dim),
                            name="std_slice",
                        )
                else:
                    if std_parametrization == 'exp':
                        init_std_param = np.log(init_std)
                    elif std_parametrization == 'softplus':
                        init_std_param = np.log(np.exp(init_std) - 1)
                    else:
                        raise NotImplementedError
                    with tf.variable_scope(self._std_network_name):
                        l_std_param = L.ParamLayer(
                            mean_network.input_layer,
                            num_units=action_dim,
                            param=tf.constant_initializer(init_std_param),
                            name="output_std_param",
                            trainable=learn_std,
                        )

            self.std_parametrization = std_parametrization

            if std_parametrization == 'exp':
                min_std_param = np.log(min_std)
            elif std_parametrization == 'softplus':
                min_std_param = np.log(np.exp(min_std) - 1)
            else:
                raise NotImplementedError

            self.min_std_param = min_std_param

            # mean_var, log_std_var = L.get_output([l_mean, l_std_param])
            #
            # if self.min_std_param is not None:
            #     log_std_var = tf.maximum(log_std_var, np.log(min_std))
            #
            # self._mean_var, self._log_std_var = mean_var, log_std_var

            self._l_mean = l_mean
            self._l_std_param = l_std_param

            self._dist = DiagonalGaussian(action_dim)

            LayersPowered.__init__(self, [l_mean, l_std_param])
            super(GaussianMLPPolicy, self).__init__(env_spec)

            dist_info_sym = self.dist_info_sym(
                mean_network.input_layer.input_var, dict())
            mean_var = tf.identity(dist_info_sym["mean"], name="mean")
            log_std_var = tf.identity(dist_info_sym["log_std"],
                                      name="standard_dev")

            self._f_dist = tensor_utils.compile_function(
                inputs=[obs_var],
                outputs=[mean_var, log_std_var],
            )
Example #9
    def __init__(self,
                 env_spec,
                 embedding,
                 task_space,
                 name="GaussianMLPMultitaskPolicy",
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parameterization='exp'):
        """
        :param env_spec: observation space is a concatenation of task space and
          vanilla env observation space
        :param hidden_sizes: list of sizes for the fully-connected hidden
          layers
        :param learn_std: Is std trainable?
        :param init_std: Initial std
        :param adaptive_std:
        :param std_share_network:
        :param std_hidden_sizes: list of sizes for the fully-connected layers
          for std
        :param min_std: minimum threshold for the std, used to avoid
          numerical issues
        :param std_hidden_nonlinearity:
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parameterization: how the std should be parameterized.
          There are a few options:
            - exp: the logarithm of the std will be stored, and an
                exponential transformation applied to recover the std
            - softplus: the std will be computed as log(1+exp(x))
        :return:
        """
        assert isinstance(env_spec.action_space, Box)
        StochasticMultitaskPolicy.__init__(self, env_spec, embedding,
                                           task_space)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        if mean_network or std_network:
            raise NotImplementedError

        self.name = name
        self._variable_scope = tf.variable_scope(self.name,
                                                 reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # TODO: eliminate
        self._dist = DiagonalGaussian(self.action_space.flat_dim)

        # Network parameters
        self._hidden_sizes = hidden_sizes
        self._learn_std = learn_std
        self._init_std = init_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._std_hidden_sizes = std_hidden_sizes
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._output_nonlinearity = output_nonlinearity
        self._mean_network = mean_network
        self._std_network = std_network
        self._std_parameterization = std_parameterization

        # Transform std arguments to parameterized space
        self._init_std_param = None
        self._min_std_param = None
        self._max_std_param = None
        if self._std_parameterization == 'exp':
            self._init_std_param = np.log(init_std)
            if min_std:
                self._min_std_param = np.log(min_std)
            if max_std:
                self._max_std_param = np.log(max_std)
        elif self._std_parameterization == 'softplus':
            self._init_std_param = np.log(np.exp(init_std) - 1)
            if min_std:
                self._min_std_param = np.log(np.exp(min_std) - 1)
            if max_std:
                self._max_std_param = np.log(np.exp(max_std) - 1)
        else:
            raise NotImplementedError

        # Build default graph
        with self._name_scope:
            # inputs
            self._task_input = self._embedding._input
            self._latent_input = self.latent_space.new_tensor_variable(
                name="latent_input", extra_dims=1)
            self._obs_input = self.observation_space.new_tensor_variable(
                name="obs_input", extra_dims=1)

            with tf.name_scope("default",
                               values=[self._task_input, self._obs_input]):
                # network (connect with embedding)
                latent = self._embedding.latent
                latent_mean = self._embedding.latent_mean
                latent_std_param = self._embedding.latent_std_param
                action_var, mean_var, std_param_var, dist = self._build_graph(
                    latent, self._obs_input)

                # outputs
                self._action = tf.identity(action_var, name="action")
                self._action_mean = tf.identity(mean_var, name="action_mean")
                self._action_std_param = tf.identity(std_param_var,
                                                     "action_std_param")
                self._action_distribution = dist

            # special auxiliary graph for feedforward using only latents
            with tf.name_scope("from_latent",
                               values=[self._latent_input, self._obs_input]):
                action_var, mean_var, std_param_var, dist = self._build_graph(
                    self._latent_input, self._obs_input)

                # auxiliary outputs
                self._action_from_latent = action_var
                self._action_mean_from_latent = mean_var
                self._action_std_param_from_latent = std_param_var
                self._action_distribution_from_latent = dist

            # compiled functions
            with tf.variable_scope("f_dist_task_obs"):
                self.f_dist_task_obs = tensor_utils.compile_function(
                    inputs=[self._task_input, self._obs_input],
                    outputs=[
                        self._action, self._action_mean,
                        self._action_std_param, latent, latent_mean,
                        latent_std_param
                    ],
                )
            with tf.variable_scope("f_dist_latent_obs"):
                self.f_dist_latent_obs = tensor_utils.compile_function(
                    inputs=[self._latent_input, self._obs_input],
                    outputs=[
                        self._action_from_latent,
                        self._action_mean_from_latent,
                        self._action_std_param_from_latent
                    ],
                )
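
Example #9 builds the same action head twice, once driven by the embedding and once by a latent placeholder, inside a single variable scope opened with reuse=tf.AUTO_REUSE, so both graphs share one set of weights. A minimal sketch of that TF1 pattern (the toy network is hypothetical):

import tensorflow as tf

tf1 = tf.compat.v1
tf1.disable_eager_execution()

def head(x):
    # AUTO_REUSE makes the second call resolve to the same 'w'.
    w = tf1.get_variable('w', shape=(4, 2))
    return tf.matmul(x, w)

with tf1.variable_scope('policy', reuse=tf1.AUTO_REUSE):
    default_in = tf1.placeholder(tf.float32, (None, 4))
    latent_in = tf1.placeholder(tf.float32, (None, 4))
    out_default = head(default_in)
    out_latent = head(latent_in)  # shares weights with out_default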
Example #10
    def _build(self, state_input, step_input, hidden_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for entire time-series
                inputs.
            step_input (tf.Tensor): Placeholder for step inputs.
            hidden_input (tf.Tensor): Placeholder for step hidden state.
            name (str): Inner model name, which is also the variable scope
                of the inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Entire time-series means.
            tf.Tensor: Step mean.
            tf.Tensor: Entire time-series log std.
            tf.Tensor: Step log std.
            tf.Tensor: Step hidden state.
            tf.Tensor: Initial hidden state.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=self.
                    _hidden_state_init_trainable,
                    output_nonlinearity_layer=self.
                    _mean_std_output_nonlinearity_layer)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=self.
                    _hidden_state_init_trainable,
                    output_nonlinearity_layer=self.
                    _mean_output_nonlinearity_layer)
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_param')

        dist = DiagonalGaussian(self._output_dim)

        return (mean_var, step_mean_var, log_std_var, step_log_std_var,
                step_hidden, hidden_init_var, dist)
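
In the non-shared branch above, the log std is state-independent: the parameter / recurrent_parameter helpers broadcast one learned vector to match the batch (and time) dimensions of the input. Roughly, in numpy terms (shapes are illustrative):

import numpy as np

action_dim, batch, horizon = 2, 5, 7
log_std_param = np.full(action_dim, np.log(1.0))  # one learned vector

# Step variant: one copy per batch entry.
step_log_std = np.broadcast_to(log_std_param, (batch, action_dim))
# Time-series variant: tiled over batch and time as well.
log_std = np.broadcast_to(log_std_param, (batch, horizon, action_dim))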
Example #11
    def __init__(self,
                 embedding_spec,
                 name="GaussianMLPEmbedding",
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 mean_scale=1.,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parameterization='exp',
                 normalize=False,
                 mean_output_nonlinearity=None):
        """
        :param embedding_spec:
        :param hidden_sizes: list of sizes for the fully-connected hidden
          layers
        :param learn_std: Is std trainable?
        :param init_std: Initial std
        :param adaptive_std:
        :param std_share_network:
        :param std_hidden_sizes: list of sizes for the fully-connected layers
            for std
        :param min_std: minimum threshold for the std, used to avoid
            numerical issues
        :param std_hidden_nonlinearity:
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parameterization: how the std should be parameterized.
            There are a few options:
            - exp: the logarithm of the std will be stored, and an
                  exponential transformation applied to recover the std
            - softplus: the std will be computed as log(1+exp(x))
        :return:
        """
        assert isinstance(embedding_spec.latent_space, Box)
        StochasticEmbedding.__init__(self, embedding_spec)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        if mean_network or std_network:
            raise NotImplementedError

        self.name = name
        self._variable_scope = tf.variable_scope(
            self.name, reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # TODO: eliminate
        self._dist = DiagonalGaussian(self.latent_space.flat_dim)

        # Network parameters
        self._hidden_sizes = hidden_sizes
        self._learn_std = learn_std
        self._init_std = init_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._std_hidden_sizes = std_hidden_sizes
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._output_nonlinearity = output_nonlinearity
        self._mean_network = mean_network
        self._std_network = std_network
        self._std_parameterization = std_parameterization
        self._normalize = normalize
        self._mean_output_nonlinearity = mean_output_nonlinearity

        if self._normalize:
            latent_dim = self.latent_space.flat_dim
            self._max_std = np.sqrt(1.0 / latent_dim)
            self._init_std = self._max_std / 2.0

        # Transform std arguments to parameterized space
        self._init_std_param = None
        self._min_std_param = None
        self._max_std_param = None
        if self._std_parameterization == 'exp':
            self._init_std_param = np.log(self._init_std)
            if self._min_std:
                self._min_std_param = np.log(self._min_std)
            if self._max_std:
                self._max_std_param = np.log(self._max_std)
        elif self._std_parameterization == 'softplus':
            self._init_std_param = np.log(np.exp(self._init_std) - 1)
            if self._min_std:
                self._min_std_param = np.log(np.exp(self._min_std) - 1)
            if self._max_std:
                self._max_std_param = np.log(np.exp(self._max_std) - 1)
        else:
            raise NotImplementedError

        self._mean_scale = mean_scale

        # Build default graph
        with self._name_scope:
            # inputs
            self._input = self.input_space.new_tensor_variable(
                name="input", extra_dims=1)

            with tf.name_scope("default", values=[self._input]):
                # network
                latent_var, mean_var, std_param_var, dist = self._build_graph(
                    self._input)

            # outputs
            self._latent = tf.identity(latent_var, name="latent")
            self._latent_mean = tf.identity(mean_var, name="latent_mean")
            self._latent_std_param = tf.identity(std_param_var,
                                                 "latent_std_param")
            self._latent_distribution = dist

            # compiled functions
            with tf.variable_scope("f_dist"):
                self._f_dist = tensor_utils.compile_function(
                    inputs=[self._input],
                    outputs=[
                        self._latent, self._latent_mean, self._latent_std_param
                    ],
                )
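
When normalize=True, the constructor caps the per-dimension std at sqrt(1/latent_dim), presumably to keep the latent's overall scale bounded regardless of its dimensionality, and starts the std at half that cap. For instance:

import numpy as np

latent_dim = 4
max_std = np.sqrt(1.0 / latent_dim)  # 0.5 for a 4-dimensional latent
init_std = max_std / 2.0             # 0.25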
Example #12
class GaussianMLPEmbedding(StochasticEmbedding, Parameterized, Serializable):
    def __init__(self,
                 embedding_spec,
                 name="GaussianMLPEmbedding",
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 mean_scale=1.,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parameterization='exp',
                 normalize=False,
                 mean_output_nonlinearity=None):
        """
        :param embedding_spec:
        :param hidden_sizes: list of sizes for the fully-connected hidden
          layers
        :param learn_std: Is std trainable?
        :param init_std: Initial std
        :param adaptive_std:
        :param std_share_network:
        :param std_hidden_sizes: list of sizes for the fully-connected layers
            for std
        :param min_std: minimum threshold for the std, used to avoid
            numerical issues
        :param std_hidden_nonlinearity:
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parameterization: how the std should be parameterized.
            There are a few options:
            - exp: the logarithm of the std will be stored, and an
                  exponential transformation applied to recover the std
            - softplus: the std will be computed as log(1+exp(x))
        :return:
        """
        assert isinstance(embedding_spec.latent_space, Box)
        StochasticEmbedding.__init__(self, embedding_spec)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        if mean_network or std_network:
            raise NotImplementedError

        self.name = name
        self._variable_scope = tf.variable_scope(
            self.name, reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # TODO: eliminate
        self._dist = DiagonalGaussian(self.latent_space.flat_dim)

        # Network parameters
        self._hidden_sizes = hidden_sizes
        self._learn_std = learn_std
        self._init_std = init_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._std_hidden_sizes = std_hidden_sizes
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._output_nonlinearity = output_nonlinearity
        self._mean_network = mean_network
        self._std_network = std_network
        self._std_parameterization = std_parameterization
        self._normalize = normalize
        self._mean_output_nonlinearity = mean_output_nonlinearity

        if self._normalize:
            latent_dim = self.latent_space.flat_dim
            self._max_std = np.sqrt(1.0 / latent_dim)
            self._init_std = self._max_std / 2.0

        # Transform std arguments to parameterized space
        self._init_std_param = None
        self._min_std_param = None
        self._max_std_param = None
        if self._std_parameterization == 'exp':
            self._init_std_param = np.log(self._init_std)
            if self._min_std:
                self._min_std_param = np.log(self._min_std)
            if self._max_std:
                self._max_std_param = np.log(self._max_std)
        elif self._std_parameterization == 'softplus':
            self._init_std_param = np.log(np.exp(self._init_std) - 1)
            if self._min_std:
                self._min_std_param = np.log(np.exp(self._min_std) - 1)
            if self._max_std:
                self._max_std_param = np.log(np.exp(self._max_std) - 1)
        else:
            raise NotImplementedError

        self._mean_scale = mean_scale

        # Build default graph
        with self._name_scope:
            # inputs
            self._input = self.input_space.new_tensor_variable(
                name="input", extra_dims=1)

            with tf.name_scope("default", values=[self._input]):
                # network
                latent_var, mean_var, std_param_var, dist = self._build_graph(
                    self._input)

            # outputs
            self._latent = tf.identity(latent_var, name="latent")
            self._latent_mean = tf.identity(mean_var, name="latent_mean")
            self._latent_std_param = tf.identity(std_param_var,
                                                 "latent_std_param")
            self._latent_distribution = dist

            # compiled functions
            with tf.variable_scope("f_dist"):
                self._f_dist = tensor_utils.compile_function(
                    inputs=[self._input],
                    outputs=[
                        self._latent, self._latent_mean, self._latent_std_param
                    ],
                )

    @property
    def input(self):
        return self._input

    @property
    def latent(self):
        return self._latent

    @property
    def latent_mean(self):
        return self._latent_mean

    @property
    def latent_std_param(self):
        return self._latent_std_param

    @property
    def inputs(self):
        return self._input

    @property
    def outputs(self):
        return (self._latent, self._latent_mean, self._latent_std_param,
                self._latent_distribution)

    def _build_graph(self, from_input):
        latent_dim = self.latent_space.flat_dim
        small = 1e-5

        with self._variable_scope:
            with tf.variable_scope("dist_params"):
                if self._std_share_network:
                    # mean and std networks share an MLP
                    b = np.concatenate(
                        [
                            np.zeros(latent_dim),
                            np.full(latent_dim, self._init_std_param)
                        ],
                        axis=0)
                    b = tf.constant_initializer(b)
                    mean_std_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim * 2,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        # hidden_w_init=tf.orthogonal_initializer(1.0),
                        # output_w_init=tf.orthogonal_initializer(1.0),
                        output_b_init=b,
                        name="mean_std_network")
                    with tf.variable_scope("mean_network"):
                        mean_network = mean_std_network[..., :latent_dim]
                    with tf.variable_scope("std_network"):
                        std_network = mean_std_network[..., latent_dim:]
                else:
                    # separate MLPs for mean and std networks
                    # mean network
                    mean_network = mlp(
                        with_input=from_input,
                        output_dim=latent_dim,
                        hidden_sizes=self._hidden_sizes,
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        output_nonlinearity=self._output_nonlinearity,
                        name="mean_network")

                    # std network
                    if self._adaptive_std:
                        b = tf.constant_initializer(self._init_std_param)
                        std_network = mlp(
                            with_input=from_input,
                            output_dim=latent_dim,
                            hidden_sizes=self._std_hidden_sizes,
                            hidden_nonlinearity=self._std_hidden_nonlinearity,
                            output_nonlinearity=self._output_nonlinearity,
                            output_b_init=b,
                            name="std_network")
                    else:
                        p = tf.constant_initializer(self._init_std_param)
                        std_network = parameter(
                            with_input=from_input,
                            length=latent_dim,
                            initializer=p,
                            trainable=self._learn_std,
                            name="std_network")

                if self._mean_scale != 1.:
                    mean_var = tf.identity(mean_network * self._mean_scale,
                                           "mean_scale")
                else:
                    mean_var = mean_network

                if self._mean_output_nonlinearity is not None:
                    mean_var = self._mean_output_nonlinearity(mean_var)

                std_param_var = std_network

                with tf.variable_scope("std_limits"):
                    if self._min_std_param:
                        std_param_var = tf.maximum(std_param_var,
                                                   self._min_std_param)
                    if self._max_std_param:
                        std_param_var = tf.minimum(std_param_var,
                                                   self._max_std_param)

            with tf.variable_scope("std_parameterization"):
                # build std_var with std parameterization
                if self._std_parameterization == "exp":
                    std_var = tf.exp(std_param_var)
                elif self._std_parameterization == "softplus":
                    std_var = tf.log(1. + tf.exp(std_param_var))
                else:
                    raise NotImplementedError

            if self._normalize:
                mean_var = tf.nn.l2_normalize(mean_var)
                #std_var = tf.nn.l2_normalize(std_var)

            dist = tf.contrib.distributions.MultivariateNormalDiag(
                mean_var, std_var)

            latent_var = dist.sample(seed=ext.get_seed())

            return latent_var, mean_var, std_param_var, dist

    @overrides
    def get_params_internal(self, **tags):
        if tags.get("trainable"):
            params = [v for v in tf.trainable_variables(scope=self.name)]
        else:
            params = [v for v in tf.global_variables(scope=self.name)]

        return params

    @property
    def vectorized(self):
        return True

    def dist_info_sym(self, input_var, state_info_vars=None, name=None):
        with tf.name_scope(name, "dist_info_sym",
                           [input_var, state_info_vars]):
            _, mean, log_std, _ = self._build_graph(input_var)

            return dict(mean=mean, log_std=log_std)

    def latent_sym(self, input_var, name=None):
        with tf.name_scope(name, "latent_sym", [input_var]):
            latent, _, _, _ = self._build_graph(input_var)

            return latent

    @overrides
    def get_latent(self, an_input):
        # flat_in = self.input_space.flatten(an_input)
        # mean, log_std = [x[0] for x in self._f_dist([flat_in])]
        # rnd = np.random.normal(size=mean.shape)
        # latent = rnd * np.exp(log_std) + mean
        # return latent, dict(mean=mean, log_std=log_std)
        flat_in = self.input_space.flatten(an_input)
        latent, mean, log_std = [x[0] for x in self._f_dist([flat_in])]
        return latent, dict(mean=mean, log_std=log_std)

    def get_latents(self, inputs):
        # flat_in = self.input_space.flatten_n(inputs)
        # means, log_stds = self._f_dist(flat_in)
        # rnd = np.random.normal(size=means.shape)
        # latents = rnd * np.exp(log_stds) + means
        # return latents, dict(mean=means, log_std=log_stds)
        flat_in = self.input_space.flatten_n(inputs)
        latents, means, log_stds = self._f_dist(flat_in)
        return latents, dict(mean=means, log_std=log_stds)

    def get_reparam_latent_sym(self,
                               input_var,
                               latent_var,
                               old_dist_info_vars,
                               name=None):
        """
        Given inputs, old latent outputs, and a distribution of old latent
        outputs, return a symbolically reparameterized representation of the
        inputs in terms of the embedding parameters
        :param in_var:
        :param latent_var:
        :param old_dist_info_vars:
        :return:
        """
        with tf.name_scope(name, "get_reparam_latent_sym",
                           [input_var, latent_var, old_dist_info_vars]):
            new_dist_info_vars = self.dist_info_sym(input_var, latent_var)
            new_mean_var, new_log_std_var = new_dist_info_vars[
                "mean"], new_dist_info_vars["log_std"]
            old_mean_var, old_log_std_var = old_dist_info_vars[
                "mean"], old_dist_info_vars["log_std"]
            epsilon_var = (latent_var - old_mean_var) / (
                tf.exp(old_log_std_var) + 1e-8)
            new_latent_var = new_mean_var + epsilon_var * tf.exp(
                new_log_std_var)
        return new_latent_var

    def log_likelihood(self, an_input, latent):
        flat_in = self.input_space.flatten(an_input)
        _, mean, log_std = [x[0] for x in self._f_dist([flat_in])]
        return self._dist.log_likelihood(latent,
                                         dict(mean=mean, log_std=log_std))

    def log_likelihoods(self, inputs, latents):
        flat_in = self.input_space.flatten_n(inputs)
        _, means, log_stds = self._f_dist(flat_in)
        return self._dist.log_likelihood(latents,
                                         dict(mean=means, log_std=log_stds))

    def log_likelihood_sym(self, input_var, latent_var, name=None):
        with tf.name_scope(name, "log_likelihood_sym",
                           [input_var, latent_var]):
            # dist_info = self.dist_info_sym(input_var, latent_var)
            # means_var, log_stds_var = dist_info['mean'], dist_info['log_std']
            # return self._dist.log_likelihood_sym(
            #     latent_var, dict(mean=means_var, log_std=log_stds_var))
            _, _, _, dist = self._build_graph(input_var)
            return dist.log_prob(latent_var)

    def entropy_sym(self, input_var, name=None):
        with tf.name_scope(name, "entropy_sym", [input_var]):
            _, _, _, dist = self._build_graph(input_var)
            return dist.entropy()

    def entropy_sym_sampled(self, dist_info_vars, name=None):
        with tf.name_scope(name, "entropy_sym_sampled", [dist_info_vars]):
            return self._dist.entropy_sym(dist_info_vars)

    def log_diagnostics(self, paths):
        log_stds = np.vstack(
            [path["agent_infos"]["log_std"] for path in paths])
        logger.record_tabular('AverageEmbeddingStd', np.mean(np.exp(log_stds)))
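
A minimal NumPy sketch of the reparameterization identity used in
get_reparam_latent_sym above (array values are illustrative): the noise that
produced a latent under the old distribution is recovered, then replayed
through the new parameters, so the latent stays differentiable with respect
to them.

import numpy as np

old_mean, old_log_std = np.array([0.0, 1.0]), np.array([-1.0, 0.5])
new_mean, new_log_std = np.array([0.2, 0.8]), np.array([-0.9, 0.4])

# Sample once from the old distribution.
latent = old_mean + np.exp(old_log_std) * np.random.normal(size=2)

# Recover the underlying noise (same 1e-8 guard as above), then replay it
# through the new parameters.
epsilon = (latent - old_mean) / (np.exp(old_log_std) + 1e-8)
new_latent = new_mean + epsilon * np.exp(new_log_std)

# Sanity check: identical old and new parameters reproduce the sample.
assert np.allclose(old_mean + epsilon * np.exp(old_log_std), latent)
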
Example #13
    def __init__(self,
                 input_shape,
                 output_dim,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 hidden_sizes,
                 hidden_nonlinearity=tf.nn.tanh,
                 output_nonlinearity=None,
                 name='GaussianConvRegressor',
                 mean_network=None,
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_conv_filters=[],
                 std_conv_filter_sizes=[],
                 std_conv_strides=[],
                 std_conv_pads=[],
                 std_hidden_sizes=[],
                 std_hidden_nonlinearity=None,
                 std_output_nonlinearity=None,
                 normalize_inputs=True,
                 normalize_outputs=True,
                 subsample_factor=1.,
                 optimizer=None,
                 optimizer_args=dict(),
                 use_trust_region=True,
                 max_kl_step=0.01):
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        self._mean_network_name = 'mean_network'
        self._std_network_name = 'std_network'

        with tf.compat.v1.variable_scope(name):
            if optimizer is None:
                if use_trust_region:
                    optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
                else:
                    optimizer = LbfgsOptimizer(**optimizer_args)
            else:
                optimizer = optimizer(**optimizer_args)

            self._optimizer = optimizer
            self._subsample_factor = subsample_factor

            if mean_network is None:
                if std_share_network:
                    b = np.concatenate(
                        [
                            np.zeros(output_dim),
                            np.full(output_dim, np.log(init_std))
                        ],
                        axis=0)  # yapf: disable
                    b = tf.constant_initializer(b)
                    mean_network = ConvNetwork(
                        name=self._mean_network_name,
                        input_shape=input_shape,
                        output_dim=2 * output_dim,
                        conv_filters=conv_filters,
                        conv_filter_sizes=conv_filter_sizes,
                        conv_strides=conv_strides,
                        conv_pads=conv_pads,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity,
                        output_b_init=b)
                    l_mean = layers.SliceLayer(
                        mean_network.output_layer,
                        slice(output_dim),
                        name='mean_slice',
                    )
                else:
                    mean_network = ConvNetwork(
                        name=self._mean_network_name,
                        input_shape=input_shape,
                        output_dim=output_dim,
                        conv_filters=conv_filters,
                        conv_filter_sizes=conv_filter_sizes,
                        conv_strides=conv_strides,
                        conv_pads=conv_pads,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity)
                    l_mean = mean_network.output_layer

            if adaptive_std:
                l_log_std = ConvNetwork(
                    name=self._std_network_name,
                    input_shape=input_shape,
                    output_dim=output_dim,
                    conv_filters=std_conv_filters,
                    conv_filter_sizes=std_conv_filter_sizes,
                    conv_strides=std_conv_strides,
                    conv_pads=std_conv_pads,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_hidden_nonlinearity,
                    output_nonlinearity=std_output_nonlinearity,
                    output_b_init=tf.constant_initializer(np.log(init_std)),
                ).output_layer
            elif std_share_network:
                l_log_std = layers.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim, 2 * output_dim),
                    name='log_std_slice',
                )
            else:
                l_log_std = layers.ParamLayer(
                    mean_network.input_layer,
                    num_units=output_dim,
                    param=tf.constant_initializer(np.log(init_std)),
                    trainable=learn_std,
                    name=self._std_network_name,
                )

            LayersPowered.__init__(self, [l_mean, l_log_std])

            xs_var = mean_network.input_layer.input_var
            ys_var = tf.compat.v1.placeholder(
                dtype=tf.float32, name='ys', shape=(None, output_dim))
            old_means_var = tf.compat.v1.placeholder(
                dtype=tf.float32, name='old_means', shape=(None, output_dim))
            old_log_stds_var = tf.compat.v1.placeholder(
                dtype=tf.float32,
                name='old_log_stds',
                shape=(None, output_dim))

            x_mean_var = tf.Variable(
                np.zeros((1, np.prod(input_shape)), dtype=np.float32),
                name='x_mean',
            )
            x_std_var = tf.Variable(
                np.ones((1, np.prod(input_shape)), dtype=np.float32),
                name='x_std',
            )
            y_mean_var = tf.Variable(
                np.zeros((1, output_dim), dtype=np.float32),
                name='y_mean',
            )
            y_std_var = tf.Variable(
                np.ones((1, output_dim), dtype=np.float32),
                name='y_std',
            )

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var
            normalized_ys_var = (ys_var - y_mean_var) / y_std_var

            with tf.name_scope(
                    self._mean_network_name, values=[normalized_xs_var]):
                normalized_means_var = layers.get_output(
                    l_mean, {mean_network.input_layer: normalized_xs_var})
            with tf.name_scope(
                    self._std_network_name, values=[normalized_xs_var]):
                normalized_log_stds_var = layers.get_output(
                    l_log_std, {mean_network.input_layer: normalized_xs_var})

            means_var = normalized_means_var * y_std_var + y_mean_var
            log_stds_var = normalized_log_stds_var + tf.math.log(y_std_var)

            normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
            normalized_old_log_stds_var = (
                old_log_stds_var - tf.math.log(y_std_var))

            dist = self._dist = DiagonalGaussian(output_dim)

            normalized_dist_info_vars = dict(
                mean=normalized_means_var, log_std=normalized_log_stds_var)

            mean_kl = tf.reduce_mean(
                dist.kl_sym(
                    dict(
                        mean=normalized_old_means_var,
                        log_std=normalized_old_log_stds_var),
                    normalized_dist_info_vars,
                ))

            loss = -tf.reduce_mean(
                dist.log_likelihood_sym(normalized_ys_var,
                                        normalized_dist_info_vars))

            self._f_predict = tensor_utils.compile_function([xs_var],
                                                            means_var)
            self._f_pdists = tensor_utils.compile_function(
                [xs_var], [means_var, log_stds_var])
            self._l_mean = l_mean
            self._l_log_std = l_log_std

            optimizer_args = dict(
                loss=loss,
                target=self,
                network_outputs=[
                    normalized_means_var, normalized_log_stds_var
                ],
            )

            if use_trust_region:
                optimizer_args['leq_constraint'] = (mean_kl, max_kl_step)
                optimizer_args['inputs'] = [
                    xs_var, ys_var, old_means_var, old_log_stds_var
                ]
            else:
                optimizer_args['inputs'] = [xs_var, ys_var]

            self._optimizer.update_opt(**optimizer_args)

            self._use_trust_region = use_trust_region
            self._name = name

            self._normalize_inputs = normalize_inputs
            self._normalize_outputs = normalize_outputs
            self._mean_network = mean_network
            self._x_mean_var = x_mean_var
            self._x_std_var = x_std_var
            self._y_mean_var = y_mean_var
            self._y_std_var = y_std_var
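
The output normalization in Example #13 rests on the fact that an affine
rescale y = sigma * y_hat + mu shifts a Gaussian's mean affinely and its
log-std additively by log(sigma), which is exactly what the means_var and
log_stds_var lines above compute. A quick NumPy check of that identity:

import numpy as np

y = np.random.normal(loc=3.0, scale=2.0, size=10000)
y_mean, y_std = y.mean(), y.std()

# "Fit" in the normalized space (trivially, since we standardized).
y_norm = (y - y_mean) / y_std
norm_mean, norm_log_std = y_norm.mean(), np.log(y_norm.std())

# Map back: the mean rescales affinely, the log-std shifts by log(y_std).
mean = norm_mean * y_std + y_mean
log_std = norm_log_std + np.log(y_std)

assert np.isclose(mean, y.mean())
assert np.isclose(np.exp(log_std), y.std())
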
Example #14
    def _build(self,
               state_input,
               step_input,
               hidden_input,
               cell_input,
               name=None):
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                (outputs, step_outputs, step_hidden, step_cell,
                 hidden_init_var, cell_init_var) = lstm(
                     name='mean_std_network',
                     lstm_cell=self._mean_std_lstm_cell,
                     all_input_var=state_input,
                     step_input_var=step_input,
                     step_hidden_var=hidden_input,
                     step_cell_var=cell_input,
                     hidden_state_init=self._hidden_state_init,
                     hidden_state_init_trainable=self.
                     _hidden_state_init_trainable,
                     cell_state_init=self._cell_state_init,
                     cell_state_init_trainable=self._cell_state_init_trainable,
                     output_nonlinearity_layer=self.
                     _mean_std_output_nonlinearity_layer)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                (mean_var, step_mean_var, step_hidden, step_cell,
                 hidden_init_var, cell_init_var) = lstm(
                     name='mean_network',
                     lstm_cell=self._mean_lstm_cell,
                     all_input_var=state_input,
                     step_input_var=step_input,
                     step_hidden_var=hidden_input,
                     step_cell_var=cell_input,
                     hidden_state_init=self._hidden_state_init,
                     hidden_state_init_trainable=self.
                     _hidden_state_init_trainable,
                     cell_state_init=self._cell_state_init,
                     cell_state_init_trainable=self._cell_state_init_trainable,
                     output_nonlinearity_layer=self.
                     _mean_output_nonlinearity_layer)
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_param')

        dist = DiagonalGaussian(self._output_dim)

        return (mean_var, step_mean_var, log_std_var, step_log_std_var,
                step_hidden, step_cell, hidden_init_var, cell_init_var, dist)
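
When _std_share_network is set, the LSTM in Example #14 emits 2 * action_dim
units per step and the mean and log-std heads are plain slices of the last
axis; the ellipsis indexing works for both the (batch, time, dim) sequence
outputs and the (batch, dim) step outputs. A small NumPy sketch of the split
(shapes are illustrative):

import numpy as np

action_dim = 3
# Stand-in for the shared network's sequence output: (batch, time, 2 * dim).
outputs = np.arange(2 * 2 * 2 * action_dim, dtype=float).reshape(2, 2, -1)

# '...' splits the last axis regardless of how many leading axes there are.
mean_var = outputs[..., :action_dim]
log_std_var = outputs[..., action_dim:]

assert mean_var.shape == (2, 2, action_dim)
assert log_std_var.shape == (2, 2, action_dim)
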
Example #15
    def _build(self, state_input):
        action_dim = self._output_dim

        with tf.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    output_b_init=b,
                    name='mean_std_network')
                with tf.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.variable_scope('std_network'):
                    std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    output_nonlinearity=self._output_nonlinearity,
                    name='mean_network')

                # std network
                if self._adaptive_std:
                    b = tf.constant_initializer(self._init_std_param)
                    std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_b_init=b,
                        name='std_network')
                else:
                    p = tf.constant_initializer(self._init_std_param)
                    std_network = parameter(state_input,
                                            length=action_dim,
                                            initializer=p,
                                            trainable=self._learn_std,
                                            name='std_network')

        mean_var = mean_network
        std_param_var = std_network

        with tf.variable_scope('std_parameterization'):
            # build the log-std with the chosen std parameterization; the
            # sampler below computes tf.exp(std_var), so both branches must
            # yield a log standard deviation
            if self._std_parameterization == 'exp':
                pass  # the raw parameter is already the log-std
            elif self._std_parameterization == 'softplus':
                # std = log(1 + exp(param)); take its log, cf. Example #17
                std_param_var = tf.log(tf.log(1. + tf.exp(std_param_var)))
            else:
                raise NotImplementedError

        with tf.variable_scope('std_limits'):
            std_var = std_param_var
            if self._min_std_param is not None:
                std_var = tf.maximum(std_var, self._min_std_param)
            if self._max_std_param is not None:
                std_var = tf.minimum(std_var, self._max_std_param)

        dist = DiagonalGaussian(action_dim)

        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                               seed=deterministic.get_seed())
        action_var = rnd * tf.exp(std_var) + mean_var

        return action_var, mean_var, std_var, std_param_var, dist
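
The sampler at the end of Example #15 computes tf.exp(std_var), so std_var
must hold a log standard deviation under either parameterization: for 'exp'
the raw parameter already is the log-std, while for 'softplus' the std is
softplus(param) and its log must be taken (as Example #17 below does). A
NumPy sketch of the two mappings:

import numpy as np

param = np.array([-1.0, 0.0, 2.0])

# 'exp': the parameter is interpreted directly as log(std).
log_std_exp = param

# 'softplus': std = log(1 + exp(param)), so log_std = log(softplus(param)).
std_softplus = np.log1p(np.exp(param))
log_std_softplus = np.log(std_softplus)

# Both paths feed a sampler of the form mean + exp(log_std) * noise.
assert np.allclose(np.exp(log_std_softplus), std_softplus)
assert np.all(np.exp(log_std_exp) > 0)
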
Example #16
def test_sample():
    gaussian = DiagonalGaussian(dim=2)
    dist = dict(mean=np.array([1, 1]), log_std=np.array([0, 0]))
    samples = [gaussian.sample(dist) for _ in range(10000)]
    assert np.isclose(np.mean(samples), 1, atol=0.1)
    assert np.isclose(np.var(samples), 1, atol=0.1)
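
For reference, the density test_sample draws from: a diagonal Gaussian with
mean m and log-std s has log-likelihood
sum_i(-s_i - 0.5 * log(2 * pi) - 0.5 * ((x_i - m_i) / exp(s_i)) ** 2).
A self-contained NumPy version (a sketch, not the library's implementation):

import numpy as np

def diag_gaussian_log_likelihood(x, mean, log_std):
    """Log-density of a diagonal Gaussian, summed over dimensions."""
    z = (x - mean) / np.exp(log_std)
    return np.sum(-log_std - 0.5 * np.log(2.0 * np.pi) - 0.5 * z**2, axis=-1)

# At the mean of the unit-variance Gaussian used in test_sample (d = 2),
# the density is (2 * pi) ** (-d / 2), i.e. a log-likelihood of -log(2 * pi).
ll = diag_gaussian_log_likelihood(np.array([1.0, 1.0]),
                                  mean=np.array([1.0, 1.0]),
                                  log_std=np.array([0.0, 0.0]))
assert np.isclose(ll, -np.log(2.0 * np.pi))
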
Example #17
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for state input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Mean.
            tf.Tensor: Parameterized log_std.
            tf.Tensor: log_std.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = DiagonalGaussian(self._output_dim)

        return mean_var, log_std_var, std_param, dist
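
One subtle difference across the examples: #12 and #17 clamp the raw std
parameter before the parameterization step, while #15 applies the limits
after it. Because softplus is non-linear, clamping before and after the
mapping enforce different effective bounds, as a quick NumPy sketch shows:

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

param, min_std_param = np.array([-5.0]), -2.0

# Examples #12/#17 order: clamp the raw parameter, then parameterize.
clamp_first = softplus(np.maximum(param, min_std_param))

# Example #15 order: parameterize, then clamp.
clamp_after = np.maximum(softplus(param), min_std_param)

# softplus(-5) ~= 0.0067 already exceeds -2, so the post-hoc clamp is a
# no-op here, while clamping the parameter first raises it to softplus(-2).
assert not np.isclose(clamp_first[0], clamp_after[0])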