def __init__(self, wrapped_env=None, wrapped_policy=None):
    self._wrapped_env = wrapped_env
    self._wrapped_policy = wrapped_policy
    self._last_obs = None
    assert isinstance(wrapped_policy, MultitaskPolicy)
    Serializable.quick_init(self, locals())
    Parameterized.__init__(self)
Example #2
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name='ContinuousMLPBaseline',
    ):
        """
        Constructor.

        :param env_spec: environment specification.
        :param subsample_factor: fraction of collected samples used when
         fitting the regressor.
        :param num_seq_inputs: number of sequence inputs.
        :param regressor_args: keyword arguments passed to the regressor.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(ContinuousMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = ContinuousMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name
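A minimal usage sketch for a baseline like this one. The env wrapper, gym task, and hidden sizes below are assumptions for illustration, not part of the example:

# Hedged usage sketch; the TfEnv import path, env name, and hidden_sizes are assumed.
import gym
from garage.tf.envs import TfEnv  # assumed garage-style wrapper

env = TfEnv(gym.make("Pendulum-v0"))
baseline = ContinuousMLPBaseline(
    env_spec=env.spec,
    regressor_args=dict(hidden_sizes=(64, 64)),  # forwarded to the regressor
)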
Example #3
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name="GaussianMLPBaseline",
    ):
        """
        Constructor.

        :param env_spec: environment specification.
        :param subsample_factor: fraction of collected samples used when
         fitting the regressor.
        :param num_seq_inputs: number of sequence inputs.
        :param regressor_args: keyword arguments passed to the regressor.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name
Example #4
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
    ):
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(GaussianConvBaseline, self).__init__(env_spec)

        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianConvRegressor(
            input_shape=env_spec.observation_space.shape,
            output_dim=1,
            name="GaussianConvBaseline",
            **regressor_args)
Example #5
    def __init__(self, embedding_spec, name="OneHotEmbedding"):
        """
        :param embedding_spec:
        :return:
        """
        assert isinstance(embedding_spec.latent_space, Box)
        assert (embedding_spec.input_space.flat_dim <=
                embedding_spec.latent_space.flat_dim)
        StochasticEmbedding.__init__(self, embedding_spec)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        self.name = name
        self._variable_scope = tf.variable_scope(self.name,
                                                 reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # Build default graph
        with self._name_scope:
            # inputs
            self._input = self.input_space.new_tensor_variable(name="input",
                                                               extra_dims=1)

            with tf.name_scope("default", values=[self._input]):
                # network
                latent_var, mean_var, std_param_var, dist = self._build_graph(
                    self._input)

            # outputs
            self._latent = tf.identity(latent_var, name="latent")
            self._latent_mean = tf.identity(mean_var, name="latent_mean")
            self._latent_std_param = tf.identity(std_param_var,
                                                 "latent_std_param")
            self._latent_distribution = dist

            # compiled functions
            with tf.variable_scope("f_dist"):
                self._f_dist = tensor_utils.compile_function(
                    inputs=[self._input],
                    outputs=[
                        self._latent, self._latent_mean, self._latent_std_param
                    ],
                )
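The two assertions above encode the one-hot requirement: the flattened input must fit inside the latent space. A toy sketch of the intended mapping, assuming _build_graph (not shown here) copies the one-hot input into the leading latent coordinates:

# Toy illustration of the dimensionality constraint; the zero-padding behavior
# is an assumption, since _build_graph is not part of this excerpt.
import numpy as np

n_task, latent_dim = 3, 5      # input_space.flat_dim <= latent_space.flat_dim
one_hot = np.eye(n_task)[1]    # task 1 -> [0., 1., 0.]
latent = np.zeros(latent_dim)
latent[:n_task] = one_hot      # assumed embedding: copy, then zero-pad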
Example #6
    def __init__(self, wrapped_env=None, wrapped_policy=None):
        assert isinstance(wrapped_policy, MultitaskPolicy)
        Serializable.quick_init(self, locals())
        Parameterized.__init__(self)

        self._wrapped_env = wrapped_env
        self._wrapped_policy = wrapped_policy
        self._last_obs = None

        n_task = self._wrapped_policy.task_space.flat_dim
        one_hots = np.identity(n_task)
        latents, infos = self._wrapped_policy._embedding.get_latents(one_hots)
        latents_means = infos["mean"]
        self._latents_combination_hash = list()
        for i in range(n_task):
            for j in range(i + 1, n_task):
                self._latents_combination_hash.append(
                    (latents_means[i, ...], latents_means[j, ...]))

        self._latents_combination_hash = tuple(self._latents_combination_hash)
        self._n_skills = n_task
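The nested loop above enumerates every unordered pair of task latent means, yielding n_task * (n_task - 1) / 2 pairs. An equivalent sketch with the standard library:

# Equivalent construction with itertools (same pair order as the loop above).
from itertools import combinations

pairs = tuple(combinations(latents_means, 2))  # len == n_task * (n_task - 1) // 2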
Example #7
File: base.py  Project: JoleProject/Jole
 def __init__(self, input_shape, output_dim, name):
     Parameterized.__init__(self)
     self._input_shape = input_shape
     self._output_dim = output_dim
     self._name = name
     self._variable_scope = None
Example #8
    def __init__(self,
                 input_shape,
                 output_dim,
                 name="GaussianMLPRegressor",
                 mean_network=None,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.tanh,
                 optimizer=None,
                 optimizer_args=None,
                 use_trust_region=True,
                 max_kl_step=0.01,
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 std_nonlinearity=None,
                 normalize_inputs=True,
                 normalize_outputs=True,
                 subsample_factor=1.0):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean
         network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the
         mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param max_kl_step: KL divergence constraint for each iteration
        :param learn_std: Whether to learn the standard deviations. Only
         effective if adaptive_std is False. If adaptive_std is True, this
         parameter is ignored, and the weights for the std network are always
         learned.
        :param adaptive_std: Whether to make the std a function of the states.
        :param std_share_network: Whether to use the same network as the mean.
        :param std_hidden_sizes: Number of hidden units of each layer of the
         std network. Only used if `std_share_network` is False. It defaults to
         the same architecture as the mean.
        :param std_nonlinearity: Non-linearity used for each layer of the std
         network. Only used if `std_share_network` is False. It defaults to the
         same non-linearity as the mean.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        self._mean_network_name = "mean_network"
        self._std_network_name = "std_network"

        with tf.variable_scope(name):
            if optimizer_args is None:
                optimizer_args = dict()

            if optimizer is None:
                if use_trust_region:
                    optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
                else:
                    optimizer = LbfgsOptimizer(**optimizer_args)
            else:
                optimizer = optimizer(**optimizer_args)

            self._optimizer = optimizer
            self._subsample_factor = subsample_factor

            if mean_network is None:
                if std_share_network:
                    mean_network = MLP(
                        name="mean_network",
                        input_shape=input_shape,
                        output_dim=2 * output_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=None,
                    )
                    l_mean = L.SliceLayer(
                        mean_network.output_layer,
                        slice(output_dim),
                        name="mean_slice",
                    )
                else:
                    mean_network = MLP(
                        name="mean_network",
                        input_shape=input_shape,
                        output_dim=output_dim,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=None,
                    )
                    l_mean = mean_network.output_layer

            if adaptive_std:
                l_log_std = MLP(
                    name="log_std_network",
                    input_shape=input_shape,
                    input_var=mean_network.input_layer.input_var,
                    output_dim=output_dim,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_nonlinearity,
                    output_nonlinearity=None,
                ).output_layer
            elif std_share_network:
                l_log_std = L.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim, 2 * output_dim),
                    name="log_std_slice",
                )
            else:
                l_log_std = L.ParamLayer(
                    mean_network.input_layer,
                    num_units=output_dim,
                    param=tf.constant_initializer(np.log(init_std)),
                    name="output_log_std",
                    trainable=learn_std,
                )

            LayersPowered.__init__(self, [l_mean, l_log_std])

            xs_var = mean_network.input_layer.input_var
            ys_var = tf.placeholder(dtype=tf.float32,
                                    name="ys",
                                    shape=(None, output_dim))
            old_means_var = tf.placeholder(dtype=tf.float32,
                                           name="old_means",
                                           shape=(None, output_dim))
            old_log_stds_var = tf.placeholder(dtype=tf.float32,
                                              name="old_log_stds",
                                              shape=(None, output_dim))

            x_mean_var = tf.Variable(
                np.zeros((1, ) + input_shape, dtype=np.float32),
                name="x_mean",
            )
            x_std_var = tf.Variable(
                np.ones((1, ) + input_shape, dtype=np.float32),
                name="x_std",
            )
            y_mean_var = tf.Variable(
                np.zeros((1, output_dim), dtype=np.float32),
                name="y_mean",
            )
            y_std_var = tf.Variable(
                np.ones((1, output_dim), dtype=np.float32),
                name="y_std",
            )

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var
            normalized_ys_var = (ys_var - y_mean_var) / y_std_var

            with tf.name_scope(self._mean_network_name,
                               values=[normalized_xs_var]):
                normalized_means_var = L.get_output(
                    l_mean, {mean_network.input_layer: normalized_xs_var})
            with tf.name_scope(self._std_network_name,
                               values=[normalized_xs_var]):
                normalized_log_stds_var = L.get_output(
                    l_log_std, {mean_network.input_layer: normalized_xs_var})

            means_var = normalized_means_var * y_std_var + y_mean_var
            log_stds_var = normalized_log_stds_var + tf.log(y_std_var)

            normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
            normalized_old_log_stds_var = old_log_stds_var - tf.log(y_std_var)

            dist = self._dist = DiagonalGaussian(output_dim)

            normalized_dist_info_vars = dict(mean=normalized_means_var,
                                             log_std=normalized_log_stds_var)

            mean_kl = tf.reduce_mean(
                dist.kl_sym(
                    dict(mean=normalized_old_means_var,
                         log_std=normalized_old_log_stds_var),
                    normalized_dist_info_vars,
                ))

            loss = -tf.reduce_mean(
                dist.log_likelihood_sym(normalized_ys_var,
                                        normalized_dist_info_vars))

            self._f_predict = tensor_utils.compile_function([xs_var],
                                                            means_var)
            self._f_pdists = tensor_utils.compile_function(
                [xs_var], [means_var, log_stds_var])
            self._l_mean = l_mean
            self._l_log_std = l_log_std

            optimizer_args = dict(
                loss=loss,
                target=self,
                network_outputs=[
                    normalized_means_var, normalized_log_stds_var
                ],
            )

            if use_trust_region:
                optimizer_args["leq_constraint"] = (mean_kl, max_kl_step)
                optimizer_args["inputs"] = [
                    xs_var, ys_var, old_means_var, old_log_stds_var
                ]
            else:
                optimizer_args["inputs"] = [xs_var, ys_var]

            self._optimizer.update_opt(**optimizer_args)

            self._use_trust_region = use_trust_region
            self._name = name

            self._normalize_inputs = normalize_inputs
            self._normalize_outputs = normalize_outputs
            self._mean_network = mean_network
            self._x_mean_var = x_mean_var
            self._x_std_var = x_std_var
            self._y_mean_var = y_mean_var
            self._y_std_var = y_std_var

            # Optionally create assign operations for normalization
            if self._normalize_inputs:
                self._x_mean_var_ph = tf.placeholder(
                    shape=(1, ) + input_shape,
                    dtype=tf.float32,
                )
                self._x_std_var_ph = tf.placeholder(
                    shape=(1, ) + input_shape,
                    dtype=tf.float32,
                )
                self._assign_x_mean = tf.assign(self._x_mean_var,
                                                self._x_mean_var_ph)
                self._assign_x_std = tf.assign(self._x_std_var,
                                               self._x_std_var_ph)
            if self._normalize_outputs:
                self._y_mean_var_ph = tf.placeholder(
                    shape=(1, output_dim),
                    dtype=tf.float32,
                )
                self._y_std_var_ph = tf.placeholder(
                    shape=(1, output_dim),
                    dtype=tf.float32,
                )
                self._assign_y_mean = tf.assign(self._y_mean_var,
                                                self._y_mean_var_ph)
                self._assign_y_std = tf.assign(self._y_std_var,
                                               self._y_std_var_ph)
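A hedged end-to-end sketch of driving a regressor like this one; fit() and predict() are assumed entry points of the class (they are not part of this excerpt), and the toy data is illustrative:

# Hedged usage sketch; fit()/predict() are assumed, not shown in the excerpt.
import numpy as np

reg = GaussianMLPRegressor(input_shape=(4, ), output_dim=1)
xs = np.random.randn(128, 4).astype(np.float32)
ys = xs.sum(axis=1, keepdims=True).astype(np.float32)
reg.fit(xs, ys)          # assumed training entry point
means = reg.predict(xs)  # assumed to return the predicted means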
Example #9
    def __init__(
        self,
        input_shape,
        output_dim,
        name='CategoricalMLPRegressor',
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        max_kl_step=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the
        probability network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the
        probability network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param max_kl_step: KL divergence constraint for each iteration
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        with tf.compat.v1.variable_scope(name, 'CategoricalMLPRegressor'):
            if optimizer is None:
                optimizer = LbfgsOptimizer()
            if tr_optimizer is None:
                tr_optimizer = ConjugateGradientOptimizer()

            self.output_dim = output_dim
            self.optimizer = optimizer
            self.tr_optimizer = tr_optimizer

            self._prob_network_name = 'prob_network'
            if prob_network is None:
                prob_network = MLP(input_shape=input_shape,
                                   output_dim=output_dim,
                                   hidden_sizes=hidden_sizes,
                                   hidden_nonlinearity=hidden_nonlinearity,
                                   output_nonlinearity=tf.nn.softmax,
                                   name=self._prob_network_name)

            l_prob = prob_network.output_layer

            LayersPowered.__init__(self, [l_prob])

            xs_var = prob_network.input_layer.input_var
            ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                              shape=[None, output_dim],
                                              name='ys')
            old_prob_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                                    shape=[None, output_dim],
                                                    name='old_prob')

            x_mean_var = tf.compat.v1.get_variable(
                name='x_mean',
                shape=(1, ) + input_shape,
                initializer=tf.constant_initializer(0., dtype=tf.float32))
            x_std_var = tf.compat.v1.get_variable(
                name='x_std',
                shape=(1, ) + input_shape,
                initializer=tf.constant_initializer(1., dtype=tf.float32))

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var

            with tf.name_scope(self._prob_network_name,
                               values=[normalized_xs_var]):
                prob_var = L.get_output(
                    l_prob, {prob_network.input_layer: normalized_xs_var})

            old_info_vars = dict(prob=old_prob_var)
            info_vars = dict(prob=prob_var)

            dist = self._dist = Categorical(output_dim)

            mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

            loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

            predicted = tf.one_hot(tf.argmax(prob_var, axis=1),
                                   depth=output_dim)

            self.prob_network = prob_network
            self.f_predict = tensor_utils.compile_function([xs_var], predicted)
            self.f_prob = tensor_utils.compile_function([xs_var], prob_var)
            self.l_prob = l_prob

            self.optimizer.update_opt(loss=loss,
                                      target=self,
                                      network_outputs=[prob_var],
                                      inputs=[xs_var, ys_var])
            self.tr_optimizer.update_opt(loss=loss,
                                         target=self,
                                         network_outputs=[prob_var],
                                         inputs=[xs_var, ys_var, old_prob_var],
                                         leq_constraint=(mean_kl, max_kl_step))

            self.use_trust_region = use_trust_region
            self.name = name

            self.normalize_inputs = normalize_inputs
            self.x_mean_var = x_mean_var
            self.x_std_var = x_std_var
            self.first_optimized = not no_initial_trust_region
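What f_predict computes, sketched in NumPy: the argmax class of each probability row, re-encoded as a one-hot vector, mirroring the tf.one_hot(tf.argmax(...)) graph above:

# NumPy mirror of predicted = tf.one_hot(tf.argmax(prob_var, axis=1), depth=3).
import numpy as np

probs = np.array([[0.1, 0.7, 0.2]])
predicted = np.eye(3)[probs.argmax(axis=1)]  # -> [[0., 1., 0.]]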
Example #10
 def __init__(self, output_layers, input_layers=None):
     self._output_layers = output_layers
     self._input_layers = input_layers
     Parameterized.__init__(self)
Example #11
    def __init__(
        self,
        input_shape,
        output_dim,
        name="BernoulliMLPRegressor",
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.relu,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the
        probability network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the
        probability network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):

            if optimizer is None:
                optimizer = LbfgsOptimizer()
            if tr_optimizer is None:
                tr_optimizer = ConjugateGradientOptimizer()

            self.output_dim = output_dim
            self.optimizer = optimizer
            self.tr_optimizer = tr_optimizer

            p_network = MLP(input_shape=input_shape,
                            output_dim=output_dim,
                            hidden_sizes=hidden_sizes,
                            hidden_nonlinearity=hidden_nonlinearity,
                            output_nonlinearity=tf.nn.sigmoid,
                            name="p_network")

            l_p = p_network.output_layer

            LayersPowered.__init__(self, [l_p])

            xs_var = p_network.input_layer.input_var
            ys_var = tf.placeholder(dtype=tf.float32,
                                    shape=(None, output_dim),
                                    name="ys")
            old_p_var = tf.placeholder(dtype=tf.float32,
                                       shape=(None, output_dim),
                                       name="old_p")

            x_mean_var = tf.get_variable(name="x_mean",
                                         initializer=tf.zeros_initializer(),
                                         shape=(1, ) + input_shape)
            x_std_var = tf.get_variable(name="x_std",
                                        initializer=tf.ones_initializer(),
                                        shape=(1, ) + input_shape)

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var

            p_var = L.get_output(l_p,
                                 {p_network.input_layer: normalized_xs_var})

            old_info_vars = dict(p=old_p_var)
            info_vars = dict(p=p_var)

            dist = self._dist = Bernoulli(output_dim)

            mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

            loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

            predicted = p_var >= 0.5

            self.f_predict = tensor_utils.compile_function([xs_var], predicted)
            self.f_p = tensor_utils.compile_function([xs_var], p_var)
            self.l_p = l_p

            self.optimizer.update_opt(loss=loss,
                                      target=self,
                                      network_outputs=[p_var],
                                      inputs=[xs_var, ys_var])
            self.tr_optimizer.update_opt(loss=loss,
                                         target=self,
                                         network_outputs=[p_var],
                                         inputs=[xs_var, ys_var, old_p_var],
                                         leq_constraint=(mean_kl, step_size))

            self.use_trust_region = use_trust_region
            self.name = name

            self.normalize_inputs = normalize_inputs
            self.x_mean_var = x_mean_var
            self.x_std_var = x_std_var
            self.first_optimized = not no_initial_trust_region
Example #12
    def __init__(
        self,
        input_shape,
        output_dim,
        name="DeterministicMLPRegressor",
        network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
        optimizer=None,
        optimizer_args=None,
        normalize_inputs=True,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the
        network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the
        network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name, "DeterministicMLPRegressor"):
            if optimizer_args is None:
                optimizer_args = dict()

            if optimizer is None:
                optimizer = LbfgsOptimizer(**optimizer_args)
            else:
                optimizer = optimizer(**optimizer_args)

            self.output_dim = output_dim
            self.optimizer = optimizer

            self._network_name = "network"
            if network is None:
                network = MLP(input_shape=input_shape,
                              output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              hidden_nonlinearity=hidden_nonlinearity,
                              output_nonlinearity=output_nonlinearity,
                              name=self._network_name)

            l_out = network.output_layer

            LayersPowered.__init__(self, [l_out])

            xs_var = network.input_layer.input_var
            ys_var = tf.placeholder(dtype=tf.float32,
                                    shape=[None, output_dim],
                                    name="ys")

            x_mean_var = tf.get_variable(name="x_mean",
                                         shape=(1, ) + input_shape,
                                         initializer=tf.constant_initializer(
                                             0., dtype=tf.float32))
            x_std_var = tf.get_variable(name="x_std",
                                        shape=(1, ) + input_shape,
                                        initializer=tf.constant_initializer(
                                            1., dtype=tf.float32))

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var

            with tf.name_scope(self._network_name, values=[normalized_xs_var]):
                fit_ys_var = L.get_output(
                    l_out, {network.input_layer: normalized_xs_var})

            # Mean squared error between fitted outputs and targets.
            loss = tf.reduce_mean(tf.square(fit_ys_var - ys_var))

            self.f_predict = tensor_utils.compile_function([xs_var],
                                                           fit_ys_var)

            optimizer_args = dict(
                loss=loss,
                target=self,
                network_outputs=[fit_ys_var],
            )

            optimizer_args["inputs"] = [xs_var, ys_var]

            self.optimizer.update_opt(**optimizer_args)

            self.name = name
            self.l_out = l_out

            self.normalize_inputs = normalize_inputs
            self.x_mean_var = x_mean_var
            self.x_std_var = x_std_var
Example #13
    def __init__(self,
                 env_spec,
                 embedding,
                 task_space,
                 name="GaussianMLPMultitaskPolicy",
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parameterization='exp'):
        """
        :param env_spec: observation space is a concatenation of task space and
          vanilla env observation space
        :param hidden_sizes: list of sizes for the fully-connected hidden
          layers
        :param learn_std: whether the std is trainable
        :param init_std: initial std value
        :param adaptive_std: whether to make the std a function of the
          observations
        :param std_share_network: whether the std network shares weights with
          the mean network
        :param std_hidden_sizes: list of sizes for the fully-connected layers
          for std
        :param min_std: minimum std value, to avoid numerical issues
        :param max_std: maximum std value
        :param std_hidden_nonlinearity: nonlinearity used for each hidden
          layer of the std network
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parameterization: how the std should be parameterized.
          There are a few options:
            - exp: the logarithm of the std is stored, and an exponential
                transformation is applied to recover it
            - softplus: the std is computed as log(1 + exp(x))
        """
        assert isinstance(env_spec.action_space, Box)
        StochasticMultitaskPolicy.__init__(self, env_spec, embedding,
                                           task_space)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        if mean_network or std_network:
            raise NotImplementedError

        self.name = name
        self._variable_scope = tf.variable_scope(self.name,
                                                 reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # TODO: eliminate
        self._dist = DiagonalGaussian(self.action_space.flat_dim)

        # Network parameters
        self._hidden_sizes = hidden_sizes
        self._learn_std = learn_std
        self._init_std = init_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._std_hidden_sizes = std_hidden_sizes
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._output_nonlinearity = output_nonlinearity
        self._mean_network = mean_network
        self._std_network = std_network
        self._std_parameterization = std_parameterization

        # Transform std arguments to parameterized space
        self._init_std_param = None
        self._min_std_param = None
        self._max_std_param = None
        if self._std_parameterization == 'exp':
            self._init_std_param = np.log(init_std)
            if min_std:
                self._min_std_param = np.log(min_std)
            if max_std:
                self._max_std_param = np.log(max_std)
        elif self._std_parameterization == 'softplus':
            self._init_std_param = np.log(np.exp(init_std) - 1)
            if min_std:
                self._min_std_param = np.log(np.exp(min_std) - 1)
            if max_std:
                self._max_std_param = np.log(np.exp(max_std) - 1)
        else:
            raise NotImplementedError

        # Build default graph
        with self._name_scope:
            # inputs
            self._task_input = self._embedding._input
            self._latent_input = self.latent_space.new_tensor_variable(
                name="latent_input", extra_dims=1)
            self._obs_input = self.observation_space.new_tensor_variable(
                name="obs_input", extra_dims=1)

            with tf.name_scope("default",
                               values=[self._task_input, self._obs_input]):
                # network (connect with embedding)
                latent = self._embedding.latent
                latent_mean = self._embedding.latent_mean
                latent_std_param = self._embedding.latent_std_param
                action_var, mean_var, std_param_var, dist = self._build_graph(
                    latent, self._obs_input)

                # outputs
                self._action = tf.identity(action_var, name="action")
                self._action_mean = tf.identity(mean_var, name="action_mean")
                self._action_std_param = tf.identity(std_param_var,
                                                     "action_std_param")
                self._action_distribution = dist

            # special auxiliary graph for feedforward using only latents
            with tf.name_scope("from_latent",
                               values=[self._latent_input, self._obs_input]):
                action_var, mean_var, std_param_var, dist = self._build_graph(
                    self._latent_input, self._obs_input)

                # auxiliary outputs
                self._action_from_latent = action_var
                self._action_mean_from_latent = mean_var
                self._action_std_param_from_latent = std_param_var
                self._action_distribution_from_latent = dist

            # compiled functions
            with tf.variable_scope("f_dist_task_obs"):
                self.f_dist_task_obs = tensor_utils.compile_function(
                    inputs=[self._task_input, self._obs_input],
                    outputs=[
                        self._action, self._action_mean,
                        self._action_std_param, latent, latent_mean,
                        latent_std_param
                    ],
                )
            with tf.variable_scope("f_dist_latent_obs"):
                self.f_dist_latent_obs = tensor_utils.compile_function(
                    inputs=[self._latent_input, self._obs_input],
                    outputs=[
                        self._action_from_latent,
                        self._action_mean_from_latent,
                        self._action_std_param_from_latent
                    ],
                )
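The two std_parameterization options map the std into an unconstrained parameter space. The transforms used in the constructor and their inverses round-trip, as this NumPy check shows:

# Round-trip check for the two std parameterizations used above.
import numpy as np

std = 1.5
p_exp = np.log(std)                    # 'exp': param = log(std)
assert np.isclose(np.exp(p_exp), std)  # recover: std = exp(param)

p_sp = np.log(np.exp(std) - 1)         # 'softplus': param = inverse softplus of std
assert np.isclose(np.log1p(np.exp(p_sp)), std)  # recover: std = log(1 + exp(param))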
Example #14
 def __init__(self, embedding_spec):
     Parameterized.__init__(self)
     self._embedding_spec = embedding_spec
Example #15
    def __init__(self,
                 embedding_spec,
                 name="GaussianMLPEmbedding",
                 hidden_sizes=(32, 32),
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_hidden_sizes=(32, 32),
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_nonlinearity=tf.nn.tanh,
                 hidden_nonlinearity=tf.nn.tanh,
                 mean_scale=1.,
                 output_nonlinearity=None,
                 mean_network=None,
                 std_network=None,
                 std_parameterization='exp',
                 normalize=False,
                 mean_output_nonlinearity=None):
        """
        :param embedding_spec: specification of the embedding's input and
          latent spaces
        :param hidden_sizes: list of sizes for the fully-connected hidden
          layers
        :param learn_std: whether the std is trainable
        :param init_std: initial std value
        :param adaptive_std: whether to make the std a function of the inputs
        :param std_share_network: whether the std network shares weights with
          the mean network
        :param std_hidden_sizes: list of sizes for the fully-connected layers
            for std
        :param min_std: minimum std value, to avoid numerical issues
        :param max_std: maximum std value
        :param std_hidden_nonlinearity: nonlinearity used for each hidden
            layer of the std network
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param mean_scale: scale factor applied to the latent mean
        :param output_nonlinearity: nonlinearity for the output layer
        :param mean_network: custom network for the output mean
        :param std_network: custom network for the output log std
        :param std_parameterization: how the std should be parameterized.
            There are a few options:
            - exp: the logarithm of the std is stored, and an exponential
                  transformation is applied to recover it
            - softplus: the std is computed as log(1 + exp(x))
        :param normalize: if True, bound the std so that the latent variance
            is at most 1 / latent_dim (see the normalize branch below)
        :param mean_output_nonlinearity: nonlinearity for the mean output
            layer
        """
        assert isinstance(embedding_spec.latent_space, Box)
        StochasticEmbedding.__init__(self, embedding_spec)
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        if mean_network or std_network:
            raise NotImplementedError

        self.name = name
        self._variable_scope = tf.variable_scope(
            self.name, reuse=tf.AUTO_REUSE)
        self._name_scope = tf.name_scope(self.name)

        # TODO: eliminate
        self._dist = DiagonalGaussian(self.latent_space.flat_dim)

        # Network parameters
        self._hidden_sizes = hidden_sizes
        self._learn_std = learn_std
        self._init_std = init_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._std_hidden_sizes = std_hidden_sizes
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._output_nonlinearity = output_nonlinearity
        self._mean_network = mean_network
        self._std_network = std_network
        self._std_parameterization = std_parameterization
        self._normalize = normalize
        self._mean_output_nonlinearity = mean_output_nonlinearity

        if self._normalize:
            latent_dim = self.latent_space.flat_dim
            self._max_std = np.sqrt(1.0 / latent_dim)
            self._init_std = self._max_std / 2.0

        # Transform std arguments to parameterized space
        self._init_std_param = None
        self._min_std_param = None
        self._max_std_param = None
        if self._std_parameterization == 'exp':
            self._init_std_param = np.log(self._init_std)
            if self._min_std:
                self._min_std_param = np.log(self._min_std)
            if self._max_std:
                self._max_std_param = np.log(self._max_std)
        elif self._std_parameterization == 'softplus':
            self._init_std_param = np.log(np.exp(self._init_std) - 1)
            if self._min_std:
                self._min_std_param = np.log(np.exp(self._min_std) - 1)
            if self._max_std:
                self._max_std_param = np.log(np.exp(self._max_std) - 1)
        else:
            raise NotImplementedError

        self._mean_scale = mean_scale

        # Build default graph
        with self._name_scope:
            # inputs
            self._input = self.input_space.new_tensor_variable(
                name="input", extra_dims=1)

            with tf.name_scope("default", values=[self._input]):
                # network
                latent_var, mean_var, std_param_var, dist = self._build_graph(
                    self._input)

            # outputs
            self._latent = tf.identity(latent_var, name="latent")
            self._latent_mean = tf.identity(mean_var, name="latent_mean")
            self._latent_std_param = tf.identity(std_param_var,
                                                 "latent_std_param")
            self._latent_distribution = dist

            # compiled functions
            with tf.variable_scope("f_dist"):
                self._f_dist = tensor_utils.compile_function(
                    inputs=[self._input],
                    outputs=[
                        self._latent, self._latent_mean, self._latent_std_param
                    ],
                )
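With normalize=True the constructor caps the std as a function of the latent dimension, bounding each coordinate's variance by 1 / latent_dim; evaluated for a 4-dimensional latent space:

# The normalize=True branch above, worked for latent_dim = 4.
import numpy as np

latent_dim = 4
max_std = np.sqrt(1.0 / latent_dim)  # 0.5
init_std = max_std / 2.0             # 0.25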
Example #16
    def __init__(self,
                 input_shape,
                 output_dim,
                 conv_filters,
                 conv_filter_sizes,
                 conv_strides,
                 conv_pads,
                 hidden_sizes,
                 hidden_nonlinearity=tf.nn.tanh,
                 output_nonlinearity=None,
                 name='GaussianConvRegressor',
                 mean_network=None,
                 learn_std=True,
                 init_std=1.0,
                 adaptive_std=False,
                 std_share_network=False,
                 std_conv_filters=(),
                 std_conv_filter_sizes=(),
                 std_conv_strides=(),
                 std_conv_pads=(),
                 std_hidden_sizes=(),
                 std_hidden_nonlinearity=None,
                 std_output_nonlinearity=None,
                 normalize_inputs=True,
                 normalize_outputs=True,
                 subsample_factor=1.,
                 optimizer=None,
                 optimizer_args=None,
                 use_trust_region=True,
                 max_kl_step=0.01):
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        self._mean_network_name = 'mean_network'
        self._std_network_name = 'std_network'

        with tf.compat.v1.variable_scope(name):
            if optimizer_args is None:
                optimizer_args = dict()

            if optimizer is None:
                if use_trust_region:
                    optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
                else:
                    optimizer = LbfgsOptimizer(**optimizer_args)
            else:
                optimizer = optimizer(**optimizer_args)

            self._optimizer = optimizer
            self._subsample_factor = subsample_factor

            if mean_network is None:
                if std_share_network:
                    b = np.concatenate(
                        [
                            np.zeros(output_dim),
                            np.full(output_dim, np.log(init_std))
                        ],
                        axis=0)  # yapf: disable
                    b = tf.constant_initializer(b)
                    mean_network = ConvNetwork(
                        name=self._mean_network_name,
                        input_shape=input_shape,
                        output_dim=2 * output_dim,
                        conv_filters=conv_filters,
                        conv_filter_sizes=conv_filter_sizes,
                        conv_strides=conv_strides,
                        conv_pads=conv_pads,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity,
                        output_b_init=b)
                    l_mean = layers.SliceLayer(
                        mean_network.output_layer,
                        slice(output_dim),
                        name='mean_slice',
                    )
                else:
                    mean_network = ConvNetwork(
                        name=self._mean_network_name,
                        input_shape=input_shape,
                        output_dim=output_dim,
                        conv_filters=conv_filters,
                        conv_filter_sizes=conv_filter_sizes,
                        conv_strides=conv_strides,
                        conv_pads=conv_pads,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity)
                    l_mean = mean_network.output_layer

            if adaptive_std:
                l_log_std = ConvNetwork(
                    name=self._std_network_name,
                    input_shape=input_shape,
                    output_dim=output_dim,
                    conv_filters=std_conv_filters,
                    conv_filter_sizes=std_conv_filter_sizes,
                    conv_strides=std_conv_strides,
                    conv_pads=std_conv_pads,
                    hidden_sizes=std_hidden_sizes,
                    hidden_nonlinearity=std_hidden_nonlinearity,
                    output_nonlinearity=std_output_nonlinearity,
                    output_b_init=tf.constant_initializer(np.log(init_std)),
                ).output_layer
            elif std_share_network:
                l_log_std = layers.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim, 2 * output_dim),
                    name='log_std_slice',
                )
            else:
                l_log_std = layers.ParamLayer(
                    mean_network.input_layer,
                    num_units=output_dim,
                    param=tf.constant_initializer(np.log(init_std)),
                    trainable=learn_std,
                    name=self._std_network_name,
                )

            LayersPowered.__init__(self, [l_mean, l_log_std])

            xs_var = mean_network.input_layer.input_var
            ys_var = tf.compat.v1.placeholder(
                dtype=tf.float32, name='ys', shape=(None, output_dim))
            old_means_var = tf.compat.v1.placeholder(
                dtype=tf.float32, name='old_means', shape=(None, output_dim))
            old_log_stds_var = tf.compat.v1.placeholder(
                dtype=tf.float32,
                name='old_log_stds',
                shape=(None, output_dim))

            x_mean_var = tf.Variable(
                np.zeros((1, np.prod(input_shape)), dtype=np.float32),
                name='x_mean',
            )
            x_std_var = tf.Variable(
                np.ones((1, np.prod(input_shape)), dtype=np.float32),
                name='x_std',
            )
            y_mean_var = tf.Variable(
                np.zeros((1, output_dim), dtype=np.float32),
                name='y_mean',
            )
            y_std_var = tf.Variable(
                np.ones((1, output_dim), dtype=np.float32),
                name='y_std',
            )

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var
            normalized_ys_var = (ys_var - y_mean_var) / y_std_var

            with tf.name_scope(
                    self._mean_network_name, values=[normalized_xs_var]):
                normalized_means_var = layers.get_output(
                    l_mean, {mean_network.input_layer: normalized_xs_var})
            with tf.name_scope(
                    self._std_network_name, values=[normalized_xs_var]):
                normalized_log_stds_var = layers.get_output(
                    l_log_std, {mean_network.input_layer: normalized_xs_var})

            means_var = normalized_means_var * y_std_var + y_mean_var
            log_stds_var = normalized_log_stds_var + tf.math.log(y_std_var)

            normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
            normalized_old_log_stds_var = (
                old_log_stds_var - tf.math.log(y_std_var))

            dist = self._dist = DiagonalGaussian(output_dim)

            normalized_dist_info_vars = dict(
                mean=normalized_means_var, log_std=normalized_log_stds_var)

            mean_kl = tf.reduce_mean(
                dist.kl_sym(
                    dict(
                        mean=normalized_old_means_var,
                        log_std=normalized_old_log_stds_var),
                    normalized_dist_info_vars,
                ))

            loss = -tf.reduce_mean(
                dist.log_likelihood_sym(normalized_ys_var,
                                        normalized_dist_info_vars))

            self._f_predict = tensor_utils.compile_function([xs_var],
                                                            means_var)
            self._f_pdists = tensor_utils.compile_function(
                [xs_var], [means_var, log_stds_var])
            self._l_mean = l_mean
            self._l_log_std = l_log_std

            optimizer_args = dict(
                loss=loss,
                target=self,
                network_outputs=[
                    normalized_means_var, normalized_log_stds_var
                ],
            )

            if use_trust_region:
                optimizer_args['leq_constraint'] = (mean_kl, max_kl_step)
                optimizer_args['inputs'] = [
                    xs_var, ys_var, old_means_var, old_log_stds_var
                ]
            else:
                optimizer_args['inputs'] = [xs_var, ys_var]

            self._optimizer.update_opt(**optimizer_args)

            self._use_trust_region = use_trust_region
            self._name = name

            self._normalize_inputs = normalize_inputs
            self._normalize_outputs = normalize_outputs
            self._mean_network = mean_network
            self._x_mean_var = x_mean_var
            self._x_std_var = x_std_var
            self._y_mean_var = y_mean_var
            self._y_std_var = y_std_var
Example #17
 def __init__(self, env_spec):
     Parameterized.__init__(self)
     self._env_spec = env_spec