def __init__(self, wrapped_env=None, wrapped_policy=None):
    self._wrapped_env = wrapped_env
    self._wrapped_policy = wrapped_policy
    self._last_obs = None
    assert isinstance(wrapped_policy, MultitaskPolicy)
    Serializable.quick_init(self, locals())
    Parameterized.__init__(self)
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name='ContinuousMLPBaseline',
):
    """
    Constructor.

    :param env_spec: Environment specification.
    :param subsample_factor: Ratio of samples to use when fitting the
        regressor.
    :param num_seq_inputs: Number of sequence inputs.
    :param regressor_args: Keyword arguments passed to the regressor.
    """
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())
    super(ContinuousMLPBaseline, self).__init__(env_spec)
    if regressor_args is None:
        regressor_args = dict()

    self._regressor = ContinuousMLPRegressor(
        input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs, ),
        output_dim=1,
        name=name,
        **regressor_args)
    self.name = name
def __init__( self, env_spec, subsample_factor=1., num_seq_inputs=1, regressor_args=None, name="GaussianMLPBaseline", ): """ Constructor. :param env_spec: :param subsample_factor: :param num_seq_inputs: :param regressor_args: """ Parameterized.__init__(self) Serializable.quick_init(self, locals()) super(GaussianMLPBaseline, self).__init__(env_spec) if regressor_args is None: regressor_args = dict() self._regressor = GaussianMLPRegressor( input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs, ), output_dim=1, name=name, **regressor_args) self.name = name
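# Usage sketch (editor's addition, not original source). The fit(paths) /
# predict(path) interface follows the rllab/garage Baseline convention and is
# an assumption here; only the constructor arguments are shown above.
def baseline_usage_example(env, paths):
    # env.spec supplies observation_space.flat_dim for the regressor input.
    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args=dict(hidden_sizes=(64, 64)),
    )
    baseline.fit(paths)                # paths: dicts with "observations",
                                       # "returns"
    return baseline.predict(paths[0])  # per-step predicted returns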
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
):
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())
    super(GaussianConvBaseline, self).__init__(env_spec)
    if regressor_args is None:
        regressor_args = dict()

    self._regressor = GaussianConvRegressor(
        input_shape=env_spec.observation_space.shape,
        output_dim=1,
        name="GaussianConvBaseline",
        **regressor_args)
def __init__(self, embedding_spec, name="OneHotEmbedding"): """ :param embedding_spec: :return: """ assert isinstance(embedding_spec.latent_space, Box) assert (embedding_spec.input_space.flat_dim <= embedding_spec.latent_space.flat_dim) StochasticEmbedding.__init__(self, embedding_spec) Parameterized.__init__(self) Serializable.quick_init(self, locals()) self.name = name self._variable_scope = tf.variable_scope(self.name, reuse=tf.AUTO_REUSE) self._name_scope = tf.name_scope(self.name) # Build default graph with self._name_scope: # inputs self._input = self.input_space.new_tensor_variable(name="input", extra_dims=1) with tf.name_scope("default", values=[self._input]): # network latent_var, mean_var, std_param_var, dist = self._build_graph( self._input) # outputs self._latent = tf.identity(latent_var, name="latent") self._latent_mean = tf.identity(mean_var, name="latent_mean") self._latent_std_param = tf.identity(std_param_var, "latent_std_param") self._latent_distribution = dist # compiled functions with tf.variable_scope("f_dist"): self._f_dist = tensor_utils.compile_function( inputs=[self._input], outputs=[ self._latent, self._latent_mean, self._latent_std_param ], )
def __init__(self, wrapped_env=None, wrapped_policy=None):
    assert isinstance(wrapped_policy, MultitaskPolicy)
    Serializable.quick_init(self, locals())
    Parameterized.__init__(self)
    self._wrapped_env = wrapped_env
    self._wrapped_policy = wrapped_policy
    self._last_obs = None

    # Enumerate every unordered pair of task latent means.
    n_task = self._wrapped_policy.task_space.flat_dim
    one_hots = np.identity(n_task)
    latents, infos = self._wrapped_policy._embedding.get_latents(one_hots)
    latents_means = infos["mean"]
    self._latents_combination_hash = list()
    for i in range(n_task):
        for j in range(i + 1, n_task):
            self._latents_combination_hash.append(
                (latents_means[i, ...], latents_means[j, ...]))
    self._latents_combination_hash = tuple(self._latents_combination_hash)
    self._n_skills = n_task
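# Editor's illustration (not original source): the nested loop above collects
# every unordered pair of task latent means, i.e. n_task * (n_task - 1) / 2
# tuples. The same enumeration can be written with itertools.combinations;
# `latents_means` is assumed to be an (n_task, latent_dim) array, as returned
# by get_latents() above.
import itertools

import numpy as np


def pairwise_latent_means(latents_means):
    """Return all unordered (mean_i, mean_j) pairs with i < j."""
    n_task = latents_means.shape[0]
    return tuple(
        (latents_means[i], latents_means[j])
        for i, j in itertools.combinations(range(n_task), 2))


# e.g. 4 tasks -> 6 unordered pairs
assert len(pairwise_latent_means(np.zeros((4, 2)))) == 6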
def __init__(self, input_shape, output_dim, name):
    Parameterized.__init__(self)
    self._input_shape = input_shape
    self._output_dim = output_dim
    self._name = name
    self._variable_scope = None
def __init__(self, input_shape, output_dim, name="GaussianMLPRegressor", mean_network=None, hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.tanh, optimizer=None, optimizer_args=None, use_trust_region=True, max_kl_step=0.01, learn_std=True, init_std=1.0, adaptive_std=False, std_share_network=False, std_hidden_sizes=(32, 32), std_nonlinearity=None, normalize_inputs=True, normalize_outputs=True, subsample_factor=1.0): """ :param input_shape: Shape of the input data. :param output_dim: Dimension of output. :param hidden_sizes: Number of hidden units of each layer of the mean network. :param hidden_nonlinearity: Non-linearity used for each layer of the mean network. :param optimizer: Optimizer for minimizing the negative log-likelihood. :param use_trust_region: Whether to use trust region constraint. :param max_kl_step: KL divergence constraint for each iteration :param learn_std: Whether to learn the standard deviations. Only effective if adaptive_std is False. If adaptive_std is True, this parameter is ignored, and the weights for the std network are always earned. :param adaptive_std: Whether to make the std a function of the states. :param std_share_network: Whether to use the same network as the mean. :param std_hidden_sizes: Number of hidden units of each layer of the std network. Only used if `std_share_network` is False. It defaults to the same architecture as the mean. :param std_nonlinearity: Non-linearity used for each layer of the std network. Only used if `std_share_network` is False. It defaults to the same non-linearity as the mean. """ Parameterized.__init__(self) Serializable.quick_init(self, locals()) self._mean_network_name = "mean_network" self._std_network_name = "std_network" with tf.variable_scope(name): if optimizer_args is None: optimizer_args = dict() if optimizer is None: if use_trust_region: optimizer = PenaltyLbfgsOptimizer(**optimizer_args) else: optimizer = LbfgsOptimizer(**optimizer_args) else: optimizer = optimizer(**optimizer_args) self._optimizer = optimizer self._subsample_factor = subsample_factor if mean_network is None: if std_share_network: mean_network = MLP( name="mean_network", input_shape=input_shape, output_dim=2 * output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=None, ) l_mean = L.SliceLayer( mean_network.output_layer, slice(output_dim), name="mean_slice", ) else: mean_network = MLP( name="mean_network", input_shape=input_shape, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=None, ) l_mean = mean_network.output_layer if adaptive_std: l_log_std = MLP( name="log_std_network", input_shape=input_shape, input_var=mean_network.input_layer.input_var, output_dim=output_dim, hidden_sizes=std_hidden_sizes, hidden_nonlinearity=std_nonlinearity, output_nonlinearity=None, ).output_layer elif std_share_network: l_log_std = L.SliceLayer( mean_network.output_layer, slice(output_dim, 2 * output_dim), name="log_std_slice", ) else: l_log_std = L.ParamLayer( mean_network.input_layer, num_units=output_dim, param=tf.constant_initializer(np.log(init_std)), name="output_log_std", trainable=learn_std, ) LayersPowered.__init__(self, [l_mean, l_log_std]) xs_var = mean_network.input_layer.input_var ys_var = tf.placeholder(dtype=tf.float32, name="ys", shape=(None, output_dim)) old_means_var = tf.placeholder(dtype=tf.float32, name="ys", shape=(None, output_dim)) old_log_stds_var = tf.placeholder(dtype=tf.float32, name="old_log_stds", shape=(None, output_dim)) 
x_mean_var = tf.Variable( np.zeros((1, ) + input_shape, dtype=np.float32), name="x_mean", ) x_std_var = tf.Variable( np.ones((1, ) + input_shape, dtype=np.float32), name="x_std", ) y_mean_var = tf.Variable( np.zeros((1, output_dim), dtype=np.float32), name="y_mean", ) y_std_var = tf.Variable( np.ones((1, output_dim), dtype=np.float32), name="y_std", ) normalized_xs_var = (xs_var - x_mean_var) / x_std_var normalized_ys_var = (ys_var - y_mean_var) / y_std_var with tf.name_scope(self._mean_network_name, values=[normalized_xs_var]): normalized_means_var = L.get_output( l_mean, {mean_network.input_layer: normalized_xs_var}) with tf.name_scope(self._std_network_name, values=[normalized_xs_var]): normalized_log_stds_var = L.get_output( l_log_std, {mean_network.input_layer: normalized_xs_var}) means_var = normalized_means_var * y_std_var + y_mean_var log_stds_var = normalized_log_stds_var + tf.log(y_std_var) normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var normalized_old_log_stds_var = old_log_stds_var - tf.log(y_std_var) dist = self._dist = DiagonalGaussian(output_dim) normalized_dist_info_vars = dict(mean=normalized_means_var, log_std=normalized_log_stds_var) mean_kl = tf.reduce_mean( dist.kl_sym( dict(mean=normalized_old_means_var, log_std=normalized_old_log_stds_var), normalized_dist_info_vars, )) loss = -tf.reduce_mean( dist.log_likelihood_sym(normalized_ys_var, normalized_dist_info_vars)) self._f_predict = tensor_utils.compile_function([xs_var], means_var) self._f_pdists = tensor_utils.compile_function( [xs_var], [means_var, log_stds_var]) self._l_mean = l_mean self._l_log_std = l_log_std optimizer_args = dict( loss=loss, target=self, network_outputs=[ normalized_means_var, normalized_log_stds_var ], ) if use_trust_region: optimizer_args["leq_constraint"] = (mean_kl, max_kl_step) optimizer_args["inputs"] = [ xs_var, ys_var, old_means_var, old_log_stds_var ] else: optimizer_args["inputs"] = [xs_var, ys_var] self._optimizer.update_opt(**optimizer_args) self._use_trust_region = use_trust_region self._name = name self._normalize_inputs = normalize_inputs self._normalize_outputs = normalize_outputs self._mean_network = mean_network self._x_mean_var = x_mean_var self._x_std_var = x_std_var self._y_mean_var = y_mean_var self._y_std_var = y_std_var # Optionally create assign operations for normalization if self._normalize_inputs: self._x_mean_var_ph = tf.placeholder( shape=(1, ) + input_shape, dtype=tf.float32, ) self._x_std_var_ph = tf.placeholder( shape=(1, ) + input_shape, dtype=tf.float32, ) self._assign_x_mean = tf.assign(self._x_mean_var, self._x_mean_var_ph) self._assign_x_std = tf.assign(self._x_std_var, self._x_std_var_ph) if self._normalize_outputs: self._y_mean_var_ph = tf.placeholder( shape=(1, output_dim), dtype=tf.float32, ) self._y_std_var_ph = tf.placeholder( shape=(1, output_dim), dtype=tf.float32, ) self._assign_y_mean = tf.assign(self._y_mean_var, self._y_mean_var_ph) self._assign_y_std = tf.assign(self._y_std_var, self._y_std_var_ph)
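# Usage sketch (editor's addition, not original source). It assumes the
# regressor exposes rllab/garage-style fit(xs, ys) and predict(xs) methods
# and that the compiled functions run against the default TF session; both
# are assumptions, not shown in the constructor above.
import numpy as np
import tensorflow as tf


def regressor_usage_example():
    xs = np.random.randn(128, 4).astype(np.float32)   # input_shape=(4, )
    ys = np.sum(xs, axis=1, keepdims=True)            # output_dim=1
    with tf.Session() as sess:
        regressor = GaussianMLPRegressor(input_shape=(4, ), output_dim=1)
        sess.run(tf.global_variables_initializer())
        regressor.fit(xs, ys)         # minimizes the NLL loss built above
        return regressor.predict(xs)  # de-normalized predicted means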
def __init__(
        self,
        input_shape,
        output_dim,
        name='CategoricalMLPRegressor',
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        optimizer=None,
        tr_optimizer=None,
        use_trust_region=True,
        max_kl_step=0.01,
        normalize_inputs=True,
        no_initial_trust_region=True,
):
    """
    :param input_shape: Shape of the input data.
    :param output_dim: Dimension of output.
    :param hidden_sizes: Number of hidden units of each layer of the mean
        network.
    :param hidden_nonlinearity: Non-linearity used for each layer of the
        mean network.
    :param optimizer: Optimizer for minimizing the negative log-likelihood.
    :param use_trust_region: Whether to use trust region constraint.
    :param max_kl_step: KL divergence constraint for each iteration.
    """
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())

    with tf.compat.v1.variable_scope(name, 'CategoricalMLPRegressor'):
        if optimizer is None:
            optimizer = LbfgsOptimizer()
        if tr_optimizer is None:
            tr_optimizer = ConjugateGradientOptimizer()

        self.output_dim = output_dim
        self.optimizer = optimizer
        self.tr_optimizer = tr_optimizer

        self._prob_network_name = 'prob_network'
        if prob_network is None:
            prob_network = MLP(input_shape=input_shape,
                               output_dim=output_dim,
                               hidden_sizes=hidden_sizes,
                               hidden_nonlinearity=hidden_nonlinearity,
                               output_nonlinearity=tf.nn.softmax,
                               name=self._prob_network_name)

        l_prob = prob_network.output_layer

        LayersPowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                          shape=[None, output_dim],
                                          name='ys')
        old_prob_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                                shape=[None, output_dim],
                                                name='old_prob')

        x_mean_var = tf.compat.v1.get_variable(
            name='x_mean',
            shape=(1, ) + input_shape,
            initializer=tf.constant_initializer(0., dtype=tf.float32))
        x_std_var = tf.compat.v1.get_variable(
            name='x_std',
            shape=(1, ) + input_shape,
            initializer=tf.constant_initializer(1., dtype=tf.float32))

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        with tf.name_scope(self._prob_network_name,
                           values=[normalized_xs_var]):
            prob_var = L.get_output(
                l_prob, {prob_network.input_layer: normalized_xs_var})

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

        loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = tf.one_hot(tf.argmax(prob_var, axis=1),
                               depth=output_dim)

        self.prob_network = prob_network
        self.f_predict = tensor_utils.compile_function([xs_var], predicted)
        self.f_prob = tensor_utils.compile_function([xs_var], prob_var)
        self.l_prob = l_prob

        self.optimizer.update_opt(loss=loss,
                                  target=self,
                                  network_outputs=[prob_var],
                                  inputs=[xs_var, ys_var])
        self.tr_optimizer.update_opt(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
            inputs=[xs_var, ys_var, old_prob_var],
            leq_constraint=(mean_kl, max_kl_step))

        self.use_trust_region = use_trust_region
        self.name = name

        self.normalize_inputs = normalize_inputs
        self.x_mean_var = x_mean_var
        self.x_std_var = x_std_var
        self.first_optimized = not no_initial_trust_region
def __init__(self, output_layers, input_layers=None):
    self._output_layers = output_layers
    self._input_layers = input_layers
    Parameterized.__init__(self)
def __init__( self, input_shape, output_dim, name="BernoulliMLPRegressor", hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.relu, optimizer=None, tr_optimizer=None, use_trust_region=True, step_size=0.01, normalize_inputs=True, no_initial_trust_region=True, ): """ :param input_shape: Shape of the input data. :param output_dim: Dimension of output. :param hidden_sizes: Number of hidden units of each layer of the mean network. :param hidden_nonlinearity: Non-linearity used for each layer of the mean network. :param optimizer: Optimizer for minimizing the negative log-likelihood. :param use_trust_region: Whether to use trust region constraint. :param step_size: KL divergence constraint for each iteration """ Parameterized.__init__(self) Serializable.quick_init(self, locals()) with tf.variable_scope(name): if optimizer is None: optimizer = LbfgsOptimizer() if tr_optimizer is None: tr_optimizer = ConjugateGradientOptimizer() self.output_dim = output_dim self.optimizer = optimizer self.tr_optimizer = tr_optimizer p_network = MLP(input_shape=input_shape, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=tf.nn.sigmoid, name="p_network") l_p = p_network.output_layer LayersPowered.__init__(self, [l_p]) xs_var = p_network.input_layer.input_var ys_var = tf.placeholder(dtype=tf.float32, shape=(None, output_dim), name="ys") old_p_var = tf.placeholder(dtype=tf.float32, shape=(None, output_dim), name="old_p") x_mean_var = tf.get_variable(name="x_mean", initializer=tf.zeros_initializer(), shape=(1, ) + input_shape) x_std_var = tf.get_variable(name="x_std", initializer=tf.ones_initializer(), shape=(1, ) + input_shape) normalized_xs_var = (xs_var - x_mean_var) / x_std_var p_var = L.get_output(l_p, {p_network.input_layer: normalized_xs_var}) old_info_vars = dict(p=old_p_var) info_vars = dict(p=p_var) dist = self._dist = Bernoulli(output_dim) mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars)) loss = -tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars)) predicted = p_var >= 0.5 self.f_predict = tensor_utils.compile_function([xs_var], predicted) self.f_p = tensor_utils.compile_function([xs_var], p_var) self.l_p = l_p self.optimizer.update_opt(loss=loss, target=self, network_outputs=[p_var], inputs=[xs_var, ys_var]) self.tr_optimizer.update_opt(loss=loss, target=self, network_outputs=[p_var], inputs=[xs_var, ys_var, old_p_var], leq_constraint=(mean_kl, step_size)) self.use_trust_region = use_trust_region self.name = name self.normalize_inputs = normalize_inputs self.x_mean_var = x_mean_var self.x_std_var = x_std_var self.first_optimized = not no_initial_trust_region
def __init__( self, input_shape, output_dim, name="DeterministicMLPRegressor", network=None, hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.tanh, output_nonlinearity=None, optimizer=None, optimizer_args=None, normalize_inputs=True, ): """ :param input_shape: Shape of the input data. :param output_dim: Dimension of output. :param hidden_sizes: Number of hidden units of each layer of the mean network. :param hidden_nonlinearity: Non-linearity used for each layer of the mean network. :param optimizer: Optimizer for minimizing the negative log-likelihood. """ Parameterized.__init__(self) Serializable.quick_init(self, locals()) with tf.variable_scope(name, "DeterministicMLPRegressor"): if optimizer_args is None: optimizer_args = dict() if optimizer is None: optimizer = LbfgsOptimizer(**optimizer_args) else: optimizer = optimizer(**optimizer_args) self.output_dim = output_dim self.optimizer = optimizer self._network_name = "network" if network is None: network = MLP(input_shape=input_shape, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, output_nonlinearity=output_nonlinearity, name=self._network_name) l_out = network.output_layer LayersPowered.__init__(self, [l_out]) xs_var = network.input_layer.input_var ys_var = tf.placeholder(dtype=tf.float32, shape=[None, output_dim], name="ys") x_mean_var = tf.get_variable(name="x_mean", shape=(1, ) + input_shape, initializer=tf.constant_initializer( 0., dtype=tf.float32)) x_std_var = tf.get_variable(name="x_std", shape=(1, ) + input_shape, initializer=tf.constant_initializer( 1., dtype=tf.float32)) normalized_xs_var = (xs_var - x_mean_var) / x_std_var with tf.name_scope(self._network_name, values=[normalized_xs_var]): fit_ys_var = L.get_output( l_out, {network.input_layer: normalized_xs_var}) loss = -tf.reduce_mean(tf.square(fit_ys_var - ys_var)) self.f_predict = tensor_utils.compile_function([xs_var], fit_ys_var) optimizer_args = dict( loss=loss, target=self, network_outputs=[fit_ys_var], ) optimizer_args["inputs"] = [xs_var, ys_var] self.optimizer.update_opt(**optimizer_args) self.name = name self.l_out = l_out self.normalize_inputs = normalize_inputs self.x_mean_var = x_mean_var self.x_std_var = x_std_var
def __init__(self,
             env_spec,
             embedding,
             task_space,
             name="GaussianMLPMultitaskPolicy",
             hidden_sizes=(32, 32),
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_hidden_sizes=(32, 32),
             min_std=1e-6,
             max_std=None,
             std_hidden_nonlinearity=tf.nn.tanh,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             mean_network=None,
             std_network=None,
             std_parameterization='exp'):
    """
    :param env_spec: Observation space is a concatenation of task space and
        vanilla env observation space.
    :param hidden_sizes: List of sizes for the fully-connected hidden
        layers.
    :param learn_std: Whether the std is trainable.
    :param init_std: Initial std.
    :param adaptive_std: Whether to make the std a function of the
        observations.
    :param std_share_network: Whether the std shares the mean network.
    :param std_hidden_sizes: List of sizes for the fully-connected layers
        for std.
    :param min_std: Whether to make sure that the std is at least some
        threshold value, to avoid numerical issues.
    :param std_hidden_nonlinearity: Nonlinearity used for the std network.
    :param hidden_nonlinearity: Nonlinearity used for each hidden layer.
    :param output_nonlinearity: Nonlinearity for the output layer.
    :param mean_network: Custom network for the output mean.
    :param std_network: Custom network for the output log std.
    :param std_parameterization: How the std should be parameterized. There
        are a few options:
        - exp: the logarithm of the std will be stored, and an exponential
          transformation applied
        - softplus: the std will be computed as log(1+exp(x))
    """
    assert isinstance(env_spec.action_space, Box)
    StochasticMultitaskPolicy.__init__(self, env_spec, embedding,
                                       task_space)
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())

    if mean_network or std_network:
        raise NotImplementedError

    self.name = name
    self._variable_scope = tf.variable_scope(self.name,
                                             reuse=tf.AUTO_REUSE)
    self._name_scope = tf.name_scope(self.name)

    # TODO: eliminate
    self._dist = DiagonalGaussian(self.action_space.flat_dim)

    # Network parameters
    self._hidden_sizes = hidden_sizes
    self._learn_std = learn_std
    self._init_std = init_std
    self._adaptive_std = adaptive_std
    self._std_share_network = std_share_network
    self._std_hidden_sizes = std_hidden_sizes
    self._min_std = min_std
    self._max_std = max_std
    self._std_hidden_nonlinearity = std_hidden_nonlinearity
    self._hidden_nonlinearity = hidden_nonlinearity
    self._output_nonlinearity = output_nonlinearity
    self._mean_network = mean_network
    self._std_network = std_network
    self._std_parameterization = std_parameterization

    # Transform std arguments to parameterized space
    self._init_std_param = None
    self._min_std_param = None
    self._max_std_param = None
    if self._std_parameterization == 'exp':
        self._init_std_param = np.log(init_std)
        if min_std:
            self._min_std_param = np.log(min_std)
        if max_std:
            self._max_std_param = np.log(max_std)
    elif self._std_parameterization == 'softplus':
        self._init_std_param = np.log(np.exp(init_std) - 1)
        if min_std:
            self._min_std_param = np.log(np.exp(min_std) - 1)
        if max_std:
            self._max_std_param = np.log(np.exp(max_std) - 1)
    else:
        raise NotImplementedError

    # Build default graph
    with self._name_scope:
        # inputs
        self._task_input = self._embedding._input
        self._latent_input = self.latent_space.new_tensor_variable(
            name="latent_input", extra_dims=1)
        self._obs_input = self.observation_space.new_tensor_variable(
            name="obs_input", extra_dims=1)

        with tf.name_scope("default",
                           values=[self._task_input, self._obs_input]):
            # network (connect with embedding)
            latent = self._embedding.latent
            latent_mean = self._embedding.latent_mean
            latent_std_param = self._embedding.latent_std_param
            action_var, mean_var, std_param_var, dist = self._build_graph(
                latent, self._obs_input)

            # outputs
            self._action = tf.identity(action_var, name="action")
            self._action_mean = tf.identity(mean_var, name="action_mean")
            self._action_std_param = tf.identity(std_param_var,
                                                 name="action_std_param")
            self._action_distribution = dist

        # special auxiliary graph for feedforward using only latents
        with tf.name_scope("from_latent",
                           values=[self._latent_input, self._obs_input]):
            action_var, mean_var, std_param_var, dist = self._build_graph(
                self._latent_input, self._obs_input)

            # auxiliary outputs
            self._action_from_latent = action_var
            self._action_mean_from_latent = mean_var
            self._action_std_param_from_latent = std_param_var
            self._action_distribution_from_latent = dist

        # compiled functions
        with tf.variable_scope("f_dist_task_obs"):
            self.f_dist_task_obs = tensor_utils.compile_function(
                inputs=[self._task_input, self._obs_input],
                outputs=[
                    self._action, self._action_mean,
                    self._action_std_param, latent, latent_mean,
                    latent_std_param
                ],
            )

        with tf.variable_scope("f_dist_latent_obs"):
            self.f_dist_latent_obs = tensor_utils.compile_function(
                inputs=[self._latent_input, self._obs_input],
                outputs=[
                    self._action_from_latent,
                    self._action_mean_from_latent,
                    self._action_std_param_from_latent
                ],
            )
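# Usage sketch (editor's addition, not original source): how the two compiled
# functions built above can be queried. The one-hot task encoding, the batch
# dimension on the inputs, and running inside an active TF session are all
# assumptions here; only the function names and their input/output lists come
# from the constructor above.
import numpy as np


def query_policy_example(policy, obs, task_id):
    task_onehot = np.zeros(policy.task_space.flat_dim, dtype=np.float32)
    task_onehot[task_id] = 1.0
    # Default path: task one-hot -> embedding -> latent -> action
    outputs = policy.f_dist_task_obs([task_onehot], [obs])
    action, mean, std_param, latent, latent_mean, latent_std = outputs
    # Auxiliary path: feed a latent directly, bypassing the embedding
    action_l, mean_l, std_param_l = policy.f_dist_latent_obs(latent, [obs])
    return action, action_l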
def __init__(self, embedding_spec):
    Parameterized.__init__(self)
    self._embedding_spec = embedding_spec
def __init__(self, embedding_spec, name="GaussianMLPEmbedding", hidden_sizes=(32, 32), learn_std=True, init_std=1.0, adaptive_std=False, std_share_network=False, std_hidden_sizes=(32, 32), min_std=1e-6, max_std=None, std_hidden_nonlinearity=tf.nn.tanh, hidden_nonlinearity=tf.nn.tanh, mean_scale=1., output_nonlinearity=None, mean_network=None, std_network=None, std_parameterization='exp', normalize=False, mean_output_nonlinearity=None): """ :param embedding_spec: :param hidden_sizes: list of sizes for the fully-connected hidden layers :param learn_std: Is std trainable? :param init_std: Inital std :param adaptive_std: :param std_share_network: :param std_hidden_sizes: list of sizes for the fully-connected layers for std :param min_std: whether to make sure that the std is at least some threshold value, to avoid numerical issues :param std_hidden_nonlinearity: :param hidden_nonlinearity: nonlinearity used for each hidden layer :param output_nonlinearity: nonlinearity for the output layer :param mean_network: custom network for the output mean :param std_network: custom network for the output log std :param std_parameterization: how the std should be parameterized. There are a few options: -exp: the logarithm of the std will be stored, and applied an exponential transformation -softplus: the std will be computed as log(1+exp(x)) :return: """ assert isinstance(embedding_spec.latent_space, Box) StochasticEmbedding.__init__(self, embedding_spec) Parameterized.__init__(self) Serializable.quick_init(self, locals()) if mean_network or std_network: raise NotImplementedError self.name = name self._variable_scope = tf.variable_scope( self.name, reuse=tf.AUTO_REUSE) self._name_scope = tf.name_scope(self.name) # TODO: eliminate self._dist = DiagonalGaussian(self.latent_space.flat_dim) # Network parameters self._hidden_sizes = hidden_sizes self._learn_std = learn_std self._init_std = init_std self._adaptive_std = adaptive_std self._std_share_network = std_share_network self._std_hidden_sizes = std_hidden_sizes self._min_std = min_std self._max_std = max_std self._std_hidden_nonlinearity = std_hidden_nonlinearity self._hidden_nonlinearity = hidden_nonlinearity self._output_nonlinearity = output_nonlinearity self._mean_network = mean_network self._std_network = std_network self._std_parameterization = std_parameterization self._normalize = normalize self._mean_output_nonlinearity = mean_output_nonlinearity if self._normalize: latent_dim = self.latent_space.flat_dim self._max_std = np.sqrt(1.0 / latent_dim) self._init_std = self._max_std / 2.0 # Tranform std arguments to parameterized space self._init_std_param = None self._min_std_param = None self._max_std_param = None if self._std_parameterization == 'exp': self._init_std_param = np.log(self._init_std) if self._min_std: self._min_std_param = np.log(self._min_std) if self._max_std: self._max_std_param = np.log(self._max_std) elif self._std_parameterization == 'softplus': self._init_std_param = np.log(np.exp(self._init_std) - 1) if self._min_std: self._min_std_param = np.log(np.exp(self._min_std) - 1) if self._max_std: self._max_std_param = np.log(np.exp(self._max_std) - 1) else: raise NotImplementedError self._mean_scale = mean_scale # Build default graph with self._name_scope: # inputs self._input = self.input_space.new_tensor_variable( name="input", extra_dims=1) with tf.name_scope("default", values=[self._input]): # network latent_var, mean_var, std_param_var, dist = self._build_graph( self._input) # outputs self._latent = tf.identity(latent_var, 
name="latent") self._latent_mean = tf.identity(mean_var, name="latent_mean") self._latent_std_param = tf.identity(std_param_var, "latent_std_param") self._latent_distribution = dist # compiled functions with tf.variable_scope("f_dist"): self._f_dist = tensor_utils.compile_function( inputs=[self._input], outputs=[ self._latent, self._latent_mean, self._latent_std_param ], )
def __init__(self,
             input_shape,
             output_dim,
             conv_filters,
             conv_filter_sizes,
             conv_strides,
             conv_pads,
             hidden_sizes,
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             name='GaussianConvRegressor',
             mean_network=None,
             learn_std=True,
             init_std=1.0,
             adaptive_std=False,
             std_share_network=False,
             std_conv_filters=(),
             std_conv_filter_sizes=(),
             std_conv_strides=(),
             std_conv_pads=(),
             std_hidden_sizes=(),
             std_hidden_nonlinearity=None,
             std_output_nonlinearity=None,
             normalize_inputs=True,
             normalize_outputs=True,
             subsample_factor=1.,
             optimizer=None,
             optimizer_args=None,
             use_trust_region=True,
             max_kl_step=0.01):
    Parameterized.__init__(self)
    Serializable.quick_init(self, locals())
    self._mean_network_name = 'mean_network'
    self._std_network_name = 'std_network'

    if optimizer_args is None:
        optimizer_args = dict()

    with tf.compat.v1.variable_scope(name):
        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
            else:
                optimizer = LbfgsOptimizer(**optimizer_args)
        else:
            optimizer = optimizer(**optimizer_args)

        self._optimizer = optimizer
        self._subsample_factor = subsample_factor

        if mean_network is None:
            if std_share_network:
                b = np.concatenate(
                    [
                        np.zeros(output_dim),
                        np.full(output_dim, np.log(init_std))
                    ],
                    axis=0)  # yapf: disable
                b = tf.constant_initializer(b)
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=2 * output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    output_b_init=b)
                l_mean = layers.SliceLayer(
                    mean_network.output_layer,
                    slice(output_dim),
                    name='mean_slice',
                )
            else:
                mean_network = ConvNetwork(
                    name=self._mean_network_name,
                    input_shape=input_shape,
                    output_dim=output_dim,
                    conv_filters=conv_filters,
                    conv_filter_sizes=conv_filter_sizes,
                    conv_strides=conv_strides,
                    conv_pads=conv_pads,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity)
                l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = ConvNetwork(
                name=self._std_network_name,
                input_shape=input_shape,
                output_dim=output_dim,
                conv_filters=std_conv_filters,
                conv_filter_sizes=std_conv_filter_sizes,
                conv_strides=std_conv_strides,
                conv_pads=std_conv_pads,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_hidden_nonlinearity,
                output_nonlinearity=std_output_nonlinearity,
                output_b_init=tf.constant_initializer(np.log(init_std)),
            ).output_layer
        elif std_share_network:
            l_log_std = layers.SliceLayer(
                mean_network.output_layer,
                slice(output_dim, 2 * output_dim),
                name='log_std_slice',
            )
        else:
            l_log_std = layers.ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=tf.constant_initializer(np.log(init_std)),
                trainable=learn_std,
                name=self._std_network_name,
            )

        LayersPowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = tf.compat.v1.placeholder(
            dtype=tf.float32, name='ys', shape=(None, output_dim))
        old_means_var = tf.compat.v1.placeholder(
            dtype=tf.float32, name='old_means', shape=(None, output_dim))
        old_log_stds_var = tf.compat.v1.placeholder(
            dtype=tf.float32,
            name='old_log_stds',
            shape=(None, output_dim))

        x_mean_var = tf.Variable(
            np.zeros((1, np.prod(input_shape)), dtype=np.float32),
            name='x_mean',
        )
        x_std_var = tf.Variable(
            np.ones((1, np.prod(input_shape)), dtype=np.float32),
            name='x_std',
        )
        y_mean_var = tf.Variable(
            np.zeros((1, output_dim), dtype=np.float32),
            name='y_mean',
        )
        y_std_var = tf.Variable(
            np.ones((1, output_dim), dtype=np.float32),
            name='y_std',
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        with tf.name_scope(
                self._mean_network_name, values=[normalized_xs_var]):
            normalized_means_var = layers.get_output(
                l_mean, {mean_network.input_layer: normalized_xs_var})
        with tf.name_scope(
                self._std_network_name, values=[normalized_xs_var]):
            normalized_log_stds_var = layers.get_output(
                l_log_std, {mean_network.input_layer: normalized_xs_var})

        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + tf.math.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = (
            old_log_stds_var - tf.math.log(y_std_var))

        dist = self._dist = DiagonalGaussian(output_dim)

        normalized_dist_info_vars = dict(
            mean=normalized_means_var, log_std=normalized_log_stds_var)

        mean_kl = tf.reduce_mean(
            dist.kl_sym(
                dict(
                    mean=normalized_old_means_var,
                    log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        loss = -tf.reduce_mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = tensor_utils.compile_function([xs_var], means_var)
        self._f_pdists = tensor_utils.compile_function(
            [xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[
                normalized_means_var, normalized_log_stds_var
            ],
        )

        if use_trust_region:
            optimizer_args['leq_constraint'] = (mean_kl, max_kl_step)
            optimizer_args['inputs'] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            optimizer_args['inputs'] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs
        self._mean_network = mean_network
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var
def __init__(self, env_spec):
    Parameterized.__init__(self)
    self._env_spec = env_spec