def update_opt(self, loss, target, leq_constraint, inputs,
               constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should
     implement methods of the
     :class:`garage.core.parameterized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon),
     of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    constraint_term, constraint_value = leq_constraint
    penalty_var = TT.scalar("penalty")
    penalized_loss = loss + penalty_var * constraint_term

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def get_opt_output():
        flat_grad = flatten_tensor_variables(
            theano.grad(
                penalized_loss,
                target.get_params(trainable=True),
                disconnected_inputs='ignore'))
        return [
            penalized_loss.astype('float64'),
            flat_grad.astype('float64')
        ]

    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(inputs, loss, log_name="f_loss"),
        f_constraint=lambda: compile_function(
            inputs, constraint_term, log_name="f_constraint"),
        f_penalized_loss=lambda: compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss, constraint_term],
            log_name="f_penalized_loss",
        ),
        f_opt=lambda: compile_function(
            inputs=inputs + [penalty_var],
            outputs=get_opt_output(),
            log_name="f_opt"))

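# --- Illustrative sketch (not part of garage): how the penalized objective
# above is typically driven. The coefficient fed through penalty_var is raised
# until the constraint f(x) <= epsilon holds; the toy problem and names below
# are assumptions chosen only to make the idea concrete and runnable.
import numpy as np
import scipy.optimize

epsilon = 0.01

def toy_loss(x):
    return np.sum((x - 2.0) ** 2)

def toy_constraint(x):
    return np.sum(x ** 2)          # we want toy_constraint(x) <= epsilon

penalty, x = 1.0, np.zeros(3)
for _ in range(20):
    # re-solve the unconstrained penalized problem for the current penalty
    x = scipy.optimize.minimize(
        lambda v: toy_loss(v) + penalty * toy_constraint(v), x).x
    if toy_constraint(x) <= epsilon:
        break
    penalty *= 2.0                 # constraint violated: escalate the penalty
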
def update_opt(self, f, target, inputs, reg_coeff, name=None):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    with tf.name_scope(name, "FiniteDifferenceHvp",
                       [f, inputs, params, target]):
        constraint_grads = tf.gradients(
            f, xs=params, name="gradients_constraint")
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)
        flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

        def f_hx_plain(*args):
            with tf.name_scope("f_hx_plain", values=[inputs, self.target]):
                inputs_ = args[:len(inputs)]
                xs = args[len(inputs):]
                flat_xs = np.concatenate(
                    [np.reshape(x, (-1, )) for x in xs])
                param_val = self.target.get_param_values(trainable=True)
                # step size is scaled relative to the current parameter norm
                eps = np.cast['float32'](
                    self.base_eps / (np.linalg.norm(param_val) + 1e-8))
                self.target.set_param_values(
                    param_val + eps * flat_xs, trainable=True)
                flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
                self.target.set_param_values(param_val, trainable=True)
                if self.symmetric:
                    # central difference: (g(p + eps*x) - g(p - eps*x)) / (2*eps)
                    self.target.set_param_values(
                        param_val - eps * flat_xs, trainable=True)
                    flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
                    hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
                    self.target.set_param_values(param_val, trainable=True)
                else:
                    # forward difference: (g(p + eps*x) - g(p)) / eps
                    flat_grad = self.opt_fun["f_grad"](*inputs_)
                    hx = (flat_grad_dvplus - flat_grad) / eps
                return hx

        self.opt_fun = ext.lazydict(
            f_grad=lambda: tensor_utils.compile_function(
                inputs=inputs,
                outputs=flat_grad,
                log_name="f_grad",
            ),
            f_hx_plain=lambda: f_hx_plain,
        )

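# --- Illustrative sketch (plain numpy, not garage's API): the finite-difference
# Hessian-vector product that f_hx_plain computes above. With gradient g(.), it
# is (g(p + eps*x) - g(p)) / eps, or the symmetric variant
# (g(p + eps*x) - g(p - eps*x)) / (2*eps). The quadratic below is an assumption
# chosen so that the exact answer H @ x is known.
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
H = A @ A.T                               # a known symmetric Hessian
grad = lambda p: H @ p                    # gradient of 0.5 * p^T H p
p = rng.standard_normal(5)
x = rng.standard_normal(5)
eps = 1e-4

hx_forward = (grad(p + eps * x) - grad(p)) / eps
hx_symmetric = (grad(p + eps * x) - grad(p - eps * x)) / (2 * eps)
assert np.allclose(hx_forward, H @ x, atol=1e-3)
assert np.allclose(hx_symmetric, H @ x, atol=1e-3)
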
def update_opt(self, f, target, inputs, reg_coeff, name=None):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    with tf.name_scope(name, "PerlmutterHvp", [f, inputs, params]):
        constraint_grads = tf.gradients(
            f, xs=params, name="gradients_constraint")
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        xs = tuple([
            tensor_utils.new_tensor_like(p.name.split(":")[0], p)
            for p in params
        ])

        def hx_plain():
            with tf.name_scope(
                    "hx_plain", values=[constraint_grads, params, xs]):
                with tf.name_scope(
                        "hx_function", values=[constraint_grads, xs]):
                    hx_f = tf.reduce_sum(
                        tf.stack([
                            tf.reduce_sum(g * x)
                            for g, x in zip(constraint_grads, xs)
                        ]))
                hx_plain_splits = tf.gradients(
                    hx_f, params, name="gradients_hx_plain")
                for idx, (hx, param) in enumerate(
                        zip(hx_plain_splits, params)):
                    if hx is None:
                        hx_plain_splits[idx] = tf.zeros_like(param)
                return tensor_utils.flatten_tensor_variables(
                    hx_plain_splits)

        self.opt_fun = ext.lazydict(
            f_hx_plain=lambda: tensor_utils.compile_function(
                inputs=inputs + xs,
                outputs=hx_plain(),
                log_name="f_hx_plain",
            ),
        )

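# --- Illustrative sketch of the identity hx_plain builds above, written with
# jax rather than TensorFlow (an assumption made purely for brevity): the
# Pearlmutter trick computes H @ x as the gradient of (grad f(theta) . x),
# so the full Hessian is never materialized.
import jax
import jax.numpy as jnp

def f(theta):
    return jnp.sum(jnp.sin(theta) ** 2) + jnp.dot(theta, theta) ** 2

theta = jnp.array([0.3, -1.2, 0.7])
x = jnp.array([1.0, 0.0, -2.0])

hvp = jax.grad(lambda t: jnp.dot(jax.grad(f)(t), x))(theta)
assert jnp.allclose(hvp, jax.hessian(f)(theta) @ x, atol=1e-5)
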
def update_opt(self, loss, target, inputs, extra_inputs=None, name=None,
               *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should
     implement methods of the
     :class:`garage.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: A list of symbolic variables as extra inputs
    :return: No return value.
    """
    self._target = target
    params = target.get_params(trainable=True)
    with tf.name_scope(name, "LbfgsOptimizer",
                       [loss, inputs, params, extra_inputs]):

        def get_opt_output():
            with tf.name_scope("get_opt_output", [loss, params]):
                flat_grad = tensor_utils.flatten_tensor_variables(
                    tf.gradients(loss, params))
                return [
                    tf.cast(loss, tf.float64),
                    tf.cast(flat_grad, tf.float64)
                ]

        if extra_inputs is None:
            extra_inputs = list()

        self._opt_fun = ext.lazydict(
            f_loss=lambda: tensor_utils.compile_function(
                inputs + extra_inputs, loss),
            f_opt=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=get_opt_output(),
            ))

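# --- Illustrative sketch (not garage's optimize()): why get_opt_output casts
# the loss and the flattened gradient to float64. scipy's L-BFGS driver expects
# a callable returning (loss, grad) in double precision; the toy quadratic
# below is an assumption standing in for the compiled f_opt.
import numpy as np
import scipy.optimize

def f_opt_stub(flat_params):
    loss = 0.5 * np.sum(flat_params ** 2)
    grad = flat_params
    return np.float64(loss), grad.astype('float64')

x0 = np.random.randn(5)
x_star, loss_star, info = scipy.optimize.fmin_l_bfgs_b(
    f_opt_stub, x0, maxiter=20)
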
def update_opt(self, loss, target, inputs, extra_inputs=None,
               gradients=None, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should
     implement methods of the
     :class:`garage.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: A list of symbolic variables as extra inputs
    :param gradients: Symbolic expressions for the gradients of the
     trainable parameters of the target. By default these are computed by
     calling theano.grad
    :return: No return value.
    """
    self._target = target

    if gradients is None:
        gradients = theano.grad(
            loss,
            target.get_params(trainable=True),
            disconnected_inputs='ignore')

    updates = self._update_method(
        gradients, target.get_params(trainable=True))
    updates = OrderedDict(
        [(k, v.astype(k.dtype)) for k, v in updates.items()])

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(inputs + extra_inputs, loss),
        f_opt=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            updates=updates,
        ))

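# --- Illustrative sketch of the interface _update_method is assumed to follow
# (lasagne-style): given symbolic gradients and the shared parameter variables,
# return an OrderedDict mapping each parameter to its update expression. Plain
# SGD is shown here; in practice an Adam-like rule is typically plugged in.
from collections import OrderedDict

def sgd_updates(gradients, params, learning_rate=1e-3):
    updates = OrderedDict()
    for param, grad in zip(params, gradients):
        updates[param] = param - learning_rate * grad
    return updates
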
def update_opt(self, loss, target, inputs, extra_inputs=None,
               gradients=None, *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should
     implement methods of the
     :class:`garage.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: A list of symbolic variables as extra inputs
    :param gradients: Symbolic expressions for the gradients of the
     trainable parameters of the target. By default these are computed by
     calling theano.grad
    :return: No return value.
    """
    self._target = target

    def get_opt_output(gradients):
        if gradients is None:
            gradients = theano.grad(
                loss, target.get_params(trainable=True))
        flat_grad = flatten_tensor_variables(gradients)
        return [loss.astype('float64'), flat_grad.astype('float64')]

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(inputs + extra_inputs, loss),
        f_opt=lambda: compile_function(
            inputs=inputs + extra_inputs,
            outputs=get_opt_output(gradients),
        ))

def update_opt(self, f, target, inputs, reg_coeff):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = theano.grad(
        f, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        inputs_ = args[:len(inputs)]
        xs = args[len(inputs):]
        flat_xs = np.concatenate([np.reshape(x, (-1, )) for x in xs])
        param_val = self.target.get_param_values(trainable=True)
        eps = np.cast['float32'](
            self.base_eps / (np.linalg.norm(param_val) + 1e-8))
        self.target.set_param_values(
            param_val + eps * flat_xs, trainable=True)
        flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
        if self.symmetric:
            self.target.set_param_values(
                param_val - eps * flat_xs, trainable=True)
            flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            self.target.set_param_values(param_val, trainable=True)
        else:
            self.target.set_param_values(param_val, trainable=True)
            flat_grad = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad) / eps
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: ext.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )

def update_opt(self, f, target, inputs, reg_coeff):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = theano.grad(
        f, wrt=params, disconnected_inputs='warn')
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

    def Hx_plain():
        Hx_plain_splits = TT.grad(
            TT.sum([TT.sum(g * x)
                    for g, x in zip(constraint_grads, xs)]),
            wrt=params,
            disconnected_inputs='warn')
        return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    self.opt_fun = ext.lazydict(
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        ),
    )

def update_opt(self, loss, target, leq_constraint, inputs,
               extra_inputs=None, name=None,
               constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should
     implement methods of the
     :class:`garage.core.parameterized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon),
     of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be
     subsampled if needed. It is assumed that the first dimension of these
     inputs corresponds to the number of data points
    :param extra_inputs: A list of symbolic variables as extra inputs
     which should not be subsampled
    :return: No return value.
    """
    params = target.get_params(trainable=True)
    with tf.name_scope(
            name, "ConjugateGradientOptimizer",
            [loss, target, leq_constraint, inputs,
             extra_inputs, params]):  # yapf: disable
        inputs = tuple(inputs)
        if extra_inputs is None:
            extra_inputs = tuple()
        else:
            extra_inputs = tuple(extra_inputs)

        constraint_term, constraint_value = leq_constraint

        with tf.name_scope("loss_gradients", values=[loss, params]):
            grads = tf.gradients(loss, xs=params)
            for idx, (grad, param) in enumerate(zip(grads, params)):
                if grad is None:
                    grads[idx] = tf.zeros_like(param)
            flat_grad = tensor_utils.flatten_tensor_variables(grads)

        self._hvp_approach.update_opt(
            f=constraint_term,
            target=target,
            inputs=inputs + extra_inputs,
            reg_coeff=self._reg_coeff,
            name="update_opt_" + constraint_name)

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        self._opt_fun = ext.lazydict(
            f_loss=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=loss,
                log_name="f_loss",
            ),
            f_grad=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=flat_grad,
                log_name="f_grad",
            ),
            f_constraint=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=constraint_term,
                log_name="constraint",
            ),
            f_loss_constraint=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=[loss, constraint_term],
                log_name="f_loss_constraint",
            ),
        )

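# --- Illustrative sketch (plain numpy, illustrative names): how the compiled
# f_grad / f_hx_plain pair is consumed downstream. Conjugate gradient solves
# (H + reg_coeff * I) s = g for the step direction using only Hessian-vector
# products, without ever forming H.
import numpy as np

def conjugate_gradient(hvp, g, reg_coeff=1e-5, cg_iters=10, tol=1e-10):
    x = np.zeros_like(g)
    r = g.copy()                       # residual of (H + reg*I) x = g at x = 0
    p = r.copy()
    rdotr = r @ r
    for _ in range(cg_iters):
        Ap = hvp(p) + reg_coeff * p
        alpha = rdotr / (p @ Ap)
        x += alpha * p
        r -= alpha * Ap
        new_rdotr = r @ r
        if new_rdotr < tol:
            break
        p = r + (new_rdotr / rdotr) * p
        rdotr = new_rdotr
    return x
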
def update_opt(self, loss, target, leq_constraint, inputs,
               extra_inputs=None, constraint_name="constraint",
               *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should
     implement methods of the
     :class:`garage.core.parameterized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon),
     of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be
     subsampled if needed. It is assumed that the first dimension of these
     inputs corresponds to the number of data points
    :param extra_inputs: A list of symbolic variables as extra inputs
     which should not be subsampled
    :return: No return value.
    """
    inputs = tuple(inputs)
    if extra_inputs is None:
        extra_inputs = tuple()
    else:
        extra_inputs = tuple(extra_inputs)

    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(grads)

    self._hvp_approach.update_opt(
        f=constraint_term,
        target=target,
        inputs=inputs + extra_inputs,
        reg_coeff=self._reg_coeff)

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term,
            log_name="constraint",
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
        ),
    )
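
# --- Illustrative sketch (not garage's optimize()): how f_loss_constraint is
# typically used once a step direction is available. The step is shrunk
# geometrically until the loss improves and the constraint value stays below
# the stored maximum; all names here are assumptions for illustration only.
import numpy as np

def backtracking_line_search(f_loss_constraint, params, direction,
                             max_constraint_val, backtrack_ratio=0.8,
                             max_backtracks=15):
    loss_before, _ = f_loss_constraint(params)
    for i in range(max_backtracks):
        candidate = params - backtrack_ratio ** i * direction
        loss, constraint_val = f_loss_constraint(candidate)
        if loss < loss_before and constraint_val <= max_constraint_val:
            return candidate
    return params                      # no acceptable step: keep old params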