def __init__(self, inputs, outputs, grad_depth = 1, **kwargs):
    if not isinstance(outputs, list):
        raise TypeError('outputs must be list', outputs)
    for i in inputs + outputs:
        if not isinstance(i, gof.Variable):
            raise TypeError('inputs and outputs must be Variable instances', i)
    if 'updates' in kwargs:
        raise TypeError('updates are not allowed in kwargs')

    # TODO: the graph may have implicit inputs like Value and
    # SharedVariable instances. What impact do they have on the
    # validity of this Op?
    self.fn = orig_function(inputs, outputs, **kwargs)
    self.inputs = inputs
    self.outputs = outputs
    self.input_types = [input.type for input in inputs]
    self.output_types = [output.type for output in outputs]

    if grad_depth > 0:
        output_grads = [t() for t in self.output_types]
        # gd maps each input variable to its gradient expression.
        gd = G.grad_sources_inputs(zip(self.outputs, output_grads),
                                   self.inputs)
        gs = map(gd.get, self.inputs)
        self.grad_ops = []
        for g in gs:
            if g is None:
                # This input is not differentiable: its grad op returns None.
                self.grad_ops.append(lambda *args: None)
            else:
                # Wrap the gradient graph in its own OpFromGraph, one
                # level less deep.
                self.grad_ops.append(OpFromGraph(inputs + output_grads,
                                                 [g],
                                                 grad_depth = grad_depth - 1))
def __init__(self, inputs, outputs, grad_depth=1, **kwargs):
    if not isinstance(outputs, list):
        raise TypeError('outputs must be list', outputs)
    for i in inputs + outputs:
        if not isinstance(i, gof.Variable):
            raise TypeError(
                'inputs and outputs must be Variable instances', i)
    if 'updates' in kwargs:
        raise TypeError('updates are not allowed in kwargs')

    # TODO: the graph may have implicit inputs like Value and
    # SharedVariable instances. What impact do they have on the
    # validity of this Op?
    self.fn = orig_function(inputs, outputs, **kwargs)
    self.inputs = inputs
    self.outputs = outputs
    self.input_types = [input.type for input in inputs]
    self.output_types = [output.type for output in outputs]

    if grad_depth > 0:
        output_grads = [t() for t in self.output_types]
        gd = G.grad_sources_inputs(zip(self.outputs, output_grads),
                                   self.inputs)
        gs = map(gd.get, self.inputs)
        self.grad_ops = []
        for g in gs:
            if g is None:
                self.grad_ops.append(lambda *args: None)
            else:
                self.grad_ops.append(
                    OpFromGraph(inputs + output_grads, [g],
                                grad_depth=grad_depth - 1))
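# A minimal usage sketch for the constructor above, assuming the enclosing
# OpFromGraph class is in scope (recent Theano versions also expose it as
# theano.OpFromGraph; the exact import path may differ between versions).
import theano
import theano.tensor as T

x, y = T.vector('x'), T.vector('y')
# Wrap the graph 2 * (x + y) into a reusable op.
double_sum = OpFromGraph([x, y], [2 * (x + y)])

# Apply it to fresh variables and compile like any other op.
a, b = T.vector('a'), T.vector('b')
f = theano.function([a, b], double_sum(a, b))
# f([1, 2], [3, 4]) -> array([ 8., 12.])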
def test_retNone1(self):
    """Test that it is not ok to return None from op.grad()"""
    class retNone(gof.op.Op):
        def make_node(self):
            inputs = [theano.tensor.vector()]
            outputs = [theano.tensor.vector()]
            return gof.Apply(self, inputs, outputs)

        def grad(self, inp, grads):
            x, = inp
            gz, = grads
            # Implicitly returns None, which grad_sources_inputs must reject.
            pass
    a = retNone().make_node()
    try:
        grad_sources_inputs([(a.out, one)], None)
    except TypeError, e:
        return
    # If no TypeError was raised, the test must fail explicitly.
    self.fail()
def test_wrong_rval_len1(self):
    """Test that it is not ok to return the wrong number of gradient terms"""
    class retOne(gof.op.Op):
        def make_node(self, *inputs):
            outputs = [theano.tensor.vector()]
            return gof.Apply(self, inputs, outputs)

        def grad(self, inputs, grads):
            return [inputs[0].zeros_like()]

    i = theano.tensor.vector()
    j = theano.tensor.vector()
    a1 = retOne().make_node(i)
    grad_sources_inputs([(a1.out, one)], None)
    a2 = retOne().make_node(i, j)
    self.assertRaises(ValueError, grad_sources_inputs,
                      [(a2.out, one)], None)
def test_wrong_rval_len1(self):
    """Test that it is not ok to return the wrong number of gradient terms"""
    class retOne(gof.op.Op):
        def make_node(self, *inputs):
            outputs = [theano.tensor.vector()]
            return gof.Apply(self, inputs, outputs)

        def grad(self, inputs, grads):
            return [inputs[0].zeros_like()]

    i = theano.tensor.vector()
    j = theano.tensor.vector()
    a1 = retOne().make_node(i)
    g = grad_sources_inputs([(a1.out, one)], None)
    a2 = retOne().make_node(i, j)
    try:
        g = grad_sources_inputs([(a2.out, one)], None)
    except ValueError, e:
        return
    # If no ValueError was raised, the test must fail explicitly.
    self.fail()
def test_1in_1out(self):
    """Test grad is called correctly for a 1-to-1 op"""
    gval = theano.tensor.matrix()

    class O(gof.op.Op):
        def make_node(self):
            inputs = [theano.tensor.matrix()]
            outputs = [theano.tensor.matrix()]
            return gof.Apply(self, inputs, outputs)

        def grad(self, inp, grads):
            return gval,
    a1 = O().make_node()
    g = grad_sources_inputs([(a1.outputs[0], one)], None)
    self.assertTrue(g[a1.inputs[0]] is gval)
def test_some_None_ograds(self):
    """Test grad is called when some output gradients are None"""
    class O(gof.op.Op):
        def __init__(self, tst):
            self.tst = tst

        def make_node(self, *inputs):
            outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
            return gof.Apply(self, inputs, outputs)

        def grad(self, inputs, g_out):
            return [one]
    i = theano.tensor.matrix()
    a1 = O(self).make_node(i)
    g = grad_sources_inputs([(a1.outputs[0], one)], None)
    self.assertTrue(g[i] is one)
def test_Nin_1out(self):
    """Test grad is called correctly for a many-to-1 op"""
    gval0 = theano.tensor.scalar()
    gval1 = theano.tensor.scalar()

    class O(gof.op.Op):
        def make_node(self):
            inputs = [theano.tensor.scalar(), theano.tensor.scalar()]
            outputs = [theano.tensor.matrix()]
            return gof.Apply(self, inputs, outputs)

        def grad(self, inp, grads):
            x0, x1 = inp
            gz, = grads
            return (gval0, gval1)
    a1 = O().make_node()
    g = grad_sources_inputs([(a1.outputs[0], one)], None)
    self.assertTrue(g[a1.inputs[0]] is gval0)
    self.assertTrue(g[a1.inputs[1]] is gval1)
def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
         disconnected_inputs='raise'):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
    :type g_cost: Scalar `Variable`, or None
    :param g_cost: an expression for the gradient through cost. The default
        is ``ones_like(cost)``.
    :param consider_constant: a list of expressions not to backpropagate
        through
    :param warn_type: a value of True will cause warnings to be logged for
        any Op that emits a gradient that does not match its input type.
    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the
        variables in ``wrt`` are not part of the computational graph
        computing ``cost`` (or if all links are non-differentiable). The
        possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :rtype: `Variable` or list/tuple of `Variable`s (depending upon `wrt`)
    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
        If an element of `wrt` is not differentiable with respect to the
        output, then a zero variable is returned. If `wrt` is a list/tuple
        longer than 1, a list will be returned.

    DEPRECATION: In Theano 0.5, grad will return an object of the same
    type as `wrt`: a list/tuple or TensorVariable in all cases.

    This function is a wrapper around the more general function
    `theano.gradient.grad_sources_inputs`.
    """
    if consider_constant is None:
        consider_constant = []
    else:
        # Error checking on consider_constant: verify that it is a
        # collection of theano variables. This is important because if
        # someone accidentally passes a nested data structure with theano
        # variables at the leaves, only the root will be properly
        # considered constant.
        if not hasattr(consider_constant, '__iter__'):
            raise TypeError('consider_constant must be an iterable'
                            ' collection, got '
                            + str(type(consider_constant)))
        for elem in consider_constant:
            if not isinstance(elem, gof.Variable):
                raise TypeError('Elements of consider_constant must be'
                                ' variables, but got ' + str(type(elem)))

    if not isinstance(cost, TensorVariable):
        raise TypeError('In tensor.grad(), cost argument should be a'
                        ' TensorVariable.', cost)

    if cost.type.ndim:
        raise TypeError(
            'In tensor.grad, "cost" argument should be a scalar, but ndim'
            ' is %i (should be 0). If you want to compute the gradient of'
            ' the sum of cost, you should use cost.sum().'
            % cost.type.ndim)

    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs(
        [(cost, g_cost)],
        list(inputs) + list(consider_constant),
        warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: for a + b, a[0] is not part of the graph, so Theano
    # does not know how to compute TT.grad(TT.sum(a + b), a[0]). Such
    # subtle cases can be fixed by a more careful implementation of the
    # gradient, but for now Theano needs to throw an exception, and make
    # the user aware that it does not know how to compute that gradient.
    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("grad method was asked to compute the gradient "
                       "with respect to a variable that is not part of "
                       "the computational graph of the cost, or is used "
                       "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                                 "'disconnected_inputs', valid values are "
                                 "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1 and not (using_list or using_tuple):
        # `wrt` was a single Variable, so we return a single Variable too.
        return ret[0]
    else:
        # Ensure we preserve the original type of `wrt`.
        if using_tuple:
            return tuple(ret)
        else:
            assert using_list
            return ret
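# A short usage sketch of the wrapper above (variable names are
# illustrative): grad expects a scalar cost and returns one symbolic
# gradient per variable in wrt.
import theano
import theano.tensor as T

W = T.matrix('W')
x = T.vector('x')
cost = T.sum(T.dot(W, x) ** 2)        # scalar cost, ndim == 0

g_x = T.grad(cost, x)                 # single Variable in, single Variable out
g_W, g_x2 = T.grad(cost, [W, x])      # list in, list of gradients out

f = theano.function([W, x], [g_W, g_x2])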
def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
        disconnected_inputs='raise'):
    """
    Computes the L operation on `f` with respect to `wrt` evaluated at
    points given in `eval_points`. Mathematically this stands for the
    jacobian of `f` with respect to `wrt` left multiplied by the eval
    points.

    :type f: `Variable` or list of `Variable`s
        `f` stands for the output of the computational graph to which you
        want to apply the L operator
    :type wrt: `Variable` or list of `Variable`s
        variables for which you compute the L operator of the expression
        described by `f`
    :type eval_points: `Variable` or list of `Variable`s
        evaluation points for each of the variables in `f`

    :rtype: `Variable` or list/tuple of `Variable`s depending on type of f
    :return: symbolic expression such that
        L_op[j] = sum_i (d f[i] / d wrt[j]) eval_points[i]
        where the indices in that expression are magic multidimensional
        indices that specify both the position within a list and all
        coordinates of the tensor element in the last.
        If `f` is a list/tuple, then return a list/tuple with the results.
    """
    if consider_constant is None:
        consider_constant = []

    if not isinstance(f, TensorVariable):
        raise TypeError('In tensor.Lop(), cost argument should be a'
                        ' TensorVariable.', f)

    if type(eval_points) not in (list, tuple):
        eval_points = [eval_points]

    using_list = isinstance(f, list)
    using_tuple = isinstance(f, tuple)
    if not (using_list or using_tuple):
        f = [f]

    inputs = gof.graph.inputs(f)
    gmap = gradient.grad_sources_inputs(
        zip(f, eval_points),
        list(inputs) + list(consider_constant),
        warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: for a + b, a[0] is not part of the graph, so Theano
    # does not know how to compute TT.grad(TT.sum(a + b), a[0]). Such
    # subtle cases can be fixed by a more careful implementation of the
    # gradient, but for now Theano needs to throw an exception, and make
    # the user aware that it does not know how to compute that gradient.
    if not isinstance(wrt, (list, tuple)):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("Lop method was asked to compute the gradient "
                       "with respect to a variable that is not part of "
                       "the computational graph of the cost, or is used "
                       "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                                 "'disconnected_inputs', valid values are "
                                 "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1:
        if using_list:
            return ret
        elif using_tuple:
            return tuple(ret)
        else:
            return ret[0]
    else:
        if using_tuple:
            return tuple(ret)
        return ret
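# An illustrative call of the function above, assuming it is exposed as
# theano.tensor.Lop: the result is the vector-Jacobian product
# eval_points^T * (d f / d wrt).
import theano
import theano.tensor as T

W = T.matrix('W')
x = T.vector('x')
v = T.vector('v')          # evaluation point, same shape as f
f = T.dot(W, x)            # vector-valued output

# For f = W x the Jacobian w.r.t. x is W, so Lop(f, x, v) is v^T W.
vJ = T.Lop(f, x, v)
fn = theano.function([W, x, v], vJ)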
def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
         disconnected_inputs='raise'):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
    :type g_cost: Scalar `Variable`, or None
    :param g_cost: an expression for the gradient through cost. The default
        is ``ones_like(cost)``.
    :param consider_constant: a list of expressions not to backpropagate
        through
    :param warn_type: a value of True will cause warnings to be logged for
        any Op that emits a gradient that does not match its input type.
    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the
        variables in ``wrt`` are not part of the computational graph
        computing ``cost`` (or if all links are non-differentiable). The
        possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :rtype: `Variable` or list/tuple of `Variable`s (depending upon `wrt`)
    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
        If an element of `wrt` is not differentiable with respect to the
        output, then a zero variable is returned. It returns an object of
        the same type as `wrt`: a list/tuple or TensorVariable in all cases.

    This function is a wrapper around the more general function
    `theano.gradient.grad_sources_inputs`.
    """
    if consider_constant is None:
        consider_constant = []
    else:
        # Error checking on consider_constant: verify that it is a
        # collection of theano variables. This is important because if
        # someone accidentally passes a nested data structure with theano
        # variables at the leaves, only the root will be properly
        # considered constant.
        if not hasattr(consider_constant, '__iter__'):
            raise TypeError('consider_constant must be an iterable'
                            ' collection, got '
                            + str(type(consider_constant)))
        for elem in consider_constant:
            if not isinstance(elem, gof.Variable):
                raise TypeError('Elements of consider_constant must be'
                                ' variables, but got ' + str(type(elem)))

    if not isinstance(cost, TensorVariable):
        raise TypeError(('In tensor.grad(), cost argument should be '
                         'a TensorVariable.'), cost)

    if cost.type.ndim:
        raise TypeError(
            'In tensor.grad, "cost" argument should be a scalar, but ndim'
            ' is %i (should be 0). If you want to compute the gradient of'
            ' the sum of cost, you should use cost.sum().'
            % cost.type.ndim)

    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs(
        [(cost, g_cost)],
        list(inputs) + list(consider_constant),
        warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: for a + b, a[0] is not part of the graph, so Theano
    # does not know how to compute TT.grad(TT.sum(a + b), a[0]). Such
    # subtle cases can be fixed by a more careful implementation of the
    # gradient, but for now Theano needs to throw an exception, and make
    # the user aware that it does not know how to compute that gradient.
    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("grad method was asked to compute the gradient "
                       "with respect to a variable that is not part of "
                       "the computational graph of the cost, or is used "
                       "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                                 "'disconnected_inputs', valid values are "
                                 "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1 and not (using_list or using_tuple):
        # `wrt` was a single Variable, so we return a single Variable too.
        return ret[0]
    else:
        # Ensure we preserve the original type of `wrt`.
        if using_tuple:
            return tuple(ret)
        else:
            assert using_list
            return ret
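# A brief sketch of the two keyword arguments handled above (illustrative
# names): consider_constant blocks backpropagation through the listed
# expressions, and disconnected_inputs controls what happens when a wrt
# variable does not reach the cost.
import theano.tensor as T

x = T.vector('x')
a = x ** 2
cost = T.sum(a * x)        # equals sum(x ** 3)

# Without consider_constant the gradient w.r.t. x would be 3 * x ** 2;
# treating `a` as constant stops backprop through it, leaving only the
# direct term, x ** 2.
g_blocked = T.grad(cost, x, consider_constant=[a])

# z never reaches the cost; with 'ignore' a zero gradient is returned
# instead of raising ValueError.
z = T.vector('z')
g_z = T.grad(cost, z, disconnected_inputs='ignore')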
def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
        disconnected_inputs='raise'):
    """
    Computes the L operation on `f` with respect to `wrt` evaluated at
    points given in `eval_points`. Mathematically this stands for the
    jacobian of `f` with respect to `wrt` left multiplied by the eval
    points.

    :type f: `Variable` or list of `Variable`s
        `f` stands for the output of the computational graph to which you
        want to apply the L operator
    :type wrt: `Variable` or list of `Variable`s
        variables for which you compute the L operator of the expression
        described by `f`
    :type eval_points: `Variable` or list of `Variable`s
        evaluation points for each of the variables in `f`

    :rtype: `Variable` or list/tuple of `Variable`s depending on type of f
    :return: symbolic expression such that
        L_op[j] = sum_i (d f[i] / d wrt[j]) eval_points[i]
        where the indices in that expression are magic multidimensional
        indices that specify both the position within a list and all
        coordinates of the tensor element in the last.
        If `f` is a list/tuple, then return a list/tuple with the results.
    """
    if consider_constant is None:
        consider_constant = []

    if not isinstance(f, TensorVariable):
        raise TypeError(('In tensor.Lop(), cost argument should be '
                         'a TensorVariable.'), f)

    if type(eval_points) not in (list, tuple):
        eval_points = [eval_points]

    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not isinstance(f, (list, tuple)):
        f = [f]

    inputs = gof.graph.inputs(f)
    gmap = gradient.grad_sources_inputs(
        zip(f, eval_points),
        list(inputs) + list(consider_constant),
        warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: for a + b, a[0] is not part of the graph, so Theano
    # does not know how to compute TT.grad(TT.sum(a + b), a[0]). Such
    # subtle cases can be fixed by a more careful implementation of the
    # gradient, but for now Theano needs to throw an exception, and make
    # the user aware that it does not know how to compute that gradient.
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("Lop method was asked to compute the gradient "
                       "with respect to a variable that is not part of "
                       "the computational graph of the cost, or is used "
                       "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                                 "'disconnected_inputs', valid values are "
                                 "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1:
        if using_list:
            return ret
        elif using_tuple:
            return tuple(ret)
        else:
            return ret[0]
    else:
        if using_tuple:
            return tuple(ret)
        return ret
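# For a single output, the L operator above agrees with an ordinary
# gradient of the inner product sum(f * eval_points) with the evaluation
# point held constant. A small sanity-check sketch (illustrative names,
# assuming the public theano.tensor entry points):
import theano
import theano.tensor as T

W = T.matrix('W')
x = T.vector('x')
v = T.vector('v')
f = T.tanh(T.dot(W, x))

lop = T.Lop(f, x, v)                                   # v^T * (df/dx)
ref = T.grad(T.sum(f * v), x, consider_constant=[v])   # same quantity via grad

check = theano.function([W, x, v], [lop, ref])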
def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
         disconnected_inputs="raise"):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
    :type g_cost: Scalar `Variable`, or None
    :param g_cost: an expression for the gradient through cost. The default
        is ``ones_like(cost)``.
    :param consider_constant: a list of expressions not to backpropagate
        through
    :param warn_type: a value of True will cause warnings to be logged for
        any Op that emits a gradient that does not match its input type.
    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the
        variables in ``wrt`` are not part of the computational graph
        computing ``cost`` (or if all links are non-differentiable). The
        possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :rtype: `Variable` or list/tuple of `Variable`s (depending upon `wrt`)
    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
        If an element of `wrt` is not differentiable with respect to the
        output, then a zero variable is returned. If `wrt` is a list/tuple
        longer than 1, a list will be returned.

    DEPRECATION: In Theano 0.5, grad will return an object of the same
    type as `wrt`: a list/tuple or TensorVariable in all cases.

    This function is a wrapper around the more general function
    `theano.gradient.grad_sources_inputs`.
    """
    if consider_constant is None:
        consider_constant = []

    if not isinstance(cost, TensorVariable):
        raise TypeError("In tensor.grad(), cost argument should be a"
                        " TensorVariable.", cost)

    if cost.type.ndim:
        raise TypeError(
            'In tensor.grad, "cost" argument should be a scalar, but ndim'
            " is %i (should be 0). If you want to compute the gradient of"
            " the sum of cost, you should use cost.sum()."
            % cost.type.ndim)

    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs(
        [(cost, g_cost)],
        list(inputs) + list(consider_constant),
        warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: for a + b, a[0] is not part of the graph, so Theano
    # does not know how to compute TT.grad(TT.sum(a + b), a[0]). Such
    # subtle cases can be fixed by a more careful implementation of the
    # gradient, but for now Theano needs to throw an exception, and make
    # the user aware that it does not know how to compute that gradient.
    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("grad method was asked to compute the gradient "
                       "with respect to a variable that is not part of "
                       "the computational graph of the cost, or is used "
                       "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == "ignore":
                pass
            elif disconnected_inputs == "warn":
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == "raise":
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                                 "'disconnected_inputs', valid values are "
                                 "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1:
        if using_list or using_tuple:
            warnings.warn(
                ("The return type of tensor.grad will change in this "
                 "case. In the future grad(cost, wrt) will return an "
                 "object of the same type as wrt. So if wrt is a "
                 "list/tuple, list/tuple will be returned. Idem for "
                 "TensorVariable."),
                stacklevel=2)
        # TODO: when we release Theano 0.5, uncomment the following lines
        # and remove the warning. Don't forget the line in the currently
        # enabled else.
        # if using_list:
        #     return ret
        # elif using_tuple:
        #     return tuple(ret)
        # else:
        return ret[0]
    else:
        # if using_tuple:
        #     return tuple(ret)
        return ret
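# The tail of the implementation above makes the return type depend on wrt;
# a small illustration of what callers currently see (illustrative names):
import theano.tensor as T

x = T.vector('x')
y = T.vector('y')
cost = T.sum(x * y)

g = T.grad(cost, x)                # single Variable in -> single Variable out
g_x, g_y = T.grad(cost, [x, y])    # list longer than 1 -> list out

# The deprecated case warned about above: with the code as written, a
# one-element list still yields a bare Variable (ret[0]); after the
# announced change it will yield a one-element list instead.
g_single = T.grad(cost, [x])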