Example #1
0
    def __init__(self, inputs, outputs, grad_depth = 1, **kwargs):
        if not isinstance(outputs, list):
            raise TypeError('outputs must be list', outputs)
        for i in inputs + outputs:
            if not isinstance(i, gof.Variable):
                raise TypeError('inputs and outputs must be Variable instances', i)
        if 'updates' in kwargs:
            raise TypeError('updates are not allowed in kwargs')
        # TODO: the graph may have implicit inputs like Value and SharedVariable instances.
        #       what impact do they have on the validity of this Op?
        self.fn = orig_function(inputs, outputs, **kwargs)
        self.inputs = inputs
        self.outputs = outputs
        self.input_types = [input.type for input in inputs]
        self.output_types = [output.type for output in outputs]

        if grad_depth > 0:
            output_grads = [t() for t in self.output_types]
            gd = G.grad_sources_inputs(zip(self.outputs, output_grads), self.inputs)
            gs = map(gd.get, self.inputs)
            self.grad_ops = []
            for g in gs:
                if g is None:
                    self.grad_ops.append(lambda *args: None)
                else:
                    self.grad_ops.append(OpFromGraph(inputs + output_grads,
                                                     [g],
                                                     grad_depth = grad_depth - 1))
Example #2
0
    def __init__(self, inputs, outputs, grad_depth=1, **kwargs):
        if not isinstance(outputs, list):
            raise TypeError('outputs must be list', outputs)
        for i in inputs + outputs:
            if not isinstance(i, gof.Variable):
                raise TypeError(
                    'inputs and outputs must be Variable instances', i)
        if 'updates' in kwargs:
            raise TypeError('updates are not allowed in kwargs')
        # TODO: the graph may have implicit inputs like Value and SharedVariable instances.
        #       what impact do they have on the validity of this Op?
        self.fn = orig_function(inputs, outputs, **kwargs)
        self.inputs = inputs
        self.outputs = outputs
        self.input_types = [input.type for input in inputs]
        self.output_types = [output.type for output in outputs]

        if grad_depth > 0:
            output_grads = [t() for t in self.output_types]
            gd = G.grad_sources_inputs(zip(self.outputs, output_grads),
                                       self.inputs)
            gs = map(gd.get, self.inputs)
            self.grad_ops = []
            for g in gs:
                if g is None:
                    self.grad_ops.append(lambda *args: None)
                else:
                    self.grad_ops.append(
                        OpFromGraph(inputs + output_grads, [g],
                                    grad_depth=grad_depth - 1))
Example #3
0
    def test_retNone1(self):
        """Test that it is not ok to return None from op.grad()"""
        class retNone(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.vector()]
                outputs = [theano.tensor.vector()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inp, grads):
                x, = inp
                gz, = grads
                pass
        a = retNone().make_node()
        try:
            grad_sources_inputs([(a.out, one)], None)
        except TypeError, e:
            return
        # If no exception was raised, op.grad returning None went undetected.
        self.fail()
Example #4
0
    def test_wrong_rval_len1(self):
        """Test that it is not ok to return the wrong number of gradient terms
        """
        class retOne(gof.op.Op):
            def make_node(self, *inputs):
                outputs = [theano.tensor.vector()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inputs, grads):
                return [inputs[0].zeros_like()]

        i = theano.tensor.vector()
        j = theano.tensor.vector()
        a1 = retOne().make_node(i)
        grad_sources_inputs([(a1.out, one)], None)
        a2 = retOne().make_node(i, j)
        self.assertRaises(ValueError, grad_sources_inputs,
                [(a2.out, one)], None)
Example #5
0
    def test_retNone1(self):
        """Test that it is not ok to return None from op.grad()"""
        class retNone(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.vector()]
                outputs = [theano.tensor.vector()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inp, grads):
                x, = inp
                gz, = grads
                pass

        a = retNone().make_node()
        try:
            grad_sources_inputs([(a.out, one)], None)
        except TypeError, e:
            return
        # If no exception was raised, op.grad returning None went undetected.
        self.fail()
Example #6
0
    def test_wrong_rval_len1(self):
        """Test that it is not ok to return the wrong number of gradient terms"""
        class retOne(gof.op.Op):
            def make_node(self, *inputs):
                outputs = [theano.tensor.vector()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inputs, grads):
                return [inputs[0].zeros_like()]

        i = theano.tensor.vector()
        j = theano.tensor.vector()
        a1 = retOne().make_node(i)
        g = grad_sources_inputs([(a1.out, one)], None)
        a2 = retOne().make_node(i, j)
        try:
            g = grad_sources_inputs([(a2.out, one)], None)
        except ValueError, e:
            return
        # If no exception was raised, the wrong gradient length went undetected.
        self.fail()
Example #7
0
    def test_1in_1out(self):
        """Test grad is called correctly for a 1-to-1 op"""
        gval = theano.tensor.matrix()

        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.matrix()]
                outputs = [theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inp, grads):
                return gval,
        a1 = O().make_node()
        g = grad_sources_inputs([(a1.outputs[0], one)], None)
        self.assertTrue(g[a1.inputs[0]] is gval)
Example #8
0
    def test_some_None_ograds(self):
        """Test grad is called when some output gradients are None"""
        class O(gof.op.Op):
            def __init__(self, tst):
                self.tst = tst

            def make_node(self, *inputs):
                outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inputs, g_out):
                return [one]
        i = theano.tensor.matrix()
        a1 = O(self).make_node(i)
        g = grad_sources_inputs([(a1.outputs[0], one)], None)
        self.assertTrue(g[i] is one)
Example #9
0
    def test_1in_1out(self):
        """Test grad is called correctly for a 1-to-1 op"""
        gval = theano.tensor.matrix()

        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.matrix()]
                outputs = [theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inp, grads):
                return gval,

        a1 = O().make_node()
        g = grad_sources_inputs([(a1.outputs[0], one)], None)
        self.assertTrue(g[a1.inputs[0]] is gval)
Example #10
0
    def test_Nin_1out(self):
        """Test grad is called correctly for a many-to-1 op"""
        gval0 = theano.tensor.scalar()
        gval1 = theano.tensor.scalar()

        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.scalar(), theano.tensor.scalar()]
                outputs = [theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inp, grads):
                x0, x1 = inp
                gz, = grads
                return (gval0, gval1)
        a1 = O().make_node()
        g = grad_sources_inputs([(a1.outputs[0], one)], None)
        self.assertTrue(g[a1.inputs[0]] is gval0)
        self.assertTrue(g[a1.inputs[1]] is gval1)
Example #11
0
    def test_Nin_1out(self):
        """Test grad is called correctly for a many-to-1 op"""
        gval0 = theano.tensor.scalar()
        gval1 = theano.tensor.scalar()

        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.scalar(), theano.tensor.scalar()]
                outputs = [theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)

            def grad(self, inp, grads):
                x0, x1 = inp
                gz, = grads
                return (gval0, gval1)

        a1 = O().make_node()
        g = grad_sources_inputs([(a1.outputs[0], one)], None)
        self.assertTrue(g[a1.inputs[0]] is gval0)
        self.assertTrue(g[a1.inputs[1]] is gval1)
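The unit tests above all call theano.gradient.grad_sources_inputs directly: it takes a list of (variable, gradient) source pairs, backpropagates through the graph, and returns a mapping from graph variables to their gradient expressions, which the assertions then inspect. A rough sketch of that contract; the constant `one` stands in for the module-level fixture the tests reference and is an assumption here.

import theano
import theano.tensor as T
from theano.gradient import grad_sources_inputs

# Stand-in for the `one` fixture used by the tests above.
one = T.as_tensor_variable(1.0)

x = T.matrix('x')
cost = (x ** 2).sum()

# Seed the output with gradient `one`; graph_inputs=None means the whole
# graph is traversed, as in the tests.
gmap = grad_sources_inputs([(cost, one)], None)
print(gmap[x])  # symbolic expression for d(cost)/dx, i.e. 2*x (times one)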
Example #12
0
def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
         disconnected_inputs='raise'):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
    :type g_cost: Scalar `Variable`, or None
    :param g_cost: an expression for the gradient through cost.  The default is
        ``ones_like(cost)``.
    :param consider_constant: a list of expressions not to backpropagate through

    :param warn_type: a value of True will cause warnings to be logged for any Op that emits a
        gradient that does not match its input type.

    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the variables
        in ``wrt`` are not part of the computational graph computing ``cost``
        (or if all links are non-differentiable). The possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :rtype: `Variable` or list/tuple of `Variable`s (depending upon `wrt`)

    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
             If an element of `wrt` is not differentiable with respect
             to the output, then a zero variable is returned.
             If `wrt` is a list/tuple longer than 1, a list will be returned.
             DEPRECATION: In Theano 0.5, grad will return an object of the
             same type as `wrt`: a list/tuple or TensorVariable in all cases.

    This function is a wrapper around the more general function
    `theano.gradient.grad_sources_inputs`.

    """
    if consider_constant is None:
        consider_constant = []
    else:
        # Error checking on consider_constant: verify that it is a collection
        # of theano variables. This is important because, if someone
        # accidentally passes a nested data structure with theano variables
        # at the leaves, only the root would be properly considered constant.
        if not hasattr(consider_constant, '__iter__'):
            raise TypeError('consider_constant must be an iterable collection,'
                    ' got '+str(type(consider_constant)))
        for elem in consider_constant:
            if not isinstance(elem, gof.Variable):
                raise TypeError('Elements of consider_constant must be '
                        'variables, but got ' + str(type(elem)))



    if not isinstance(cost, TensorVariable):
        raise TypeError('In tensor.grad(), cost argument should be a TensorVariable.', cost)

    if cost.type.ndim:
        raise TypeError(
                'In tensor.grad, "cost" argument should be a scalar, but ndim'
                ' is %i (should be 0). If you want to compute the gradient of'
                ' the sum of cost, you should use cost.sum().'
                % cost.type.ndim)

    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs(
            [(cost, g_cost)],
            list(inputs) + list(consider_constant),
            warn_type=warn_type)


    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: in the graph of a+b, the subtensor a[0] is not part of
    # the graph, so Theano does not know how to compute
    # TT.grad(TT.sum(a+b), a[0]). Such subtle cases can be fixed by a more
    # careful implementation of the gradient, but for now Theano needs to
    # throw an exception and make the user aware that it does not know how
    # to compute that gradient.
    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("grad method was asked to compute the gradient "
                    "with respect to a variable that is not part of "
                    "the computational graph of the cost, or is used "
                    "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                        "'disconnected_inputs', valid values are "
                        "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1 and not (using_list or using_tuple):
        # `wrt` was a single Variable, so we return a single Variable too.
        return ret[0]
    else:
        # Ensure we preserve the original type of `wrt`.
        if using_tuple:
            return tuple(ret)
        else:
            assert using_list
            return ret
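The public entry point documented above is the usual tensor.grad interface. Below is a short usage sketch of exactly the behaviour the docstring describes, including the disconnected_inputs keyword; variable names are illustrative.

import theano
import theano.tensor as T

x = T.dvector('x')
cost = (x ** 2).sum()        # cost must be a 0-d (scalar) variable
gx = T.grad(cost, x)         # single Variable in, single Variable out
f = theano.function([x], gx)
print(f([1.0, 2.0]))         # [2.0, 4.0]

# y is not part of the graph of cost: with the default 'raise' this fails,
# with 'ignore' a zeros_like(y) gradient is returned instead.
y = T.dvector('y')
gx2, gy = T.grad(cost, [x, y], disconnected_inputs='ignore')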
Example #13
0
def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
         disconnected_inputs='raise'):
    """
    Computes the L operation on `f` with respect to `wrt`, evaluated at the
    points given in `eval_points`. Mathematically this stands for the
    Jacobian of `f` with respect to `wrt`, left-multiplied by the eval points.

    :type f: `Variable` or list of `Variable`s
        `f` stands for the output of the computational graph to which you
        want to apply the L operator
    :type wrt: `Variable` or list of `Variable`s
        variables for which you compute the L operator of the expression
        described by `f`
    :type eval_points: `Variable` or list of `Variable`s
        evaluation points for each of the variables in `f`

    :rtype: `Variable` or list/tuple of `Variable`s depending on type of f
    :return: symbolic expression such that
        L_op[j] = sum_i (d f[i] / d wrt[j]) eval_points[i]
        where the indices in that expression are magic multidimensional
        indices that specify both the position within a list and all
        coordinates of the tensor elements.
        If `f` is a list/tuple, then return a list/tuple with the results.
    """
    if consider_constant is None:
        consider_constant = []

    if not isinstance(f, TensorVariable):
        raise TypeError('In tensor.Lop(), f argument should be a TensorVariable.', f)

    if type(eval_points) not in (list, tuple):
        eval_points = [eval_points]

    using_list = isinstance(f, list)
    using_tuple = isinstance(f, tuple)
    if not (using_list or using_tuple):
        f = [f]

    inputs = gof.graph.inputs(f)
    gmap = gradient.grad_sources_inputs(
            zip(f,eval_points),
            list(inputs) + list(consider_constant),
            warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: in the graph of a+b, the subtensor a[0] is not part of
    # the graph, so Theano does not know how to compute
    # TT.grad(TT.sum(a+b), a[0]). Such subtle cases can be fixed by a more
    # careful implementation of the gradient, but for now Theano needs to
    # throw an exception and make the user aware that it does not know how
    # to compute that gradient.
    if not isinstance(wrt, (list, tuple)):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("Lop method was asked to compute the gradient "
                    "with respect to a variable that is not part of "
                    "the computational graph of the cost, or is used "
                    "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                        "'disconnected_inputs', valid values are "
                        "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1:
        if using_list:
            return ret
        elif using_tuple:
            return tuple(ret)
        else:
            return ret[0]
    else:
        if using_tuple:
            return tuple(ret)
        return ret
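Lop is the vector-Jacobian product: for each wrt variable it returns eval_points left-multiplied by the Jacobian of f. Here is a short sketch mirroring the standard Theano documentation example; exposing it as T.Lop is an assumption about how this revision re-exports the function.

import theano
import theano.tensor as T

W = T.dmatrix('W')
x = T.dvector('x')
v = T.dvector('v')        # evaluation point, one entry per element of y

y = T.dot(x, W)           # f in the signature above
vJ = T.Lop(y, W, v)       # symbolic v^T (dy/dW); here it reduces to outer(x, v)

f = theano.function([v, x], vJ)   # W drops out of the resulting expression
print(f([2.0, 2.0], [0.0, 1.0]))  # [[0., 0.], [2., 2.]]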
Example #14
0
def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
         disconnected_inputs='raise'):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
    :type g_cost: Scalar `Variable`, or None
    :param g_cost: an expression for the gradient through cost.  The default is
        ``ones_like(cost)``.
    :param consider_constant: a list of expressions not to backpropagate
        through

    :param warn_type: a value of True will cause warnings to be logged for any
        Op that emits a gradient that does not match its input type.

    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the variables
        in ``wrt`` are not part of the computational graph computing ``cost``
        (or if all links are non-differentiable). The possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :rtype: `Variable` or list/tuple of `Variable`s (depending upon `wrt`)

    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
             If an element of `wrt` is not differentiable with respect
             to the output, then a zero variable is returned.
             It returns an object of the same type as `wrt`: a list/tuple
             or TensorVariable in all cases.

    This function is a wrapper around the more general function
    `theano.gradient.grad_sources_inputs`.

    """
    if consider_constant is None:
        consider_constant = []
    else:
        # Error checking on consider_constant: verify that it is a collection
        # of theano variables. This is important because, if someone
        # accidentally passes a nested data structure with theano variables
        # at the leaves, only the root would be properly considered constant.
        if not hasattr(consider_constant, '__iter__'):
            raise TypeError('consider_constant must be an iterable collection,'
                    ' got '+str(type(consider_constant)))
        for elem in consider_constant:
            if not isinstance(elem, gof.Variable):
                raise TypeError('Elements of consider_constant must be '
                        'variables, but got ' + str(type(elem)))



    if not isinstance(cost, TensorVariable):
        raise TypeError(('In tensor.grad(), cost argument should be '
                         'a TensorVariable.'), cost)

    if cost.type.ndim:
        raise TypeError(
                'In tensor.grad, "cost" argument should be a scalar, but ndim'
                ' is %i (should be 0). If you want to compute the gradient of'
                ' the sum of cost, you should use cost.sum().'
                % cost.type.ndim)

    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs(
            [(cost, g_cost)],
            list(inputs) + list(consider_constant),
            warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: in the graph of a+b, the subtensor a[0] is not part of
    # the graph, so Theano does not know how to compute
    # TT.grad(TT.sum(a+b), a[0]). Such subtle cases can be fixed by a more
    # careful implementation of the gradient, but for now Theano needs to
    # throw an exception and make the user aware that it does not know how
    # to compute that gradient.
    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("grad method was asked to compute the gradient "
                    "with respect to a variable that is not part of "
                    "the computational graph of the cost, or is used "
                    "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                        "'disconnected_inputs', valid values are "
                        "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1 and not (using_list or using_tuple):
        # `wrt` was a single Variable, so we return a single Variable too.
        return ret[0]
    else:
        # Ensure we preserve the original type of `wrt`.
        if using_tuple:
            return tuple(ret)
        else:
            assert using_list
            return ret
Example #15
0
def Lop(f, wrt, eval_points, consider_constant=None, warn_type=False,
         disconnected_inputs='raise'):
    """
    Computes the L operation on `f` with respect to `wrt`, evaluated at the
    points given in `eval_points`. Mathematically this stands for the
    Jacobian of `f` with respect to `wrt`, left-multiplied by the eval points.

    :type f: `Variable` or list of `Variable`s
        `f` stands for the output of the computational graph to which you
        want to apply the L operator
    :type wrt: `Variable` or list of `Variable`s
        variables for which you compute the L operator of the expression
        described by `f`
    :type eval_points: `Variable` or list of `Variable`s
        evaluation points for each of the variables in `f`

    :rtype: `Variable` or list/tuple of `Variable`s depending on type of f
    :return: symbolic expression such that
        L_op[j] = sum_i (d f[i] / d wrt[j]) eval_points[i]
        where the indices in that expression are magic multidimensional
        indices that specify both the position within a list and all
        coordinates of the tensor elements.
        If `f` is a list/tuple, then return a list/tuple with the results.
    """
    if consider_constant is None:
        consider_constant = []

    if not isinstance(f, TensorVariable):
        raise TypeError(('In tensor.Lop(), f argument should be '
                        'a TensorVariable.'), f)

    if type(eval_points) not in (list, tuple):
        eval_points = [eval_points]

    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)

    if not isinstance(f, (list, tuple)):
        f = [f]

    inputs = gof.graph.inputs(f)
    gmap = gradient.grad_sources_inputs(
            zip(f, eval_points),
            list(inputs) + list(consider_constant),
            warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: in the graph of a+b, the subtensor a[0] is not part of
    # the graph, so Theano does not know how to compute
    # TT.grad(TT.sum(a+b), a[0]). Such subtle cases can be fixed by a more
    # careful implementation of the gradient, but for now Theano needs to
    # throw an exception and make the user aware that it does not know how
    # to compute that gradient.

    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = ("Lop method was asked to compute the gradient "
                    "with respect to a variable that is not part of "
                    "the computational graph of the cost, or is used "
                    "only by a non-differentiable operator: %s" % p)
            if disconnected_inputs == 'ignore':
                pass
            elif disconnected_inputs == 'warn':
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == 'raise':
                raise ValueError(message)
            else:
                raise ValueError("Invalid value for keyword "
                        "'disconnected_inputs', valid values are "
                        "'ignore', 'warn' and 'raise'.")
            ret.append(zeros_like(p))

    if len(ret) == 1:
        if using_list:
            return ret
        elif using_tuple:
            return tuple(ret)
        else:
            return ret[0]
    else:
        if using_tuple:
            return tuple(ret)
        return ret
Example #16
0
def grad(cost, wrt, g_cost=None, consider_constant=None, warn_type=False, disconnected_inputs="raise"):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
    :type g_cost: Scalar `Variable`, or None
    :param g_cost: an expression for the gradient through cost.  The default is
        ``ones_like(cost)``.
    :param consider_constant: a list of expressions not to backpropagate through

    :param warn_type: a value of True will cause warnings to be logged for any Op that emits a
        gradient that does not match its input type.

    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the variables
        in ``wrt`` are not part of the computational graph computing ``cost``
        (or if all links are non-differentiable). The possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :rtype: `Variable` or list/tuple of `Variable`s (depending upon `wrt`)

    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
             If an element of `wrt` is not differentiable with respect
             to the output, then a zero variable is returned.
             If `wrt` is a list/tuple longer than 1, a list will be returned.
             DEPRECATION: In Theano 0.5, grad will return an object of the
             same type as `wrt`: a list/tuple or TensorVariable in all cases.

    This function is a wrapper around the more general function
    `theano.gradient.grad_sources_inputs`.

    """
    if consider_constant is None:
        consider_constant = []

    if not isinstance(cost, TensorVariable):
        raise TypeError("In tensor.grad(), cost argument should be a TensorVariable.", cost)

    if cost.type.ndim:
        raise TypeError(
            'In tensor.grad, "cost" argument should be a scalar, but ndim'
            " is %i (should be 0). If you want to compute the gradient of"
            " the sum of cost, you should use cost.sum()." % cost.type.ndim
        )

    if g_cost is None:
        g_cost = ones_like(cost)
    inputs = gof.graph.inputs([cost])
    gmap = gradient.grad_sources_inputs([(cost, g_cost)], list(inputs) + list(consider_constant), warn_type=warn_type)

    # Note: if p is not in gmap there can be several reasons, among which
    # is the fact that p might not be part of the computational graph. A
    # simple example: in the graph of a+b, the subtensor a[0] is not part of
    # the graph, so Theano does not know how to compute
    # TT.grad(TT.sum(a+b), a[0]). Such subtle cases can be fixed by a more
    # careful implementation of the gradient, but for now Theano needs to
    # throw an exception and make the user aware that it does not know how
    # to compute that gradient.
    using_list = isinstance(wrt, list)
    using_tuple = isinstance(wrt, tuple)
    if not (using_list or using_tuple):
        wrt = [wrt]
    ret = []
    for p in wrt:
        if p in gmap:
            ret.append(gmap[p])
        else:
            message = (
                "grad method was asked to compute the gradient "
                "with respect to a variable that is not part of "
                "the computational graph of the cost, or is used "
                "only by a non-differentiable operator: %s" % p
            )
            if disconnected_inputs == "ignore":
                pass
            elif disconnected_inputs == "warn":
                warnings.warn(message, stacklevel=1)
            elif disconnected_inputs == "raise":
                raise ValueError(message)
            else:
                raise ValueError(
                    "Invalid value for keyword "
                    "'disconnected_inputs', valid values are "
                    "'ignore', 'warn' and 'raise'."
                )
            ret.append(zeros_like(p))

    if len(ret) == 1:
        if using_list or using_tuple:
            warnings.warn(
                (
                    "The return type of tensor.grad will change in this "
                    "case. In the future grad(cost, wrt) will return an "
                    "object of the same type as wrt. So if wrt is a "
                    "list/tuple, list/tuple will be returned. Idem for "
                    "TensorVariable."
                ),
                stacklevel=2,
            )
        # TODO: when we release Theano 0.5, uncomment the following lines
        #       and remove the warning. Don't forget the line in the currently
        #       enabled else.
        # if using_list:
        #    return ret
        # elif using_tuple:
        #    return tuple(ret)
        # else:
        return ret[0]
    else:
        # if using_tuple:
        #    return tuple(ret)
        return ret