def mtp_hessian_grad_and_value(fun, x):
    """
    Makes a function that returns the MTP, Hessian, gradient and value of a
    function.

    For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
    here defined as a function of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x`, i.e. the 3D array of
    third-order partial derivatives of the scalar-valued function such that

        t[i, j, k] = ∂³f / (∂x[i] ∂x[j] ∂x[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mtp, (hessian, grad, val) = make_vjp(
        lambda x: atuple(hessian_grad_and_value(fun)(x)), x)
    return (
        lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
        hessian,
        grad,
        val,
    )
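# A hedged, pure-NumPy illustration of the matrix-Tressian-product defined in
# the docstring above, for f(x) = sum(x**3), whose Tressian is
# t[i, j, k] = 6 when i == j == k and 0 otherwise. The underscore-prefixed
# names are illustrative only and not part of the API above.
import numpy as onp_demo

_m = onp_demo.arange(9.0).reshape(3, 3)

_t = onp_demo.zeros((3, 3, 3))
for _i in range(3):
    _t[_i, _i, _i] = 6.0            # third derivative of x_i**3

# mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))
_mtp = onp_demo.sum(_m[None, :, :] * _t, axis=(-1, -2))
assert onp_demo.allclose(_mtp, 6.0 * onp_demo.diag(_m))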
def grad_and_value(fun, x):
    """
    Returns a function that returns both gradient and value of a function.
    """
    vjp, val = _make_vjp(fun, x)
    if not vspace(val).size == 1:
        raise TypeError("grad_and_value only applies to real scalar-output "
                        "functions.")
    return vjp(vspace(val).ones()), val
def grad_and_value(fun, x):
    """
    Returns a function that returns both gradient and value of a function.
    """
    vjp, val = make_vjp(fun, x)
    if not vspace(val).size == 1:
        raise TypeError("grad_and_value only applies to real scalar-output"
                        " functions.")
    return vjp(vspace(val).ones()), val
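# Hedged usage sketch for the grad_and_value variant above, assuming the
# two-argument `make_vjp` is autograd.core.make_vjp and `vspace` comes from
# autograd.extend; the demo function and underscore names are illustrative.
import autograd.numpy as anp_demo
from autograd import grad as _grad_ref

def _logistic_loss(w):
    return anp_demo.log(1.0 + anp_demo.exp(-anp_demo.sum(w)))

_w = anp_demo.array([0.1, -0.2, 0.3])
_g, _v = grad_and_value(_logistic_loss, _w)
assert anp_demo.isclose(_v, _logistic_loss(_w))
assert anp_demo.allclose(_g, _grad_ref(_logistic_loss)(_w))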
def _grad_with_forward(fun, x):
    """This function is a replica of ``autograd.grad``, with the only
    difference being that it returns both the gradient *and* the forward
    pass value."""
    vjp, ans = _make_vjp(fun, x)

    if not vspace(ans).size == 1:
        raise TypeError(
            "Grad only applies to real scalar-output functions. "
            "Try jacobian, elementwise_grad or holomorphic_grad.")

    grad_value = vjp(vspace(ans).ones())
    return grad_value, ans
def jacobian_pkl(fun, x):
    vjp, ans = _make_vjp(fun, x)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(x).shape
    grads = map(vjp, ans_vspace.standard_basis())
    grads_out = np.stack(grads)
    if np.prod(jacobian_shape) == np.prod(grads_out.shape):
        return np.reshape(grads_out, jacobian_shape)
    else:
        my_jacobian_shape = ans_vspace.shape + vspace(x).shape + (2, )  # 2 to support real/im
        re_im_grads = np.squeeze(np.reshape(grads_out, my_jacobian_shape))
        out = re_im_grads[..., 0] + 1j * re_im_grads[..., 1]
        return out
def jacobian_and_value(fun, x):
    """
    Returns a function that returns both the Jacobian and value of a function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    val = fun(x)
    v_vspace = vspace(val)
    x_vspace = vspace(x)
    x_rep = np.tile(x, (v_vspace.size, ) + (1, ) * x_vspace.ndim)
    vjp_rep, _ = _make_vjp(fun, x_rep)
    jacobian_shape = v_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in v_vspace.standard_basis()])
    jacobian = vjp_rep(basis_vectors)
    return np.reshape(jacobian, jacobian_shape), val
def get_shape(x):
    """ Gets the shape of x, even if it is not an array """
    if isinstance(x, float) or isinstance(x, int):
        return (1, )
    elif isinstance(x, tuple) or isinstance(x, list):
        return (len(x), )
    else:
        return vspace(x).shape
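# Hedged examples for get_shape above; assumes `vspace` is autograd's
# (autograd.extend.vspace), so the final branch applies to array inputs.
import autograd.numpy as anp_demo

assert get_shape(3.0) == (1, )
assert get_shape((1, 2, 3)) == (3, )
assert get_shape(anp_demo.ones((2, 4))) == (2, 4)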
def irfft_grad(get_args, rfft_fun, ans, x, *args, **kwargs):
    axes, gs, norm = get_args(x, *args, **kwargs)
    vs = vspace(x)
    gvs = vspace(ans)

    check_no_repeated_axes(axes)
    if gs is None:
        gs = [gvs.shape[i] for i in axes]
    check_even_shape(gs)

    # gs is the full fft shape
    # s is the compressed shape
    s = list(gs)
    s[-1] = s[-1] // 2 + 1

    def vjp(g):
        r = match_complex(x, truncate_pad((rfft_fun(g, *args, **kwargs)),
                                          vs.shape))
        fac = make_rfft_factors(axes, vs.shape, s, gs, norm)
        r = anp.conj(r) * fac
        return r

    return vjp
def jacobian_and_value(fun, x):
    """
    Returns a function that returns both the Jacobian and value of a function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    val = fun(x)
    v_vspace = vspace(val)
    x_vspace = vspace(x)
    x_rep = np.tile(x, (v_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_rep, _ = make_vjp(fun, x_rep)
    jacobian_shape = v_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in v_vspace.standard_basis()])
    jacobian = vjp_rep(basis_vectors)
    return np.reshape(jacobian, jacobian_shape), val
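# Hedged usage sketch for jacobian_and_value above, assuming `np` is
# autograd.numpy and the two-argument `make_vjp` is autograd.core.make_vjp.
# The elementwise demo function broadcasts over a leading batch dimension,
# as the docstring requires; names are illustrative only.
import autograd.numpy as anp_demo
from autograd import jacobian as _jacobian_ref

def _demo_fun(x):
    return anp_demo.sin(x) * x      # elementwise, so it batches trivially

_x = anp_demo.array([0.5, -1.2, 2.0])
_jac, _val = jacobian_and_value(_demo_fun, _x)
assert anp_demo.allclose(_val, _demo_fun(_x))
assert anp_demo.allclose(_jac, _jacobian_ref(_demo_fun)(_x))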
def rfft_grad(get_args, irfft_fun, ans, x, *args, **kwargs):
    axes, s, norm = get_args(x, *args, **kwargs)
    vs = vspace(x)
    gvs = vspace(ans)

    check_no_repeated_axes(axes)
    if s is None:
        s = [vs.shape[i] for i in axes]
    check_even_shape(s)

    # s is the full fft shape
    # gs is the compressed shape
    gs = list(s)
    gs[-1] = gs[-1] // 2 + 1
    fac = make_rfft_factors(axes, gvs.shape, gs, s, norm)

    def vjp(g):
        g = anp.conj(g / fac)
        r = match_complex(x, truncate_pad((irfft_fun(g, *args, **kwargs)),
                                          vs.shape))
        return r

    return vjp
def fixed_point_vjp(ans, f, a, x0, distance, tol):
    def rev_iter(params):
        a, x_star, x_star_bar = params
        vjp_x, _ = make_vjp(f(a))(x_star)
        vs = vspace(x_star)
        return lambda g: vs.add(vjp_x(g), x_star_bar)

    vjp_a, _ = make_vjp(lambda x, y: f(x)(y))(a, ans)
    return lambda g: vjp_a(fixed_point(rev_iter, tuple((a, ans, g)),
                                       vspace(x0).zeros(), distance, tol))
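# Hedged usage sketch: if the snippet above matches
# autograd.misc.fixed_points (where fixed_point_vjp is registered as the VJP
# of `fixed_point`), gradients flow through the fixed point by implicit
# differentiation. This mirrors autograd's own square-root example; the
# underscore-prefixed names are illustrative.
import autograd.numpy as anp_demo
from autograd import grad as _grad_ref
from autograd.misc.fixed_points import fixed_point as _fixed_point_ref

def _newton_sqrt_iter(a):
    return lambda x: 0.5 * (x + a / x)

def _distance(x, y):
    return anp_demo.abs(x - y)

def _sqrt(a):
    return _fixed_point_ref(_newton_sqrt_iter, a, 1.0, _distance, 1e-8)

assert anp_demo.isclose(_sqrt(2.0), anp_demo.sqrt(2.0))
assert anp_demo.isclose(_grad_ref(_sqrt)(2.0), 0.5 / anp_demo.sqrt(2.0))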
def jacobian_forward(fun, x):
    """ Compute jacobian of fun with respect to x using forward mode
    differentiation"""
    jvp = make_jvp(fun, x)
    # ans = fun(x)
    val_grad = map(lambda b: jvp(b), vspace(x).standard_basis())
    vals, grads = zip(*val_grad)
    # fake answer so that we don't have to compute it twice
    ans = np.zeros((list(vals)[0].size,))
    m, n = _jac_shape(x, ans)
    return np.reshape(np.stack(grads), (m, n)).T
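# An independent, hedged sketch of the same forward-mode idea using the
# public curried autograd.make_jvp API (the `_jac_shape` helper used above is
# project-specific and not defined in this listing): one JVP per
# standard-basis tangent yields one Jacobian column.
import autograd.numpy as anp_demo
from autograd import make_jvp as _make_jvp_ref, jacobian as _jacobian_ref
from autograd.extend import vspace as _vspace_ref

_B = anp_demo.array([[1.0, 2.0, 0.0], [0.5, -1.0, 3.0]])

def _f(x):
    return anp_demo.sin(anp_demo.dot(_B, x))

_x = anp_demo.array([1.0, 2.0, 3.0])
_jvp = _make_jvp_ref(_f)(_x)                       # jvp(v) -> (value, J v)
_cols = [_jvp(_b)[1] for _b in _vspace_ref(_x).standard_basis()]
_jac = anp_demo.stack(_cols, axis=-1)              # columns -> full Jacobian
assert anp_demo.allclose(_jac, _jacobian_ref(_f)(_x))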
def hessian_grad_and_value(fun, x):
    """
    Returns a function that returns Hessian, gradient and value of a function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    def grad_fun(x):
        vjp, val = _make_vjp(fun, x)
        return vjp(vspace(val).ones()), val

    x_vspace = vspace(x)
    x_rep = np.tile(x, (x_vspace.size, ) + (1, ) * x_vspace.ndim)
    vjp_grad, (grad, val) = _make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
    hessian_shape = x_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in x_vspace.standard_basis()])
    hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
    return np.reshape(hessian, hessian_shape), grad[0], val[0]
def hessian_grad_and_value(fun, x):
    """
    Returns a function that returns the Hessian, gradient and value of a
    function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    def grad_fun(x):
        vjp, val = make_vjp(fun, x)
        return vjp(vspace(val).ones()), val

    x_vspace = vspace(x)
    x_rep = np.tile(x, (x_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_grad, (grad, val) = make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
    hessian_shape = x_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in x_vspace.standard_basis()])
    hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
    return np.reshape(hessian, hessian_shape), grad[0], val[0]
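# Hedged usage sketch for hessian_grad_and_value above on a quadratic with a
# known Hessian. Assumes `np` is autograd.numpy, the two-argument `make_vjp`
# is autograd.core.make_vjp and `atuple` is autograd.builtins.tuple; the
# quadratic broadcasts over a leading batch dimension as required, and the
# underscore names are illustrative only.
import autograd.numpy as anp_demo

_A = anp_demo.array([[2.0, 0.5], [0.5, 1.0]])   # symmetric

def _quadratic(x):
    return 0.5 * anp_demo.sum(anp_demo.dot(x, _A) * x, axis=-1)

_x = anp_demo.array([0.3, -0.7])
_hess, _grad, _val = hessian_grad_and_value(_quadratic, _x)
assert anp_demo.allclose(_hess, _A)                     # Hessian of 0.5 xᵀAx
assert anp_demo.allclose(_grad, anp_demo.dot(_A, _x))   # gradient A x
assert anp_demo.isclose(_val, _quadratic(_x))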
def view_update(data, view_fun):
    view_vjp, item = make_vjp(view_fun)(data)
    item_vs = vspace(item)

    def update(new_item):
        assert item_vs == vspace(new_item), \
            "Please ensure new_item shape and dtype match the data view."
        diff = view_vjp(item_vs.add(
            new_item, item_vs.scalar_mul(item, -np.uint64(1))))
        return vspace(data).add(data, diff)

    return item, update
def irfft_grad(get_args, rfft_fun, ans, x, *args, **kwargs):
    axes, gs, norm = get_args(x, *args, **kwargs)
    vs = vspace(x)
    gvs = vspace(ans)

    check_no_repeated_axes(axes)
    if gs is None:
        gs = [gvs.shape[i] for i in axes]
    check_even_shape(gs)

    # gs is the full fft shape
    # s is the compressed shape
    s = list(gs)
    s[-1] = s[-1] // 2 + 1

    def vjp(g):
        r = match_complex(
            x, truncate_pad((rfft_fun(g, *args, **kwargs)), vs.shape))
        fac = make_rfft_factors(axes, vs.shape, s, gs, norm)
        r = anp.conj(r) * fac
        return r

    return vjp
def rfft_grad(get_args, irfft_fun, ans, x, *args, **kwargs):
    axes, s, norm = get_args(x, *args, **kwargs)
    vs = vspace(x)
    gvs = vspace(ans)

    check_no_repeated_axes(axes)
    if s is None:
        s = [vs.shape[i] for i in axes]
    check_even_shape(s)

    # s is the full fft shape
    # gs is the compressed shape
    gs = list(s)
    gs[-1] = gs[-1] // 2 + 1
    fac = make_rfft_factors(axes, gvs.shape, gs, s, norm)

    def vjp(g):
        g = anp.conj(g / fac)
        r = match_complex(
            x, truncate_pad((irfft_fun(g, *args, **kwargs)), vs.shape))
        return r

    return vjp
def ans_jacobian(function, argnum):
    """
    Get the value and the jacobian of a function.
    This differential operator follows autograd's jacobian implementation:
    https://github.com/HIPS/autograd/blob/master/autograd/differential_operators.py

    Args:
    function :: any -> any - the function to differentiate
    argnum :: int - the argument number to differentiate with respect to

    Returns:
    ans_jacobian any -> tuple(any :: any, jacobian :: ndarray)
        - a function that returns the value of `function` and the jacobian
          of `function` evaluated at a given argument of `function`
    """
    vjp, ans = _make_vjp(function, argnum)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(argnum).shape
    grads = list(map(vjp, ans_vspace.standard_basis()))
    jacobian = np.reshape(np.stack(grads), jacobian_shape)
    return ans, jacobian
def ans_jacobian(function, argnum):
    """
    Get the value and the jacobian of a function.
    This differential operator supports numpy and pycuda arrays.

    Args:
    function :: any -> any - the function to differentiate
    argnum :: int - the argument number to differentiate with respect to

    Returns:
    ans_jacobian any -> tuple(any :: any, jacobian :: ndarray)
        - a function that returns the value of `function` and the jacobian
          of `function` evaluated at a given argument of `function`
    """
    vjp, ans = _make_vjp(function, argnum)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(argnum).shape
    grads = list(map(vjp, ans_vspace.standard_basis()))
    if isinstance(grads[0], np.ndarray):
        jacobian = np.reshape(np.stack(grads), jacobian_shape)
    elif isinstance(grads[0], GPUArray):
        jacobian = stack_gpu(grads).reshape(jacobian_shape)
    return ans, jacobian
def mtp_hessian_grad_and_value(fun, x):
    """
    Returns a function that returns the MTP, Hessian, gradient and value of a
    function.

    For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
    here defined as a function of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of third-order partial derivatives of the scalar-valued function
    such that

        t[i, j, k] = d**3 f / (dx[i] * dx[j] * dx[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mtp, (hessian, grad, val) = make_vjp(
        lambda x: atuple(hessian_grad_and_value(fun)(x)), x)
    return (
        lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
        hessian, grad, val)
def __new__(self, name, base, dic):
    cls = type.__new__(container_mateclass, name, base, dic)
    cls.register(_np.ndarray)
    for type_ in [
            float, _np.float64, _np.float32, _np.float16,
            complex, _np.complex64, _np.complex128
    ]:
        cls.register(type_)
    for method_name in nondiff_methods + diff_methods:
        setattr(cls, method_name, anp.__dict__[method_name])
    setattr(cls, 'flatten', anp.__dict__['ravel'])

    defvjp(func(cls.__getitem__),
           lambda ans, A, idx: lambda g: untake(g, idx, vspace(A)))
    defjvp(func(cls.__getitem__), 'same')
    defjvp(untake, 'same')

    setattr(cls, 'reshape', wrapped_reshape)
    return cls
def mhp_jacobian_and_value(fun, x):
    """
    Returns a function that returns MHP, Jacobian and value of a function.

    For a vector-valued function `fun` the matrix-Hessian-product (MHP) is
    here defined as a function of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mhp, (jacob, val) = _make_vjp(
        lambda x: atuple(jacobian_and_value(fun)(x)), x)
    return lambda m: mhp((m, vspace(val).zeros())), jacob, val
def mhp_jacobian_and_value(fun, x):
    """
    Returns a function that returns MHP, Jacobian and value of a function.

    For a vector-valued function `fun` the matrix-Hessian-product (MHP) is
    here defined as a function of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mhp, (jacob, val) = make_vjp(
        lambda x: atuple(jacobian_and_value(fun)(x)), x)
    return lambda m: mhp((m, vspace(val).zeros())), jacob, val
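# A hedged, pure-NumPy illustration of the matrix-Hessian-product defined in
# the docstrings above, for the elementwise map f(x) = x**3 whose
# vector-Hessian is h[k, i, j] = 6 * x[i] when i == j == k and 0 otherwise.
# The underscore-prefixed names are illustrative only.
import numpy as onp_demo

_x = onp_demo.array([1.0, 2.0, 3.0])
_m = onp_demo.arange(9.0).reshape(3, 3)

_h = onp_demo.zeros((3, 3, 3))
for _i in range(3):
    _h[_i, _i, _i] = 6.0 * _x[_i]

# mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))
_mhp = onp_demo.sum(_m[None, :, :] * _h, axis=(-1, -2))
assert onp_demo.allclose(_mhp, 6.0 * _x * onp_demo.diag(_m))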
@primitive
def untake(x, idx, vs):
    if isinstance(idx, list) and (len(idx) == 0 or not isinstance(idx[0], slice)):
        idx = onp.array(idx, dtype='int64')

    def mut_add(A):
        onp.add.at(A, idx, x)
        return A

    return SparseObject(vs, mut_add)

defvjp(func(ArrayBox.__getitem__),
       lambda ans, A, idx: lambda g: untake(g, idx, vspace(A)))
defvjp(untake, lambda ans, x, idx, _: lambda g: g[idx])


def _unpad(array, width):
    if anp.isscalar(width):
        width = [[width, width]]
    elif anp.shape(width) == (1, ):
        width = [anp.concatenate((width, width))]
    elif anp.shape(width) == (2, ):
        width = [width]
    if anp.shape(width)[0] == 1:
        width = anp.repeat(width, anp.ndim(array), 0)
    idxs = tuple(slice(l, -u or None) for l, u in width)
    return array[idxs]
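# Hedged check that _unpad above inverts anp.pad for constant padding;
# assumes `anp` is autograd.numpy, as in the surrounding snippet. The
# underscore names are illustrative only.
import autograd.numpy as anp_demo

_arr = anp_demo.arange(12.0).reshape(3, 4)
_padded = anp_demo.pad(_arr, ((1, 2), (0, 3)), mode='constant')
assert anp_demo.allclose(_unpad(_padded, ((1, 2), (0, 3))), _arr)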
def grad_fun(x):
    vjp, val = make_vjp(fun, x)
    return vjp(vspace(val).ones()), val
def grad_fun(x):
    vjp, val = _make_vjp(fun, x)
    return vjp(vspace(val).ones()), val
from . import numpy_wrapper as anp
from .numpy_vjps import (untake, balanced_eq, match_complex, replace_zero,
                         dot_adjoint_0, dot_adjoint_1, tensordot_adjoint_0,
                         tensordot_adjoint_1, nograd_functions)
from autograd.extend import (defjvp, defjvp_argnum, def_linear, vspace,
                             JVPNode, register_notrace)
from ..util import func
from .numpy_boxes import ArrayBox

for fun in nograd_functions:
    register_notrace(JVPNode, fun)

defjvp(func(ArrayBox.__getitem__), 'same')
defjvp(untake, 'same')

defjvp_argnum(anp.array_from_args,
              lambda argnum, g, ans, args, kwargs:
              untake(g, argnum-2, vspace(ans)))
defjvp(anp._array_from_scalar_or_array, None, None,
       lambda g, ans, args, kwargs, _:
       anp._array_from_scalar_or_array(args, kwargs, g))

# ----- Functions that are constant w.r.t. continuous inputs -----
defjvp(anp.nan_to_num, lambda g, ans, x: anp.where(anp.isfinite(x), g, 0.))

# ----- Binary ufuncs (linear) -----
def_linear(anp.multiply)

# ----- Binary ufuncs -----
defjvp(anp.add,      lambda g, ans, x, y : broadcast(g, ans),
                     lambda g, ans, x, y : broadcast(g, ans))
defjvp(anp.subtract, lambda g, ans, x, y : broadcast(g, ans),
                     lambda g, ans, x, y : broadcast(-g, ans))
defjvp(anp.divide,   'same',
                         dot_adjoint_0, dot_adjoint_1, tensordot_adjoint_0,
                         tensordot_adjoint_1, nograd_functions)
from autograd.extend import (defjvp, defjvp_argnum, def_linear, vspace,
                             JVPNode, register_notrace)
from ..util import func
from .numpy_boxes import ArrayBox

for fun in nograd_functions:
    register_notrace(JVPNode, fun)

defjvp(func(ArrayBox.__getitem__), 'same')
defjvp(untake, 'same')

defjvp_argnum(
    anp.array_from_args,
    lambda argnum, g, ans, args, kwargs: untake(g, argnum - 2, vspace(ans)))
defjvp(
    anp._array_from_scalar_or_array, None, None,
    lambda g, ans, args, kwargs, _: anp._array_from_scalar_or_array(
        args, kwargs, g))

# ----- Functions that are constant w.r.t. continuous inputs -----
defjvp(anp.nan_to_num, lambda g, ans, x: anp.where(anp.isfinite(x), g, 0.))

# ----- Binary ufuncs (linear) -----
def_linear(anp.multiply)

# ----- Binary ufuncs -----
defjvp(anp.add, lambda g, ans, x, y: broadcast(g, ans),
       lambda g, ans, x, y: broadcast(g, ans))
defjvp(anp.subtract, lambda g, ans, x, y: broadcast(g, ans),
def jacobian_reverse(fun, x):
    """ Compute jacobian of fun with respect to x using reverse mode
    differentiation"""
    vjp, ans = make_vjp(fun, x)
    grads = map(vjp, vspace(ans).standard_basis())
    m, n = _jac_shape(x, ans)
    return npa.reshape(npa.stack(grads), (n, m))
def rev_iter(params):
    a, x_star, x_star_bar = params
    vjp_x, _ = make_vjp(f(a))(x_star)
    vs = vspace(x_star)
    return lambda g: vs.add(vjp_x(g), x_star_bar)
       irfft_grad(get_fftn_args, rfftn, *args, **kwargs))

defvjp(fftshift, lambda ans, x, axes=None:
       lambda g: match_complex(x, anp.conj(ifftshift(anp.conj(g), axes))))
defvjp(ifftshift, lambda ans, x, axes=None:
       lambda g: match_complex(x, anp.conj(fftshift(anp.conj(g), axes))))

@primitive
def truncate_pad(x, shape):
    # truncate/pad x to have the appropriate shape
    slices = [slice(n) for n in shape]
    pads = list(zip(anp.zeros(len(shape), dtype=int),
                    anp.maximum(0, anp.array(shape) - anp.array(x.shape))))
    return anp.pad(x, pads, 'constant')[slices]

defvjp(truncate_pad, lambda ans, x, shape:
       lambda g: match_complex(x, truncate_pad(g, vspace(x).shape)))

## TODO: could be made less stringent, to fail only when repeated axis has
## different values of s
def check_no_repeated_axes(axes):
    axes_set = set(axes)
    if len(axes) != len(axes_set):
        raise NotImplementedError("FFT gradient for repeated axes not implemented.")

def check_even_shape(shape):
    if shape[-1] % 2 != 0:
        raise NotImplementedError("Real FFT gradient for odd lengthed last axes is not implemented.")

def get_fft_args(a, d=None, axis=-1, norm=None, *args, **kwargs):
    axes = [axis]
    if d is not None:
        d = [d]
    return axes, d, norm
def fft_grad(get_args, fft_fun, ans, x, *args, **kwargs):
    axes, s, norm = get_args(x, *args, **kwargs)
    check_no_repeated_axes(axes)
    vs = vspace(x)
    return lambda g: match_complex(
        x, truncate_pad(fft_fun(g, *args, **kwargs), vs.shape))
def balanced_eq(x, z, y):
    return (x == z) / (1.0 + (x == y))

def replace_zero(x, val):
    return anp.where(x, x, val)

# ----- extra functions used internally  -----

def array_from_args_gradmaker(argnum, ans, args, kwargs):
    return lambda g: g[argnum-2]
defvjp_argnum(anp.array_from_args, array_from_args_gradmaker)

def array_from_scalar_or_array_gradmaker(ans, array_args, array_kwargs, scarray):
    ndmin = array_kwargs.get('ndmin', 0)
    scarray_ndim = anp.ndim(scarray)
    if ndmin > scarray_ndim:
        return lambda g: anp.squeeze(g, axis=tuple(range(ndmin - scarray_ndim)))
    else:
        return lambda g: g
defvjp(anp._array_from_scalar_or_array,
       array_from_scalar_or_array_gradmaker, argnums=(2,3))

@primitive
def untake(x, idx, vs):
    def mut_add(A):
        onp.add.at(A, idx, x)
        return A
    return SparseObject(vs, mut_add)
defvjp(func(ArrayBox.__getitem__),
       lambda ans, A, idx: lambda g: untake(g, idx, vspace(A)))
defvjp(untake, lambda ans, x, idx, _: lambda g: g[idx])
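# Hedged illustration of balanced_eq above: it is the tie-splitting helper
# used by the maximum/minimum VJPs, giving full weight where an argument is
# the strict extremum and half weight where the two arguments tie. Pure
# NumPy; the values and underscore names are illustrative only.
import numpy as onp_demo

_x = onp_demo.array([1.0, 2.0, 3.0])
_y = onp_demo.array([1.0, 5.0, 0.0])
_z = onp_demo.maximum(_x, _y)
assert onp_demo.allclose(balanced_eq(_x, _z, _y), [0.5, 0.0, 1.0])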
def elementwise_grad(fun, x, initial_grad=None):
    vjp, ans = _make_vjp(fun, x)
    if vspace(ans).iscomplex:
        raise TypeError(
            "Elementwise_grad only applies to real-output functions.")
    return vjp(vspace(ans).ones() if initial_grad is None else initial_grad)
    else:
        return lambda g: g


defvjp(
    acp._array_from_scalar_or_array,
    array_from_scalar_or_array_gradmaker,
    argnums=(2, 3),
)


@primitive
def untake(x, idx, vs):
    def mut_add(A):
        # in numpy codebase, this used to be:
        # onp.add.at(A, idx, x)
        # according to https://docs-cupy.chainer.org/en/stable/reference/ufunc.html?highlight=ufunc.at,
        # scatter_add is the correct function to use.
        # TODO: PR into cupy codebase the ability to use scatter_add with float64?
        ocpx.scatter_add(A, idx, x)
        return A

    return SparseObject(vs, mut_add)


defvjp(
    func(container.__getitem__),
    lambda ans, A, idx: lambda g: untake(g, idx, vspace(A)),  # noqa: E501
)
defvjp(untake, lambda ans, x, idx, _: lambda g: g[idx])
def fft_grad(get_args, fft_fun, ans, x, *args, **kwargs):
    axes, s, norm = get_args(x, *args, **kwargs)
    check_no_repeated_axes(axes)
    vs = vspace(x)
    return lambda g: match_complex(x, truncate_pad(fft_fun(g, *args, **kwargs),
                                                   vs.shape))
def array_from_scalar_or_array_gradmaker(ans, array_args, array_kwargs, scarray):
    ndmin = array_kwargs.get('ndmin', 0)
    scarray_ndim = anp.ndim(scarray)
    if ndmin > scarray_ndim:
        return lambda g: anp.squeeze(g, axis=tuple(range(ndmin - scarray_ndim)))
    else:
        return lambda g: g
defvjp(anp._array_from_scalar_or_array,
       array_from_scalar_or_array_gradmaker, argnums=(2,3))

@primitive
def untake(x, idx, vs):
    def mut_add(A):
        onp.add.at(A, idx, x)
        return A
    return SparseObject(vs, mut_add)
defvjp(func(ArrayBox.__getitem__),
       lambda ans, A, idx: lambda g: untake(g, idx, vspace(A)))
defvjp(untake, lambda ans, x, idx, _: lambda g: g[idx])

def _unpad(array, width):
    if anp.isscalar(width):
        width = [[width, width]]
    elif anp.shape(width) == (1,):
        width = [anp.concatenate((width, width))]
    elif anp.shape(width) == (2,):
        width = [width]
    if anp.shape(width)[0] == 1:
        width = anp.repeat(width, anp.ndim(array), 0)
    idxs = [slice(l, -u or None) for l, u in width]
    return array[idxs]

def pad_vjp(ans, array, pad_width, mode, **kwargs):
@primitive
def truncate_pad(x, shape):
    # truncate/pad x to have the appropriate shape
    slices = [slice(n) for n in shape]
    pads = list(
        zip(anp.zeros(len(shape), dtype=int),
            anp.maximum(0, anp.array(shape) - anp.array(x.shape))))
    return anp.pad(x, pads, 'constant')[slices]


defvjp(
    truncate_pad, lambda ans, x, shape: lambda g: match_complex(
        x, truncate_pad(g, vspace(x).shape)))


## TODO: could be made less stringent, to fail only when repeated axis has
## different values of s
def check_no_repeated_axes(axes):
    axes_set = set(axes)
    if len(axes) != len(axes_set):
        raise NotImplementedError(
            "FFT gradient for repeated axes not implemented.")


def check_even_shape(shape):
    if shape[-1] % 2 != 0:
        raise NotImplementedError(
            "Real FFT gradient for odd lengthed last axes is not implemented.")