def grad_gradient(ans, x, *vargs, **kwargs):
    axis = kwargs.pop("axis", None)
    if vargs or kwargs:
        raise NotImplementedError(
            "The only optional argument currently supported for np.gradient "
            "is axis."
        )
    if axis is None:
        axis = range(np.ndim(x))
    elif type(axis) is int:
        axis = [axis]
    else:
        axis = list(axis)

    x_dtype = x.dtype
    x_shape = x.shape
    nd = np.ndim(x)

    def vjp(g):
        if np.ndim(g) == nd:
            # add axis if gradient was along one axis only
            g = g[np.newaxis]

        # accumulate gradient
        out = np.zeros(x_shape, dtype=x_dtype)

        for i, a in enumerate(axis):
            # swap gradient axis to the front
            g_swap = np.swapaxes(g[i], 0, a)[:, np.newaxis]

            out_axis = np.concatenate(
                (
                    -g_swap[0] - 0.5 * g_swap[1],
                    g_swap[0] - 0.5 * g_swap[2],
                    (-1.0) * np.gradient(g_swap, axis=0)[2:-2, 0],
                    0.5 * g_swap[-3] - g_swap[-1],
                    0.5 * g_swap[-2] + g_swap[-1],
                ),
                axis=0,
            )

            out = out + np.swapaxes(out_axis, 0, a)

        return out

    return vjp
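# --- Illustrative sketch (not part of the library) --------------------------
# Hedged sanity check for grad_gradient, assuming `np` behaves like plain
# NumPy here. Because np.gradient is linear, the columns of its Jacobian are
# np.gradient applied to standard basis vectors, so the VJP must equal J.T @ g.
# The helper name and values below are illustrative only.
def _sketch_check_grad_gradient():
    x = np.linspace(0.0, 1.0, 6) ** 2
    ans = np.gradient(x)
    vjp = grad_gradient(ans, x)          # default: differentiate along all axes

    g = np.arange(1.0, 7.0)              # arbitrary upstream cotangent
    # Jacobian of np.gradient, built column by column from basis vectors.
    J = np.stack([np.gradient(e) for e in np.eye(np.size(x))], axis=1)
    assert np.allclose(vjp(g), J.T @ g)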
def unbroadcast(x, target_meta, broadcast_idx=0):
    target_shape, target_ndim, _, target_iscomplex = target_meta
    while np.ndim(x) > target_ndim:
        x = np.sum(x, axis=broadcast_idx)
    for axis, size in enumerate(target_shape):
        if size == 1:
            x = np.sum(x, axis=axis, keepdims=True)
    if np.iscomplexobj(x) and not target_iscomplex:
        x = np.real(x)
    return x
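# --- Illustrative sketch (not part of the library) --------------------------
# Hedged check, assuming `np` behaves like plain NumPy: when an operand was
# broadcast during the forward pass, its gradient is the upstream gradient
# summed over the broadcast axes until it matches the operand's metadata.
# The helper name and values are illustrative only.
def _sketch_check_unbroadcast():
    x = np.ones((3, 1))
    g = np.reshape(np.arange(24.0), (2, 3, 4))   # upstream grad with an extra leading axis
    # This tuple mirrors what the metadata() helper defined later in this module returns.
    target_meta = (np.shape(x), np.ndim(x), x.dtype, False)

    gx = unbroadcast(g, target_meta)
    assert np.shape(gx) == np.shape(x)
    assert np.allclose(gx, np.sum(np.sum(g, axis=0), axis=1, keepdims=True))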
def _unpad(array, width):
    # normalize width to one (before, after) pair per axis
    if np.isscalar(width):
        width = [[width, width]]
    elif np.shape(width) == (1,):
        width = [np.concatenate((width, width))]
    elif np.shape(width) == (2,):
        width = [width]
    if np.shape(width)[0] == 1:
        width = np.repeat(width, np.ndim(array), 0)
    # slice away the padding on each axis; `-u or None` handles u == 0
    idxs = tuple(slice(l, -u or None) for l, u in width)
    return array[idxs]
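# --- Illustrative sketch (not part of the library) --------------------------
# Hedged round-trip check, assuming `np` behaves like plain NumPy: _unpad is
# the slicing inverse of np.pad, which is what the VJP of a pad operation
# needs. The helper name and values are illustrative only.
def _sketch_check_unpad():
    a = np.reshape(np.arange(12.0), (3, 4))
    width = ((1, 2), (0, 3))                     # (before, after) per axis
    padded = np.pad(a, width, mode="constant")
    assert np.array_equal(_unpad(padded, width), a)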
def _block_default(arrays):
    import unumpy as np

    rec = _Recurser(recurse_if=lambda x: type(x) is list)

    list_ndim = None
    any_empty = False
    for index, value, entering in rec.walk(arrays):
        if type(value) is tuple:
            # not strictly necessary, but saves us from:
            # - more than one way to do things - no point treating tuples like
            #   lists
            # - horribly confusing behaviour that results when tuples are
            #   treated like ndarray
            raise TypeError(
                "{} is a tuple. "
                "Only lists can be used to arrange blocks, and np.block does "
                "not allow implicit conversion from tuple to ndarray.".format(index)
            )
        if not entering:
            curr_depth = len(index)
        elif len(value) == 0:
            curr_depth = len(index) + 1
            any_empty = True
        else:
            continue

        if list_ndim is not None and list_ndim != curr_depth:
            raise ValueError(
                "List depths are mismatched. First element was at depth {}, "
                "but there is an element at depth {} ({})".format(
                    list_ndim, curr_depth, index
                )
            )
        list_ndim = curr_depth

    # convert all the arrays to ndarrays
    arrays = rec.map_reduce(arrays, f_map=asarray, f_reduce=list)

    elem_ndim = rec.map_reduce(
        arrays, f_map=lambda xi: np.ndim(xi), f_reduce=builtins.max
    )
    ndim = builtins.max(list_ndim, elem_ndim)
    first_axis = ndim - list_ndim
    arrays = rec.map_reduce(
        arrays, f_map=lambda xi: _atleast_xd(xi, ndim), f_reduce=list
    )

    return rec.map_reduce(
        arrays,
        f_reduce=lambda xs, axis: concatenate(list(xs), axis=axis - 1),
        f_kwargs=lambda axis: dict(axis=axis + 1),
        axis=first_axis,
    )
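# --- Illustrative sketch (not part of the library) --------------------------
# For reference, the behaviour _block_default implements is the familiar
# np.block semantics. The sketch below shows the intended result using plain
# NumPy directly (the implementation above reaches the same result through
# _Recurser and concatenate). Helper name and values are illustrative only.
def _sketch_block_semantics():
    import numpy as onp  # plain NumPy, used only as a reference for the semantics

    A = onp.eye(2)
    B = onp.zeros((2, 3))
    C = onp.ones((1, 2))
    D = onp.full((1, 3), 2.0)
    # nested lists describe block rows/columns; inner arrays concatenate along
    # the last axis, outer lists along the axis before it
    out = onp.block([[A, B],
                     [C, D]])
    assert out.shape == (3, 5)
    return out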
def matmul_adjoint_0(B, G, A_meta, B_ndim):
    G_ndim = np.ndim(G)
    if G_ndim == 0:  # A_ndim == B_ndim == 1
        return unbroadcast(G * B, A_meta)

    _, A_ndim, _, _ = A_meta
    if A_ndim == 1:
        G = np.expand_dims(G, G_ndim - 1)
    if B_ndim == 1:  # The result we need is an outer product
        B = np.expand_dims(B, 0)
        G = np.expand_dims(G, G_ndim)
    else:  # We need to swap the last two axes of B
        B = np.swapaxes(B, B_ndim - 2, B_ndim - 1)
    result = np.matmul(G, B)
    return unbroadcast(result, A_meta)
def matmul_adjoint_1(A, G, A_ndim, B_meta):
    G_ndim = np.ndim(G)
    if G_ndim == 0:  # A_ndim == B_ndim == 1
        return unbroadcast(G * A, B_meta)

    _, B_ndim, _, _ = B_meta
    B_is_vec = B_ndim == 1
    if B_is_vec:
        G = np.expand_dims(G, G_ndim)
    if A_ndim == 1:  # The result we need is an outer product
        A = np.expand_dims(A, 1)
        G = np.expand_dims(G, G_ndim - 1)
    else:  # We need to swap the last two axes of A
        A = np.swapaxes(A, A_ndim - 2, A_ndim - 1)
    result = np.matmul(A, G)
    if B_is_vec:
        result = np.squeeze(result, G_ndim - 1)
    return unbroadcast(result, B_meta)
def metadata(A):
    return np.shape(A), np.ndim(A), A.dtype, np.iscomplexobj(A)
def matmul_vjp_1(ans, A, B):
    A_ndim = np.ndim(A)
    B_meta = metadata(B)
    return lambda g: matmul_adjoint_1(A, g, A_ndim, B_meta)
def matmul_vjp_0(ans, A, B):
    A_meta = metadata(A)
    B_ndim = np.ndim(B)
    return lambda g: matmul_adjoint_0(B, g, A_meta, B_ndim)
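# --- Illustrative sketch (not part of the library) --------------------------
# Hedged numeric check, assuming `np` behaves like plain NumPy: for matrices,
# the two matmul VJPs should reduce to the textbook identities
# dA = G @ B.T and dB = A.T @ G. Helper name and values are illustrative only.
def _sketch_check_matmul_vjps():
    A = np.reshape(np.arange(12.0), (4, 3))
    B = np.reshape(np.arange(15.0), (3, 5))
    G = np.reshape(np.arange(20.0), (4, 5))      # upstream gradient, shaped like A @ B
    ans = np.matmul(A, B)

    dA = matmul_vjp_0(ans, A, B)(G)
    dB = matmul_vjp_1(ans, A, B)(G)
    assert np.allclose(dA, np.matmul(G, B.T))
    assert np.allclose(dB, np.matmul(A.T, G))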
def to(self, x, grad_variables=None, jacobian=False):
    """
    Calculate the JVP or Jacobian matrix of self with respect to x.

    Parameters
    ----------
    x : JVPDiffArray
        The variable to differentiate with respect to.
    grad_variables : JVPDiffArray
        Seed gradient assigned to x (the tangent vector for the JVP).
    jacobian : bool
        Whether to compute the full Jacobian matrix. If set ``True``,
        the Jacobian matrix is returned instead of the JVP.

    Examples
    --------
    >>> with ua.set_backend(udiff.DiffArrayBackend(numpy_backend, mode="jvp"), coerce=True):
    ...     x1 = np.array([2])
    ...     x2 = np.array([5])
    ...     y = np.log(x1) + x1 * x2 - np.sin(x2)
    ...     x1_diff = y.to(x1)
    ...     print(np.allclose(x1_diff, [5.5]))
    True
    """
    if self._jvp and x not in self._jvp:
        raise ValueError("Please check if the base is correct.")

    if jacobian:
        if self._jacobian is None:
            self._jacobian = {}

        if x not in self._jacobian:
            # seed one basis tangent per entry of x and collect the results
            self._jacobian[x] = {}
            for position in itertools.product(*[range(i) for i in np.shape(x)]):
                grad_variables = np.zeros_like(x)
                grad_variables.value[position] = 1
                self._jacobian[x][position] = self._forward(x, grad_variables)

            # reshape and move the x axes to the back: shape(self) + shape(x)
            old_axes = tuple(range(np.ndim(self) + np.ndim(x)))
            new_axes = old_axes[np.ndim(x):] + old_axes[:np.ndim(x)]
            self._jacobian[x] = np.transpose(
                np.reshape(
                    np.stack(self._jacobian[x].values()),
                    np.shape(x) + np.shape(self),
                ),
                new_axes,
            )
        return self._jacobian[x]
    else:
        if self._diff is None:
            self._diff = {}

        if x not in self._diff:
            if grad_variables is None:
                grad_variables = np.ones_like(self)
            self._diff[x] = self._forward(x, grad_variables)
        return self._diff[x]
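# --- Illustrative usage sketch (not part of the library) --------------------
# Hedged example of jacobian=True, modeled on the docstring example above; the
# backend setup and alias names (ua, udiff, numpy_backend, np) are taken from
# that example and may need adjusting in your environment. The helper name is
# hypothetical.
def _sketch_jacobian_usage():
    with ua.set_backend(udiff.DiffArrayBackend(numpy_backend, mode="jvp"), coerce=True):
        x = np.array([2.0, 5.0])
        y = np.log(x) * x
        # The returned Jacobian is indexed as np.shape(y) + np.shape(x).
        return y.to(x, jacobian=True)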