def jvp(g):
    ret = []
    ng = np.broadcast_to(g, np.shape(ans))
    shape = np.shape(ng)
    for idx in range(shape[axis]):
        ret.append(np.take(ng, idx, axis=axis))
    return tuple(ret)
def grad_broadcast_to(ans, x, new_shape):
    old_shape = np.shape(x)
    assert np.shape(ans) == new_shape
    assert len(old_shape) == len(new_shape), "Can't handle extra leading dims"
    broadcast_axes = tuple(
        i for i in range(len(old_shape)) if old_shape[i] == 1 and new_shape[i] > 1
    )
    return lambda g: np.sum(g, axis=broadcast_axes, keepdims=True)
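# A minimal sanity check of the rule above (illustrative only, not library
# code): with plain NumPy, summing the cotangent over the broadcast axes with
# keepdims=True should invert np.broadcast_to for same-rank inputs.
def _check_grad_broadcast_to():
    import numpy as onp

    x = onp.ones((3, 1))
    g = onp.arange(12.0).reshape(3, 4)  # cotangent for ans = broadcast_to(x, (3, 4))
    vjp_g = onp.sum(g, axis=(1,), keepdims=True)
    assert vjp_g.shape == x.shape  # each row collapses back to one column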
def _unpad(array, width):
    # Normalize `width` to one (before, after) pair per axis.
    if np.isscalar(width):
        width = [[width, width]]
    elif np.shape(width) == (1,):
        width = [np.concatenate((width, width))]
    elif np.shape(width) == (2,):
        width = [width]
    if np.shape(width)[0] == 1:
        width = np.repeat(width, np.ndim(array), 0)
    # `-u or None` handles u == 0, where slicing to -0 would drop everything.
    idxs = tuple(slice(l, -u or None) for l, u in width)
    return array[idxs]
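# Illustrative check (an assumption about intended behavior, not library
# code): _unpad should invert np.pad for constant padding, since slicing off
# `width` entries per edge restores the original array.
def _check_unpad():
    import numpy as onp

    a = onp.arange(6.0).reshape(2, 3)
    padded = onp.pad(a, 1, mode="constant")
    idxs = tuple(slice(1, -1) for _ in range(padded.ndim))  # width == 1 per edge
    assert onp.array_equal(padded[idxs], a)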
def jvp(g):
    if axis is None:
        num_reps = np.size(g)
    elif isinstance(axis, int):
        num_reps = np.shape(g)[axis]
    elif isinstance(axis, tuple):
        num_reps = np.prod(np.array(np.shape(g))[list(axis)])

    if num_reps <= 1:
        return np.zeros_like(ans)
    x_minus_mean = np.conj(x - np.mean(x, axis=axis, keepdims=True))
    return np.sum(np.real(g * x_minus_mean), axis=axis, keepdims=keepdims) / (
        (num_reps - ddof) * ans
    )
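# Hedged finite-difference check of the formula above, written in plain NumPy
# under simplifying assumptions (real input, axis=None, ddof=0):
# d std(x; g) = sum(g * (x - mean(x))) / (n * std(x)).
def _check_std_jvp():
    import numpy as onp

    x = onp.array([1.0, 2.0, 4.0, 7.0])
    g = onp.array([0.3, -0.1, 0.5, 0.2])  # tangent vector
    ans = onp.std(x)
    jvp_val = onp.sum(g * (x - onp.mean(x))) / (x.size * ans)
    eps = 1e-6
    fd = (onp.std(x + eps * g) - onp.std(x - eps * g)) / (2 * eps)
    assert onp.allclose(jvp_val, fd, atol=1e-6)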
def grad_sort(ans, x, axis=-1, kind="quicksort", order=None):
    if len(np.shape(x)) > 1:
        raise NotImplementedError(
            "Gradient of sort not implemented for multi-dimensional arrays."
        )
    sort_perm = np.argsort(x, axis, kind, order)
    return lambda g: unpermuter(g, sort_perm)
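# `unpermuter` is used above but not shown in this excerpt. A plausible
# one-dimensional sketch of its behavior (an assumption, not the verbatim
# helper): scatter g back through the inverse of the sorting permutation.
def _unpermuter_sketch(g, perm):
    import numpy as onp

    unsort = onp.zeros(len(perm), dtype=int)
    unsort[perm] = onp.arange(len(perm))  # build the inverse permutation
    return g[unsort]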
def grad_partition(ans, x, kth, axis=-1, kind="introselect", order=None):
    if len(np.shape(x)) > 1:
        raise NotImplementedError(
            "Gradient of partition not implemented for multi-dimensional arrays."
        )
    partition_perm = np.argpartition(x, kth, axis, kind, order)
    return lambda g: unpermuter(g, partition_perm)
def grad_np_prod(ans, x, axis=None, keepdims=False):  # TODO: Support tuples of axes.
    shape, dtype = np.shape(x), x.dtype

    def vjp(g):
        g_repeated, _ = repeat_to_match_shape(g * ans, shape, dtype, axis, keepdims)
        return g_repeated / x

    return vjp
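# `repeat_to_match_shape` is assumed by several rules in this excerpt but not
# defined here. A hedged sketch of what it plausibly does: expand a reduced
# gradient g back to `shape` along `axis`, also returning how many elements
# were reduced over. Names and edge-case handling are assumptions.
def _repeat_to_match_shape_sketch(g, shape, dtype, axis, keepdims):
    import numpy as onp

    if shape == ():
        return g, 1
    if axis is None:
        # Reduction over everything: broadcast the scalar gradient everywhere.
        return onp.full(shape, g, dtype=dtype), onp.prod(onp.array(shape))
    axes = list(axis) if isinstance(axis, tuple) else [axis]
    new_shape = onp.array(shape)
    new_shape[axes] = 1
    num_reps = onp.prod(onp.array(shape)[axes])
    return onp.reshape(g, new_shape) + onp.zeros(shape, dtype=dtype), num_reps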
def grad_tile(ans, x, reps):
    reps = [reps] if np.isscalar(reps) else reps
    x_shape = np.shape(x)

    def vjp(g):
        for axis, rep in enumerate(reps):
            g = sum(np.split(g, rep, axis))
        return np.reshape(g, x_shape)

    return vjp
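# Hedged check of the tile rule (plain NumPy, not library code): each split
# along a tiled axis is one copy of the original block, so summing the splits
# accumulates the cotangent across copies.
def _check_grad_tile():
    import numpy as onp

    x = onp.array([1.0, 2.0, 3.0])
    g = onp.ones(6)  # cotangent for ans = np.tile(x, 2)
    vjp_g = sum(onp.split(g, 2, axis=0))
    assert onp.array_equal(onp.reshape(vjp_g, x.shape), onp.array([2.0, 2.0, 2.0]))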
def _empty_like_default(prototype, dtype=None, order="K", subok=True, shape=None):
    import unumpy as np

    out_shape = np.shape(prototype) if shape is None else shape
    out_dtype = prototype.dtype if dtype is None else dtype
    return empty(out_shape, dtype=out_dtype)
def to(self, x, grad_variables=None, jacobian=False):
    """
    Calculate the VJP or Jacobian matrix of self with respect to x.

    Parameters
    ----------
    x : VJPDiffArray
        The denominator of the derivative, i.e. the variable to
        differentiate with respect to.
    grad_variables : VJPDiffArray
        Gradient seeded at the numerator of the derivative.
    jacobian : bool
        Whether to calculate the Jacobian matrix. If set to ``True``,
        the Jacobian matrix is returned instead of the VJP.

    Examples
    --------
    >>> with ua.set_backend(udiff.DiffArrayBackend(numpy_backend), coerce=True):
    ...     x1 = np.array([2])
    ...     x2 = np.array([5])
    ...     y = np.log(x1) + x1 * x2 - np.sin(x2)
    ...     x1_diff = y.to(x1)
    ...     print(np.allclose(x1_diff.value, [5.5]))
    True
    """
    if jacobian:
        if x._jacobian is None or self not in x._jacobian:
            # Seed a one-hot cotangent at every output position to build the
            # Jacobian row by row.
            for position in itertools.product(*[range(i) for i in np.shape(self)]):
                grad_variables = np.zeros_like(self.value)
                grad_variables.value[position] = 1
                self._backward_jacobian(grad_variables, self, position, x)
            x._jacobian[self] = np.reshape(
                np.stack(x._jacobian[self].values()),
                np.shape(self) + np.shape(x),
            )
        return x._jacobian[self]
    else:
        if x._diff is None or self not in x._diff:
            self._backward(grad_variables, self, x)
        return x._diff[self]
def grad_chooser(ans, x, axis=None, keepdims=None):
    shape, dtype = np.shape(x), x.dtype

    def vjp(g):
        """Builds gradient of functions that choose a single item, such as min or max."""
        g_repeated, _ = repeat_to_match_shape(g, shape, dtype, axis, keepdims)
        argmax_locations = (
            x == repeat_to_match_shape(ans, shape, dtype, axis, keepdims)[0]
        )
        return (
            g_repeated
            * argmax_locations
            / np.sum(argmax_locations, axis=axis, keepdims=True)
        )

    return vjp
def grad_repeat(ans, x, repeats, axis=None):
    shape = np.shape(x)

    def vjp(g):
        if axis is None:
            # If axis is None, np.repeat() repeats the flattened array.
            expanded = np.reshape(g, (np.prod(shape),) + (repeats,))
            return np.reshape(np.sum(expanded, axis=1, keepdims=False), shape)
        else:
            if shape[axis] == 1:
                # For this common case, the logic is simple.
                return np.sum(g, axis=axis, keepdims=True)
            else:
                expanded = np.reshape(
                    g, shape[0 : axis + 1] + (repeats,) + shape[axis + 1 :]
                )
                return np.sum(expanded, axis=axis + 1, keepdims=False)

    return vjp
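# Hedged check of the repeat rule for axis=None (plain NumPy, not library
# code): reshaping the cotangent to (n, repeats) and summing over the new
# axis undoes np.repeat.
def _check_grad_repeat():
    import numpy as onp

    x = onp.array([1.0, 2.0])
    repeats = 3
    g = onp.arange(6.0)  # cotangent for ans = np.repeat(x, 3)
    vjp_g = onp.reshape(g, (x.size, repeats)).sum(axis=1)
    assert onp.array_equal(vjp_g, onp.array([3.0, 12.0]))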
from random import randrange

from numpy.testing import assert_almost_equal


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    Sample a few random elements and check the numerical gradient only in
    those dimensions.
    """
    for i in range(num_checks):
        ix = tuple([randrange(m) for m in np.shape(x)])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = ((fxph - fxmh) / (2 * h))[ix]
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic)
        )
        assert_almost_equal(rel_error, 0, decimal=5)
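# Hypothetical usage of grad_check_sparse (this demo is an illustration, not
# part of the library): for the elementwise function f(a) = a ** 2 the
# analytic gradient of each output entry is 2 * a, so the relative error at
# every sampled index should be near zero.
def _demo_grad_check_sparse():
    import numpy as onp

    x = onp.random.rand(4, 5) + 1.0  # keep entries away from zero
    grad_check_sparse(lambda a: a ** 2, x, 2 * x, num_checks=5)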
def metadata(A):
    return np.shape(A), np.ndim(A), A.dtype, np.iscomplexobj(A)
def grad_reshape_list(ans, *arys):
    if len(arys) > 1:
        raise NotImplementedError("Can't handle multiple arguments yet.")
    return lambda g: np.reshape(g, np.shape(arys[0]))
def to(self, x, grad_variables=None, jacobian=False):
    """
    Calculate the JVP or Jacobian matrix of self with respect to x.

    Parameters
    ----------
    x : JVPDiffArray
        The denominator of the derivative, i.e. the variable to
        differentiate with respect to.
    grad_variables : JVPDiffArray
        Gradient (tangent) assigned to x.
    jacobian : bool
        Whether to calculate the Jacobian matrix. If set to ``True``,
        the Jacobian matrix is returned instead of the JVP.

    Examples
    --------
    >>> with ua.set_backend(udiff.DiffArrayBackend(numpy_backend, mode="jvp"), coerce=True):
    ...     x1 = np.array([2])
    ...     x2 = np.array([5])
    ...     y = np.log(x1) + x1 * x2 - np.sin(x2)
    ...     x1_diff = y.to(x1)
    ...     print(np.allclose(x1_diff, [5.5]))
    True
    """
    if self._jvp and x not in self._jvp:
        raise ValueError("Please check if the base is correct.")
    if jacobian:
        if self._jacobian is None:
            self._jacobian = {}
        if x not in self._jacobian:
            self._jacobian[x] = {}
            # Seed a one-hot tangent at every input position to build the
            # Jacobian column by column.
            for position in itertools.product(*[range(i) for i in np.shape(x)]):
                grad_variables = np.zeros_like(x)
                grad_variables.value[position] = 1
                self._jacobian[x][position] = self._forward(x, grad_variables)
            old_axes = tuple(range(np.ndim(self) + np.ndim(x)))
            new_axes = old_axes[np.ndim(x):] + old_axes[:np.ndim(x)]
            self._jacobian[x] = np.transpose(
                np.reshape(
                    np.stack(self._jacobian[x].values()),
                    np.shape(x) + np.shape(self),
                ),
                new_axes,
            )
        return self._jacobian[x]
    else:
        if self._diff is None:
            self._diff = {}
        if x not in self._diff:
            if grad_variables is None:
                grad_variables = np.ones_like(self)
            self._diff[x] = self._forward(x, grad_variables)
        return self._diff[x]
    defjvp,
    defjvp_argnum,
    def_linear,
)

# ----- Functions that are constant w.r.t. continuous inputs -----
defjvp(np.nan_to_num, lambda ans, x: lambda g: np.where(np.isfinite(x), g, 0.0))

# ----- Binary ufuncs (linear) -----
def_linear(np.multiply)

# ----- Binary ufuncs -----
defjvp(
    np.add,
    lambda ans, x, y: lambda g: np.broadcast_to(g, np.shape(ans)),
    lambda ans, x, y: lambda g: np.broadcast_to(g, np.shape(ans)),
)
defjvp(
    np.subtract,
    lambda ans, x, y: lambda g: np.broadcast_to(g, np.shape(ans)),
    lambda ans, x, y: lambda g: np.broadcast_to(-g, np.shape(ans)),
)
defjvp(
    np.multiply,
    lambda ans, x, y: lambda g: np.broadcast_to(g * y, np.shape(ans)),
    lambda ans, x, y: lambda g: np.broadcast_to(x * g, np.shape(ans)),
)
defjvp(np.divide, "same", lambda ans, x, y: lambda g: -g * x / y ** 2)
defjvp(
    np.maximum,
def grad_np_sum(ans, x, axis=None, keepdims=False, dtype=None):
    shape, dtype = np.shape(x.value), x.dtype
    return lambda g: repeat_to_match_shape(g, shape, dtype, axis, keepdims)[0]
defvjp(np.arctanh, lambda ans, x: lambda g: g / (1 - x ** 2))
defvjp(np.rad2deg, lambda ans, x: lambda g: g / np.pi * 180.0)
defvjp(np.degrees, lambda ans, x: lambda g: g / np.pi * 180.0)
defvjp(np.deg2rad, lambda ans, x: lambda g: g * np.pi / 180.0)
defvjp(np.radians, lambda ans, x: lambda g: g * np.pi / 180.0)
defvjp(np.square, lambda ans, x: lambda g: g * 2 * x)
defvjp(np.sqrt, lambda ans, x: lambda g: g * 0.5 * x ** -0.5)
defvjp(
    np.sinc,
    lambda ans, x: lambda g: g
    * (np.cos(np.pi * x) * np.pi * x - np.sin(np.pi * x))
    / (np.pi * x ** 2),
)
defvjp(
    np.reshape,
    lambda ans, x, shape, order=None: lambda g: np.reshape(g, np.shape(x), order=order),
)
defvjp(
    np.roll, lambda ans, x, shift, axis=None: lambda g: np.roll(g, -shift, axis=axis)
)
defvjp(
    np.array_split,
    lambda ans, ary, idxs, axis=0: lambda g: np.concatenate(g, axis=axis),
)
defvjp(np.split, lambda ans, ary, idxs, axis=0: lambda g: np.concatenate(g, axis=axis))
defvjp(np.vsplit, lambda ans, ary, idxs: lambda g: np.concatenate(g, axis=0))
defvjp(np.hsplit, lambda ans, ary, idxs: lambda g: np.concatenate(g, axis=1))
defvjp(np.dsplit, lambda ans, ary, idxs: lambda g: np.concatenate(g, axis=2))
defvjp(
    np.ravel,
    lambda ans, x, order=None: lambda g: np.reshape(g, np.shape(x), order=order),
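# Hedged spot check (plain NumPy, not library code) of one rule above: the
# np.sqrt cotangent factor 0.5 * x ** -0.5 should match a central difference.
def _check_sqrt_vjp():
    import numpy as onp

    x = 2.5
    eps = 1e-6
    fd = (onp.sqrt(x + eps) - onp.sqrt(x - eps)) / (2 * eps)
    assert onp.isclose(0.5 * x ** -0.5, fd)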
def vjp(g):
    ret = []
    shape = np.shape(g)
    for idx in range(shape[axis]):
        ret.append(np.take(g, idx, axis=axis))
    return tuple(ret)