def backward(self, grad, *, graph, _reduction=None, **kwargs):
    """ Back-propagates the gradient through all of the operation's inputs.
    Constant tensors do not propagate a gradient.

    Parameters
    ----------
    grad : numpy.ndarray
        The back-propagated total derivative with respect to the present
        operation (`f`): d(out)/df

    graph : Set[Operation]
        The set of all operations relevant to the terminal node of the
        computational graph, which triggered back-propagation.

    _reduction : Optional[Callable[[ndarray, Tuple[int, ...]], ndarray]]
        Developer option only. A callable used to process the gradient
        prior to accumulation (e.g. broadcast-reduction).
    """
    for index, var in enumerate(self.variables):
        if not var.constant:
            if not var._ops:
                raise InvalidBackprop(
                    "Part of the computational graph containing "
                    "this tensor was 'cleared' prior to backprop."
                )

            try:
                backed_grad = self.backward_var(grad, index, **kwargs)
            except SkipGradient:
                continue

            if is_invalid_gradient(backed_grad):
                raise InvalidGradient(
                    f"An invalid gradient-value was passed to:"
                    f"\n\t`{type(self).__name__}.backward_var(<gradient>, index={index})`"
                    f"\nGradients are expected to be real-valued scalars or "
                    f"numpy arrays, got a gradient of type: {type(backed_grad)}"
                )

            if var.grad is None:
                tmp_grad = np.asarray(backed_grad)
                if _reduction is not None:
                    tmp_grad = _reduction(tmp_grad, var.shape)
                # copy the gradient if it shares memory with the incoming
                # gradient, so that later accumulation cannot mutate `grad`
                var.grad = (
                    np.copy(tmp_grad)
                    if np.shares_memory(tmp_grad, grad)
                    else tmp_grad
                )
            else:
                if _reduction is None:
                    var.grad += backed_grad
                else:
                    var.grad += _reduction(backed_grad, var.shape)

    # recurse: each non-constant input produced by an operation continues the
    # back-propagation through its own creator
    for var in {
        i for i in self.variables if not i.constant and i.creator is not None
    }:
        var._accum_ops.add(self)
        var._backward(graph=graph)
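
# A minimal, self-contained sketch (illustrative only -- not MyGrad's actual
# implementation) of the per-input derivative that ``backward_var(grad, index)``
# is expected to return.  For a two-input multiply, the local derivatives are
# d(a*b)/da = b and d(a*b)/db = a, each scaled by the incoming gradient via the
# chain rule.  The class name ``SketchMultiply`` and its attributes are assumptions.
import numpy as np


class SketchMultiply:
    def __call__(self, a, b):
        # retain the input arrays so their values are available during backprop
        self.variables = (np.asarray(a), np.asarray(b))
        return self.variables[0] * self.variables[1]

    def backward_var(self, grad, index, **kwargs):
        a, b = self.variables
        # chain rule: incoming gradient times the local partial derivative
        return grad * b if index == 0 else grad * a


# e.g. ``op = SketchMultiply(); op(2.0, 3.0); op.backward_var(np.asarray(1.0), index=0)``
# yields ``array(3.)``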
def backward(self, grad=None):
    """ Set or accumulate ``self.grad`` with `grad`, and back-propagate the
    gradient to this tensor's creator. In effect, calling ``self.backward()``
    triggers a back-propagation from ``self`` through the preceding nodes in
    the computational graph. Thus a preceding node, ``a``, will have its
    attribute ``a.grad`` hold the total derivative `d(self)/da`.

    Parameters
    ----------
    grad : Optional[array_like]
        The value of the incoming derivative. If ``self.grad`` is ``None``,
        it is set to `grad`; otherwise `grad` is added to its current value.

    Raises
    ------
    InvalidBackprop
        The configuration of the computational graph is such that ``self``
        must be a 0D tensor (i.e. a scalar) to invoke ``self.backward()``.

    Examples
    --------
    >>> import mygrad as mg
    >>> x = mg.Tensor(2)
    >>> y = mg.Tensor(3)
    >>> w = x * y
    >>> f = 2 * w
    >>> f.backward()  # computes df/df, df/dw, df/dy, and df/dx
    >>> f.grad  # df/df == 1 by identity
    array(1.)
    >>> w.grad  # df/dw
    array(2.)
    >>> y.grad  # df/dy = df/dw * dw/dy
    array(4.)
    >>> x.grad  # df/dx = df/dw * dw/dx
    array(6.)
    """
    if self._constant:
        return

    if grad is not None:
        self.grad = np.asarray(grad.data if isinstance(grad, Tensor) else grad)
        if is_invalid_gradient(self.grad):
            raise InvalidGradient(
                "An invalid gradient-value was passed to "
                "\n\t`{call_signature}`"
                "\nGradients are expected to be real-valued scalars or "
                "numpy arrays, got a gradient of type: {_type}".format(
                    call_signature="{name}.backward(<gradient>)".format(
                        name=type(self).__name__
                    ),
                    _type=type(grad),
                )
            )
    else:
        if self.ndim > 0 and self._scalar_only:
            raise InvalidBackprop(
                "Backpropagation must be invoked from a "
                "scalar-tensor (a 0D tensor) for this computational graph."
            )

        # seed the back-propagation with d(self)/d(self) = 1; integer-valued
        # tensors receive float-valued gradients
        dtype = float if np.issubdtype(self.dtype, np.signedinteger) else self.dtype
        self.grad = (
            np.ones(self.shape, dtype=dtype)
            if self.ndim > 0
            else np.asarray(1.0, dtype=dtype)
        )

    if self.creator is not None:
        self._backward(graph=self.creator.graph)
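
# Hedged usage sketch (the helper name ``_example_seeded_backward`` is
# illustrative): demonstrates the ``grad is not None`` branch above, where
# ``backward`` is seeded with an explicit incoming gradient so that even a
# non-scalar tensor can trigger back-propagation.
def _example_seeded_backward():
    import numpy as np

    import mygrad as mg

    x = mg.Tensor([1.0, 2.0, 3.0])
    y = 2 * x
    y.backward(np.array([1.0, 1.0, 1.0]))  # incoming d(out)/dy, supplied explicitly
    assert np.all(x.grad == np.array([2.0, 2.0, 2.0]))  # d(out)/dx = 2 * d(out)/dy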
def test_is_invalid_gradient(grad, is_invalid, data: st.DataObject):
    # if a hypothesis search strategy was parametrized in, draw a concrete value
    if isinstance(grad, st.SearchStrategy):
        grad = data.draw(grad, label="grad")

    assert is_invalid_gradient(grad) is is_invalid, grad
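
# Hedged sketch of how a test like ``test_is_invalid_gradient`` is typically
# driven: a pytest parametrization over concrete values and hypothesis search
# strategies, combined with ``@given(data=st.data())`` so that strategies can
# be drawn inside the test body.  The specific cases and the import path for
# ``is_invalid_gradient`` are illustrative assumptions, not the project's
# actual test matrix.
import numpy as np
import pytest
from hypothesis import given
from hypothesis import strategies as st

from mygrad._utils import is_invalid_gradient  # assumed import path


@pytest.mark.parametrize(
    ("grad", "is_invalid"),
    [
        (None, True),                   # no gradient at all is invalid
        (np.array([1.0, 2.0]), False),  # real-valued ndarray is valid
        (st.floats(-10, 10), False),    # a strategy, drawn inside the test body
    ],
)
@given(data=st.data())
def test_is_invalid_gradient_sketch(grad, is_invalid, data: st.DataObject):
    if isinstance(grad, st.SearchStrategy):
        grad = data.draw(grad, label="grad")

    assert is_invalid_gradient(grad) is is_invalid, grad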