def testShouldRecordAndStopRecord(self):
    with forwardprop.ForwardGradientAccumulator() as acc:
        c = constant_op.constant(1.)
        c_tangent = constant_op.constant(2.)
        acc.watch(c, c_tangent)
        with backprop.GradientTape() as tape:
            self.assertFalse(tape_lib.should_record_backprop([c]))
            self.assertEqual(
                1, pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
            tape.watch(c)
            self.assertEqual(
                2, pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
            self.assertTrue(tape_lib.should_record_backprop([c]))
            with tape_lib.stop_recording():
                self.assertEqual(
                    0, pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
                self.assertFalse(tape_lib.should_record_backprop([c]))
                d = c * 2.
            self.assertEqual(
                2, pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
            self.assertTrue(tape_lib.should_record_backprop([c]))
            self.assertFalse(tape_lib.should_record_backprop([d]))
        self.assertIsNone(acc.jvp(d))
    self.assertIsNone(tape.gradient(d, c))
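# A minimal public-API sketch (not part of the test above) of the pause/resume
# behaviour it asserts: operations executed while recording is stopped leave no
# trace on the tape, so their gradients come back as None.
import tensorflow as tf

x = tf.constant(3.)
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = x * x  # recorded while the tape is active
    with tape.stop_recording():
        z = x * x  # not recorded: computed while recording is paused
print(tape.gradient(y, x))  # tf.Tensor(6.0, shape=(), dtype=float32)
print(tape.gradient(z, x))  # None
del tape  # release the persistent tape's resources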
def testForwardOverBackwardMemoryEfficiency(self, forward_prop_first):
    # Watching depends on nesting, not creation order.
    c = constant_op.constant(1.)
    if forward_prop_first:
        forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
        gradient_tape = backprop.GradientTape()
    else:
        gradient_tape = backprop.GradientTape()
        forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
    try:
        gc.disable()
        with gradient_tape as tape:
            # Adding and removing the tape multiple times in different nesting
            # patterns does not affect watch ordering.
            pass
        with forward_accumulator as acc:
            with gradient_tape as tape:
                tape.watch(c)
                d = math_ops.cos(c)
                self.assertFalse(tape_lib.should_record_backprop((acc.jvp(d),)))
                e = math_ops.cos(acc.jvp(d))
                math_ops.cos(e)
                weak_e = weakref.ref(e)
                del e
                self.assertIsNone(weak_e())
            self.assertIsNone(tape.gradient(acc.jvp(d), c))
    finally:
        gc.enable()
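# A standalone sketch (assumed imports) of the weakref lifetime check used in
# the test above: with no tape or accumulator holding a reference, deleting
# the last strong reference to an intermediate tensor frees it immediately.
import weakref
import tensorflow as tf

x = tf.constant(2.)
y = tf.cos(x)
weak_y = weakref.ref(y)
del y
assert weak_y() is None  # nothing recorded y, so nothing kept it alive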
def testBackwardOverForward(self, forward_prop_first):
    c = constant_op.constant(1.)
    # Watching depends on nesting, not creation order.
    if forward_prop_first:
        forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
        gradient_tape = backprop.GradientTape()
    else:
        gradient_tape = backprop.GradientTape()
        forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
    with gradient_tape as tape:
        with forward_accumulator as acc:
            tape.watch(c)
            d = math_ops.cos(c)
            self.assertTrue(tape_lib.should_record_backprop((acc.jvp(d),)))
        self.assertAllClose(-.1 * math_ops.cos(1.), tape.gradient(acc.jvp(d), c))
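# A public-API analogue (a sketch; tf.autodiff.ForwardAccumulator is the
# exported counterpart of forwardprop.ForwardAccumulator) of the
# backward-over-forward pattern tested above: the outer tape differentiates
# the inner accumulator's JVP, giving a second derivative.
import tensorflow as tf

x = tf.constant(1.)
with tf.GradientTape() as outer_tape:
    outer_tape.watch(x)
    with tf.autodiff.ForwardAccumulator(x, tf.constant(1.)) as acc:
        y = tf.cos(x)
d2y_dx2 = outer_tape.gradient(acc.jvp(y), x)  # -cos(1.), since the jvp is -sin(x)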
def testBatchBackwardOverForward(self, forward_prop_first):
    x = constant_op.constant(1.)
    tangents = random_ops.random_normal(shape=[10], seed=1)
    expected = [-t * math_ops.cos(1.) for t in tangents]
    if forward_prop_first:
        batch_acc = forwardprop.ForwardAccumulator._batch_accumulator(x, tangents)
        gradient_tape = backprop.GradientTape(persistent=True)
    else:
        gradient_tape = backprop.GradientTape(persistent=True)
        batch_acc = forwardprop.ForwardAccumulator._batch_accumulator(x, tangents)
    with gradient_tape as tape:
        with batch_acc as acc:
            tape.watch(x)
            y = math_ops.cos(x)
            self.assertTrue(tape_lib.should_record_backprop((acc.jvp(y),)))
            jvps = acc.jvp(y)
        d2y_dx2 = [tape.gradient(dy_dx, x) for dy_dx in jvps]
    self.assertAllClose(expected, d2y_dx2)
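# Since ForwardAccumulator._batch_accumulator is private, a hedged public-API
# analogue of the batched JVP above runs one accumulator per tangent; each
# JVP of y = cos(x) equals -t * sin(1.).
import tensorflow as tf

x = tf.constant(1.)
tangents = [tf.constant(0.5), tf.constant(2.0)]
jvps = []
for t in tangents:
    with tf.autodiff.ForwardAccumulator(x, t) as acc:
        y = tf.cos(x)
    jvps.append(acc.jvp(y))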
def _update_trainable_params(self):
    params = self.get_parameters(trainable_only=False)

    trainable_params = set()

    for idx, p in enumerate(params):
        # Determine which input tensors/Variables are being recorded for backpropagation.
        # The function should_record_backprop, documented here:
        # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/tape.py#L167
        # accepts lists of *Tensors* (not Variables), returning True if all are being
        # watched by one or more existing gradient tapes, False if not.
        if isinstance(p, (tf.Variable, tf.Tensor)) and should_record_backprop(
            # we need to convert any Variable objects to Tensors here, otherwise
            # should_record_backprop will raise an error
            [tf.convert_to_tensor(p)]
        ):
            trainable_params.add(idx)

    self.trainable_params = trainable_params
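# A minimal sketch of the Variable-to-Tensor conversion noted above (the
# import path may vary across TensorFlow versions, as requires_grad below
# handles): reading the Variable inside an active tape makes
# should_record_backprop return True.
import tensorflow as tf
from tensorflow.python.eager.tape import should_record_backprop

v = tf.Variable(0.5)
print(should_record_backprop([tf.convert_to_tensor(v)]))  # False: no active tape
with tf.GradientTape():
    print(should_record_backprop([tf.convert_to_tensor(v)]))  # True: watched on access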
def requires_grad(tensor):
    """Returns True if the tensor is considered trainable.

    .. warning::

        The implementation depends on the contained tensor type, and
        may be context dependent.

        For example, Torch tensors and PennyLane tensors track trainability
        as a property of the tensor itself. TensorFlow, on the other hand,
        only tracks trainability if being watched by a gradient tape.

    Args:
        tensor (tensor_like): input tensor

    **Example**

    Calling this function on a PennyLane NumPy array:

    >>> x = np.array([1., 5.], requires_grad=True)
    >>> requires_grad(x)
    True
    >>> x.requires_grad = False
    >>> requires_grad(x)
    False

    PyTorch has similar behaviour.

    With TensorFlow, the output is dependent on whether the tensor
    is currently being watched by a gradient tape:

    >>> x = tf.Variable([0.6, 0.1])
    >>> requires_grad(x)
    False
    >>> with tf.GradientTape() as tape:
    ...     print(requires_grad(x))
    True

    While TensorFlow constants are by default not trainable, they can be
    manually watched by the gradient tape:

    >>> x = tf.constant([0.6, 0.1])
    >>> with tf.GradientTape() as tape:
    ...     print(requires_grad(x))
    False
    >>> with tf.GradientTape() as tape:
    ...     tape.watch([x])
    ...     print(requires_grad(x))
    True
    """
    interface = get_interface(tensor)

    if interface == "tensorflow":
        import tensorflow as tf

        try:
            from tensorflow.python.eager.tape import should_record_backprop
        except ImportError:  # pragma: no cover
            from tensorflow.python.eager.tape import should_record as should_record_backprop

        return should_record_backprop([tf.convert_to_tensor(tensor)])

    if interface in ("torch", "autograd"):
        return tensor.requires_grad

    if interface == "numpy":
        return False

    if interface == "jax":
        return True

    raise ValueError(f"Argument {tensor} is an unknown object")
def requires_grad(self):
    """Whether the tensor is considered trainable: True if the underlying
    tensor is currently being watched by at least one gradient tape."""
    return should_record_backprop([self.astensor(self.data)])
def _TFQNode(*input_, **input_kwargs):
    # Determine which input tensors/Variables are being recorded for backpropagation.
    # The function should_record_backprop, documented here:
    # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/tape.py#L163
    # accepts lists of *tensors* (not Variables), returning True if all are being
    # watched by one or more existing gradient tapes, False if not.
    trainable_args = {
        idx
        for idx, i in enumerate(input_)
        if isinstance(i, (Variable, tf.Tensor))
        and should_record_backprop([tf.convert_to_tensor(i)])
    }

    # detach all input Tensors, convert to NumPy array
    args = [i.numpy() if isinstance(i, (Variable, tf.Tensor)) else i for i in input_]
    kwargs = {
        k: v.numpy() if isinstance(v, (Variable, tf.Tensor)) else v
        for k, v in input_kwargs.items()
    }

    # if a NumPy array is a scalar, convert it to a Python float
    args = [i.tolist() if (isinstance(i, np.ndarray) and not i.shape) else i for i in args]
    kwargs = {
        k: v.tolist() if (isinstance(v, np.ndarray) and not v.shape) else v
        for k, v in kwargs.items()
    }

    # evaluate the QNode
    qnode.set_trainable_args(trainable_args)
    res = qnode(*args, **kwargs)

    if not isinstance(res, np.ndarray):
        # scalar result, cast to NumPy scalar
        res = np.array(res)

    def grad(grad_output, **tfkwargs):
        """Returns the vector-Jacobian product"""
        # evaluate the Jacobian matrix of the QNode
        variables = tfkwargs.get("variables", None)
        qnode.set_trainable_args(trainable_args)
        jacobian = qnode.jacobian(args, kwargs)
        jacobian = tf.constant(jacobian, dtype=dtype)

        # Reshape gradient output array as a 2D row-vector.
        grad_output_row = tf.transpose(tf.reshape(grad_output, [-1, 1]))

        # Calculate the vector-Jacobian matrix product, and flatten the output.
        grad_input = tf.matmul(grad_output_row, jacobian)
        grad_input = tf.reshape(grad_input, [-1])

        grad_input_unflattened = unflatten_tf(grad_input, input_)[0]

        for idx in set(range(len(args))) - trainable_args:
            # If a particular input argument is non-differentiable,
            # replace the corresponding position in the gradient with None.
            grad_input_unflattened[idx] = None

        if variables is not None:
            return grad_input_unflattened, variables

        return grad_input_unflattened

    return tf.convert_to_tensor(res, dtype=dtype), grad
def _TFQNode(*input_, **input_kwargs):
    # Determine which input tensors/Variables are being recorded for backpropagation.
    # The function should_record_backprop, documented here:
    # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/tape.py#L163
    # accepts lists of *tensors* (not Variables), returning True if all are being
    # watched by one or more existing gradient tapes, False if not.
    requires_grad = [
        should_record_backprop([tf.convert_to_tensor(i)])
        if isinstance(i, (Variable, tf.Tensor))
        else False
        for i in input_
    ]

    # detach all input Tensors, convert to NumPy array
    args = [i.numpy() if isinstance(i, (Variable, tf.Tensor)) else i for i in input_]
    kwargs = {
        k: v.numpy() if isinstance(v, (Variable, tf.Tensor)) else v
        for k, v in input_kwargs.items()
    }

    # if a NumPy array is a scalar, convert it to a Python float
    args = [i.tolist() if (isinstance(i, np.ndarray) and not i.shape) else i for i in args]
    kwargs = {
        k: v.tolist() if (isinstance(v, np.ndarray) and not v.shape) else v
        for k, v in kwargs.items()
    }

    # evaluate the QNode
    res = qnode(*args, **kwargs)

    if not isinstance(res, np.ndarray):
        # scalar result, cast to NumPy scalar
        res = np.array(res)

    def grad(grad_output, **tfkwargs):
        """Returns the vector-Jacobian product"""
        diff_indices = None
        non_diff_indices = set()

        # determine the QNode variables which should be differentiated
        for differentiable, arg_variable in zip(requires_grad, qnode.arg_vars):
            if not differentiable:
                indices = [i.idx for i in _flatten(arg_variable)]
                non_diff_indices.update(indices)

        if non_diff_indices:
            diff_indices = set(range(qnode.num_variables)) - non_diff_indices

        # evaluate the Jacobian matrix of the QNode
        variables = tfkwargs.get("variables", None)
        jacobian = qnode.jacobian(args, kwargs, wrt=diff_indices)
        jacobian = tf.constant(jacobian, dtype=dtype)

        # Reshape gradient output array as a 2D row-vector.
        grad_output_row = tf.transpose(tf.reshape(grad_output, [-1, 1]))

        # Calculate the vector-Jacobian matrix product, and flatten the output.
        grad_input = tf.matmul(grad_output_row, jacobian)
        grad_input = tf.reshape(grad_input, [-1])

        if non_diff_indices:
            # TensorFlow requires we return a gradient of size (num_variables,)
            res = np.zeros([qnode.num_variables])
            indices = np.fromiter(diff_indices, dtype=np.int64)
            res[indices] = grad_input
            grad_input = tf.constant(res, dtype=dtype)

        grad_input_unflattened = unflatten_tf(grad_input, input_)[0]

        if variables is not None:
            return grad_input_unflattened, variables

        return grad_input_unflattened

    return tf.convert_to_tensor(res, dtype=dtype), grad
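# A hedged end-to-end usage sketch (illustrative device and circuit, not part
# of this module) of the custom-gradient wrapper above: TensorFlow's tape
# calls grad() to obtain the vector-Jacobian product of the QNode.
import pennylane as qml
import tensorflow as tf

dev = qml.device("default.qubit", wires=1)

@qml.qnode(dev, interface="tf")
def circuit(phi):
    qml.RX(phi, wires=0)
    return qml.expval(qml.PauliZ(0))

phi = tf.Variable(0.3)
with tf.GradientTape() as tape:
    out = circuit(phi)  # <Z> = cos(phi)
print(tape.gradient(out, phi))  # -sin(0.3)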