def testSimpleGraphConstructionScopeOutsideFunction(self):
  """Instruments a tf.function when the callback is added outside of it.

  The op callback is registered before `log_2plus_unique_x` is defined and
  called. The test then verifies the function's outputs, the op types seen
  by the callback, and the intermediate ndarray values the callback recorded.
  """
  recorder = _NumpyFunctionCallback()
  op_callbacks.add_op_callback(recorder.callback)

  @def_function.function
  def log_2plus_unique_x(x):
    unique_values, unique_pos = array_ops.unique(x)
    return math_ops.log(2.0 + unique_values), unique_pos

  x = constant_op.constant([-1.0, -1.0, 0.0], dtype=dtypes.float32)
  y1, y2 = log_2plus_unique_x(x)
  self.assertAllClose(y1, [0.0, np.log(2.0)])
  self.assertAllClose(y2, [0, 0, 1])

  # Every op type used by the function must have been seen by the callback.
  for expected_op_type in (_UNIQUE_OP, _ADD_OP, _LOG_OP):
    self.assertIn(expected_op_type, recorder.graph_op_types)
  num_op_names = len(recorder.graph_op_names)
  num_op_types = len(recorder.graph_op_types)
  self.assertEqual(num_op_names, num_op_types)

  # Check the graph internal ndarrays recorded at runtime.
  recorded_ndarrays = recorder.graph_internal_ndarrays

  unique_op_outputs = recorded_ndarrays[_UNIQUE_OP]
  if context.executing_eagerly():
    # b/140810696: The run_in_graph_and_eager_modes decorator runs
    # Session.run() twice. We can't assert on the number of outputs in
    # that case.
    self.assertEqual(len(unique_op_outputs), 2)
  self.assertAllClose(unique_op_outputs[0], [-1.0, 0.0])
  self.assertAllClose(unique_op_outputs[1], [0, 0, 1])

  # NOTE(review): this lookup uses the literal b"add" rather than _ADD_OP;
  # presumably the recorder keys ndarrays by op name -- confirm.
  add_op_outputs = recorded_ndarrays[b"add"]
  if context.executing_eagerly():
    self.assertEqual(len(add_op_outputs), 1)
  self.assertAllClose(add_op_outputs[0], [1.0, 2.0])

  log_op_outputs = recorded_ndarrays[_LOG_OP]
  if context.executing_eagerly():
    self.assertEqual(len(log_op_outputs), 1)
  self.assertAllClose(log_op_outputs[0], [0.0, np.log(2.0)])
def testEagerFunctionExecution(self):
  """Checks what the callback sees when pre-traced tf.functions are called.

  `square_log` is called once per dtype before the callback is registered,
  so only the subsequent eager invocations of the function (not the
  construction of its graphs) are expected to reach the callback.
  """
  instrument = _NumpyFunctionCallback()

  @def_function.function
  def square_log(x):
    return math_ops.square(math_ops.log(x))

  # Call the function once, so that the graph construction won't show up
  # in the callback.
  x_float32 = constant_op.constant(6.0, dtype=dtypes.float32)
  x_float64 = constant_op.constant(6.0, dtype=dtypes.float64)
  typed_inputs = (x_float32, x_float64)
  for typed_input in typed_inputs:
    square_log(typed_input)

  op_callbacks.add_op_callback(instrument.callback)
  for typed_input in typed_inputs:
    y = square_log(typed_input)
    self.assertAllClose(y, np.square(np.log(6.0)))

  self.assertEqual(instrument.eager_op_names, [None, None])
  # Nothing graph-related should have been recorded.
  self.assertFalse(instrument.graph_op_types)
  self.assertFalse(instrument.graph_op_names)
  self.assertFalse(instrument.graph_inputs)

  # Each of the two dtypes should be associated with its own FuncGraph.
  for typed_input in typed_inputs:
    concrete_function = square_log.get_concrete_function(typed_input)
    self.assertIn(concrete_function.name, instrument.eager_op_types)

  self.assertEqual(len(instrument.eager_inputs), 2)
  for position, typed_input in enumerate(typed_inputs):
    recorded_inputs = instrument.eager_inputs[position]
    self.assertIsInstance(recorded_inputs, tuple)
    self.assertEqual(recorded_inputs[0], typed_input)
def testPadOp(self):
  """Checks that the output of a Pad op inside a tf.function is recorded."""
  recorder = _NumpyFunctionCallback()
  op_callbacks.add_op_callback(recorder.callback)

  @def_function.function
  def my_pad(x, padding):
    return array_ops.pad(x, padding)

  x = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float32)
  paddings = [[1, 1], [2, 2]]
  y = my_pad(x, paddings)

  # The 2x2 input surrounded by one row and two columns of zeros per side.
  expected_output = np.array(
      [
          [0, 0, 0, 0, 0, 0],
          [0, 0, 1, 2, 0, 0],
          [0, 0, 3, 4, 0, 0],
          [0, 0, 0, 0, 0, 0],
      ],
      dtype=np.float32)
  self.assertAllClose(y, expected_output)

  recorded_pad_outputs = recorder.graph_internal_ndarrays[b"Pad"]
  self.assertAllClose(recorded_pad_outputs[0], expected_output)
def testSimpleGraphConstructionWithCallbackReturningNone(self):
  """Tests that a callback that returns None works."""
  seen_op_types = []

  def record_op_type_only(op_type,
                          inputs,
                          attrs,
                          outputs,
                          op_name=None,
                          graph=None):
    # Only the op type is recorded; the implicit return value is None.
    del inputs, attrs, outputs, op_name, graph  # Unused.
    seen_op_types.append(compat.as_bytes(op_type))

  op_callbacks.add_op_callback(record_op_type_only)

  @def_function.function
  def log1p(x):
    return math_ops.log(1.0 + x)

  x = constant_op.constant(3.0)
  y = log1p(x)

  self.assertAllClose(y, np.log(4.0))
  for expected_op_type in (_ADD_OP, _LOG_OP):
    self.assertIn(expected_op_type, seen_op_types)
def testDatasetMapTest(self):
  """Checks that ops inside a Dataset.map() function reach the callback."""
  recorder = _NumpyFunctionCallback()
  op_callbacks.add_op_callback(recorder.callback)

  tensor = constant_op.constant(
      [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0])

  def map_fn(x):
    return math_ops.log(math_ops.square(x) + 1)

  batched = dataset_ops.Dataset.from_tensor_slices(tensor).batch(2)
  dataset = batched.map(map_fn)
  iterator = dataset_ops.make_one_shot_iterator(dataset)

  # First two batches: log(x ** 2 + 1) for [0.5, 1.0] and [1.5, 2.0].
  for pre_log_values in ([1.25, 2], [3.25, 5]):
    self.assertAllClose(iterator.next(), np.log(pre_log_values))

  for expected_op_type in (_SQUARE_OP, _ADD_OP, _LOG_OP):
    self.assertIn(expected_op_type, recorder.graph_op_types)

  num_eager_op_types = len(recorder.eager_op_types)
  num_eager_op_names = len(recorder.eager_op_names)
  self.assertEqual(num_eager_op_types, num_eager_op_names)
def testEagerOpExecution(self):
  """Checks the callback's records for two ops executed eagerly."""
  instrument = _NumpyFunctionCallback()
  op_callbacks.add_op_callback(instrument.callback)

  x = constant_op.constant(6.0)
  log_of_x = math_ops.log(x)
  y = math_ops.square(log_of_x)
  self.assertAllClose(y, np.square(np.log(6.0)))

  self.assertEqual(instrument.eager_op_types, [_LOG_OP, _SQUARE_OP])
  # Op names are unavailable under eager mode.
  self.assertEqual(instrument.eager_op_names, [None, None])
  self.assertEqual(instrument.eager_graphs, [None, None])

  recorded_inputs = instrument.eager_inputs
  self.assertEqual(len(recorded_inputs), 2)

  # The first recorded op received `x` as its only input.
  self.assertEqual(len(recorded_inputs[0]), 1)
  self.assertIsInstance(recorded_inputs[0], tuple)
  self.assertEqual(recorded_inputs[0][0], x)

  # The second recorded op received log(6.0) as its only input.
  self.assertEqual(len(recorded_inputs[1]), 1)
  self.assertIsInstance(recorded_inputs[1], tuple)
  self.assertAllClose(recorded_inputs[1][0], np.log(6.0))

  # No graph-side records are expected.
  self.assertFalse(instrument.graph_op_types)
  self.assertFalse(instrument.graph_op_names)
  self.assertFalse(instrument.graph_attrs)
  self.assertFalse(instrument.graph_graphs)
  self.assertFalse(instrument.graph_inputs)
def testGraphOpAttributesAreCapture(self):
  """Checks that the attributes of a graph MatMul op reach the callback.

  The recorded attrs are a flat tuple in which each attribute name is
  immediately followed by its value, hence the `index(name) + 1` lookups.
  """
  instrument = _NumpyFunctionCallback()
  op_callbacks.add_op_callback(instrument.callback)

  @def_function.function
  def my_matmul(m, x):
    return math_ops.matmul(m, x, transpose_a=True, transpose_b=False)

  m = constant_op.constant([[1.0, -1.0], [0.0, 1.0]])
  x = constant_op.constant([[-2.0], [3.0]])
  y = my_matmul(m, x)
  self.assertAllClose(y, [[-2.0], [5.0]])

  index = instrument.graph_op_types.index(_MATMUL_OP)
  matmul_attrs = instrument.graph_attrs[index]
  self.assertIsInstance(matmul_attrs, tuple)

  transpose_a_value = matmul_attrs[matmul_attrs.index("transpose_a") + 1]
  self.assertEqual(transpose_a_value.b, True)
  transpose_b_value = matmul_attrs[matmul_attrs.index("transpose_b") + 1]
  self.assertEqual(transpose_b_value.b, False)

  if context.executing_eagerly():
    self.assertEqual(len(instrument.eager_attrs), 1)
    self.assertIsInstance(instrument.eager_attrs[0], tuple)
def thread_1_job():
  """Computes square(log(6.0)) with `instrument_1`'s callback registered.

  The callback is registered on entry and removed again before returning.

  Returns:
    The result of applying `math_ops.log` and then `math_ops.square` to 6.0.
  """
  op_callbacks.add_op_callback(instrument_1.callback)
  six = constant_op.constant(6.0)
  log_of_six = math_ops.log(six)
  result = math_ops.square(log_of_six)
  op_callbacks.remove_op_callback(instrument_1.callback)
  return result
def setUp(self):
  """Resets the per-test bookkeeping and registers `self._op_callback`."""
  super().setUp()
  # Fresh containers for every test.
  self.trace_log = list()
  self.variables = dict()
  op_callbacks.add_op_callback(self._op_callback)
def log_2plus_unique_x(x):
  """Returns sin(log(2 + unique(x))), instrumenting all but the final Sin.

  `instrument.callback` is registered before the Unique/Add/Log ops are
  created and removed before `math_ops.sin` is called.
  """
  op_callbacks.add_op_callback(instrument.callback)
  unique_result = array_ops.unique(x)
  distinct_values = unique_result[0]
  shifted = 2.0 + distinct_values
  log_values = math_ops.log(shifted)
  op_callbacks.remove_op_callback(instrument.callback)
  return math_ops.sin(log_values)
def thread_1_job():
  """Registers `instrument_1`'s callback, then calls `square_log`.

  The callback is not removed afterwards and the result of the call is
  discarded.
  """
  callback_to_register = instrument_1.callback
  op_callbacks.add_op_callback(callback_to_register)
  square_log(x_float32)
def enable_dumping(dump_root,
                   tensor_debug_mode=DEFAULT_TENSOR_DEBUG_MODE,
                   circular_buffer_size=1000):
  """Enable dumping debugging information from a TensorFlow program.

  The debugging information is dumped to a directory on the file system
  specified as `dump_root`.

  The dumped debugging information can be ingested by debugger UIs.

  The files in the dump directory contain the following information:
    - TensorFlow Function construction (e.g., compilation of Python functions
      decorated with @tf.function), the op types, names (if available),
      context, the input and output tensors, and the associated stack traces.
    - Execution of TensorFlow operations (ops) and Functions and their stack
      traces, op types, names (if available) and contexts. In addition,
      depending on the value of the `tensor_debug_mode` argument (see Args
      section below), the value(s) of the output tensors or more concise
      summaries of the tensor values will be dumped.
    - A snapshot of Python source files involved in the execution of the
      TensorFlow program.

  Once enabled, the dumping can be disabled with the corresponding
  `disable_dumping()` method under the same Python namespace.
  Calling this method more than once with the same `dump_root` is idempotent.
  Calling this method with a different `dump_root` abolishes the
  previously-enabled `dump_root`.

  Args:
    dump_root: The directory path where the dumping information will be
      written.
    tensor_debug_mode: Debug mode for tensor values, as a string.
      The currently supported options are:
        - "NO_TENSOR": (Default) Only traces the execution of ops' output
          tensors, while not dumping the value of the ops' output tensors
          or any form of concise summary of them.
    circular_buffer_size: Size of the circular buffers for execution events.
      These circular buffers are designed to reduce the overhead of debugging
      dumping. They hold the most recent debug events concerning eager
      execution of ops and `tf.function`s and traces of tensor values computed
      inside `tf.function`s. They are written to the file system only when
      the proper flushing method is called (see description of return values
      below). Expected to be an integer. If <= 0, the circular-buffer behavior
      will be disabled, i.e., the execution debug events will be written to
      the file writers in the same way as non-execution events such as op
      creations and source-file snapshots.

  Returns:
    A DebugEventsWriter instance used by the dumping callback. The caller
    may use its flushing methods, including `FlushNonExecutionFiles()` and
    `FlushExecutionFiles()`.

  Raises:
    ValueError: If `tensor_debug_mode` is not one of the keys of
      `debug_event_pb2.TensorDebugMode`.
    NotImplementedError: If a new dumping configuration has to be created
      and `tensor_debug_mode` is anything other than "NO_TENSOR".
  """
  # TODO(cais): Revise the "UIs (currently under construction)" part of the doc
  # string above.
  # TODO(cais): Add Python code example to the doc string above.
  # TODO(cais): Once UIs are ready, expose this method and the associated
  # `disable_` method under the `tf.debugging.*` namespace.
  if tensor_debug_mode not in debug_event_pb2.TensorDebugMode.keys():
    raise ValueError(
        "Invalid value in tensor_debug_mode ('%s'). Valid options are: %s" %
        (tensor_debug_mode, debug_event_pb2.TensorDebugMode.keys()))

  if (hasattr(_state, "config") and
      _state.config.circular_buffer_size != circular_buffer_size):
    logging.warning(
        "There is already a dumping callback configured with a different "
        "circular-buffer size (%d). Therefore the newly request "
        "circular-buffer size (%d) will not be honored.",
        _state.config.circular_buffer_size, circular_buffer_size)

  if not hasattr(_state, "config") or _state.config.dump_root != dump_root:
    # Validate the requested mode *before* touching `_state`. Otherwise a
    # rejected request would leave an unsupported, half-initialized config
    # behind, which a later call with the same `dump_root` would silently
    # reuse.
    tensor_debug_mode_value = debug_event_pb2.TensorDebugMode.Value(
        tensor_debug_mode)
    if tensor_debug_mode_value != debug_event_pb2.TensorDebugMode.NO_TENSOR:
      raise NotImplementedError(
          "tfdbg dumping: support for tensor debug mode %s is not "
          "implemented yet" % tensor_debug_mode_value)
    _state.config = TracingConfig(
        dump_root=dump_root,
        tensor_debug_mode=tensor_debug_mode_value,
        circular_buffer_size=int(circular_buffer_size))
    _state.hostname = socket.gethostname()
    # A list of source-file paths.
    _state.source_file_paths = []
    # A map from stack frame (FileLineCol) to unique ID.
    _state.stack_frame_to_id = dict()
    # Mapping op context to unique ID.
    _state.context_to_id = dict()

  op_callbacks.add_op_callback(_dumping_callback)
  logging.info(
      "Enabled dumping callback in thread %s "
      "(dump root: %s, tensor debug mode: %s)",
      threading.current_thread().name, _state.config.dump_root,
      tensor_debug_mode)
  return _get_writer()
def setUp(self):
  """Resets per-test state, clears `raises_cm`, and registers the callback."""
  super().setUp()
  # Fresh containers and no pending assertRaises context for every test.
  self.raises_cm = None
  self.trace_log = list()
  self.variables = dict()
  op_callbacks.add_op_callback(self._op_callback)
def setUp(self):
  """Sets the autograph env flag, resets state, and registers the callback."""
  super().setUp()
  # The environment variable is set for the whole process and is not
  # restored here.
  os.environ['AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS'] = '1'
  self.trace_log = list()
  self.variables = dict()
  op_callbacks.add_op_callback(self._op_callback)
def testNonCallableObjectArgErrors(self):
  """Registering a non-callable object as an op callback must fail."""
  not_a_callable = 1337
  expected_message = r"is expected to be callable"
  with self.assertRaisesRegex(ValueError, expected_message):
    op_callbacks.add_op_callback(not_a_callable)
def enable_dump_debug_info(dump_root,
                           tensor_debug_mode=DEFAULT_TENSOR_DEBUG_MODE,
                           circular_buffer_size=1000,
                           op_regex=None,
                           tensor_dtypes=None):
  """Enable dumping debugging information from a TensorFlow program.

  The debugging information is dumped to a directory on the file system
  specified as `dump_root`.

  The dumped debugging information can be ingested by debugger UIs.

  The files in the dump directory contain the following information:
    - TensorFlow Function construction (e.g., compilation of Python functions
      decorated with @tf.function), the op types, names (if available),
      context, the input and output tensors, and the associated stack traces.
    - Execution of TensorFlow operations (ops) and Functions and their stack
      traces, op types, names (if available) and contexts. In addition,
      depending on the value of the `tensor_debug_mode` argument (see Args
      section below), the value(s) of the output tensors or more concise
      summaries of the tensor values will be dumped.
    - A snapshot of Python source files involved in the execution of the
      TensorFlow program.

  Once enabled, the dumping can be disabled with the corresponding
  `disable_dump_debug_info()` method under the same Python namespace.
  Calling this method more than once with the same `dump_root` is idempotent.
  Calling this method more than once with different `tensor_debug_mode`s
  leads to a `ValueError`.
  Calling this method more than once with different `circular_buffer_size`s
  leads to a `ValueError`.
  Calling this method with a different `dump_root` abolishes the
  previously-enabled `dump_root`.

  Usage example:

  ```py
  tf.debugging.experimental.enable_dump_debug_info('/tmp/my-tfdbg-dumps')

  # Code to build, train and run your TensorFlow model...
  ```

  Args:
    dump_root: The directory path where the dumping information will be
      written.
    tensor_debug_mode: Debug mode for tensor values, as a string.
      The currently supported options are:
        - "NO_TENSOR": (Default) Only traces the execution of ops' output
          tensors, while not dumping the value of the ops' output tensors
          or any form of concise summary of them.
      The modes "CURT_HEALTH", "CONCISE_HEALTH", "SHAPE" and "FULL_TENSOR"
      are also accepted by this function. Any other mode name raises
      `NotImplementedError`.
    circular_buffer_size: Size of the circular buffers for execution events.
      These circular buffers are designed to reduce the overhead of debugging
      dumping. They hold the most recent debug events concerning eager
      execution of ops and `tf.function`s and traces of tensor values computed
      inside `tf.function`s. They are written to the file system only when
      the proper flushing method is called (see description of return values
      below). Expected to be an integer. If <= 0, the circular-buffer behavior
      will be disabled, i.e., the execution debug events will be written to
      the file writers in the same way as non-execution events such as op
      creations and source-file snapshots.
    op_regex: Dump data from only the tensors from op types that match the
      regular expression (through Python's `re.match()`).
      "Op type" refers to the names of the TensorFlow operations (e.g.,
      "MatMul", "LogSoftmax"), which may repeat in a TensorFlow
      function. It does *not* refer to the names of nodes (e.g.,
      "dense/MatMul", "dense_1/MatMul_1") which are unique within a function.
      - Example 1: Dump tensor data from only MatMul and Relu ops
        `op_regex="^(MatMul|Relu)$"`.
      - Example 2: Dump tensors from all ops *except* Relu:
        `op_regex="(?!^Relu$)"`.
      This filter operates in a logical AND relation with `tensor_dtypes`.
    tensor_dtypes: Dump data from only the tensors that have one of the
      specified dtypes. This optional argument can be in any of the following
      formats:
      - a list or tuple of `DType` objects or strings that can be converted
        to `DType` objects via `tf.as_dtype()`. Examples:
        - `tensor_dtypes=[tf.float32, tf.float64]`,
        - `tensor_dtypes=["float32", "float64"]`,
        - `tensor_dtypes=(tf.int32, tf.bool)`,
        - `tensor_dtypes=("int32", "bool")`
      - a callable that takes a single `DType` argument and returns a Python
        `boolean` indicating whether the dtype is to be included in the data
        dumping. Examples:
        - `tensor_dtypes=lambda dtype: dtype.is_integer`.
      This filter operates in a logical AND relation with `op_regex`.

  Returns:
    A DebugEventsWriter instance used by the dumping callback. The caller
    may use its flushing methods, including `FlushNonExecutionFiles()` and
    `FlushExecutionFiles()`.

  Raises:
    ValueError: If `tensor_debug_mode` is not one of the keys of
      `debug_event_pb2.TensorDebugMode`; if `tensor_dtypes` is neither
      `None`, a list, a tuple, nor a callable; or if a dumping callback is
      already configured with a different `circular_buffer_size` or
      `tensor_debug_mode`.
    NotImplementedError: If `tensor_debug_mode` names a valid mode that this
      function does not accept.
  """
  # TODO(cais): Revise the "UIs (currently under construction)" part of the doc
  # string above.
  tensor_debug_mode_keys = debug_event_pb2.TensorDebugMode.keys()
  if tensor_debug_mode not in tensor_debug_mode_keys:
    raise ValueError(
        "Invalid value in tensor_debug_mode ('%s'). Valid options are: %s" %
        (tensor_debug_mode, tensor_debug_mode_keys))

  # From here on `tensor_debug_mode` is the enum's integer value, not its name.
  tensor_debug_mode = debug_event_pb2.TensorDebugMode.Value(tensor_debug_mode)
  if tensor_debug_mode not in (debug_event_pb2.TensorDebugMode.NO_TENSOR,
                               debug_event_pb2.TensorDebugMode.CURT_HEALTH,
                               debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
                               debug_event_pb2.TensorDebugMode.SHAPE,
                               debug_event_pb2.TensorDebugMode.FULL_TENSOR):
    raise NotImplementedError(
        "tfdbg dumping: support for tensor debug mode %s is not "
        "implemented yet" %
        debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))

  # Validate the types of tensor_dtypes.
  if tensor_dtypes is not None:
    if (not isinstance(tensor_dtypes, (list, tuple)) and
        not callable(tensor_dtypes)):
      raise ValueError(
          "If specified, tensor_dtypes is expected to be a list, a tuple, or "
          "a callable that takes a DType argument and returns a boolean, "
          "but received %s" % (tensor_dtypes,))
    if isinstance(tensor_dtypes, (list, tuple)):
      tensor_dtypes = [
          dtypes.as_dtype(dtype_item) for dtype_item in tensor_dtypes
      ]

  if hasattr(_state, "dumping_callback"):
    # A callback already exists: it is reused, so `op_regex` and
    # `tensor_dtypes` from this call are not applied. Only conflicting buffer
    # sizes or debug modes are rejected.
    if _state.dumping_callback.circular_buffer_size != circular_buffer_size:
      raise ValueError(
          "There is already a dumping callback configured with a different "
          "circular-buffer size (%d). Therefore the newly request "
          "circular-buffer size (%d) will not be honored." %
          (_state.dumping_callback.circular_buffer_size, circular_buffer_size))
    if _state.dumping_callback.tensor_debug_mode != tensor_debug_mode:
      # Translate enum values to names with `Name()` rather than by indexing
      # the list of keys, which is only correct while the enum values happen
      # to be contiguous and zero-based.
      raise ValueError(
          "There is already a dumping callback configured for dump root "
          "%s with a different "
          "tensor-debug mode (%s). Therefore the newly request "
          "tensor-debug mode (%s) size will not be honored." %
          (_state.dumping_callback.dump_root,
           debug_event_pb2.TensorDebugMode.Name(
               _state.dumping_callback.tensor_debug_mode),
           debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode)))
  else:
    _state.dumping_callback = _DumpingCallback(dump_root, tensor_debug_mode,
                                               circular_buffer_size, op_regex,
                                               tensor_dtypes)
    op_callbacks.add_op_callback(_state.dumping_callback.callback)
    function_lib.add_function_callback(
        _state.dumping_callback.function_callback)

  if _state.dumping_callback.dump_root != dump_root:
    _state.dumping_callback.dump_root = dump_root

  logging.info(
      "Enabled dumping callback in thread %s "
      "(dump root: %s, tensor debug mode: %s)",
      threading.current_thread().name, _state.dumping_callback.dump_root,
      tensor_debug_mode)

  # Registered on every call, including repeated calls that reuse an existing
  # callback.
  atexit.register(disable_dump_debug_info)
  return _state.dumping_callback.get_writer()
def enable_check_numerics(stack_height_limit=30, path_length_limit=50):
  r"""Enable tensor numerics checking in an eager/graph unified fashion.

  The numerics checking mechanism will cause any TensorFlow eager execution or
  graph execution to error out as soon as an op's output tensor contains
  infinity or NaN.

  This method is idempotent. Calling it multiple times has the same effect
  as calling it once. In particular, `stack_height_limit` and
  `path_length_limit` are only applied when no check-numerics configuration
  exists yet; while one exists, the limits passed to later calls are ignored.

  This method takes effect only on the thread in which it is called.

  When an op's float-type output tensor contains any Infinity or NaN, a
  `tf.errors.InvalidArgumentError` will be thrown, with an error message that
  reveals the following information:
    - The type of the op that generated the tensor with bad numerics.
    - Data type (dtype) of the tensor.
    - Shape of the tensor (to the extent known at the time of eager execution
      or graph construction).
    - Name of the containing graph (if available).
    - (Graph mode only): The stack trace of the intra-graph op's creation,
      with a stack-height limit and a path-length limit for visual clarity.
      The stack frames that belong to the user's code (as opposed to
      tensorflow's internal code) are highlighted with a text arrow ("->").
    - (Eager mode only): How many of the offending tensor's elements are
      `Infinity` and `NaN`, respectively.

  Once enabled, the check-numerics mechanism can be disabled by using
  `tf.debugging.disable_check_numerics()`.

  Example usage:

  1. Catching infinity during the execution of a `tf.function` graph:

     ```py
     import tensorflow as tf

     tf.debugging.enable_check_numerics()

     @tf.function
     def square_log_x_plus_1(x):
       v = tf.math.log(x + 1)
       return tf.math.square(v)

     x = -1.0

     # When the following line runs, a function graph will be compiled
     # from the Python function `square_log_x_plus_1()`. Due to the
     # `enable_check_numerics()` call above, the graph will contain
     # numerics checking ops that will run during the function graph's
     # execution. The function call generates an -infinity when the Log
     # (logarithm) op operates on the output tensor of the Add op.
     # The program errors out at this line, printing an error message.
     y = square_log_x_plus_1(x)
     z = -y
     ```

  2. Catching NaN during eager execution:

     ```py
     import numpy as np
     import tensorflow as tf

     tf.debugging.enable_check_numerics()

     x = np.array([[0.0, -1.0], [4.0, 3.0]])

     # The following line executes the Sqrt op eagerly. Due to the negative
     # element in the input array, a NaN is generated. Due to the
     # `enable_check_numerics()` call above, the program errors immediately
     # at this line, printing an error message.
     y = tf.math.sqrt(x)
     z = tf.matmul(y, y)
     ```

  Args:
    stack_height_limit: Limit to the height of the printed stack trace.
      Applicable only to ops in `tf.function`s (graphs).
    path_length_limit: Limit to the file path included in the printed stack
      trace. Applicable only to ops in `tf.function`s (graphs).
  """
  # Keep an existing configuration untouched so that repeated calls are
  # idempotent.
  if not hasattr(_state, "config"):
    _state.config = CheckNumericsConfig(
        stack_height_limit=stack_height_limit,
        path_length_limit=path_length_limit)
  op_callbacks.add_op_callback(_check_numerics_callback)

  logging.info("Enabled check-numerics callback in thread %s",
               threading.current_thread().name)
def testRemovingCallbackTwiceLeadsToError(self):
  """Removing a callback that was already removed must raise a KeyError."""
  recorder = _NumpyFunctionCallback()
  op_callbacks.add_op_callback(recorder.callback)
  # The first removal succeeds; the second one has nothing left to remove.
  op_callbacks.remove_op_callback(recorder.callback)
  expected_message = r"has not been registered"
  with self.assertRaisesRegex(KeyError, expected_message):
    op_callbacks.remove_op_callback(recorder.callback)
def __enter__(self, *args, **kwargs):
  """Registers `self.callback` as an op callback on entering the context.

  Any extra positional or keyword arguments are accepted and ignored.
  Nothing is returned, so `with ... as name` binds `name` to None.
  """
  del args, kwargs  # Unused.
  op_callbacks.add_op_callback(self.callback)
  logging.info("Enabled tensor dumping")