def before_run(self, run_context):
  """Hook callback invoked before every `Session.run()` call.

  On the first call, initializes the underlying dumping debug-wrapper
  session in place; on every call, builds `RunOptions` requesting
  debug-tensor watches on the whole graph and returns them via
  `SessionRunArgs`.
  """
  if not self._wrapper_initialized:
    dumping_wrapper.DumpingDebugWrapperSession.__init__(
        self,
        run_context.session,
        self._session_root,
        watch_fn=self._watch_fn,
        log_usage=self._log_usage)
    self._wrapper_initialized = True

  self._run_call_count += 1

  original_args = run_context.original_args
  (debug_urls, debug_ops, node_name_regex_whitelist,
   op_type_regex_whitelist) = self._prepare_run_watch_config(
       original_args.fetches, original_args.feed_dict)

  watched_options = config_pb2.RunOptions()
  debug_utils.watch_graph(
      watched_options,
      run_context.session.graph,
      debug_urls=debug_urls,
      debug_ops=debug_ops,
      node_name_regex_whitelist=node_name_regex_whitelist,
      op_type_regex_whitelist=op_type_regex_whitelist)
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=watched_options)
def before_run(self, run_context):
  """Prepares debug-watch RunOptions ahead of each Session.run()."""
  # Lazily become a DumpingDebugWrapperSession around the hooked session.
  if not self._wrapper_initialized:
    dumping_wrapper.DumpingDebugWrapperSession.__init__(
        self, run_context.session, self._session_root,
        watch_fn=self._watch_fn, log_usage=self._log_usage)
    self._wrapper_initialized = True

  self._run_call_count += 1

  fetches = run_context.original_args.fetches
  feeds = run_context.original_args.feed_dict
  (urls, ops, node_whitelist, op_type_whitelist) = (
      self._prepare_run_watch_config(fetches, feeds))

  opts = config_pb2.RunOptions()
  debug_utils.watch_graph(
      opts,
      run_context.session.graph,
      debug_urls=urls,
      debug_ops=ops,
      node_name_regex_whitelist=node_whitelist,
      op_type_regex_whitelist=op_type_whitelist)
  return session_run_hook.SessionRunArgs(None, feed_dict=None, options=opts)
def testWatchGraph_allNodes(self):
  """watch_graph() with no whitelists should watch every node's outputs."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_ops=["DebugIdentity", "DebugNanCount"],
      debug_urls="file:///tmp/tfdbg_1")

  debug_watch_opts = self._run_options.debug_options.debug_tensor_watch_opts
  self.assertEqual(self._expected_num_nodes, len(debug_watch_opts))

  # Verify that each of the nodes in the graph with output tensors in the
  # graph have debug tensor watch.
  node_names = self._verify_watches(debug_watch_opts, 0,
                                    ["DebugIdentity", "DebugNanCount"],
                                    ["file:///tmp/tfdbg_1"])

  # assertIn gives a clearer failure message than assertTrue(x in y),
  # and the loop removes eleven near-identical assertion lines.
  for expected_node in ("a1_init", "a1", "a1/Assign", "a1/read", "b_init",
                        "b", "b/Assign", "b/read", "c", "p1", "s"):
    self.assertIn(expected_node, node_names)
def before_run(self, run_context):
  """Sets up graph-wide debug tensor watches for the coming run."""
  if not self._wrapper_initialized:
    # TODO(cais): Make this hook have a DumpingDebugWrapperSession property
    # instead of subclassing DumpingDebugWrapperSession.
    dumping_wrapper.DumpingDebugWrapperSession.__init__(
        self,
        run_context.session,
        self._session_root,
        watch_fn=self._watch_fn,
        thread_name_filter=self._thread_name_filter,
        log_usage=self._log_usage)
    self._wrapper_initialized = True

  self._run_call_count += 1

  args = run_context.original_args
  debug_urls, watch_options = self._prepare_run_watch_config(
      args.fetches, args.feed_dict)

  options = config_pb2.RunOptions()
  debug_utils.watch_graph(
      options,
      run_context.session.graph,
      debug_urls=debug_urls,
      debug_ops=watch_options.debug_ops,
      node_name_regex_whitelist=watch_options.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_options.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=(
          watch_options.tensor_dtype_regex_whitelist),
      tolerate_debug_op_creation_failures=(
          watch_options.tolerate_debug_op_creation_failures))
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=options)
def createAndRunGraphWithWhileLoop(self):
  """Create and run a TensorFlow Graph with a while loop to generate dumps."""
  self.dump_root = self.get_temp_dir()
  self.curr_file_path = os.path.abspath(
      tf_inspect.getfile(tf_inspect.currentframe()))

  # Run a simple TF graph to generate some debug dumps that can be used in
  # source annotation.
  with session.Session() as sess:
    # NOTE: line_number_above() records the source line directly above its
    # call site, so the loop_body lambda must stay on the line immediately
    # preceding that call.
    loop_body = lambda i: math_ops.add(i, 2)
    self.traceback_first_line = line_number_above()

    loop_cond = lambda i: math_ops.less(i, 16)

    i = constant_op.constant(10, name="i")
    loop = control_flow_ops.while_loop(loop_cond, loop_body, [i])

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph, debug_urls=["file://%s" % self.dump_root])
    run_metadata = config_pb2.RunMetadata()
    sess.run(loop, options=run_options, run_metadata=run_metadata)

    self.dump = debug_data.DebugDumpDir(
        self.dump_root, partition_graphs=run_metadata.partition_graphs)
    self.dump.set_python_graph(sess.graph)
def createAndRunGraphWithWhileLoop(self):
  """Create and run a TensorFlow Graph with a while loop to generate dumps."""
  self.dump_root = self.get_temp_dir()
  self.curr_file_path = os.path.abspath(
      tf_inspect.getfile(tf_inspect.currentframe()))

  # Run a simple TF graph to generate some debug dumps that can be used in
  # source annotation.
  with session.Session() as sess:
    # NOTE: line_number_above() records the source line directly above its
    # call site, so the loop_body lambda must stay on the line immediately
    # preceding that call.
    loop_body = lambda i: math_ops.add(i, 2)
    self.traceback_first_line = line_number_above()

    loop_cond = lambda i: math_ops.less(i, 16)

    i = constant_op.constant(10, name="i")
    loop = control_flow_ops.while_loop(loop_cond, loop_body, [i])

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph, debug_urls=["file://%s" % self.dump_root])
    run_metadata = config_pb2.RunMetadata()
    sess.run(loop, options=run_options, run_metadata=run_metadata)

    self.dump = debug_data.DebugDumpDir(
        self.dump_root, partition_graphs=run_metadata.partition_graphs)
    self.dump.set_python_graph(sess.graph)
def _decorate_run_options(self,
                          run_options,
                          debug_urls,
                          debug_ops="DebugIdentity",
                          node_name_regex_whitelist=None,
                          op_type_regex_whitelist=None):
  """Modify a RunOptions object for debug tensor watching.

  Requests partition-graph output and installs debug tensor watches with
  the given URLs on the wrapped session's whole graph.

  Args:
    run_options: (RunOptions) the modified RunOptions object.
    debug_urls: (list of str) debug URLs to be entered in
      run_options.debug_tensor_watch_opts.
    debug_ops: (str or list of str) debug op(s) to be used by the debugger.
    node_name_regex_whitelist: Regular-expression whitelist for node name.
    op_type_regex_whitelist: Regular-expression whitelist for op type.
  """
  # Partition graphs are needed downstream to reconstruct the dump.
  run_options.output_partition_graphs = True
  debug_utils.watch_graph(
      run_options,
      self._sess.graph,
      debug_urls=debug_urls,
      debug_ops=debug_ops,
      node_name_regex_whitelist=node_name_regex_whitelist,
      op_type_regex_whitelist=op_type_regex_whitelist)
def before_run(self, run_context):
  """Builds debug-watch RunOptions for the upcoming Session.run()."""
  if not self._session_wrapper:
    # Create the dumping wrapper lazily, on the first run call.
    self._session_wrapper = dumping_wrapper.DumpingDebugWrapperSession(
        run_context.session,
        self._session_root,
        watch_fn=self._watch_fn,
        thread_name_filter=self._thread_name_filter,
        log_usage=self._log_usage)

  self._session_wrapper.increment_run_call_count()

  # pylint: disable=protected-access
  debug_urls, watch_opts = self._session_wrapper._prepare_run_watch_config(
      run_context.original_args.fetches, run_context.original_args.feed_dict)
  # pylint: enable=protected-access

  options = config_pb2.RunOptions()
  debug_utils.watch_graph(
      options,
      run_context.session.graph,
      debug_urls=debug_urls,
      debug_ops=watch_opts.debug_ops,
      node_name_regex_whitelist=watch_opts.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_opts.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=watch_opts.tensor_dtype_regex_whitelist,
      tolerate_debug_op_creation_failures=(
          watch_opts.tolerate_debug_op_creation_failures))
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=options)
def testDebugNumericSummaryOnInitializedTensorGivesCorrectResult(self):
  """DebugNumericSummary on an initialized tensor yields correct statistics."""
  with session.Session() as sess:
    # 18 elements: 4 NaNs, 2 -infs, 2 finite negatives, 3 zeros,
    # 2 finite positives and 5 +infs.
    a = variables.Variable(
        [
            np.nan, np.nan, 0.0, 0.0, 0.0, -1.0, -3.0, 3.0, 7.0, -np.inf,
            -np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.nan, np.nan
        ],
        dtype=np.float32,
        name="numeric_summary/a")
    b = variables.Variable(
        [0.0] * 18, dtype=np.float32, name="numeric_summary/b")
    c = math_ops.add(a, b, name="numeric_summary/c")
    sess.run(variables.global_variables_initializer())

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugNumericSummary"],
        debug_urls=self._debug_urls())
    sess.run(c, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # Expected summary fields appear to be: initialized flag, element
    # count, then counts of -inf / negative / zero / positive / +inf /
    # NaN elements, followed by min, max, mean and variance of the finite
    # elements -- NOTE(review): confirm against the DebugNumericSummary op
    # definition.
    self.assertAllClose([[
        1.0, 18.0, 2.0, 2.0, 3.0, 2.0, 5.0, 4.0, -3.0, 7.0, 0.85714286,
        8.97959184
    ]], dump.get_tensors("numeric_summary/a/read", 0, "DebugNumericSummary"))
def testDebugNumericSummaryOnUninitializedTensorGivesCorrectResult(self):
  """DebugNumericSummary on an uninitialized variable reports empty stats."""
  with session.Session() as sess:
    a = variables.Variable(
        [42], dtype=np.float32, name="numeric_summary_uninit/a")

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugNumericSummary"],
        debug_urls=self._debug_urls())
    # Running only the initializer: the watched variable tensor is still
    # uninitialized when the debug op observes it.
    sess.run(a.initializer, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    self.assertTrue(dump.loaded_partition_graphs())

    # DebugNumericSummary output should reflect the uninitialized state of
    # the watched tensor.
    numeric_summary = dump.get_tensors("numeric_summary_uninit/a", 0,
                                       "DebugNumericSummary")[0]
    # All count fields are zero for an uninitialized tensor.
    self.assertAllClose([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                        numeric_summary[0:8])
    # Min is +inf and max is -inf (identities for empty reductions);
    # mean and variance are NaN.
    self.assertTrue(np.isinf(numeric_summary[8]))
    self.assertGreater(numeric_summary[8], 0.0)
    self.assertTrue(np.isinf(numeric_summary[9]))
    self.assertLess(numeric_summary[9], 0.0)
    self.assertTrue(np.isnan(numeric_summary[10]))
    self.assertTrue(np.isnan(numeric_summary[11]))
def before_run(self, run_context):
  """Attaches full-graph debug watches to the next Session.run() call."""
  if not self._session_wrapper:
    # First run: wrap the hooked session in a dumping debug wrapper.
    self._session_wrapper = dumping_wrapper.DumpingDebugWrapperSession(
        run_context.session,
        self._session_root,
        watch_fn=self._watch_fn,
        thread_name_filter=self._thread_name_filter,
        log_usage=self._log_usage)

  self._session_wrapper.increment_run_call_count()

  fetches = run_context.original_args.fetches
  feed_dict = run_context.original_args.feed_dict
  # pylint: disable=protected-access
  debug_urls, watch_options = (
      self._session_wrapper._prepare_run_watch_config(fetches, feed_dict))
  # pylint: enable=protected-access

  run_options = config_pb2.RunOptions()
  debug_utils.watch_graph(
      run_options,
      run_context.session.graph,
      debug_urls=debug_urls,
      debug_ops=watch_options.debug_ops,
      node_name_regex_whitelist=watch_options.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_options.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=(
          watch_options.tensor_dtype_regex_whitelist),
      tolerate_debug_op_creation_failures=(
          watch_options.tolerate_debug_op_creation_failures))
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=run_options)
def testWatchGraph_allNodes(self):
  """Watching the whole graph should produce one watch per output tensor."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_ops=["DebugIdentity", "DebugNanCount"],
      debug_urls="file:///tmp/tfdbg_1")

  debug_watch_opts = self._run_options.debug_options.debug_tensor_watch_opts
  self.assertEqual(self._expected_num_nodes, len(debug_watch_opts))

  # Verify that each of the nodes in the graph with output tensors in the
  # graph have debug tensor watch.
  node_names = self._verify_watches(debug_watch_opts, 0,
                                    ["DebugIdentity", "DebugNanCount"],
                                    ["file:///tmp/tfdbg_1"])

  # Use assertIn (clearer failures than assertTrue(x in y)) over a loop
  # instead of eleven separate assertion statements.
  for expected_node in ("a1_init", "a1", "a1/Assign", "a1/read", "b_init",
                        "b", "b/Assign", "b/read", "c", "p1", "s"):
    self.assertIn(expected_node, node_names)
def testOutputSlotWithoutOutgoingEdgeCanBeWatched(self):
  """Test watching output slots not attached to any outgoing edges."""
  with session.Session() as sess:
    u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
    u = constant_op.constant(u_init_val, shape=[2, 2], name="u")

    # Create a control edge from a node with an output: From u to z.
    # Node u will get executed only because of the control edge. The output
    # tensor u:0 is not attached to any outgoing edge in the graph. This test
    # checks that the debugger can watch such a tensor.
    with ops.control_dependencies([u]):
      z = control_flow_ops.no_op(name="z")

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(z, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Assert that the DebugIdentity watch on u works properly.
    self.assertEqual(1, len(dump.dumped_tensor_data))
    datum = dump.dumped_tensor_data[0]
    self.assertEqual("u", datum.node_name)
    self.assertEqual(0, datum.output_slot)
    self.assertEqual("DebugIdentity", datum.debug_op)
    self.assertAllClose([[5.0, 3.0], [-1.0, 0.0]], datum.get_tensor())
def _session_run_for_graph_structure_lookup(self):
  """Runs a small u -> v -> w graph with debug watches and dumps it.

  Returns:
    A tuple of the u, v and w node names plus the resulting DebugDumpDir.
  """
  with session.Session() as sess:
    u_name = "testDumpGraphStructureLookup/u"
    v_name = "testDumpGraphStructureLookup/v"
    w_name = "testDumpGraphStructureLookup/w"

    u_init = constant_op.constant([2.0, 4.0])
    u = variables.Variable(u_init, name=u_name)
    v = math_ops.add(u, u, name=v_name)
    w = math_ops.add(v, v, name=w_name)

    u.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(w, options=run_options, run_metadata=run_metadata)

    self.assertEqual(self._expected_partition_graph_count,
                     len(run_metadata.partition_graphs))

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    return u_name, v_name, w_name, dump
def before_run(self, run_context):
  """Injects graph-wide debug watches into the RunOptions for this run."""
  if not self._wrapper_initialized:
    # TODO(cais): Make this hook have a DumpingDebugWrapperSession property
    # instead of subclassing DumpingDebugWrapperSession.
    dumping_wrapper.DumpingDebugWrapperSession.__init__(
        self,
        run_context.session,
        self._session_root,
        watch_fn=self._watch_fn,
        thread_name_filter=self._thread_name_filter,
        log_usage=self._log_usage)
    self._wrapper_initialized = True

  self._run_call_count += 1

  fetches = run_context.original_args.fetches
  feeds = run_context.original_args.feed_dict
  debug_urls, watch_opts = self._prepare_run_watch_config(fetches, feeds)

  run_opts = config_pb2.RunOptions()
  debug_utils.watch_graph(
      run_opts,
      run_context.session.graph,
      debug_urls=debug_urls,
      debug_ops=watch_opts.debug_ops,
      node_name_regex_whitelist=watch_opts.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_opts.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=watch_opts.tensor_dtype_regex_whitelist,
      tolerate_debug_op_creation_failures=(
          watch_opts.tolerate_debug_op_creation_failures))
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=run_opts)
def testToggleBreakpointsWorks(self):
  """Breakpoints on two gated debug tensors can be toggled across runs."""
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v_1 = variables.VariableV1(50.0, name="v_1")
    v_2 = variables.VariableV1(-50.0, name="v_2")
    delta_1 = constant_op.constant(5.0, name="delta_1")
    delta_2 = constant_op.constant(-5.0, name="delta_2")
    inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
    inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

    sess.run([v_1.initializer, v_2.initializer])

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      if i in (0, 2):
        # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2.
        self._server_1.request_watch(
            "delta_1", 0, "DebugIdentity", breakpoint=True)
        self._server_1.request_watch(
            "delta_2", 0, "DebugIdentity", breakpoint=True)
      else:
        # Disable the breakpoint in runs 1 and 3.
        self._server_1.request_unwatch("delta_1", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta_2", 0, "DebugIdentity")

      output = sess.run([inc_v_1, inc_v_2],
                        options=run_options,
                        run_metadata=run_metadata)
      self.assertAllClose([50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output)

      if i in (0, 2):
        # During runs 0 and 2, the server should have received the published
        # debug tensor delta:0:DebugIdentity. The breakpoint should have been
        # unblocked by EventReply responses from the server.
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta_1:0:DebugIdentity"])
        self.assertAllClose(
            [-5.0],
            self._server_1.debug_tensor_values["delta_2:0:DebugIdentity"])

        # After the runs, the server should have properly registered the
        # breakpoints due to the request_watch calls.
        self.assertSetEqual({("delta_1", 0, "DebugIdentity"),
                             ("delta_2", 0, "DebugIdentity")},
                            self._server_1.breakpoints)
      else:
        # After the end of runs 1 and 3, the server has received the requests
        # to disable the breakpoint at delta:0:DebugIdentity.
        self.assertSetEqual(set(), self._server_1.breakpoints)
def testWatchGraph_tensorDTypeWhitelist(self):
  """A dtype whitelist of .*_ref should watch only ref-typed tensors."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      tensor_dtype_regex_whitelist=".*_ref")

  watched_nodes = self._verify_watches(
      self._run_options.debug_options.debug_tensor_watch_opts, 0,
      ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
  self.assertItemsEqual(["a1", "a1/Assign", "b", "b/Assign"], watched_nodes)
def testWatchGraph_opTypeWhitelist(self):
  """An op-type whitelist should restrict watches to matching op types."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      op_type_regex_whitelist="(Variable|MatMul)")

  watched_nodes = self._verify_watches(
      self._run_options.debug_options.debug_tensor_watch_opts, 0,
      ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
  self.assertEqual(sorted(["a1", "b", "p1"]), sorted(watched_nodes))
def testWatchGraph_tensorDTypeWhitelist(self):
  """Only tensors whose dtype matches .*_ref should receive watches."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      tensor_dtype_regex_whitelist=".*_ref")

  watch_opts = self._run_options.debug_options.debug_tensor_watch_opts
  node_names = self._verify_watches(watch_opts, 0, ["DebugIdentity"],
                                    ["file:///tmp/tfdbg_1"])
  self.assertItemsEqual(["a1", "a1/Assign", "b", "b/Assign"], node_names)
def testWatchGraph_opTypeWhitelist(self):
  """Only ops whose type matches the whitelist should receive watches."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      op_type_regex_whitelist="(Variable|MatMul)")

  watch_opts = self._run_options.debug_options.debug_tensor_watch_opts
  node_names = self._verify_watches(watch_opts, 0, ["DebugIdentity"],
                                    ["file:///tmp/tfdbg_1"])
  self.assertEqual(sorted(["a1", "b", "p1"]), sorted(node_names))
def _decorate_options_for_debug(self, options, graph):
  """Modify RunOptions.debug_options.debug_tensor_watch_opts for debugging.

  Args:
    options: (config_pb2.RunOptions) The RunOptions instance to be modified.
    graph: A TensorFlow Graph object.
  """
  # Partition graphs are required to reconstruct the debug dump later.
  options.output_partition_graphs = True
  debug_utils.watch_graph(
      options, graph, debug_urls=self._get_run_debug_urls())
def testWatchGraph_nodeNameAndOpTypeWhitelists(self):
  """Combining name and op-type whitelists should watch the intersection."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      node_name_regex_whitelist="([a-z]+1$)",
      op_type_regex_whitelist="(MatMul)")

  watched_nodes = self._verify_watches(
      self._run_options.debug_options.debug_tensor_watch_opts, 0,
      ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
  self.assertEqual(["p1"], watched_nodes)
def _decorate_options_for_debug(self, options, graph, watch_options):
  """Modify RunOptions.debug_options.debug_tensor_watch_opts for debugging.

  Applies the per-run watch whitelists from `watch_options` and requests
  partition-graph output.
  """
  options.output_partition_graphs = True
  debug_utils.watch_graph(
      options,
      graph,
      debug_urls=self._get_run_debug_urls(),
      node_name_regex_whitelist=watch_options.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_options.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=(
          watch_options.tensor_dtype_regex_whitelist),
      tolerate_debug_op_creation_failures=(
          watch_options.tolerate_debug_op_creation_failures))
def testWatchGraph_nodeNameAndOpTypeWhitelists(self):
  """Both whitelists together should watch only nodes matching both."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      node_name_regex_whitelist="([a-z]+1$)",
      op_type_regex_whitelist="(MatMul)")

  watch_opts = self._run_options.debug_options.debug_tensor_watch_opts
  node_names = self._verify_watches(watch_opts, 0, ["DebugIdentity"],
                                    ["file:///tmp/tfdbg_1"])
  self.assertEqual(["p1"], node_names)
def _decorate_options_for_debug(self, options, graph):
  """Modify RunOptions.debug_options.debug_tensor_watch_opts for debugging.

  Args:
    options: (config_pb2.RunOptions) The RunOptions instance to be modified.
    graph: A TensorFlow Graph object.
  """
  # Request partition graphs so the resulting dump can be reconstructed.
  options.output_partition_graphs = True
  debug_utils.watch_graph(
      options, graph, debug_urls=self._get_run_debug_urls())
def testToggleBreakpointWorks(self):
  """A breakpoint on a gated debug tensor can be toggled across runs."""
  with session.Session(config=no_rewrite_session_config()) as sess:
    v = variables.Variable(50.0, name="v")
    delta = constant_op.constant(5.0, name="delta")
    inc_v = state_ops.assign_add(v, delta, name="inc_v")

    sess.run(v.initializer)

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      # N.B.: These requests will be fulfilled not in this debugged
      # Session.run() invocation, but in the next one.
      if i in (0, 2):
        # Enable breakpoint at delta:0:DebugIdentity in runs 0 and 2.
        self._server_1.request_watch(
            "delta", 0, "DebugIdentity", breakpoint=True)
      else:
        # Disable the breakpoint in runs 1 and 3.
        self._server_1.request_unwatch("delta", 0, "DebugIdentity")

      output = sess.run(inc_v, options=run_options,
                        run_metadata=run_metadata)
      self.assertAllClose(50.0 + 5.0 * (i + 1), output)

      if i in (0, 2):
        # After the end of runs 0 and 2, the server has received the requests
        # to enable the breakpoint at delta:0:DebugIdentity. So the server
        # should keep track of the correct breakpoints.
        self.assertSetEqual({("delta", 0, "DebugIdentity")},
                            self._server_1.breakpoints)
      else:
        # During runs 1 and 3, the server should have received the published
        # debug tensor delta:0:DebugIdentity. The breakpoint should have been
        # unblocked by EventReply responses from the server.
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta:0:DebugIdentity"])

        # After the runs, the server should have properly removed the
        # breakpoints due to the request_unwatch calls.
        self.assertSetEqual(set(), self._server_1.breakpoints)
def testWatchingVariableUpdateOpsSeesUpdatedValues(self):
  """Watch output slots on Variable-updating ops, with no emitted edges."""
  with session.Session() as sess:
    u_init = constant_op.constant(10.0)
    u = variables.Variable(u_init, name="gdo/u")
    v_init = constant_op.constant(20.0)
    v = variables.Variable(v_init, name="gdo/v")

    w = math_ops.multiply(u, v, name="gdo/w")

    # gdo stands for GradientDescentOptimizer.
    train_op = gradient_descent.GradientDescentOptimizer(
        learning_rate=0.1).minimize(w, name="gdo/train")

    u.initializer.run()
    v.initializer.run()

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    run_metadata = config_pb2.RunMetadata()
    sess.run(train_op, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    update_u_data = dump.watch_key_to_data(
        "gdo/train/update_gdo/u/ApplyGradientDescent:0:DebugIdentity")
    self.assertEqual(1, len(update_u_data))

    # Gradient descent on u: w = u * v, so dw / du = v.
    # Updated value of u should be:
    #   10.0 - learning_rate * v = 10.0 - 0.1 * 20.0 = 8.0
    self.assertAllClose(8.0, update_u_data[0].get_tensor())

    update_v_data = dump.watch_key_to_data(
        "gdo/train/update_gdo/v/ApplyGradientDescent:0:DebugIdentity")
    self.assertEqual(1, len(update_v_data))

    # Gradient descent on v: w = u * v, so dw / dv = u.
    # Updated value of v should be:
    #   20.0 - learning_rate * u = 20.0 - 0.1 * 10.0 = 19.0
    self.assertAllClose(19.0, update_v_data[0].get_tensor())

    # Verify that the Variables u and v are updated properly.
    self.assertAllClose(8.0, sess.run(u))
    self.assertAllClose(19.0, sess.run(v))
def _decorate_options_for_debug(self, options, graph, watch_options):
  """Modify RunOptions.debug_options.debug_tensor_watch_opts for debugging.

  Installs graph-wide watches filtered by the whitelists carried in
  `watch_options`, and requests partition-graph output.
  """
  options.output_partition_graphs = True
  debug_utils.watch_graph(
      options,
      graph,
      debug_urls=self._get_run_debug_urls(),
      node_name_regex_whitelist=watch_options.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_options.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=(
          watch_options.tensor_dtype_regex_whitelist),
      tolerate_debug_op_creation_failures=(
          watch_options.tolerate_debug_op_creation_failures))
def testWatchGraph_nodeNameWhitelist(self):
  """Only nodes matching the node-name regex whitelist should be watched."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      node_name_regex_whitelist="(a1$|a1_init$|a1/.*|p1$)")

  watched_nodes = self._verify_watches(
      self._run_options.debug_options.debug_tensor_watch_opts, 0,
      ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
  expected = sorted(["a1_init", "a1", "a1/Assign", "a1/read", "p1"])
  self.assertEqual(expected, sorted(watched_nodes))
def testWatchGraph_nodeNameWhitelist(self):
  """A node-name whitelist should limit watches to the matching nodes."""
  debug_utils.watch_graph(
      self._run_options,
      self._graph,
      debug_urls="file:///tmp/tfdbg_1",
      node_name_regex_whitelist="(a1$|a1_init$|a1/.*|p1$)")

  watch_opts = self._run_options.debug_options.debug_tensor_watch_opts
  node_names = self._verify_watches(watch_opts, 0, ["DebugIdentity"],
                                    ["file:///tmp/tfdbg_1"])
  self.assertEqual(
      sorted(["a1_init", "a1", "a1/Assign", "a1/read", "p1"]),
      sorted(node_names))
def testGradientsValuesFromDumpWorks(self):
  """Gradients registered via a GradientsDebugger are readable from a dump."""
  y = math_ops.add(self.w, -1.0, name="y")
  z = math_ops.square(y, name="z")

  # Gradient tensors created while this context is active are registered
  # with grad_debugger for the watched x-tensors (w, u and y).
  grad_debugger = debug_gradients.GradientsDebugger()
  with grad_debugger.watch_gradients_by_tensors(self.sess.graph,
                                                [self.w, self.u, y]):
    train_op = gradient_descent.GradientDescentOptimizer(0.1).minimize(z)

  self.sess.run(variables.global_variables_initializer())

  run_options = config_pb2.RunOptions(output_partition_graphs=True)
  dump_dir = tempfile.mkdtemp()
  debug_url = "file://" + dump_dir
  debug_utils.watch_graph(run_options, self.sess.graph, debug_urls=debug_url)
  run_metadata = config_pb2.RunMetadata()
  self.assertAllClose(2.0, self.sess.run(self.u))
  self.sess.run(train_op, options=run_options, run_metadata=run_metadata)
  self.assertAllClose(-1.0, self.sess.run(self.u))

  dump = debug_data.DebugDumpDir(
      dump_dir, partition_graphs=run_metadata.partition_graphs)
  dump.set_python_graph(self.sess.graph)

  y_grad_values = debug_gradients.gradient_values_from_dump(
      grad_debugger, y, dump)
  self.assertEqual(1, len(y_grad_values))
  self.assertAllClose(10.0, y_grad_values[0])

  w_grad_values = debug_gradients.gradient_values_from_dump(
      grad_debugger, self.w, dump)
  self.assertEqual(1, len(w_grad_values))
  self.assertAllClose(10.0, w_grad_values[0])

  u_grad_values = debug_gradients.gradient_values_from_dump(
      grad_debugger, self.u, dump)
  self.assertEqual(1, len(u_grad_values))
  self.assertAllClose(30.0, u_grad_values[0])

  # v was never watched, so looking up its gradient must fail.
  with self.assertRaisesRegexp(
      LookupError,
      r"This GradientsDebugger has not received any gradient tensor for "
      r"x-tensor v:0"):
    debug_gradients.gradient_values_from_dump(grad_debugger, self.v, dump)

  # Cleanup.
  shutil.rmtree(dump_dir)
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
  """Toggling one gated debug op on a tensor must not affect the other."""
  with session.Session(config=no_rewrite_session_config()) as sess:
    v = variables.Variable(50.0, name="v")
    delta = constant_op.constant(5.0, name="delta")
    inc_v = state_ops.assign_add(v, delta, name="inc_v")

    sess.run(v.initializer)

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    # Two gated debug ops watch the same tensor; each is toggled
    # independently below.
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=[
            "DebugIdentity(gated_grpc=true)",
            "DebugNumericSummary(gated_grpc=true)"
        ],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      # N.B.: These requests will be fulfilled not in this debugged
      # Session.run() invocation, but in the next one.
      if i % 2 == 0:
        self._server_1.request_watch("delta", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta", 0, "DebugNumericSummary")
      else:
        self._server_1.request_unwatch("delta", 0, "DebugIdentity")
        self._server_1.request_watch("delta", 0, "DebugNumericSummary")

      sess.run(inc_v, options=run_options, run_metadata=run_metadata)

      # Due to the one-run delay noted above, run 0 sees no debug tensor;
      # each later run sees exactly one of the two debug ops active.
      if i == 0:
        self.assertEqual(0, len(self._server_1.debug_tensor_values))
      else:
        self.assertEqual(1, len(self._server_1.debug_tensor_values))
        if i % 2 == 1:
          self.assertAllClose(
              [5.0],
              self._server_1.debug_tensor_values["delta:0:DebugIdentity"])
        else:
          self.assertAllClose(
              [[
                  1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0, 5.0,
                  0.0, 1.0, 0.0
              ]],
              self._server_1.debug_tensor_values[
                  "delta:0:DebugNumericSummary"])
def testGradientsValuesFromDumpWorks(self):
  """Gradient values for watched x-tensors can be fetched from the dump."""
  y = math_ops.add(self.w, -1.0, name="y")
  z = math_ops.square(y, name="z")

  # Register the gradients of w, u and y with the debugger while the
  # training op (and thus the gradient graph) is being built.
  grad_debugger = debug_gradients.GradientsDebugger()
  with grad_debugger.watch_gradients_by_tensors(
      self.sess.graph, [self.w, self.u, y]):
    train_op = gradient_descent.GradientDescentOptimizer(0.1).minimize(z)

  self.sess.run(variables.global_variables_initializer())

  run_options = config_pb2.RunOptions(output_partition_graphs=True)
  dump_dir = tempfile.mkdtemp()
  debug_url = "file://" + dump_dir
  debug_utils.watch_graph(
      run_options, self.sess.graph, debug_urls=debug_url)
  run_metadata = config_pb2.RunMetadata()
  self.assertAllClose(2.0, self.sess.run(self.u))
  self.sess.run(train_op, options=run_options, run_metadata=run_metadata)
  self.assertAllClose(-1.0, self.sess.run(self.u))

  dump = debug_data.DebugDumpDir(
      dump_dir, partition_graphs=run_metadata.partition_graphs)
  dump.set_python_graph(self.sess.graph)

  y_grad_values = debug_gradients.gradient_values_from_dump(
      grad_debugger, y, dump)
  self.assertEqual(1, len(y_grad_values))
  self.assertAllClose(10.0, y_grad_values[0])

  w_grad_values = debug_gradients.gradient_values_from_dump(
      grad_debugger, self.w, dump)
  self.assertEqual(1, len(w_grad_values))
  self.assertAllClose(10.0, w_grad_values[0])

  u_grad_values = debug_gradients.gradient_values_from_dump(
      grad_debugger, self.u, dump)
  self.assertEqual(1, len(u_grad_values))
  self.assertAllClose(30.0, u_grad_values[0])

  # v was never watched by grad_debugger, so the lookup must fail.
  with self.assertRaisesRegexp(
      LookupError,
      r"This GradientsDebugger has not received any gradient tensor for "
      r"x-tensor v:0"):
    debug_gradients.gradient_values_from_dump(grad_debugger, self.v, dump)

  # Cleanup.
  shutil.rmtree(dump_dir)
def testLookUpNodePythonTracebackWorks(self):
  """node_traceback() works for node and tensor names once the graph is set."""
  with session.Session() as sess:
    u_init = constant_op.constant(10.0)
    u = variables.Variable(u_init, name="traceback/u")
    v_init = constant_op.constant(20.0)
    v = variables.Variable(v_init, name="traceback/v")

    w = math_ops.multiply(u, v, name="traceback/w")

    sess.run(variables.global_variables_initializer())

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph, debug_urls=self._debug_urls())
    sess.run(w, options=run_options, run_metadata=run_metadata)

    dump = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)

    # Prior to setting the Python graph, attempts to do traceback lookup
    # should lead to exceptions.
    with self.assertRaisesRegexp(
        LookupError, "Python graph is not available for traceback lookup"):
      dump.node_traceback("traceback/w")

    dump.set_python_graph(sess.graph)

    # After setting the Python graph, attempts to look up nonexistent nodes
    # should lead to exceptions.
    with self.assertRaisesRegexp(
        KeyError, r"Cannot find node \"foo\" in Python graph"):
      dump.node_traceback("foo")

    # Lookup should work with node name input.
    traceback = dump.node_traceback("traceback/w")
    self.assertIsInstance(traceback, list)
    self.assertGreater(len(traceback), 0)
    for trace in traceback:
      self.assertIsInstance(trace, tuple)

    # Lookup should also work with tensor name input.
    traceback = dump.node_traceback("traceback/w:0")
    self.assertIsInstance(traceback, list)
    self.assertGreater(len(traceback), 0)
    for trace in traceback:
      self.assertIsInstance(trace, tuple)
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenServers(self):
  """Toggling gated watches on two debug servers must not cross-talk.

  Server 1 watches "delta" and server 2 watches "v"; both are enabled on
  even-indexed runs and disabled on odd-indexed runs, and each server is
  expected to receive only its own tensor exactly when enabled.
  """
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v = variables.Variable(50.0, name="v")
    delta = constant_op.constant(5.0, name="delta")
    inc_v = state_ops.assign_add(v, delta, name="inc_v")
    sess.run(v.initializer)

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1, self._debug_server_url_2])

    for run_index in xrange(4):
      self._server_1.clear_data()
      self._server_2.clear_data()

      watches_enabled = run_index % 2 == 0
      if watches_enabled:
        self._server_1.request_watch("delta", 0, "DebugIdentity")
        self._server_2.request_watch("v", 0, "DebugIdentity")
      else:
        self._server_1.request_unwatch("delta", 0, "DebugIdentity")
        self._server_2.request_unwatch("v", 0, "DebugIdentity")

      sess.run(inc_v, options=run_options, run_metadata=run_metadata)

      if watches_enabled:
        # Each server receives exactly the one tensor it watches.
        self.assertEqual(1, len(self._server_1.debug_tensor_values))
        self.assertEqual(1, len(self._server_2.debug_tensor_values))
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta:0:DebugIdentity"])
        self.assertAllClose(
            [50 + 5.0 * run_index],
            self._server_2.debug_tensor_values["v:0:DebugIdentity"])
      else:
        # With watches disabled, neither server receives anything.
        self.assertEqual(0, len(self._server_1.debug_tensor_values))
        self.assertEqual(0, len(self._server_2.debug_tensor_values))
def testToggleBreakpointWorks(self):
  """A gRPC breakpoint can be toggled on and off between Session.run calls.

  Enables a breakpoint at delta:0:DebugIdentity on runs 0 and 2 and
  disables it on runs 1 and 3, checking the server's breakpoint set and
  received tensor values after each run.
  """
  with session.Session(config=no_rewrite_session_config()) as sess:
    v = variables.Variable(50.0, name="v")
    delta = constant_op.constant(5.0, name="delta")
    inc_v = state_ops.assign_add(v, delta, name="inc_v")
    sess.run(v.initializer)

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      # N.B.: These requests will be fulfilled not in this debugged
      # Session.run() invocation, but in the next one.
      if i in (0, 2):
        # Enable breakpoint at delta:0:DebugIdentity in runs 0 and 2.
        self._server_1.request_watch(
            "delta", 0, "DebugIdentity", breakpoint=True)
      else:
        # Disable the breakpoint in runs 1 and 3.
        self._server_1.request_unwatch("delta", 0, "DebugIdentity")

      output = sess.run(inc_v, options=run_options,
                        run_metadata=run_metadata)
      self.assertAllClose(50.0 + 5.0 * (i + 1), output)

      if i in (0, 2):
        # After the end of runs 0 and 2, the server has received the requests
        # to enable the breakpoint at delta:0:DebugIdentity. So the server
        # should keep track of the correct breakpoints.
        self.assertSetEqual({("delta", 0, "DebugIdentity")},
                            self._server_1.breakpoints)
      else:
        # During runs 1 and 3, the server should have received the published
        # debug tensor delta:0:DebugIdentity. The breakpoint should have been
        # unblocked by EventReply responses from the server.
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta:0:DebugIdentity"])
        # After the runs, the server should have properly removed the
        # breakpoints due to the request_unwatch calls.
        self.assertSetEqual(set(), self._server_1.breakpoints)
def testMultiGPUSessionRun(self):
  """Debug dumps work for a graph whose ops are placed on multiple GPUs.

  Skips unless at least two GPUs are available. Places u0 and u1 on two
  distinct GPUs, dumps all debug tensors to the filesystem and verifies
  the dumped values for v, u0 and u1.
  """
  # Collect the names of all locally visible GPU devices, in sorted order.
  gpu_device_names = sorted(
      device.name for device in device_lib.list_local_devices()
      if device.device_type == "GPU")
  if len(gpu_device_names) < 2:
    self.skipTest(
        "This test requires at least 2 GPUs, but only %d is available." %
        len(gpu_device_names))

  with session.Session() as sess:
    v = variables.Variable([10.0, 15.0], dtype=dtypes.float32, name="v")
    with ops.device(gpu_device_names[0]):
      u0 = math_ops.add(v, v, name="u0")
    with ops.device(gpu_device_names[1]):
      u1 = math_ops.multiply(v, v, name="u1")
    w = math_ops.subtract(u1, u0, name="w")

    sess.run(v.initializer)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph, debug_urls="file://" + self._dump_root)
    run_metadata = config_pb2.RunMetadata()
    result = sess.run(w, options=run_options, run_metadata=run_metadata)
    self.assertAllClose([80.0, 195.0], result)

    debug_dump_dir = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    # CPU (for v) plus the two GPUs.
    self.assertEqual(3, len(debug_dump_dir.devices()))
    self.assertAllClose(
        [10.0, 15.0],
        debug_dump_dir.get_tensors("v", 0, "DebugIdentity")[0])
    self.assertAllClose(
        [20.0, 30.0],
        debug_dump_dir.get_tensors("u0", 0, "DebugIdentity")[0])
    self.assertAllClose(
        [100.0, 225.0],
        debug_dump_dir.get_tensors("u1", 0, "DebugIdentity")[0])
def testToggleWatchesOnCoreMetadata(self):
  """Watches listed in toggle_watch_on_core_metadata flip state per run.

  The server is started with toggled_1 and toggled_2 registered for
  per-run toggling, so their debug tensors are expected to arrive on
  even-indexed runs and be absent on odd-indexed runs.
  """
  (_, debug_server_url, _, server_thread,
   server) = grpc_debug_test_server.start_server_on_separate_thread(
       dump_to_filesystem=False,
       toggle_watch_on_core_metadata=[("toggled_1", 0, "DebugIdentity"),
                                      ("toggled_2", 0, "DebugIdentity")])
  self._servers_and_threads.append((server, server_thread))
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v_1 = variables.Variable(50.0, name="v_1")
    # Fixed: this variable was previously also given name="v_1", which
    # TF silently uniquified to "v_1_1"; "v_2" is the intended name.
    v_2 = variables.Variable(-50.0, name="v_2")
    # These two nodes have names that match those in the
    # toggle_watch_on_core_metadata argument used when calling
    # start_server_on_separate_thread().
    toggled_1 = constant_op.constant(5.0, name="toggled_1")
    toggled_2 = constant_op.constant(-5.0, name="toggled_2")
    inc_v_1 = state_ops.assign_add(v_1, toggled_1, name="inc_v_1")
    inc_v_2 = state_ops.assign_add(v_2, toggled_2, name="inc_v_2")
    sess.run([v_1.initializer, v_2.initializer])

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[debug_server_url])

    for i in xrange(4):
      server.clear_data()
      sess.run([inc_v_1, inc_v_2],
               options=run_options,
               run_metadata=run_metadata)

      if i % 2 == 0:
        # Watches are active: both toggled tensors arrive at the server.
        self.assertEqual(2, len(server.debug_tensor_values))
        self.assertAllClose(
            [5.0],
            server.debug_tensor_values["toggled_1:0:DebugIdentity"])
        self.assertAllClose(
            [-5.0],
            server.debug_tensor_values["toggled_2:0:DebugIdentity"])
      else:
        # Watches are toggled off: nothing should arrive.
        self.assertEqual(0, len(server.debug_tensor_values))
def _compareOriginalAndReconstructedGraphDefs(self,
                                              sess,
                                              fetches,
                                              feed_dict=None,
                                              expected_output=None):
  """Run fetches with and without debug watches; compare partition graphs.

  First runs undebugged to capture the baseline partition graphs, then
  runs with debug watches and checks that the non-debug partition graphs
  reconstructed from the dump match the baseline (modulo blacklisted
  nodes).

  Args:
    sess: The Session to run in.
    fetches: Fetches passed to sess.run().
    feed_dict: Optional feed dict for both runs.
    expected_output: If not None, the output of both runs is checked
      against this value with assertAllClose.
  """
  run_options = config_pb2.RunOptions(output_partition_graphs=True)
  run_metadata = config_pb2.RunMetadata()
  output = sess.run(fetches, feed_dict=feed_dict, options=run_options,
                    run_metadata=run_metadata)
  if expected_output is not None:
    self.assertAllClose(expected_output, output)
  non_debug_graph_defs = run_metadata.partition_graphs

  # Second run: same run_options object, now with debug watches added.
  debug_utils.watch_graph(run_options, sess.graph,
                          debug_urls=self._debug_url)
  run_metadata = config_pb2.RunMetadata()
  output = sess.run(fetches, feed_dict=feed_dict, options=run_options,
                    run_metadata=run_metadata)
  if expected_output is not None:
    self.assertAllClose(expected_output, output)

  dump = debug_data.DebugDumpDir(
      self._dump_dir, partition_graphs=run_metadata.partition_graphs,
      validate=True)

  reconstructed = dump.reconstructed_non_debug_partition_graphs()
  self.assertEqual(len(non_debug_graph_defs), len(reconstructed))
  for i, non_debug_graph_def in enumerate(non_debug_graph_defs):
    # NOTE: reaches into a private helper of debug_graphs to key the
    # reconstructed graphs by device name.
    device_name = debug_graphs._infer_device_name(non_debug_graph_def)
    test_util.assert_equal_graph_def(
        self._graphDefWithoutBlacklistedNodes(
            reconstructed[device_name]),
        self._graphDefWithoutBlacklistedNodes(non_debug_graph_def))

    # Test debug_graphs.reconstruct_non_debug_graph_def.
    reconstructed_again = (
        debug_graphs.reconstruct_non_debug_graph_def(
            run_metadata.partition_graphs[i]))
    test_util.assert_equal_graph_def(
        self._graphDefWithoutBlacklistedNodes(reconstructed_again),
        self._graphDefWithoutBlacklistedNodes(non_debug_graph_def))
def createAndRunGraphHelper(self):
  """Create and run a TensorFlow Graph to generate debug dumps.

  This is intentionally done in separate method, to make it easier to test
  the stack-top mode of source annotation.

  NOTE: the line_number_above() call immediately following each op
  creation records that op's creation line; the relative placement of
  these statements is therefore load-bearing — do not reorder.
  """
  self.dump_root = self.get_temp_dir()
  self.curr_file_path = os.path.abspath(
      tf_inspect.getfile(tf_inspect.currentframe()))

  # Run a simple TF graph to generate some debug dumps that can be used in
  # source annotation.
  with session.Session() as sess:
    self.u_init = constant_op.constant(
        np.array([[5.0, 3.0], [-1.0, 0.0]]), shape=[2, 2], name="u_init")
    self.u_init_line_number = line_number_above()

    self.u = variables.Variable(self.u_init, name="u")
    self.u_line_number = line_number_above()

    self.v_init = constant_op.constant(
        np.array([[2.0], [-1.0]]), shape=[2, 1], name="v_init")
    self.v_init_line_number = line_number_above()

    self.v = variables.Variable(self.v_init, name="v")
    self.v_line_number = line_number_above()

    self.w = math_ops.matmul(self.u, self.v, name="w")
    self.w_line_number = line_number_above()

    self.evaluate(self.u.initializer)
    self.evaluate(self.v.initializer)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph,
        debug_urls=["file://%s" % self.dump_root])
    run_metadata = config_pb2.RunMetadata()
    sess.run(self.w, options=run_options, run_metadata=run_metadata)

    self.dump = debug_data.DebugDumpDir(
        self.dump_root, partition_graphs=run_metadata.partition_graphs)
    self.dump.set_python_graph(sess.graph)
def testToggleWatchesOnCoreMetadata(self):
  """Watches listed in toggle_watch_on_core_metadata flip state per run.

  The server is started with toggled_1 and toggled_2 registered for
  per-run toggling, so their debug tensors are expected to arrive on
  even-indexed runs and be absent on odd-indexed runs.
  """
  (_, debug_server_url, _, server_thread,
   server) = grpc_debug_test_server.start_server_on_separate_thread(
       dump_to_filesystem=False,
       toggle_watch_on_core_metadata=[("toggled_1", 0, "DebugIdentity"),
                                      ("toggled_2", 0, "DebugIdentity")])
  self._servers_and_threads.append((server, server_thread))
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v_1 = variables.VariableV1(50.0, name="v_1")
    # Fixed: this variable was previously also given name="v_1", which
    # TF silently uniquified to "v_1_1"; "v_2" is the intended name.
    v_2 = variables.VariableV1(-50.0, name="v_2")
    # These two nodes have names that match those in the
    # toggle_watch_on_core_metadata argument used when calling
    # start_server_on_separate_thread().
    toggled_1 = constant_op.constant(5.0, name="toggled_1")
    toggled_2 = constant_op.constant(-5.0, name="toggled_2")
    inc_v_1 = state_ops.assign_add(v_1, toggled_1, name="inc_v_1")
    inc_v_2 = state_ops.assign_add(v_2, toggled_2, name="inc_v_2")
    sess.run([v_1.initializer, v_2.initializer])

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[debug_server_url])

    for i in xrange(4):
      server.clear_data()
      sess.run([inc_v_1, inc_v_2],
               options=run_options,
               run_metadata=run_metadata)

      if i % 2 == 0:
        # Watches are active: both toggled tensors arrive at the server.
        self.assertEqual(2, len(server.debug_tensor_values))
        self.assertAllClose(
            [5.0], server.debug_tensor_values["toggled_1:0:DebugIdentity"])
        self.assertAllClose(
            [-5.0], server.debug_tensor_values["toggled_2:0:DebugIdentity"])
      else:
        # Watches are toggled off: nothing should arrive.
        self.assertEqual(0, len(server.debug_tensor_values))
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
  """Toggling two debug ops on the same tensor must not cross-talk.

  Alternates between DebugIdentity and DebugNumericSummary watches on
  "delta" across four runs; exactly one of the two should fire per run
  (with a one-run delay on the very first request, hence run 0 sees no
  values).
  """
  with session.Session(config=no_rewrite_session_config()) as sess:
    v = variables.Variable(50.0, name="v")
    delta = constant_op.constant(5.0, name="delta")
    inc_v = state_ops.assign_add(v, delta, name="inc_v")
    sess.run(v.initializer)

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)",
                   "DebugNumericSummary(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()
      # N.B.: These requests will be fulfilled not in this debugged
      # Session.run() invocation, but in the next one.
      if i % 2 == 0:
        self._server_1.request_watch("delta", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta", 0, "DebugNumericSummary")
      else:
        self._server_1.request_unwatch("delta", 0, "DebugIdentity")
        self._server_1.request_watch("delta", 0, "DebugNumericSummary")

      sess.run(inc_v, options=run_options, run_metadata=run_metadata)

      if i == 0:
        # Run 0 precedes any fulfilled watch request, so nothing arrives.
        self.assertEqual(0, len(self._server_1.debug_tensor_values))
      else:
        self.assertEqual(1, len(self._server_1.debug_tensor_values))
        if i % 2 == 1:
          # Odd runs carry the DebugIdentity value requested in the
          # preceding even run.
          self.assertAllClose(
              [5.0],
              self._server_1.debug_tensor_values["delta:0:DebugIdentity"])
        else:
          # Even runs (after run 0) carry the DebugNumericSummary vector
          # requested in the preceding odd run.
          self.assertAllClose(
              [[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0, 5.0, 0.0,
                1.0, 0.0]],
              self._server_1.debug_tensor_values[
                  "delta:0:DebugNumericSummary"])
def createAndRunGraphHelper(self):
  """Create and run a TensorFlow Graph to generate debug dumps.

  This is intentionally done in separate method, to make it easier to test
  the stack-top mode of source annotation.

  NOTE: the line_number_above() call immediately following each op
  creation records that op's creation line; the relative placement of
  these statements is therefore load-bearing — do not reorder.
  """
  self.dump_root = self.get_temp_dir()
  self.curr_file_path = os.path.abspath(
      tf_inspect.getfile(tf_inspect.currentframe()))

  # Run a simple TF graph to generate some debug dumps that can be used in
  # source annotation.
  with session.Session() as sess:
    self.u_init = constant_op.constant(
        np.array([[5.0, 3.0], [-1.0, 0.0]]), shape=[2, 2], name="u_init")
    self.u_init_line_number = line_number_above()

    self.u = variables.Variable(self.u_init, name="u")
    self.u_line_number = line_number_above()

    self.v_init = constant_op.constant(
        np.array([[2.0], [-1.0]]), shape=[2, 1], name="v_init")
    self.v_init_line_number = line_number_above()

    self.v = variables.Variable(self.v_init, name="v")
    self.v_line_number = line_number_above()

    self.w = math_ops.matmul(self.u, self.v, name="w")
    self.w_line_number = line_number_above()

    sess.run(self.u.initializer)
    sess.run(self.v.initializer)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options, sess.graph,
        debug_urls=["file://%s" % self.dump_root])
    run_metadata = config_pb2.RunMetadata()
    sess.run(self.w, options=run_options, run_metadata=run_metadata)

    self.dump = debug_data.DebugDumpDir(
        self.dump_root, partition_graphs=run_metadata.partition_graphs)
    self.dump.set_python_graph(sess.graph)
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenServers(self):
  """Toggling gated watches on two debug servers must not cross-talk.

  Server 1 watches "delta" and server 2 watches "v"; both are enabled on
  even-indexed runs and disabled on odd-indexed runs, and each server
  should receive only its own tensor exactly when enabled.
  """
  with session.Session(
      config=session_debug_testlib.no_rewrite_session_config()) as sess:
    v = variables.VariableV1(50.0, name="v")
    delta = constant_op.constant(5.0, name="delta")
    inc_v = state_ops.assign_add(v, delta, name="inc_v")
    sess.run(v.initializer)

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1, self._debug_server_url_2])

    for i in xrange(4):
      self._server_1.clear_data()
      self._server_2.clear_data()

      if i % 2 == 0:
        self._server_1.request_watch("delta", 0, "DebugIdentity")
        self._server_2.request_watch("v", 0, "DebugIdentity")
      else:
        self._server_1.request_unwatch("delta", 0, "DebugIdentity")
        self._server_2.request_unwatch("v", 0, "DebugIdentity")

      sess.run(inc_v, options=run_options, run_metadata=run_metadata)

      if i % 2 == 0:
        # Each server receives exactly the one tensor it watches.
        self.assertEqual(1, len(self._server_1.debug_tensor_values))
        self.assertEqual(1, len(self._server_2.debug_tensor_values))
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta:0:DebugIdentity"])
        self.assertAllClose(
            [50 + 5.0 * i],
            self._server_2.debug_tensor_values["v:0:DebugIdentity"])
      else:
        # With watches disabled, neither server receives anything.
        self.assertEqual(0, len(self._server_1.debug_tensor_values))
        self.assertEqual(0, len(self._server_2.debug_tensor_values))
def testMultiGPUSessionRun(self):
  """Debug dumps work for a graph whose ops are placed on multiple GPUs.

  Skips unless at least two GPUs are available. Places u0 and u1 on two
  distinct GPUs, dumps all debug tensors to the filesystem and verifies
  the dumped values for v, u0 and u1.
  """
  local_devices = device_lib.list_local_devices()
  gpu_device_names = []
  for device in local_devices:
    if device.device_type == "GPU":
      gpu_device_names.append(device.name)
  gpu_device_names = sorted(gpu_device_names)

  if len(gpu_device_names) < 2:
    self.skipTest(
        "This test requires at least 2 GPUs, but only %d is available." %
        len(gpu_device_names))

  with session.Session() as sess:
    v = variables.Variable([10.0, 15.0], dtype=dtypes.float32, name="v")
    with ops.device(gpu_device_names[0]):
      u0 = math_ops.add(v, v, name="u0")
    with ops.device(gpu_device_names[1]):
      u1 = math_ops.multiply(v, v, name="u1")
    w = math_ops.subtract(u1, u0, name="w")

    sess.run(v.initializer)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options, sess.graph,
                            debug_urls="file://" + self._dump_root)
    run_metadata = config_pb2.RunMetadata()
    self.assertAllClose(
        [80.0, 195.0],
        sess.run(w, options=run_options, run_metadata=run_metadata))

    debug_dump_dir = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    # CPU (for v) plus the two GPUs.
    self.assertEqual(3, len(debug_dump_dir.devices()))
    self.assertAllClose(
        [10.0, 15.0],
        debug_dump_dir.get_tensors("v", 0, "DebugIdentity")[0])
    self.assertAllClose(
        [20.0, 30.0],
        debug_dump_dir.get_tensors("u0", 0, "DebugIdentity")[0])
    self.assertAllClose(
        [100.0, 225.0],
        debug_dump_dir.get_tensors("u1", 0, "DebugIdentity")[0])
def _decorate_run_options_for_debug(
    self,
    run_options,
    debug_urls,
    debug_ops="DebugIdentity",
    node_name_regex_whitelist=None,
    op_type_regex_whitelist=None,
    tensor_dtype_regex_whitelist=None,
    tolerate_debug_op_creation_failures=False):
  """Modify a RunOptions object in place for debug tensor watching.

  Turns on partition-graph output and adds debug_tensor_watch_opts
  entries for the wrapped session's graph, pointing at the given debug
  URLs.

  Args:
    run_options: (RunOptions) the RunOptions object modified in place.
    debug_urls: (list of str) debug URLs to be entered in run_options.
      debug_tensor_watch_opts.
    debug_ops: (str or list of str) debug op(s) to be used by the debugger.
    node_name_regex_whitelist: Regular-expression whitelist for node name.
    op_type_regex_whitelist: Regular-expression whitelist for op type.
    tensor_dtype_regex_whitelist: Regular-expression whitelist for tensor
      dtype.
    tolerate_debug_op_creation_failures: Whether debug op creation
      failures are to be tolerated.
  """
  run_options.output_partition_graphs = True
  # Disk byte usage is reset on the first run call, or on every run if the
  # wrapper is configured to reset it each run.
  reset_disk_usage = (self._run_call_count == 1 or
                      self._is_disk_usage_reset_each_run())
  debug_utils.watch_graph(
      run_options,
      self._sess.graph,
      debug_urls=debug_urls,
      debug_ops=debug_ops,
      node_name_regex_whitelist=node_name_regex_whitelist,
      op_type_regex_whitelist=op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=tensor_dtype_regex_whitelist,
      tolerate_debug_op_creation_failures=tolerate_debug_op_creation_failures,
      reset_disk_byte_usage=reset_disk_usage)
def _compareOriginalAndReconstructedGraphDefs(self,
                                              sess,
                                              fetches,
                                              feed_dict=None,
                                              expected_output=None):
  """Run fetches with and without debug watches; compare partition graphs.

  First runs undebugged to capture the baseline partition graphs, then
  runs with debug watches and checks that the non-debug partition graphs
  reconstructed from the dump match the baseline (modulo blacklisted
  nodes).

  Args:
    sess: The Session to run in.
    fetches: Fetches passed to sess.run().
    feed_dict: Optional feed dict for both runs.
    expected_output: If not None, the output of both runs is checked
      against this value with assertAllClose.
  """
  run_options = config_pb2.RunOptions(output_partition_graphs=True)
  run_metadata = config_pb2.RunMetadata()
  output = sess.run(fetches, feed_dict=feed_dict, options=run_options,
                    run_metadata=run_metadata)
  if expected_output is not None:
    self.assertAllClose(expected_output, output)
  non_debug_graph_defs = run_metadata.partition_graphs

  # Second run: same run_options object, now with debug watches added.
  debug_utils.watch_graph(
      run_options, sess.graph, debug_urls=self._debug_url)
  run_metadata = config_pb2.RunMetadata()
  output = sess.run(fetches, feed_dict=feed_dict, options=run_options,
                    run_metadata=run_metadata)
  if expected_output is not None:
    self.assertAllClose(expected_output, output)

  dump = debug_data.DebugDumpDir(
      self._dump_dir, partition_graphs=run_metadata.partition_graphs,
      validate=True)

  reconstructed = dump.reconstructed_non_debug_partition_graphs()
  self.assertEqual(len(non_debug_graph_defs), len(reconstructed))
  for i, non_debug_graph_def in enumerate(non_debug_graph_defs):
    # NOTE: reaches into a private helper of debug_graphs to key the
    # reconstructed graphs by device name.
    device_name = debug_graphs._infer_device_name(non_debug_graph_def)
    test_util.assert_equal_graph_def(
        self._graphDefWithoutBlacklistedNodes(reconstructed[device_name]),
        self._graphDefWithoutBlacklistedNodes(non_debug_graph_def))

    # Test debug_graphs.reconstruct_non_debug_graph_def.
    reconstructed_again = (
        debug_graphs.reconstruct_non_debug_graph_def(
            run_metadata.partition_graphs[i]))
    test_util.assert_equal_graph_def(
        self._graphDefWithoutBlacklistedNodes(reconstructed_again),
        self._graphDefWithoutBlacklistedNodes(non_debug_graph_def))
def _decorate_run_options_for_debug(
    self,
    run_options,
    debug_urls,
    debug_ops="DebugIdentity",
    node_name_regex_whitelist=None,
    op_type_regex_whitelist=None,
    tensor_dtype_regex_whitelist=None,
    tolerate_debug_op_creation_failures=False):
  """Modify a RunOptions object for debug tensor watching.

  Specifies request for outputting partition graphs. Adds
  debug_tensor_watch_opts with proper debug URLs. The run_options object
  is modified in place; nothing is returned.

  Args:
    run_options: (RunOptions) the modified RunOptions object.
    debug_urls: (list of str) debug URLs to be entered in run_options.
      debug_tensor_watch_opts.
    debug_ops: (str or list of str) debug op(s) to be used by the debugger.
    node_name_regex_whitelist: Regular-expression whitelist for node name.
    op_type_regex_whitelist: Regular-expression whitelist for op type.
    tensor_dtype_regex_whitelist: Regular-expression whitelist for tensor
      dtype.
    tolerate_debug_op_creation_failures: Whether debug op creation failures
      are to be tolerated.
  """
  run_options.output_partition_graphs = True
  debug_utils.watch_graph(
      run_options,
      self._sess.graph,
      debug_urls=debug_urls,
      debug_ops=debug_ops,
      node_name_regex_whitelist=node_name_regex_whitelist,
      op_type_regex_whitelist=op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=tensor_dtype_regex_whitelist,
      tolerate_debug_op_creation_failures=tolerate_debug_op_creation_failures,
      # Disk byte usage is reset on the first run call, or on every run if
      # the wrapper is configured to reset it each run.
      reset_disk_byte_usage=(self._run_call_count == 1 or
                             self._is_disk_usage_reset_each_run()))
def before_run(self, run_context):
  """Called right before a session is run.

  Lazily constructs the GrpcDebugWrapperSession on the first call, then
  builds a RunOptions with debug watches derived from self._watch_fn for
  the current fetches/feeds.

  Args:
    run_context: A session_run_hook.SessionRunContext. Encapsulates
      information on the run.

  Returns:
    A session_run_hook.SessionRunArgs object.
  """
  # Lazy initialization: the wrapper needs the actual session, which is
  # only available once run_context is provided.
  if not self._grpc_debug_wrapper_session:
    self._grpc_debug_wrapper_session = grpc_wrapper.GrpcDebugWrapperSession(
        run_context.session,
        self._grpc_debug_server_addresses,
        watch_fn=self._watch_fn,
        thread_name_filter=self._thread_name_filter,
        log_usage=self._log_usage)

  fetches = run_context.original_args.fetches
  feed_dict = run_context.original_args.feed_dict
  # Ask the user-supplied watch_fn which tensors/ops to watch on this run.
  watch_options = self._watch_fn(fetches, feed_dict)
  run_options = config_pb2.RunOptions()
  debug_utils.watch_graph(
      run_options,
      run_context.session.graph,
      debug_urls=self._grpc_debug_wrapper_session.prepare_run_debug_urls(
          fetches, feed_dict),
      debug_ops=watch_options.debug_ops,
      node_name_regex_whitelist=watch_options.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_options.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=watch_options.
      tensor_dtype_regex_whitelist,
      tolerate_debug_op_creation_failures=(
          watch_options.tolerate_debug_op_creation_failures))

  # No extra fetches/feeds; only the debug-decorated RunOptions.
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=run_options)
def testDebuggingDuringOpError(self):
  """Test the debug tensor dumping when error occurs in graph runtime."""
  with session.Session() as sess:
    ph = array_ops.placeholder(dtypes.float32, name="mismatch/ph")
    x = array_ops.transpose(ph, name="mismatch/x")
    m = constant_op.constant(
        np.array([[1.0, 2.0]], dtype=np.float32), name="mismatch/m")
    y = math_ops.matmul(m, x, name="mismatch/y")

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity"],
        debug_urls=self._debug_urls())

    # The shapes of m and transposed ph are incompatible, so the matmul
    # must fail at graph-execution time.
    bad_feed = np.array([[-3.0], [0.0]])
    with self.assertRaises(errors.OpError):
      sess.run(y, options=run_options, feed_dict={ph: bad_feed})

    dump = debug_data.DebugDumpDir(self._dump_root)

    # Despite the fact that the run() call errored out and partition_graphs
    # are not available via run_metadata, the partition graphs should still
    # have been loaded from the dump directory.
    self.assertTrue(dump.loaded_partition_graphs())

    m_dumps = dump.watch_key_to_data("mismatch/m:0:DebugIdentity")
    self.assertEqual(1, len(m_dumps))
    self.assertAllClose(np.array([[1.0, 2.0]]), m_dumps[0].get_tensor())

    x_dumps = dump.watch_key_to_data("mismatch/x:0:DebugIdentity")
    self.assertEqual(1, len(x_dumps))
    self.assertAllClose(np.array([[-3.0, 0.0]]), x_dumps[0].get_tensor())
def before_run(self, run_context):
  """Called right before a session is run.

  Lazily constructs the GrpcDebugWrapperSession on the first call, then
  builds a RunOptions with debug watches derived from self._watch_fn for
  the current fetches/feeds.

  Args:
    run_context: A session_run_hook.SessionRunContext. Encapsulates
      information on the run.

  Returns:
    A session_run_hook.SessionRunArgs object.
  """
  # Lazy initialization: the wrapper needs the actual session, which is
  # only available once run_context is provided.
  if not self._grpc_debug_wrapper_session:
    self._grpc_debug_wrapper_session = grpc_wrapper.GrpcDebugWrapperSession(
        run_context.session,
        self._grpc_debug_server_addresses,
        watch_fn=self._watch_fn,
        thread_name_filter=self._thread_name_filter,
        log_usage=self._log_usage)

  fetches = run_context.original_args.fetches
  feed_dict = run_context.original_args.feed_dict
  # Ask the user-supplied watch_fn which tensors/ops to watch on this run.
  watch_options = self._watch_fn(fetches, feed_dict)
  run_options = config_pb2.RunOptions()
  debug_utils.watch_graph(
      run_options,
      run_context.session.graph,
      debug_urls=self._grpc_debug_wrapper_session.prepare_run_debug_urls(
          fetches, feed_dict),
      debug_ops=watch_options.debug_ops,
      node_name_regex_whitelist=watch_options.node_name_regex_whitelist,
      op_type_regex_whitelist=watch_options.op_type_regex_whitelist,
      tensor_dtype_regex_whitelist=watch_options.tensor_dtype_regex_whitelist,
      tolerate_debug_op_creation_failures=(
          watch_options.tolerate_debug_op_creation_failures))

  # No extra fetches/feeds; only the debug-decorated RunOptions.
  return session_run_hook.SessionRunArgs(
      None, feed_dict=None, options=run_options)
def testDistributedRunWithGatedGrpcCommunicatesWithDebugServerCorrectly(self):
  """Distributed Session.run with gated gRPC debug ops talks to the server.

  Watches "a" unconditionally and "p" behind a gRPC gate; toggles the
  gate on/off across four runs and checks core metadata, partition
  graphs and received tensor values on the debug server.
  """
  graph = self._createGraph()
  with session.Session(
      config=self.session_config, graph=graph,
      target=self.server_target) as sess:
    sess.run(self.a.initializer)
    sess.run(self.b.initializer)

    run_options = config_pb2.RunOptions()
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        node_name_regex_whitelist=r"a",
        debug_ops=["DebugIdentity"],
        debug_urls=[self.debug_server_url])

    # Test gated_grpc for an op located on the worker, i.e., on the same
    # host as where MasterSession is.
    # TODO(cais): gRPC gating of debug ops does not work on partition graphs
    # not located on MasterSession hosts (e.g., parameter servers) yet. Make
    # it work.
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        node_name_regex_whitelist=r"p",
        debug_ops=["DebugIdentity(gated_grpc=True)"],
        debug_urls=[self.debug_server_url])

    for i in xrange(4):
      if i % 2 == 0:
        self.debug_server.request_watch("p", 0, "DebugIdentity")
      else:
        self.debug_server.request_unwatch("p", 0, "DebugIdentity")

      expected_p = (10.0 + 2.0 * (i + 1)) * (100.0 - 5.0 * (i + 1))
      self.assertAllClose(-expected_p, sess.run(self.q, options=run_options))

      self.assertEqual(1, len(self.debug_server.core_metadata_json_strings))
      core_metadata = json.loads(
          self.debug_server.core_metadata_json_strings[0])
      self.assertEqual([], core_metadata["input_names"])
      self.assertEqual(["q:0"], core_metadata["output_names"])
      self.assertEqual(i, core_metadata["executor_step_index"])

      if i == 0:
        self.assertEqual(1, len(self.debug_server.partition_graph_defs))

      # Tensor "a" is from a PS. It may take longer to arrive due to the fact
      # that the stream connection between the PS and the debug server is
      # persistent and not torn down at the end of each Session.run()
      self._pollingAssertDebugTensorValuesAllClose([10.0 + 2.0 * i],
                                                   "a:0:DebugIdentity")

      # Due to the gRPC gating of the debug op for "p", the debug tensor
      # should be available on even-indexed runs (when the watch was
      # requested before the run) and absent on odd-indexed runs.
      if i % 2 == 0:
        self.assertAllClose(
            [expected_p],
            self.debug_server.debug_tensor_values["p:0:DebugIdentity"])
      else:
        self.assertNotIn("p:0:DebugIdentity",
                         self.debug_server.debug_tensor_values)

      # "b" is never watched (whitelists cover only "a" and "p").
      self.assertNotIn("b:0:DebugIdentity",
                       self.debug_server.debug_tensor_values)
      self.debug_server.clear_data()
def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
  """Toggling two debug ops on the same tensors must not cross-talk.

  Alternates between DebugIdentity and DebugNumericSummary watches on
  delta_1 and delta_2 across four runs; exactly one debug-op kind should
  fire per run (requests are fulfilled in the following run).
  """
  with session.Session(config=no_rewrite_session_config()) as sess:
    v_1 = variables.Variable(50.0, name="v_1")
    # Fixed: this variable was previously also given name="v_1", which
    # TF silently uniquified to "v_1_1"; "v_2" is the intended name.
    v_2 = variables.Variable(-50.0, name="v_2")
    delta_1 = constant_op.constant(5.0, name="delta_1")
    delta_2 = constant_op.constant(-5.0, name="delta_2")
    inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
    inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")
    sess.run([v_1.initializer, v_2.initializer])

    run_metadata = config_pb2.RunMetadata()
    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(
        run_options,
        sess.graph,
        debug_ops=["DebugIdentity(gated_grpc=true)",
                   "DebugNumericSummary(gated_grpc=true)"],
        debug_urls=[self._debug_server_url_1])

    for i in xrange(4):
      self._server_1.clear_data()

      if i % 2 == 0:
        self._server_1.request_watch("delta_1", 0, "DebugIdentity")
        self._server_1.request_watch("delta_2", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta_1", 0, "DebugNumericSummary")
        self._server_1.request_unwatch("delta_2", 0, "DebugNumericSummary")
      else:
        self._server_1.request_unwatch("delta_1", 0, "DebugIdentity")
        self._server_1.request_unwatch("delta_2", 0, "DebugIdentity")
        self._server_1.request_watch("delta_1", 0, "DebugNumericSummary")
        self._server_1.request_watch("delta_2", 0, "DebugNumericSummary")

      sess.run([inc_v_1, inc_v_2],
               options=run_options, run_metadata=run_metadata)

      # Watched debug tensors are:
      #   Run 0: delta_[1,2]:0:DebugIdentity
      #   Run 1: delta_[1,2]:0:DebugNumericSummary
      #   Run 2: delta_[1,2]:0:DebugIdentity
      #   Run 3: delta_[1,2]:0:DebugNumericSummary
      self.assertEqual(2, len(self._server_1.debug_tensor_values))
      if i % 2 == 0:
        self.assertAllClose(
            [5.0],
            self._server_1.debug_tensor_values["delta_1:0:DebugIdentity"])
        self.assertAllClose(
            [-5.0],
            self._server_1.debug_tensor_values["delta_2:0:DebugIdentity"])
      else:
        self.assertAllClose(
            [[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0, 5.0, 0.0,
              1.0, 0.0]],
            self._server_1.debug_tensor_values[
                "delta_1:0:DebugNumericSummary"])
        self.assertAllClose(
            [[1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -5.0, -5.0, -5.0, 0.0,
              1.0, 0.0]],
            self._server_1.debug_tensor_values[
                "delta_2:0:DebugNumericSummary"])