def testRangeReadingGraphExecutionTraces(
    self, begin, end, expected_begin, expected_end):
  """Checks range-based reading of graph execution traces.

  Five traces named "Op_0" through "Op_4" are written and flushed. They are
  then read back with the given `begin` and `end`, and the result is expected
  to cover the half-open index range `[expected_begin, expected_end)`.

  Args:
    begin: Value forwarded as `begin` to `graph_execution_traces()`.
    end: Value forwarded as `end` to `graph_execution_traces()`.
    expected_begin: Index of the first trace expected in the result.
    expected_end: One past the index of the last trace expected in the result.
  """
  graph_id = "graph1"
  writer = debug_events_writer.DebugEventsWriter(
      self.dump_root, self.tfdbg_run_id, circular_buffer_size=-1)
  writer.WriteDebuggedGraph(
      debug_event_pb2.DebuggedGraph(graph_id=graph_id, graph_name=graph_id))
  for op_index in range(5):
    name = "Op_%d" % op_index
    writer.WriteGraphOpCreation(
        debug_event_pb2.GraphOpCreation(op_name=name, graph_id=graph_id))
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(
            op_name=name, tfdbg_context_id=graph_id))
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()
  writer.Close()

  expected_count = expected_end - expected_begin
  first_op_name = "Op_%d" % expected_begin
  last_op_name = "Op_%d" % (expected_end - 1)
  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    selected = reader.graph_execution_traces(begin=begin, end=end)
    self.assertLen(selected, expected_count)
    self.assertEqual(selected[0].op_name, first_op_name)
    self.assertEqual(selected[-1].op_name, last_op_name)
def testWriteGraphExecutionTraceEventsWithCyclicBuffer(self):
  """Checks that only the newest traces are read back after a flush.

  Twice `DEFAULT_CYCLIC_BUFFER_SIZE` traces are written. The trace file is
  expected to be empty before `FlushExecutionFiles()` and, afterwards, to
  hold exactly the second half of the written traces in order.
  """
  buffer_size = debug_events_writer.DEFAULT_CYCLIC_BUFFER_SIZE
  writer = debug_events_writer.DebugEventsWriter(self.dump_root)
  for index in range(buffer_size * 2):
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(op_name="Op%d" % index))

  pattern = os.path.join(self.dump_root, "*.graph_execution_traces")
  matching_paths = glob.glob(pattern)
  self.assertEqual(len(matching_paths), 1)
  trace_path = matching_paths[0]

  # Until FlushExecutionFiles() is called, no data should have been written
  # to the file.
  events_before_flush = ReadDebugEvents(trace_path)
  self.assertEqual(len(events_before_flush), 0)

  writer.FlushExecutionFiles()
  events_after_flush = ReadDebugEvents(trace_path)
  self.assertEqual(len(events_after_flush), buffer_size)
  for offset in range(buffer_size):
    # The surviving events are expected to be the last `buffer_size` written.
    expected_name = "Op%d" % (offset + buffer_size)
    self.assertEqual(
        events_after_flush[offset].graph_execution_trace.op_name,
        expected_name)
def testWriteGraphExecutionTraceEventsWithCircularBuffer(self):
  """Checks that only the newest traces are read back after a flush.

  Twice `DEFAULT_CIRCULAR_BUFFER_SIZE` traces are written. The reader is
  expected to see nothing before `FlushExecutionFiles()` and, afterwards,
  exactly the second half of the written traces in order.
  """
  buffer_size = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE
  writer = debug_events_writer.DebugEventsWriter(
      self.dump_root, self.tfdbg_run_id)
  for index in range(buffer_size * 2):
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(op_name="Op%d" % index))

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    # Until FlushExecutionFiles() is called, no data should have been written
    # to the file.
    unflushed = list(reader.graph_execution_traces_iterators()[0])
    self.assertEmpty(unflushed)

    writer.FlushExecutionFiles()
    flushed = [
        item.debug_event.graph_execution_trace
        for item in reader.graph_execution_traces_iterators()[0]
    ]
    self.assertLen(flushed, buffer_size)
    for offset in range(buffer_size):
      # The surviving traces are expected to be the last `buffer_size` written.
      expected_name = "Op%d" % (offset + buffer_size)
      self.assertEqual(flushed[offset].op_name, expected_name)
def write_graph_execution_trace():
  """Writes one op-creation event and one matching graph execution trace.

  The op name is derived from the shared counter in
  `graph_execution_trace_state`, which is read and incremented while holding
  the lock stored under the "lock" key. `writer` and
  `graph_execution_trace_state` come from the enclosing scope.
  """
  state = graph_execution_trace_state
  with state["lock"]:
    name = "Op%d" % state["counter"]
    creation_event = debug_event_pb2.GraphOpCreation(
        op_type="FooOp", op_name=name, graph_id="graph1")
    trace_event = debug_event_pb2.GraphExecutionTrace(
        op_name=name, tfdbg_context_id="graph1")
    state["counter"] += 1
  # The writes are issued after the lock is released; the lock only guards the
  # shared counter and the construction of the protos.
  writer.WriteGraphOpCreation(creation_event)
  writer.WriteGraphExecutionTrace(trace_event)
def testReadingTwoFileSetsWithTheSameDumpRootSucceeds(self):
  """Reads two file sets sharing one tfdbg_run_id from a single dump root.

  Two writers produce file sets in separate subdirectories ("0" and "1") with
  the same run ID. Only the first writes the debugged graph and op-creation
  events; each writes ten graph execution traces whose op name encodes the
  writer index. The files from "1" are then renamed into "0", and a single
  `DebugDataReader` on "0" is expected to yield 20 trace digests: ten "Op_0"
  traces followed by ten "Op_1" traces.
  """
  # To simulate a multi-host data dump, we first generate file sets in two
  # different directories, with the same tfdbg_run_id, and then combine them.
  tfdbg_run_id = "foo"
  traces_per_file_set = 10
  for i in range(2):
    writer = debug_events_writer.DebugEventsWriter(
        os.path.join(self.dump_root, str(i)),
        tfdbg_run_id,
        circular_buffer_size=-1)
    if i == 0:
      debugged_graph = debug_event_pb2.DebuggedGraph(
          graph_id="graph1", graph_name="graph1")
      writer.WriteDebuggedGraph(debugged_graph)
      for op_name in ("Op_0", "Op_1"):
        graph_op_creation = debug_event_pb2.GraphOpCreation(
            op_type="FooOp", op_name=op_name, graph_id="graph1")
        writer.WriteGraphOpCreation(graph_op_creation)
    for _ in range(traces_per_file_set):
      trace = debug_event_pb2.GraphExecutionTrace(
          op_name="Op_%d" % i, tfdbg_context_id="graph1")
      writer.WriteGraphExecutionTrace(trace)
      writer.FlushNonExecutionFiles()
      writer.FlushExecutionFiles()

  # Move all files from the subdirectory /1 to subdirectory /0.
  dump_root_0 = os.path.join(self.dump_root, "0")
  src_paths = glob.glob(os.path.join(self.dump_root, "1", "*"))
  for src_path in src_paths:
    dst_path = os.path.join(
        dump_root_0,
        # Rename the file set to avoid file name collision.
        re.sub(r"(tfdbg_events\.\d+)", r"\g<1>1", os.path.basename(src_path)))
    os.rename(src_path, dst_path)

  with debug_events_reader.DebugDataReader(dump_root_0) as reader:
    reader.update()
    # Verify the content of the .graph_execution_traces file.
    trace_digests = reader.graph_execution_traces(digest=True)
    self.assertLen(trace_digests, 2 * traces_per_file_set)
    # Index with the loop variable (not the leftover writer index `i`) so that
    # every one of the digests is actually read and checked.
    for index in range(traces_per_file_set):
      trace = reader.read_graph_execution_trace(trace_digests[index])
      self.assertEqual(trace.op_name, "Op_0")
    for index in range(traces_per_file_set, 2 * traces_per_file_set):
      trace = reader.read_graph_execution_trace(trace_digests[index])
      self.assertEqual(trace.op_name, "Op_1")
def write_and_update_job():
  """Keeps writing events and updating the reader until told to stop.

  Each iteration writes one op-creation event and one graph execution trace
  named after the current `writer_state["counter"]`, increments the counter,
  flushes both file groups and calls `reader.update()`. The loop ends once
  `writer_state["done"]` is truthy at the start of an iteration. `writer`,
  `reader` and `writer_state` come from the enclosing scope.
  """
  while not writer_state["done"]:
    name = "Op%d" % writer_state["counter"]
    writer.WriteGraphOpCreation(
        debug_event_pb2.GraphOpCreation(
            op_type="FooOp", op_name=name, graph_id="graph1"))
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(
            op_name=name, tfdbg_context_id="graph1"))
    writer_state["counter"] += 1
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()
    reader.update()
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
  """Checks that every written trace is read back when the buffer size is 0.

  Twice `DEFAULT_CIRCULAR_BUFFER_SIZE` traces are written and flushed; the
  reader is expected to return all of them, in the order written.
  """
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  total_traces = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for index in range(total_traces):
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(op_name="Op%d" % index))
  writer.FlushExecutionFiles()

  with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
    events = list(reader.graph_execution_traces_iterator())
    self.assertLen(events, total_traces)
    for index in range(total_traces):
      expected_name = "Op%d" % index
      self.assertEqual(
          events[index].graph_execution_trace.op_name, expected_name)
def testWriteGraphExecutionTraceEventsWithoutCircularBufferBehavior(self):
  """Checks that every written trace lands in the file when buffer size is 0.

  Twice `DEFAULT_CIRCULAR_BUFFER_SIZE` traces are written and flushed; the
  single `*.graph_execution_traces` file under the dump root is expected to
  contain all of them, in the order written.
  """
  # A circular buffer size of 0 abolishes the circular buffer behavior.
  writer = debug_events_writer.DebugEventsWriter(self.dump_root, 0)
  total_traces = debug_events_writer.DEFAULT_CIRCULAR_BUFFER_SIZE * 2
  for index in range(total_traces):
    writer.WriteGraphExecutionTrace(
        debug_event_pb2.GraphExecutionTrace(op_name="Op%d" % index))
  writer.FlushExecutionFiles()

  pattern = os.path.join(self.dump_root, "*.graph_execution_traces")
  matching_paths = glob.glob(pattern)
  self.assertEqual(len(matching_paths), 1)

  events = ReadDebugEvents(matching_paths[0])
  self.assertEqual(len(events), total_traces)
  for index in range(total_traces):
    expected_name = "Op%d" % index
    self.assertEqual(
        events[index].graph_execution_trace.op_name, expected_name)
def testConcurrentGraphExecutionTraceRandomReads(self):
  """Reads graph execution traces by digest from two threads at once.

  One hundred traces named "Op0" through "Op99" are written and flushed. Two
  threads then each read half of them by digest, in descending index order,
  through the same `DebugDataReader`. Afterwards every slot is expected to
  hold the trace with the matching op name.
  """
  num_traces = 100
  circular_buffer_size = -1
  writer = debug_events_writer.DebugEventsWriter(
      self.dump_root, self.tfdbg_run_id, circular_buffer_size)
  debugged_graph = debug_event_pb2.DebuggedGraph(
      graph_id="graph1", graph_name="graph1")
  writer.WriteDebuggedGraph(debugged_graph)
  for i in range(num_traces):
    op_name = "Op%d" % i
    graph_op_creation = debug_event_pb2.GraphOpCreation(
        op_type="FooOp", op_name=op_name, graph_id="graph1")
    writer.WriteGraphOpCreation(graph_op_creation)
    trace = debug_event_pb2.GraphExecutionTrace(
        op_name=op_name, tfdbg_context_id="graph1")
    writer.WriteGraphExecutionTrace(trace)
  writer.FlushNonExecutionFiles()
  writer.FlushExecutionFiles()

  # Use the reader as a context manager so it is released even if a read or
  # an assertion fails; both threads are joined before the block exits.
  with debug_events_reader.DebugDataReader(self.dump_root) as reader:
    reader.update()
    traces = [None] * num_traces

    def read_job(indices):
      """Reads the traces at `indices` into the shared `traces` list."""
      digests = reader.graph_execution_traces(digest=True)
      for index in indices:
        traces[index] = reader.read_graph_execution_trace(digests[index])

    # Each thread walks its own half of the indices, from high to low.
    thread_1 = threading.Thread(target=read_job, args=(range(49, -1, -1),))
    thread_2 = threading.Thread(target=read_job, args=(range(99, 49, -1),))
    thread_1.start()
    thread_2.start()
    thread_1.join()
    thread_2.join()

    for i in range(num_traces):
      self.assertEqual(traces[i].op_name, "Op%d" % i)
def WriteGraphExecutionTrace():
  """Writes one graph execution trace named after the shared counter.

  The counter in `graph_execution_trace_state` is read and incremented while
  holding the lock stored under the "lock" key. `writer` and
  `graph_execution_trace_state` come from the enclosing scope.
  """
  state = graph_execution_trace_state
  new_trace = debug_event_pb2.GraphExecutionTrace()
  with state["lock"]:
    new_trace.op_name = "Op%d" % state["counter"]
    state["counter"] += 1
  # The write is issued after the lock is released; the lock only guards the
  # shared counter.
  writer.WriteGraphExecutionTrace(new_trace)