def testSessionLogStartMessageDiscardsExpiredEvents(self):
    """Checks that a SessionLog.START event discards expired events.

    This discard path is preferred over the out-of-order step discard path,
    but it is only usable for event protos that carry the SessionLog enum,
    which was introduced to event.proto for file_version >= brain.Event:2.
    """
    generator = _EventGenerator(self)
    accumulator = ea.EventAccumulator(generator)
    generator.AddEvent(
        tf.Event(wall_time=0, step=1, file_version='brain.Event:2'))

    # Scalars written before the START marker, in their original order.
    for s1_step in (100, 200, 300, 400):
        generator.AddScalar('s1', wall_time=1, step=s1_step, value=20)
    for s2_step in (202, 203):
        generator.AddScalar('s2', wall_time=1, step=s2_step, value=20)

    # The START marker itself sits at step 201.
    start_log = tf.SessionLog(status=tf.SessionLog.START)
    generator.AddEvent(
        tf.Event(wall_time=2, step=201, session_log=start_log))
    accumulator.Reload()

    s1_steps = [scalar.step for scalar in accumulator.Scalars('s1')]
    self.assertEqual(s1_steps, [100, 200])
    s2_steps = [scalar.step for scalar in accumulator.Scalars('s2')]
    self.assertEqual(s2_steps, [])
def _GenerateTestData(self):
    """Writes the events that make up the test data directory.

    A single run named run1 is created under a temporary directory. It holds:
      - a graph definition
      - a histogram and an image, both at wall time 0 and step 0
      - scalar events tagged 'simple_values' carrying the value i at step
        10 * i and wall time 100 * i, for i in [1, _SCALAR_COUNT]
        (both ends included)
    """
    root_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, root_dir)
    run_dir = os.path.join(root_dir, 'run1')
    os.makedirs(run_dir)

    writer = tf.train.SummaryWriter(run_dir)

    # A small graph with two nodes; the second one has a 2 KB attribute.
    graph_def = tf.GraphDef()
    first_node = graph_def.node.add()
    first_node.name = 'a'
    second_node = graph_def.node.add()
    second_node.name = 'b'
    second_node.attr['very_large_attr'].s = b'a' * 2048
    writer.add_event(tf.Event(graph_def=graph_def.SerializeToString()))

    # One event carrying both the histogram and a 1x1 transparent GIF.
    histogram_value = tf.HistogramProto(
        min=0,
        max=2,
        num=3,
        sum=6,
        sum_squares=5,
        bucket_limit=[0, 1, 2],
        bucket=[1, 1, 1])
    encoded_image = base64.b64decode(
        'R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7')
    image_value = tf.Summary.Image(
        height=1,
        width=1,
        colorspace=1,
        encoded_image_string=encoded_image)
    histogram_and_image = tf.Summary(value=[
        tf.Summary.Value(tag='histogram', histo=histogram_value),
        tf.Summary.Value(tag='image', image=image_value),
    ])
    writer.add_event(
        tf.Event(wall_time=0, step=0, summary=histogram_and_image))

    # Write _SCALAR_COUNT simple values. Wall time, step and value all
    # differ so they can be told apart.
    for i in xrange(1, self._SCALAR_COUNT + 1):
        scalar_summary = tf.Summary(
            value=[tf.Summary.Value(tag='simple_values', simple_value=i)])
        writer.add_event(
            tf.Event(wall_time=100 * i, step=10 * i, summary=scalar_summary))

    writer.flush()
    writer.close()
def testOnlySummaryEventsTriggerDiscards(self):
    """Checks that a file version event does not trigger a data purge."""
    generator = _EventGenerator()
    accumulator = ea.EventAccumulator(generator)
    generator.AddScalar('s1', wall_time=1, step=100, value=20)

    # Two step-0 events that carry no summary: a file version and a graph.
    non_summary_events = (
        tf.Event(wall_time=2, step=0, file_version='0'),
        tf.Event(wall_time=3, step=0, graph_def=graph_pb2.GraphDef()),
    )
    for event in non_summary_events:
        generator.AddEvent(event)
    accumulator.Reload()

    remaining_steps = [scalar.step for scalar in accumulator.Scalars('s1')]
    self.assertEqual(remaining_steps, [100])
def testOnlySummaryEventsTriggerDiscards(self):
    """Checks that a file version event does not trigger a data purge."""
    generator = _EventGenerator(self)
    accumulator = ea.EventAccumulator(generator)
    generator.AddScalarTensor('s1', wall_time=1, step=100, value=20)

    # Two step-0 events that carry no summary: a file version and a
    # serialized (empty) graph.
    version_event = tf.Event(
        wall_time=2, step=0, file_version='brain.Event:1')
    serialized_graph = tf.GraphDef().SerializeToString()
    graph_event = tf.Event(wall_time=3, step=0, graph_def=serialized_graph)
    for event in (version_event, graph_event):
        generator.AddEvent(event)
    accumulator.Reload()

    remaining_steps = [tensor.step for tensor in accumulator.Tensors('s1')]
    self.assertEqual(remaining_steps, [100])
def testMarkReset(self):
    """Exercises mark() followed by reset() on an event log.

    After marking, reading one event and resetting, the first event is
    expected again, followed by the second event and then None.
    """
    first = tf.Event(step=123)
    second = tf.Event(step=456)
    records = [event.SerializeToString() for event in (first, second)]
    path = self._save_records('events.out.tfevents.0.localhost', records)
    with self.EventLog(path) as log:
        log.mark()
        self.assertEqual(first, log.get_next_event())
        log.reset()
        # The same two events are expected in order after the reset.
        for expected in (first, second):
            self.assertEqual(expected, log.get_next_event())
        self.assertIsNone(log.get_next_event())
def testMarkWithShrinkingBatchSize_raisesValueError(self):
    """Checks that mark() raises ValueError after a shorter re-read.

    The log is marked, two events are read, the log is reset and only one
    event is read again; the following mark() must raise a ValueError whose
    message matches 'monotonic'.
    """
    first = tf.Event(step=123)
    second = tf.Event(step=456)
    records = [event.SerializeToString() for event in (first, second)]
    path = self._save_records('events.out.tfevents.0.localhost', records)
    with self.EventLog(path) as log:
        log.mark()
        for expected in (first, second):
            self.assertEqual(expected, log.get_next_event())
        log.reset()
        self.assertEqual(first, log.get_next_event())
        with six.assertRaisesRegex(self, ValueError, r'monotonic'):
            log.mark()
def testReadOneEvent(self):
    """Checks that a one-record log yields that event and then None."""
    only_event = tf.Event(step=123)
    records = [only_event.SerializeToString()]
    path = self._save_records('events.out.tfevents.0.localhost', records)
    with self.EventLog(path) as log:
        self.assertEqual(only_event, log.get_next_event())
        self.assertIsNone(log.get_next_event())
def _check_health_pills_in_events_file(self, events_file_path,
                                       debug_key_to_tensors):
    """Asserts that an events file holds the expected health pills.

    Args:
      events_file_path: Path of the events file to read.
      debug_key_to_tensors: Maps each debug key to the tensors whose health
        pills are expected, in order, under that key in the file.
    """
    records = tf.compat.v1.python_io.tf_record_iterator(events_file_path)
    parsed_event = tf.Event()

    # The first event in the file should contain the events version, which
    # is important because without it, TensorBoard may purge health pill
    # events.
    parsed_event.ParseFromString(next(records))
    self.assertEqual("brain.Event:2", parsed_event.file_version)

    # Collect the health pills found in the remaining records by debug key.
    pills_by_key = {}
    for record in records:
        if not record:
            break
        parsed_event.ParseFromString(record)
        summary_values = parsed_event.summary.value
        if not summary_values:
            continue
        first_value = summary_values[0]
        if (first_value.metadata.plugin_data.plugin_name !=
                constants.DEBUGGER_PLUGIN_NAME):
            continue
        debug_key = first_value.node_name
        pill = tf_debug.load_tensor_from_event(parsed_event)
        pills_by_key.setdefault(debug_key, []).append(pill)

    for debug_key in debug_key_to_tensors:
        expected_tensors = debug_key_to_tensors[debug_key]
        for index, tensor in enumerate(expected_tensors):
            self.assertAllClose(
                self._compute_health_pill(tensor),
                pills_by_key[debug_key][index])
def _CreateEventWithDebugNumericSummary(
        self, device_name, op_name, output_slot, wall_time, step,
        list_of_values):
    """Builds an event that carries a health pill summary.

    Args:
      device_name: The name of the op's device.
      op_name: The name of the op to which a DebugNumericSummary was attached.
      output_slot: The numeric output slot for the tensor.
      wall_time: The numeric wall time of the event.
      step: The step of the event.
      list_of_values: A python list of values within the tensor.

    Returns:
      A `tf.Event` with a health pill summary.
    """
    health_pill_event = tf.Event(step=step, wall_time=wall_time)

    node_name = '%s:%d:DebugNumericSummary' % (op_name, output_slot)
    tensor_proto = tf.make_tensor_proto(
        list_of_values, dtype=tf.float64, shape=[len(list_of_values)])
    summary_value = health_pill_event.summary.value.add(
        tag=op_name, node_name=node_name, tensor=tensor_proto)

    # The device and output slot travel as JSON in the plugin data content.
    metadata_proto = debugger_event_metadata_pb2.DebuggerEventMetadata(
        device=device_name, output_slot=output_slot)
    metadata_json = json_format.MessageToJson(
        metadata_proto, including_default_value_fields=True)
    plugin_data = summary_value.metadata.plugin_data
    plugin_data.plugin_name = constants.DEBUGGER_PLUGIN_NAME
    plugin_data.content = tf.compat.as_bytes(metadata_json)
    return health_pill_event
def testFirstEventTimestamp(self):
    """Checks that FirstEventTimestamp() is the first event's wall_time."""
    generator = _EventGenerator()
    accumulator = ea.EventAccumulator(generator)
    first_event = tf.Event(
        wall_time=10, step=20, file_version='brain.Event:2')
    generator.AddEvent(first_event)
    generator.AddScalar('s1', wall_time=30, step=40, value=20)
    self.assertEqual(accumulator.FirstEventTimestamp(), 10)
def testEventsDiscardedPerTagAfterRestartForFileVersionLessThan2(self):
    """Checks that a restart discard only affects the misordered tag.

    If a step value is observed to be lower than what was previously seen,
    this should force a discard of all previous items that are outdated, but
    only for the out of order tag. Other tags should remain unaffected.

    Only file versions < 2 use this out-of-order discard logic. Later
    versions discard events based on the step value of SessionLog.START.
    """
    warnings = []
    self.stubs.Set(logging, 'warn', warnings.append)

    generator = _EventGenerator()
    accumulator = ea.EventAccumulator(generator)
    generator.AddEvent(
        tf.Event(wall_time=0, step=0, file_version='brain.Event:1'))

    # s1 goes out of order at step 101; s2 is strictly increasing.
    for s1_step in (100, 200, 300, 101, 201, 301):
        generator.AddScalar('s1', wall_time=1, step=s1_step, value=20)
    for s2_step in (101, 201, 301):
        generator.AddScalar('s2', wall_time=1, step=s2_step, value=20)
    accumulator.Reload()

    # Steps 200 and 300 of s1 must have been discarded.
    s1_steps = [scalar.step for scalar in accumulator.Scalars('s1')]
    self.assertEqual(s1_steps, [100, 101, 201, 301])

    # Discards on s1 must not affect s2, i.e. only events from the out of
    # order tag are discarded.
    s2_steps = [scalar.step for scalar in accumulator.Scalars('s2')]
    self.assertEqual(s2_steps, [101, 201, 301])
def _create_event_with_float_tensor(self, node_name, output_slot, debug_op,
                                    list_of_values):
    """Builds an event holding a float64 (double) tensor.

    Args:
      node_name: The string name of the op. This lacks both the output slot
        as well as the name of the debug op.
      output_slot: The number that is the output slot.
      debug_op: The name of the debug op to use.
      list_of_values: A python list of values within the tensor.

    Returns:
      A `tf.Event` with a summary containing that node name and a float64
      tensor with those values.
    """
    tensor_event = tf.Event()

    full_node_name = "%s:%d:%s" % (node_name, output_slot, debug_op)
    tensor_proto = tensor_util.make_tensor_proto(
        list_of_values, dtype=tf.float64, shape=[len(list_of_values)])
    summary_value = tensor_event.summary.value.add(
        tag=node_name, node_name=full_node_name, tensor=tensor_proto)

    # The device and output slot travel as JSON in the plugin data content.
    metadata_proto = debugger_event_metadata_pb2.DebuggerEventMetadata(
        device="/job:localhost/replica:0/task:0/cpu:0",
        output_slot=output_slot)
    metadata_json = json_format.MessageToJson(
        metadata_proto, including_default_value_fields=True)
    plugin_data = summary_value.metadata.plugin_data
    plugin_data.plugin_name = constants.DEBUGGER_PLUGIN_NAME
    plugin_data.content = tf.compat.as_bytes(metadata_json)
    return tensor_event
def Load(self):
    """Loads all new values from disk.

    Calling Load multiple times in a row will not 'drop' events as long as
    the return value is not iterated over.

    Yields:
      All values that were written to disk that have not been yielded yet.
    """
    tf.logging.debug('Loading events from %s', self._file_path)

    # GetNext() expects a status argument on TF <= 1.7. Its signature is
    # inspected once here instead of once per record.
    # NOTE(review): assumes self._reader is not replaced while this
    # generator is being consumed.
    get_next_args = inspect.getargspec(self._reader.GetNext).args  # pylint: disable=deprecated-method
    needs_status = bool(get_next_args[1:])

    while True:
        try:
            if needs_status:
                with tf.errors.raise_exception_on_not_ok_status() as status:
                    self._reader.GetNext(status)
            else:
                self._reader.GetNext()
        except (tf.errors.DataLossError, tf.errors.OutOfRangeError) as e:
            tf.logging.debug('Cannot read more events: %s', e)
            # We ignore partial read exceptions, because a record may be
            # truncated. PyRecordReader holds the offset prior to the failed
            # read, so retrying will succeed.
            break
        event = tf.Event()
        event.ParseFromString(self._reader.record())
        yield event
    tf.logging.debug('No more events in %s', self._file_path)
def AddScalar(self, tag, wall_time=0, step=0, value=0):
    """Builds an event with one scalar summary value and passes it to AddEvent.

    Args:
      tag: Tag of the summary value.
      wall_time: Wall time of the event.
      step: Step of the event.
      value: Number stored as the summary's simple_value.
    """
    scalar_value = tf.Summary.Value(tag=tag, simple_value=value)
    scalar_summary = tf.Summary(value=[scalar_value])
    self.AddEvent(
        tf.Event(wall_time=wall_time, step=step, summary=scalar_summary))
def setUp(self):
    """Creates a stream handler backed by a fake events writer manager.

    The fake manager is given self.events_written, and the handler is fed
    an empty core metadata event.
    """
    self.events_written = []
    fake_manager = FakeEventsWriterManager(self.events_written)
    handler = debugger_server_lib.DebuggerDataStreamHandler(
        events_writer_manager=fake_manager)
    self.stream_handler = handler
    self.stream_handler.on_core_metadata_event(tf.Event())
def main():
    """Rebuilds the event files of each input directory from its log.txt.

    For every directory given on the command line, existing files matching
    events.out.tfevents* are deleted. Each line of log.txt that matches the
    module-level `log_re` pattern is then written as a scalar summary tagged
    'loss/<section>', using the timestamp and step parsed from that line.
    The directory path is printed once it has been processed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('inputs', nargs='+')
    args = parser.parse_args()

    for path in args.inputs:
        for existing in glob.glob(os.path.join(path, 'events.out.tfevents*')):
            os.unlink(existing)

        writer = tf.summary.FileWriter(path)
        try:
            # `with` closes the log file once the loop is done, instead of
            # leaving the handle to the garbage collector.
            with open(os.path.join(path, 'log.txt')) as log_file:
                for line in log_file:
                    m = re.search(log_re, line)
                    if m is None:
                        continue
                    timestamp, step, section, loss = m.groups()
                    step = int(step)
                    loss = float(loss)
                    timestamp = dateutil.parser.parse(timestamp).timestamp()
                    loss_value = tf.Summary.Value(
                        tag='loss/{}'.format(section), simple_value=loss)
                    writer.add_event(
                        tf.Event(
                            wall_time=timestamp,
                            step=step,
                            summary=tf.Summary(value=[loss_value])))
        finally:
            # Close the writer even when a log line fails to parse.
            writer.close()
        print(path)
def testExpiredDataDiscardedAfterRestartForFileVersionLessThan2(self):
    """Checks that events are discarded once a restart is detected.

    If a step value is observed to be lower than what was previously seen,
    this should force a discard of all previous items with the same tag
    that are outdated.

    Only file versions < 2 use this out-of-order discard logic. Later
    versions discard events based on the step value of SessionLog.START.
    """
    warnings = []
    self.stubs.Set(tf.logging, 'warn', warnings.append)

    generator = _EventGenerator(self)
    accumulator = ea.EventAccumulator(generator)
    generator.AddEvent(
        tf.Event(wall_time=0, step=0, file_version='brain.Event:1'))

    # First batch: steps in increasing order, nothing should be dropped.
    for in_order_step in (100, 200, 300):
        generator.AddScalar('s1', wall_time=1, step=in_order_step, value=20)
    accumulator.Reload()
    steps_before_restart = [
        scalar.step for scalar in accumulator.Scalars('s1')
    ]
    self.assertEqual(steps_before_restart, [100, 200, 300])

    # Second batch: starts below the last seen step, so 200 and 300 of s1
    # must have been discarded after the reload.
    for restarted_step in (101, 201, 301):
        generator.AddScalar('s1', wall_time=1, step=restarted_step, value=20)
    accumulator.Reload()
    steps_after_restart = [
        scalar.step for scalar in accumulator.Scalars('s1')
    ]
    self.assertEqual(steps_after_restart, [100, 101, 201, 301])
def log_event(self, message, step=None, level=tf.LogMessage.INFO):
    """Writes a log message event through self.log_writer.

    Args:
      message: Object to log; it is converted with str().
      step: Optional step to record on the event. The event's step field is
        left untouched when this is None.
      level: Level stored on the log message; tf.LogMessage.INFO by default.
    """
    event = tf.Event()
    event.wall_time = time.time()
    if step is not None:
        # Store the caller's step; the event object itself is not a valid
        # value for this field.
        event.step = step
    event.log_message.level = level
    event.log_message.message = str(message)
    self.log_writer.add_event(event)
def _GenerateEventsData(self):
    """Writes one scalar event (tag 's1', value 0) into self.log_dir."""
    file_writer = tf.summary.FileWriter(self.log_dir)
    scalar_value = tf.Summary.Value(tag='s1', simple_value=0)
    scalar_event = tf.Event(
        wall_time=1, step=1, summary=tf.Summary(value=[scalar_value]))
    file_writer.add_event(scalar_event)
    file_writer.close()
def _GenerateEventsData(self):
    """Writes one scalar event (tag 's1', value 0) for self.log_dir.

    The writer is obtained from test_util.FileWriterCache and used as a
    context manager.
    """
    with test_util.FileWriterCache.get(self.log_dir) as file_writer:
        scalar_value = tf.Summary.Value(tag='s1', simple_value=0)
        scalar_summary = tf.Summary(value=[scalar_value])
        file_writer.add_event(
            tf.Event(wall_time=1, step=1, summary=scalar_summary))
def testRestartProgram_resumesThings(self):
    """Checks that a new reader for the same run resumes after saved progress.

    A first RunReader consumes the first event and saves its progress; a
    second RunReader created for the same run id and event log is then
    expected to return the second event.
    """
    run_id = db.RUN_ROWID.create(1, 1)
    first = tf.Event(step=123)
    second = tf.Event(step=456)
    records = [event.SerializeToString() for event in (first, second)]
    path = self._save_records('events.out.tfevents.1.localhost', records)

    with self.connect_db() as db_conn:
        # First session: read one event, then persist the position.
        with self.EventLog(path) as log, \
                loader.RunReader(run_id, 'doodle') as run:
            run.add_event_log(db_conn, log)
            self.assertEqual(first, run.get_next_event())
            run.save_progress(db_conn)

        # Second session: a fresh log and reader over the same path and id.
        with self.EventLog(path) as log, \
                loader.RunReader(run_id, 'doodle') as run:
            run.add_event_log(db_conn, log)
            self.assertEqual(second, run.get_next_event())
def tb_add_histogram(experiment, name, wall_time, step, histo):
    """Writes a histogram summary event for an experiment.

    The event is added to the writer returned by tb_get_xp_writer, the
    writer is flushed, and tb_modified_xp is called for the experiment.

    Args:
      experiment: Experiment passed to tb_get_xp_writer and tb_modified_xp.
      name: Tag of the summary value.
      wall_time: Wall time of the event.
      step: Step of the event.
      histo: Histogram stored in the summary value's `histo` field.
    """
    xp_writer = tb_get_xp_writer(experiment)
    histogram_value = tf.Summary.Value(tag=name, histo=histo)
    histogram_summary = tf.Summary(value=[histogram_value])
    xp_writer.add_event(
        tf.Event(wall_time=wall_time, step=step, summary=histogram_summary))
    xp_writer.flush()
    tb_modified_xp(experiment)
def testFirstEventTimestampLoadsEvent(self):
    """Checks that FirstEventTimestamp() doesn't discard the loaded event."""
    generator = _EventGenerator()
    accumulator = ea.EventAccumulator(generator)
    version_event = tf.Event(
        wall_time=1, step=2, file_version='brain.Event:2')
    generator.AddEvent(version_event)
    self.assertEqual(accumulator.FirstEventTimestamp(), 1)
    # The file version carried by that first event must still be picked up.
    accumulator.Reload()
    self.assertEqual(accumulator.file_version, 2.0)
def tb_add_scalar(experiment, name, wall_time, step, value):
    """Writes a scalar summary event for an experiment.

    The event is added to the writer returned by tb_get_xp_writer, the
    writer is flushed, and tb_modified_xp is called for the experiment.

    Args:
      experiment: Experiment passed to tb_get_xp_writer and tb_modified_xp.
      name: Tag of the summary value.
      wall_time: Wall time of the event.
      step: Step of the event.
      value: Number stored as the summary's simple_value.
    """
    xp_writer = tb_get_xp_writer(experiment)
    scalar_value = tf.Summary.Value(tag=name, simple_value=value)
    scalar_summary = tf.Summary(value=[scalar_value])
    xp_writer.add_event(
        tf.Event(wall_time=wall_time, step=step, summary=scalar_summary))
    xp_writer.flush()
    tb_modified_xp(experiment)
def write(self, name, data, step=0):
    """Wraps a scalar in a summary event and hands it to writeEvent.

    Args:
      name: Tag of the summary value; the tag groups the plot data into a
        single graph.
      data: Number stored as the summary's simple_value.
      step: Step of the event.
    """
    # The event is stamped with the current time.
    now = time.time()
    scalar_value = tf.Summary.Value(tag=name, simple_value=data)
    scalar_summary = tf.Summary(value=[scalar_value])
    self.writeEvent(
        tf.Event(wall_time=now, step=step, summary=scalar_summary))
def AddHealthPill(self, wall_time, step, device_name, op_name, output_slot,
                  elements):
    """Builds a health pill event and passes it to AddEvent.

    Args:
      wall_time: Wall time of the event.
      step: Step of the event.
      device_name: Appended to ea.HEALTH_PILL_EVENT_TAG_PREFIX to form the
        summary tag.
      op_name: Op name used in the summary value's node name.
      output_slot: Output slot used in the summary value's node name.
      elements: Values stored in the tensor as float64.
    """
    pill_event = tf.Event(step=step, wall_time=wall_time)
    pill_tag = ea.HEALTH_PILL_EVENT_TAG_PREFIX + device_name
    pill_node_name = '%s:%d:DebugNumericSummary' % (op_name, output_slot)
    summary_value = pill_event.summary.value.add(
        tag=pill_tag, node_name=pill_node_name)

    # One-dimensional tensor of doubles holding the raw element bytes.
    tensor = summary_value.tensor
    tensor.tensor_shape.dim.add(size=len(elements))
    tensor.dtype = 2  # DT_DOUBLE
    tensor.tensor_content = np.array(elements, dtype=np.float64).tobytes()
    self.AddEvent(pill_event)
def AddImage(self, tag, wall_time=0, step=0, encoded_image_string=b'imgstr',
             width=150, height=100):
    """Builds an event with one image summary value and passes it to AddEvent.

    Args:
      tag: Tag of the summary value.
      wall_time: Wall time of the event.
      step: Step of the event.
      encoded_image_string: Encoded image bytes stored in the image.
      width: Width recorded on the image.
      height: Height recorded on the image.
    """
    image_proto = tf.Summary.Image(
        encoded_image_string=encoded_image_string,
        width=width,
        height=height)
    image_value = tf.Summary.Value(tag=tag, image=image_proto)
    image_summary = tf.Summary(value=[image_value])
    self.AddEvent(
        tf.Event(wall_time=wall_time, step=step, summary=image_summary))
def testSentinelStepValueAssignedWhenExecutorStepCountKeyIsMissing(self):
    """Checks the written step when the metadata JSON is an empty object.

    The core metadata message is `{}`; the first event written for the
    following health pill must still have a step greater than 0.
    """
    written_events = []
    handler = debugger_server_lib.DebuggerDataStreamHandler(
        events_writer_manager=FakeEventsWriterManager(written_events))

    core_metadata = tf.Event()
    core_metadata.log_message.message = json.dumps({})
    handler.on_core_metadata_event(core_metadata)

    pill_values = list(range(1, 15))
    handler.on_value_event(
        self._create_event_with_float_tensor(
            "MatMul", 0, "DebugNumericSummary", pill_values))
    self.assertGreater(written_events[0].step, 0)
def AddHistogram(self, tag, wall_time=0, step=0, hmin=1, hmax=2, hnum=3,
                 hsum=4, hsum_squares=5, hbucket_limit=None, hbucket=None):
    """Builds an event with one histogram summary and passes it to AddEvent.

    Args:
      tag: Tag of the summary value.
      wall_time: Wall time of the event.
      step: Step of the event.
      hmin: Histogram `min` field.
      hmax: Histogram `max` field.
      hnum: Histogram `num` field.
      hsum: Histogram `sum` field.
      hsum_squares: Histogram `sum_squares` field.
      hbucket_limit: Histogram `bucket_limit` field.
      hbucket: Histogram `bucket` field.
    """
    histogram_proto = tf.HistogramProto(
        min=hmin,
        max=hmax,
        num=hnum,
        sum=hsum,
        sum_squares=hsum_squares,
        bucket_limit=hbucket_limit,
        bucket=hbucket)
    histogram_value = tf.Summary.Value(tag=tag, histo=histogram_proto)
    histogram_summary = tf.Summary(value=[histogram_value])
    self.AddEvent(
        tf.Event(wall_time=wall_time, step=step, summary=histogram_summary))
def testSentinelStepValueAssignedWhenMetadataJsonIsInvalid(self):
    """Checks the written step when the metadata message is not valid JSON.

    The first event written for the following health pill must still have
    a step greater than 0.
    """
    written_events = []
    handler = debugger_server_lib.DebuggerDataStreamHandler(
        events_writer_manager=FakeEventsWriterManager(written_events))

    core_metadata = tf.Event()
    core_metadata.log_message.message = "some invalid JSON string"
    handler.on_core_metadata_event(core_metadata)

    pill_values = list(range(1, 15))
    handler.on_value_event(
        self._create_event_with_float_tensor(
            "MatMul", 0, "DebugNumericSummary", pill_values))
    self.assertGreater(written_events[0].step, 0)