def testUsesSeed(self):
    """Tests that reservoirs with different seeds keep different samples."""
    key = 'key'
    r1 = reservoir.Reservoir(10, seed=0)
    r2 = reservoir.Reservoir(10, seed=1)
    for i in xrange(100):
        r1.AddItem('key', i)
        r2.AddItem('key', i)
    self.assertNotEqual(r1.Items(key), r2.Items(key))
def testExceptions(self):
    with self.assertRaises(ValueError):
        reservoir.Reservoir(-1)
    with self.assertRaises(ValueError):
        reservoir.Reservoir(13.3)

    r = reservoir.Reservoir(12)
    with self.assertRaises(KeyError):
        r.Items("missing key")
def testDeterminism(self):
    """Tests that the reservoir is deterministic."""
    key = "key"
    r1 = reservoir.Reservoir(10)
    r2 = reservoir.Reservoir(10)
    for i in xrange(100):
        r1.AddItem("key", i)
        r2.AddItem("key", i)
    self.assertEqual(r1.Items(key), r2.Items(key))
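# A minimal sketch of the sampling behavior the two tests above rely on,
# assuming reservoir sampling (Vitter's Algorithm R) with one PRNG per
# bucket. `_SketchBucket` is a hypothetical stand-in, not the actual bucket
# class in `reservoir`: with the same seed and the same input sequence, two
# buckets make identical keep/replace decisions, which is exactly the
# determinism asserted in testDeterminism and the divergence asserted in
# testUsesSeed.
import random


class _SketchBucket(object):
    def __init__(self, max_size, seed=0):
        self._max_size = max_size
        self._random = random.Random(seed)  # per-bucket PRNG drives determinism
        self._num_seen = 0
        self.items = []

    def AddItem(self, item):
        self._num_seen += 1
        if len(self.items) < self._max_size:
            # Fill phase: keep everything until the reservoir is full.
            self.items.append(item)
        else:
            # Replacement phase: the new item is kept with probability
            # max_size / num_seen, preserving a uniform sample.
            slot = self._random.randint(0, self._num_seen - 1)
            if slot < self._max_size:
                self.items[slot] = item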
def _ProcessTensor(self, tag, wall_time, step, tensor):
    tv = TensorEvent(wall_time=wall_time, step=step, tensor_proto=tensor)
    with self._tensors_by_tag_lock:
        if tag not in self.tensors_by_tag:
            # Create the reservoir for this tag lazily, so its size can
            # depend on the tag's plugin (see _GetTensorReservoirSize).
            reservoir_size = self._GetTensorReservoirSize(tag)
            self.tensors_by_tag[tag] = reservoir.Reservoir(reservoir_size)
    # The reservoir is itself thread-safe, so AddItem can run outside the
    # lock that guards the tensors_by_tag dict.
    self.tensors_by_tag[tag].AddItem(_TENSOR_RESERVOIR_KEY, tv)
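# `_GetTensorReservoirSize` is what lets different plugins get different
# reservoir capacities. A plausible sketch, reconstructed from the
# constructor docstring below (hypothetical name and body, not the actual
# implementation): look up the tag's plugin via its stored summary
# metadata, prefer a per-plugin entry in `_tensor_size_guidance`, and fall
# back to the generic `TENSORS` size.
def _GetTensorReservoirSizeSketch(self, tag):
    metadata = self.summary_metadata.get(tag)
    if metadata is not None:
        plugin_name = metadata.plugin_data.plugin_name
        if plugin_name in self._tensor_size_guidance:
            return self._tensor_size_guidance[plugin_name]
    return self._size_guidance[TENSORS]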
def testItemsAndKeys(self):
    r = reservoir.Reservoir(42)
    r.AddItem("foo", 4)
    r.AddItem("bar", 9)
    r.AddItem("foo", 19)
    self.assertItemsEqual(r.Keys(), ["foo", "bar"])
    self.assertEqual(r.Items("foo"), [4, 19])
    self.assertEqual(r.Items("bar"), [9])
def testBucketDeterminism(self):
    """Tests that reservoirs are deterministic at a bucket level.

    This means that only the order in which elements are added within a
    bucket matters.
    """
    separate_reservoir = reservoir.Reservoir(10)
    interleaved_reservoir = reservoir.Reservoir(10)
    for i in xrange(100):
        separate_reservoir.AddItem('key1', i)
    for i in xrange(100):
        separate_reservoir.AddItem('key2', i)
    for i in xrange(100):
        interleaved_reservoir.AddItem('key1', i)
        interleaved_reservoir.AddItem('key2', i)
    for key in ['key1', 'key2']:
        self.assertEqual(separate_reservoir.Items(key),
                         interleaved_reservoir.Items(key))
def testFilterItemsByKey(self):
    r = reservoir.Reservoir(100, seed=0)
    for i in xrange(10):
        r.AddItem("key1", i)
        r.AddItem("key2", i)

    self.assertEqual(len(r.Items("key1")), 10)
    self.assertEqual(len(r.Items("key2")), 10)

    # FilterItems returns the number of items it removed and leaves other
    # keys untouched.
    self.assertEqual(r.FilterItems(lambda x: x <= 7, "key2"), 2)
    self.assertEqual(len(r.Items("key2")), 8)
    self.assertEqual(len(r.Items("key1")), 10)

    self.assertEqual(r.FilterItems(lambda x: x <= 3, "key1"), 6)
    self.assertEqual(len(r.Items("key1")), 4)
    self.assertEqual(len(r.Items("key2")), 8)
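# The contract exercised above: `FilterItems(fn, key)` drops every sampled
# item for which `fn` returns False, touches only the named key, and
# returns the number of items removed. A minimal sketch of that contract
# (a sketch only; the real implementation also has to keep its sampling
# statistics consistent after removals):
def FilterItemsSketch(self, filterFn, key):
    bucket = self._buckets[key]
    initial_count = len(bucket.items)
    bucket.items = [item for item in bucket.items if filterFn(item)]
    return initial_count - len(bucket.items)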
def __init__(
    self,
    path,
    size_guidance=None,
    compression_bps=NORMAL_HISTOGRAM_BPS,
    purge_orphaned_data=True,
):
    """Construct the `EventAccumulator`.

    Args:
      path: A file path to a directory containing tf events files, or a single
        tf events file. The accumulator will load events from this path.
      size_guidance: Information on how much data the EventAccumulator should
        store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
        so as to avoid OOMing the client. The size_guidance should be a map
        from a `tagType` string to an integer representing the number of items
        to keep per tag for items of that `tagType`. If the size is 0, all
        events are stored.
      compression_bps: Information on how the `EventAccumulator` should
        compress histogram data for the `CompressedHistograms` tag (for
        details see `ProcessCompressedHistogram`).
      purge_orphaned_data: Whether to discard any events that were "orphaned"
        by a TensorFlow restart.
    """
    size_guidance = size_guidance or DEFAULT_SIZE_GUIDANCE
    sizes = {}
    for key in DEFAULT_SIZE_GUIDANCE:
        if key in size_guidance:
            sizes[key] = size_guidance[key]
        else:
            sizes[key] = DEFAULT_SIZE_GUIDANCE[key]

    self._first_event_timestamp = None
    self.scalars = reservoir.Reservoir(size=sizes[SCALARS])

    self._graph = None
    self._graph_from_metagraph = False
    self._meta_graph = None
    self._tagged_metadata = {}
    self.summary_metadata = {}
    self.histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
    self.compressed_histograms = reservoir.Reservoir(
        size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False
    )
    self.images = reservoir.Reservoir(size=sizes[IMAGES])
    self.audios = reservoir.Reservoir(size=sizes[AUDIO])
    self.tensors = reservoir.Reservoir(size=sizes[TENSORS])

    # Keep a mapping from plugin name to a dict mapping from tag to the
    # plugin data content obtained from the SummaryMetadata (the metadata
    # field of Value) for that plugin (this is not the entire SummaryMetadata
    # proto, only the content for that plugin). The SummaryWriter only keeps
    # the content on the first event encountered per tag, so we must store
    # that first instance of content for each tag.
    self._plugin_to_tag_to_content = collections.defaultdict(dict)

    self._generator_mutex = threading.Lock()
    self.path = path
    self._generator = _GeneratorFromPath(path)

    self._compression_bps = compression_bps
    self.purge_orphaned_data = purge_orphaned_data

    self.most_recent_step = -1
    self.most_recent_wall_time = -1
    self.file_version = None

    # The attributes that get built up by the accumulator
    self.accumulated_attrs = (
        "scalars",
        "histograms",
        "compressed_histograms",
        "images",
        "audios",
    )
    self._tensor_summaries = {}
def testRespectsSize(self):
    r = reservoir.Reservoir(42)
    self.assertEqual(r._buckets["meaning of life"]._max_size, 42)
def testEmptyReservoir(self):
    r = reservoir.Reservoir(1)
    self.assertFalse(r.Keys())
def __init__(self,
             path,
             size_guidance=DEFAULT_SIZE_GUIDANCE,
             compression_bps=NORMAL_HISTOGRAM_BPS,
             purge_orphaned_data=True):
    """Construct the `EventAccumulator`.

    Args:
      path: A file path to a directory containing tf events files, or a single
        tf events file. The accumulator will load events from this path.
      size_guidance: Information on how much data the EventAccumulator should
        store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
        so as to avoid OOMing the client. The size_guidance should be a map
        from a `tagType` string to an integer representing the number of items
        to keep per tag for items of that `tagType`. If the size is 0, all
        events are stored.
      compression_bps: Information on how the `EventAccumulator` should
        compress histogram data for the `CompressedHistograms` tag (for
        details see `ProcessCompressedHistogram`).
      purge_orphaned_data: Whether to discard any events that were "orphaned"
        by a TensorFlow restart.
    """
    sizes = {}
    for key in DEFAULT_SIZE_GUIDANCE:
        if key in size_guidance:
            sizes[key] = size_guidance[key]
        else:
            sizes[key] = DEFAULT_SIZE_GUIDANCE[key]

    self._first_event_timestamp = None
    self._scalars = reservoir.Reservoir(size=sizes[SCALARS])

    # Unlike the other reservoirs, the reservoir for health pills is keyed by
    # the name of the op instead of the tag. This lets us efficiently obtain
    # the health pills per node.
    self._health_pills = reservoir.Reservoir(size=sizes[HEALTH_PILLS])

    self._graph = None
    self._graph_from_metagraph = False
    self._meta_graph = None
    self._tagged_metadata = {}
    self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
    self._compressed_histograms = reservoir.Reservoir(
        size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False)
    self._images = reservoir.Reservoir(size=sizes[IMAGES])
    self._audio = reservoir.Reservoir(size=sizes[AUDIO])
    self._tensors = reservoir.Reservoir(size=sizes[TENSORS])

    self._generator_mutex = threading.Lock()
    self.path = path
    self._generator = _GeneratorFromPath(path)

    self._compression_bps = compression_bps
    self.purge_orphaned_data = purge_orphaned_data

    self.most_recent_step = -1
    self.most_recent_wall_time = -1
    self.file_version = None

    # The attributes that get built up by the accumulator
    self.accumulated_attrs = ('_scalars', '_histograms',
                              '_compressed_histograms', '_images', '_audio')
    self._tensor_summaries = {}
def __init__(self,
             path,
             size_guidance=None,
             tensor_size_guidance=None,
             purge_orphaned_data=True):
    """Construct the `EventAccumulator`.

    Args:
      path: A file path to a directory containing tf events files, or a single
        tf events file. The accumulator will load events from this path.
      size_guidance: Information on how much data the EventAccumulator should
        store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
        so as to avoid OOMing the client. The size_guidance should be a map
        from a `tagType` string to an integer representing the number of items
        to keep per tag for items of that `tagType`. If the size is 0, all
        events are stored.
      tensor_size_guidance: Like `size_guidance`, but allowing finer
        granularity for tensor summaries. Should be a map from the
        `plugin_name` field on the `PluginData` proto to an integer
        representing the number of items to keep per tag. Plugins for which
        there is no entry in this map will default to the value of
        `size_guidance[event_accumulator.TENSORS]`. Defaults to `{}`.
      purge_orphaned_data: Whether to discard any events that were "orphaned"
        by a TensorFlow restart.
    """
    size_guidance = dict(size_guidance or DEFAULT_SIZE_GUIDANCE)
    sizes = {}
    for key in DEFAULT_SIZE_GUIDANCE:
        if key in size_guidance:
            sizes[key] = size_guidance[key]
        else:
            sizes[key] = DEFAULT_SIZE_GUIDANCE[key]
    self._size_guidance = size_guidance
    self._tensor_size_guidance = dict(tensor_size_guidance or {})

    self._first_event_timestamp = None
    self.scalars = reservoir.Reservoir(size=sizes[SCALARS])

    self._graph = None
    self._graph_from_metagraph = False
    self._meta_graph = None
    self._tagged_metadata = {}
    self.summary_metadata = {}
    self.audios = reservoir.Reservoir(size=sizes[AUDIO])
    self.tensors_by_tag = {}
    self._tensors_by_tag_lock = threading.Lock()

    # Keep a mapping from plugin name to a dict mapping from tag to the
    # plugin data content obtained from the SummaryMetadata (the metadata
    # field of Value) for that plugin (this is not the entire SummaryMetadata
    # proto, only the content for that plugin). The SummaryWriter only keeps
    # the content on the first event encountered per tag, so we must store
    # that first instance of content for each tag.
    self._plugin_to_tag_to_content = collections.defaultdict(dict)

    self._generator_mutex = threading.Lock()
    self.path = path
    self._generator = _GeneratorFromPath(path)

    self.purge_orphaned_data = purge_orphaned_data

    self.most_recent_step = -1
    self.most_recent_wall_time = -1
    self.file_version = None

    # The attributes that get built up by the accumulator
    self.accumulated_attrs = ('scalars', 'audios')
    self._tensor_summaries = {}
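# A usage sketch for the constructor above. The log directory and the
# "my_plugin" name are made up for illustration: scalars are capped at 500
# samples per tag, and tensor summaries written by "my_plugin" get a
# per-plugin reservoir size instead of the generic TENSORS default.
accumulator = EventAccumulator(
    '/tmp/logdir',  # hypothetical directory of tf events files
    size_guidance={SCALARS: 500},
    tensor_size_guidance={'my_plugin': 100},
)
accumulator.Reload()  # read events from disk before querying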