def __init__(self,
             path,
             size_guidance=DEFAULT_SIZE_GUIDANCE,
             compression_bps=NORMAL_HISTOGRAM_BPS):
  """Construct the `EventAccumulator`.

  Args:
    path: A file path to a directory containing tf events files, or a single
      tf events file. The accumulator will load events from this path.
    size_guidance: Information on how much data the EventAccumulator should
      store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
      so as to avoid OOMing the client. The size_guidance should be a map
      from a `tagType` string to an integer representing the number of items
      to keep per tag for items of that `tagType`. If the size is 0, all
      events are stored.
    compression_bps: Information on how the `EventAccumulator` should compress
      histogram data for the `CompressedHistograms` tag (for details see
      `ProcessCompressedHistogram`).
  """
  # Overlay any caller-supplied limits on top of the defaults; keys that are
  # not present in DEFAULT_SIZE_GUIDANCE are ignored.
  sizes = {tag_type: size_guidance.get(tag_type, default)
           for tag_type, default in DEFAULT_SIZE_GUIDANCE.items()}

  # One reservoir per tag type, each bounded by its configured size.
  self._scalars = reservoir.Reservoir(size=sizes[SCALARS])
  self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
  self._compressed_histograms = reservoir.Reservoir(
      size=sizes[COMPRESSED_HISTOGRAMS])
  self._images = reservoir.Reservoir(size=sizes[IMAGES])
  self._graph = None

  # The generator yields events from the files at `path`; the mutex
  # serializes access to it.
  self._generator_mutex = threading.Lock()
  self._generator = _GeneratorFromPath(path)

  self._is_autoupdating = False
  self._activated = False
  self._compression_bps = compression_bps
def testUsesSeed(self):
  """Reservoirs created with different seeds keep different samples."""
  key = 'key'
  seeded_zero = reservoir.Reservoir(10, seed=0)
  seeded_one = reservoir.Reservoir(10, seed=1)
  for value in xrange(100):
    seeded_zero.AddItem(key, value)
    seeded_one.AddItem(key, value)
  # With 100 items squeezed into size-10 reservoirs, differently seeded
  # samplers should retain different subsets.
  self.assertNotEqual(seeded_zero.Items(key), seeded_one.Items(key))
def testExceptions(self):
  """Bad sizes raise ValueError; looking up an unknown key raises KeyError."""
  # Negative and non-integer sizes are both rejected at construction time.
  for bad_size in (-1, 13.3):
    with self.assertRaises(ValueError):
      reservoir.Reservoir(bad_size)

  valid = reservoir.Reservoir(12)
  with self.assertRaises(KeyError):
    valid.Items('missing key')
def __init__(self,
             path,
             size_guidance=DEFAULT_SIZE_GUIDANCE,
             compression_bps=NORMAL_HISTOGRAM_BPS,
             purge_orphaned_data=True):
  """Construct the `EventAccumulator`.

  Args:
    path: A file path to a directory containing tf events files, or a single
      tf events file. The accumulator will load events from this path.
    size_guidance: Information on how much data the EventAccumulator should
      store in memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much
      so as to avoid OOMing the client. The size_guidance should be a map
      from a `tagType` string to an integer representing the number of items
      to keep per tag for items of that `tagType`. If the size is 0, all
      events are stored.
    compression_bps: Information on how the `EventAccumulator` should compress
      histogram data for the `CompressedHistograms` tag (for details see
      `ProcessCompressedHistogram`).
    purge_orphaned_data: Whether to discard any events that were "orphaned" by
      a TensorFlow restart.
  """
  # Overlay any caller-supplied limits on top of the defaults; keys that are
  # not present in DEFAULT_SIZE_GUIDANCE are ignored.
  sizes = {tag_type: size_guidance.get(tag_type, default)
           for tag_type, default in DEFAULT_SIZE_GUIDANCE.items()}

  self._first_event_timestamp = None

  # One reservoir per tag type, each bounded by its configured size.
  self._scalars = reservoir.Reservoir(size=sizes[SCALARS])
  self._histograms = reservoir.Reservoir(size=sizes[HISTOGRAMS])
  self._compressed_histograms = reservoir.Reservoir(
      size=sizes[COMPRESSED_HISTOGRAMS], always_keep_last=False)
  self._images = reservoir.Reservoir(size=sizes[IMAGES])
  self._audio = reservoir.Reservoir(size=sizes[AUDIO])

  # Graph and run-metadata state.
  self._graph = None
  self._graph_from_metagraph = False
  self._meta_graph = None
  self._tagged_metadata = {}

  # The generator yields events from the files at `path`; the mutex
  # serializes access to it.
  self._generator_mutex = threading.Lock()
  self._generator = _GeneratorFromPath(path)

  self._compression_bps = compression_bps
  self.purge_orphaned_data = purge_orphaned_data

  # Progress markers used to detect events orphaned by a restart.
  self.most_recent_step = -1
  self.most_recent_wall_time = -1
  self.file_version = None

  # The attributes that get built up by the accumulator
  self.accumulated_attrs = ('_scalars', '_histograms',
                            '_compressed_histograms', '_images', '_audio')
self._tensor_summaries = {}
def testDeterminism(self):
  """Two identically-configured reservoirs produce identical samples."""
  key = 'key'
  first = reservoir.Reservoir(10)
  second = reservoir.Reservoir(10)
  for value in xrange(100):
    first.AddItem(key, value)
    second.AddItem(key, value)
  self.assertEqual(first.Items(key), second.Items(key))
def testItemsAndKeys(self):
  """Keys() lists every tag seen; Items() keeps per-tag insertion order."""
  r = reservoir.Reservoir(42)
  for tag, value in (('foo', 4), ('bar', 9), ('foo', 19)):
    r.AddItem(tag, value)
  self.assertItemsEqual(r.Keys(), ['foo', 'bar'])
  self.assertEqual(r.Items('foo'), [4, 19])
  self.assertEqual(r.Items('bar'), [9])
def testBucketDeterminism(self):
  """Reservoirs are deterministic at a bucket level.

  Only the order in which elements are added within a single bucket matters;
  interleaving additions across buckets must not change what each bucket
  retains.
  """
  separate = reservoir.Reservoir(10)
  interleaved = reservoir.Reservoir(10)

  # Fill one reservoir key-by-key and the other with interleaved keys.
  for key in ('key1', 'key2'):
    for value in xrange(100):
      separate.AddItem(key, value)
  for value in xrange(100):
    interleaved.AddItem('key1', value)
    interleaved.AddItem('key2', value)

  for key in ('key1', 'key2'):
    self.assertEqual(separate.Items(key), interleaved.Items(key))
def testFilterItemsByKey(self):
  """FilterItems prunes only the named key and returns the removal count."""
  r = reservoir.Reservoir(100, seed=0)
  for value in xrange(10):
    r.AddItem('key1', value)
    r.AddItem('key2', value)
  self.assertEqual(len(r.Items('key1')), 10)
  self.assertEqual(len(r.Items('key2')), 10)

  # Dropping items > 7 from key2 removes two; key1 is untouched.
  self.assertEqual(r.FilterItems(lambda x: x <= 7, 'key2'), 2)
  self.assertEqual(len(r.Items('key2')), 8)
  self.assertEqual(len(r.Items('key1')), 10)

  # Dropping items > 3 from key1 removes six; key2 is untouched.
  self.assertEqual(r.FilterItems(lambda x: x <= 3, 'key1'), 6)
  self.assertEqual(len(r.Items('key1')), 4)
  self.assertEqual(len(r.Items('key2')), 8)
def testRespectsSize(self):
  """Each lazily-created bucket inherits the reservoir's max size."""
  r = reservoir.Reservoir(42)
  # Reaching into the private bucket map to confirm the size was plumbed
  # through to the bucket.
  bucket = r._buckets['meaning of life']
  self.assertEqual(bucket._max_size, 42)
def testEmptyReservoir(self):
  """A reservoir that has had no items added reports no keys."""
  empty = reservoir.Reservoir(1)
  self.assertFalse(empty.Keys())