def test_dscasw_fastly(self):
    """Replay the Fastly request trace and check the total hit count.

    Every CSV row is one request; the integer in column index 2 is used as
    the content identifier. A miss inserts the item into the cache. After
    every 2000th request the stream summary is dumped to a dictionary and
    each entry is checked for a sane frequency / max_error pair.
    """
    import csv

    # NOTE(review): first positional argument is presumably the cache
    # size - confirm against the class constructor.
    cache = DataStreamCachingAlgorithmWithSlidingWindowCache(
        100, monitored=500, subwindow_size=1500, subwindows=10)
    trace_path = '../../../resources/Fastly_traces/requests_14M-2015-12-1_reformatted.trace'

    hit_count = 0
    request_count = 0  # stays 0 if the trace turns out to be empty
    with open(trace_path, 'r') as trace_file:
        for request_count, record in enumerate(csv.reader(trace_file), start=1):
            item = int(record[2])
            if not cache.get(item):
                cache.put(item)
            else:
                hit_count += 1

            # Only inspect the summary on every 2000th request.
            if request_count % 2000 != 0:
                continue

            summary = cache._ss_cache.get_stream_summary().convert_to_dictionary()
            for key in summary:
                frequency = summary[key]['frequency']
                max_error = summary[key]['max_error']
                self.assertGreaterEqual(frequency, max_error)
                self.assertGreaterEqual(max_error, 0)
                self.assertGreaterEqual(frequency, 0)

    self.assertEqual([request_count, hit_count], [14885146, 1639996])
def test_dscasw_fastly(self):
    """Replay the Fastly request trace through the cache and verify the totals.

    Each CSV row is treated as one request whose content identifier is the
    integer found in column index 2. On a miss the content is put into the
    cache. Every 2000 requests the stream summary is converted to a
    dictionary and every entry must satisfy
    ``frequency >= max_error >= 0``.

    The test finally expects 14885146 requests and 1639996 cache hits.
    """
    import csv

    c = DataStreamCachingAlgorithmWithSlidingWindowCache(
        100, monitored=500, subwindow_size=1500, subwindows=10)
    cache_hits = 0
    contents = 0
    with open('../../../resources/Fastly_traces/requests_14M-2015-12-1_reformatted.trace', 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            contents += 1
            content = int(row[2])
            if c.get(content):
                cache_hits += 1
            else:
                c.put(content)

            # verify that all elements have a frequency and error that make sense
            if contents % 2000 == 0:
                ss = c._ss_cache.get_stream_summary().convert_to_dictionary()
                for element in ss:
                    self.assertGreaterEqual(ss[element]['frequency'], ss[element]['max_error'])
                    self.assertGreaterEqual(ss[element]['max_error'], 0)
                    self.assertGreaterEqual(ss[element]['frequency'], 0)

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual([contents, cache_hits], [14885146, 1639996])
def test_small_sliding_window(self):
    """Run a 40-element stream through a small cache and inspect its summary.

    After the 20th and the 40th element the stream summary is compared
    against a table of expectations. Each table row is
    ``(element id, expected bucket, position inside that bucket,
    expected max_error)``. At the end 25 hits out of 40 requests are
    expected.
    """
    input_stream = [
        1, 2, 1, 3, 1, 2, 4, 3, 5, 5, 6, 1, 7, 4, 2, 6, 1, 1, 4, 5,
        1, 7, 8, 8, 8, 4, 6, 6, 4, 1, 6, 4, 8, 1, 8, 8, 9, 6, 1, 4,
    ]
    # checkpoint (1-based stream position) -> rows checked in this order
    expected_at = {
        20: (
            (1, 6, 0, 0),
            (4, 4, 1, 2),
            (5, 4, 0, 3),
            (6, 3, 1, 2),
            (2, 3, 0, 2),
        ),
        40: (
            (1, 10, 0, 0),
            (8, 9, 0, 3),
            (4, 8, 1, 2),
            (6, 8, 0, 4),
            (9, 5, 0, 4),
        ),
    }
    cache = DataStreamCachingAlgorithmWithSlidingWindowCache(5, monitored=5, subwindows=2, subwindow_size=20)

    hits = 0
    seen = 0
    for position, item in enumerate(input_stream, start=1):
        seen += 1
        if cache.get(item):
            hits += 1
        else:
            cache.put(item)

        # The caches are printed at the final element, before the summary is fetched.
        if position == 40:
            cache.print_caches()

        rows = expected_at.get(position)
        if rows is None:
            continue

        summary = cache._ss_cache.get_stream_summary()
        for element_id, bucket, slot, max_error in rows:
            self.assertEqual(summary.id_to_bucket_map[element_id], bucket)
            self.assertEqual(summary.bucket_map[bucket][slot].max_error, max_error)

    self.assertEqual([hits, seen], [25, 40])
def test_small_sliding_window(self):
    """Check the stream summary of a small cache at two points of a stream.

    A fixed 40-element stream is requested element by element; a miss puts
    the element into the cache. After element 20 and element 40 the test
    checks, for five element ids each, the bucket the id maps to and the
    ``max_error`` of the entry at a given position in that bucket. Finally
    25 cache hits over 40 requests are expected.
    """
    cache_hits = 0
    contents = 0
    input_stream = [
        1, 2, 1, 3, 1, 2, 4, 3, 5, 5, 6, 1, 7, 4, 2, 6, 1, 1, 4, 5,
        1, 7, 8, 8, 8, 4, 6, 6, 4, 1, 6, 4, 8, 1, 8, 8, 9, 6, 1, 4,
    ]
    c = DataStreamCachingAlgorithmWithSlidingWindowCache(5, monitored=5, subwindows=2, subwindow_size=20)

    # assertEqual is used throughout: the assertEquals alias is deprecated
    # and no longer exists in Python 3.12.
    for i, input_element in enumerate(input_stream, start=1):
        contents += 1
        if not c.get(input_element):
            c.put(input_element)
        else:
            cache_hits += 1

        if i == 20:
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[1], 6)
            self.assertEqual(stream_summary.bucket_map[6][0].max_error, 0)
            self.assertEqual(stream_summary.id_to_bucket_map[4], 4)
            self.assertEqual(stream_summary.bucket_map[4][1].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[5], 4)
            self.assertEqual(stream_summary.bucket_map[4][0].max_error, 3)
            self.assertEqual(stream_summary.id_to_bucket_map[6], 3)
            self.assertEqual(stream_summary.bucket_map[3][1].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[2], 3)
            self.assertEqual(stream_summary.bucket_map[3][0].max_error, 2)

        if i == 40:
            c.print_caches()
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[1], 10)
            self.assertEqual(stream_summary.bucket_map[10][0].max_error, 0)
            self.assertEqual(stream_summary.id_to_bucket_map[8], 9)
            self.assertEqual(stream_summary.bucket_map[9][0].max_error, 3)
            self.assertEqual(stream_summary.id_to_bucket_map[4], 8)
            self.assertEqual(stream_summary.bucket_map[8][1].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[6], 8)
            self.assertEqual(stream_summary.bucket_map[8][0].max_error, 4)
            self.assertEqual(stream_summary.id_to_bucket_map[9], 5)
            self.assertEqual(stream_summary.bucket_map[5][0].max_error, 4)

    self.assertEqual([cache_hits, contents], [25, 40])
def test_medium_sliding_window(self):
    """Run a 100-element stream through a small cache, checking every 20 items.

    At positions 20, 40, 60, 80 and 100 the stream summary is compared
    against a table whose rows are ``(element id, expected bucket,
    position inside that bucket, expected max_error)``. At positions 20
    and 40 the LRU cache dump and the guaranteed top-k list are checked as
    well. At the end 31 hits out of 100 requests are expected.
    """
    input_stream = [
        13, 2, 1, 6, 7, 2, 3, 6, 9, 8, 10, 1, 4, 6, 6, 1, 13, 3, 1, 5,
        1, 15, 2, 17, 3, 6, 1, 14, 2, 1, 5, 17, 6, 3, 9, 7, 3, 16, 8, 2,
        19, 4, 1, 4, 6, 6, 1, 5, 14, 8, 2, 9, 14, 1, 6, 4, 3, 7, 1, 9,
        2, 1, 2, 4, 1, 4, 5, 11, 4, 4, 1, 17, 6, 1, 13, 9, 3, 13, 3, 1,
        14, 4, 2, 3, 9, 8, 2, 7, 8, 14, 6, 3, 3, 2, 2, 1, 1, 7, 4, 3,
    ]
    # checkpoint (1-based stream position) -> summary rows, checked in order
    expected_summary = {
        20: (
            (1, 5, 0, 2),
            (6, 4, 2, 0),
            (5, 4, 1, 3),
            (3, 4, 0, 3),
            (13, 3, 0, 2),
        ),
        40: (
            (1, 8, 4, 2),
            (3, 8, 3, 6),
            (2, 8, 2, 7),
            (8, 8, 1, 7),
            (16, 8, 0, 7),
        ),
        60: (
            (1, 12, 4, 2),
            (7, 12, 3, 11),
            (4, 12, 2, 11),
            (3, 12, 1, 11),
            (9, 12, 0, 11),
        ),
        80: (
            (1, 17, 0, 2),
            (4, 16, 2, 11),
            (13, 16, 1, 14),
            (3, 16, 0, 14),
            (9, 15, 0, 14),
        ),
        100: (
            (3, 21, 0, 18),
            (2, 20, 2, 18),
            (1, 20, 1, 18),
            (4, 20, 0, 19),
            (8, 19, 0, 17),
        ),
    }
    # The LRU dump is only verified at the first two checkpoints.
    expected_lru = {
        20: [5, 1, 3, 13, 6],
        40: [2, 8, 16, 3, 7],
    }
    cache = DataStreamCachingAlgorithmWithSlidingWindowCache(5, monitored=5, subwindows=2, subwindow_size=20)

    hits = 0
    seen = 0
    for position, item in enumerate(input_stream, start=1):
        seen += 1
        if cache.get(item):
            hits += 1
        else:
            cache.put(item)

        rows = expected_summary.get(position)
        if rows is None:
            continue

        summary = cache._ss_cache.get_stream_summary()
        for element_id, bucket, slot, max_error in rows:
            self.assertEqual(summary.id_to_bucket_map[element_id], bucket)
            self.assertEqual(summary.bucket_map[bucket][slot].max_error, max_error)

        if position in expected_lru:
            self.assertEqual(cache._lru_cache.dump(), expected_lru[position])
            self.assertEqual(cache._guaranteed_top_k, [])

    self.assertEqual([hits, seen], [31, 100])
def test_medium_sliding_window(self):
    """Check the stream summary of a small cache at five points of a stream.

    A fixed 100-element stream is requested element by element; a miss puts
    the element into the cache. After every 20th element the test checks,
    for five element ids each, the bucket the id maps to and the
    ``max_error`` of the entry at a given position in that bucket. After
    elements 20 and 40 it also checks the LRU cache dump and that the
    guaranteed top-k list is empty. Finally 31 cache hits over 100
    requests are expected.
    """
    cache_hits = 0
    contents = 0
    input_stream = [
        13, 2, 1, 6, 7, 2, 3, 6, 9, 8, 10, 1, 4, 6, 6, 1, 13, 3, 1, 5,
        1, 15, 2, 17, 3, 6, 1, 14, 2, 1, 5, 17, 6, 3, 9, 7, 3, 16, 8, 2,
        19, 4, 1, 4, 6, 6, 1, 5, 14, 8, 2, 9, 14, 1, 6, 4, 3, 7, 1, 9,
        2, 1, 2, 4, 1, 4, 5, 11, 4, 4, 1, 17, 6, 1, 13, 9, 3, 13, 3, 1,
        14, 4, 2, 3, 9, 8, 2, 7, 8, 14, 6, 3, 3, 2, 2, 1, 1, 7, 4, 3,
    ]
    c = DataStreamCachingAlgorithmWithSlidingWindowCache(5, monitored=5, subwindows=2, subwindow_size=20)

    # assertEqual is used throughout: the assertEquals alias is deprecated
    # and no longer exists in Python 3.12.
    for i, input_element in enumerate(input_stream, start=1):
        contents += 1
        if not c.get(input_element):
            c.put(input_element)
        else:
            cache_hits += 1

        if i == 20:
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[1], 5)
            self.assertEqual(stream_summary.bucket_map[5][0].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[6], 4)
            self.assertEqual(stream_summary.bucket_map[4][2].max_error, 0)
            self.assertEqual(stream_summary.id_to_bucket_map[5], 4)
            self.assertEqual(stream_summary.bucket_map[4][1].max_error, 3)
            self.assertEqual(stream_summary.id_to_bucket_map[3], 4)
            self.assertEqual(stream_summary.bucket_map[4][0].max_error, 3)
            self.assertEqual(stream_summary.id_to_bucket_map[13], 3)
            self.assertEqual(stream_summary.bucket_map[3][0].max_error, 2)
            self.assertEqual(c._lru_cache.dump(), [5, 1, 3, 13, 6])
            self.assertEqual(c._guaranteed_top_k, [])

        if i == 40:
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[1], 8)
            self.assertEqual(stream_summary.bucket_map[8][4].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[3], 8)
            self.assertEqual(stream_summary.bucket_map[8][3].max_error, 6)
            self.assertEqual(stream_summary.id_to_bucket_map[2], 8)
            self.assertEqual(stream_summary.bucket_map[8][2].max_error, 7)
            self.assertEqual(stream_summary.id_to_bucket_map[8], 8)
            self.assertEqual(stream_summary.bucket_map[8][1].max_error, 7)
            self.assertEqual(stream_summary.id_to_bucket_map[16], 8)
            self.assertEqual(stream_summary.bucket_map[8][0].max_error, 7)
            self.assertEqual(c._lru_cache.dump(), [2, 8, 16, 3, 7])
            self.assertEqual(c._guaranteed_top_k, [])

        if i == 60:
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[1], 12)
            self.assertEqual(stream_summary.bucket_map[12][4].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[7], 12)
            self.assertEqual(stream_summary.bucket_map[12][3].max_error, 11)
            self.assertEqual(stream_summary.id_to_bucket_map[4], 12)
            self.assertEqual(stream_summary.bucket_map[12][2].max_error, 11)
            self.assertEqual(stream_summary.id_to_bucket_map[3], 12)
            self.assertEqual(stream_summary.bucket_map[12][1].max_error, 11)
            self.assertEqual(stream_summary.id_to_bucket_map[9], 12)
            self.assertEqual(stream_summary.bucket_map[12][0].max_error, 11)

        if i == 80:
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[1], 17)
            self.assertEqual(stream_summary.bucket_map[17][0].max_error, 2)
            self.assertEqual(stream_summary.id_to_bucket_map[4], 16)
            self.assertEqual(stream_summary.bucket_map[16][2].max_error, 11)
            self.assertEqual(stream_summary.id_to_bucket_map[13], 16)
            self.assertEqual(stream_summary.bucket_map[16][1].max_error, 14)
            self.assertEqual(stream_summary.id_to_bucket_map[3], 16)
            self.assertEqual(stream_summary.bucket_map[16][0].max_error, 14)
            self.assertEqual(stream_summary.id_to_bucket_map[9], 15)
            self.assertEqual(stream_summary.bucket_map[15][0].max_error, 14)

        if i == 100:
            stream_summary = c._ss_cache.get_stream_summary()
            self.assertEqual(stream_summary.id_to_bucket_map[3], 21)
            self.assertEqual(stream_summary.bucket_map[21][0].max_error, 18)
            self.assertEqual(stream_summary.id_to_bucket_map[2], 20)
            self.assertEqual(stream_summary.bucket_map[20][2].max_error, 18)
            self.assertEqual(stream_summary.id_to_bucket_map[1], 20)
            self.assertEqual(stream_summary.bucket_map[20][1].max_error, 18)
            self.assertEqual(stream_summary.id_to_bucket_map[4], 20)
            self.assertEqual(stream_summary.bucket_map[20][0].max_error, 19)
            self.assertEqual(stream_summary.id_to_bucket_map[8], 19)
            self.assertEqual(stream_summary.bucket_map[19][0].max_error, 17)

    self.assertEqual([cache_hits, contents], [31, 100])