class TestScheduling(unittest.TestCase):
    ''' Tests for Scheduling module. '''

    def setUp(self):
        self.layers = {}
        self.layers['BASE'] = ConvLayer(8, 16, 28, 3)
        self.layers['POOL'] = PoolingLayer(16, 28, 2)
        self.layers['LR'] = LocalRegionLayer(16, 28, nreg=3, sreg=1)

        self.batch_size = 4

        self.cost = Cost(mac_op=1, mem_hier=(200, 6, 2, 1),
                         noc_hop=50, idl_unit=50)

        self.none_cstr = SchedulingConstraint()
        self.cstr = SchedulingConstraint(topofm=1, topbat=self.batch_size)

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16), size_gbuf=65536, size_regf=64,
            array_bus_width=float('inf'), dram_bandwidth=float('inf'),
            no_time_mux=False)

        self.options = Option(partition_hybrid=True, partition_batch=True,
                              partition_ifmaps=True, ntops=10)

        self.ifmap_layouts = {}
        part = PartitionScheme(order=(pe.INPP, pe.BATP, pe.OUTP, pe.OFMP),
                               pdims=((1, 2), (2, 1), (1, 2), (2, 1)))
        for wlkey in self.layers:
            input_layer = self.layers[wlkey].input_layer()
            self.ifmap_layouts[wlkey] = DataLayout(
                frngs=(FmapRange((0, 0, 0, 0),
                                 FmapPosition(b=self.batch_size,
                                              n=input_layer.nofm,
                                              h=input_layer.hofm,
                                              w=input_layer.wofm)),),
                regions=(self.resource.src_data_region,),
                parts=(part.projection(self.resource.src_data_region,
                                       appl2frng=True),))

        self.sched_seq = (2, 0, 1)

    def test_valid_args(self):
        ''' Valid arguments for constructor. '''
        schd = Scheduling(self.layers['BASE'], self.batch_size, self.cost,
                          MapStrategyEyeriss)
        self.assertEqual(schd.layer, self.layers['BASE'])
        self.assertEqual(schd.batch_size, self.batch_size)
        self.assertEqual(schd.cost, self.cost)
        self.assertEqual(schd.map_strategy_class, MapStrategyEyeriss)

    def test_invalid_layer(self):
        ''' Invalid layer argument. '''
        with self.assertRaisesRegex(TypeError, 'Scheduling: .*layer.*'):
            _ = Scheduling((64, 128, 28, 3), self.batch_size, self.cost,
                           MapStrategyEyeriss)

    def test_invalid_cost(self):
        ''' Invalid cost argument. '''
        with self.assertRaisesRegex(TypeError, 'Scheduling: .*cost.*'):
            _ = Scheduling(self.layers['BASE'], self.batch_size,
                           tuple(self.cost), MapStrategyEyeriss)

    def test_invalid_map_strategy(self):
        ''' Invalid map_strategy_class argument. '''
        class _DummyClass(object):  # pylint: disable=too-few-public-methods
            pass

        with self.assertRaisesRegex(TypeError,
                                    'Scheduling: .*map_strategy_class.*'):
            _ = Scheduling(self.layers['BASE'], self.batch_size, self.cost,
                           _DummyClass)

    def test_schedule_search(self):
        ''' Schedule search. '''
        for wlkey in self.layers:
            layer = self.layers[wlkey]
            ifmap_layout = self.ifmap_layouts[wlkey]

            schd = Scheduling(layer, self.batch_size, self.cost,
                              MapStrategyEyeriss)

            condition = SchedulingCondition(resource=self.resource,
                                            constraint=self.cstr,
                                            ifmap_layout=ifmap_layout,
                                            sched_seq=self.sched_seq)

            res = schd.schedule_search(condition, self.options)

            # Top N.
            self.assertLessEqual(len(res), self.options.ntops)
            self.assertTrue(all(isinstance(r, SchedulingResult)
                                for r in res))
            for idx in range(len(res) - 1):
                self.assertLessEqual(res[idx].total_cost,
                                     res[idx + 1].total_cost)

            # Combination of loop blocking and partitioning.
            for r in res:
                self.assertAlmostEqual(
                    r.total_cost,
                    r.scheme['cost_op'] + r.scheme['cost_access']
                    + r.scheme['cost_noc'] + r.scheme['cost_static'])
                self.assertEqual(r.total_ops,
                                 layer.total_ops(self.batch_size))
                self.assertSequenceEqual(
                    r.scheme['total_nhops'],
                    [nh * f for nh, f
                     in zip(r.scheme['unit_nhops'],
                            r.scheme['fetch'][0])])
                self.assertEqual(r.num_nodes,
                                 self.resource.proc_region.dim.size())

            # Constraint.
            for r in res:
                self.assertEqual(r.scheme['to'][0], 1)

            # Ofmap layout.
            for r in res:
                self.assertEqual(r.ofmap_layout.complete_fmap_range().size(),
                                 layer.total_ofmap_size(self.batch_size))

            # Sequence number.
            for r in res:
                self.assertTupleEqual(r.sched_seq, condition.sched_seq)

    def test_schedule_search_ilayout(self):
        ''' Invalid ifmap_layout. '''
        layer = self.layers['BASE']

        schd = Scheduling(layer, self.batch_size, self.cost,
                          MapStrategyEyeriss)

        # Shift ifmap out of memory region.
        condition = SchedulingCondition(
            resource=self.resource,
            constraint=self.none_cstr,
            ifmap_layout=self.ifmap_layouts['BASE']._replace(
                regions=tuple(r._replace(origin=PhyDim2(-10, -10))
                              for r in self.ifmap_layouts['BASE'].regions)),
            sched_seq=self.sched_seq)

        with self.assertRaisesRegex(ValueError, 'Scheduling: .*ifmap.*'):
            _ = schd.schedule_search(condition, self.options)

        # Not match layer.
        condition = SchedulingCondition(
            resource=self.resource,
            constraint=self.none_cstr,
            ifmap_layout=self.ifmap_layouts['POOL'],
            sched_seq=self.sched_seq)

        with self.assertRaisesRegex(ValueError, 'Scheduling: .*ifmap.*'):
            _ = schd.schedule_search(condition, self.options)

    def test_schedule_search_nolbs(self):
        ''' Schedule search with no lbs. '''
        layer = self.layers['BASE']
        ifmap_layout = self.ifmap_layouts['BASE']

        schd = Scheduling(layer, self.batch_size, self.cost,
                          MapStrategyEyeriss)

        condition = SchedulingCondition(
            resource=self.resource._replace(size_regf=0),
            constraint=self.none_cstr,
            ifmap_layout=ifmap_layout,
            sched_seq=self.sched_seq)

        res = schd.schedule_search(condition, self.options)

        self.assertFalse(res)

    def test_pernode_sched_cache(self):
        ''' Per-node scheduling cache. '''
        # pylint: disable=no-member
        Scheduling.schedule_search_per_node.cache_clear()

        layer = self.layers['BASE']
        ifmap_layout = self.ifmap_layouts['BASE']

        schd = Scheduling(layer, self.batch_size, self.cost,
                          MapStrategyEyeriss)

        self.assertEqual(schd.schedule_search_per_node.cache_info().currsize,
                         0)
        self.assertTupleEqual(schd.cache_stats(), (0, 0))

        condition = SchedulingCondition(resource=self.resource,
                                        constraint=self.cstr,
                                        ifmap_layout=ifmap_layout,
                                        sched_seq=self.sched_seq)

        Scheduling.schedule_search.cache_clear()
        _ = schd.schedule_search(condition, self.options)

        h, m = schd.cache_stats()
        self.assertEqual(schd.schedule_search_per_node.cache_info().currsize,
                         m)
        self.assertEqual(h, 0)
        n = m

        Scheduling.schedule_search.cache_clear()
        _ = schd.schedule_search(condition, self.options)

        self.assertEqual(schd.schedule_search_per_node.cache_info().currsize,
                         n)
        self.assertTupleEqual(schd.cache_stats(), (n, n))

    def test_pernode_sched_cache_key(self):
        ''' Per-node scheduling cache key must be hashable. '''
        # pylint: disable=no-member
        Scheduling.schedule_search.cache_clear()
        Scheduling.schedule_search_per_node.cache_clear()

        layer = self.layers['BASE']
        ifmap_layout = self.ifmap_layouts['BASE']

        schd = Scheduling(layer, self.batch_size, self.cost,
                          MapStrategyEyeriss)

        condition = SchedulingCondition(resource=self.resource,
                                        constraint=self.cstr,
                                        ifmap_layout=ifmap_layout,
                                        sched_seq=self.sched_seq)

        _ = schd.schedule_search(condition, self.options)
        h, m = schd.cache_stats()
        self.assertEqual(h, 0)

        # Make another instance of each argument.
        rsrc = Resource(**self.resource._asdict())
        cstr = self.cstr
        opts = Option(**self.options._asdict())
        self.assertNotEqual(id(rsrc), id(self.resource))
        self.assertNotEqual(id(opts), id(self.options))

        part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                               pdims=((2, 4), (2, 1), (1, 1), (1, 1)))

        _ = schd.schedule_search_per_node(part, rsrc, cstr, opts)

        h2, m2 = schd.cache_stats()
        self.assertEqual(h2, h + 1)
        self.assertEqual(m2, m)
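
# The tests above exercise the per-layer Scheduling flow end to end. For
# quick reference, a minimal usage sketch follows, kept as a comment so
# nothing runs at import time. It uses only names already used in this
# file; the concrete argument values are illustrative assumptions, not
# requirements of the API.
#
#     layer = ConvLayer(8, 16, 28, 3)
#     schd = Scheduling(layer, batch_size, cost, MapStrategyEyeriss)
#     condition = SchedulingCondition(resource=resource,
#                                     constraint=SchedulingConstraint(),
#                                     ifmap_layout=ifmap_layout,
#                                     sched_seq=(0, 0, 0))
#     results = schd.schedule_search(condition, Option(ntops=10))
#     # results: at most ntops SchedulingResult objects, sorted by
#     # ascending total_cost (see test_schedule_search above).
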
class TestNNDataflow(unittest.TestCase):
    ''' Tests for NNDataflow module. '''

    def setUp(self):
        self.alex_net = import_network('alex_net')
        self.vgg_net = import_network('vgg_net')

        net = Network('simple')
        net.set_input_layer(InputLayer(4, 2))
        net.add('1', ConvLayer(4, 4, 2, 1))
        net.add('2', ConvLayer(4, 4, 2, 1))
        # Two more layers to avoid the single-segment case.
        net.add('a1', ConvLayer(4, 1, 1, 1, strd=2))
        net.add('a2', ConvLayer(1, 1, 1, 1))
        self.simple_net = net

        net = Network('complex')
        net.set_input_layer(InputLayer(8, 8))
        net.add('1', ConvLayer(8, 8, 8, 1))
        net.add('2a', ConvLayer(8, 8, 8, 1), prevs=('1',))
        net.add('3a', ConvLayer(8, 8, 8, 1))
        net.add('2b', ConvLayer(8, 8, 8, 1), prevs=('1',))
        net.add('3b', ConvLayer(8, 8, 8, 1))
        net.add('4', ConvLayer(16, 8, 8, 1), prevs=('3a', '3b'))
        self.complex_net = net

        self.map_strategy = MapStrategyEyeriss

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        self.cost = Cost(mac_op=1, mem_hier=(200, 6, 2, 1),
                         noc_hop=0, idl_unit=0)

        self.options = Option()

    def test_invalid_network(self):
        ''' Invalid network argument. '''
        with self.assertRaisesRegex(TypeError, 'NNDataflow: .*network.*'):
            _ = NNDataflow(self.alex_net.input_layer(), 4, self.resource,
                           self.cost, self.map_strategy)

    def test_invalid_resource(self):
        ''' Invalid resource argument. '''
        with self.assertRaisesRegex(TypeError, 'NNDataflow: .*resource.*'):
            _ = NNDataflow(self.alex_net, 4, self.resource.proc_region,
                           self.cost, self.map_strategy)

    def test_invalid_cost(self):
        ''' Invalid cost argument. '''
        with self.assertRaisesRegex(TypeError, 'NNDataflow: .*cost.*'):
            _ = NNDataflow(self.alex_net, 4, self.resource,
                           self.cost._asdict(), self.map_strategy)

    def test_invalid_map_strategy(self):
        ''' Invalid map_strategy argument. '''
        class _DummyClass():  # pylint: disable=too-few-public-methods
            pass

        with self.assertRaisesRegex(TypeError,
                                    'NNDataflow: .*map_strategy.*'):
            _ = NNDataflow(self.alex_net, 4, self.resource, self.cost,
                           _DummyClass)

    def test_verbose(self):
        ''' Verbose mode. '''
        network = self.alex_net
        batch_size = 16
        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         verbose=True)

        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         self.map_strategy)

        old_stdout = sys.stdout
        old_stderr = sys.stderr
        sys.stdout = stdout = StringIO()
        sys.stderr = stderr = StringIO()

        tops, _ = nnd.schedule_search(options)

        sys.stdout = old_stdout
        sys.stderr = old_stderr
        stdout_value = stdout.getvalue()
        stderr_value = stderr.getvalue()
        stdout.close()
        stderr.close()

        self.assertTrue(tops)

        self.assertFalse(stdout_value)
        for layer in network:
            self.assertIn(layer, stderr_value)

    def test_pipelining(self):
        ''' Pipelining. '''
        network = self.alex_net
        batch_size = 1

        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True)

        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

    def test_fast_forward_infeasible(self):
        ''' Enter fast forward due to infeasible constraint. '''
        network = self.simple_net
        batch_size = 1

        # Very small gbuf size. Small fmap tpart is infeasible.
        resource = self.resource._replace(
            dim_array=PhyDim2(2, 2), size_gbuf=16)

        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True)

        nnd = NNDataflow(network, batch_size, resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

        # No pipelining is feasible.
        for dtfl in tops:
            self.assertTupleEqual(dtfl['1'].sched_seq, (0, 0, 0))
            self.assertTupleEqual(dtfl['2'].sched_seq, (1, 0, 0))

    def test_fast_forward_found(self):
        ''' Enter fast forward due to early found. '''
        network = self.simple_net
        batch_size = 1

        # No time overhead limit.
        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True,
                         layer_pipeline_time_ovhd=float('inf'))

        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

    def test_fast_forward_crit_time(self):
        ''' Enter fast forward due to long critical time. '''
        network = self.simple_net
        batch_size = 1

        # Multiple nodes for spatial pipelining.
        resource = self.resource._replace(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
            dim_array=PhyDim2(1, 1),
        )

        # Very strict time overhead limit. At large fmap tpart, utilization
        # decreases and the critical time would increase.
        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True,
                         layer_pipeline_time_ovhd=1e-3)

        nnd = NNDataflow(network, batch_size, resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

    def test_fast_forward_frontier(self):
        ''' Enter fast forward due to off-frontier. '''
        network = self.simple_net
        batch_size = 16

        # Multiple nodes for spatial pipelining.
        resource = self.resource._replace(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
            dim_array=PhyDim2(2, 2),
        )

        # No time overhead limit.
        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True,
                         layer_pipeline_time_ovhd=float('inf'))

        nnd = NNDataflow(network, batch_size, resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

    def test_fmap_fwd(self):
        '''
        Fmap forward with shared mem sources or both on/off-chip
        destinations.
        '''
        network = self.complex_net
        batch_size = 16

        # Multiple nodes for spatial pipelining.
        resource = self.resource._replace(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
        )

        # No time overhead limit.
        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True,
                         layer_pipeline_time_ovhd=float('inf'))

        nnd = NNDataflow(network, batch_size, resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

    def test_sched_instance_sharing(self):
        ''' Scheduling instance sharing between layers. '''
        network = self.alex_net
        batch_size = 1

        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         self.map_strategy)

        self.assertIs(nnd.layer_sched_dict['conv1_a'],
                      nnd.layer_sched_dict['conv1_b'])
        self.assertIs(nnd.layer_sched_dict['conv2_a'],
                      nnd.layer_sched_dict['conv2_b'])
        self.assertIs(nnd.layer_sched_dict['pool1_a'],
                      nnd.layer_sched_dict['pool1_b'])

    def test_opt_goal(self):
        ''' Optimization goal. '''
        network = self.alex_net
        batch_size = 8

        resource = self.resource._replace(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC))

        nnd = NNDataflow(network, batch_size, resource, self.cost,
                         self.map_strategy)

        options_e = Option(sw_gbuf_bypass=(True, True, True),
                           sw_solve_loopblocking=True,
                           partition_hybrid=True, partition_batch=True,
                           opt_goal='e', ntops=16)
        tops_e, _ = nnd.schedule_search(options_e)
        self.assertTrue(tops_e)

        options_d = Option(sw_gbuf_bypass=(True, True, True),
                           sw_solve_loopblocking=True,
                           partition_hybrid=True, partition_batch=True,
                           opt_goal='d', ntops=16)
        tops_d, _ = nnd.schedule_search(options_d)
        self.assertTrue(tops_d)

        options_ed = Option(sw_gbuf_bypass=(True, True, True),
                            sw_solve_loopblocking=True,
                            partition_hybrid=True, partition_batch=True,
                            opt_goal='ed', ntops=16)
        tops_ed, _ = nnd.schedule_search(options_ed)
        self.assertTrue(tops_ed)

        self.assertLess(tops_e[0].total_cost, tops_d[0].total_cost)
        self.assertLess(tops_e[0].total_cost, tops_ed[0].total_cost)

        self.assertLess(tops_d[0].total_time, tops_e[0].total_time)
        self.assertLess(tops_d[0].total_time, tops_ed[0].total_time)

        # The smallest ED product may not come from the schedule with the
        # smallest E or the smallest D; allow a 5% error margin.
        self.assertLess(tops_ed[0].total_cost * tops_ed[0].total_time,
                        tops_e[0].total_cost * tops_e[0].total_time * 1.05)
        self.assertLess(tops_ed[0].total_cost * tops_ed[0].total_time,
                        tops_d[0].total_cost * tops_d[0].total_time * 1.05)

    def test_ext_layer(self):
        ''' With external layers. '''
        network = self.alex_net
        network.add_ext('e0', InputLayer(4, 1))
        network.add('l1', FCLayer(1000, 4))
        network.add('l2', FCLayer(8, 4), prevs=('e0', 'l1'))

        batch_size = 16

        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True)

        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)

    def test_no_valid_dataflow(self):
        ''' No valid dataflow is found. '''
        # Very small REGF.
        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=2,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        nnd = NNDataflow(self.alex_net, 4, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertFalse(tops)

        # With inter-layer pipelining.
        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True)
        tops, _ = nnd.schedule_search(options)
        self.assertFalse(tops)

    def test_scheduling_failure(self):
        ''' Layer scheduling failure. '''
        network = self.alex_net
        batch_size = 16

        # The abstract base MapStrategy cannot map layers, so scheduling
        # each layer fails.
        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         MapStrategy)

        old_stdout = sys.stdout
        old_stderr = sys.stderr
        sys.stdout = stdout = StringIO()
        sys.stderr = stderr = StringIO()

        with self.assertRaises(NotImplementedError):
            _ = nnd.schedule_search(self.options)

        sys.stdout = old_stdout
        sys.stderr = old_stderr
        stdout_value = stdout.getvalue()
        stderr_value = stderr.getvalue()
        stdout.close()
        stderr.close()

        self.assertFalse(stdout_value)
        self.assertIn('Failed', stderr_value)

    def test_eyeriss_isca16(self):
        ''' Reproduce Eyeriss ISCA'16 paper Fig. 10. '''
        network = self.alex_net
        batch_size = 16

        nnd = NNDataflow(network, batch_size, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch = tops[0]

        ## Check results.

        # Results as cost for each component.
        header = 'ALU, DRAM, Buffer, Array, RF'
        cost_bkdn = {}

        for layer in (['conv{}'.format(i) for i in range(1, 6)]
                      + ['fc{}'.format(i) for i in range(1, 4)]):
            op_cost = 0
            access_cost = [0] * me.NUM

            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch[layer_part]
                op_cost += sr.total_ops * self.cost.mac_op
                access_cost = [ac + a * c for ac, a, c
                               in zip(access_cost, sr.total_accesses,
                                      self.cost.mem_hier)]

            cost_bkdn[layer] = []
            # Scale to 1e9.
            cost_bkdn[layer].append(op_cost / 1e9)
            cost_bkdn[layer].append(access_cost[me.DRAM] / 1e9)
            cost_bkdn[layer].append(access_cost[me.GBUF] / 1e9)
            cost_bkdn[layer].append(access_cost[me.ITCN] / 1e9)
            cost_bkdn[layer].append(access_cost[me.REGF] / 1e9)

        # Check the major parts: ALU, DRAM, RF.
        major_cost_bkdn_ref = {'conv1': [1.69, 2.46, 6.75],
                               'conv2': [3.58, 2.27, 14.33],
                               'conv3': [2.39, 2.02, 9.57],
                               'conv4': [1.79, 1.57, 7.18],
                               'conv5': [1.20, 1.05, 4.78],
                               'fc1': [0.60, 7.78, 2.42],
                               'fc2': [0.27, 3.39, 1.07],
                               'fc3': [0.07, 0.84, 0.26],
                              }

        for layer in cost_bkdn:
            success = all(abs(a - b) < 0.1 for a, b
                          in zip(cost_bkdn[layer][:2]
                                 + cost_bkdn[layer][-1:],
                                 major_cost_bkdn_ref[layer]))
            self.assertTrue(success,
                            'test_eyeriss_isca16: '
                            'ALU, DRAM, RF cost diff in layer {}.\n'
                            'header: {}\n'
                            'actual: {}\nref: {}'
                            .format(layer, header, cost_bkdn[layer],
                                    major_cost_bkdn_ref[layer]))

    def test_eyeriss_isscc16(self):
        '''
        Reproduce Eyeriss ISSCC'16 paper Fig. 14.5.6, JSSC'17 paper Table V.
        '''
        network = self.alex_net
        batch_size = 4

        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(12, 14),
            size_gbuf=108 * 1024 // 2,  # 108 kB
            size_regf=261,  # 225 + 12 + 24
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        cost = Cost(mac_op=2e-12,
                    mem_hier=(460e-12, 15e-12, 4e-12, 1e-12),  # pJ/16-b
                    noc_hop=0,
                    idl_unit=30e-3 / 200e6)  # 30 mW GBUF + REGF

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch = tops[0]

        ## Check results.

        # Results as stats of the rows in the table.
        header = 'Power, Processing Latency, Ops, Active PEs, Filter size'
        stats = {}

        for layer in ['conv{}'.format(i) for i in range(1, 6)]:
            onchip_cost = 0
            time = 0
            ops = 0
            fil_size = 0

            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch[layer_part]
                onchip_cost += (sr.total_cost
                                - sr.total_accesses[me.DRAM]
                                * cost.mem_hier[me.DRAM])
                time += sr.total_time
                ops += sr.total_ops
                fil_size += network[layer_part].total_filter_size()

            power = onchip_cost / (time / 200e6) * 1e3  # mW
            active_pes = int(ops / time)

            stats[layer] = []
            stats[layer].append(power)
            stats[layer].append(time / 200.e3)  # cycles to ms
            stats[layer].append(ops / 1e6)  # to MOPs
            stats[layer].append(active_pes)
            stats[layer].append(fil_size / 1e3)  # to k

        # Check.
        stats_ref = {'conv1': [332, 16.5, 421.66, 151, 34.8],  # Act PE 154
                     'conv2': [288, 39.2, 895.79, 135, 307.2],
                     'conv3': [266, 21.8, 598.1, 156, 884.7],
                     'conv4': [235, 16.0, 448.6, 156, 663.6],
                     'conv5': [236, 10.0, 299.0, 156, 442.4],
                    }

        for layer in stats:
            success = ((0.6 * stats_ref[layer][0] < stats[layer][0]
                        < stats_ref[layer][0])
                       and (0.8 * stats_ref[layer][1] < stats[layer][1]
                            < stats_ref[layer][1])
                       and all(abs(a - b) < 0.1 for a, b
                               in zip(stats[layer][2:],
                                      stats_ref[layer][2:])))
            self.assertTrue(success,
                            'test_eyeriss_isscc16: '
                            'stats diff in layer {}.\n'
                            'header: {}\n'
                            'actual: {}\nref: {}'
                            .format(layer, header, stats[layer],
                                    stats_ref[layer]))

    def test_eyeriss_asplos17(self):
        ''' Reproduce TETRIS ASPLOS'17 paper Figure 8. '''
        network = self.alex_net
        batch_size = 16

        ## L-1 configuration.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=576056 // 2,  # 576 kB
            size_regf=1024 // 2,  # 1 kB
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )
        cost = Cost(mac_op=2e-12,
                    mem_hier=(240e-12, 28e-12, 4e-12, 1e-12),  # pJ/16-b
                    noc_hop=0,
                    idl_unit=320e-12)

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch_l1 = tops[0]

        ## T-16 configuration.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(14, 14),
            size_gbuf=133032 // 2,  # 133 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )
        cost = Cost(mac_op=2e-12,
                    mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
                    noc_hop=40e-12,
                    idl_unit=200e-12)
        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         partition_hybrid=True)

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)
        dfsch_t16 = tops[0]

        ## Check results.

        # Same workload.
        self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)

        # Performance of T-16 is proportional to PE resource (20% margin).
        self.assertLess(dfsch_t16.total_time,
                        1.2 * dfsch_l1.total_time * (16 * 16)
                        / (14 * 14 * 16))

        # Energy reduced by > 30%.
        # self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
        # With dimension restriction on partitioning, this is slightly
        # violated.
        self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.72)
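
# For reference, the end-to-end NNDataflow flow exercised throughout
# TestNNDataflow is, in outline (a sketch kept as a comment, not a test;
# the values are illustrative assumptions):
#
#     network = import_network('alex_net')
#     nnd = NNDataflow(network, batch_size, resource, cost,
#                      MapStrategyEyeriss)
#     tops, _ = nnd.schedule_search(Option())
#     if tops:
#         dfsch = tops[0]  # best full-network dataflow scheme
#
# An empty first return value means no valid dataflow was found (see
# test_no_valid_dataflow above); the second return value is ignored by
# every test in this class.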