def argparser():
    ''' Build the command-line argument parser.

    Returns an ``argparse.ArgumentParser`` configured with all hardware,
    cost-model, partitioning, pipelining, and search options.
    '''
    ap = argparse.ArgumentParser()

    ap.add_argument('net',
                    help='network name, should be a .py file under "nns". '
                         'Choices: {}.'.format(', '.join(all_networks())))

    ap.add_argument('--batch', type=int, required=True,
                    help='batch size')
    ap.add_argument('--word', type=int, default=16,
                    help='word size in bits')

    ap.add_argument('--nodes', type=int, nargs=2, required=True,
                    metavar=('H', 'W'),
                    help='Parallel node partitioning dimensions')
    ap.add_argument('--array', type=int, nargs=2, required=True,
                    metavar=('H', 'W'),
                    help='PE array dimensions')

    ap.add_argument('--regf', type=int, required=True,
                    help='register file size in bytes per PE')
    ap.add_argument('--gbuf', type=int, required=True,
                    help='global buffer size in bytes')

    ap.add_argument('--bus-width', type=int, default=0,
                    help='array bus width in bits. set 0 to ignore')
    # Explicit float instead of the string 'inf': argparse would convert a
    # string default through type=float, but a real float is unambiguous.
    ap.add_argument('--dram-bw', type=float, default=float('inf'),
                    help='total DRAM bandwidth in bytes per cycle.')

    ap.add_argument('--op-cost', type=float, default=1,
                    help='cost of arithmetic operation')
    ap.add_argument('--hier-cost', type=float, nargs=4,
                    default=[200, 6, 2, 1],
                    metavar=('DRAM_COST', 'GBUF_COST', 'ITCN_COST',
                             'REGF_COST'),
                    help='cost of access to memory hierarchy')
    ap.add_argument('--hop-cost', type=float, default=10,
                    help='cost of access through one NoC hop')
    ap.add_argument('--unit-idle-cost', type=float, default=0,
                    help='static cost over all nodes for unit execution time')

    ap.add_argument('--mem-type', default='2D', choices=['2D', '3D'],
                    help='memory type. "2D" has memory only on edge nodes; '
                         '"3D" has memory vertically on top of all nodes.')

    ap.add_argument('--disable-bypass', nargs='*', default=[],
                    choices=['i', 'o', 'f'],
                    help='whether disallowing gbuf bypass for i (input), o '
                         '(output), or f (filter)')

    ap.add_argument('--solve-loopblocking', action='store_true',
                    help='Use analytical solver to choose loop blocking. '
                         'Otherwise use exhaustive search.')

    ap.add_argument('--enable-access-forwarding', action='store_true',
                    help='Each node fetches a subset of data and forwards to '
                         'other nodes.')
    ap.add_argument('--enable-gbuf-sharing', action='store_true',
                    help='Share gbuf capacity across nodes through NoC.')
    ap.add_argument('--enable-save-writeback', action='store_true',
                    help='Allow to save the writeback to memory for the '
                         'intermediate data between layers if able to '
                         'store the entire data set in on-chip buffers.')

    ap.add_argument('--disable-interlayer-opt',
                    '--basic-interlayer-partition',
                    action='store_true',
                    help='Disable optimizations and only allow basic '
                         'inter-layer pipeline.')

    ap.add_argument('--hybrid-partition',
                    '--hybrid-partition2d',  # deprecated old name
                    action='store_true',
                    help='Use hybrid partition for layer for node mapping. '
                         'Otherwise use naive method based on layer type.')
    ap.add_argument('--batch-partition', action='store_true',
                    help='Allow partitioning batch, i.e., consider data '
                         'parallelism.')
    ap.add_argument('--ifmaps-partition',
                    '--ifmap-partition',
                    action='store_true',
                    help='Allow partitioning ifmap channel dimension, which '
                         'requires extra data synchronization.')
    ap.add_argument('--interlayer-partition',
                    '--inter-layer-partition',
                    action='store_true',
                    help='Allow partitioning resources across multiple layers '
                         'and process them simultaneously as an inter-layer '
                         'pipeline.')

    ap.add_argument('--layer-pipeline-time-overhead', type=float,
                    default=float('inf'),
                    help='maximum allowed execution time overhead due to '
                         'layer pipelining.')
    ap.add_argument('--layer-pipeline-max-degree', type=float,
                    default=float('inf'),
                    help='maximum allowed layer pipelining degree, i.e., '
                         'number of vertices in a pipeline segment.')

    ap.add_argument('-g', '--goal', default='e',
                    choices=['e', 'd', 'ed', 'E', 'D', 'ED'],
                    help='Goal of optimization: E(nergy), D(elay), or ED.')

    ap.add_argument('-t', '--top', type=int, default=1,
                    help='Number of top schedules to keep during search.')
    # Floor division: `cpu_count() / 2` is a float in Python 3, and argparse
    # does not apply type= to non-string defaults, so multiprocessing.Pool
    # would receive a float process count. Also keep at least one process.
    ap.add_argument('-p', '--processes', type=int,
                    default=max(1, multiprocessing.cpu_count() // 2),
                    help='Number of parallel processes to use for search.')
    ap.add_argument('-v', '--verbose', action='store_true',
                    help='Show progress and details.')

    return ap
def argparser():
    ''' Build the command-line argument parser.

    Returns an ``argparse.ArgumentParser`` configured with the hardware,
    cost-model, partitioning, and search options.
    '''
    ap = argparse.ArgumentParser()

    ap.add_argument('net',
                    help='network name, should be a .py file under "nns". '
                         'Choices: {}.'.format(', '.join(all_networks())))

    ap.add_argument('--batch', type=int, required=True,
                    help='batch size')
    ap.add_argument('--word', type=int, default=16,
                    help='word size in bits')

    ap.add_argument('--nodes', type=int, nargs=2, required=True,
                    metavar=('H', 'W'),
                    help='Parallel node partitioning dimensions')
    ap.add_argument('--array', type=int, nargs=2, required=True,
                    metavar=('H', 'W'),
                    help='PE array dimensions')

    ap.add_argument('--regf', type=int, required=True,
                    help='register file size in bytes per PE')
    ap.add_argument('--gbuf', type=int, required=True,
                    help='global buffer size in bytes')

    ap.add_argument('--op-cost', type=float, default=1,
                    help='cost of arithmetic operation')
    ap.add_argument('--hier-cost', type=float, nargs=4,
                    default=[200, 6, 2, 1],
                    metavar=('DRAM_COST', 'GBUF_COST', 'ITCN_COST',
                             'REGF_COST'),
                    help='cost of access to memory hierarchy')
    ap.add_argument('--hop-cost', type=float, default=10,
                    help='cost of access through one NoC hop')
    ap.add_argument('--unit-static-cost', type=float, default=0,
                    help='static cost for unit execution time')

    ap.add_argument('--mem-type', default='2D', choices=['2D', '3D'],
                    help='memory type. "2D" has memory only on edge nodes; '
                         '"3D" has memory vertically on top of all nodes.')

    ap.add_argument('--disable-bypass', nargs='*', default=[],
                    choices=['i', 'o', 'f'],
                    help='whether disallowing gbuf bypass for i (input), o '
                         '(output), or f (filter)')

    ap.add_argument('--solve-loopblocking', action='store_true',
                    help='Use analytical solver to choose loop blocking. '
                         'Otherwise use exhaustive search.')

    ap.add_argument('--hybrid-partition',
                    '--hybrid-partition2d',  # deprecated old name
                    action='store_true',
                    help='Use hybrid partition for layer for node mapping. '
                         'Otherwise use naive method based on layer type.')
    ap.add_argument('--batch-partition', action='store_true',
                    help='Allow partitioning batch, i.e., consider data '
                         'parallelism.')
    ap.add_argument('--ifmaps-partition',
                    '--ifmap-partition',
                    action='store_true',
                    help='Allow partitioning ifmap channel dimension, which '
                         'requires extra data synchronization.')

    ap.add_argument('-t', '--top', type=int, default=1,
                    help='Number of top schedules to keep during search.')
    # Floor division: `cpu_count() / 2` is a float in Python 3, and argparse
    # does not apply type= to non-string defaults, so multiprocessing.Pool
    # would receive a float process count. Also keep at least one process.
    ap.add_argument('-p', '--processes', type=int,
                    default=max(1, multiprocessing.cpu_count() // 2),
                    help='Number of parallel processes to use for search.')
    ap.add_argument('-v', '--verbose', action='store_true',
                    help='Show progress and details.')

    return ap
def test_import_network(self):
    ''' Get import_network. '''
    # Every discoverable network module must import as a Network object.
    for net_name in nns.all_networks():
        self.assertIsInstance(nns.import_network(net_name), Network)
def setUp(self):
    ''' Build the shared test fixtures.

    Populates ``self.net`` with a set of small synthetic networks covering
    linear chains, forks, joins, and corner-case topologies, plus all real
    networks, and constructs a batch size, a hardware ``Resource``, and an
    ofmap ``DataLayout`` used by the tests.
    '''
    # Maps network name -> Network instance.
    self.net = {}

    net = Network('net1')
    # Linear chain of FC layers with one pooling layer.
    net.set_input_layer(InputLayer(10, 1))
    net.add('0', FCLayer(10, 20))
    net.add('1', FCLayer(20, 30))
    net.add('1p', PoolingLayer(30, 1, 1))
    net.add('2', FCLayer(30, 40))
    net.add('3', FCLayer(40, 50))
    self.net[net.net_name] = net

    net = Network('net2')
    # Long linear chain of 16 trivial FC layers.
    net.set_input_layer(InputLayer(1, 1))
    for idx in range(16):
        net.add(str(idx), FCLayer(1, 1))
    self.net[net.net_name] = net

    net = Network('net3')
    # Fork.
    # /0-2\ /6- 7- 8\
    # x 4-5 12
    # \1-3/ \9-10-11/
    # NOTE(review): add() calls without prevs= appear to chain from the
    # previously added layer (statement order is semantic) -- confirm
    # against Network.add before reordering anything here.
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
    net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
    net.add('2', FCLayer(2, 1), prevs=('0', '1'))
    net.add('2p', PoolingLayer(1, 1, 1))
    net.add('3', FCLayer(2, 1), prevs=('0', '1'))
    net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
    net.add('5', FCLayer(1, 1))
    net.add('5p', PoolingLayer(1, 1, 1))
    net.add('6', FCLayer(1, 1), prevs='5p')
    net.add('7', FCLayer(1, 1))
    net.add('8', FCLayer(1, 1))
    net.add('9', FCLayer(1, 1), prevs='5p')
    net.add('10', FCLayer(1, 1))
    net.add('11', FCLayer(1, 1))
    net.add('12', FCLayer(2, 1), prevs=('8', '11'))
    self.net[net.net_name] = net

    net = Network('net4')
    # Complex fork.
    # /5 \
    # 0-1-2-3-4-6-7-8-10-14
    # \9/
    # \11-12 /
    # \13 /
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1))
    net.add('2', FCLayer(1, 1))
    net.add('3', FCLayer(1, 1))
    net.add('4', FCLayer(1, 1))
    net.add('5', FCLayer(1, 1), prevs='4')
    net.add('6', FCLayer(1, 1), prevs='4')
    net.add('7', FCLayer(1, 1))
    net.add('8', FCLayer(1, 1), prevs='7')
    net.add('9', FCLayer(1, 1), prevs='7')
    net.add('10', FCLayer(1, 1))
    net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
    net.add('11', PoolingLayer(1, 1, 1), prevs='4')
    net.add('12', FCLayer(1, 1))
    net.add('13', PoolingLayer(1, 1, 1), prevs='4')
    net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
    self.net[net.net_name] = net

    net = Network('net5')
    # Corner cases.
    # ----\
    # //1-2\ 7-8\
    # 0-3-4-x 10-11-12
    # \ \5/ 9 / \__/
    # 6--/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1))
    net.add('3', FCLayer(1, 1), prevs='0')
    net.add('4', FCLayer(1, 1), prevs='3')
    net.add('5', FCLayer(1, 1), prevs='3')
    net.add('6', FCLayer(1, 1), prevs='0')
    net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    net.add('8', FCLayer(1, 1))
    net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    net.add('10', FCLayer(2, 1), prevs=('8', '9'))
    net.add('11', FCLayer(1, 1))
    net.add('12', FCLayer(2, 1), prevs=('10', '11'))
    self.net[net.net_name] = net

    net = Network('net6')
    # Fmap sizes: exercises conv strides and pooling size arithmetic.
    net.set_input_layer(InputLayer(1, 24))
    net.add('0', ConvLayer(1, 1, 24, 3))
    net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
    net.add('1p', PoolingLayer(1, 6, 2))
    net.add('2', ConvLayer(1, 1, 6, 3))
    # NOTE(review): ('0') is just the string '0' (parens, not a tuple);
    # equivalent to prevs='0' as used elsewhere.
    net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
    self.net[net.net_name] = net

    net = Network('net7')
    # Topological order: see a visited vertex again.
    # /---
    # 0-1-\\
    # \2--2p
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1), prevs='0')
    net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
    self.net[net.net_name] = net

    net = Network('net8')
    # Forward to the middle.
    # /-\
    # 0-1-2-2p-4-4p
    # \-3------/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1), prevs='1')
    net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
    net.add('3', FCLayer(1, 1), prevs='0')
    net.add('4', FCLayer(2, 1), prevs='2p')
    net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
    self.net[net.net_name] = net

    net = Network('net9')
    # Previous layers include input and others.
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
    self.net[net.net_name] = net

    # Real networks: load every network shipped with the project.
    for net_name in all_networks():
        self.net[net_name] = import_network(net_name)

    self.batch_size = 16

    # An 8x8 node chip with DRAM regions; source data enters on the left
    # half and destination data exits on the right half of the DRAM region.
    self.resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                               type=NodeRegion.PROC),
        dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                               type=NodeRegion.DRAM),
        src_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 4),
                                   type=NodeRegion.DRAM),
        dst_data_region=NodeRegion(origin=PhyDim2(0, 4), dim=PhyDim2(8, 4),
                                   type=NodeRegion.DRAM),
        dim_array=PhyDim2(16, 16),
        size_gbuf=65536,
        size_regf=64,
        # Infinite bus width / DRAM bandwidth: no bandwidth constraints.
        array_bus_width=float('inf'),
        dram_bandwidth=float('inf'),
        no_time_mux=False)

    # Trivial (1, 1) partitioning in every dimension.
    part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
    self.ofmap_layout = DataLayout(
        frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)),),
        regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                            type=NodeRegion.DRAM),),
        parts=(part,))
def test_all_networks(self):
    ''' Get all_networks. '''
    # The well-known networks must be discoverable by name.
    for expected in ('alex_net', 'vgg_net'):
        self.assertIn(expected, nns.all_networks())
    # And there should be a non-trivial number of them overall.
    self.assertGreater(len(nns.all_networks()), 5)