def test_add_unmatch_prev(self):
    ''' Modifier add unmatch prevs. '''
    # `assertRaisesRegexp` is a deprecated alias that no longer exists on
    # Python 3.12+. Use the current name when the interpreter provides it and
    # fall back to the alias on interpreters that only have the old one.
    raises_regex = (getattr(self, 'assertRaisesRegex', None)
                    or self.assertRaisesRegexp)

    network = Network('test_net')
    network.set_input(InputLayer(3, 224))
    network.add('c1', ConvLayer(3, 64, 224, 3))

    # Layers rejected with a "mismatch fmap" error; every rejected add must
    # leave the network with only c1 in it.
    with raises_regex(ValueError, 'Network: .*c1.*p1.*mismatch fmap.*'):
        network.add('p1', PoolingLayer(64, 7, 2))
    self.assertEqual(len(network), 1)
    with raises_regex(ValueError, 'Network: .*c1.*c2.*mismatch fmap.*'):
        network.add('c2', ConvLayer(64, 128, 220, 3))
    self.assertEqual(len(network), 1)

    # Layers rejected with a "merge" error against the implicit previous
    # layer c1.
    with raises_regex(ValueError, 'Network: .*merge.*c1.*p1.*'):
        network.add('p1', PoolingLayer(32, 7, 32))
    self.assertEqual(len(network), 1)
    with raises_regex(ValueError, 'Network: .*merge.*c1.*c2.*'):
        network.add('c2', ConvLayer(32, 128, 224, 3))
    self.assertEqual(len(network), 1)

    # A matching layer is accepted.
    network.add('c2', ConvLayer(64, 128, 224, 3))

    # Explicitly merging c1 and c2 is rejected for p1; the network keeps
    # exactly the two accepted layers.
    with raises_regex(ValueError, r'Network: .*merge.*c1\s*c2.*p1.*'):
        network.add('p1', PoolingLayer(128, 7, 32), prevs=('c1', 'c2'))
    self.assertEqual(len(network), 2)
def test_data_loops(self):
    ''' data_loops of LocalRegionLayer and PoolingLayer. '''
    expected = LocalRegionLayer.data_loops()

    # No loop is associated with filters; ifmaps and ofmaps share loops.
    self.assertEqual(expected[de.FIL], DataDimLoops())
    for dce in (de.IFM, de.OFM):
        self.assertEqual(expected[dce], DataDimLoops(le.OFM, le.BAT))

    llayer = LocalRegionLayer(64, 28, 2, 1)
    player = PoolingLayer(64, 28, 2)

    # The subclass and the instances report the same value as the class.
    for owner in (PoolingLayer, llayer, player):
        self.assertTupleEqual(owner.data_loops(), expected)
def setUp(self):
    ''' Create the layers, node dimensions and options used by the tests. '''
    self.layers = {
        'BASE': ConvLayer(64, 64, 28, 3),
        'FC': FCLayer(4096, 1000, 6),
        'POOL': PoolingLayer(32, 7, 3, strd=2),
        'LR': LocalRegionLayer(32, 7, nreg=5, sreg=1),
        # With irregular nifm/nofm.
        'IRR': ConvLayer(255, 383, 13, 3),
        # With small numbers of fmaps.
        'SM': ConvLayer(5, 3, 13, 3),
        # Super small networks. No partitioning schemes.
        'SSM1': ConvLayer(1, 1, 2, 3),
        'SSM2': FCLayer(2, 2),
        'SSM3': PoolingLayer(1, 2, 2),
    }

    self.batch_size = 8

    self.dim_nodes = {
        'BASE': PhyDim2(4, 4),
        'LG': PhyDim2(10, 10),
        'PRIME': PhyDim2(3, 3),
    }

    def make_option(hybrid, batch, ifmaps, **hw_flags):
        ''' Option with the given partition switches. '''
        # `ntops` is an irrelevant option here; it is the same everywhere.
        return Option(partition_hybrid=hybrid,
                      partition_batch=batch,
                      partition_ifmaps=ifmaps,
                      ntops=10000,
                      **hw_flags)

    self.options = {
        'BASE': make_option(True, True, True),
        'NOBATP': make_option(True, False, True),
        'NOINPP': make_option(True, True, False),
        'NOHYB': make_option(False, True, False),
        'ACCFWD': make_option(True, True, True,
                              hw_access_forwarding=True),
        'BUFSHR': make_option(True, True, True,
                              hw_gbuf_sharing=True),
    }
def setUp(self):
    ''' Extend the base fixtures with the networks used by timing tests. '''
    super(TestPipelineSegmentTiming, self).setUp()

    # Shortcuts to two of the networks prepared by the base class.
    self.net1 = self.net['net1']
    self.net4 = self.net['net4']

    # Three pooling layers in a row followed by one FC layer.
    lr_net = Network('net1')
    lr_net.set_input_layer(InputLayer(10, 1))
    for name in ('0p1', '0p2', '0p3'):
        lr_net.add(name, PoolingLayer(10, 1, 1))
    lr_net.add('1', FCLayer(10, 20))
    self.netlr = lr_net
def test_hash(self):
    ''' Equal-argument layers have equal hashes. '''
    cases = (
        (Layer, (2, 12)),
        (ConvLayer, (2, 12, 56, 3)),
        (PoolingLayer, (12, 14, 2)),
    )
    for cls, args in cases:
        # Two independently constructed instances with the same arguments.
        first = cls(*args)
        second = cls(*args)
        self.assertEqual(hash(first), hash(second))
def setUp(self):
    ''' Create the layer collections and the resource used by the tests. '''
    # AlexNet.
    self.convlayers = OrderedDict([
        ('conv1', ConvLayer(3, 96, 55, 11, 4)),
        ('conv2', ConvLayer(48, 256, 27, 5)),
        ('conv3', ConvLayer(256, 384, 13, 3)),
        ('conv4', ConvLayer(192, 384, 13, 3)),
        ('conv5', ConvLayer(192, 256, 13, 3)),
    ])
    self.fclayers = {
        'fc1': FCLayer(256, 4096, 6),
        'fc2': FCLayer(4096, 4096),
        'fc3': FCLayer(4096, 1000),
    }

    # LocalRegionLayer.
    self.lrlayers = {
        'pool1': PoolingLayer(64, 7, 2),
        'pool2': PoolingLayer(29, 13, 3),
        'pool3': PoolingLayer(32, 7, 2, strd=3),
        'lr1': LocalRegionLayer(32, 7, nreg=5, sreg=1),
        'lr2': LocalRegionLayer(32, 7, nreg=5, sreg=1, strd=2),
    }

    # Fake layers.
    self.fake_layers = {
        # With irregular nifm/nofm.
        'IRR': ConvLayer(255, 383, 13, 3),
        # With small numbers of fmaps.
        'SM': ConvLayer(5, 3, 13, 3),
        # With large FIL height.
        'LGFIL': ConvLayer(64, 64, 13, 22),
    }

    # Resource.
    single_node = dict(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1))
    proc_region = NodeRegion(type=NodeRegion.PROC, **single_node)
    data_region = NodeRegion(type=NodeRegion.DRAM, **single_node)
    self.resource = {
        # Eyeriss, ISSCC'16, JSSC'17.
        'BASE': Resource(proc_region=proc_region,
                         dram_region=data_region,
                         src_data_region=data_region,
                         dst_data_region=data_region,
                         dim_array=PhyDim2(12, 14),
                         size_gbuf=108 * 1024,
                         size_regf=520,
                         array_bus_width=float('inf'),
                         dram_bandwidth=float('inf'),
                         no_time_mux=False),
    }
def test_eq(self):
    ''' Equal-argument layers compare equal. '''
    cases = (
        (Layer, (2, 12)),
        (ConvLayer, (2, 12, 56, 3)),
        (PoolingLayer, (12, 14, 2)),
    )
    layer = None
    for cls, args in cases:
        layer = cls(*args)
        twin = cls(*args)
        self.assertEqual(layer, twin)

    # Exercise comparison against a non-layer value; the result is
    # deliberately discarded.
    _ = layer == 4
def test_poolinglayer(self):
    ''' PoolingLayer construction and derived sizes. '''
    plain = PoolingLayer(64, 28, 2)
    self.assertEqual(plain.ops_per_neuron(), 4)
    # Without an explicit stride, the ifmaps are 4x the ofmaps in size.
    self.assertEqual(plain.total_ifmap_size(),
                     plain.total_ofmap_size() * 4)

    strided = PoolingLayer(64, 28, 3, strd=2)
    self.assertEqual(strided.ops_per_neuron(), 9)
def setUp(self):
    ''' Create layers, cost, resource, options and ifmap layouts. '''
    self.layers = {
        'BASE': ConvLayer(8, 16, 28, 3),
        'POOL': PoolingLayer(16, 28, 2),
        'LR': LocalRegionLayer(16, 28, nreg=3, sreg=1),
    }

    self.batch_size = 4

    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=50,
                     unit_static=50)

    proc_region = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(4, 4),
                             type=NodeRegion.PROC)
    data_region = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(4, 1),
                             type=NodeRegion.DATA)
    self.resource = Resource(proc_region=proc_region,
                             data_regions=(data_region,),
                             dim_array=PhyDim2(16, 16),
                             size_gbuf=65536,
                             size_regf=64)

    self.options = Option(partition_hybrid=True,
                          partition_batch=True,
                          partition_ifmaps=True,
                          ntops=10)

    # One ifmap layout per layer, derived from its input layer using a
    # single shared partitioning scheme.
    part = PartitionScheme(order=(pe.INPP, pe.BATP, pe.OUTP, pe.OFMP),
                           pdims=((1, 2), (2, 1), (1, 2), (2, 1)))
    self.ifmap_layouts = {
        wlkey: partition.get_ofmap_layout(
            layer.input_layer(), self.batch_size, part,
            self.resource.src_data_region())
        for wlkey, layer in self.layers.items()
    }
def test_part_layer(self):
    ''' part_layer for Conv and Pooling layers. '''
    batch_size = 16

    # Conv layer partitioned by ps1.
    conv = ConvLayer(32, 128, 28, 3)
    sub, sub_batch, occ = self.ps1.part_layer(conv, batch_size)
    conv_checks = (
        (sub.hofm * self.ps1.dim(pe.OFMP).h, conv.hofm,
         'part_layer: Conv: hofm'),
        (sub.wofm * self.ps1.dim(pe.OFMP).w, conv.wofm,
         'part_layer: Conv: wofm'),
        (sub.nofm * self.ps1.size(pe.OUTP), conv.nofm,
         'part_layer: Conv: nofm'),
        (sub.nifm * self.ps1.size(pe.INPP), conv.nifm,
         'part_layer: Conv: nifm'),
        (sub_batch * self.ps1.size(pe.BATP), 16,
         'part_layer: Conv: batch_size'),
    )
    for covered, required, msg in conv_checks:
        self.assertGreaterEqual(covered, required, msg)
    self.assertAlmostEqual(
        occ,
        1. * (32 * 128 * 28 * 28 * 16)
        / (4 * 22 * 10 * 28 * 4 * self.ps1.size()))

    # Pooling layer partitioned by ps2.
    pool = PoolingLayer(128, 112, 2)
    sub, sub_batch, occ = self.ps2.part_layer(pool, batch_size)
    pool_checks = (
        (sub.hofm * self.ps2.dim(pe.OFMP).h, pool.hofm,
         'part_layer: Pooling: hofm'),
        (sub.wofm * self.ps2.dim(pe.OFMP).w, pool.wofm,
         'part_layer: Pooling: wofm'),
        (sub.nofm * self.ps2.size(pe.OUTP), pool.nofm,
         'part_layer: Pooling: nofm'),
        # The partitioned layer's own nifm is compared with its own nofm.
        (sub.nifm, sub.nofm,
         'part_layer: Pooling: nifm'),
        (sub_batch * self.ps2.size(pe.BATP), 16,
         'part_layer: Pooling: batch_size'),
    )
    for covered, required, msg in pool_checks:
        self.assertGreaterEqual(covered, required, msg)
    self.assertAlmostEqual(
        occ,
        1. * (128 * 112 * 112 * 16)
        / (32 * 23 * 23 * 2 * self.ps2.size()))
def setUp(self):
    ''' Build the three-layer network shared by the tests. '''
    net = Network('test_net')
    net.set_input(InputLayer(3, 224))

    # Layers are chained in order; no explicit previous layers are given.
    chain = (
        ('c1', ConvLayer(3, 64, 224, 3)),
        ('p1', PoolingLayer(64, 7, 32)),
        ('f1', FCLayer(64, 1000, 7)),
    )
    for name, layer in chain:
        net.add(name, layer)

    self.network = net
def test_add_no_prev(self):
    ''' Modifier add no prevs. '''
    # `assertRaisesRegexp` is a deprecated alias that no longer exists on
    # Python 3.12+. Use the current name when the interpreter provides it and
    # fall back to the alias on interpreters that only have the old one.
    raises_regex = (getattr(self, 'assertRaisesRegex', None)
                    or self.assertRaisesRegexp)

    network = Network('test_net')
    network.set_input(InputLayer(3, 224))
    network.add('c1', ConvLayer(3, 64, 224, 3))

    # 'p1' names itself as its previous layer, which is not yet in the
    # network, so the add is expected to fail with a KeyError.
    with raises_regex(KeyError, 'Network: .*prev.*p1.*'):
        network.add('p1', PoolingLayer(64, 7, 32), prevs='p1')
def test_vertex_no_merge_lr(self):
    ''' LocalRegionLayer has no previous layer to merge with. '''
    net = Network('tmp_net')
    net.set_input_layer(InputLayer(30, 1))
    chain = (
        ('0', PoolingLayer(30, 1, 1)),
        ('1', FCLayer(30, 40)),
        ('1p', PoolingLayer(40, 1, 1)),
    )
    for name, layer in chain:
        net.add(name, layer)

    ilp = self._make_ilp(net)

    for name in net:
        vidx = ilp.dag_vertex_dict[name]

        # The vertex found through the dict must contain the layer.
        self.assertIn(name, ilp.dag_vertex_list[vidx])

        # Layer is named by topological order.
        self.assertTrue(name.startswith(str(vidx)))
def setUp(self):
    ''' Build the network, layouts, results and dataflow scheme. '''
    net = Network('test_net')
    net.set_input(InputLayer(3, 224))
    net.add('c1', ConvLayer(3, 64, 224, 3))
    # Two pooling branches fork from c1 and are merged again by f1.
    net.add('p1', PoolingLayer(64, 7, 32), prevs='c1')
    net.add('p2', PoolingLayer(64, 7, 32), prevs='c1')
    net.add('f1', FCLayer(64, 1000, 7), prevs=['p1', 'p2'])
    self.network = net

    self.batch_size = 4

    def layout_of(layer, region_dim):
        ''' Ofmap layout of `layer` with a unit partitioning scheme. '''
        return partition.get_ofmap_layout(
            layer, self.batch_size,
            PartitionScheme(order=range(pe.NUM),
                            pdims=[(1, 1)] * pe.NUM),
            NodeRegion(origin=PhyDim2(0, 0),
                       dim=region_dim,
                       type=NodeRegion.DATA))

    self.input_layout = layout_of(self.network.input_layer(),
                                  PhyDim2(2, 1))

    self.c1res = SchedulingResult(
        dict_loop=OrderedDict([
            ('cost', 1.),
            ('time', 2.),
            ('ops', 4.),
            ('access', [[7, 8, 9]] * me.NUM),
        ]),
        dict_part=OrderedDict([
            ('cost', 0.5),
            ('total_nhops', [4, 5, 6]),
            ('num_nodes', 4),
        ]),
        ofmap_layout=layout_of(self.network['c1'], PhyDim2(1, 2)))

    self.pres = SchedulingResult(
        dict_loop=OrderedDict([
            ('cost', 0.1),
            ('time', 0.05),
            ('ops', 0.1),
            ('access', [[.7, .8, .9]] * me.NUM),
        ]),
        dict_part=OrderedDict([
            ('cost', 0.5),
            ('total_nhops', [.4, .5, .6]),
            ('num_nodes', 2),
        ]),
        ofmap_layout=layout_of(self.network['p1'], PhyDim2(1, 2)))

    # Both pooling layers share the same result object.
    self.dtfl = NNDataflowScheme(self.network, self.input_layout)
    self.dtfl['c1'] = self.c1res
    for name in ('p1', 'p2'):
        self.dtfl[name] = self.pres
def setUp(self):
    ''' Create layers, cost, constraints, resource, options and layouts. '''
    self.layers = {
        'BASE': ConvLayer(8, 16, 28, 3),
        'POOL': PoolingLayer(16, 28, 2),
        'LR': LocalRegionLayer(16, 28, nreg=3, sreg=1),
    }

    self.batch_size = 4

    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=50,
                     idl_unit=50)

    self.none_cstr = SchedulingConstraint()
    self.cstr = SchedulingConstraint(topofm=1, topbat=self.batch_size)

    def dram_column():
        ''' A fresh 4x1 DRAM region at the origin. '''
        return NodeRegion(origin=PhyDim2(0, 0),
                          dim=PhyDim2(4, 1),
                          type=NodeRegion.DRAM)

    proc_region = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(4, 4),
                             type=NodeRegion.PROC)
    self.resource = Resource(proc_region=proc_region,
                             dram_region=dram_column(),
                             src_data_region=dram_column(),
                             dst_data_region=dram_column(),
                             dim_array=PhyDim2(16, 16),
                             size_gbuf=65536,
                             size_regf=64,
                             array_bus_width=float('inf'),
                             dram_bandwidth=float('inf'),
                             no_time_mux=False)

    self.options = Option(partition_hybrid=True,
                          partition_batch=True,
                          partition_ifmaps=True,
                          ntops=10)

    # One ifmap layout per layer, covering the whole fmap range of the
    # layer's input layer and placed in the source data region.
    self.ifmap_layouts = {}
    part = PartitionScheme(order=(pe.INPP, pe.BATP, pe.OUTP, pe.OFMP),
                           pdims=((1, 2), (2, 1), (1, 2), (2, 1)))
    for wlkey, layer in self.layers.items():
        inlayer = layer.input_layer()
        full_range = FmapRange(
            (0, 0, 0, 0),
            FmapPosition(b=self.batch_size,
                         n=inlayer.nofm,
                         h=inlayer.hofm,
                         w=inlayer.wofm))
        src_region = self.resource.src_data_region
        self.ifmap_layouts[wlkey] = DataLayout(
            frngs=(full_range, ),
            regions=(src_region, ),
            parts=(part.projection(self.resource.src_data_region,
                                   appl2frng=True), ))

    self.sched_seq = (2, 0, 1)
def test_repr(self):
    ''' repr() names the class and round-trips through eval(). '''
    # pylint: disable=eval-used
    cases = (
        ('LocalRegionLayer', [
            LocalRegionLayer(64, 28, 2, 1),
            LocalRegionLayer(64, [28, 14], 1, [2, 4]),
            LocalRegionLayer(64, [28, 14], 1, [2, 4], 7),
            LocalRegionLayer(64, 28, 1, 4, 7),
        ]),
        ('PoolingLayer', [
            PoolingLayer(64, 28, 2),
            PoolingLayer(64, 28, 3, strd=2),
            PoolingLayer(64, [28, 14], [3, 4], strd=[2, 3]),
        ]),
    )
    for clsname, layers in cases:
        for layer in layers:
            text = repr(layer)
            self.assertIn(clsname, text)
            # Evaluating the representation rebuilds an equal layer.
            self.assertEqual(eval(repr(layer)), layer)
def test_is_valid_padding_sifm(self):
    ''' is_valid_padding_sifm for each kind of layer. '''
    clayer = ConvLayer(3, 64, [28, 14], [3, 1], [2, 4])
    flayer = FCLayer(2048, 4096, sfil=2)
    llayer = LocalRegionLayer(64, 28, 2, 1)
    player = PoolingLayer(64, 28, [2, 3], strd=[3, 2])

    # (layer, candidate ifmap size, whether it is expected to be valid).
    cases = (
        (clayer, [28 * 2, 14 * 4], True),
        (clayer, [27 * 2 + 3, 13 * 4 + 1], True),
        (clayer, [28, 14], False),
        (clayer, [28 * 2, 14], False),
        (clayer, [27 * 2 + 3, 13 * 4 + 3], True),
        (flayer, 2, True),
        (flayer, 1, True),
        (flayer, [1, 2], True),
        (llayer, 28, True),
        (llayer, 28 - 1, False),
        (llayer, 28 + 1, False),
        (player, [28 * 3, 28 * 2], True),
        (player, [27 * 3 + 2, 27 * 2 + 3], True),
    )
    for layer, sifm, valid in cases:
        check = self.assertTrue if valid else self.assertFalse
        check(layer.is_valid_padding_sifm(sifm))
program. If not, see <https://opensource.org/licenses/BSD-3-Clause>. """ from nn_dataflow.core import Network from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer ''' ZFNet Zeiler and Fergus, 2013 ''' NN = Network('ZFNet') NN.set_input(InputLayer(3, 224)) NN.add('conv1', ConvLayer(3, 96, 110, 7, 2)) NN.add('pool1', PoolingLayer(96, 55, 3, strd=2)) # Norm layer is ignored. NN.add('conv2', ConvLayer(96, 256, 26, 5, 2)) NN.add('pool2', PoolingLayer(256, 13, 3, strd=2)) # Norm layer is ignored. NN.add('conv3', ConvLayer(256, 512, 13, 3)) NN.add('conv4', ConvLayer(512, 1024, 13, 3)) NN.add('conv5', ConvLayer(1024, 512, 13, 3)) NN.add('pool3', PoolingLayer(512, 6, 3, strd=2)) NN.add('fc1', FCLayer(512, 4096, 6)) NN.add('fc2', FCLayer(4096, 4096)) NN.add('fc3', FCLayer(4096, 1000))
def setUp(self):
    '''
    Build the fixtures: a dict of networks keyed by network name (hand-made
    topologies plus all importable networks), a batch size, a resource, and
    an ofmap layout.
    '''

    self.net = {}

    # NOTE: unless `prevs` is given, a layer is added without an explicit
    # previous layer, so the order of the `add` calls below matters.

    net = Network('net1')
    # Linear.
    net.set_input_layer(InputLayer(10, 1))
    net.add('0', FCLayer(10, 20))
    net.add('1', FCLayer(20, 30))
    net.add('1p', PoolingLayer(30, 1, 1))
    net.add('2', FCLayer(30, 40))
    net.add('3', FCLayer(40, 50))
    self.net[net.net_name] = net

    net = Network('net2')
    # Long linear.
    net.set_input_layer(InputLayer(1, 1))
    for idx in range(16):
        net.add(str(idx), FCLayer(1, 1))
    self.net[net.net_name] = net

    net = Network('net3')
    # Fork.
    # /0-2\   /6- 7- 8\
    #   x  4-5         12
    # \1-3/   \9-10-11/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
    net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
    net.add('2', FCLayer(2, 1), prevs=('0', '1'))
    net.add('2p', PoolingLayer(1, 1, 1))
    net.add('3', FCLayer(2, 1), prevs=('0', '1'))
    net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
    net.add('5', FCLayer(1, 1))
    net.add('5p', PoolingLayer(1, 1, 1))
    net.add('6', FCLayer(1, 1), prevs='5p')
    net.add('7', FCLayer(1, 1))
    net.add('8', FCLayer(1, 1))
    net.add('9', FCLayer(1, 1), prevs='5p')
    net.add('10', FCLayer(1, 1))
    net.add('11', FCLayer(1, 1))
    net.add('12', FCLayer(2, 1), prevs=('8', '11'))
    self.net[net.net_name] = net

    net = Network('net4')
    # Complex fork.
    #          /5       \
    # 0-1-2-3-4-6-7-8-10-14
    #              \9/
    #          \11-12   /
    #          \13      /
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1))
    net.add('2', FCLayer(1, 1))
    net.add('3', FCLayer(1, 1))
    net.add('4', FCLayer(1, 1))
    net.add('5', FCLayer(1, 1), prevs='4')
    net.add('6', FCLayer(1, 1), prevs='4')
    net.add('7', FCLayer(1, 1))
    net.add('8', FCLayer(1, 1), prevs='7')
    net.add('9', FCLayer(1, 1), prevs='7')
    net.add('10', FCLayer(1, 1))
    net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
    net.add('11', PoolingLayer(1, 1, 1), prevs='4')
    net.add('12', FCLayer(1, 1))
    net.add('13', PoolingLayer(1, 1, 1), prevs='4')
    net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
    self.net[net.net_name] = net

    net = Network('net5')
    # Corner cases.
    #  ----\
    # //1-2\ 7-8\
    # 0-3-4-x   10-11-12
    #  \ \5/ 9 /  \__/
    #   6--/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1))
    net.add('3', FCLayer(1, 1), prevs='0')
    net.add('4', FCLayer(1, 1), prevs='3')
    net.add('5', FCLayer(1, 1), prevs='3')
    net.add('6', FCLayer(1, 1), prevs='0')
    net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    net.add('8', FCLayer(1, 1))
    net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
    net.add('10', FCLayer(2, 1), prevs=('8', '9'))
    net.add('11', FCLayer(1, 1))
    net.add('12', FCLayer(2, 1), prevs=('10', '11'))
    self.net[net.net_name] = net

    net = Network('net6')
    # Fmap sizes.
    net.set_input_layer(InputLayer(1, 24))
    net.add('0', ConvLayer(1, 1, 24, 3))
    net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
    net.add('1p', PoolingLayer(1, 6, 2))
    net.add('2', ConvLayer(1, 1, 6, 3))
    # NOTE: ('0') is a parenthesized string, not a one-element tuple.
    net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
    self.net[net.net_name] = net

    net = Network('net7')
    # Topological order: see a visited vertex again.
    #  /---
    # 0-1-\\
    #  \2--2p
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1), prevs='0')
    net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
    self.net[net.net_name] = net

    net = Network('net8')
    # Forward to the middle.
    #    /-\
    # 0-1-2-2p-4-4p
    #  \-3------/
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(1, 1), prevs='0')
    net.add('2', FCLayer(1, 1), prevs='1')
    net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
    net.add('3', FCLayer(1, 1), prevs='0')
    net.add('4', FCLayer(2, 1), prevs='2p')
    net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
    self.net[net.net_name] = net

    net = Network('net9')
    # Previous layers include input and others.
    net.set_input_layer(InputLayer(1, 1))
    net.add('0', FCLayer(1, 1))
    net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
    self.net[net.net_name] = net

    # Real networks.
    # These are stored under the name used to import them.
    for net_name in all_networks():
        self.net[net_name] = import_network(net_name)

    self.batch_size = 16

    # 8x8 processing region; the source and destination data regions are
    # two 8x4 DRAM regions with origins (0, 0) and (0, 4).
    self.resource = Resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                               type=NodeRegion.PROC),
        dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 8),
                               type=NodeRegion.DRAM),
        src_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(8, 4),
                                   type=NodeRegion.DRAM),
        dst_data_region=NodeRegion(origin=PhyDim2(0, 4), dim=PhyDim2(8, 4),
                                   type=NodeRegion.DRAM),
        dim_array=PhyDim2(16, 16), size_gbuf=65536, size_regf=64,
        array_bus_width=float('inf'), dram_bandwidth=float('inf'),
        no_time_mux=False)

    # Unit partitioning scheme: every partitioning dimension is (1, 1).
    part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
    self.ofmap_layout = DataLayout(
        frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
        regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                            type=NodeRegion.DRAM), ),
        parts=(part, ))
def setUp(self):
    ''' Build the network, layouts, results and dataflow scheme. '''
    net = Network('test_net')
    net.set_input_layer(InputLayer(3, 224))
    net.add('c1', ConvLayer(3, 64, 224, 3))
    # Two pooling branches fork from c1 and are merged again by f1.
    net.add('p1', PoolingLayer(64, 7, 32), prevs='c1')
    net.add('p2', PoolingLayer(64, 7, 32), prevs='c1')
    net.add('f1', FCLayer(128, 1000, 7), prevs=['p1', 'p2'])
    self.network = net

    self.batch_size = 4

    def full_layout(layer, region_dim):
        ''' Layout of the whole ofmaps of `layer` in one DRAM region. '''
        whole = FmapRange(
            (0, 0, 0, 0),
            FmapPosition(b=self.batch_size,
                         n=layer.nofm,
                         h=layer.hofm,
                         w=layer.wofm))
        region = NodeRegion(origin=PhyDim2(0, 0),
                            dim=region_dim,
                            type=NodeRegion.DRAM)
        unit_part = PartitionScheme(order=range(pe.NUM),
                                    pdims=[(1, 1)] * pe.NUM)
        return DataLayout(frngs=(whole, ),
                          regions=(region, ),
                          parts=(unit_part, ))

    self.input_layout = full_layout(self.network.input_layer(),
                                    PhyDim2(2, 1))

    c1_scheme = OrderedDict([
        ('cost', 1.5),
        ('time', 200.),
        ('ops', 4.),
        ('num_nodes', 4),
        ('cost_op', 0.5),
        ('cost_access', 1.),
        ('cost_noc', 0),
        ('cost_static', 0),
        ('proc_time', 200),
        ('bus_time', 0),
        ('dram_time', 0),
        ('access', [[7, 8, 9]] * me.NUM),
        ('remote_gbuf_access', [0] * 3),
        ('total_nhops', [4, 5, 6]),
        ('fetch', [[1, 1, 1], [2, 2, 2]]),
        ('ti', [2, 2, 3]),
        ('to', [1, 2, 3]),
        ('tb', [1, 2, 3]),
        ('tvals', [[2, 1, 1], [2, 2, 2], [3, 3, 3]]),
        ('orders', [range(3)] * 2),
    ])
    self.c1res = SchedulingResult(
        scheme=c1_scheme,
        ofmap_layout=full_layout(self.network['c1'], PhyDim2(1, 2)),
        sched_seq=(0, 0, 0))

    p1_scheme = OrderedDict([
        ('cost', 0.6),
        ('time', 5),
        ('ops', 0.1),
        ('num_nodes', 2),
        ('cost_op', 0.1),
        ('cost_access', 0.5),
        ('cost_noc', 0),
        ('cost_static', 0),
        ('proc_time', 5),
        ('bus_time', 0),
        ('dram_time', 0),
        ('access', [[.7, .8, .9]] * me.NUM),
        ('remote_gbuf_access', [0] * 3),
        ('total_nhops', [.4, .5, .6]),
        ('fetch', [[1, 1, 1], [2, 2, 2]]),
        ('ti', [2, 2, 3]),
        ('to', [1, 2, 3]),
        ('tb', [1, 2, 3]),
        ('tvals', [[2, 1, 1], [2, 2, 2], [3, 3, 3]]),
        ('orders', [range(3)] * 2),
    ])
    self.p1res = SchedulingResult(
        scheme=p1_scheme,
        ofmap_layout=full_layout(self.network['p1'], PhyDim2(1, 2)),
        sched_seq=(0, 1, 0))

    # p2 reuses the scheme and layout of p1, with its own sequence number.
    self.p2res = SchedulingResult(
        scheme=self.p1res.scheme,
        ofmap_layout=self.p1res.ofmap_layout,
        sched_seq=(0, 2, 0))

    self.dtfl = NNDataflowScheme(self.network, self.input_layout)
    for name, result in (('c1', self.c1res),
                         ('p1', self.p1res),
                         ('p2', self.p2res)):
        self.dtfl[name] = result
def setUp(self):
    ''' Create workloads, resources, loop descriptions, options and cost. '''
    # Workload.
    self.layer = {
        'BASE': ConvLayer(12, 10, 28, 3),
        'LGFIL': ConvLayer(2, 4, 28, 20),
        'POOL': PoolingLayer(32, 28, 2),
    }
    self.batch_size = 4

    # Resource.
    dim_array = PhyDim2(16, 16)
    proc_region = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(1, 1),
                             type=NodeRegion.PROC)
    data_regions = (NodeRegion(origin=PhyDim2(0, 0),
                               dim=PhyDim2(1, 1),
                               type=NodeRegion.DATA), )
    buffer_sizes = (
        # Typical resource.
        ('BASE', 65536, 64),
        # Larger resource with sufficient capacity, to make all schemes
        # valid.
        ('LG', 1024**3, 1024**3),
        # Small resource.
        ('SM', 4096, 16),
    )
    self.resource = {}
    for key, gbuf, regf in buffer_sizes:
        self.resource[key] = Resource(proc_region=proc_region,
                                      data_regions=data_regions,
                                      dim_array=dim_array,
                                      size_gbuf=gbuf,
                                      size_regf=regf)

    # Nested loop description after mapping.
    self.nld = {}
    for key in ('BASE', 'LGFIL', 'POOL'):
        strategy = MapStrategyEyeriss(self.layer[key], self.batch_size,
                                      dim_array)
        self.nld[key] = next(strategy.gen_nested_loop_desc())

    def fake_nld(usize_gbuf, usize_regf, unit_access):
        ''' Hand-written nested loop description with the given sizes. '''
        return NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=usize_gbuf,
            usize_regf=usize_regf,
            unit_access=unit_access,
            data_loops=(DataDimLoops(le.IFM, le.OFM),
                        DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)

    # Fake nested loop, with zero filter size.
    self.nld['ZERO_FIL'] = fake_nld(
        (0, 1000, 800), (0, 3, 1),
        ((0, 1000, 800), (0, 1000, 800), (3, 9, 7), (1, 1, 1)))
    # Fake nested loop, with zero ifmap size.
    self.nld['ZERO_IFM'] = fake_nld(
        (9, 0, 800), (3, 0, 1),
        ((9, 0, 800), (9, 0, 800), (3, 9, 7), (1, 1, 1)))

    # Options.
    self.options = {
        # Basic.
        'BASE': Option(ntops=2**30),
        # Multiprocessing.
        'MP': Option(ntops=2**30, nprocesses=8),
        # Limited top schemes.
        'NTOPS': Option(ntops=10),
        # Bypass.
        'BYP': Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30),
        # Bypass solver.
        'BYPSOL': Option(sw_gbuf_bypass=(True, ) * 3,
                         sw_solve_loopblocking=True,
                         ntops=2**30),
    }

    # Cost.
    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=50,
                     unit_static=50)

    # Partition occupation.
    self.part_occ = 0.91
from nn_dataflow.core import InputLayer, ConvLayer, PoolingLayer

''' ResNet-152 He, Zhang, Ren, and Sun, 2015 '''

# The network object that all layers below are added to.
NN = Network('ResNet')

NN.set_input(InputLayer(3, 224))

# Value passed as `prevs` for the first layer of the next block; None
# leaves the previous layer unspecified.
_PREVS = None

NN.add('conv1', ConvLayer(3, 64, 112, 7, 2))
# NOTE(review): the ResNet paper lists a 3x3, stride-2 max pool after conv1,
# while the third argument here is 2 -- confirm the intended pooling region.
NN.add('pool1', PoolingLayer(64, 56, 2))

# Blocks conv2_1 .. conv2_3, each with three conv layers (_a, _b, _c).
for i in range(1, 4):
    # Only the first block's _a layer is built with 64 as its first
    # argument; the later blocks use 256.
    NN.add('conv2_{}_a'.format(i),
           ConvLayer(64, 64, 56, 1) if i == 1 else ConvLayer(256, 64, 56, 1),
           prevs=_PREVS)
    NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3))
    NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1))

    # With residual shortcut.
    if i == 1:
        # Residual does not cross module.
        _PREVS = None
    else:
        # The next block's first layer takes the _c outputs of both this
        # block and the block before it.
        _PREVS = ('conv2_{}_c'.format(i), 'conv2_{}_c'.format(i - 1))
from nn_dataflow.core import Network
from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer

''' AlexNet Krizhevsky, Sutskever, and Hinton, 2012 '''

# The network object that all layers below are added to.
NN = Network('AlexNet')

NN.set_input(InputLayer(3, 224))

# The network is built as two parallel branches, suffixed _a and _b. Every
# layer names its previous layer(s) explicitly through `prevs`.
NN.add('conv1_a', ConvLayer(3, 48, 55, 11, 4), prevs=(NN.INPUT_LAYER_KEY,))
NN.add('conv1_b', ConvLayer(3, 48, 55, 11, 4), prevs=(NN.INPUT_LAYER_KEY,))
NN.add('pool1_a', PoolingLayer(48, 27, 3, strd=2), prevs=('conv1_a',))
NN.add('pool1_b', PoolingLayer(48, 27, 3, strd=2), prevs=('conv1_b',))
# Norm layer is ignored.
NN.add('conv2_a', ConvLayer(48, 128, 27, 5), prevs=('pool1_a',))
NN.add('conv2_b', ConvLayer(48, 128, 27, 5), prevs=('pool1_b',))
NN.add('pool2_a', PoolingLayer(128, 13, 3, strd=2), prevs=('conv2_a',))
NN.add('pool2_b', PoolingLayer(128, 13, 3, strd=2), prevs=('conv2_b',))
# Norm layer is ignored.
# conv3 is the only stage where each branch takes both branches as input.
NN.add('conv3_a', ConvLayer(256, 192, 13, 3), prevs=('pool2_a', 'pool2_b'))
NN.add('conv3_b', ConvLayer(256, 192, 13, 3), prevs=('pool2_a', 'pool2_b'))
NN.add('conv4_a', ConvLayer(192, 192, 13, 3), prevs=('conv3_a',))
NN.add('conv4_b', ConvLayer(192, 192, 13, 3), prevs=('conv3_b',))
NN.add('conv5_a', ConvLayer(192, 128, 13, 3), prevs=('conv4_a',))
NN.add('conv5_b', ConvLayer(192, 128, 13, 3), prevs=('conv4_b',))
""" from nn_dataflow.core import Network from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer ''' GoogLeNet ILSVRC 2014 ''' NN = Network('GoogLeNet') NN.set_input_layer(InputLayer(3, 224)) NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) NN.add('pool1', PoolingLayer(64, 56, 3, strd=2)) # Norm layer is ignored. NN.add('conv2_3x3_reduce', ConvLayer(64, 64, 56, 1)) NN.add('conv2_3x3', ConvLayer(64, 192, 56, 3)) # Norm layer is ignored. NN.add('pool2', PoolingLayer(192, 28, 3, strd=2)) def add_inception(network, incp_id, sfmap, nfmaps_in, nfmaps_1, nfmaps_3r, nfmaps_3, nfmaps_5r, nfmaps_5, nfmaps_pool, prevs): ''' Add an inception module to the network. ''' pfx = 'inception_{}_'.format(incp_id) # 1x1 branch. network.add(pfx + '1x1', ConvLayer(nfmaps_in, nfmaps_1, sfmap, 1),
def setUp(self):
    ''' Create workloads, resources, loop descriptions, options and cost. '''
    # Workload.
    self.layer = {
        'BASE': ConvLayer(12, 10, 28, 3),
        'LGFIL': ConvLayer(2, 4, 28, 20),
        'POOL': PoolingLayer(32, 28, 2),
        'PAR': ConvLayer(24, 36, 56, 3),
    }
    self.batch_size = 4

    # Resource.
    dim_array = PhyDim2(16, 16)
    proc_region = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(1, 1),
                             type=NodeRegion.PROC)
    data_region = NodeRegion(origin=PhyDim2(0, 0),
                             dim=PhyDim2(1, 1),
                             type=NodeRegion.DRAM)

    def make_resource(**overrides):
        '''
        Resource with a single processing node, all data in `data_region`,
        very large buffers, unlimited bandwidth and time multiplexing
        allowed, except for the fields given in `overrides`.
        '''
        fields = dict(proc_region=proc_region,
                      dram_region=data_region,
                      src_data_region=data_region,
                      dst_data_region=data_region,
                      dim_array=dim_array,
                      size_gbuf=1024**3,
                      size_regf=1024**3,
                      array_bus_width=float('inf'),
                      dram_bandwidth=float('inf'),
                      no_time_mux=False)
        fields.update(overrides)
        return Resource(**fields)

    self.resource = {}
    # Typical resource.
    self.resource['BASE'] = make_resource(size_gbuf=65536, size_regf=64)
    # Larger resource with sufficient capacity, to make all schemes valid.
    self.resource['LG'] = make_resource()
    # Small resource.
    self.resource['SM'] = make_resource(size_gbuf=4096, size_regf=16)
    # Multi-node parallel resource.
    self.resource['PAR'] = make_resource(
        proc_region=NodeRegion(origin=PhyDim2(0, 0),
                               dim=PhyDim2(4, 2),
                               type=NodeRegion.PROC),
        size_gbuf=25000,
        size_regf=64)
    # Resource with no data regions.
    proc_data_region = NodeRegion(origin=PhyDim2(1, 1),
                                  dim=PhyDim2(1, 1),
                                  type=NodeRegion.PROC)
    self.resource['SRCNOTDATA'] = make_resource(
        src_data_region=proc_data_region)
    self.resource['DSTNOTDATA'] = make_resource(
        dst_data_region=proc_data_region)
    self.resource['DATALOCAL'] = make_resource(
        src_data_region=proc_region,
        dst_data_region=proc_region)
    # Filter pinning.
    self.resource['FILPIN'] = make_resource(no_time_mux=True)

    # Nested loop description after mapping.
    self.nld = {}
    for key in ('BASE', 'LGFIL', 'POOL'):
        strategy = MapStrategyEyeriss(self.layer[key], self.batch_size, 1,
                                      dim_array)
        self.nld[key] = next(strategy.gen_nested_loop_desc())

    def fake_nld(usize_gbuf, usize_regf, unit_access):
        ''' Hand-written nested loop description with the given sizes. '''
        return NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=usize_gbuf,
            usize_regf=usize_regf,
            unit_access=unit_access,
            data_loops=(DataDimLoops(le.IFM, le.OFM),
                        DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)

    # Fake nested loop, with zero filter size.
    self.nld['ZERO_FIL'] = fake_nld(
        (0, 1000, 800), (0, 3, 1),
        ((0, 1000, 800), (0, 1000, 800), (3, 9, 7), (1, 1, 1)))
    # Fake nested loop, with zero ifmap size.
    self.nld['ZERO_IFM'] = fake_nld(
        (9, 0, 800), (3, 0, 1),
        ((9, 0, 800), (9, 0, 800), (3, 9, 7), (1, 1, 1)))

    # Fake partition scheme.
    self.part = PartitionScheme(range(pe.NUM), ((1, 1), ) * pe.NUM)

    # Fake buffer sharing scheme.
    self.bufshr = BufShrScheme(proc_region, self.part)

    # Options.
    self.options = {
        # Basic.
        'BASE': Option(ntops=2**30),
        # Multiprocessing.
        'MP': Option(ntops=2**30, nprocesses=8),
        # Limited top schemes.
        'NTOPS': Option(ntops=10),
        # Bypass.
        'BYP': Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30),
        # Bypass solver.
        'BYPSOL': Option(sw_gbuf_bypass=(True, ) * 3,
                         sw_solve_loopblocking=True,
                         ntops=2**30),
        # Access forwarding.
        'ACCFWD': Option(hw_access_forwarding=True, ntops=2**30),
        # Buffer sharing.
        'BUFSHR': Option(hw_gbuf_sharing=True, ntops=2**30),
        # Buffer sharing with bypassing.
        'BUFSHR-BYP': Option(sw_gbuf_bypass=(True, ) * 3,
                             hw_gbuf_sharing=True,
                             ntops=2**30),
    }

    # Constraint.
    self.none_cstr = SchedulingConstraint()
    self.cstr = SchedulingConstraint(topifm=1, topbat=1)

    # Cost.
    self.cost = Cost(mac_op=1,
                     mem_hier=(200, 6, 2, 1),
                     noc_hop=50,
                     idl_unit=50)
from nn_dataflow.core import Network
from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \
        PoolingLayer, EltwiseLayer

''' ResNet-152 He, Zhang, Ren, and Sun, 2015 '''

# The network object that all layers below are added to.
NN = Network('ResNet')

NN.set_input_layer(InputLayer(3, 224))

NN.add('conv1', ConvLayer(3, 64, 112, 7, 2))
NN.add('pool1', PoolingLayer(64, 56, 3, 2))

# Name of the layer that feeds the shortcut side of the next residual add.
RES_PREV = 'pool1'

# Blocks conv2_0 .. conv2_2, each with three conv layers (_a, _b, _c)
# followed by an element-wise layer that joins the shortcut.
for i in range(3):
    # Only the first block's _a layer is built with 64 as its first
    # argument; the later blocks use 256.
    NN.add('conv2_{}_a'.format(i),
           ConvLayer(64 if i == 0 else 256, 64, 56, 1))
    NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3))
    NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1))

    # With residual shortcut.
    if i == 0:
        # The first block puts an extra conv layer on the shortcut path.
        NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV, ))
        RES_PREV = 'conv2_br'
    NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2),
           prevs=(RES_PREV, 'conv2_{}_c'.format(i)))
    # NOTE(review): within the visible part of this loop RES_PREV is never
    # advanced to the new '_res' layer, so later blocks would still use
    # 'conv2_br' as the shortcut -- confirm that the loop body continues
    # below with such an update.
def test_part_layer_invalid_inpart(self):
    ''' Get part_layer invalid INPP. '''
    # `assertRaisesRegexp` is a deprecated alias that no longer exists on
    # Python 3.12+. Use the current name when the interpreter provides it and
    # fall back to the alias on interpreters that only have the old one.
    raises_regex = (getattr(self, 'assertRaisesRegex', None)
                    or self.assertRaisesRegexp)

    # The layer and batch size are derived from the partitioning sizes of
    # ps1; partitioning the pooling layer with ps1 is expected to be
    # rejected with an error that mentions the input.
    with raises_regex(ValueError, 'PartitionScheme: .*input.*'):
        _ = self.ps1.part_layer(
            PoolingLayer(self.ps1.size(pe.OUTP),
                         self.ps1.size(pe.OFMP), 2),
            self.ps1.size(pe.BATP))
from nn_dataflow.core import Network
from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer

''' VGGNet-16 Simonyan and Zisserman, 2014 '''

# The network object that all layers below are added to.
NN = Network('VGG')

NN.set_input_layer(InputLayer(3, 224))

# Layers are added in sequence without explicit previous layers. Each group
# of 3x3 conv layers is followed by a pooling layer with half the fmap size.
NN.add('conv1', ConvLayer(3, 64, 224, 3))
NN.add('conv2', ConvLayer(64, 64, 224, 3))
NN.add('pool1', PoolingLayer(64, 112, 2))

NN.add('conv3', ConvLayer(64, 128, 112, 3))
NN.add('conv4', ConvLayer(128, 128, 112, 3))
NN.add('pool2', PoolingLayer(128, 56, 2))

NN.add('conv5', ConvLayer(128, 256, 56, 3))
NN.add('conv6', ConvLayer(256, 256, 56, 3))
NN.add('conv7', ConvLayer(256, 256, 56, 3))
NN.add('pool3', PoolingLayer(256, 28, 2))

NN.add('conv8', ConvLayer(256, 512, 28, 3))
NN.add('conv9', ConvLayer(512, 512, 28, 3))
NN.add('conv10', ConvLayer(512, 512, 28, 3))
NN.add('pool4', PoolingLayer(512, 14, 2))
""" $lic$
Copyright (C) 2016-2019 by The Board of Trustees of Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see <https://opensource.org/licenses/BSD-3-Clause>.
"""

from nn_dataflow.core import Network
from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer

''' net1 '''

# The network object that all layers below are added to.
NN = Network('net1')
# Linear.
NN.set_input_layer(InputLayer(10, 1))
# Layers are added in sequence without explicit previous layers: four FC
# layers with one pooling layer ('1p') after the second.
NN.add('0', FCLayer(10, 20))
NN.add('1', FCLayer(20, 30))
NN.add('1p', PoolingLayer(30, 1, 1))
NN.add('2', FCLayer(30, 40))
NN.add('3', FCLayer(40, 50))