Exemplo n.º 1
0
 def test_invalid_data_region(self):
     ''' Invalid src/dst_proc_region. '''
     with self.assertRaisesRegex(TypeError, 'Resource: .*src_data_.*'):
         _ = Resource(
             proc_region=self.proc_region,
             dram_region=self.dram_region,
             src_data_region=PhyDim2(2, 1),
             dst_data_region=self.dst_data_region,
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
             array_bus_width=8,
             dram_bandwidth=128,
             no_time_mux=False,
         )
     with self.assertRaisesRegex(TypeError, 'Resource: .*dst_data_.*'):
         _ = Resource(
             proc_region=self.proc_region,
             dram_region=self.dram_region,
             src_data_region=self.src_data_region,
             dst_data_region=PhyDim2(2, 1),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
             array_bus_width=8,
             dram_bandwidth=128,
             no_time_mux=False,
         )
Exemplo n.º 2
0
    def setUp(self):

        self.alex_net = import_network('alex_net')
        self.vgg_net = import_network('vgg_net')

        self.map_strategy = MapStrategyEyeriss

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=512 // 2,  # 512 B
        )

        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         unit_static=0)

        self.options = Option()
    def setUp(self):

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=65536,
            size_regf=64,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)

        self.none_cstr = SchedulingConstraint()

        part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
        self.ifmap_layout = DataLayout(
            frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
            regions=(self.resource.src_data_region, ),
            parts=(part, ))

        self.sched_seq = (2, 0, 0)
Exemplo n.º 4
0
    def test_pernode_sched_cache_key(self):
        ''' Per-node scheduling cache key must be hash-able. '''
        layer = self.layers['BASE']
        ifmap_layout = self.ifmap_layouts['BASE']

        schd = Scheduling(layer, self.batch_size, self.cost,
                          MapStrategyEyeriss)

        condition = SchedulingCondition(resource=self.resource,
                                        ifmap_layout=ifmap_layout)

        _ = schd.schedule_search(condition, self.options)

        h, m = schd.cache_stats()
        self.assertEqual(h, 0)

        # Make another instance.
        rsrc = Resource(**self.resource._asdict())
        opts = Option(**self.options._asdict())
        self.assertNotEqual(id(rsrc), id(self.resource))
        self.assertNotEqual(id(opts), id(self.options))

        part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                               pdims=((2, 2), (2, 2), (1, 1), (1, 1)))

        _ = schd.schedule_search_per_node(part, rsrc, opts)

        h2, m2 = schd.cache_stats()
        self.assertEqual(h2, h + 1)
        self.assertEqual(m2, m)
Exemplo n.º 5
0
    def setUp(self):

        self.layers = {}
        self.layers['BASE'] = ConvLayer(8, 16, 28, 3)
        self.layers['POOL'] = PoolingLayer(16, 28, 2)
        self.layers['LR'] = LocalRegionLayer(16, 28, nreg=3, sreg=1)

        self.batch_size = 4

        self.cost = Cost(mac_op=1, mem_hier=(200, 6, 2, 1),
                         noc_hop=50, unit_static=50)

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(4, 1),
                                     type=NodeRegion.DATA),),
            dim_array=PhyDim2(16, 16), size_gbuf=65536, size_regf=64)

        self.options = Option(partition_hybrid=True, partition_batch=True,
                              partition_ifmaps=True, ntops=10)

        self.ifmap_layouts = {}
        part = PartitionScheme(order=(pe.INPP, pe.BATP, pe.OUTP, pe.OFMP),
                               pdims=((1, 2), (2, 1), (1, 2), (2, 1)))
        for wlkey in self.layers:
            self.ifmap_layouts[wlkey] = partition.get_ofmap_layout(
                self.layers[wlkey].input_layer(), self.batch_size, part,
                self.resource.src_data_region())
Exemplo n.º 6
0
    def setUp(self):

        self.layers = {}
        self.layers['BASE'] = ConvLayer(8, 16, 28, 3)
        self.layers['POOL'] = PoolingLayer(16, 28, 2)
        self.layers['LR'] = LocalRegionLayer(16, 28, nreg=3, sreg=1)

        self.batch_size = 4

        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=50,
                         idl_unit=50)

        self.none_cstr = SchedulingConstraint()
        self.cstr = SchedulingConstraint(topofm=1, topbat=self.batch_size)

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=65536,
            size_regf=64,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)

        self.options = Option(partition_hybrid=True,
                              partition_batch=True,
                              partition_ifmaps=True,
                              ntops=10)

        self.ifmap_layouts = {}
        part = PartitionScheme(order=(pe.INPP, pe.BATP, pe.OUTP, pe.OFMP),
                               pdims=((1, 2), (2, 1), (1, 2), (2, 1)))
        for wlkey in self.layers:
            input_layer = self.layers[wlkey].input_layer()
            self.ifmap_layouts[wlkey] = DataLayout(
                frngs=(FmapRange((0, 0, 0, 0),
                                 FmapPosition(b=self.batch_size,
                                              n=input_layer.nofm,
                                              h=input_layer.hofm,
                                              w=input_layer.wofm)), ),
                regions=(self.resource.src_data_region, ),
                parts=(part.projection(self.resource.src_data_region,
                                       appl2frng=True), ))

        self.sched_seq = (2, 0, 1)
Exemplo n.º 7
0
 def test_invalid_array_bus_width(self):
     ''' Invalid array_bus_width. '''
     with self.assertRaisesRegex(TypeError,
                                 'Resource: .*array_bus_width.*'):
         _ = Resource(
             proc_region=self.proc_region,
             dram_region=self.dram_region,
             src_data_region=self.src_data_region,
             dst_data_region=self.dst_data_region,
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
             array_bus_width=1.2,
             dram_bandwidth=128,
             no_time_mux=False,
         )
     with self.assertRaisesRegex(ValueError,
                                 'Resource: .*array_bus_width.*'):
         _ = Resource(
             proc_region=self.proc_region,
             dram_region=self.dram_region,
             src_data_region=self.src_data_region,
             dst_data_region=self.dst_data_region,
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
             array_bus_width=-2,
             dram_bandwidth=128,
             no_time_mux=False,
         )
     with self.assertRaisesRegex(ValueError,
                                 'Resource: .*array_bus_width.*'):
         _ = Resource(
             proc_region=self.proc_region,
             dram_region=self.dram_region,
             src_data_region=self.src_data_region,
             dst_data_region=self.dst_data_region,
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
             array_bus_width=0,
             dram_bandwidth=128,
             no_time_mux=False,
         )
Exemplo n.º 8
0
    def setUp(self):

        self.alex_net = import_network('alex_net')
        self.vgg_net = import_network('vgg_net')

        net = Network('simple')
        net.set_input_layer(InputLayer(4, 2))
        net.add('1', ConvLayer(4, 4, 2, 1))
        net.add('2', ConvLayer(4, 4, 2, 1))
        # Two more layers to avoid single-segment case.
        net.add('a1', ConvLayer(4, 1, 1, 1, strd=2))
        net.add('a2', ConvLayer(1, 1, 1, 1))
        self.simple_net = net

        net = Network('complex')
        net.set_input_layer(InputLayer(8, 8))
        net.add('1', ConvLayer(8, 8, 8, 1))
        net.add('2a', ConvLayer(8, 8, 8, 1), prevs=('1', ))
        net.add('3a', ConvLayer(8, 8, 8, 1))
        net.add('2b', ConvLayer(8, 8, 8, 1), prevs=('1', ))
        net.add('3b', ConvLayer(8, 8, 8, 1))
        net.add('4', ConvLayer(16, 8, 8, 1), prevs=('3a', '3b'))
        self.complex_net = net

        self.map_strategy = MapStrategyEyeriss

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         idl_unit=0)

        self.options = Option()
Exemplo n.º 9
0
 def test_invalid_data_regions_type(self):
     ''' Invalid data_regions type. '''
     with self.assertRaisesRegexp(TypeError, 'Resource: .*data_regions.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=[
                 NodeRegion(dim=PhyDim2(2, 1),
                            origin=PhyDim2(0, 0),
                            type=NodeRegion.DATA),
             ],
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=(512, ),
         )
     with self.assertRaisesRegexp(TypeError, 'Resource: .*data_regions.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=NodeRegion(dim=PhyDim2(2, 1),
                                     origin=PhyDim2(0, 0),
                                     type=NodeRegion.DATA),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=(512, ),
         )
     with self.assertRaisesRegexp(TypeError, 'Resource: .*data_regions.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=(NodeRegion(dim=PhyDim2(2, 1),
                                      origin=PhyDim2(0, 0),
                                      type=NodeRegion.DATA), PhyDim2(2, 1)),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=(512, ),
         )
    def setUp(self):

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA),),
            dim_array=PhyDim2(16, 16), size_gbuf=65536, size_regf=64)

        frmap = FmapRangeMap()
        frmap.add(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), (PhyDim2(0, 0),))
        self.ifmap_layout = DataLayout(origin=PhyDim2(0, 0), frmap=frmap,
                                       type=NodeRegion.DATA)
    def setUp(self):

        # AlexNet.
        self.convlayers = OrderedDict()
        self.convlayers['conv1'] = ConvLayer(3, 96, 55, 11, 4)
        self.convlayers['conv2'] = ConvLayer(48, 256, 27, 5)
        self.convlayers['conv3'] = ConvLayer(256, 384, 13, 3)
        self.convlayers['conv4'] = ConvLayer(192, 384, 13, 3)
        self.convlayers['conv5'] = ConvLayer(192, 256, 13, 3)
        self.fclayers = {}
        self.fclayers['fc1'] = FCLayer(256, 4096, 6)
        self.fclayers['fc2'] = FCLayer(4096, 4096)
        self.fclayers['fc3'] = FCLayer(4096, 1000)

        # LocalRegionLayer.
        self.lrlayers = {}
        self.lrlayers['pool1'] = PoolingLayer(64, 7, 2)
        self.lrlayers['pool2'] = PoolingLayer(29, 13, 3)
        self.lrlayers['pool3'] = PoolingLayer(32, 7, 2, strd=3)
        self.lrlayers['lr1'] = LocalRegionLayer(32, 7, nreg=5, sreg=1)
        self.lrlayers['lr2'] = LocalRegionLayer(32, 7, nreg=5, sreg=1, strd=2)

        # Fake layers.
        self.fake_layers = {}
        # With irregular nifm/nofm.
        self.fake_layers['IRR'] = ConvLayer(255, 383, 13, 3)
        # With small numbers of fmaps.
        self.fake_layers['SM'] = ConvLayer(5, 3, 13, 3)
        # With large FIL height.
        self.fake_layers['LGFIL'] = ConvLayer(64, 64, 13, 22)

        # Resource.
        self.resource = {}
        proc_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        # Eyeriss, ISSCC'16, JSSC'17.
        self.resource['BASE'] = Resource(proc_region=proc_region,
                                         dram_region=data_region,
                                         src_data_region=data_region,
                                         dst_data_region=data_region,
                                         dim_array=PhyDim2(12, 14),
                                         size_gbuf=108 * 1024,
                                         size_regf=520,
                                         array_bus_width=float('inf'),
                                         dram_bandwidth=float('inf'),
                                         no_time_mux=False)
Exemplo n.º 12
0
 def test_invalid_proc_region(self):
     ''' Invalid proc_region. '''
     with self.assertRaisesRegexp(TypeError, 'Resource: .*proc_region.*'):
         _ = Resource(
             proc_region=PhyDim2(2, 2),
             data_regions=(NodeRegion(dim=PhyDim2(2, 1),
                                      origin=PhyDim2(0, 0),
                                      type=NodeRegion.DATA),
                           NodeRegion(dim=PhyDim2(2, 1),
                                      origin=PhyDim2(0, 1),
                                      type=NodeRegion.DATA)),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
         )
Exemplo n.º 13
0
 def test_dst_data_region(self):
     ''' Accessor dst_data_region. '''
     nr1 = NodeRegion(dim=PhyDim2(2, 1),
                      origin=PhyDim2(0, 0),
                      type=NodeRegion.DATA)
     nr2 = NodeRegion(dim=PhyDim2(2, 1),
                      origin=PhyDim2(0, 1),
                      type=NodeRegion.DATA)
     resource = Resource(proc_region=NodeRegion(dim=PhyDim2(4, 4),
                                                origin=PhyDim2(0, 0),
                                                type=NodeRegion.PROC),
                         dim_array=PhyDim2(16, 16),
                         size_gbuf=131072,
                         size_regf=512,
                         data_regions=(nr1, nr2))
     self.assertEqual(resource.dst_data_region(), nr2, 'dst_data_region')
     resource = Resource(proc_region=NodeRegion(dim=PhyDim2(4, 4),
                                                origin=PhyDim2(0, 0),
                                                type=NodeRegion.PROC),
                         dim_array=PhyDim2(16, 16),
                         size_gbuf=131072,
                         size_regf=512,
                         data_regions=(nr1, ))
     self.assertEqual(resource.dst_data_region(), nr1, 'dst_data_region')
Exemplo n.º 14
0
 def test_invalid_data_regions_proc(self):
     ''' Invalid data_regions with type PROC. '''
     with self.assertRaisesRegexp(ValueError, 'Resource: .*data_.*type.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=(NodeRegion(dim=PhyDim2(2, 1),
                                      origin=PhyDim2(0, 0),
                                      type=NodeRegion.PROC),
                           NodeRegion(dim=PhyDim2(2, 1),
                                      origin=PhyDim2(0, 1),
                                      type=NodeRegion.DATA)),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
         )
Exemplo n.º 15
0
 def test_invalid_proc_region_dram(self):
     ''' Invalid proc_region with type DRAM. '''
     with self.assertRaisesRegex(ValueError, 'Resource: .*proc_.*type.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.DRAM),
             dram_region=self.dram_region,
             src_data_region=self.src_data_region,
             dst_data_region=self.dst_data_region,
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=512,
             array_bus_width=8,
             dram_bandwidth=128,
             no_time_mux=False,
         )
Exemplo n.º 16
0
 def test_valid_args(self):
     ''' Valid arguments. '''
     resource = Resource(
         proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                origin=PhyDim2(0, 0),
                                type=NodeRegion.PROC),
         data_regions=(NodeRegion(dim=PhyDim2(2, 1),
                                  origin=PhyDim2(0, 0),
                                  type=NodeRegion.DATA),
                       NodeRegion(dim=PhyDim2(2, 1),
                                  origin=PhyDim2(0, 1),
                                  type=NodeRegion.DATA)),
         dim_array=PhyDim2(16, 16),
         size_gbuf=131072,
         size_regf=512,
     )
     self.assertTupleEqual(resource.proc_region.dim, (2, 2), 'proc_region')
     self.assertTupleEqual(resource.dim_array, (16, 16), 'dim_array')
     self.assertEqual(resource.size_gbuf, 131072, 'size_gbuf')
     self.assertEqual(resource.size_regf, 512, 'size_regf')
Exemplo n.º 17
0
    def test_no_valid_dataflow(self):
        ''' No valid dataflow is found. '''

        # Very small REGF.
        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=2,
        )

        nnd = NNDataflow(self.alex_net, 4, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)

        self.assertFalse(tops)
    def __init__(self, mlp_network):
        self.net = mlp_network  #MLP_network(18,32,64,32,2)
        self.map_strategy = MapStrategyEyeriss
        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=512 // 2,  # 512 B
        )

        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=0,
                         unit_static=0)

        self.options = Option()
Exemplo n.º 19
0
 def test_valid_args(self):
     ''' Valid arguments. '''
     resource = Resource(
         proc_region=self.proc_region,
         dram_region=self.dram_region,
         src_data_region=self.src_data_region,
         dst_data_region=self.dst_data_region,
         dim_array=PhyDim2(16, 16),
         size_gbuf=131072,
         size_regf=512,
         array_bus_width=8,
         dram_bandwidth=128,
         no_time_mux=False,
     )
     self.assertTupleEqual(resource.proc_region.dim, (2, 2), 'proc_region')
     self.assertTupleEqual(resource.dram_region.dim, (2, 2), 'dram_region')
     self.assertTupleEqual(resource.dim_array, (16, 16), 'dim_array')
     self.assertEqual(resource.size_gbuf, 131072, 'size_gbuf')
     self.assertEqual(resource.size_regf, 512, 'size_regf')
     self.assertEqual(resource.array_bus_width, 8, 'array_bus_width')
     self.assertEqual(resource.dram_bandwidth, 128, 'dram_bandwidth')
     self.assertFalse(resource.no_time_mux, 'no_time_mux')
Exemplo n.º 20
0
    def test_no_valid_dataflow(self):
        ''' No valid dataflow is found. '''

        # Very small REGF.
        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=128 * 1024 // 2,  # 128 kB
            size_regf=2,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        nnd = NNDataflow(self.alex_net, 4, self.resource, self.cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)

        self.assertFalse(tops)

        # With inter-layer pipelining.
        options = Option(hw_gbuf_save_writeback=True,
                         partition_interlayer=True)
        tops, _ = nnd.schedule_search(options)

        self.assertFalse(tops)
    def setUp(self):

        # Workload.
        self.layer = {}
        self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
        self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
        self.layer['POOL'] = PoolingLayer(32, 28, 2)
        self.layer['PAR'] = ConvLayer(24, 36, 56, 3)
        self.batch_size = 4

        # Resource.
        self.resource = {}
        dim_array = PhyDim2(16, 16)
        proc_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        # Typical resource.
        self.resource['BASE'] = Resource(proc_region=proc_region,
                                         dram_region=data_region,
                                         src_data_region=data_region,
                                         dst_data_region=data_region,
                                         dim_array=dim_array,
                                         size_gbuf=65536,
                                         size_regf=64,
                                         array_bus_width=float('inf'),
                                         dram_bandwidth=float('inf'),
                                         no_time_mux=False)
        # Larger resource with sufficient capacity, to make all schemes valid.
        self.resource['LG'] = Resource(proc_region=proc_region,
                                       dram_region=data_region,
                                       src_data_region=data_region,
                                       dst_data_region=data_region,
                                       dim_array=dim_array,
                                       size_gbuf=1024**3,
                                       size_regf=1024**3,
                                       array_bus_width=float('inf'),
                                       dram_bandwidth=float('inf'),
                                       no_time_mux=False)
        # Small resource.
        self.resource['SM'] = Resource(proc_region=proc_region,
                                       dram_region=data_region,
                                       src_data_region=data_region,
                                       dst_data_region=data_region,
                                       dim_array=dim_array,
                                       size_gbuf=4096,
                                       size_regf=16,
                                       array_bus_width=float('inf'),
                                       dram_bandwidth=float('inf'),
                                       no_time_mux=False)
        # Multi-node parallel resource.
        self.resource['PAR'] = Resource(proc_region=NodeRegion(
            origin=PhyDim2(0, 0), dim=PhyDim2(4, 2), type=NodeRegion.PROC),
                                        dram_region=data_region,
                                        src_data_region=data_region,
                                        dst_data_region=data_region,
                                        dim_array=dim_array,
                                        size_gbuf=25000,
                                        size_regf=64,
                                        array_bus_width=float('inf'),
                                        dram_bandwidth=float('inf'),
                                        no_time_mux=False)
        # Resource with no data regions.
        proc_data_region = NodeRegion(origin=PhyDim2(1, 1),
                                      dim=PhyDim2(1, 1),
                                      type=NodeRegion.PROC)
        self.resource['SRCNOTDATA'] = Resource(
            proc_region=proc_region,
            dram_region=data_region,
            src_data_region=proc_data_region,
            dst_data_region=data_region,
            dim_array=dim_array,
            size_gbuf=1024**3,
            size_regf=1024**3,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)
        self.resource['DSTNOTDATA'] = Resource(
            proc_region=proc_region,
            dram_region=data_region,
            src_data_region=data_region,
            dst_data_region=proc_data_region,
            dim_array=dim_array,
            size_gbuf=1024**3,
            size_regf=1024**3,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)
        self.resource['DATALOCAL'] = Resource(proc_region=proc_region,
                                              dram_region=data_region,
                                              src_data_region=proc_region,
                                              dst_data_region=proc_region,
                                              dim_array=dim_array,
                                              size_gbuf=1024**3,
                                              size_regf=1024**3,
                                              array_bus_width=float('inf'),
                                              dram_bandwidth=float('inf'),
                                              no_time_mux=False)
        # Filter pinning.
        self.resource['FILPIN'] = Resource(proc_region=proc_region,
                                           dram_region=data_region,
                                           src_data_region=data_region,
                                           dst_data_region=data_region,
                                           dim_array=dim_array,
                                           size_gbuf=1024**3,
                                           size_regf=1024**3,
                                           array_bus_width=float('inf'),
                                           dram_bandwidth=float('inf'),
                                           no_time_mux=True)

        # Nested loop description after mapping.
        self.nld = {}
        self.nld['BASE'] = next(
            MapStrategyEyeriss(self.layer['BASE'], self.batch_size, 1,
                               dim_array).gen_nested_loop_desc())
        self.nld['LGFIL'] = next(
            MapStrategyEyeriss(self.layer['LGFIL'], self.batch_size, 1,
                               dim_array).gen_nested_loop_desc())
        self.nld['POOL'] = next(
            MapStrategyEyeriss(self.layer['POOL'], self.batch_size, 1,
                               dim_array).gen_nested_loop_desc())
        # Fake nested loop, with zero filter size.
        self.nld['ZERO_FIL'] = NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=(0, 1000, 800),
            usize_regf=(0, 3, 1),
            unit_access=((0, 1000, 800), (0, 1000, 800), (3, 9, 7), (1, 1, 1)),
            data_loops=(DataDimLoops(le.IFM,
                                     le.OFM), DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)
        # Fake nested loop, with zero ifmap size.
        self.nld['ZERO_IFM'] = NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=(9, 0, 800),
            usize_regf=(3, 0, 1),
            unit_access=((9, 0, 800), (9, 0, 800), (3, 9, 7), (1, 1, 1)),
            data_loops=(DataDimLoops(le.IFM,
                                     le.OFM), DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)

        # Fake partition scheme.
        self.part = PartitionScheme(range(pe.NUM), ((1, 1), ) * pe.NUM)

        # Fake buffer sharing scheme.
        self.bufshr = BufShrScheme(proc_region, self.part)

        # Options.
        self.options = {}
        # Basic.
        self.options['BASE'] = Option(ntops=2**30)
        # Multiprocessing.
        self.options['MP'] = Option(ntops=2**30, nprocesses=8)
        # Limited top schemes.
        self.options['NTOPS'] = Option(ntops=10)
        # Bypass.
        self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30)
        # Bypass solver.
        self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        sw_solve_loopblocking=True,
                                        ntops=2**30)
        # Access forwarding.
        self.options['ACCFWD'] = Option(hw_access_forwarding=True, ntops=2**30)
        # Buffer sharing.
        self.options['BUFSHR'] = Option(hw_gbuf_sharing=True, ntops=2**30)
        # Buffer sharing with bypassing.
        self.options['BUFSHR-BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                            hw_gbuf_sharing=True,
                                            ntops=2**30)

        # Constraint.
        self.none_cstr = SchedulingConstraint()
        self.cstr = SchedulingConstraint(topifm=1, topbat=1)

        # Cost.
        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=50,
                         idl_unit=50)
Exemplo n.º 22
0
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words.
    word = (args.word + 7) / 8
    size_gbuf = args.gbuf / word
    size_regf = args.regf / word

    array_bus_width = args.bus_width // args.word
    if not array_bus_width:
        array_bus_width = float('inf')
    dram_bandwidth = args.dram_bw / word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_region = NodeRegion(dim=PhyDim2(2, 2),
                                 origin=PhyDim2(0, 0),
                                 dist=dim_nodes - PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        assert data_region.rel2abs(PhyDim2(1, 1)) + PhyDim2(1, 1) \
                == proc_region.dim
    elif args.mem_type == '3D':
        # Memory nodes are on the top.
        data_region = NodeRegion(dim=dim_nodes,
                                 origin=PhyDim2(0, 0),
                                 type=NodeRegion.DRAM)

    resource = Resource(proc_region=proc_region,
                        dram_region=data_region,
                        src_data_region=data_region,
                        dst_data_region=data_region,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf,
                        array_bus_width=array_bus_width,
                        dram_bandwidth=dram_bandwidth,
                        no_time_mux=False)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                idl_unit=args.unit_idle_cost)

    ## Options.

    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(
        sw_gbuf_bypass=tuple(bypass),
        sw_solve_loopblocking=args.solve_loopblocking,
        hw_access_forwarding=args.enable_access_forwarding,
        hw_gbuf_sharing=args.enable_gbuf_sharing,
        hw_gbuf_save_writeback=args.enable_save_writeback,
        partition_hybrid=args.hybrid_partition,
        partition_batch=args.batch_partition,
        partition_ifmaps=args.ifmaps_partition,
        partition_interlayer=args.interlayer_partition,
        layer_pipeline_time_ovhd=args.layer_pipeline_time_overhead,
        layer_pipeline_max_degree=args.layer_pipeline_max_degree,
        layer_pipeline_opt=not args.disable_interlayer_opt,
        opt_goal=args.goal.lower(),
        ntops=args.top,
        nprocesses=args.processes,
        verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tbeg = time.time()
    tops, cache_stats = nnd.schedule_search(options)
    tend = time.time()
    telapsed = tend - tbeg

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats
    res_map['elapsed'] = telapsed

    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map
    def setUp(self):

        # Workload.
        self.layer = {}
        self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
        self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
        self.layer['POOL'] = PoolingLayer(32, 28, 2)
        self.batch_size = 4

        # Resource.
        self.resource = {}
        dim_array = PhyDim2(16, 16)
        proc_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_regions = (NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DATA), )
        # Typical resource.
        self.resource['BASE'] = Resource(proc_region=proc_region,
                                         data_regions=data_regions,
                                         dim_array=dim_array,
                                         size_gbuf=65536,
                                         size_regf=64)
        # Larger resource with sufficient capacity, to make all schemes valid.
        self.resource['LG'] = Resource(proc_region=proc_region,
                                       data_regions=data_regions,
                                       dim_array=dim_array,
                                       size_gbuf=1024**3,
                                       size_regf=1024**3)
        # Small resource.
        self.resource['SM'] = Resource(proc_region=proc_region,
                                       data_regions=data_regions,
                                       dim_array=dim_array,
                                       size_gbuf=4096,
                                       size_regf=16)

        # Nested loop description after mapping.
        self.nld = {}
        self.nld['BASE'] = next(
            MapStrategyEyeriss(self.layer['BASE'], self.batch_size,
                               dim_array).gen_nested_loop_desc())
        self.nld['LGFIL'] = next(
            MapStrategyEyeriss(self.layer['LGFIL'], self.batch_size,
                               dim_array).gen_nested_loop_desc())
        self.nld['POOL'] = next(
            MapStrategyEyeriss(self.layer['POOL'], self.batch_size,
                               dim_array).gen_nested_loop_desc())
        # Fake nested loop, with zero filter size.
        self.nld['ZERO_FIL'] = NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=(0, 1000, 800),
            usize_regf=(0, 3, 1),
            unit_access=((0, 1000, 800), (0, 1000, 800), (3, 9, 7), (1, 1, 1)),
            data_loops=(DataDimLoops(le.IFM,
                                     le.OFM), DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)
        # Fake nested loop, with zero ifmap size.
        self.nld['ZERO_IFM'] = NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=(9, 0, 800),
            usize_regf=(3, 0, 1),
            unit_access=((9, 0, 800), (9, 0, 800), (3, 9, 7), (1, 1, 1)),
            data_loops=(DataDimLoops(le.IFM,
                                     le.OFM), DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)

        # Options.
        self.options = {}
        # Basic.
        self.options['BASE'] = Option(ntops=2**30)
        # Multiprocessing.
        self.options['MP'] = Option(ntops=2**30, nprocesses=8)
        # Limited top schemes.
        self.options['NTOPS'] = Option(ntops=10)
        # Bypass.
        self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30)
        # Bypass solver.
        self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        sw_solve_loopblocking=True,
                                        ntops=2**30)

        # Cost.
        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=50,
                         unit_static=50)

        # Partition occupation.
        self.part_occ = 0.91
Exemplo n.º 24
0
    def test_eyeriss_asplos17(self):
        '''
        Reproduce TETRIS ASPLOS'17 paper Figure 8.
        '''
        network = self.alex_net

        batch_size = 16

        ## L-1 configuration.

        resource = Resource(proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                                   dim=PhyDim2(1, 1),
                                                   type=NodeRegion.PROC),
                            dram_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM),
                            src_data_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM),
                            dst_data_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM),
                            dim_array=PhyDim2(16, 16),
                            size_gbuf=576056 // 2,  # 576 kB
                            size_regf=1024 // 2,  # 1 kB
                            array_bus_width=float('inf'),
                            dram_bandwidth=float('inf'),
                            no_time_mux=False,
                           )

        cost = Cost(mac_op=2e-12,
                    mem_hier=(240e-12, 28e-12, 4e-12, 1e-12),  # pJ/16-b
                    noc_hop=0,
                    idl_unit=320e-12)

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch_l1 = tops[0]

        ## T-16 configuration.

        resource = Resource(proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                                   dim=PhyDim2(4, 4),
                                                   type=NodeRegion.PROC),
                            dram_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                type=NodeRegion.DRAM),
                            src_data_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                type=NodeRegion.DRAM),
                            dst_data_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(4, 4),
                                type=NodeRegion.DRAM),
                            dim_array=PhyDim2(14, 14),
                            size_gbuf=133032 // 2,  # 133 kB
                            size_regf=512 // 2,  # 512 B
                            array_bus_width=float('inf'),
                            dram_bandwidth=float('inf'),
                            no_time_mux=False,
                           )

        cost = Cost(mac_op=2e-12,
                    mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
                    noc_hop=40e-12,
                    idl_unit=200e-12)

        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         partition_hybrid=True)

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)
        dfsch_t16 = tops[0]

        ## Check results.

        # Same workload.
        self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)

        # Performance of T-16 is proportional to PE resource (20% margin).
        self.assertLess(dfsch_t16.total_time,
                        1.2 * dfsch_l1.total_time * (16 * 16) / (14 * 14 * 16))
        # Energy reduced by > 30%.
        # self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
        # With dimension restriction on partitioning, this is slightly violated.
        self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.72)
Exemplo n.º 25
0
    def test_eyeriss_isscc16(self):
        '''
        Reproduce Eyeriss ISSCC'16 paper Fig. 14.5.6, JSSC'17 paper Table V.
        '''
        network = self.alex_net

        batch_size = 4

        resource = Resource(proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                                   dim=PhyDim2(1, 1),
                                                   type=NodeRegion.PROC),
                            dram_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM),
                            src_data_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM),
                            dst_data_region=NodeRegion(
                                origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM),
                            dim_array=PhyDim2(12, 14),
                            size_gbuf=108 * 1024 // 2,  # 108 kB
                            size_regf=261,  # 225 + 12 + 24
                            array_bus_width=float('inf'),
                            dram_bandwidth=float('inf'),
                            no_time_mux=False,
                           )

        cost = Cost(mac_op=2e-12,
                    mem_hier=(460e-12, 15e-12, 4e-12, 1e-12),  # pJ/16-b
                    noc_hop=0,
                    idl_unit=30e-3 / 200e6)  # 30 mW GBUF + REGF

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch = tops[0]

        ## Check results.

        # Results as stats of the rows in the table.
        header = 'Power, Processing Latency, Ops, Active PEs, Filter size'
        stats = {}

        for layer in ['conv{}'.format(i) for i in range(1, 6)]:
            onchip_cost = 0
            time = 0
            ops = 0
            fil_size = 0

            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch[layer_part]
                onchip_cost += sr.total_cost \
                        - sr.total_accesses[me.DRAM] * cost.mem_hier[me.DRAM]
                time += sr.total_time
                ops += sr.total_ops
                fil_size += network[layer_part].total_filter_size()

            power = onchip_cost / (time / 200e6) * 1e3  # mW
            active_pes = int(ops / time)

            stats[layer] = []
            stats[layer].append(power)
            stats[layer].append(time / 200.e3)  # cycles to ms
            stats[layer].append(ops / 1e6)  # to MOPs
            stats[layer].append(active_pes)
            stats[layer].append(fil_size / 1e3)  # to k

        # Check.
        stats_ref = {'conv1': [332, 16.5, 421.66, 151, 34.8],  # Act PE 154
                     'conv2': [288, 39.2, 895.79, 135, 307.2],
                     'conv3': [266, 21.8, 598.1, 156, 884.7],
                     'conv4': [235, 16.0, 448.6, 156, 663.6],
                     'conv5': [236, 10.0, 299.0, 156, 442.4],
                    }
        for layer in stats:
            success = (0.6 * stats_ref[layer][0]
                       < stats[layer][0]
                       < stats_ref[layer][0]) \
                    and (0.8 * stats_ref[layer][1]
                         < stats[layer][1]
                         < stats_ref[layer][1]) \
                    and all(abs(a - b) < 0.1 for a, b
                            in zip(stats[layer][2:], stats_ref[layer][2:]))
            self.assertTrue(success,
                            'test_eyeriss_isscc16: '
                            'stats diff in layer {}.\n'
                            'header: {}\n'
                            'actual: {}\nref: {}'
                            .format(layer, header, stats[layer],
                                    stats_ref[layer]))
Exemplo n.º 26
0
    def eyerissAsplos17(self):
        '''
        Reproduce TETRIS ASPLOS'17 paper Figure 8.
        '''
        #network = self.alex_net
        network = self.mock_net

        batch_size = 1

        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(14, 14),
            size_gbuf=133032 // 2,  # 133 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
            num_value_pes=256,
        )

        # model values
        print('converting weights')
        q_weight_dict = {}
        weights_dict = read_weights()
        for w_layer in [
                'conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6', 'fc7',
                'fc8'
        ]:
            array = convertToArray(weights_dict, w_layer)
            array_qint8 = quantizeWeights(array, 'qint8')
            q_weight_dict[w_layer] = array_qint8
        #print('''Hey num weights in conv1 are {} '''.format(len(array_qint8)))
        # hardware costs
        mult_cost = readValueMult8Cost()
        #control_cost = readValueControl8Cost()
        print('done converting weights')

        #with open('weights.pickle', 'wb') as f:
        #  pickle.dump(q_weight_dict,f)

        #counter = 0
        #c = 0
        #for m in mult_cost.keys():
        #  c += mult_cost[m]
        #  counter += 1
        #ave = c/counter
        #print('{} '.format(counter))
        #print('average = {}'.format(ave))
        #print('conv3 weights are')
        #for w in q_weight_dict['conv1']:
        #  print(w)
        #exit()

        cost = Cost(
            value_control=1.92e-13,
            value_mult=mult_cost,
            mac_op=2e-12,
            adder_cost=(1.178e-5) / 200000000,
            mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pj/16-b
            noc_hop=40e-12,
            idl_unit=200e-12,
            my_weights=q_weight_dict,
            mem_cycles=(200, 6, 2, 1))

        #cost = cost(value_control=control_cost,
        #            value_mult=mult_cost,
        #            mac_op=2e-12,
        #            mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pj/16-b
        #            noc_hop=40e-12,
        #            idl_unit=200e-12)

        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         partition_hybrid=True)

        #pdb.set_trace()

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)
        dfsch_t16 = tops[0]

        ## Check results.

        # Same workload.
        #self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)
        print('t16 ops: {}'.format(dfsch_t16.total_ops))

        # Performance of T-16 is proportional to PE resource (20% margin).
        #self.assertLess(dfsch_t16.total_time,
        #                1.2 * dfsch_l1.total_time * (16 * 16) / (14 * 14 * 16))
        print('t16_time: {}'.format(dfsch_t16.total_time))

        # Energy reduced by > 30%.
        # self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
        # With dimension restriction on partitioning, this is slightly violated.
        #self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.72)
        print('t16_energy: {}'.format(dfsch_t16.total_cost))
        for i in dfsch_t16:
            print(str(i) + ',')
        ## Check results.

        # Results as cost for each component:
        header = 'ALU, DRAM, Buffer, Array, RF'
        cost_bkdn = {}

        for layer in dfsch_t16:
            layer = str(layer)
            op_cost = 0
            access_cost = [0] * me.NUM

            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch_t16[layer_part]
                op_cost += sr.total_ops * cost.mac_op
                access_cost = [
                    ac + a * c for ac, a, c in zip(
                        access_cost, sr.total_accesses, cost.mem_hier)
                ]

            cost_bkdn[layer] = []
            # To 1e9.
            cost_bkdn[layer].append(op_cost * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.DRAM] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.GBUF] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.ITCN] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.REGF] * 1e12 / 1e9)

        for layer in cost_bkdn:
            print(cost_bkdn[layer])
    def setUp(self):

        self.net = {}

        net = Network('net1')
        # Linear.
        net.set_input_layer(InputLayer(10, 1))
        net.add('0', FCLayer(10, 20))
        net.add('1', FCLayer(20, 30))
        net.add('1p', PoolingLayer(30, 1, 1))
        net.add('2', FCLayer(30, 40))
        net.add('3', FCLayer(40, 50))
        self.net[net.net_name] = net

        net = Network('net2')
        # Long linear.
        net.set_input_layer(InputLayer(1, 1))
        for idx in range(16):
            net.add(str(idx), FCLayer(1, 1))
        self.net[net.net_name] = net

        net = Network('net3')
        # Fork.
        # /0-2\   /6- 7- 8\
        #   x  4-5         12
        # \1-3/   \9-10-11/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('2', FCLayer(2, 1), prevs=('0', '1'))
        net.add('2p', PoolingLayer(1, 1, 1))
        net.add('3', FCLayer(2, 1), prevs=('0', '1'))
        net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
        net.add('5', FCLayer(1, 1))
        net.add('5p', PoolingLayer(1, 1, 1))
        net.add('6', FCLayer(1, 1), prevs='5p')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(1, 1), prevs='5p')
        net.add('10', FCLayer(1, 1))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('8', '11'))
        self.net[net.net_name] = net

        net = Network('net4')
        # Complex fork.
        #          /5       \
        # 0-1-2-3-4-6-7-8-10-14
        #              \9/
        #          \11-12   /
        #          \13      /
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1))
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1))
        net.add('4', FCLayer(1, 1))
        net.add('5', FCLayer(1, 1), prevs='4')
        net.add('6', FCLayer(1, 1), prevs='4')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1), prevs='7')
        net.add('9', FCLayer(1, 1), prevs='7')
        net.add('10', FCLayer(1, 1))
        net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
        net.add('11', PoolingLayer(1, 1, 1), prevs='4')
        net.add('12', FCLayer(1, 1))
        net.add('13', PoolingLayer(1, 1, 1), prevs='4')
        net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
        self.net[net.net_name] = net

        net = Network('net5')
        # Corner cases.
        #  ----\
        # //1-2\ 7-8\
        # 0-3-4-x   10-11-12
        #  \ \5/ 9 /  \__/
        #   6--/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(1, 1), prevs='3')
        net.add('5', FCLayer(1, 1), prevs='3')
        net.add('6', FCLayer(1, 1), prevs='0')
        net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('10', FCLayer(2, 1), prevs=('8', '9'))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('10', '11'))
        self.net[net.net_name] = net

        net = Network('net6')
        # Fmap sizes.
        net.set_input_layer(InputLayer(1, 24))
        net.add('0', ConvLayer(1, 1, 24, 3))
        net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
        net.add('1p', PoolingLayer(1, 6, 2))
        net.add('2', ConvLayer(1, 1, 6, 3))
        net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
        self.net[net.net_name] = net

        net = Network('net7')
        # Topological order: see a visited vertex again.
        #  /---
        # 0-1-\\
        #  \2--2p
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='0')
        net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
        self.net[net.net_name] = net

        net = Network('net8')
        # Forward to the middle.
        #    /-\
        # 0-1-2-2p-4-4p
        #  \-3------/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='1')
        net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(2, 1), prevs='2p')
        net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
        self.net[net.net_name] = net

        net = Network('net9')
        # Previous layers include input and others.
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
        self.net[net.net_name] = net

        # Real networks.
        for net_name in all_networks():
            self.net[net_name] = import_network(net_name)

        self.batch_size = 16

        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 4),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=65536,
            size_regf=64,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)

        part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
        self.ofmap_layout = DataLayout(
            frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0),
                                dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM), ),
            parts=(part, ))
Exemplo n.º 28
0
    def eyerissAsplos17(self):
        '''
        Reproduce TETRIS ASPLOS'17 paper Figure 8.
        '''
        network = self.alex_net

        batch_size = 16

        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(14, 14),
            size_gbuf=133032 // 2,  # 133 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        cost = Cost(
            mac_op=2e-12,
            mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
            noc_hop=40e-12,
            idl_unit=200e-12)

        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         partition_hybrid=True)

        pdb.set_trace()

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)
        dfsch_t16 = tops[0]

        ## Check results.

        # Same workload.
        #self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)
        print('t16 ops: {}'.format(dfsch_t16.total_ops))

        # Performance of T-16 is proportional to PE resource (20% margin).
        #self.assertLess(dfsch_t16.total_time,
        #                1.2 * dfsch_l1.total_time * (16 * 16) / (14 * 14 * 16))
        print('t16_time: {}'.format(dfsch_t16.total_time))

        # Energy reduced by > 30%.
        # self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
        # With dimension restriction on partitioning, this is slightly violated.
        #self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.72)
        print('t16_energy: {}'.format(dfsch_t16.total_cost))
        for i in dfsch_t16:
            print(str(i) + ',')
        ## Check results.

        # Results as cost for each component:
        header = 'ALU, DRAM, Buffer, Array, RF'
        cost_bkdn = {}

        for layer in dfsch_t16:
            layer = str(layer)
            op_cost = 0
            access_cost = [0] * me.NUM

            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch_t16[layer_part]
                op_cost += sr.total_ops * cost.mac_op
                access_cost = [
                    ac + a * c for ac, a, c in zip(
                        access_cost, sr.total_accesses, cost.mem_hier)
                ]

            cost_bkdn[layer] = []
            # To 1e9.
            cost_bkdn[layer].append(op_cost * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.DRAM] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.GBUF] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.ITCN] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.REGF] * 1e12 / 1e9)

        for layer in cost_bkdn:
            print(cost_bkdn[layer])
Exemplo n.º 29
0
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words.
    word = (args.word + 7) / 8
    size_gbuf = args.gbuf / word
    size_regf = args.regf / word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_regions = (NodeRegion(dim=PhyDim2(h=dim_nodes.h, w=1),
                                   origin=PhyDim2(h=0, w=0),
                                   type=NodeRegion.DATA),
                        NodeRegion(dim=PhyDim2(h=dim_nodes.h, w=1),
                                   origin=PhyDim2(h=0, w=dim_nodes.w - 1),
                                   type=NodeRegion.DATA))
    elif args.mem_type == '3D':
        # All nodes have memory.
        data_regions = (NodeRegion(dim=dim_nodes,
                                   origin=PhyDim2(0, 0),
                                   type=NodeRegion.DATA), )

    resource = Resource(proc_region=proc_region,
                        data_regions=data_regions,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf)

    ## Cost.

    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                unit_static=args.unit_static_cost)

    ## Options.

    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(sw_gbuf_bypass=tuple(bypass),
                     sw_solve_loopblocking=args.solve_loopblocking,
                     partition_hybrid=args.hybrid_partition,
                     partition_batch=args.batch_partition,
                     partition_ifmaps=args.ifmaps_partition,
                     ntops=args.top,
                     nprocesses=args.processes,
                     verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tops, cache_stats = nnd.schedule_search(options)

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats

    stats = stats_dict(top, cost)
    for key, val in stats.items():
        res_map[key] = val

    return res_map