Пример #1
0
    def test_eyeriss_isscc16(self):
        '''
        Reproduce Eyeriss ISSCC'16 paper Fig. 14.5.6, JSSC'17 paper Table V.
        '''
        network = self.alex_net

        batch_size = 4

        # Eyeriss-like resources: a single processing node with a 12x14 PE
        # array. Buffer sizes are in 16-bit words, hence the // 2 on the
        # byte counts.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(1, 1),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(12, 14),
            size_gbuf=108 * 1024 // 2,  # 108 kB
            size_regf=261,  # 225 + 12 + 24
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        # Per-access energies for the memory hierarchy, DRAM -> REGF.
        cost = Cost(
            mac_op=2e-12,
            mem_hier=(460e-12, 15e-12, 4e-12, 1e-12),  # pJ/16-b
            noc_hop=0,
            idl_unit=30e-3 / 200e6)  # 30 mW GBUF + REGF

        # nnd = NNDataflow(network, batch_size, resource, cost,
        #                  self.map_strategy)
        # tops, _ = nnd.schedule_search(self.options)
        # self.assertTrue(tops)
        # dfsch = tops[0]

        # Use tool
        # The tool flags mirror the Resource/Cost above; sizes are in bytes
        # here (110592 B == 108 kB gbuf, 522 B == 261 16-bit words regf)
        # and 1.5e-10 == 30e-3 / 200e6.
        from nn_dataflow.tools import nn_dataflow_search as nnds
        cmd = '--batch 4 --nodes 1 1 --array 12 14 --regf 522 --gbuf 110592 --mem-type 3D --disable-bypass i o f --hop-cost 0 --op-cost 2e-12 --hier-cost 460e-12 15e-12 4e-12 1e-12 --unit-idle-cost 1.5e-10 alex_net'
        args = nnds.argparser().parse_args(cmd.split())
        res = nnds.do_scheduling(args)
        dfsch = res['schedules']

        ## Check results.

        # Results as stats of the rows in the table.
        header = 'Power, Processing Latency, Ops, Active PEs, Filter size'
        stats = {}

        for layer in ['conv{}'.format(i) for i in range(1, 6)]:
            onchip_cost = 0
            time = 0
            ops = 0
            fil_size = 0

            # Sum over all layer parts whose names share this conv layer's
            # name prefix.
            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch[layer_part]
                # On-chip cost excludes the DRAM access energy.
                onchip_cost += sr.total_cost \
                        - sr.total_accesses[me.DRAM] * cost.mem_hier[me.DRAM]
                time += sr.total_time
                ops += sr.total_ops
                fil_size += network[layer_part].total_filter_size()

            # On-chip power in mW at the 200 MHz clock implied by the
            # conversions above and below.
            power = onchip_cost / (time / 200e6) * 1e3  # mW
            active_pes = int(ops / time)

            stats[layer] = []
            stats[layer].append(power)
            stats[layer].append(time / 200.e3)  # cycles to ms
            stats[layer].append(ops / 1e6)  # to MOPs
            stats[layer].append(active_pes)
            stats[layer].append(fil_size / 1e3)  # to k

        # Check.
        stats_ref = {
            'conv1': [332, 16.5, 421.66, 151, 34.8],  # Act PE 154
            'conv2': [288, 39.2, 895.79, 135, 307.2],
            'conv3': [266, 21.8, 598.1, 156, 884.7],
            'conv4': [235, 16.0, 448.6, 156, 663.6],
            'conv5': [236, 10.0, 299.0, 156, 442.4],
        }
        # Power must be below the reference but within 40% of it; latency
        # below the reference but within 20%; remaining stats must match
        # the reference to within 0.1.
        for layer in stats:
            success = (0.6 * stats_ref[layer][0]
                       < stats[layer][0]
                       < stats_ref[layer][0]) \
                    and (0.8 * stats_ref[layer][1]
                         < stats[layer][1]
                         < stats_ref[layer][1]) \
                    and all(abs(a - b) < 0.1 for a, b
                            in zip(stats[layer][2:], stats_ref[layer][2:]))
            self.assertTrue(
                success, 'test_eyeriss_isscc16: '
                'stats diff in layer {}.\n'
                'header: {}\n'
                'actual: {}\nref: {}'.format(layer, header, stats[layer],
                                             stats_ref[layer]))
Пример #2
0
 def test_invalid_origin(self):
     ''' Invalid origin: a plain tuple instead of PhyDim2 must raise. '''
     # assertRaisesRegex replaces the assertRaisesRegexp alias, which was
     # deprecated since Python 3.2 and removed in Python 3.12.
     with self.assertRaisesRegex(TypeError, 'NodeRegion: .*origin.*'):
         _ = NodeRegion(dim=PhyDim2(4, 4),
                        origin=(1, 3),
                        type=NodeRegion.PROC)
Пример #3
0
    def setUp(self):
        ''' Build a diamond network (input -> c1 -> {p1, p2} -> f1) with
        dummy scheduling results for the first three layers. '''
        self.network = Network('test_net')
        self.network.set_input_layer(InputLayer(3, 224))
        self.network.add('c1', ConvLayer(3, 64, 224, 3))
        self.network.add('p1', PoolingLayer(64, 7, 32), prevs='c1')
        self.network.add('p2', PoolingLayer(64, 7, 32), prevs='c1')
        self.network.add('f1', FCLayer(128, 1000, 7), prevs=['p1', 'p2'])

        self.batch_size = 4

        # Input data layout: one fmap range covering the whole input,
        # stored in a 2x1 DRAM region with trivial (all 1x1) partitioning.
        input_layer = self.network.input_layer()
        self.input_layout = DataLayout(
            frngs=(FmapRange((0, 0, 0, 0),
                             FmapPosition(b=self.batch_size,
                                          n=input_layer.nofm,
                                          h=input_layer.hofm,
                                          w=input_layer.wofm)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0),
                                dim=PhyDim2(2, 1),
                                type=NodeRegion.DRAM), ),
            parts=(PartitionScheme(order=range(pe.NUM),
                                   pdims=[(1, 1)] * pe.NUM), ))

        # Scheduling result for c1 with hand-picked dummy stats; note
        # cost == cost_op + cost_access (0.5 + 1.0 = 1.5).
        c1_layer = self.network['c1']
        self.c1res = SchedulingResult(
            scheme=OrderedDict([
                ('cost', 1.5),
                ('time', 200.),
                ('ops', 4.),
                ('num_nodes', 4),
                ('cost_op', 0.5),
                ('cost_access', 1.),
                ('cost_noc', 0),
                ('cost_static', 0),
                ('proc_time', 200),
                ('bus_time', 0),
                ('dram_time', 0),
                ('access', [[7, 8, 9]] * me.NUM),
                ('remote_gbuf_access', [0] * 3),
                ('total_nhops', [4, 5, 6]),
                ('fetch', [[1, 1, 1], [2, 2, 2]]),
                ('ti', [2, 2, 3]),
                ('to', [1, 2, 3]),
                ('tb', [1, 2, 3]),
                ('tvals', [[2, 1, 1], [2, 2, 2], [3, 3, 3]]),
                ('orders', [range(3)] * 2),
            ]),
            # c1's output layout mirrors the input layout but over a 1x2
            # DRAM region sized by c1's output fmaps.
            ofmap_layout=DataLayout(
                frngs=(FmapRange(
                    (0, 0, 0, 0),
                    FmapPosition(b=self.batch_size,
                                 n=c1_layer.nofm,
                                 h=c1_layer.hofm,
                                 w=c1_layer.wofm)), ),
                regions=(NodeRegion(origin=PhyDim2(0, 0),
                                    dim=PhyDim2(1, 2),
                                    type=NodeRegion.DRAM), ),
                parts=(PartitionScheme(order=range(pe.NUM),
                                       pdims=[(1, 1)] * pe.NUM), )),
            sched_seq=(0, 0, 0))

        # Scheduling result for p1, structured like c1's but with smaller
        # dummy stats.
        p1_layer = self.network['p1']
        self.p1res = SchedulingResult(
            scheme=OrderedDict([
                ('cost', 0.6),
                ('time', 5),
                ('ops', 0.1),
                ('num_nodes', 2),
                ('cost_op', 0.1),
                ('cost_access', 0.5),
                ('cost_noc', 0),
                ('cost_static', 0),
                ('proc_time', 5),
                ('bus_time', 0),
                ('dram_time', 0),
                ('access', [[.7, .8, .9]] * me.NUM),
                ('remote_gbuf_access', [0] * 3),
                ('total_nhops', [.4, .5, .6]),
                ('fetch', [[1, 1, 1], [2, 2, 2]]),
                ('ti', [2, 2, 3]),
                ('to', [1, 2, 3]),
                ('tb', [1, 2, 3]),
                ('tvals', [[2, 1, 1], [2, 2, 2], [3, 3, 3]]),
                ('orders', [range(3)] * 2),
            ]),
            ofmap_layout=DataLayout(
                frngs=(FmapRange(
                    (0, 0, 0, 0),
                    FmapPosition(b=self.batch_size,
                                 n=p1_layer.nofm,
                                 h=p1_layer.hofm,
                                 w=p1_layer.wofm)), ),
                regions=(NodeRegion(origin=PhyDim2(0, 0),
                                    dim=PhyDim2(1, 2),
                                    type=NodeRegion.DRAM), ),
                parts=(PartitionScheme(order=range(pe.NUM),
                                       pdims=[(1, 1)] * pe.NUM), )),
            sched_seq=(0, 1, 0))

        # p2 reuses p1's scheme and layout; only the scheduling sequence
        # differs.
        self.p2res = SchedulingResult(scheme=self.p1res.scheme,
                                      ofmap_layout=self.p1res.ofmap_layout,
                                      sched_seq=(0, 2, 0))

        # Assemble the per-layer results into an NNDataflowScheme.
        self.dtfl = NNDataflowScheme(self.network, self.input_layout)
        self.dtfl['c1'] = self.c1res
        self.dtfl['p1'] = self.p1res
        self.dtfl['p2'] = self.p2res
    def test_use_fwd(self):
        ''' Use access forwarding. '''
        layer = self.layers['BASE']

        # Partitioning factors per BATP/INPP/OUTP/OFMP dimension.
        part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                               pdims=((2, 1), (2, 4), (1, 2), (2, 1)))

        nr = NodeRegion(origin=PhyDim2(0, 0),
                        dim=part.dim(),
                        type=NodeRegion.PROC)

        # Place all data sources/destinations far away so the remote-fetch
        # hop counts dominate over intra-region hops.
        far_dist = 1000

        ilayout = self._make_data_layout(layer.nifm, layer.hifm, layer.wifm,
                                         PhyDim2(-far_dist, 0), (1, 1), (1, 1),
                                         PhyDim2(1, 1))

        olayout = self._make_data_layout(layer.nofm, layer.hofm, layer.wofm,
                                         PhyDim2(0, -far_dist), (1, 1), (1, 1),
                                         PhyDim2(1, 1))

        filter_nodes = frozenset([PhyDim2(far_dist, 0), PhyDim2(0, far_dist)])

        # Hop counts under the baseline, access-forwarding, and
        # buffer-sharing options respectively.
        nhops_base = partition.unit_nhops_to_proc_region(
            layer, self.batch_size, nr, part, filter_nodes, ilayout, olayout,
            self.options['BASE'])
        nhops_accfwd = partition.unit_nhops_to_proc_region(
            layer, self.batch_size, nr, part, filter_nodes, ilayout, olayout,
            self.options['ACCFWD'])
        nhops_bufshr = partition.unit_nhops_to_proc_region(
            layer, self.batch_size, nr, part, filter_nodes, ilayout, olayout,
            self.options['BUFSHR'])

        # Access forwarding and buffer sharing must cost the same hops for
        # every data category.
        for dce in range(de.NUM):
            self.assertEqual(nhops_accfwd[dce], nhops_bufshr[dce])

        # In the basic access scheme, FIL and IFM are independently fetched,
        # resulting in repeating remote fetch. OFM are merged locally and only
        # stored back remotely once.
        self.assertGreater(
            nhops_base[de.FIL],
            layer.total_filter_size() * far_dist * part.size(pe.BATP) *
            part.size(pe.OFMP) * 0.8)
        self.assertGreater(
            nhops_base[de.IFM],
            layer.total_ifmap_size(self.batch_size) * far_dist *
            part.size(pe.OUTP) * 0.8)

        p_layer, p_batch_size, _ = part.part_layer(layer, self.batch_size)
        # With forwarding, everyone is only remotely fetched once.
        self.assertLess(
            nhops_accfwd[de.FIL],
            p_layer.total_filter_size() * part.size(pe.INPP, pe.OUTP) *
            (far_dist + nr.dim.size()))
        self.assertLess(
            nhops_accfwd[de.IFM],
            p_layer.total_ifmap_size(p_batch_size) *
            part.size(pe.INPP, pe.OFMP, pe.BATP) * (far_dist + nr.dim.size()))
        self.assertLess(
            nhops_accfwd[de.OFM],
            p_layer.total_ofmap_size(p_batch_size) *
            part.size(pe.OUTP, pe.OFMP, pe.BATP) * (far_dist + nr.dim.size()))
Пример #5
0
    def test_dim(self):
        ''' Get dim. '''
        # Expected per-index dimensions of each partition scheme.
        ps1_dims = [PhyDim2(2, 3), PhyDim2(3, 1), PhyDim2(1, 5),
                    PhyDim2(5, 2)]
        for idx, expected in enumerate(ps1_dims):
            self.assertEqual(self.ps1.dim(idx), expected)

        ps2_dims = [PhyDim2(2, 2), PhyDim2(5, 5), PhyDim2(3, 3),
                    PhyDim2(1, 1)]
        for idx, expected in enumerate(ps2_dims):
            self.assertEqual(self.ps2.dim(idx), expected)

        # Multiple indices multiply component-wise; no argument means all.
        prod012 = ps1_dims[0] * ps1_dims[1] * ps1_dims[2]
        self.assertEqual(self.ps1.dim(0, 1, 2), prod012)
        self.assertEqual(self.ps1.dim(), prod012 * ps1_dims[3])

        # Argument order is irrelevant.
        self.assertEqual(self.ps1.dim(0, 1, 2), self.ps1.dim(1, 2, 0))
Пример #6
0
 def test_hop_dist(self):
     ''' Get hop distance. '''
     a = PhyDim2(14, 12)
     b = PhyDim2(5, 20)
     # Manhattan distance |14 - 5| + |12 - 20|; symmetric in both args.
     expected = 9 + 8
     self.assertEqual(a.hop_dist(b), expected, 'hop_dist')
     self.assertEqual(b.hop_dist(a), expected, 'hop_dist')
    def test_origin(self):
        ''' Origin. '''
        layer = self.layers['BASE']

        # Trivial 1x1 partitioning in every dimension.
        part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                               pdims=((1, 1), (1, 1), (1, 1), (1, 1)))

        def nhops_at(dist):
            ''' Unit nhops with region origin and all data at `dist`. '''
            region = NodeRegion(origin=PhyDim2(dist, dist),
                                dim=part.dim(),
                                type=NodeRegion.PROC)
            ilayout = self._make_data_layout(
                layer.nifm, layer.hifm, layer.wifm,
                PhyDim2(-dist, -dist), (1, 1), (1, 1), PhyDim2(1, 1))
            olayout = self._make_data_layout(
                layer.nofm, layer.hofm, layer.wofm,
                PhyDim2(dist, dist), (1, 1), (1, 1), PhyDim2(1, 1))
            fil_nodes = frozenset([PhyDim2(dist, -dist)])
            return partition.unit_nhops_to_proc_region(
                layer, self.batch_size, region, part, fil_nodes,
                ilayout, olayout, self.options['BASE'])

        # Doubling every distance doubles every hop count.
        nhops_1 = nhops_at(3)
        nhops_2 = nhops_at(6)
        self.assertListEqual(nhops_2, [n * 2 for n in nhops_1])
Пример #8
0
    def test_merge(self):
        ''' Merge. '''
        # A fmap range large enough to cover both layouts below.
        fr = FmapRange((0, ) * 4, (30, ) * 4)

        # A second layout whose fmap ranges extend self.dly's.
        frm = FmapRangeMap()
        frm.add(FmapRange((0, 0, 0, 0), (4, 1, 16, 16)),
                (PhyDim2(0, 0), PhyDim2(1, 1)))
        frm.add(FmapRange((0, 1, 0, 0), (4, 3, 16, 16)),
                (PhyDim2(1, 0), PhyDim2(2, 2)))
        dly = DataLayout(origin=PhyDim2(-1, -1),
                         frmap=frm,
                         type=NodeRegion.DATA)

        # '|' merge: the merged complete range is the same regardless of
        # operand order, its size is the sum of the two operand sizes, and
        # the transfer hop counts add up.
        mdly1 = self.dly.merge('|', dly)
        mdly2 = dly.merge('|', self.dly)
        self.assertEqual(mdly1.type, self.dly.type, 'merge |: type')
        self.assertEqual(mdly2.type, dly.type, 'merge |: type')
        self.assertEqual(mdly1.frmap.complete_fmap_range(),
                         mdly2.frmap.complete_fmap_range(),
                         'merge |: complete_fmap_range')
        self.assertEqual(
            mdly1.frmap.complete_fmap_range().size(),
            self.frm.complete_fmap_range().size() +
            frm.complete_fmap_range().size(),
            'merge |: complete_fmap_range: size')
        self.assertEqual(mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
                         mdly2.total_transfer_nhops(fr, PhyDim2(0, 0)),
                         'merge |: nhops')
        self.assertEqual(
            mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
            self.dly.total_transfer_nhops(fr, PhyDim2(0, 0)) +
            dly.total_transfer_nhops(fr, PhyDim2(0, 0)), 'merge |: nhops')

        frm.add(FmapRange((0, 3, 0, 0), (4, 4, 16, 16)),
                (PhyDim2(1, 3), PhyDim2(2, 1), PhyDim2(-1, -2)))
        # Only type differs from self.dly.
        sdly = DataLayout(origin=self.dly.origin,
                          frmap=self.dly.frmap,
                          type=NodeRegion.PROC)
        dly = DataLayout(origin=PhyDim2(-1, -1),
                         frmap=frm,
                         type=NodeRegion.PROC)
        # '+' merge: the merged complete range keeps the size of sdly's
        # range (an overlay, not a concatenation), while hop counts still
        # add up.
        mdly1 = sdly.merge('+', dly)
        mdly2 = dly.merge('+', sdly)
        self.assertEqual(mdly1.type, sdly.type, 'merge +: type')
        self.assertEqual(mdly2.type, dly.type, 'merge +: type')
        self.assertEqual(mdly1.frmap.complete_fmap_range(),
                         mdly2.frmap.complete_fmap_range(),
                         'merge +: complete_fmap_range')
        self.assertEqual(mdly1.frmap.complete_fmap_range().size(),
                         self.frm.complete_fmap_range().size(),
                         'merge +: complete_fmap_range: size')
        self.assertEqual(mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
                         mdly2.total_transfer_nhops(fr, PhyDim2(0, 0)),
                         'merge +: nhops')
        self.assertEqual(
            mdly1.total_transfer_nhops(fr, PhyDim2(0, 0)),
            sdly.total_transfer_nhops(fr, PhyDim2(0, 0)) +
            dly.total_transfer_nhops(fr, PhyDim2(0, 0)), 'merge +: nhops')
Пример #9
0
 def test_merge_invalid_type(self):
     ''' Merge with a non-DataLayout other must raise TypeError. '''
     # assertRaisesRegex replaces the assertRaisesRegexp alias, which was
     # deprecated since Python 3.2 and removed in Python 3.12.
     with self.assertRaisesRegex(TypeError, 'DataLayout: .*other.*'):
         _ = self.dly.merge('|', self.frm)
     with self.assertRaisesRegex(TypeError, 'DataLayout: .*other.*'):
         _ = self.dly.merge('+', PhyDim2(1, 3))
Пример #10
0
def do_scheduling(args):
    '''
    Get optimal scheduling for given problem. Return a result schedule.

    `args` is the namespace produced by `argparser()`. Returns an
    OrderedDict containing the configuration (resource, cost, options) and
    the stats of the best schedule, or None if no valid dataflow is found.

    Raises ValueError if `args.mem_type` is neither '2D' nor '3D'.
    '''

    ## Network.

    network = import_network(args.net)
    batch_size = args.batch

    ## Resource.

    dim_nodes = PhyDim2(*args.nodes)
    dim_array = PhyDim2(*args.array)

    # Sizes of gbuf and regf are in words; round the word size up to a
    # whole number of bytes before converting the byte sizes.
    word = (args.word + 7) // 8
    size_gbuf = args.gbuf // word
    size_regf = args.regf // word

    # Bus width in words; zero means unlimited width.
    array_bus_width = args.bus_width // args.word
    if not array_bus_width:
        array_bus_width = float('inf')
    dram_bandwidth = args.dram_bw / word

    proc_region = NodeRegion(dim=dim_nodes,
                             origin=PhyDim2(0, 0),
                             type=NodeRegion.PROC)

    if args.mem_type == '2D':
        # Memory nodes are on two sides.
        data_region = NodeRegion(dim=PhyDim2(2, 2),
                                 origin=PhyDim2(0, 0),
                                 dist=dim_nodes - PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        assert data_region.rel2abs(PhyDim2(1, 1)) + PhyDim2(1, 1) \
                == proc_region.dim
    elif args.mem_type == '3D':
        # Memory nodes are on the top.
        data_region = NodeRegion(dim=dim_nodes,
                                 origin=PhyDim2(0, 0),
                                 type=NodeRegion.DRAM)
    else:
        # Fail loudly instead of hitting a NameError on data_region below
        # if the parser ever accepts another mem-type value.
        raise ValueError('Unknown mem type: {}'.format(args.mem_type))

    resource = Resource(proc_region=proc_region,
                        dram_region=data_region,
                        src_data_region=data_region,
                        dst_data_region=data_region,
                        dim_array=dim_array,
                        size_gbuf=size_gbuf,
                        size_regf=size_regf,
                        array_bus_width=array_bus_width,
                        dram_bandwidth=dram_bandwidth,
                        no_time_mux=False)

    ## Cost.

    # Per-access cost for each memory hierarchy level, ordered by enum.
    hier_cost = [0] * me.NUM
    hier_cost[me.DRAM] = args.hier_cost[0]
    hier_cost[me.GBUF] = args.hier_cost[1]
    hier_cost[me.ITCN] = args.hier_cost[2]
    hier_cost[me.REGF] = args.hier_cost[3]
    cost = Cost(mac_op=args.op_cost,
                mem_hier=tuple(hier_cost),
                noc_hop=args.hop_cost,
                idl_unit=args.unit_idle_cost)

    ## Options.

    # Gbuf bypass is on per data category unless the corresponding letter
    # (i/o/f) appears in --disable-bypass.
    bypass = [True] * de.NUM
    bypass[de.IFM] = 'i' not in args.disable_bypass
    bypass[de.OFM] = 'o' not in args.disable_bypass
    bypass[de.FIL] = 'f' not in args.disable_bypass
    options = Option(
        sw_gbuf_bypass=tuple(bypass),
        sw_solve_loopblocking=args.solve_loopblocking,
        hw_access_forwarding=args.enable_access_forwarding,
        hw_gbuf_sharing=args.enable_gbuf_sharing,
        hw_gbuf_save_writeback=args.enable_save_writeback,
        partition_hybrid=args.hybrid_partition,
        partition_batch=args.batch_partition,
        partition_ifmaps=args.ifmaps_partition,
        partition_interlayer=args.interlayer_partition,
        layer_pipeline_time_ovhd=args.layer_pipeline_time_overhead,
        layer_pipeline_max_degree=args.layer_pipeline_max_degree,
        layer_pipeline_opt=not args.disable_interlayer_opt,
        opt_goal=args.goal.lower(),
        ntops=args.top,
        nprocesses=args.processes,
        verbose=args.verbose)

    ## Search schedules.

    nnd = NNDataflow(network, batch_size, resource, cost, MapStrategyEyeriss)
    tbeg = time.time()
    tops, cache_stats = nnd.schedule_search(options)
    tend = time.time()
    telapsed = tend - tbeg

    if not tops:
        sys.stderr.write('No valid dataflow found.\n')
        return None

    top = tops[0]

    ## Write results.

    res_map = OrderedDict()

    res_map['version'] = get_version(with_local=True)

    res_map['net'] = args.net
    res_map['batch'] = args.batch

    res_map['resource'] = resource._asdict()
    res_map['cost'] = cost._asdict()
    res_map['options'] = options._asdict()

    res_map['cache_stats'] = cache_stats
    res_map['elapsed'] = telapsed

    # Fold the schedule stats into the result map.
    res_map.update(stats_dict(top, cost))

    return res_map
Пример #11
0
    def test_view(self):
        ''' Get view. '''
        frm = self.frm.copy()
        frm.add(FmapRange((0, 0, 0, 16), (4, 4, 16, 20)),
                (PhyDim2(2, 2), PhyDim2(3, 3)))
        frm.add(FmapRange((0, 0, 16, 0), (4, 4, 20, 20)),
                (PhyDim2(1, 1), PhyDim2(3, 3), PhyDim2(5, 5)))
        dly = DataLayout(origin=PhyDim2(1, 1), frmap=frm, type=NodeRegion.DATA)

        # Reference values taken from the unshifted layout.
        cfr = dly.frmap.complete_fmap_range()
        counters = dly.frmap.rget_counter(cfr)
        nhops = dly.total_transfer_nhops(cfr, PhyDim2(1, 2))

        # Shifting by (-1, -1) moves the origin from (1, 1) to (0, 0).
        shifted = dly.view(PhyDim2(-1, -1))
        self.assertEqual(shifted.origin, PhyDim2(0, 0), 'view: origin')

        for offset, viewed in ((PhyDim2(-1, -1), shifted),
                               (PhyDim2(3, 1), dly.view(PhyDim2(3, 1)))):
            # A view preserves the type, fmap range, and counters ...
            self.assertEqual(viewed.type, dly.type, 'view: type')
            self.assertEqual(viewed.frmap.complete_fmap_range(), cfr,
                             'view: complete_fmap_range')
            self.assertDictEqual(viewed.frmap.rget_counter(cfr), counters,
                                 'view: counter')
            # ... and hop counts are unchanged when the destination shifts
            # by the same offset.
            self.assertEqual(
                viewed.total_transfer_nhops(cfr, PhyDim2(1, 2) + offset),
                nhops, 'view: nhops')
Пример #12
0
 def test_invalid_data_regions_type(self):
     ''' Invalid data_regions type. '''
     # assertRaisesRegex replaces the assertRaisesRegexp alias, which was
     # deprecated since Python 3.2 and removed in Python 3.12.
     # Case 1: a list instead of a tuple is rejected.
     with self.assertRaisesRegex(TypeError, 'Resource: .*data_regions.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=[
                 NodeRegion(dim=PhyDim2(2, 1),
                            origin=PhyDim2(0, 0),
                            type=NodeRegion.DATA),
             ],
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=(512, ),
         )
     # Case 2: a bare NodeRegion not wrapped in a tuple is rejected.
     with self.assertRaisesRegex(TypeError, 'Resource: .*data_regions.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=NodeRegion(dim=PhyDim2(2, 1),
                                     origin=PhyDim2(0, 0),
                                     type=NodeRegion.DATA),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=(512, ),
         )
     # Case 3: a tuple containing a non-NodeRegion element is rejected.
     with self.assertRaisesRegex(TypeError, 'Resource: .*data_regions.*'):
         _ = Resource(
             proc_region=NodeRegion(dim=PhyDim2(2, 2),
                                    origin=PhyDim2(0, 0),
                                    type=NodeRegion.PROC),
             data_regions=(NodeRegion(dim=PhyDim2(2, 1),
                                      origin=PhyDim2(0, 0),
                                      type=NodeRegion.DATA), PhyDim2(2, 1)),
             dim_array=PhyDim2(16, 16),
             size_gbuf=131072,
             size_regf=(512, ),
         )
Пример #13
0
    def test_eyeriss_asplos17(self):
        '''
        Reproduce TETRIS ASPLOS'17 paper Figure 8.
        '''
        network = self.alex_net

        batch_size = 16

        ## L-1 configuration.

        # A single node with a 16x16 PE array; buffer sizes are in 16-bit
        # words, hence the // 2 on the byte counts.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(16, 16),
            size_gbuf=576056 // 2,  # 576 kB
            size_regf=1024 // 2,  # 1 kB
        )

        cost = Cost(
            mac_op=2e-12,
            mem_hier=(240e-12, 28e-12, 4e-12, 1e-12),  # pJ/16-b
            noc_hop=0,
            unit_static=320e-12)

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch_l1 = tops[0]

        ## T-16 configuration.

        # A 4x4 grid of smaller nodes with 14x14 PE arrays each; memory
        # accesses are cheaper but NoC hops now cost energy.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(4, 4),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(14, 14),
            size_gbuf=133032 // 2,  # 133 kB
            size_regf=512 // 2,  # 512 B
        )

        cost = Cost(
            mac_op=2e-12,
            mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
            noc_hop=40e-12,
            unit_static=200e-12)

        # T-16 additionally bypasses gbuf for all data categories, solves
        # loop blocking analytically, and enables hybrid partitioning.
        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         partition_hybrid=True)

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)
        dfsch_t16 = tops[0]

        ## Check results.

        # Same workload.
        self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)

        # Performance of T-16 is proportional to PE resource (20% margin).
        self.assertLess(dfsch_t16.total_time,
                        1.2 * dfsch_l1.total_time * (16 * 16) / (14 * 14 * 16))
        # Energy reduced by > 30%.
        self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
Пример #14
0
    def test_eyeriss_isscc16(self):
        '''
        Reproduce Eyeriss ISSCC'16 paper Fig. 14.5.6, JSSC'17 paper Table V.
        '''
        network = self.alex_net

        batch_size = 4

        # Eyeriss-like resources: a single node with a 12x14 PE array;
        # buffer sizes are in 16-bit words, hence the // 2 on the byte
        # count.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(1, 1),
                                   type=NodeRegion.PROC),
            data_regions=(NodeRegion(origin=PhyDim2(0, 0),
                                     dim=PhyDim2(1, 1),
                                     type=NodeRegion.DATA), ),
            dim_array=PhyDim2(12, 14),
            size_gbuf=108 * 1024 // 2,  # 108 kB
            size_regf=261,  # 225 + 12 + 24
        )

        # Per-access energies for the memory hierarchy, DRAM -> REGF.
        cost = Cost(
            mac_op=2e-12,
            mem_hier=(460e-12, 15e-12, 4e-12, 1e-12),  # pJ/16-b
            noc_hop=0,
            unit_static=30e-3 / 200e6)  # 30 mW GBUF + REGF

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(self.options)
        self.assertTrue(tops)
        dfsch = tops[0]

        ## Check results.

        # Results as stats of the rows in the table.
        header = 'Power, Processing Latency, Ops, Active PEs, Filter size'
        stats = {}

        for layer in ['conv{}'.format(i) for i in range(1, 6)]:
            onchip_cost = 0
            time = 0
            ops = 0
            fil_size = 0

            # Sum over all layer parts whose names share this conv layer's
            # name prefix.
            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch[layer_part]
                # On-chip cost excludes the DRAM access energy.
                onchip_cost += sr.total_cost \
                        - sr.total_accesses[me.DRAM] * cost.mem_hier[me.DRAM]
                time += sr.total_time
                ops += sr.total_ops
                fil_size += network[layer_part].total_filter_size()

            # On-chip power in mW at the 200 MHz clock implied by the
            # conversions above and below.
            power = onchip_cost / (time / 200e6) * 1e3  # mW
            active_pes = int(ops / time)

            stats[layer] = []
            stats[layer].append(power)
            stats[layer].append(time / 200.e3)  # cycles to ms
            stats[layer].append(ops / 1e6)  # to MOPs
            stats[layer].append(active_pes)
            stats[layer].append(fil_size / 1e3)  # to k

        # Check.
        stats_ref = {
            'conv1': [332, 16.5, 421.66, 151, 34.8],  # Act PE 154
            'conv2': [288, 39.2, 895.79, 135, 307.2],
            'conv3': [266, 21.8, 598.1, 156, 884.7],
            'conv4': [235, 16.0, 448.6, 156, 663.6],
            'conv5': [236, 10.0, 299.0, 156, 442.4],
        }
        # Power must be below the reference but within 40% of it; latency
        # below the reference but within 20%; remaining stats must match
        # the reference to within 0.1.
        for layer in stats:
            success = (0.6 * stats_ref[layer][0]
                       < stats[layer][0]
                       < stats_ref[layer][0]) \
                    and (0.8 * stats_ref[layer][1]
                         < stats[layer][1]
                         < stats_ref[layer][1]) \
                    and all(abs(a - b) < 0.1 for a, b
                            in zip(stats[layer][2:], stats_ref[layer][2:]))
            self.assertTrue(
                success, 'test_eyeriss_isscc16: '
                'stats diff in layer {}.\n'
                'header: {}\n'
                'actual: {}\nref: {}'.format(layer, header, stats[layer],
                                             stats_ref[layer]))
Пример #15
0
 def test_sub(self):
     ''' Operation sub. '''
     minuend = PhyDim2(14, 12)
     # Subtracting another PhyDim2 works element-wise.
     self.assertTupleEqual(minuend - PhyDim2(5, 3), (9, 9), 'sub')
     # A scalar subtrahend applies to both components.
     self.assertTupleEqual(minuend - 3, (11, 9), 'sub')
Пример #16
0
 def test_invalid_frmap_type(self):
     ''' Invalid frmap type raises TypeError. '''
     # ``assertRaisesRegexp`` was deprecated in Python 3.2 and removed in
     # 3.12; use the modern spelling ``assertRaisesRegex``.
     with self.assertRaisesRegex(TypeError, 'DataLayout: .*frmap.*'):
         _ = DataLayout(origin=PhyDim2(2, 3),
                        frmap=FmapRange((0, ) * 4, (1, ) * 4),
                        type=NodeRegion.DATA)
Пример #17
0
 def test_neg(self):
     ''' Negation operator on PhyDim2. '''
     # Negation flips the sign of both components.
     for dim, expected in ((PhyDim2(14, 12), (-14, -12)),
                           (PhyDim2(5, 3), (-5, -3))):
         self.assertTupleEqual(-dim, expected, 'neg')
Пример #18
0
 def test_invalid_type(self):
     ''' Invalid region type raises ValueError. '''
     # ``assertRaisesRegexp`` was deprecated in Python 3.2 and removed in
     # 3.12; use the modern spelling ``assertRaisesRegex``.
     with self.assertRaisesRegex(ValueError, 'DataLayout: .*type.*'):
         _ = DataLayout(origin=PhyDim2(2, 3),
                        frmap=self.frm,
                        type=NodeRegion.NUM)
Пример #19
0
 def test_hop_dist_error(self):
     ''' hop_dist with a non-PhyDim2 argument raises TypeError. '''
     dim1 = PhyDim2(14, 12)
     # ``assertRaisesRegexp`` was deprecated in Python 3.2 and removed in
     # 3.12; use the modern spelling ``assertRaisesRegex``.
     with self.assertRaisesRegex(TypeError, 'hop_dist'):
         _ = dim1.hop_dist((5, 20))
Пример #20
0
    def test_is_in_region(self):
        ''' Whether the layout fits inside a given node region. '''
        # Region exactly covering the layout nodes.
        region_exact = NodeRegion(dim=PhyDim2(2, 2),
                                  origin=PhyDim2(1, 1),
                                  type=NodeRegion.DATA)
        self.assertTrue(self.dly.is_in_region(region_exact))
        # Strictly larger region also contains the layout.
        region_super = NodeRegion(dim=PhyDim2(3, 3),
                                  origin=PhyDim2(0, 0),
                                  type=NodeRegion.DATA)
        self.assertTrue(self.dly.is_in_region(region_super))
        # Region too small to hold all layout nodes.
        region_small = NodeRegion(dim=PhyDim2(2, 2),
                                  origin=PhyDim2(0, 0),
                                  type=NodeRegion.DATA)
        self.assertFalse(self.dly.is_in_region(region_small))
        # Large enough but of the wrong region type.
        region_proc = NodeRegion(dim=PhyDim2(3, 3),
                                 origin=PhyDim2(0, 0),
                                 type=NodeRegion.PROC)
        self.assertFalse(self.dly.is_in_region(region_proc))

        # Extend the fmap range map with a second range on more nodes.
        frm = self.frm.copy()
        frm.add(FmapRange((0, 0, 0, 16), (4, 4, 16, 20)),
                (PhyDim2(2, 2), PhyDim2(3, 3)))
        dly = DataLayout(origin=PhyDim2(1, 1), frmap=frm, type=NodeRegion.DATA)

        region_3x3 = NodeRegion(dim=PhyDim2(3, 3),
                                origin=PhyDim2(1, 1),
                                type=NodeRegion.DATA)
        self.assertFalse(dly.is_in_region(region_3x3))
        region_4x4 = NodeRegion(dim=PhyDim2(4, 4),
                                origin=PhyDim2(1, 1),
                                type=NodeRegion.DATA)
        self.assertTrue(dly.is_in_region(region_4x4))

        # A third range stretches the layout even further.
        frm.add(FmapRange((0, 0, 16, 0), (4, 4, 20, 20)),
                (PhyDim2(1, 1), PhyDim2(3, 3), PhyDim2(5, 5)))
        dly = DataLayout(origin=PhyDim2(1, 1), frmap=frm, type=NodeRegion.DATA)

        self.assertFalse(dly.is_in_region(region_4x4))
        region_7x7 = NodeRegion(dim=PhyDim2(7, 7),
                                origin=PhyDim2(0, 0),
                                type=NodeRegion.DATA)
        self.assertTrue(dly.is_in_region(region_7x7))
    def test_small(self):
        ''' Small case with hand calculation.

        Partitions a 6-in/8-out-channel conv layer over a 4x4 node grid and
        checks the exact unit hop counts for filters, ifmaps, and ofmaps
        against the hand-derived per-node data ranges listed below.
        '''
        layer = ConvLayer(6, 8, 16, 3)
        assert self.batch_size == 8

        # Per-node fmap ranges under the partition scheme below.
        # i (0, 0), (2, 0): (0, 0, 0, 0) -- (4, 6, 10, 10)
        #   (0, 1), (2, 1): (0, 0, 0, 8) -- (4, 6, 10, 18)
        #   (0, 2), (2, 2): (4, 0, 0, 0) -- (8, 6, 10, 10)
        #   (0, 3), (2, 3): (4, 0, 0, 8) -- (8, 6, 10, 18)
        #   (1, 0), (3, 0): (0, 0, 8, 0) -- (4, 6, 18, 10)
        #   (1, 1), (3, 1): (0, 0, 8, 8) -- (4, 6, 18, 18)
        #   (1, 2), (3, 2): (4, 0, 8, 0) -- (8, 6, 18, 10)
        #   (1, 3), (3, 3): (4, 0, 8, 8) -- (8, 6, 18, 18)
        # o (0, 0): (0, 0, 0, 0) -- (4, 4, 8, 8)
        #   (0, 1): (0, 0, 0, 8) -- (4, 4, 8, 16)
        #   (0, 2): (4, 0, 0, 0) -- (8, 4, 8, 8)
        #   (0, 3): (4, 0, 0, 8) -- (8, 4, 8, 16)
        #   (1, 0): (0, 0, 8, 0) -- (4, 4, 16, 8)
        #   (1, 1): (0, 0, 8, 8) -- (4, 4, 16, 16)
        #   (1, 2): (4, 0, 8, 0) -- (8, 4, 16, 8)
        #   (1, 3): (4, 0, 8, 8) -- (8, 4, 16, 16)
        #   (2, 0): (0, 4, 0, 0) -- (4, 8, 8, 8)
        #   (2, 1): (0, 4, 0, 8) -- (4, 8, 8, 16)
        #   (2, 2): (4, 4, 0, 0) -- (8, 8, 8, 8)
        #   (2, 3): (4, 4, 0, 8) -- (8, 8, 8, 16)
        #   (3, 0): (0, 4, 8, 0) -- (4, 8, 16, 8)
        #   (3, 1): (0, 4, 8, 8) -- (4, 8, 16, 16)
        #   (3, 2): (4, 4, 8, 0) -- (8, 8, 16, 8)
        #   (3, 3): (4, 4, 8, 8) -- (8, 8, 16, 16)
        # Partition scheme realizing the per-node ranges above.
        part = PartitionScheme(order=(pe.BATP, pe.INPP, pe.OUTP, pe.OFMP),
                               pdims=((2, 1), (2, 2), (1, 2), (1, 1)))

        # Processing region covering the partitioned nodes at the origin.
        nr = NodeRegion(origin=PhyDim2(0, 0),
                        dim=part.dim(),
                        type=NodeRegion.PROC)

        # Source ifmap layout: split over 4 nodes at negative coordinates.
        # (0, 0, 0, 0) -- (4, 6, 18, 9): (-2, -2)
        # (0, 0, 0, 9) -- (4, 6, 18, 18): (-2, -1)
        # (4, 0, 0, 0) -- (8, 6, 18, 9): (-1, -2)
        # (4, 0, 0, 9) -- (8, 6, 18, 18): (-1, -1)
        ilayout = self._make_data_layout(layer.nifm, layer.hifm, layer.wifm,
                                         PhyDim2(-2, -2), (2, 1), (1, 1),
                                         PhyDim2(2, 2))

        # Destination ofmap layout: split over 4 nodes beyond the region.
        # (0, 0, 0, 0) -- (8, 4, 16, 8): (2, 2)
        # (0, 0, 0, 8) -- (8, 4, 16, 16): (2, 3)
        # (0, 4, 0, 0) -- (8, 8, 16, 8): (3, 2)
        # (0, 4, 0, 8) -- (8, 8, 16, 16): (3, 3)
        olayout = self._make_data_layout(layer.nofm, layer.hofm, layer.wofm,
                                         PhyDim2(2, 2), (1, 1), (2, 1),
                                         PhyDim2(2, 2))

        # All filters come from the single node at the origin.
        filter_nodes = frozenset([PhyDim2(0, 0)])

        # Hand-derived data transfer sizes per (source -> destination) pair.
        # filter: (0, 0) -> all, 6 * 4 * 3 * 3

        # ifmap: (-2, -2) -> (0, 0), (2, 0): 4 * 6 * 10 * 9
        #                 -> (0, 1), (2, 1): 4 * 6 * 10 * (9 - 8)
        #                 -> (1, 0), (3, 0): 4 * 6 * (18 - 8) * 9
        #                 -> (1, 1), (3, 1): 4 * 6 * (18 - 8) * (9 - 8)
        #        (-2, -1) -> (0, 0), (2, 0): 4 * 6 * 10 * (10 - 9)
        #                 -> (0, 1), (2, 1): 4 * 6 * 10 * (18 - 9)
        #                 -> (1, 0), (3, 0): 4 * 6 * (18 - 8) * (10 - 9)
        #                 -> (1, 1), (3, 1): 4 * 6 * (18 - 8) * (18 - 9)
        #        (-1, -2) -> (0, 2), (2, 2): (8 - 4) * 6 * 10 * 9
        #                 -> (0, 3), (2, 3): (8 - 4) * 6 * 10 * (9 - 8)
        #                 -> (1, 2), (3, 2): (8 - 4) * 6 * (18 - 8) * 9
        #                 -> (1, 3), (3, 3): (8 - 4) * 6 * (18 - 8) * (9 - 8)
        #        (-1, -1) -> (0, 2), (2, 2): (8 - 4) * 6 * 10 * (10 - 9)
        #                 -> (0, 3), (2, 3): (8 - 4) * 6 * 10 * (18 - 9)
        #                 -> (1, 2), (3, 2): (8 - 4) * 6 * (18 - 8) * (10 - 9)
        #                 -> (1, 3), (3, 3): (8 - 4) * 6 * (18 - 8) * (18 - 9)

        # ofmap: (2, 2) -> (0, 0):
        #               -> (0, 2):
        #               -> (1, 0):
        #               -> (1, 2): 4 * 4 * 8 * 8
        #        (2, 3) -> (0/1, 1/3)
        #        (3, 2) -> (2/3, 0/2)
        #        (3, 3) -> (2/3, 1/3)

        # Aggregate hop counts per data category (FIL/IFM/OFM).
        nhops = partition.unit_nhops_to_proc_region(layer, self.batch_size, nr,
                                                    part, filter_nodes,
                                                    ilayout, olayout,
                                                    self.options['BASE'])

        # FIL: full filter size times the sum of Manhattan distances
        # (h + w) from (0, 0) to every node of the 4x4 grid.
        self.assertEqual(
            nhops[de.FIL],
            6 * 4 * 3 * 3 * sum(h + w for h in range(4) for w in range(4)))
        self.assertEqual(
            nhops[de.IFM], 4 * 6 * 10 *
            ((4 + 6) * 9 + (5 + 7) * 1 + (5 + 7) * 9 + (6 + 8) * 1 +
             (3 + 5) * 1 + (4 + 6) * 9 + (4 + 6) * 1 + (5 + 7) * 9 +
             (5 + 7) * 9 + (6 + 8) * 1 + (6 + 8) * 9 + (7 + 9) * 1 +
             (4 + 6) * 1 + (5 + 7) * 9 + (5 + 7) * 1 + (6 + 8) * 9))
        self.assertEqual(
            nhops[de.OFM], 4 * 4 * 8 * 8 * ((4 + 2 + 3 + 1) + (4 + 2 + 3 + 1) +
                                            (3 + 1 + 2 + 0) + (3 + 1 + 2 + 0)))
    def setUp(self):
        ''' Build synthetic and real test networks, plus the shared batch
        size, resource, and ofmap layout fixtures. '''

        # Networks keyed by name; covers linear, forked, and corner-case
        # topologies plus all real imported networks.
        self.net = {}

        net = Network('net1')
        # Linear.
        net.set_input_layer(InputLayer(10, 1))
        net.add('0', FCLayer(10, 20))
        net.add('1', FCLayer(20, 30))
        net.add('1p', PoolingLayer(30, 1, 1))
        net.add('2', FCLayer(30, 40))
        net.add('3', FCLayer(40, 50))
        self.net[net.net_name] = net

        net = Network('net2')
        # Long linear.
        net.set_input_layer(InputLayer(1, 1))
        for idx in range(16):
            net.add(str(idx), FCLayer(1, 1))
        self.net[net.net_name] = net

        net = Network('net3')
        # Fork.
        # /0-2\   /6- 7- 8\
        #   x  4-5         12
        # \1-3/   \9-10-11/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('1', FCLayer(1, 1), prevs=net.INPUT_LAYER_KEY)
        net.add('2', FCLayer(2, 1), prevs=('0', '1'))
        net.add('2p', PoolingLayer(1, 1, 1))
        net.add('3', FCLayer(2, 1), prevs=('0', '1'))
        net.add('4', FCLayer(2, 1), prevs=('2p', '3'))
        net.add('5', FCLayer(1, 1))
        net.add('5p', PoolingLayer(1, 1, 1))
        net.add('6', FCLayer(1, 1), prevs='5p')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(1, 1), prevs='5p')
        net.add('10', FCLayer(1, 1))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('8', '11'))
        self.net[net.net_name] = net

        net = Network('net4')
        # Complex fork.
        #          /5       \
        # 0-1-2-3-4-6-7-8-10-14
        #              \9/
        #          \11-12   /
        #          \13      /
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1))
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1))
        net.add('4', FCLayer(1, 1))
        net.add('5', FCLayer(1, 1), prevs='4')
        net.add('6', FCLayer(1, 1), prevs='4')
        net.add('7', FCLayer(1, 1))
        net.add('8', FCLayer(1, 1), prevs='7')
        net.add('9', FCLayer(1, 1), prevs='7')
        net.add('10', FCLayer(1, 1))
        net.add('10p', PoolingLayer(2, 1, 1), prevs=('8', '10'))
        net.add('11', PoolingLayer(1, 1, 1), prevs='4')
        net.add('12', FCLayer(1, 1))
        net.add('13', PoolingLayer(1, 1, 1), prevs='4')
        net.add('14', FCLayer(5, 1), prevs=('5', '10p', '12', '13'))
        self.net[net.net_name] = net

        net = Network('net5')
        # Corner cases.
        #  ----\
        # //1-2\ 7-8\
        # 0-3-4-x   10-11-12
        #  \ \5/ 9 /  \__/
        #   6--/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(1, 1), prevs='3')
        net.add('5', FCLayer(1, 1), prevs='3')
        net.add('6', FCLayer(1, 1), prevs='0')
        net.add('7', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('8', FCLayer(1, 1))
        net.add('9', FCLayer(5, 1), prevs=('0', '2', '4', '5', '6'))
        net.add('10', FCLayer(2, 1), prevs=('8', '9'))
        net.add('11', FCLayer(1, 1))
        net.add('12', FCLayer(2, 1), prevs=('10', '11'))
        self.net[net.net_name] = net

        net = Network('net6')
        # Fmap sizes.
        net.set_input_layer(InputLayer(1, 24))
        net.add('0', ConvLayer(1, 1, 24, 3))
        net.add('1', ConvLayer(1, 1, 12, 3, strd=2))
        net.add('1p', PoolingLayer(1, 6, 2))
        net.add('2', ConvLayer(1, 1, 6, 3))
        net.add('3', ConvLayer(1, 1, 6, 3, strd=4), prevs=('0'))
        self.net[net.net_name] = net

        net = Network('net7')
        # Topological order: see a visited vertex again.
        #  /---
        # 0-1-\\
        #  \2--2p
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='0')
        net.add('2p', PoolingLayer(3, 1, 1), prevs=('0', '1', '2'))
        self.net[net.net_name] = net

        net = Network('net8')
        # Forward to the middle.
        #    /-\
        # 0-1-2-2p-4-4p
        #  \-3------/
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(1, 1), prevs='0')
        net.add('2', FCLayer(1, 1), prevs='1')
        net.add('2p', PoolingLayer(2, 1, 1), prevs=('1', '2'))
        net.add('3', FCLayer(1, 1), prevs='0')
        net.add('4', FCLayer(2, 1), prevs='2p')
        net.add('4p', PoolingLayer(2, 1, 1), prevs=('3', '4'))
        self.net[net.net_name] = net

        net = Network('net9')
        # Previous layers include input and others.
        net.set_input_layer(InputLayer(1, 1))
        net.add('0', FCLayer(1, 1))
        net.add('1', FCLayer(2, 1), prevs=(net.INPUT_LAYER_KEY, '0'))
        self.net[net.net_name] = net

        # Real networks.
        for net_name in all_networks():
            self.net[net_name] = import_network(net_name)

        self.batch_size = 16

        # 8x8 processing grid; DRAM split into source (left) and
        # destination (right) halves.
        self.resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(8, 8),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 4),
                                       dim=PhyDim2(8, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(16, 16),
            size_gbuf=65536,
            size_regf=64,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)

        # Trivial single-node partition for the ofmap layout.
        part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM)
        self.ofmap_layout = DataLayout(
            frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)), ),
            regions=(NodeRegion(origin=PhyDim2(0, 0),
                                dim=PhyDim2(1, 1),
                                type=NodeRegion.DRAM), ),
            parts=(part, ))
Пример #23
0
 def _make_region(dim):
     ''' Wrap an (h, w) pair into a DRAM NodeRegion anchored at the origin. '''
     region_dim = PhyDim2(*dim)
     return NodeRegion(type=NodeRegion.DRAM,
                       origin=PhyDim2(0, 0),
                       dim=region_dim)
Пример #24
0
 def test_valid_args(self):
     ''' Valid arguments. '''
     dim = PhyDim2(14, 12)
     # Components are exposed as the named fields h and w.
     for field, expected in (('h', 14), ('w', 12)):
         self.assertEqual(getattr(dim, field), expected, field)
Пример #25
0
    def eyerissAsplos17(self):
        '''
        Reproduce TETRIS ASPLOS'17 paper Figure 8.

        Runs the dataflow search for AlexNet on a 4x4-node TETRIS-like
        system (T-16) and prints per-layer time/energy results and the
        cost breakdown for manual comparison against the paper.
        '''
        network = self.alex_net

        batch_size = 16

        # 4x4 processing nodes, each with its own co-located DRAM region.
        resource = Resource(
            proc_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.PROC),
            dram_region=NodeRegion(origin=PhyDim2(0, 0),
                                   dim=PhyDim2(4, 4),
                                   type=NodeRegion.DRAM),
            src_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dst_data_region=NodeRegion(origin=PhyDim2(0, 0),
                                       dim=PhyDim2(4, 4),
                                       type=NodeRegion.DRAM),
            dim_array=PhyDim2(14, 14),
            size_gbuf=133032 // 2,  # 133 kB
            size_regf=512 // 2,  # 512 B
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False,
        )

        cost = Cost(
            mac_op=2e-12,
            mem_hier=(80e-12, 14e-12, 4e-12, 0.6e-12),  # pJ/16-b
            noc_hop=40e-12,
            idl_unit=200e-12)

        options = Option(sw_gbuf_bypass=(True, True, True),
                         sw_solve_loopblocking=True,
                         partition_hybrid=True)

        # NOTE: a leftover ``pdb.set_trace()`` debugger breakpoint was
        # removed here; it stalled the run waiting for interactive input.

        nnd = NNDataflow(network, batch_size, resource, cost,
                         self.map_strategy)
        tops, _ = nnd.schedule_search(options)
        self.assertTrue(tops)
        dfsch_t16 = tops[0]

        ## Check results.

        # Same workload.
        #self.assertAlmostEqual(dfsch_t16.total_ops, dfsch_l1.total_ops)
        print('t16 ops: {}'.format(dfsch_t16.total_ops))

        # Performance of T-16 is proportional to PE resource (20% margin).
        #self.assertLess(dfsch_t16.total_time,
        #                1.2 * dfsch_l1.total_time * (16 * 16) / (14 * 14 * 16))
        print('t16_time: {}'.format(dfsch_t16.total_time))

        # Energy reduced by > 30%.
        # self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.7)
        # With dimension restriction on partitioning, this is slightly violated.
        #self.assertLess(dfsch_t16.total_cost, dfsch_l1.total_cost * 0.72)
        print('t16_energy: {}'.format(dfsch_t16.total_cost))
        for i in dfsch_t16:
            print(str(i) + ',')

        # Results as cost for each component:
        header = 'ALU, DRAM, Buffer, Array, RF'
        cost_bkdn = {}

        for layer in dfsch_t16:
            layer = str(layer)
            op_cost = 0
            access_cost = [0] * me.NUM

            # Sum over all partial layers whose names share this layer's
            # name as a prefix.
            for layer_part in network:
                if not layer_part or not layer_part.startswith(layer):
                    continue
                sr = dfsch_t16[layer_part]
                op_cost += sr.total_ops * cost.mac_op
                access_cost = [
                    ac + a * c for ac, a, c in zip(
                        access_cost, sr.total_accesses, cost.mem_hier)
                ]

            cost_bkdn[layer] = []
            # To 1e9.
            cost_bkdn[layer].append(op_cost * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.DRAM] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.GBUF] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.ITCN] * 1e12 / 1e9)
            cost_bkdn[layer].append(access_cost[me.REGF] * 1e12 / 1e9)

        for layer in cost_bkdn:
            print(cost_bkdn[layer])
Пример #26
0
 def test_size(self):
     ''' Get size. '''
     height, width = 14, 12
     dim = PhyDim2(height, width)
     # size() is the product of the two dimensions.
     self.assertEqual(dim.size(), height * width, 'size')
Пример #27
0
 def test_invalid_type(self):
     ''' Invalid region type raises ValueError. '''
     # ``assertRaisesRegexp`` was deprecated in Python 3.2 and removed in
     # 3.12; use the modern spelling ``assertRaisesRegex``.
     with self.assertRaisesRegex(ValueError, 'NodeRegion: .*type.*'):
         _ = NodeRegion(dim=PhyDim2(4, 4),
                        origin=PhyDim2(1, 3),
                        type=NodeRegion.NUM)
Пример #28
0
 def test_add(self):
     ''' Addition operator on PhyDim2. '''
     lhs = PhyDim2(14, 12)
     rhs = PhyDim2(5, 3)
     # Element-wise sum of two dimensions.
     self.assertTupleEqual(lhs + rhs, (19, 15), 'add')
     # A scalar offsets both components.
     self.assertTupleEqual(lhs + 3, (17, 15), 'add')
Пример #29
0
    def setUp(self):
        ''' Build the workload layers, resource variants, nested loop
        descriptions, partition/buffer-sharing schemes, options,
        constraints, and cost model shared by the scheduling tests. '''

        # Workload.
        self.layer = {}
        self.layer['BASE'] = ConvLayer(12, 10, 28, 3)
        self.layer['LGFIL'] = ConvLayer(2, 4, 28, 20)
        self.layer['POOL'] = PoolingLayer(32, 28, 2)
        self.layer['PAR'] = ConvLayer(24, 36, 56, 3)
        self.batch_size = 4

        # Resource.
        self.resource = {}
        dim_array = PhyDim2(16, 16)
        proc_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_region = NodeRegion(origin=PhyDim2(0, 0),
                                 dim=PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        # Typical resource.
        self.resource['BASE'] = Resource(proc_region=proc_region,
                                         dram_region=data_region,
                                         src_data_region=data_region,
                                         dst_data_region=data_region,
                                         dim_array=dim_array,
                                         size_gbuf=65536,
                                         size_regf=64,
                                         array_bus_width=float('inf'),
                                         dram_bandwidth=float('inf'),
                                         no_time_mux=False)
        # Larger resource with sufficient capacity, to make all schemes valid.
        self.resource['LG'] = Resource(proc_region=proc_region,
                                       dram_region=data_region,
                                       src_data_region=data_region,
                                       dst_data_region=data_region,
                                       dim_array=dim_array,
                                       size_gbuf=1024**3,
                                       size_regf=1024**3,
                                       array_bus_width=float('inf'),
                                       dram_bandwidth=float('inf'),
                                       no_time_mux=False)
        # Small resource.
        self.resource['SM'] = Resource(proc_region=proc_region,
                                       dram_region=data_region,
                                       src_data_region=data_region,
                                       dst_data_region=data_region,
                                       dim_array=dim_array,
                                       size_gbuf=4096,
                                       size_regf=16,
                                       array_bus_width=float('inf'),
                                       dram_bandwidth=float('inf'),
                                       no_time_mux=False)
        # Multi-node parallel resource.
        self.resource['PAR'] = Resource(proc_region=NodeRegion(
            origin=PhyDim2(0, 0), dim=PhyDim2(4, 2), type=NodeRegion.PROC),
                                        dram_region=data_region,
                                        src_data_region=data_region,
                                        dst_data_region=data_region,
                                        dim_array=dim_array,
                                        size_gbuf=25000,
                                        size_regf=64,
                                        array_bus_width=float('inf'),
                                        dram_bandwidth=float('inf'),
                                        no_time_mux=False)
        # Resource with no data regions.
        proc_data_region = NodeRegion(origin=PhyDim2(1, 1),
                                      dim=PhyDim2(1, 1),
                                      type=NodeRegion.PROC)
        self.resource['SRCNOTDATA'] = Resource(
            proc_region=proc_region,
            dram_region=data_region,
            src_data_region=proc_data_region,
            dst_data_region=data_region,
            dim_array=dim_array,
            size_gbuf=1024**3,
            size_regf=1024**3,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)
        self.resource['DSTNOTDATA'] = Resource(
            proc_region=proc_region,
            dram_region=data_region,
            src_data_region=data_region,
            dst_data_region=proc_data_region,
            dim_array=dim_array,
            size_gbuf=1024**3,
            size_regf=1024**3,
            array_bus_width=float('inf'),
            dram_bandwidth=float('inf'),
            no_time_mux=False)
        self.resource['DATALOCAL'] = Resource(proc_region=proc_region,
                                              dram_region=data_region,
                                              src_data_region=proc_region,
                                              dst_data_region=proc_region,
                                              dim_array=dim_array,
                                              size_gbuf=1024**3,
                                              size_regf=1024**3,
                                              array_bus_width=float('inf'),
                                              dram_bandwidth=float('inf'),
                                              no_time_mux=True)
        # Filter pinning.
        self.resource['FILPIN'] = Resource(proc_region=proc_region,
                                           dram_region=data_region,
                                           src_data_region=data_region,
                                           dst_data_region=data_region,
                                           dim_array=dim_array,
                                           size_gbuf=1024**3,
                                           size_regf=1024**3,
                                           array_bus_width=float('inf'),
                                           dram_bandwidth=float('inf'),
                                           no_time_mux=True)

        # Nested loop description after mapping.
        self.nld = {}
        self.nld['BASE'] = next(
            MapStrategyEyeriss(self.layer['BASE'], self.batch_size, 1,
                               dim_array).gen_nested_loop_desc())
        self.nld['LGFIL'] = next(
            MapStrategyEyeriss(self.layer['LGFIL'], self.batch_size, 1,
                               dim_array).gen_nested_loop_desc())
        self.nld['POOL'] = next(
            MapStrategyEyeriss(self.layer['POOL'], self.batch_size, 1,
                               dim_array).gen_nested_loop_desc())
        # Fake nested loop, with zero filter size.
        self.nld['ZERO_FIL'] = NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=(0, 1000, 800),
            usize_regf=(0, 3, 1),
            unit_access=((0, 1000, 800), (0, 1000, 800), (3, 9, 7), (1, 1, 1)),
            data_loops=(DataDimLoops(le.IFM,
                                     le.OFM), DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)
        # Fake nested loop, with zero ifmap size.
        self.nld['ZERO_IFM'] = NestedLoopDesc(
            loopcnt=(12, 10, 4),
            usize_gbuf=(9, 0, 800),
            usize_regf=(3, 0, 1),
            unit_access=((9, 0, 800), (9, 0, 800), (3, 9, 7), (1, 1, 1)),
            data_loops=(DataDimLoops(le.IFM,
                                     le.OFM), DataDimLoops(le.IFM, le.BAT),
                        DataDimLoops(le.OFM, le.BAT)),
            unit_ops=1,
            unit_time=1)

        # Fake partition scheme.
        self.part = PartitionScheme(range(pe.NUM), ((1, 1), ) * pe.NUM)

        # Fake buffer sharing scheme.
        self.bufshr = BufShrScheme(proc_region, self.part)

        # Options.
        self.options = {}
        # Basic.
        self.options['BASE'] = Option(ntops=2**30)
        # Multiprocessing.
        self.options['MP'] = Option(ntops=2**30, nprocesses=8)
        # Limited top schemes.
        self.options['NTOPS'] = Option(ntops=10)
        # Bypass.
        self.options['BYP'] = Option(sw_gbuf_bypass=(True, ) * 3, ntops=2**30)
        # Bypass solver.
        self.options['BYPSOL'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                        sw_solve_loopblocking=True,
                                        ntops=2**30)
        # Access forwarding.
        self.options['ACCFWD'] = Option(hw_access_forwarding=True, ntops=2**30)
        # Buffer sharing.
        self.options['BUFSHR'] = Option(hw_gbuf_sharing=True, ntops=2**30)
        # Buffer sharing with bypassing.
        self.options['BUFSHR-BYP'] = Option(sw_gbuf_bypass=(True, ) * 3,
                                            hw_gbuf_sharing=True,
                                            ntops=2**30)

        # Constraint.
        self.none_cstr = SchedulingConstraint()
        self.cstr = SchedulingConstraint(topifm=1, topbat=1)

        # Cost.
        self.cost = Cost(mac_op=1,
                         mem_hier=(200, 6, 2, 1),
                         noc_hop=50,
                         idl_unit=50)
Пример #30
0
 def test_concat_invalid_type(self):
     ''' Concat invalid type. '''
     with self.assertRaisesRegex(TypeError, 'DataLayout: .*concat.*'):
         _ = DataLayout.concat(self.dl1, self.frng1)
     with self.assertRaisesRegex(TypeError, 'DataLayout: .*concat.*'):
         _ = DataLayout.concat(self.dl1, PhyDim2(1, 3))