Пример #1
0
    def instantiate(self):
        """Set up the 'tb' testbench: channels, converter, pruner, stimulus.

        Creates three channels, constructs a Converter on (in_chn, mid_chn)
        and a ClusteredPruner on (mid_chn, out_chn), generates random test
        rows and prints every row except the last one.
        """
        self.name = 'tb'

        # Sizing and pruning parameters.
        self.input_size = 4
        self.block_size = 12
        self.in_sets = self.block_size // self.input_size
        self.num_nonzero = 5
        self.preserve_order = True

        self.in_chn = Channel()
        self.mid_chn = Channel()
        self.out_chn = Channel()

        self.converter = Converter(
            self.in_chn, self.mid_chn, self.input_size, self.block_size)

        # NaivePruner and ThresholdPruner were previously tried here with the
        # same argument list; ClusteredPruner is the one in use.
        pruner_args = (self.mid_chn, self.out_chn, self.num_nonzero,
                       self.block_size, self.preserve_order)
        self.pruner = ClusteredPruner(*pruner_args)

        # Progress counters.
        self.iterations = 10
        self.iteration = 0
        self.curr_set = 0
        self.out_counter = 0

        # One row more than `iterations`: the extra row is sent in to flush
        # out the last outputs.
        rows = []
        for _ in range(self.iterations + 1):
            row = []
            for _ in range(self.block_size):
                # The keep/zero draw is evaluated before the value draw.
                row.append(randint(1, 5) if randint(0, 3) > 1 else 0)
            rows.append(row)
        self.test_data = rows

        # Show the stimulus without the trailing flush row.
        print("Stimulus:")
        print("[")
        for row in self.test_data[:-1]:
            print(row)
        print("]")
Пример #2
0
    def instantiate(self, arr_x, arr_y, chn_per_word, done_chn,
                    ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        """Set up the 'conv_tb' testbench around a WSArch device under test.

        Stores the array dimensions and the done channel, creates the two
        named architecture channels, and constructs the Stimulus and the
        WSArch that share them. The layer description fields start as None.
        """
        self.name = 'conv_tb'

        # Layer description; not known yet at construction time.
        self.image_size = self.filter_size = None
        self.full_in_chn = self.full_out_chn = None

        self.ceil_in_chn = self.ceil_out_chn = None

        # Note the cross-over: in_chn takes arr_y and out_chn takes arr_x.
        self.in_chn, self.out_chn = arr_y, arr_x
        self.done_chn = done_chn

        self.chn_per_word = chn_per_word

        self.arr_x, self.arr_y = self.out_chn, self.in_chn

        self.input_chn = Channel(name='arch_input_chn')
        self.output_chn = Channel(name='arch_output_chn')

        self.stimulus = Stimulus(
            self.arr_x, self.arr_y, self.chn_per_word,
            self.input_chn, self.output_chn, self.done_chn)
        self.dut = WSArch(
            self.arr_x, self.arr_y, self.input_chn, self.output_chn,
            self.chn_per_word, ifmap_glb_depth, psum_glb_depth,
            weight_glb_depth)
Пример #3
0
    def instantiate(self):
        """Set up the 'tb' testbench with a half-size array and a psum channel.

        Fixes the layer parameters, creates the input/output/psum channels,
        derives the ifmap and psum GLB depths from the image size and half
        the channel counts, and constructs the Stimulus and WSArch.
        """
        self.name = 'tb'
        self.image_size = (4, 4)
        self.filter_size = (3, 3)
        self.in_chn = 8
        self.out_chn = 16
        self.chn_per_word = 4

        # The array covers half of the output / input channels.
        self.arr_x = self.out_chn // 2
        self.arr_y = self.in_chn // 2

        self.input_chn = Channel()
        self.output_chn = Channel()
        self.psum_chn = Channel(128)
        self.curr_pass = 0
        self.tick_counter = 0

        height, width = self.image_size
        pixels = height * width
        ifmap_glb_depth = pixels * (self.in_chn // 2) // self.chn_per_word
        psum_glb_depth = pixels * (self.out_chn // 2) // self.chn_per_word

        self.stimulus = Stimulus(
            self.arr_x, self.arr_y, self.chn_per_word,
            self.input_chn, self.output_chn, self.psum_chn)
        self.dut = WSArch(
            self.arr_x, self.arr_y, self.input_chn, self.output_chn,
            self.chn_per_word, ifmap_glb_depth, psum_glb_depth)

        self.configuration_done = False
Пример #4
0
    def instantiate(self):
        """Set up the 'tb' testbench for the tiled configuration.

        Fixes the layer parameters (including num_tiles), creates the
        input/output/finish-signal channels, computes and prints the ifmap
        GLB depth, and constructs the Stimulus and WSArch.
        """
        self.name = 'tb'
        self.image_size = (4, 4)
        self.filter_size = (3, 3)
        self.in_chn = 4
        self.out_chn = 8
        self.chn_per_word = 4
        self.num_tiles = 4

        self.arr_x, self.arr_y = self.out_chn, self.in_chn

        self.input_chn = Channel()
        self.output_chn = Channel()

        self.finish_signal_chn = Channel()

        self.stat_type = 'show'
        self.raw_stats = {}

        # Only the ifmap GLB is sized here; no psum depth is computed.
        height, width = self.image_size
        pixels = height * width
        ifmap_glb_depth = (
            pixels * self.num_tiles * self.in_chn // self.chn_per_word)
        print("ifmap glb depth:", ifmap_glb_depth)
        print("weight glb depth: 0")

        self.stimulus = Stimulus(
            self.arr_x, self.arr_y, self.chn_per_word,
            self.input_chn, self.output_chn, self.finish_signal_chn)
        self.dut = WSArch(
            self.arr_x, self.arr_y, self.input_chn, self.output_chn,
            self.chn_per_word, ifmap_glb_depth)

        self.configuration_done = False
Пример #5
0
    def instantiate(self):
        """Set up the 'tb' testbench with full-size ifmap and psum buffers.

        Fixes the layer parameters, creates the input and output channels,
        computes and prints the ifmap and psum GLB depths, and constructs
        the Stimulus and WSArch.
        """
        self.name = 'tb'
        self.image_size = (4, 4)
        self.filter_size = (3, 3)
        self.in_chn = 4
        self.out_chn = 8
        self.chn_per_word = 4

        self.arr_x, self.arr_y = self.out_chn, self.in_chn

        self.input_chn = Channel()
        self.output_chn = Channel()

        height, width = self.image_size
        pixels = height * width

        ifmap_glb_depth = pixels * self.in_chn // self.chn_per_word
        print("ifmap glb depth:", ifmap_glb_depth)
        psum_glb_depth = pixels * self.out_chn // self.chn_per_word
        print("psum glb depth:", psum_glb_depth)

        self.stimulus = Stimulus(
            self.arr_x, self.arr_y, self.chn_per_word,
            self.input_chn, self.output_chn)
        self.dut = WSArch(
            self.arr_x, self.arr_y, self.input_chn, self.output_chn,
            self.chn_per_word, ifmap_glb_depth, psum_glb_depth)

        self.configuration_done = False
Пример #6
0
    def instantiate(self, image_size, filter_size, in_chn, out_chn, block_size,
                    ifmap, weights, bias, pruner_name, num_nonzero):
        """Set up the 'tb' testbench for one layer on the OSArch design.

        Stores the layer geometry and its input tensors, creates the input
        and output channels, derives the three GLB depths, and constructs
        the Stimulus and the OSArch device under test.
        """
        self.name = 'tb'

        # Layer geometry supplied by the caller.
        self.image_size = image_size
        self.filter_size = filter_size
        self.in_chn = in_chn
        self.out_chn = out_chn
        self.block_size = block_size
        # Number of non-zero values in each block.
        self.num_nonzero = num_nonzero

        # The inputs to this specific layer.
        self.ifmap = ifmap
        self.weights = weights
        self.bias = bias
        self.pruner_name = pruner_name

        self.arr_y = self.out_chn
        self.input_chn = Channel()
        self.output_chn = Channel()

        # filter width plus (filter height - 1) image rows, per input channel.
        span = (self.filter_size[1]
                + (self.filter_size[0] - 1) * self.image_size[1])
        ifmap_glb_depth = span * self.in_chn // self.block_size
        psum_glb_depth = self.out_chn // self.block_size
        weight_glb_depth = (
            self.filter_size[0] * self.filter_size[1]
            * self.in_chn * self.out_chn // self.block_size)

        self.stimulus = Stimulus(
            self.arr_y, self.block_size, self.num_nonzero,
            self.input_chn, self.output_chn, self.pruner_name)
        self.dut = OSArch(
            self.arr_y, self.input_chn, self.output_chn,
            self.block_size, self.num_nonzero,
            ifmap_glb_depth, psum_glb_depth, weight_glb_depth)

        self.configuration_done = False
Пример #7
0
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    chn_per_word):
        """Initialise the 'psum_glb' buffer state.

        Stores the two write channels and the read channel, creates the
        statistics record, a two-port SRAM and the 'last_read' channel, and
        resets the read/write bookkeeping counters.
        """
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'psum_glb'

        # Statistics record.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, chn_per_word),
            'rd': 0,
            'wr': 0,
        }

        self.sram = SRAM(glb_depth, chn_per_word, nports=2, name=self.name)
        self.last_read = Channel(3, name='last_read')

        # Layer configuration; placeholders at construction time.
        self.filter_size = (0, 0)
        self.fmap_sets = self.fmap_per_iteration = 0

        # Read-side progress.
        self.rd_set = self.fmap_rd_idx = self.iteration = 0

        # Write-side progress.
        self.wr_set = self.fmap_wr_idx = 0
        self.wr_done = False
Пример #8
0
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        """Initialise the 'ifmap_glb' buffer state (tile-aware variant).

        Stores the write and read channels, creates the statistics record,
        the SRAM and the 'last_read' channel, and resets the progress
        counters and the done flags.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        # Statistics record.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, chn_per_word),
            'rd': 0,
            'wr': 0,
        }

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        self.last_read = Channel(3, name='last_read')

        # Layer configuration; placeholders at construction time.
        self.image_size = self.filter_size = (0, 0)
        self.fmap_sets = self.full_fmap_sets = self.fmap_per_iteration = 0

        # Progress counters.
        self.curr_set = self.fmap_idx = self.iteration = 0
        self.tile_in = self.tile_out = 0
        self.wr_done = False
        self.task_done = True
Пример #9
0
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        """Initialise the 'ifmap_glb' buffer state.

        Stores the write and read channels and the buffer depth, creates the
        statistics record, the SRAM and the last-read channel, and resets
        the progress counters.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.glb_depth = glb_depth
        self.name = 'ifmap_glb'

        # Statistics record; read and write counts start at zero.
        self.stat_type = 'show'
        stats = {'size': (glb_depth, chn_per_word)}
        stats['ifmap_glb_rd'] = 0
        stats['ifmap_glb_wr'] = 0
        self.raw_stats = stats

        self.sram = SRAM(glb_depth, chn_per_word)
        self.last_read = Channel(3)

        # Layer configuration; placeholders at construction time.
        self.image_size = self.filter_size = (0, 0)
        self.fmap_sets = self.fmap_per_iteration = 0

        # Progress counters.
        self.curr_set = self.fmap_idx = self.iteration = 0
        self.wr_done = False
Пример #10
0
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    block_size, num_nonzero):
        """Initialise the 'psum_glb' buffer state (block-based variant).

        Stores the two write channels and the read channel, creates the
        statistics record, a two-port float16 SRAM and the last-read
        channel, and resets the read/write bookkeeping counters.
        """
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.name = 'psum_glb'
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        # Statistics record.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, block_size),
            'rd': 0,
            'wr': 0,
        }

        self.sram = SRAM(glb_depth, block_size, nports=2, dtype=np.float16)
        self.last_read = Channel(3)

        # Layer configuration; placeholders at construction time.
        self.filter_size = (0, 0)
        self.fmap_sets = self.fmap_per_iteration = 0

        # Read-side progress.
        self.rd_set = self.fmap_rd_idx = self.iteration = 0

        # Write-side progress.
        self.wr_set = self.fmap_wr_idx = 0
        self.wr_done = False
Пример #11
0
    def instantiate(self, arch_input_chn, arr_y, block_size, num_nonzero,
                    pruner_name):
        """Initialise the input-side state and its converter/pruner chain.

        Stores the static configuration, creates the convert and prune
        channels, constructs a Converter and the pruner class looked up by
        `pruner_name` on the `pruner` module, and resets the state counters.
        """
        # Static configuration.
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        self.convert_chn = Channel()
        self.prune_chn = Channel()
        self.arch_input_chn = arch_input_chn

        # Both this module and the pruner push to arch_input_chn. Per the
        # original author there is no conflict: all weights are pushed first,
        # then all inputs go through the pruner.
        self.converter = Converter(
            self.convert_chn, self.prune_chn,
            self.block_size, self.block_size)

        # The pruner class for this layer is chosen by name.
        pruner_cls = getattr(pruner, pruner_name)
        self.pruner = pruner_cls(
            self.prune_chn, self.arch_input_chn,
            self.num_nonzero, self.block_size, True)

        # Layer data; supplied later.
        self.ifmap = self.weights = self.bias = None

        self.image_size = self.filter_size = (0, 0)

        self.ifmap_psum_done = True
        self.pass_done = Reg(False)

        # State counters.
        self.curr_set = self.curr_filter = 0
        self.iteration = self.fmap_idx = self.curr_chn = 0
        # curr_x / curr_y run through the first two dimensions of the input.
        self.curr_x = self.curr_y = 0
        self.bias_set = 0
Пример #12
0
    def instantiate(self, arr_x, arr_y, chn_per_word, done_chn,
                    ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        """Set up the 'fc_tb' testbench around an OSArch device under test.

        Stores the array dimensions and the done channel, creates the two
        named architecture channels, and constructs the Stimulus and the
        OSArch that share them. The layer description fields start as None.
        """
        self.name = 'fc_tb'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        # Layer description; not known yet at construction time.
        self.batch_size = self.input_size = self.output_size = None

        self.ceil_batch = self.ceil_output = None

        self.input_chn = Channel(name='arch_input_chn')
        self.output_chn = Channel(name='arch_output_chn')
        self.done_chn = done_chn

        self.stimulus = Stimulus(
            self.arr_x, self.arr_y, self.chn_per_word,
            self.input_chn, self.output_chn, self.done_chn)
        # NOTE(review): psum_glb_depth is accepted but not forwarded to
        # OSArch; only the ifmap and weight depths are passed on.
        self.dut = OSArch(
            self.arr_x, self.arr_y, self.input_chn, self.output_chn,
            self.chn_per_word, ifmap_glb_depth, weight_glb_depth)
Пример #13
0
    def instantiate(self, wr_chn, rd_chn, glb_depth, block_size):
        """Initialise the 'weight_glb' buffer state.

        Stores the write and read channels, resets the configuration and
        progress fields, creates the SRAM and the last-read channel, and
        sets up the statistics record.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.name = 'weight_glb'

        # Layer configuration and progress; placeholders at construction.
        self.filter_size = self.image_size = (0, 0)
        self.wr_done = False
        self.iteration = self.addr = 0
        self.in_chn = self.out_chn = 0
        self.block_size = block_size

        self.sram = SRAM(glb_depth, block_size)
        self.last_read = Channel(3)

        # Statistics record.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, block_size),
            'rd': 0,
            'wr': 0,
        }
Пример #14
0
    def instantiate(self, wr_chn, rd_chn, arr_y, glb_depth, block_size,
                    num_nonzero):
        """Initialise the 'ifmap_glb' buffer state (convolution-managing variant).

        Stores the channels and sizing parameters, creates the statistics
        record, the SRAM and the last-read channel, and resets the counters
        used while stepping through the convolution.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero
        self.name = 'ifmap_glb'

        # Statistics record.
        # NOTE(review): 'size' records num_nonzero while the SRAM below is
        # built with num_nonzero * 3 - confirm which is intended.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, num_nonzero),
            'rd': 0,
            'wr': 0,
        }

        self.sram = SRAM(glb_depth, num_nonzero * 3)
        self.last_read = Channel(3)
        self.glb_depth = glb_depth

        # Layer configuration; placeholders at construction time.
        self.image_size = self.filter_size = (0, 0)
        self.fmap_sets = self.fmap_per_iteration = 0

        self.curr_set = self.fmap_idx = self.iteration = 0
        self.wr_done = False

        # For managing convolution.
        self.curr_x = self.curr_y = self.curr_chn = 0
        self.request_idx = self.send_idx = 0
        self.ifmap_done = False

        self.needed_addr = 0
        # Ready to output a filter_size block of inputs.
        self.ready_to_output = False
        self.curr_data = [0 for _ in range(3 * num_nonzero)]
        # Blocks other operations while actively working through data.
        self.data_idx = num_nonzero
Пример #15
0
    def instantiate(self, arr_x, arr_y, chn_per_word, layers, batch_size):
        """Set up the 'meta' testbench that hosts the conv and FC testbenches.

        Walks `layers` once to check that each layer's input dimensions
        match the previous layer's output, tracks the largest GLB depths
        returned by WSArchTB.required_glb_depth and
        OSArchTB.required_glb_depth, and then builds a WSArchTB and/or an
        OSArchTB sized with those maxima.

        Raises:
            Exception: if a layer is neither Conv nor FC, or if a layer's
                input dimensions do not match the previous layer's output.
        """
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word
        self.layers = layers
        self.batch_size = batch_size

        self.name = 'meta'

        self.started = False
        self.done_chn = Channel()

        # Running maxima over all layers.
        self.ifmap_glb_depth = self.psum_glb_depth = self.weights_glb_depth = 0

        self.conv_tb = self.fc_tb = None

        # Output of the previously visited layer (None before the first one).
        prev_size = None
        prev_chn = None
        prev_was_conv = False

        conv_count = 0
        fc_count = 0

        for layer in self.layers:
            if isinstance(layer, Conv):
                if prev_size is not None and (
                        prev_size != layer.image_size
                        or prev_chn != layer.in_chn):
                    raise Exception('Invalid conv image size for %s: %s %s' %
                                    (layer.name, (prev_size, prev_chn),
                                     (layer.image_size, layer.in_chn)))
                ifmap_need, psum_need, weights_need = \
                    WSArchTB.required_glb_depth(
                        self.arr_x, self.arr_y, self.chn_per_word,
                        layer.image_size, layer.filter_size, layer.in_chn,
                        layer.out_chn)
                out_shape = layer.new_shape(
                    (self.batch_size, ) + layer.image_size +
                    (layer.out_chn, ))
                prev_size = out_shape[1:3]
                prev_chn = out_shape[3]
                prev_was_conv = True
                conv_count += 1
            elif isinstance(layer, FC):
                if prev_size is not None:
                    if prev_was_conv:
                        # Conv output feeding an FC layer: compare the
                        # product of height, width and channels.
                        flat = prev_size[0] * prev_size[1] * prev_chn
                        if flat != layer.input_size:
                            raise Exception(
                                'Invalid conv to fc dimension transition to ' +
                                layer.name)
                    elif prev_size != layer.input_size:
                        raise Exception(
                            'Invalid fc dimension transition for ' +
                            layer.name)
                ifmap_need, psum_need, weights_need = \
                    OSArchTB.required_glb_depth(
                        self.arr_x, self.arr_y, self.chn_per_word,
                        self.batch_size, layer.input_size, layer.output_size)
                _, prev_size = layer.new_shape(
                    (self.batch_size, layer.output_size))
                prev_was_conv = False
                fc_count += 1
            else:
                raise Exception('layer not valid')

            self.ifmap_glb_depth = max(self.ifmap_glb_depth, ifmap_need)
            self.psum_glb_depth = max(self.psum_glb_depth, psum_need)
            self.weights_glb_depth = max(self.weights_glb_depth, weights_need)

        # Build only the testbenches that at least one layer needs.
        if conv_count > 0:
            self.conv_tb = WSArchTB(
                self.arr_x, self.arr_y, self.chn_per_word, self.done_chn,
                self.ifmap_glb_depth, self.psum_glb_depth,
                self.weights_glb_depth)
        if fc_count > 0:
            self.fc_tb = OSArchTB(
                self.arr_x, self.arr_y, self.chn_per_word, self.done_chn,
                self.ifmap_glb_depth, self.psum_glb_depth,
                self.weights_glb_depth)

        self.layer_step = self.batch_step = 0
        self.conv_inputs = [None] * self.batch_size
        self.fc_input = None

        # One weight/bias slot per layer of each kind.
        self.conv_weights = [None] * conv_count
        self.conv_bias = [None] * conv_count

        self.fc_weights = [None] * fc_count
        self.fc_bias = [None] * fc_count

        self.cur_conv = self.cur_fc = 0
Пример #16
0
 def instantiate(self):
     """Create the channel under test and reset the push/free counters."""
     self.channel = Channel(4)
     # Nothing has been pushed or freed yet.
     self.push_count = self.free_count = 0
     self.test_size = 100
Пример #17
0
    def instantiate(self, arr_y,
            input_chn, output_chn,
            block_size, num_nonzero,
            ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
        """Build the 'chip' module and wire all of its sub-modules together.

        Constructs, in this order: the input deserializer and output
        serializer, the PSum GLB, the combined IFMap/weights GLB, a PE array
        with arr_y rows and a single column, and the four NoCs that connect
        the GLB channels to the per-PE channels.

        Args:
            arr_y: number of PE rows to instantiate.
            input_chn: channel passed to the InputDeserializer.
            output_chn: channel passed to the OutputSerializer.
            block_size: forwarded to the deserializer, GLBs and NoCs.
            num_nonzero: forwarded to the deserializer and GLBs.
            ifmap_glb_depth: forwarded to IFMapWeightsGLB.
            psum_glb_depth: forwarded to PSumGLB.
            weight_glb_depth: forwarded to IFMapWeightsGLB.
        """
        # PE static configuration (immutable)
        self.name = 'chip'
        #self.arr_x = arr_x
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        self.stat_type = 'show'

        # Instantiate DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Instantiate input deserializer and output serializer
        self.ifmap_wr_chn = Channel()
        self.psum_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.deserializer = InputDeserializer(self.input_chn, self.ifmap_wr_chn,
                self.weights_wr_chn, self.psum_wr_chn, arr_y,
                block_size, num_nonzero)

        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

        # Instantiate GLB and GLB channels
        self.ifmap_rd_chn = Channel(3)
        # The standalone IFMapGLB is disabled; IFMapWeightsGLB below takes
        # the ifmap write/read channels instead.
        #self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn, arr_y,
        #        ifmap_glb_depth, block_size, num_nonzero)

        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
                psum_glb_depth, block_size, num_nonzero)

        self.weights_rd_chn = Channel()
        # The standalone WeightsGLB is likewise disabled in favour of the
        # combined IFMapWeightsGLB.
        #self.weights_glb = WeightsGLB(self.weights_wr_chn, self.weights_rd_chn, weight_glb_depth, block_size)

        # Single GLB module handling both the ifmap and the weights channels.
        self.ifmap_weights_glb = IFMapWeightsGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,\
            self.weights_wr_chn, self.weights_rd_chn, arr_y, ifmap_glb_depth,\
            weight_glb_depth, block_size, num_nonzero)
        # PE Array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_in_chns = ModuleList()
        self.pe_psum_out_chns = ModuleList()

        # Actual array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_in_chns.append(ModuleList())
            self.pe_psum_out_chns.append(ModuleList())
            # Only one PE per row: the x dimension is fixed to a single column.
            for x in range(1):
                self.pe_ifmap_chns[y].append(Channel(32))
                self.pe_filter_chns[y].append(Channel(32))
                self.pe_psum_in_chns[y].append(Channel(32))
                self.pe_psum_out_chns[y].append(Channel(32))
                # Each PE gets its own ifmap, filter, psum-in and psum-out channel.
                self.pe_array[y].append(
                    PE(x, y,
                        self.pe_ifmap_chns[y][x],
                        self.pe_filter_chns[y][x],
                        self.pe_psum_in_chns[y][x],
                        self.pe_psum_out_chns[y][x]
                    )
                )

        # Setup NoC to deliver weights, ifmaps and psums
        self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns, block_size)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_in_chns, self.arr_y, block_size)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_out_chns, self.psum_noc_wr_chn, self.psum_output_chn, self.arr_y, block_size)
Пример #18
0
    def instantiate(self, ifmap_wr_chn, ifmap_rd_chn, weights_wr_chn,
                    weights_rd_chn, arr_y, ifmap_glb_depth, weights_glb_depth,
                    block_size, num_nonzero):
        """Initialise the combined 'ifmap_weights_glb' buffer state.

        Stores the four channels and the sizing parameters, creates one
        float16 SRAM plus a last-read channel for the ifmaps and another
        pair for the weights, and resets all progress counters and scratch
        buffers.
        """
        self.ifmap_wr_chn = ifmap_wr_chn
        self.ifmap_rd_chn = ifmap_rd_chn
        self.weights_wr_chn = weights_wr_chn
        self.weights_rd_chn = weights_rd_chn
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero
        self.name = 'ifmap_weights_glb'

        self.in_chn = self.out_chn = 0

        # Statistics record; only the ifmap SRAM dimensions are recorded.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (ifmap_glb_depth, num_nonzero * 3), 'rd': 0, 'wr': 0}

        # IFMap storage.
        self.isram = SRAM(ifmap_glb_depth, num_nonzero * 3, dtype=np.float16)
        self.ilast_read = Channel(3)
        self.ifmap_glb_depth = ifmap_glb_depth

        # Weight storage. The channel depth of one prevents SRAM reads from
        # colliding: a later read was 'replacing' an earlier one and returning
        # the wrong data, and allowing only one outstanding access at a time
        # avoids that.
        self.wsram = SRAM(weights_glb_depth, block_size, dtype=np.float16)
        self.wlast_read = Channel(1)
        self.weights_glb_depth = weights_glb_depth

        # Holds indices of weights that need to be sent to the NoC.
        self.weights_to_send = Channel(3)

        # Layer configuration; placeholders at construction time.
        self.image_size = self.filter_size = (0, 0)
        self.fmap_sets = self.fmap_per_iteration = 0

        self.curr_set = self.fmap_idx = self.iteration = 0
        self.iwr_done = False
        self.wwr_done = False

        # For managing convolution.
        self.curr_x = self.curr_y = self.curr_chn = 0
        self.request_idx = self.send_idx = 0
        self.ifmap_done = False

        # For weights.
        self.addr = 0
        # Stores values taken from self.weights_to_send.
        self.base_addr = 0
        # Keeps track of the current position within the 3x3 filter.
        self.base_addr_wo_chn = -1

        # Invalid weights and inputs used at the end to flush out the last
        # outputs.
        self.weights_to_flush = self.inputs_to_flush = 0

        self.needed_addr = 0
        # Ready to output a filter_size block of inputs.
        self.ready_to_output = False
        self.curr_data = [0 for _ in range(3 * num_nonzero)]
        self.curr_weights = [0 for _ in range(block_size)]
        # Blocks other operations while actively working through data.
        self.data_idx = num_nonzero
Пример #19
0
    def instantiate(self, arr_x, arr_y,
            input_chn, output_chn,
            chn_per_word,
            ifmap_glb_depth):
        """Build the 'chip' module with pre- and post-transform stages.

        Constructs, in this order: the input deserializer and output
        serializer, the IFMap / weights / bias GLBs, the arr_y x arr_x PE
        array with a chained psum channel grid, the pre-transform IFMap and
        weight arrays, the post-transform array, and finally the NoCs and
        the IFMap tiler that connect them.

        Args:
            arr_x: number of PE columns; also the width of the
                post-transform array and the height of the weight
                pre-transform array.
            arr_y: number of PE rows; also the width of both pre-transform
                arrays.
            input_chn: channel passed to the InputDeserializer.
            output_chn: channel passed to the OutputSerializer.
            chn_per_word: forwarded to the deserializer, IFMapGLB, NoCs
                and tiler.
            ifmap_glb_depth: forwarded to IFMapGLB.
        """
        # PE static configuration (immutable)
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        # NOTE(review): the tile count (4) is hard-coded below, and the
        # trailing "= 8" / "= 4" remarks describe one particular
        # configuration rather than values enforced here.
        self.post_tr_x = arr_x # num output channels = 8
        self.post_tr_y = 4 # num tiles = 4

        self.pre_tr_ifmap_x = arr_y # num input channels = 4
        self.pre_tr_ifmap_y = 4 # num tiles = 4

        self.pre_tr_weights_x = arr_y # num input channels = 4
        self.pre_tr_weights_y = arr_x # num output channels = 8

        self.stat_type = 'show'

        # Instantiate DRAM IO channels
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Instantiate input deserializer and output serializer
        self.ifmap_wr_chn = Channel()
        self.weights_wr_chn = Channel()
        self.bias_wr_chn = Channel()
        self.deserializer = InputDeserializer(self.input_chn, self.ifmap_wr_chn,
                self.weights_wr_chn, self.bias_wr_chn, arr_x, arr_y,
                chn_per_word)

        self.psum_output_chn = Channel()
        self.serializer = OutputSerializer(self.output_chn, self.psum_output_chn)

        # Instantiate GLB and GLB channels
        # The IFMap GLB is written from ifmap_glb_wr_chn (fed by the
        # pre-transform read NoC below), not directly from ifmap_wr_chn.
        self.ifmap_glb_wr_chn = Channel(3)
        self.ifmap_rd_chn = Channel(3)
        self.ifmap_glb = IFMapGLB(self.ifmap_glb_wr_chn, self.ifmap_rd_chn,
                ifmap_glb_depth, chn_per_word)

        # These two psum channels are created, but the PSumGLB that would use
        # them is disabled below.
        self.psum_rd_chn = Channel(3)
        self.psum_noc_wr_chn = Channel()
        #  self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
        #          psum_glb_depth, chn_per_word)

        self.weights_glb_wr_chn = Channel(3)
        self.weights_rd_chn = Channel()
        self.weights_glb = WeightsGLB(self.weights_glb_wr_chn, self.weights_rd_chn)

        self.bias_rd_chn = Channel()
        self.bias_glb = BiasGLB(self.bias_wr_chn, self.bias_rd_chn)

        # PE Array and local channel declaration
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()
        # Row 0 of the psum channels is the input to the first PE row; the
        # grid therefore ends up with arr_y + 1 rows.
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_psum_chns[0].append(Channel(32))

        # Actual PE array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            for x in range(self.arr_x):
                self.pe_ifmap_chns[y].append(Channel(32))
                self.pe_filter_chns[y].append(Channel(32))
                self.pe_psum_chns[y+1].append(Channel(32))
                # PE (x, y) takes psum row y and psum row y+1, so the psum
                # channels chain one PE row to the next.
                self.pe_array[y].append(
                    PE(x, y,
                        self.pe_ifmap_chns[y][x],
                        self.pe_filter_chns[y][x],
                        self.pe_psum_chns[y][x],
                        self.pe_psum_chns[y+1][x]
                    )
                )

        # Pre Transform IFMap array and local channel declaration
        self.pre_tr_ifmap_array = ModuleList()
        self.pre_tr_ifmap_in_chns = ModuleList()
        self.pre_tr_ifmap_out_chns = ModuleList()

        # Actual pre transform IFMap array instantiation
        for y in range(self.pre_tr_ifmap_y):
            self.pre_tr_ifmap_array.append(ModuleList())
            self.pre_tr_ifmap_in_chns.append(ModuleList())
            self.pre_tr_ifmap_out_chns.append(ModuleList())
            for x in range(self.pre_tr_ifmap_x):
                self.pre_tr_ifmap_in_chns[y].append(Channel(32))
                self.pre_tr_ifmap_out_chns[y].append(Channel(32))
                self.pre_tr_ifmap_array[y].append(
                    PreTransformIFMap(x, y,
                        self.pre_tr_ifmap_in_chns[y][x],
                        self.pre_tr_ifmap_out_chns[y][x]
                        )
                )

        # Pre Transform Weight array and local channel declaration
        self.pre_tr_weights_array = ModuleList()
        self.pre_tr_weights_in_chns = ModuleList()
        self.pre_tr_weights_out_chns = ModuleList()

        # Actual pre transform Weight array instantiation
        for y in range(self.pre_tr_weights_y):
            self.pre_tr_weights_array.append(ModuleList())
            self.pre_tr_weights_in_chns.append(ModuleList())
            self.pre_tr_weights_out_chns.append(ModuleList())
            for x in range(self.pre_tr_weights_x):
                self.pre_tr_weights_in_chns[y].append(Channel(32))
                self.pre_tr_weights_out_chns[y].append(Channel(32))
                self.pre_tr_weights_array[y].append(
                    PreTransformWeights(x, y,
                        self.pre_tr_weights_in_chns[y][x],
                        self.pre_tr_weights_out_chns[y][x]
                        )
                )

        # Post Transform Array and local channel declaration
        self.post_tr_array = ModuleList()
        self.post_tr_bias_chns = ModuleList()
        self.post_tr_ofmap_in_chns = ModuleList()
        self.post_tr_ofmap_out_chns = ModuleList()

        # Actual post transform array instantiation
        for y in range(self.post_tr_y):
            self.post_tr_array.append(ModuleList())
            self.post_tr_bias_chns.append(ModuleList())
            self.post_tr_ofmap_in_chns.append(ModuleList())
            self.post_tr_ofmap_out_chns.append(ModuleList())
            for x in range(self.post_tr_x):
                self.post_tr_bias_chns[y].append(Channel(32))
                self.post_tr_ofmap_in_chns[y].append(Channel(32))
                self.post_tr_ofmap_out_chns[y].append(Channel(32))
                self.post_tr_array[y].append(
                    PostTransform(x, y,
                        self.post_tr_bias_chns[y][x],
                        self.post_tr_ofmap_in_chns[y][x],
                        self.post_tr_ofmap_out_chns[y][x]
                        )
                )

        # Setup NoC to deliver weights, ifmaps and psums
        self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns, self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns, self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.pe_psum_chns[0], self.chn_per_word)
        # The direct psum write NoC is disabled; the last psum row goes to
        # the post-transform write NoC below instead.
        #self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1], self.psum_output_chn, self.chn_per_word)
        self.bias_noc = BiasNoC(self.bias_rd_chn, self.post_tr_bias_chns, self.chn_per_word)

        # Setup NoC for post transform blocks
        self.post_tr_wr_noc = PostTrWrNoC(self.pe_psum_chns[-1], self.post_tr_ofmap_in_chns, self.chn_per_word)
        self.post_tr_rd_noc = PostTrRdNoC(self.post_tr_ofmap_out_chns, self.psum_output_chn, self.chn_per_word)

        # Instantiate tiler for ifmaps
        self.ifmap_tiler = IFMapTiler(self.ifmap_wr_chn, self.pre_tr_ifmap_in_chns, self.chn_per_word)

        # Setup NoC for pre transform blocks
        # The IFMap write NoC is disabled; the IFMapTiler above takes the
        # same two channel arguments.
        #self.pre_tr_ifmap_wr_noc = PreTrIFMapWrNoC(self.ifmap_wr_chn, self.pre_tr_ifmap_in_chns, self.chn_per_word)
        self.pre_tr_ifmap_rd_noc = PreTrIFMapRdNoC(self.pre_tr_ifmap_out_chns, self.ifmap_glb_wr_chn, self.chn_per_word)
        self.pre_tr_weights_wr_noc = PreTrWeightsWrNoC(self.weights_wr_chn, self.pre_tr_weights_in_chns, self.chn_per_word)
        self.pre_tr_weights_rd_noc = PreTrWeightsRdNoC(self.pre_tr_weights_out_chns, self.weights_glb_wr_chn, self.chn_per_word)
Пример #20
0
    def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                    ifmap_glb_depth, psum_glb_depth):
        """Wire up the chip: IO (de)serializer, global buffers, PE array, NoCs.

        Args:
            arr_x: Number of PEs in each row of the array (inner loop bound).
            arr_y: Number of PE rows in the array (outer loop bound).
            input_chn: Externally created channel handed to the
                InputDeserializer.
            output_chn: Externally created channel handed to the
                OutputSerializer.
            chn_per_word: Forwarded unchanged to the deserializer, the ifmap
                and psum GLBs and every NoC.
            ifmap_glb_depth: Forwarded to IFMapGLB.
            psum_glb_depth: Forwarded to PSumGLB.

        Statement order is kept deliberately stable.
        NOTE(review): the framework may register sub-modules in attribute
        assignment order -- confirm before reordering anything here.
        """
        # Chip static configuration
        self.name = 'chip'
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word

        self.stat_type = 'show'

        # Off-chip IO channels are owned by the caller; only keep references
        self.input_chn = input_chn
        self.output_chn = output_chn

        # Input deserializer: one input channel feeds the ifmap, weights and
        # psum write channels
        self.ifmap_wr_chn = Channel(name="ifmap_wr_chn")
        self.psum_wr_chn = Channel(name="psum_wr_chn")
        self.weights_wr_chn = Channel(name="weights_wr_chn")
        self.deserializer = InputDeserializer(self.input_chn,
                                              self.ifmap_wr_chn,
                                              self.weights_wr_chn,
                                              self.psum_wr_chn, arr_x, arr_y,
                                              chn_per_word)

        # Output serializer: connects psum_output_chn and the output channel
        self.psum_output_chn = Channel(name="psum_output_chn")
        self.serializer = OutputSerializer(self.output_chn,
                                           self.psum_output_chn)

        # Global buffers (GLB) and the channels connecting them to the NoCs
        self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
        self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                                  ifmap_glb_depth, chn_per_word)

        self.psum_rd_chn = Channel(3, name='psum_rd_chn')
        self.psum_noc_wr_chn = Channel(name='psum_noc_wr_chn')
        self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                                self.psum_rd_chn, psum_glb_depth, chn_per_word)

        self.weights_rd_chn = Channel(name='weights_rd_chn')
        self.weights_glb = WeightsGLB(self.weights_wr_chn, self.weights_rd_chn)

        # PE array and local channel declaration.
        # pe_psum_chns holds arr_y + 1 rows: PE (x, y) is connected to row y
        # and row y + 1, so row 0 has to exist before the array is built.
        self.pe_array = ModuleList()
        self.pe_ifmap_chns = ModuleList()
        self.pe_filter_chns = ModuleList()
        self.pe_psum_chns = ModuleList()
        self.pe_psum_chns.append(ModuleList())
        for x in range(self.arr_x):
            self.pe_psum_chns[0].append(
                Channel(32, name='pe_psum_chns_{}_{}'.format(x, 0)))

        # Actual array instantiation
        for y in range(self.arr_y):
            self.pe_array.append(ModuleList())
            self.pe_ifmap_chns.append(ModuleList())
            self.pe_filter_chns.append(ModuleList())
            self.pe_psum_chns.append(ModuleList())
            for x in range(self.arr_x):
                self.pe_ifmap_chns[y].append(
                    Channel(32, name='pe_ifmap_chns_{}_{}'.format(x, y)))
                self.pe_filter_chns[y].append(
                    Channel(32, name='pe_filter_chns_{}_{}'.format(x, y)))
                # Label the channel with the row it is stored in (y + 1).
                # Using y here would give row 1 the same names as row 0.
                self.pe_psum_chns[y + 1].append(
                    Channel(32, name='pe_psum_chns_{}_{}'.format(x, y + 1)))
                self.pe_array[y].append(
                    PE(x, y, self.pe_ifmap_chns[y][x],
                       self.pe_filter_chns[y][x], self.pe_psum_chns[y][x],
                       self.pe_psum_chns[y + 1][x]))

        # Setup NoC to deliver weights, ifmaps and psums.
        # The psum read NoC gets the first psum channel row, the psum write
        # NoC gets the last one.
        self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                     self.chn_per_word)
        self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                                  self.arr_x, self.chn_per_word)
        self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                     self.chn_per_word)
        self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1],
                                     self.psum_noc_wr_chn,
                                     self.psum_output_chn, self.chn_per_word)