Пример #1
0
class ConverterTB(Module):
    def instantiate(self):
        """Testbench: pushes random data blocks through a Converter feeding a Pruner."""
        self.name = 'tb'

        # Stimulus geometry: each block is delivered as in_sets pushes of
        # input_size elements.
        self.input_size = 4
        self.block_size = 12
        self.in_sets = self.block_size // self.input_size
        self.num_nonzero = 5
        self.preserve_order = True

        # Channels wiring the two DUT stages together.
        self.in_chn = Channel()
        self.mid_chn = Channel()
        self.out_chn = Channel()

        self.converter = Converter(self.in_chn, self.mid_chn, self.input_size, self.block_size)
        # NaivePruner / ThresholdPruner are drop-in alternatives with the same signature.
        self.pruner = ClusteredPruner(self.mid_chn, self.out_chn, self.num_nonzero, self.block_size, self.preserve_order)

        self.iterations = 10
        self.iteration = 0
        self.curr_set = 0
        self.out_counter = 0
        # Roughly half the entries are zero; nonzero entries are uniform in [1, 5].
        # One extra block beyond self.iterations flushes the last outputs through.
        self.test_data = [
            [randint(1, 5) if randint(0, 3) > 1 else 0
             for _ in range(self.block_size)]
            for _ in range(self.iterations + 1)
        ]
        print("Stimulus:")
        print("[")
        for row in self.test_data[:-1]:
            print(row)
        print("]")

    def tick(self):
        # Feed one input set per cycle while un-sent stimulus remains
        # (the extra flush block included).
        if self.in_chn.vacancy() and self.iteration != self.iterations + 1:
            lo = self.curr_set * self.input_size
            self.in_chn.push(self.test_data[self.iteration][lo:lo + self.input_size])

            self.curr_set += 1
            if self.curr_set == self.in_sets:
                self.curr_set = 0
                self.iteration += 1
        # Drain and display pruner output; stop once every real block came back.
        if self.out_chn.valid():
            print(self.out_chn.pop())
            self.out_counter += 1
            if self.out_counter == self.iterations:
                raise Finish("Check manually")
Пример #2
0
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        """Global buffer (GLB) for input feature maps.

        Operates in two phases: first fills the SRAM from wr_chn using a
        tiled address layout, then streams the entire SRAM contents out to
        rd_chn in linear address order.

        wr_chn: channel supplying ifmap data words to store.
        rd_chn: channel receiving streamed-out SRAM words.
        glb_depth: SRAM depth in words.
        chn_per_word: elements packed per SRAM word.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.glb_depth = glb_depth
        self.name = 'ifmap_glb'

        # Statistics reported by the simulation framework.
        self.stat_type = 'show'
        self.raw_stats = {
            'size': (glb_depth, chn_per_word),
            'ifmap_glb_rd': 0,
            'ifmap_glb_wr': 0
        }

        self.sram = SRAM(glb_depth, chn_per_word)
        # Tracks reads in flight to the SRAM (depth 3 covers SRAM latency).
        self.last_read = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        # Write-phase cursors.
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets,
                  fmap_per_iteration):
        """Set per-layer parameters and restart the write phase."""
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration
        # NOTE(review): num_tiles is hard-coded to 4 here; addr below
        # interleaves writes across these tiles — confirm against writer.
        self.curr_tile = 0
        self.num_tiles = 4
        self.addr = 0
        print("ifmap glb_size: ", self.glb_depth)

    def tick(self):
        # filter_x/filter_y are computed but unused in this variant; kept
        # for parity with other GLB implementations.
        num_iteration = self.filter_size[0] * self.filter_size[1]
        offset_x = (self.filter_size[0] - 1) // 2
        offset_y = (self.filter_size[1] - 1) // 2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                self.raw_stats['ifmap_glb_wr'] += len(data)
                # print "ifmap_glb wr"
                # Write ifmap to glb
                # Tiled layout: sets of one tile are contiguous; successive
                # fmap indices stride by num_tiles worth of sets.
                addr = self.fmap_sets * self.curr_tile + self.curr_set + self.fmap_idx * self.num_tiles
                #print ("ifmap_to_glb: ", self.curr_tile, self.fmap_idx, addr)
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.curr_tile += 1
                if self.curr_tile == self.num_tiles:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.curr_tile = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    self.wr_done = True
        else:
            if self.rd_chn.vacancy(1) and self.addr < self.glb_depth:
                # Read from GLB and deal with SRAM latency: issue the request
                # now, deliver the response on a later tick via last_read.
                self.sram.request(RD, self.addr)
                #print ("read_ifmap_glb: ", self.addr)
                self.addr += 1
                self.last_read.push(False)

                # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                #print ("ifmap_glb_to_noc")
                # NOTE(review): only False is ever pushed, so is_zero is
                # always False here — the zero-padding path of sibling GLB
                # variants is unused in this implementation; confirm intended.
                is_zero = self.last_read.pop()
                data = [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
                self.raw_stats['ifmap_glb_rd'] += len(data)
Пример #3
0
class MetaArchTB(Module):
    def instantiate(self, arr_x, arr_y, chn_per_word, layers, batch_size):
        """Meta testbench that sequences Conv layers (weight-stationary arch)
        and FC layers (output-stationary arch) over a shared done channel.

        Validates layer-to-layer shape compatibility up front, and sizes the
        shared GLBs to the maximum requirement over all layers.

        arr_x, arr_y: PE array dimensions.
        chn_per_word: elements packed per channel word.
        layers: ordered list of Conv / FC layer descriptors.
        batch_size: number of images per batch.

        Raises Exception if consecutive layers have incompatible shapes or a
        layer is neither Conv nor FC.
        """
        self.arr_x = arr_x
        self.arr_y = arr_y
        self.chn_per_word = chn_per_word
        self.layers = layers
        self.batch_size = batch_size

        self.name = 'meta'

        self.started = False
        self.done_chn = Channel()

        # GLB depths grow to the max over all layers so one allocation
        # serves the whole network.
        self.ifmap_glb_depth = 0
        self.psum_glb_depth = 0
        self.weights_glb_depth = 0

        use_conv = False
        use_fc = False

        self.conv_tb = None
        self.fc_tb = None

        # Shape tracking while walking the layer list.
        cur_image_size = None
        cur_in_chn = None
        is_conv = False

        num_convs = 0
        num_fc = 0

        for layer in self.layers:
            if isinstance(layer, Conv):
                # A conv layer must match the previous layer's output shape
                # (first layer is unconstrained).
                if cur_image_size is None:
                    pass
                elif cur_image_size != layer.image_size or cur_in_chn != layer.in_chn:
                    raise Exception('Invalid conv image size for %s: %s %s' %
                                    (layer.name, (cur_image_size, cur_in_chn),
                                     (layer.image_size, layer.in_chn)))
                ifmap_glb_depth, psum_glb_depth, weights_glb_depth = WSArchTB.required_glb_depth(
                    self.arr_x, self.arr_y, self.chn_per_word,
                    layer.image_size, layer.filter_size, layer.in_chn,
                    layer.out_chn)
                use_conv = True
                output_shape = layer.new_shape((self.batch_size, ) +
                                               layer.image_size +
                                               (layer.out_chn, ))
                cur_image_size = output_shape[1:3]
                cur_in_chn = output_shape[3]
                is_conv = True
                num_convs += 1
            elif isinstance(layer, FC):
                # An FC layer must match either the previous FC output size
                # or the flattened previous conv output.
                if cur_image_size is None:
                    pass
                elif not is_conv and cur_image_size != layer.input_size:
                    raise Exception('Invalid fc dimension transition for ' +
                                    layer.name)
                elif is_conv and cur_image_size[0] * cur_image_size[
                        1] * cur_in_chn != layer.input_size:
                    raise Exception(
                        'Invalid conv to fc dimension transition to ' +
                        layer.name)
                ifmap_glb_depth, psum_glb_depth, weights_glb_depth = OSArchTB.required_glb_depth(
                    self.arr_x, self.arr_y, self.chn_per_word, self.batch_size,
                    layer.input_size, layer.output_size)
                use_fc = True
                _, cur_image_size = layer.new_shape(
                    (self.batch_size, layer.output_size))
                is_conv = False
                num_fc += 1
            else:
                raise Exception('layer not valid')
            self.ifmap_glb_depth = max(self.ifmap_glb_depth, ifmap_glb_depth)
            self.psum_glb_depth = max(self.psum_glb_depth, psum_glb_depth)
            self.weights_glb_depth = max(self.weights_glb_depth,
                                         weights_glb_depth)

        # Instantiate only the sub-testbenches actually needed.
        if use_conv:
            self.conv_tb = WSArchTB(self.arr_x, self.arr_y, self.chn_per_word,
                                    self.done_chn, self.ifmap_glb_depth,
                                    self.psum_glb_depth,
                                    self.weights_glb_depth)
        if use_fc:
            self.fc_tb = OSArchTB(self.arr_x, self.arr_y, self.chn_per_word,
                                  self.done_chn, self.ifmap_glb_depth,
                                  self.psum_glb_depth, self.weights_glb_depth)

        # Progress cursors: which layer, and which batch element within a conv.
        self.layer_step = 0
        self.batch_step = 0
        self.conv_inputs = [None] * self.batch_size
        self.fc_input = None

        # Cached weights/biases so repeated batch elements reuse one set.
        self.conv_weights = [None] * num_convs
        self.conv_bias = [None] * num_convs

        self.fc_weights = [None] * num_fc
        self.fc_bias = [None] * num_fc

        self.cur_conv = 0
        self.cur_fc = 0

    def tick(self):
        # Advance only on the first tick or when the active layer finished.
        if not self.started or self.done_chn.valid():
            self.started = True
            old_layer = self.layers[self.layer_step]

            if self.done_chn.valid():
                valid = self.done_chn.pop()
                if not valid:
                    raise Finish('Validation Failed')
                if isinstance(old_layer, Conv):
                    # Collect this batch element's output; once the batch is
                    # complete, apply the activation and move to the next layer.
                    self.conv_inputs[
                        self.batch_step] = self.conv_tb.get_output()
                    self.batch_step += 1
                    if self.batch_step == self.batch_size:
                        self.conv_inputs = [
                            batch for batch in old_layer.activation(
                                np.array(self.conv_inputs))
                        ]
                        self.batch_step = 0
                        self.layer_step += 1
                        self.cur_conv += 1
                else:
                    # FC processes the whole batch at once.
                    self.fc_input = self.fc_tb.get_output()
                    self.fc_input = old_layer.activation(self.fc_input)
                    self.layer_step += 1
                    self.cur_fc += 1
                if self.layer_step == len(self.layers):
                    raise Finish('Success')

            layer = self.layers[self.layer_step]

            # handle conv to fc transition: flatten per-image conv outputs
            # into the FC input matrix.
            if isinstance(
                    layer, FC
            ) and self.fc_input is None and self.conv_inputs[0] is not None:
                # NOTE(review): self.name is set to 'meta' in instantiate and
                # self.output_file is never initialized there — presumably
                # injected externally before tick runs; confirm.
                if self.name is not None:
                    self.output_file.write("FC MODE\n")
                self.fc_input = np.zeros(
                    (self.batch_size, layer.input_size)).astype(np.int64)
                for i in range(self.batch_size):
                    self.fc_input[i] = self.conv_inputs[i].reshape(
                        layer.input_size)

            if isinstance(layer, Conv):
                if self.name is not None:
                    self.output_file.write("CONV MODE\n")
                if self.conv_inputs[self.batch_step] is None:
                    # First layer: let the testbench generate image + weights.
                    _, weights, bias = self.conv_tb.configure(
                        layer.image_size, layer.filter_size, layer.in_chn,
                        layer.out_chn)
                    self.conv_weights[self.cur_conv] = weights
                    self.conv_bias[self.cur_conv] = bias
                elif self.conv_weights[
                        self.cur_conv] is None or self.conv_bias[
                            self.cur_conv] is None:
                    # Image known (previous layer output) but weights not yet
                    # generated for this conv.
                    weights, bias = self.conv_tb.configure_fixed_image(
                        self.conv_inputs[self.batch_step], layer.filter_size,
                        layer.in_chn, layer.out_chn)
                    self.conv_weights[self.cur_conv] = weights
                    self.conv_bias[self.cur_conv] = bias
                else:
                    # Both image and weights fixed: later batch elements.
                    self.conv_tb.configure_fixed(
                        self.conv_inputs[self.batch_step],
                        self.conv_weights[self.cur_conv],
                        self.conv_bias[self.cur_conv])

            elif isinstance(layer, FC):
                if self.fc_input is None:
                    _, weights, bias = self.fc_tb.configure(
                        self.batch_size, layer.input_size, layer.output_size)
                    self.fc_weights[self.cur_fc] = weights
                    self.fc_bias[self.cur_fc] = bias
                elif self.fc_weights[self.cur_fc] is None or self.fc_bias[
                        self.cur_fc] is None:
                    weights, bias = self.fc_tb.configure_fixed_image(
                        self.fc_input, layer.output_size)
                    self.fc_weights[self.cur_fc] = weights
                    self.fc_bias[self.cur_fc] = bias
                else:
                    self.fc_tb.configure_fixed(self.fc_input,
                                               self.fc_weights[self.cur_fc],
                                               self.fc_bias[self.cur_fc])
            else:
                raise Exception('layer not valid')
Пример #4
0
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        """Global buffer for input feature maps, weight-stationary flow.

        Phase 1 fills the SRAM from wr_chn; phase 2 replays the fmap once per
        filter-weight position, pushing zero words for out-of-image (halo)
        accesses.

        wr_chn: channel supplying ifmap words.
        rd_chn: channel receiving replayed ifmap words (or zero padding).
        glb_depth: SRAM depth in words.
        chn_per_word: elements packed per SRAM word.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        # Tracks reads in flight; True entries mean "synthesize a zero word"
        # (halo access), False entries mean "take the SRAM response".
        self.last_read = Channel(3, name='last_read')

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        # Cursors shared by the write and read phases.
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets,
                  fmap_per_iteration):
        """Set per-layer parameters and restart the write phase."""
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

    def tick(self):
        # self.iteration is which weight we are currently using
        # It's weight stationary so we fully use a set of filter weights
        # before continuing on.
        # (first weight in each filter, second weight in each filter, etc...)
        num_iteration = self.filter_size[0] * self.filter_size[1]
        # Center the filter: offsets shift the tap position so that
        # iteration 0 addresses the top-left filter tap.
        offset_x = (self.filter_size[0] - 1) // 2
        offset_y = (self.filter_size[1] - 1) // 2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y

        # This is the first tick since initializing
        # INITIALIZATION CODE
        # Write all ifmaps and psums? to sram
        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                # print "ifmap_glb wr"
                self.raw_stats['wr'] += 1
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                # Row-major layout: fmap_sets words per fmap position.
                addr = self.fmap_sets * self.fmap_idx + self.curr_set
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                # Map linear fmap_idx to 2-D pixel coordinates, then apply
                # the current filter tap offset.
                fmap_x = self.fmap_idx % self.image_size[0]
                fmap_y = self.fmap_idx // self.image_size[0]
                ifmap_x, ifmap_y = (fmap_x + filter_x, fmap_y + filter_y)
                if (ifmap_x < 0) or (ifmap_x >= self.image_size[0]) or \
                        (ifmap_y < 0) or (ifmap_y >= self.image_size[1]):
                    # Outside the image: schedule a zero word instead of a read.
                    # print "ifmap req zero", self.iteration, self.fmap_idx
                    self.last_read.push(True)
                else:
                    fmap_idx = (ifmap_y * self.image_size[0]) + ifmap_x
                    addr = self.fmap_sets * fmap_idx + self.curr_set
                    # print "ifmap req glb", self.iteration, self.fmap_idx
                    self.sram.request(RD, addr)
                    self.last_read.push(False)
                self.curr_set += 1

                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                # self.raw_stats['rd'] += 1
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
                self.raw_stats['rd'] += 1
Пример #5
0
class PSumGLB(Module):
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    chn_per_word):
        """Global buffer for partial sums, backed by a dual-port SRAM.

        DRAM preload writes and reads share port 0 (they are in mutually
        exclusive phases); NoC write-back uses port 1 so it can proceed
        concurrently with reads.

        dram_wr_chn: initial psum values from DRAM.
        noc_wr_chn: updated psums written back from the NoC.
        rd_chn: channel receiving psum words for the PE array.
        glb_depth: SRAM depth in words.
        chn_per_word: elements packed per SRAM word.
        """
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'psum_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, nports=2, name=self.name)
        # Tracks reads in flight to cover SRAM latency; only False is pushed
        # here (no zero-padding path for psums).
        self.last_read = Channel(3, name='last_read')

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        # Read-side cursors.
        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        # Write-side cursors (shared by DRAM preload and NoC write-back).
        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def configure(self, filter_size, fmap_sets, fmap_per_iteration):
        """Set per-layer parameters and reset all cursors for a new run."""
        self.wr_done = False

        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def tick(self):
        # One pass over the fmap per filter tap.
        num_iteration = self.filter_size[0] * self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.dram_wr_chn.valid():
                data = self.dram_wr_chn.pop()
                self.raw_stats['wr'] += 1
                # print "psum_glb wr"
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                # Row-major layout: fmap_sets words per fmap position.
                addr = self.fmap_sets * self.fmap_wr_idx + self.wr_set
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=0)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_wr_idx = 0
                    self.wr_done = True
        else:
            # Read from GLB and deal with SRAM latency
            # print self.rd_chn.vacancy(1), self.rd_chn.rd_ptr.rd(), self.rd_chn.wr_ptr.rd()
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                addr = self.fmap_sets * self.fmap_rd_idx + self.rd_set
                # print "psum req glb", self.iteration, self.fmap_rd_idx, self.rd_set
                self.sram.request(RD, addr, port=0)
                self.last_read.push(False)
                self.rd_set += 1

                if self.rd_set == self.fmap_sets:
                    self.rd_set = 0
                    self.fmap_rd_idx += 1
                if self.fmap_rd_idx == self.fmap_per_iteration:
                    self.fmap_rd_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                self.rd_chn.push(data)
                self.raw_stats['rd'] += 1
                # print "psum rd glb", data

            # If we can pull an elemnt off of the write channel, do it
            # and write it into the location specificed by the current
            # fmap_Sets, fmap_wr_idx, and wr_set!
            if self.noc_wr_chn.valid():
                # print "psum_to_glb: ", self.fmap_wr_idx, self.wr_set
                data = self.noc_wr_chn.pop()
                self.raw_stats['wr'] += 1
                addr = self.fmap_sets * self.fmap_wr_idx + self.wr_set
                # print "psum wr glb", self.fmap_wr_idx, self.wr_set, data
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=1)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    # Write-back wraps without setting wr_done, so the next
                    # tap's psums overwrite the same region.
                    self.fmap_wr_idx = 0
Пример #6
0
class WeightsGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        """Global buffer for filter weights.

        Fills the SRAM with one word per (filter tap, out_set, in_set) triple,
        then streams the weights back out in the same order; after a full read
        pass it re-enters the write phase for the next load.

        wr_chn: channel supplying weight words.
        rd_chn: channel receiving streamed weight words.
        glb_depth: SRAM depth in words.
        chn_per_word: elements packed per SRAM word.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'weight_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        # Tracks reads in flight; only False is pushed (no zero padding).
        self.last_read = Channel(3, name='last_read')

        self.filter_size = (0, 0)
        self.in_sets = 0
        self.out_sets = 0

        # Cursors shared by write and read phases.
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.tile = 0
        self.wr_done = False

    def configure(self, filter_size, in_sets, out_sets):
        """Set per-layer parameters and restart the write phase."""
        self.wr_done = False

        self.filter_size = filter_size
        self.in_sets = in_sets
        self.out_sets = out_sets
        self.tile = 0
        # NOTE(review): self.stuff accumulates every written word and is
        # never read in this class — unbounded growth unless consumed
        # externally (debug aid?); confirm before removing.
        self.stuff = []

    def tick(self):
        # One (out_sets x in_sets) plane of weights per filter tap.
        num_iteration = self.filter_size[0]*self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                # print "ifmap_glb wr"
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                # Layout: tap-major, then out_set, then in_set.
                addr = self.in_sets*(self.out_sets*self.iteration+self.fmap_idx) + self.curr_set
                self.stuff.append(data)
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                self.raw_stats['wr'] += len(data)
                if self.curr_set == self.in_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.out_sets:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.iteration += 1
                    if self.iteration == num_iteration:
                        self.iteration = 0
                        self.wr_done = True
        else:
            did_read = False
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                addr = self.in_sets*(self.out_sets*self.iteration+self.fmap_idx) + self.curr_set
                # print "ifmap req glb", self.iteration, self.fmap_idx
                self.sram.request(RD, addr)
                self.raw_stats['rd'] += self.chn_per_word
                self.last_read.push(False)
                did_read = True
                self.curr_set += 1

                if self.curr_set == self.in_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.out_sets:
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
            elif not did_read:
                # Idle with all reads drained and the full pass complete:
                # flip back to the write phase for the next weight load.
                if self.iteration == num_iteration:
                    self.iteration = 0
                    self.wr_done = False
Пример #7
0
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        """Global buffer for input feature maps with input/output tiling.

        Fills the SRAM with full_fmap_sets words per fmap position, then
        replays the fmap once per (filter tap, input tile, output tile)
        combination, pushing zero words for out-of-image (halo) accesses.
        Latches task_done after the final output tile.

        wr_chn: channel supplying ifmap words.
        rd_chn: channel receiving replayed ifmap words (or zero padding).
        glb_depth: SRAM depth in words.
        chn_per_word: elements packed per SRAM word.
        """
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}


        self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
        # Tracks reads in flight; True entries mean "synthesize a zero word".
        self.last_read = Channel(3, name='last_read')

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.full_fmap_sets = 0
        self.fmap_per_iteration = 0

        # Cursors shared by write and read phases, plus tile counters.
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.tile_in = 0
        self.tile_out = 0
        self.wr_done = False
        # Starts True so the module idles until configure() is called.
        self.task_done = True

    def configure(self, image_size, filter_size, fmap_sets, full_fmap_sets, tiles_out, fmap_per_iteration):
        """Set per-layer parameters, reset cursors, and arm the state machine."""
        self.wr_done = False
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.full_fmap_sets = full_fmap_sets
        self.fmap_per_iteration = fmap_per_iteration
        self.tiles_out = tiles_out
        self.tile_in = 0
        self.tile_out = 0
        self.task_done = False

    def tick(self):
        # One pass over the fmap per filter tap; filter_x/filter_y are the
        # current tap's offset from the filter center.
        num_iteration = self.filter_size[0]*self.filter_size[1]
        offset_x = (self.filter_size[0] - 1)//2
        offset_y = (self.filter_size[1] - 1)//2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y
        # Input tiles partition the full set of channel groups.
        tiles_in = self.full_fmap_sets // self.fmap_sets

        if self.task_done:
            return

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                # print "ifmap_glb wr"
                # Write ifmap to glb
                # print "ifmap_to_glb: ", in_sets, self.fmap_idx, self.curr_set
                # Writes cover all input tiles: full_fmap_sets words per position.
                addr = self.full_fmap_sets*self.fmap_idx + self.curr_set
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                self.raw_stats['wr'] += len(data)
                if self.curr_set == self.full_fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            did_read = False
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration and self.tile_in < tiles_in:
                # Map linear fmap_idx to pixel coordinates and apply the tap offset.
                fmap_x = self.fmap_idx % self.image_size[0]
                fmap_y = self.fmap_idx  // self.image_size[0]
                ifmap_x, ifmap_y = (fmap_x + filter_x, fmap_y + filter_y)
                if (ifmap_x < 0) or (ifmap_x >= self.image_size[0]) or \
                        (ifmap_y < 0) or (ifmap_y >= self.image_size[1]):
                    # Outside the image: schedule a zero word instead of a read.
                    # print "ifmap req zero", self.iteration, self.fmap_idx
                    self.last_read.push(True)
                else:
                    fmap_idx = (ifmap_y*self.image_size[0]) + ifmap_x
                    # Read only the current input tile's slice of each position.
                    addr = self.fmap_sets*(fmap_idx*tiles_in+self.tile_in) + self.curr_set
                    # print "ifmap req glb", self.iteration, self.fmap_idx
                    self.sram.request(RD, addr)
                    self.raw_stats['rd'] += self.chn_per_word
                    self.last_read.push(False)
                did_read = True
                self.curr_set += 1

                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                # print "ifmap rd glb", data
                self.rd_chn.push(data)
            elif not did_read:
                # Idle with all reads drained: advance tile counters, and
                # latch task_done after the final (tile_in, tile_out) pair.
                if self.iteration == num_iteration:
                    self.iteration = 0
                    self.tile_in += 1
                    if self.tile_in == tiles_in:
                        self.tile_in = 0
                        self.tile_out += 1
                        if self.tile_out == self.tiles_out:
                            self.tile_out = 0
                            self.task_done = True
Пример #8
0
class PSumGLB(Module):
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth, chn_per_word):
        """Global buffer for partial sums, backed by a dual-port SRAM.

        Port 1 handles all writes (DRAM preload and NoC write-back); port 0
        handles reads, so read and write-back can proceed concurrently.

        dram_wr_chn: initial psum values from DRAM.
        noc_wr_chn: updated psums written back from the NoC.
        rd_chn: channel receiving psum words for the PE array.
        glb_depth: SRAM depth in words.
        chn_per_word: elements packed per SRAM word.
        """
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'psum_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'psum_glb_rd': 0, 'psum_glb_wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word, nports=2)
        # Tracks reads in flight to cover SRAM latency; only False is pushed
        # (no zero-padding path for psums).
        self.last_read = Channel(3)

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        # Read-side cursors.
        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        # Write-side cursors (shared by DRAM preload and NoC write-back).
        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def configure(self, filter_size, fmap_sets, fmap_per_iteration):
        """Set per-layer parameters and reset all cursors for a new run."""
        self.wr_done = False

        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def tick(self):
        # One pass over the fmap per filter tap.
        num_iteration = self.filter_size[0]*self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.dram_wr_chn.valid():
                data = self.dram_wr_chn.pop()
                self.raw_stats['psum_glb_wr'] += len(data)
                # print "psum_glb wr"
                # Write ifmap to glb
                # Row-major layout: fmap_sets words per fmap position.
                addr = self.fmap_sets*self.fmap_wr_idx + self.wr_set
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=1)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_wr_idx = 0
                    self.wr_done = True
                #print ("psum orig write, fmap_sets, fmap_wr_idx, wr_set, addr, data: ",self.fmap_sets, self.fmap_wr_idx, self.wr_set, addr, data)
        else:
            # Read from GLB and deal with SRAM latency
            # print self.rd_chn.vacancy(1), self.rd_chn.rd_ptr.rd(), self.rd_chn.wr_ptr.rd()
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                addr = self.fmap_sets*self.fmap_rd_idx + self.rd_set
                #print("psum req glb", self.iteration, self.fmap_rd_idx, self.rd_set)
                self.sram.request(RD, addr, port=0)
                self.last_read.push(False)
                self.rd_set += 1

                if self.rd_set == self.fmap_sets:
                    self.rd_set = 0
                    self.fmap_rd_idx += 1
                if self.fmap_rd_idx == self.fmap_per_iteration:
                    self.fmap_rd_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                self.rd_chn.push(data)
                self.raw_stats['psum_glb_rd'] += len(data)
                #print("psum rd glb: data", data)

            # Accept write-back from the NoC into the current write cursor.
            if self.noc_wr_chn.valid():
                data = self.noc_wr_chn.pop()
                #print("psum_to_glb: ", self.fmap_wr_idx, self.wr_set, data)

                self.raw_stats['psum_glb_wr'] += len(data)
                addr = self.fmap_sets*self.fmap_wr_idx + self.wr_set
                #print("noc psum wr glb", self.fmap_wr_idx, self.wr_set, data)
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=1)
                if self.wr_set == self.fmap_sets:
                    self.wr_set = 0
                    self.fmap_wr_idx += 1
                if self.fmap_wr_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    #self.sram.dump()
                    # Write-back wraps without setting wr_done, so the next
                    # tap's psums overwrite the same region.
                    self.fmap_wr_idx = 0
Пример #9
0
class IFMapGLB(Module):
    """Global buffer (GLB) for input feature maps.

    Fill phase: ifmap words arriving on ``wr_chn`` are written sequentially
    into an SRAM.  Drain phase: for each filter-tap iteration the buffered
    words are streamed back out on ``rd_chn``, with an all-zero word
    substituted for positions where the shifted filter window falls outside
    the image (implicit zero padding).
    """
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        # wr_chn: channel delivering ifmap words to buffer
        # rd_chn: channel on which buffered ifmap words are streamed out
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size' : (glb_depth, chn_per_word), 'ifmap_glb_rd': 0, 'ifmap_glb_wr': 0}


        self.sram = SRAM(glb_depth, chn_per_word)
        # In-flight SRAM read tracker; a True entry marks a request that was
        # skipped because the address fell outside the image, so a zero word
        # is emitted instead of an SRAM response.  Depth 3 bounds the number
        # of outstanding reads.
        self.last_read = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        # curr_set / fmap_idx are reused by both the write and read phases.
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets, fmap_per_iteration):
        """Reset streaming state for a new layer configuration."""
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        # Diagnostic counter of SRAM read requests issued.
        self.read_ctr = 0

    def tick(self):
        """Advance one cycle: absorb one write or issue/complete one read."""
        # One iteration per filter tap; filter_x/filter_y is the current
        # tap's offset from the filter center (offsets give centered,
        # zero-padded convolution).
        num_iteration = self.filter_size[0]*self.filter_size[1]
        offset_x = (self.filter_size[0] - 1)//2
        offset_y = (self.filter_size[1] - 1)//2
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                self.raw_stats['ifmap_glb_wr'] += len(data)
                # print "ifmap_glb wr"
                # Write ifmap to glb
                addr = self.fmap_sets*self.fmap_idx + self.curr_set
                # print("ifmap_to_glb: fmap idx, curr set, addr ",  self.fmap_idx, self.curr_set, addr)
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps and psums
                    # self.sram.dump()
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:

                self.read_ctr += 1
                #print("ifmap glb read ctr ", self.read_ctr)

                # Map the linear fmap index to (x, y), then shift by the
                # current filter tap; out-of-image positions produce a
                # zero word rather than an SRAM read.
                fmap_x = self.fmap_idx % self.image_size[0]
                fmap_y = self.fmap_idx  // self.image_size[0]
                ifmap_x, ifmap_y = (fmap_x + filter_x, fmap_y + filter_y)
                if (ifmap_x < 0) or (ifmap_x >= self.image_size[0]) or \
                        (ifmap_y < 0) or (ifmap_y >= self.image_size[1]):
                    # print("ifmap req zero: iter, fmap idx ", self.iteration, self.fmap_idx)
                    self.last_read.push(True)
                else:
                    fmap_idx = (ifmap_y*self.image_size[0]) + ifmap_x
                    addr = self.fmap_sets*fmap_idx + self.curr_set
                    # print("addr fmap idx, addr: ", fmap_idx, addr)
                    #print("ifmap req glb: iter, fmap idx, addr ", self.iteration, self.fmap_idx, addr)
                    self.sram.request(RD, addr)
                    self.last_read.push(False)
                self.curr_set += 1
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # print("fmap idx, fmap per iter: ", self.fmap_idx, self.fmap_per_iteration)
                    self.fmap_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.chn_per_word if is_zero else \
                        [e for e in self.sram.response()]
                #print("ifmap rd glb", data, self.iteration)
                self.rd_chn.push(data)
                self.raw_stats['ifmap_glb_rd'] += len(data)
# Example #10
class IFMapGLB(Module):
    """Global buffer (GLB) for input feature maps (valid-convolution variant).

    Fill phase: ifmap words from ``wr_chn`` are written sequentially into an
    SRAM, one word per image pixel (times ``fmap_sets`` blocks per pixel).
    Drain phase: for each filter tap, the pixels that tap touches across all
    output positions are streamed out on ``rd_chn``.  No zero padding is
    applied, so the output grid is ``image_size - filter_size + 1`` per
    dimension.

    This replaces the previous hard-coded ``fmap_indices`` table (which was
    valid only for a 4x4 image with a 3x3 filter) with the equivalent index
    arithmetic; for that configuration it issues the identical read sequence.
    """
    def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
        # wr_chn: channel delivering ifmap words to buffer
        # rd_chn: channel on which buffered ifmap words are streamed out
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.chn_per_word = chn_per_word
        self.name = 'ifmap_glb'

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, chn_per_word), 'rd': 0, 'wr': 0}

        self.sram = SRAM(glb_depth, chn_per_word)
        # In-flight SRAM read tracker (True would mean "emit zero word";
        # this variant never pushes True since reads are always in-bounds).
        self.last_read = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

    def configure(self, image_size, filter_size, fmap_sets,
                  fmap_per_iteration):
        """Reset streaming state for a new layer configuration."""
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.fmap_sets = fmap_sets
        self.fmap_per_iteration = fmap_per_iteration

        # Running count of read requests issued during the drain phase.
        self.fmap_idx_ctr = 0

    def tick(self):
        """Advance one cycle: absorb one write or issue/complete one read."""
        # One drain iteration per filter tap; (filter_x, filter_y) is the
        # current tap position (top-left origin, no centering offset).
        num_iteration = self.filter_size[0] * self.filter_size[1]
        filter_x = self.iteration % self.filter_size[0]
        filter_y = self.iteration // self.filter_size[0]

        if not self.wr_done:
            # Write to GLB
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                self.raw_stats['wr'] += len(data)
                # Write ifmap to glb
                addr = self.fmap_sets * self.fmap_idx + self.curr_set
                self.curr_set += 1
                self.sram.request(WR, addr, data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                # One fmap index per image pixel (was hard-coded to 16 for
                # the 4x4 test image).
                if self.fmap_idx == self.image_size[0] * self.image_size[1]:
                    self.fmap_idx = 0
                    self.wr_done = True
        else:
            # Valid convolution: each tap reads one pixel per output
            # position, i.e. (image - filter + 1)^2 reads per iteration.
            num_out_x = self.image_size[0] - self.filter_size[0] + 1
            num_out_y = self.image_size[1] - self.filter_size[1] + 1
            reads_per_tap = num_out_x * num_out_y

            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                # Position within the current tap's read sequence.
                pos = self.fmap_idx_ctr % reads_per_tap
                out_x = pos % num_out_x
                out_y = pos // num_out_x
                # Pixel touched by tap (filter_x, filter_y) at output
                # position (out_x, out_y), in row-major order.
                fmap_idx = (filter_y + out_y) * self.image_size[0] + \
                    (filter_x + out_x)
                # NOTE(review): like the original table-driven code, the
                # address ignores curr_set, i.e. assumes fmap_sets == 1 in
                # the drain phase — confirm for multi-set configurations.
                addr = fmap_idx

                self.sram.request(RD, addr)
                self.last_read.push(False)

                self.fmap_idx_ctr += 1

                if (self.fmap_idx_ctr % reads_per_tap) == 0:
                    self.iteration += 1

            if self.last_read.valid():
                if self.last_read.pop():
                    pass  # zero-word placeholder; never reached here
                else:  # push data to ifmap NOC
                    data = [e for e in self.sram.response()]
                    self.rd_chn.push(data)
                    self.raw_stats['rd'] += len(data)
# Example #11
class IFMapWeightsGLB(Module):
    """Combined global buffer for sparse ifmap activations and dense weights.

    Ifmaps arrive compressed as ``num_nonzero`` triples per SRAM word
    (each triple is 3 values; the third acts as an end-of-block flag — see
    the ``data[2] == 1`` check in ``tick``) and are stored in ``isram``.
    Dense weight blocks are stored in ``wsram``.  During compute the module
    walks the convolution positions of the image, streams nonzero input
    triples to the NoC on ``ifmap_rd_chn``, and for every nonzero input
    schedules the matching weight block to be read out on
    ``weights_rd_chn``.  At the end, dummy inputs/weights are injected to
    flush the final outputs from the PE array.
    """
    def instantiate(self, ifmap_wr_chn, ifmap_rd_chn, weights_wr_chn, weights_rd_chn,\
            arr_y, ifmap_glb_depth, weights_glb_depth, \
            block_size, num_nonzero):
        # Channels: *_wr_chn feed data in (from DRAM), *_rd_chn stream
        # data out (to the NoC).
        self.ifmap_wr_chn = ifmap_wr_chn
        self.ifmap_rd_chn = ifmap_rd_chn
        self.weights_wr_chn = weights_wr_chn
        self.weights_rd_chn = weights_rd_chn
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero
        self.name = 'ifmap_weights_glb'

        self.in_chn = 0
        self.out_chn = 0

        self.stat_type = 'show'
        # Each ifmap SRAM word holds num_nonzero (value, index, flag) triples.
        self.raw_stats = {
            'size': (ifmap_glb_depth, num_nonzero * 3),
            'rd': 0,
            'wr': 0
        }

        self.isram = SRAM(ifmap_glb_depth, num_nonzero * 3, dtype=np.float16)
        # Tracks ifmap SRAM reads in flight; True entries mark out-of-image
        # positions that produce no data.
        self.ilast_read = Channel(3)
        self.ifmap_glb_depth = ifmap_glb_depth

        self.wsram = SRAM(weights_glb_depth, block_size, dtype=np.float16)
        self.wlast_read = Channel(1)
        # Channel depth of one here prevents SRAM reads from colliding
        # was having issues with a later read 'replacing' an earlier one
        # and thus getting the wrong data
        # having only one extant write on an SRAM at a time prevents this
        self.weights_glb_depth = weights_glb_depth

        # Channel to hold indices of weights that need to be sent
        # to NoC
        self.weights_to_send = Channel(3)

        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        # Separate done flags for the ifmap fill (iwr) and weight fill (wwr).
        self.iwr_done = False
        self.wwr_done = False

        # For managing convolution
        self.curr_x = 0
        self.curr_y = 0
        self.curr_chn = 0
        self.request_idx = 0
        self.send_idx = 0
        #self.curr_filt_x = 0
        #self.curr_filt_y = 0
        self.ifmap_done = False

        # for weights
        self.addr = 0
        self.base_addr = 0  # to store values from self.weights_to_send
        self.base_addr_wo_chn = -1  # to keep track of current position within 3x3 filter

        # invalid weights and inputs to use at the end to flush out last outputs
        self.weights_to_flush = 0
        self.inputs_to_flush = 0

        self.needed_addr = 0
        self.ready_to_output = False  # ready to output a filter_size block of inputs
        self.curr_data = [0 for i in range(3 * num_nonzero)]
        self.curr_weights = [0 for i in range(block_size)]
        self.data_idx = num_nonzero  # block other operations while actively working through data
        # send one data point at a time (of num_nonzero)

    def configure(self, image_size, filter_size, in_chn, out_chn,
                  fmap_per_iteration):
        """Reset convolution-walk state for a new layer configuration."""
        # NOTE(review): 'wr_done' looks vestigial — this class tracks
        # iwr_done/wwr_done instead; confirm nothing external reads it.
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.in_chn = in_chn
        self.out_chn = out_chn
        self.fmap_per_iteration = fmap_per_iteration

        # For managing convolution
        self.curr_x = 0
        self.curr_y = 0
        self.curr_chn = 0
        self.request_idx = 0
        self.send_idx = 0
        self.curr_filt_x = 0
        self.curr_filt_y = 0
        self.curr_filt_set = 0
        self.ifmap_done = False

        offset_x = (self.filter_size[0] - 1) // 2
        offset_y = (self.filter_size[1] - 1) // 2
        # The first address needed to be filled in order to start sending
        #self.needed_addr = (self.image_size[0]*(1+offset_y) + 1+offset_x) *\
        #    (self.in_chn // self.block_size) - 1
        self.needed_addr = (self.image_size[0]*(offset_y) + 1+offset_x) *\
            (self.in_chn // self.block_size) - 1
        # Goes high to transfer sram control to output
        # Doing them synchronously would be better, but complicates things
        self.ready_to_output = False

    def tick(self):
        """Advance one cycle of the weight and ifmap state machines."""

        # WEIGHTS-------------------------------------------------------------------
        num_iterations = self.image_size[0] * self.image_size[
            1] * self.in_chn // self.block_size
        # max_addr: SRAM slots needed to hold every weight block.
        max_addr = self.filter_size[0] * self.filter_size[
            1] * self.in_chn * self.out_chn // self.block_size

        verbose = False

        if not self.wwr_done:
            # Weight fill phase: store incoming blocks sequentially.
            if self.weights_wr_chn.valid():
                data = self.weights_wr_chn.pop()
                self.raw_stats['wr'] += len(data)
                #print(self.addr)
                #print("weights (iw glb) at addr {}".format(self.addr))
                #print(data)
                #print(self.addr)
                self.wsram.request(WR, self.addr, np.asarray(data))
                self.addr += 1
                #print("storing weights (wi glb)")
                #print(data)
                if (self.addr == max_addr):
                    # From here on, self.addr == out_chn // block_size acts
                    # as the "idle" sentinel for the weight read cycler.
                    self.addr = self.out_chn // self.block_size
                    self.wwr_done = True
                    #print("Done storing weights (wi glb)")
                    #print("--------------------------")
                    #print(self.wsram.data)
                    #print("--------------------------")

        # within this block of code self.addr is re-used
        # here it is more analogous to curr_set
        # and refers to the current block of filters (last index of the four)
        # that is being read
        else:
            # Catch addresses that correspond to nonzero inputs
            # search "self.weights_to_send.push(waddr)" below
            if (self.weights_to_send.valid()
                    and self.addr == self.out_chn // self.block_size):
                self.base_addr = self.weights_to_send.pop()
                self.addr = 0
            # cycle through channels using self.addr
            # make requests to memory; will pick these up in the next if statement below
            elif (self.wlast_read.vacancy()
                  and not self.addr == self.out_chn // self.block_size):
                full_addr = self.base_addr + self.addr
                self.wsram.request(RD, full_addr)
                self.wlast_read.push(False)
                #print("Request weights (wi glb):")
                #print(full_addr)
                self.addr += 1
        # catch requests from memory; send results to WeightsNoC
        if self.wlast_read.valid() and self.weights_rd_chn.vacancy(1):
            # NOTE(review): the popped flag is never consulted — weight
            # reads are always real (False) here.
            is_zero = self.wlast_read.pop()
            data = [e for e in self.wsram.response()]
            self.weights_rd_chn.push(data)
            #print("weights sent (from iw glb)")
            #print(data)
            self.raw_stats['rd'] += len(data)

        # these two if statements take care of an issue that occurs at the end
        # PEs don't automatically detect the end of the computation without inputs
        # from another location
        # So we send in some dummy inputs to flush out the last outputs
        if self.weights_rd_chn.vacancy(1) and not self.wlast_read.valid() and \
            not self.weights_to_send.valid() and self.addr == self.out_chn // self.block_size\
            and self.weights_to_flush > 0:
            self.weights_to_flush -= 1
            self.weights_rd_chn.push([0 for i in range(self.block_size)])
        if self.ifmap_done and self.inputs_to_flush > 0 and self.ifmap_rd_chn.vacancy(
                1):
            self.inputs_to_flush -= 1
            # Sentinel triple: index -1 tells the consumer this is a flush.
            self.ifmap_rd_chn.push([-1, 0, 0])

        # IFMAP-------------------------------------------------------------------
        if not (self.ifmap_done and not self.ilast_read.valid()
                and not self.ready_to_output):
            verbose = False

            # shorthand values that will be useful later
            num_iteration = self.filter_size[0] * self.filter_size[1]
            offset_x = (self.filter_size[0] - 1) // 2
            offset_y = (self.filter_size[1] - 1) // 2
            filter_x = self.iteration % self.filter_size[0] - offset_x
            filter_y = self.iteration // self.filter_size[0] - offset_y
            in_sets = self.in_chn // self.block_size
            out_sets = self.out_chn // self.block_size
            if not self.iwr_done and not self.ready_to_output:
                # Write to GLB
                if self.ifmap_wr_chn.valid():
                    data = self.ifmap_wr_chn.pop()
                    data = np.reshape(np.asarray(data), (-1))

                    full_addr = in_sets * self.fmap_idx + self.curr_set
                    #print("ifmap (wi glb) received data:")
                    #print(data)
                    #print("{} >?= {}".format(full_addr, self.needed_addr))
                    self.curr_set += 1
                    # isram is used as a circular buffer; wrap the address.
                    addr = full_addr % self.ifmap_glb_depth

                    # if we have enough inputs in memory to start sending
                    if (full_addr == self.needed_addr):
                        self.ready_to_output = True
                        self.needed_addr += in_sets

                    self.isram.request(WR, addr, data)
                    self.raw_stats['wr'] += len(data)
                    #print("ifmap, iw glb")
                    #print("{} written to {}".format(data, addr))

                    if self.curr_set == self.fmap_sets:
                        self.curr_set = 0
                        self.fmap_idx += 1
                    if self.fmap_idx == self.fmap_per_iteration:
                        # Done initializing ifmaps and psums
                        print("iw glb: Finished filling ifmap buffer")
                        self.fmap_idx = 0
                        self.iwr_done = True
            elif self.ready_to_output:
                # Set when the current (x, y) position's data is exhausted
                # and the convolution walk should advance.
                increment_vals = False
                # send data to NoC
                if (self.ilast_read.valid() and self.ifmap_rd_chn.vacancy(1)):
                    is_zero = self.ilast_read.pop()
                    #print(is_zero)
                    if (not is_zero):
                        self.curr_data = [e for e in self.isram.response()]
                        self.data_idx = 0
                    else:
                        increment_vals = True
                elif (not self.data_idx == self.num_nonzero
                      and self.weights_to_send.vacancy()
                      and self.base_addr_wo_chn >= 0):
                    # Send one (value, index, flag) triple of the current
                    # word, rewriting the first field to the flat output
                    # position.
                    data = [self.curr_data[i] for i in \
                        range(self.data_idx*3, self.data_idx*3 + 3)]
                    data_mod = [self.curr_x*self.image_size[1]+self.curr_y,\
                        data[1], data[2]]
                    self.ifmap_rd_chn.push(data_mod)
                    #print("iw glb inputs sent: {},{},{}".format(self.curr_x, self.curr_y, self.curr_chn))
                    #print(data_mod)
                    self.raw_stats['rd'] += 1

                    # Assertion checks that we will not attempt to read data that
                    # has not yet been stored in memory
                    waddr = self.base_addr_wo_chn + int(data[0]) * out_sets
                    assert (self.wwr_done or waddr < self.addr)
                    #self.wsram.request(RD, waddr)
                    #self.wlast_read.push(False)
                    self.weights_to_send.push(waddr)
                    #print("Send request (wi glb):")
                    #print(waddr)

                    self.data_idx += 1
                    #if (self.data_idx == self.num_nonzero):
                    # data[2] == 1 flags the last nonzero in this block.
                    if (data[2] == 1):
                        self.data_idx = self.num_nonzero
                        increment_vals = True
                    if (self.data_idx == self.num_nonzero):
                        self.base_addr_wo_chn = -1
                    #print(self.data_idx)
                if (increment_vals):
                    #print(self.send_idx)
                    # Advance channel set, then filter tap, then (x, y).
                    self.curr_chn += 1
                    if (self.curr_chn == in_sets):
                        self.curr_chn = 0
                        self.send_idx += 1
                    if (self.send_idx == self.filter_size[0] *
                            self.filter_size[1]):
                        #print("Ready to shift input glb frame ({},{})".format(self.curr_x, self.curr_y))
                        self.send_idx = 0
                        self.curr_y += 1
                        if (self.curr_y == self.image_size[1]):
                            self.curr_y = 0
                            self.curr_x += 1
                        if (self.curr_x == self.image_size[0]):
                            self.curr_x = 0
                            self.ifmap_done = True
                            #print("Done sending inputs from iw glb")
                            self.ready_to_output = False
                            self.inputs_to_flush = 1
                            self.weights_to_flush = self.arr_y // self.block_size
                        elif (not self.iwr_done):
                            self.ready_to_output = False
                        #print(self.ifmap_wr_chn.valid())

                # stage one of these at a time
                # request data from SRAM
                if (not self.ifmap_done and self.ilast_read.vacancy(1) and \
                    self.data_idx == self.num_nonzero and self.weights_to_send.vacancy()\
                    and self.base_addr_wo_chn == -1):
                    # and not (self.curr_x == self.image_size[0]):
                    # Position of the ifmap pixel the current filter tap
                    # touches; out-of-image positions push a zero marker.
                    x_adj = (self.curr_x + self.curr_filt_x - offset_x)
                    y_adj = self.curr_y + self.curr_filt_y - offset_y
                    idx = x_adj * self.image_size[1] + y_adj
                    #print("{},{},{} input requested".format(x_adj, y_adj, self.curr_filt_set))
                    #print(idx)
                    if (x_adj < 0 or x_adj >= self.image_size[0] or y_adj < 0
                            or y_adj >= self.image_size[1]):
                        self.ilast_read.push(True)
                    else:
                        addr = (idx * in_sets +
                                self.curr_filt_set) % self.ifmap_glb_depth
                        self.isram.request(RD, addr)
                        self.ilast_read.push(False)
                        # set up for corresponding weights to be sent later
                        self.base_addr_wo_chn = self.curr_filt_x*self.filter_size[1]\
                            *self.in_chn*out_sets + \
                            self.curr_filt_y*self.in_chn*out_sets + \
                            self.curr_filt_set*self.block_size*out_sets
                        #print("Next base addr = {}".format(self.base_addr_wo_chn))

                    # Walk channel set -> filter row -> filter column.
                    self.curr_filt_set += 1
                    if (self.curr_filt_set == in_sets):
                        self.curr_filt_set = 0
                        self.curr_filt_y += 1
                    if (self.curr_filt_y == self.filter_size[1]):
                        self.curr_filt_y = 0
                        self.curr_filt_x += 1
                    if (self.curr_filt_x == self.filter_size[0]):
                        self.curr_filt_x = 0
# Example #12
class WeightsGLB(Module):
    """Global buffer (GLB) for filter weights.

    Fill phase: weight blocks arriving on ``wr_chn`` are stored
    sequentially into an SRAM.  Drain phase: the entire weight store is
    streamed out on ``rd_chn`` once per iteration, repeated
    ``num_iterations`` times (once per ifmap block across the image).
    """

    def instantiate(self, wr_chn, rd_chn, glb_depth, block_size):
        # wr_chn: channel delivering weight blocks (from DRAM)
        # rd_chn: channel on which weights are streamed out (to the NoC)
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.name = 'weight_glb'

        self.filter_size = (0, 0)
        self.image_size = (0, 0)
        self.wr_done = False
        self.iteration = 0
        self.addr = 0
        self.in_chn = 0
        self.out_chn = 0
        self.block_size = block_size

        self.sram = SRAM(glb_depth, block_size)
        # In-flight SRAM read tracker; bounds outstanding reads to 3.
        self.last_read = Channel(3)

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, block_size), 'rd': 0, 'wr': 0}

    def configure(self, filter_size, image_size, in_chn, out_chn):
        """Reset fill/drain state for a new layer configuration."""
        self.filter_size = filter_size
        self.image_size = image_size
        self.iteration = 0
        self.addr = 0
        self.in_chn = in_chn
        self.out_chn = out_chn

        self.wr_done = False

    def tick(self):
        """Advance one cycle: absorb one write or issue/complete one read."""
        # num_iterations = times to read out all weights
        # max_addr = number of slots to hold all blocks of weights
        num_iterations = self.image_size[0] * self.image_size[
            1] * self.in_chn // self.block_size
        max_addr = self.filter_size[0] * self.filter_size[
            1] * self.in_chn * self.out_chn // self.block_size

        if not self.wr_done:
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                self.raw_stats['wr'] += len(data)
                self.sram.request(WR, self.addr, np.asarray(data))
                self.addr += 1
                if self.addr == max_addr:
                    self.addr = 0
                    self.wr_done = True
        elif self.rd_chn.vacancy(1) and self.iteration < num_iterations:
            # Issue one SRAM read per tick; the response is forwarded
            # below after the SRAM latency.
            self.sram.request(RD, self.addr)
            self.last_read.push(False)
            self.addr += 1
            if self.addr == max_addr:
                self.addr = 0
                self.iteration += 1

        # Forward any completed SRAM read to the read channel.
        if self.last_read.valid():
            self.last_read.pop()
            data = [e for e in self.sram.response()]
            self.rd_chn.push(data)
            self.raw_stats['rd'] += len(data)
# Example #13
class PSumGLB(Module):
    """Global buffer (GLB) for partial sums, backed by a dual-port SRAM.

    Fill phase: initial psum blocks arriving from DRAM on ``dram_wr_chn``
    are stored sequentially (port 0).  Drain phase: the stored blocks are
    streamed out on ``rd_chn`` (port 0) while updated psums arriving from
    the NoC on ``noc_wr_chn`` are written back on port 1.
    """
    def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                    block_size, num_nonzero):
        # dram_wr_chn: initial psum values from DRAM
        # noc_wr_chn: updated psum values written back from the NoC
        # rd_chn: psum values streamed out to the array
        self.dram_wr_chn = dram_wr_chn
        self.noc_wr_chn = noc_wr_chn
        self.rd_chn = rd_chn
        self.name = 'psum_glb'
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, block_size), 'rd': 0, 'wr': 0}

        # Two ports so reads (port 0) and NoC write-backs (port 1) can
        # proceed in the same cycle.
        self.sram = SRAM(glb_depth, block_size, nports=2, dtype=np.float16)
        # In-flight SRAM read tracker; True entries would substitute a zero
        # block for the response.
        self.last_read = Channel(3)

        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def configure(self, filter_size, out_chn, fmap_per_iteration):
        """Reset read/write state for a new layer configuration."""
        self.wr_done = False

        self.filter_size = filter_size
        self.out_chn = out_chn
        self.fmap_per_iteration = fmap_per_iteration

        self.rd_set = 0
        self.fmap_rd_idx = 0
        self.iteration = 0

        self.wr_set = 0
        self.fmap_wr_idx = 0
        self.wr_done = False

    def tick(self):
        """Advance one cycle: absorb one DRAM write or issue/complete one read."""
        # Single pass over the psum store in this variant.
        num_iteration = 1  #self.filter_size[0]*self.filter_size[1]

        if not self.wr_done:
            # Write to GLB
            if self.dram_wr_chn.valid():
                data = self.dram_wr_chn.pop()
                # Write ifmap to glb
                #print("psum_glb")
                #print(data)
                #addr = self.fmap_sets*self.fmap_wr_idx + self.wr_set
                addr = self.wr_set
                self.wr_set += 1
                self.sram.request(WR, addr, data, port=0)
                self.raw_stats['wr'] += len(data)
                # One block per output-channel set completes the fill.
                if self.wr_set == self.out_chn // self.block_size:
                    self.wr_set = 0
                    self.wr_done = True
                    #self.fmap_wr_idx += 1
                #if self.fmap_wr_idx == self.fmap_per_iteration:
                #    # Done initializing ifmaps and psums
                #    # self.sram.dump()
                #    #print("done!")
                #    self.fmap_wr_idx = 0
                #    self.wr_done = True

        else:
            # Read from GLB and deal with SRAM latency
            if self.rd_chn.vacancy(1) and self.iteration < num_iteration:
                #addr = self.fmap_sets*self.fmap_rd_idx + self.rd_set
                addr = self.rd_set
                self.sram.request(RD, addr, port=0)
                self.last_read.push(False)
                self.rd_set += 1
                if self.rd_set == self.out_chn // self.block_size:
                    self.rd_set = 0
                    self.fmap_rd_idx += 1
                if self.fmap_rd_idx == self.fmap_per_iteration:
                    self.fmap_rd_idx = 0
                    self.iteration += 1

            # Process the last read sent to the GLB SRAM
            if self.last_read.valid():
                is_zero = self.last_read.pop()
                data = [0]*self.block_size if is_zero else \
                        [e for e in self.sram.response()]
                self.rd_chn.push(data)
                self.raw_stats['rd'] += len(data)
# Example #14
class IFMapGLB(Module):
    def instantiate(self, wr_chn, rd_chn, arr_y, glb_depth, block_size,
                    num_nonzero):
        """Set up the ifmap GLB: SRAM backing store, NoC channels, and the
        bookkeeping state consumed by tick()."""
        self.name = 'ifmap_glb'

        # NoC-facing channels and PE-array geometry.
        self.wr_chn = wr_chn
        self.rd_chn = rd_chn
        self.arr_y = arr_y
        self.block_size = block_size
        self.num_nonzero = num_nonzero

        # Statistics reporting.
        self.stat_type = 'show'
        self.raw_stats = {'size': (glb_depth, num_nonzero), 'rd': 0, 'wr': 0}

        # Each SRAM word holds num_nonzero triples of 3 entries.
        self.sram = SRAM(glb_depth, num_nonzero * 3)
        self.last_read = Channel(3)
        self.glb_depth = glb_depth

        # Layer geometry; filled in later by configure().
        self.image_size = (0, 0)
        self.filter_size = (0, 0)
        self.fmap_sets = 0
        self.fmap_per_iteration = 0

        # Fill-phase bookkeeping.
        self.curr_set = 0
        self.fmap_idx = 0
        self.iteration = 0
        self.wr_done = False

        # Convolution traversal state.
        self.curr_x = 0
        self.curr_y = 0
        self.curr_chn = 0
        self.request_idx = 0
        self.send_idx = 0
        self.ifmap_done = False

        self.needed_addr = 0
        # Goes high once enough input is buffered to emit a filter-sized
        # block of inputs.
        self.ready_to_output = False
        self.curr_data = [0] * (3 * num_nonzero)
        # Blocks other operations while actively working through data;
        # one data point (of num_nonzero) is sent at a time.
        self.data_idx = num_nonzero

    def configure(self, image_size, filter_size, in_chn, fmap_per_iteration):
        """Reset traversal state for a new layer and compute the first SRAM
        address that must be filled before output can begin."""
        self.wr_done = False

        self.image_size = image_size
        self.filter_size = filter_size
        self.in_chn = in_chn
        self.fmap_per_iteration = fmap_per_iteration

        # Restart the convolution walk from the origin.
        self.curr_x, self.curr_y, self.curr_chn = 0, 0, 0
        self.request_idx = 0
        self.send_idx = 0
        self.curr_filt_x, self.curr_filt_y = 0, 0
        self.curr_filt_set = 0
        self.ifmap_done = False

        # Half-window offsets for a centered (odd-sized) filter.
        half_x = (filter_size[0] - 1) // 2
        half_y = (filter_size[1] - 1) // 2
        # The first address needed to be filled in order to start sending.
        self.needed_addr = (image_size[0] * (1 + half_y) + 1 + half_x) * \
            (in_chn // self.block_size) - 1
        # Goes high to transfer SRAM control to the output side; doing them
        # synchronously would be better, but complicates things.
        self.ready_to_output = False

    def tick(self):
        """Advance the GLB one cycle.

        Fill phase: pop one block from wr_chn and write it into the SRAM
        (circular addressing).  Drain phase: for each filter-window position,
        read a word from SRAM and stream it out to rd_chn, one 3-entry data
        point (of num_nonzero) per cycle.

        Bug fix: two guards referenced a bare ``num_nonzero``, which is never
        bound as a local here (only ``self.num_nonzero`` exists, as the other
        guard in this method already uses) — evaluating them raised
        NameError.  Both now read ``self.num_nonzero``.
        """
        # Fully drained and nothing in flight: nothing to do this cycle.
        if (self.ifmap_done and not self.last_read.valid()
                and not self.ready_to_output):
            return

        verbose = False

        num_iteration = self.filter_size[0] * self.filter_size[1]
        offset_x = (self.filter_size[0] - 1) // 2
        offset_y = (self.filter_size[1] - 1) // 2
        # NOTE(review): filter_x/filter_y are unused in the visible body;
        # kept in case later code (or debug prints) relies on them.
        filter_x = self.iteration % self.filter_size[0] - offset_x
        filter_y = self.iteration // self.filter_size[0] - offset_y
        in_sets = self.in_chn // self.block_size

        if not self.wr_done and not self.ready_to_output:
            # Fill phase: write one incoming ifmap block into the GLB.
            if self.wr_chn.valid():
                data = self.wr_chn.pop()
                data = np.reshape(np.asarray(data), (-1))

                full_addr = in_sets * self.fmap_idx + self.curr_set
                self.curr_set += 1
                addr = full_addr % self.glb_depth  # circular buffer

                # If we have enough inputs in memory to start sending.
                if (full_addr == self.needed_addr):
                    self.ready_to_output = True
                    self.needed_addr += in_sets

                self.sram.request(WR, addr, data)
                self.raw_stats['wr'] += len(data)
                if self.curr_set == self.fmap_sets:
                    self.curr_set = 0
                    self.fmap_idx += 1
                if self.fmap_idx == self.fmap_per_iteration:
                    # Done initializing ifmaps.
                    self.fmap_idx = 0
                    self.wr_done = True
        elif self.ready_to_output:
            # Drain step 1: latch a completed SRAM read into curr_data.
            if (self.last_read.valid() and self.rd_chn.vacancy(1)
                    and self.data_idx == 0):
                xmin = self.curr_filt_x
                xmax = xmin + self.arr_y
                #print("{}-{},{},{}".format(xmin, xmax, self.holder_y, self.curr_chn))
                is_zero = self.last_read.pop()
                if (not is_zero):
                    self.curr_data = [e for e in self.sram.response()]
                    self.data_idx = 0  # already 0 per the guard; kept as-is

            # Drain step 2: push one 3-entry data point per cycle.
            if (not self.data_idx == self.num_nonzero):
                data = [self.curr_data[i] for i in
                        range(self.data_idx * 3, self.data_idx * 3 + 3)]
                self.rd_chn.push(data)
                self.raw_stats['rd'] += len(data)
                self.data_idx += 1
                # Fixed: was bare `num_nonzero` (NameError).
                if (self.data_idx == self.num_nonzero):
                    self.data_idx = 0
                    self.curr_chn += 1
                    if (self.curr_chn == self.arr_y):
                        self.curr_chn = 0
                        self.send_idx += 1
                    if (self.send_idx == self.filter_size[0] *
                            self.filter_size[1]):
                        if (verbose):
                            print("Ready to shift input glb frame")
                        self.send_idx = 0
                        self.curr_y += 1
                        if (self.curr_y == self.image_size[1]):
                            self.curr_y = 0
                            self.curr_x += 1
                        if (self.curr_x == self.image_size[0]):
                            # Entire image streamed out.
                            self.curr_x = 0
                            self.ifmap_done = True
                            self.ready_to_output = False
                        elif (not self.wr_done):
                            # Pause output until the writer catches up.
                            self.ready_to_output = False

            # Drain step 3: stage the next SRAM read (one in flight at a
            # time).  Fixed: guard was bare `num_nonzero` (NameError).
            # NOTE(review): step 2 resets data_idx to 0 in the same cycle it
            # reaches num_nonzero, so this guard appears to hold only in the
            # initial state — verify request pacing against the caller.
            if (not self.ifmap_done and self.last_read.vacancy(1)
                    and self.data_idx == self.num_nonzero):
                idx = (self.curr_x + self.curr_filt_x -
                       offset_x) * self.image_size[
                           1] + self.curr_y + self.curr_filt_y - offset_y
                if (idx >= self.image_size[0] * self.image_size[1] or idx < 0):
                    # Out-of-bounds (padding) tap: mark as an all-zero read.
                    self.last_read.push(True)
                else:
                    addr = idx * in_sets + self.curr_filt_set
                    self.sram.request(RD, addr)
                    self.last_read.push(False)

                # Advance the filter-window walk.
                self.curr_filt_set += 1
                if (self.curr_filt_set == in_sets):
                    self.curr_filt_set = 0
                    self.curr_filt_y += 1
                if (self.curr_filt_y == self.filter_size[1]):
                    self.curr_filt_y = 0
                    self.curr_filt_x += 1
                if (self.curr_filt_x == self.filter_size[0]):
                    self.curr_filt_x = 0