def instantiate(self):
    """Build the pruner testbench: a converter feeding a pruner, plus stimulus.

    Creates three channels (in -> mid -> out), a Converter between the
    first two and a ClusteredPruner between the last two, then generates
    and prints random test data.
    """
    self.name = 'tb'

    # Block geometry and pruning configuration.
    self.input_size = 4
    self.block_size = 12
    self.in_sets = self.block_size // self.input_size
    self.num_nonzero = 5
    self.preserve_order = True

    self.in_chn = Channel()
    self.mid_chn = Channel()
    self.out_chn = Channel()

    self.converter = Converter(self.in_chn, self.mid_chn,
                               self.input_size, self.block_size)
    # The original kept NaivePruner and ThresholdPruner as commented-out
    # alternatives, both invoked with this same argument list.
    self.pruner = ClusteredPruner(self.mid_chn, self.out_chn,
                                  self.num_nonzero, self.block_size,
                                  self.preserve_order)

    self.iterations = 10
    self.iteration = 0
    self.curr_set = 0
    self.out_counter = 0

    # One extra row beyond `iterations` is generated to flush out the
    # last outputs. The zero/non-zero decision is drawn before the value,
    # matching the evaluation order of the conditional expression.
    stimulus = []
    for _ in range(self.iterations + 1):
        block = []
        for _ in range(self.block_size):
            keep = randint(0, 3) > 1
            block.append(randint(1, 5) if keep else 0)
        stimulus.append(block)
    self.test_data = stimulus

    # The flush row (last one) is not printed.
    print("Stimulus:")
    print("[")
    for block in self.test_data[:-1]:
        print(block)
    print("]")
def instantiate(self, arr_x, arr_y, chn_per_word, done_chn,
                ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
    """Set up the 'conv_tb' testbench: a Stimulus driving a WSArch DUT.

    Args:
        arr_x: array width; also stored as ``out_chn``.
        arr_y: array height; also stored as ``in_chn``.
        chn_per_word: forwarded to both Stimulus and WSArch.
        done_chn: channel handed to the Stimulus.
        ifmap_glb_depth, psum_glb_depth, weight_glb_depth: forwarded to
            WSArch unchanged.
    """
    self.name = 'conv_tb'

    # Layer-dependent fields are left unset here.
    self.image_size = None
    self.filter_size = None
    self.full_in_chn = None
    self.full_out_chn = None
    self.ceil_in_chn = None
    self.ceil_out_chn = None

    # The channel counts mirror the array dimensions.
    self.in_chn = arr_y
    self.out_chn = arr_x
    self.done_chn = done_chn
    self.chn_per_word = chn_per_word
    self.arr_x = self.out_chn
    self.arr_y = self.in_chn

    self.input_chn = Channel(name='arch_input_chn')
    self.output_chn = Channel(name='arch_output_chn')

    self.stimulus = Stimulus(
        self.arr_x, self.arr_y, self.chn_per_word,
        self.input_chn, self.output_chn, self.done_chn,
    )
    self.dut = WSArch(
        self.arr_x, self.arr_y,
        self.input_chn, self.output_chn, self.chn_per_word,
        ifmap_glb_depth, psum_glb_depth, weight_glb_depth,
    )
def instantiate(self):
    """Set up a fixed-size testbench whose array is half the channel counts.

    The array is (out_chn // 2) x (in_chn // 2); the GLB depths are sized
    from the image area and the halved channel counts.
    """
    self.name = 'tb'

    self.image_size = (4, 4)
    self.filter_size = (3, 3)
    self.in_chn = 8
    self.out_chn = 16
    self.chn_per_word = 4

    self.arr_x = self.out_chn // 2
    self.arr_y = self.in_chn // 2

    self.input_chn = Channel()
    self.output_chn = Channel()
    self.psum_chn = Channel(128)

    self.curr_pass = 0
    self.tick_counter = 0

    # Depth = image area * (channels // 2) // channels-per-word, with the
    # same left-to-right integer arithmetic as the original expression.
    image_area = self.image_size[0] * self.image_size[1]
    ifmap_glb_depth = image_area * (self.in_chn // 2) // self.chn_per_word
    psum_glb_depth = image_area * (self.out_chn // 2) // self.chn_per_word

    self.stimulus = Stimulus(
        self.arr_x, self.arr_y, self.chn_per_word,
        self.input_chn, self.output_chn, self.psum_chn,
    )
    self.dut = WSArch(
        self.arr_x, self.arr_y,
        self.input_chn, self.output_chn, self.chn_per_word,
        ifmap_glb_depth, psum_glb_depth,
    )

    self.configuration_done = False
def instantiate(self):
    """Set up a fixed-size tiled testbench (Stimulus + WSArch).

    Only an ifmap GLB depth is computed and passed to the DUT; it scales
    with ``num_tiles``. Both depths are printed for reference.
    """
    self.name = 'tb'

    self.image_size = (4, 4)
    self.filter_size = (3, 3)
    self.in_chn = 4
    self.out_chn = 8
    self.chn_per_word = 4
    self.num_tiles = 4

    self.arr_x = self.out_chn
    self.arr_y = self.in_chn

    self.input_chn = Channel()
    self.output_chn = Channel()
    self.finish_signal_chn = Channel()

    self.stat_type = 'show'
    self.raw_stats = {}

    # image area * tiles * input channels // channels-per-word
    image_area = self.image_size[0] * self.image_size[1]
    ifmap_glb_depth = (image_area * self.num_tiles * self.in_chn
                       // self.chn_per_word)
    print("ifmap glb depth:", ifmap_glb_depth)
    print("weight glb depth: 0")

    self.stimulus = Stimulus(
        self.arr_x, self.arr_y, self.chn_per_word,
        self.input_chn, self.output_chn, self.finish_signal_chn,
    )
    self.dut = WSArch(
        self.arr_x, self.arr_y,
        self.input_chn, self.output_chn, self.chn_per_word,
        ifmap_glb_depth,
    )

    self.configuration_done = False
def instantiate(self):
    """Set up a fixed-size testbench with ifmap and psum GLB depths.

    The array dimensions equal the channel counts; both GLB depths are
    derived from the image area and printed before the DUT is built.
    """
    self.name = 'tb'

    self.image_size = (4, 4)
    self.filter_size = (3, 3)
    self.in_chn = 4
    self.out_chn = 8
    self.chn_per_word = 4

    self.arr_x = self.out_chn
    self.arr_y = self.in_chn

    self.input_chn = Channel()
    self.output_chn = Channel()

    image_area = self.image_size[0] * self.image_size[1]

    ifmap_glb_depth = image_area * self.in_chn // self.chn_per_word
    print("ifmap glb depth:", ifmap_glb_depth)

    psum_glb_depth = image_area * self.out_chn // self.chn_per_word
    print("psum glb depth:", psum_glb_depth)

    self.stimulus = Stimulus(
        self.arr_x, self.arr_y, self.chn_per_word,
        self.input_chn, self.output_chn,
    )
    self.dut = WSArch(
        self.arr_x, self.arr_y,
        self.input_chn, self.output_chn, self.chn_per_word,
        ifmap_glb_depth, psum_glb_depth,
    )

    self.configuration_done = False
def instantiate(self, image_size, filter_size, in_chn, out_chn, block_size,
                ifmap, weights, bias, pruner_name, num_nonzero):
    """Set up a parameterised testbench: a Stimulus driving an OSArch DUT.

    Args:
        image_size, filter_size: indexable pairs used to size the GLBs.
        in_chn, out_chn: channel counts; ``out_chn`` is also the array height.
        block_size: divisor used when sizing all three GLBs.
        ifmap, weights, bias: the inputs to this specific layer (stored only).
        pruner_name: forwarded to the Stimulus.
        num_nonzero: number of non-zero values in each block.
    """
    self.name = 'tb'

    # Layer shape (formerly hard-coded debug / non-debug presets).
    self.image_size = image_size
    self.filter_size = filter_size
    self.in_chn = in_chn
    self.out_chn = out_chn
    self.block_size = block_size
    # Number of non-zero values in each block; helps test the
    # correctness of the arch.
    self.num_nonzero = num_nonzero

    # The inputs to this specific layer.
    self.ifmap = ifmap
    self.weights = weights
    self.bias = bias
    self.pruner_name = pruner_name

    self.arr_y = self.out_chn

    self.input_chn = Channel()
    self.output_chn = Channel()

    # filter_size[1] + (filter_size[0] - 1) * image_size[1] elements per
    # input channel, divided into blocks.
    window = (self.filter_size[1]
              + (self.filter_size[0] - 1) * self.image_size[1])
    ifmap_glb_depth = window * self.in_chn // self.block_size
    psum_glb_depth = self.out_chn // self.block_size
    weight_glb_depth = (self.filter_size[0] * self.filter_size[1]
                        * self.in_chn * self.out_chn // self.block_size)

    self.stimulus = Stimulus(
        self.arr_y, self.block_size, self.num_nonzero,
        self.input_chn, self.output_chn, self.pruner_name,
    )
    self.dut = OSArch(
        self.arr_y, self.input_chn, self.output_chn,
        self.block_size, self.num_nonzero,
        ifmap_glb_depth, psum_glb_depth, weight_glb_depth,
    )

    self.configuration_done = False
def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                chn_per_word):
    """Set up the 'psum_glb' buffer: channels, a 2-port SRAM and counters.

    Args:
        dram_wr_chn, noc_wr_chn, rd_chn: channels stored for later use.
        glb_depth: SRAM depth (first SRAM argument).
        chn_per_word: SRAM width (second SRAM argument).
    """
    self.dram_wr_chn = dram_wr_chn
    self.noc_wr_chn = noc_wr_chn
    self.rd_chn = rd_chn
    self.chn_per_word = chn_per_word
    self.name = 'psum_glb'

    # Statistics: the reported size matches the SRAM dimensions.
    self.stat_type = 'show'
    self.raw_stats = {
        'size': (glb_depth, chn_per_word),
        'rd': 0,
        'wr': 0,
    }

    self.sram = SRAM(glb_depth, chn_per_word, nports=2, name=self.name)
    self.last_read = Channel(3, name='last_read')

    self.filter_size = (0, 0)

    # Read-side and write-side progress counters, all starting at zero.
    self.fmap_sets = 0
    self.fmap_per_iteration = 0
    self.rd_set = 0
    self.fmap_rd_idx = 0
    self.iteration = 0
    self.wr_set = 0
    self.fmap_wr_idx = 0
    self.wr_done = False
def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
    """Set up the 'ifmap_glb' buffer (variant with tile counters).

    Args:
        wr_chn, rd_chn: channels stored for later use.
        glb_depth: SRAM depth (first SRAM argument).
        chn_per_word: SRAM width (second SRAM argument).
    """
    self.wr_chn = wr_chn
    self.rd_chn = rd_chn
    self.chn_per_word = chn_per_word
    self.name = 'ifmap_glb'

    # Statistics: the reported size matches the SRAM dimensions.
    self.stat_type = 'show'
    self.raw_stats = {
        'size': (glb_depth, chn_per_word),
        'rd': 0,
        'wr': 0,
    }

    self.sram = SRAM(glb_depth, chn_per_word, name=self.name)
    self.last_read = Channel(3, name='last_read')

    # Layer shape placeholders.
    self.image_size = (0, 0)
    self.filter_size = (0, 0)

    # Progress counters, all starting at zero.
    self.fmap_sets = 0
    self.full_fmap_sets = 0
    self.fmap_per_iteration = 0
    self.curr_set = 0
    self.fmap_idx = 0
    self.iteration = 0
    self.tile_in = 0
    self.tile_out = 0

    # Status flags; note that task_done starts out True.
    self.wr_done = False
    self.task_done = True
def instantiate(self, wr_chn, rd_chn, glb_depth, chn_per_word):
    """Set up the 'ifmap_glb' buffer (variant with prefixed stat keys).

    Args:
        wr_chn, rd_chn: channels stored for later use.
        glb_depth: SRAM depth; also kept on the instance.
        chn_per_word: SRAM width (second SRAM argument).
    """
    self.wr_chn = wr_chn
    self.rd_chn = rd_chn
    self.chn_per_word = chn_per_word
    self.glb_depth = glb_depth
    self.name = 'ifmap_glb'

    # Statistics use 'ifmap_glb_'-prefixed read/write keys in this variant.
    self.stat_type = 'show'
    self.raw_stats = {
        'size': (glb_depth, chn_per_word),
        'ifmap_glb_rd': 0,
        'ifmap_glb_wr': 0,
    }

    self.sram = SRAM(glb_depth, chn_per_word)
    self.last_read = Channel(3)

    # Layer shape placeholders.
    self.image_size = (0, 0)
    self.filter_size = (0, 0)

    # Progress counters, all starting at zero.
    self.fmap_sets = 0
    self.fmap_per_iteration = 0
    self.curr_set = 0
    self.fmap_idx = 0
    self.iteration = 0
    self.wr_done = False
def instantiate(self, dram_wr_chn, noc_wr_chn, rd_chn, glb_depth,
                block_size, num_nonzero):
    """Set up the 'psum_glb' buffer backed by a 2-port float16 SRAM.

    Args:
        dram_wr_chn, noc_wr_chn, rd_chn: channels stored for later use.
        glb_depth: SRAM depth (first SRAM argument).
        block_size: SRAM width (second SRAM argument).
        num_nonzero: stored on the instance; not used during set-up.
    """
    self.dram_wr_chn = dram_wr_chn
    self.noc_wr_chn = noc_wr_chn
    self.rd_chn = rd_chn
    self.name = 'psum_glb'
    self.block_size = block_size
    self.num_nonzero = num_nonzero

    # Statistics: the reported size matches the SRAM dimensions.
    self.stat_type = 'show'
    self.raw_stats = {
        'size': (glb_depth, block_size),
        'rd': 0,
        'wr': 0,
    }

    self.sram = SRAM(glb_depth, block_size, nports=2, dtype=np.float16)
    self.last_read = Channel(3)

    self.filter_size = (0, 0)

    # Read-side and write-side progress counters, all starting at zero.
    self.fmap_sets = 0
    self.fmap_per_iteration = 0
    self.rd_set = 0
    self.fmap_rd_idx = 0
    self.iteration = 0
    self.wr_set = 0
    self.fmap_wr_idx = 0
    self.wr_done = False
def instantiate(self, arch_input_chn, arr_y, block_size, num_nonzero,
                pruner_name):
    """Set up the stimulus: a converter -> pruner chain and state counters.

    Args:
        arch_input_chn: channel the pruner pushes into.
        arr_y: stored on the instance.
        block_size: used as both size arguments of the Converter.
        num_nonzero: forwarded to the pruner.
        pruner_name: attribute name looked up on the ``pruner`` module to
            obtain the pruner class for this layer.
    """
    # Static configuration (immutable).
    self.arr_y = arr_y
    self.block_size = block_size
    self.num_nonzero = num_nonzero

    self.convert_chn = Channel()
    self.prune_chn = Channel()
    self.arch_input_chn = arch_input_chn

    # Both the input serializer and the pruner push to arch_input_chn.
    # There is no conflict because all weights are pushed by the
    # serializer first, then all inputs by the pruner.
    self.converter = Converter(self.convert_chn, self.prune_chn,
                               self.block_size, self.block_size)

    # User-selected pruner for this layer, resolved by name.
    pruner_cls = getattr(pruner, pruner_name)
    self.pruner = pruner_cls(self.prune_chn, self.arch_input_chn,
                             self.num_nonzero, self.block_size, True)

    # Layer data and shape, unset until configured.
    self.ifmap = None
    self.weights = None
    self.bias = None
    self.image_size = (0, 0)
    self.filter_size = (0, 0)

    self.ifmap_psum_done = True
    self.pass_done = Reg(False)

    # State counters.
    self.curr_set = 0
    self.curr_filter = 0
    self.iteration = 0
    self.fmap_idx = 0
    self.curr_chn = 0
    self.curr_x = 0  # run through first two dimensions of input
    self.curr_y = 0
    self.bias_set = 0
def instantiate(self, arr_x, arr_y, chn_per_word, done_chn,
                ifmap_glb_depth, psum_glb_depth, weight_glb_depth):
    """Set up the 'fc_tb' testbench: a Stimulus driving an OSArch DUT.

    Args:
        arr_x, arr_y: array dimensions.
        chn_per_word: forwarded to both Stimulus and OSArch.
        done_chn: channel handed to the Stimulus.
        ifmap_glb_depth, weight_glb_depth: forwarded to OSArch.
        psum_glb_depth: accepted but not forwarded to OSArch here.
    """
    self.name = 'fc_tb'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word

    # Layer-dependent fields are left unset here.
    self.batch_size = None
    self.input_size = None
    self.output_size = None
    self.ceil_batch = None
    self.ceil_output = None

    self.input_chn = Channel(name='arch_input_chn')
    self.output_chn = Channel(name='arch_output_chn')
    self.done_chn = done_chn

    self.stimulus = Stimulus(
        self.arr_x, self.arr_y, self.chn_per_word,
        self.input_chn, self.output_chn, self.done_chn,
    )
    self.dut = OSArch(
        self.arr_x, self.arr_y,
        self.input_chn, self.output_chn, self.chn_per_word,
        ifmap_glb_depth, weight_glb_depth,
    )
def instantiate(self, wr_chn, rd_chn, glb_depth, block_size):
    """Set up the 'weight_glb' buffer: channels, SRAM and bookkeeping.

    Args:
        wr_chn, rd_chn: channels stored for later use.
        glb_depth: SRAM depth (first SRAM argument).
        block_size: SRAM width (second SRAM argument).
    """
    self.wr_chn = wr_chn
    self.rd_chn = rd_chn
    self.name = 'weight_glb'

    # Layer shape placeholders and progress state.
    self.filter_size = (0, 0)
    self.image_size = (0, 0)
    self.wr_done = False
    self.iteration = 0
    self.addr = 0
    self.in_chn = 0
    self.out_chn = 0

    self.block_size = block_size
    self.sram = SRAM(glb_depth, block_size)
    self.last_read = Channel(3)

    # Statistics: the reported size matches the SRAM dimensions.
    self.stat_type = 'show'
    self.raw_stats = {
        'size': (glb_depth, block_size),
        'rd': 0,
        'wr': 0,
    }
def instantiate(self, wr_chn, rd_chn, arr_y, glb_depth, block_size,
                num_nonzero):
    """Set up the sparse 'ifmap_glb' buffer: channels, SRAM and state.

    Each SRAM word is ``3 * num_nonzero`` entries wide. The reported
    ``raw_stats['size']`` now uses that same width; it previously
    reported ``num_nonzero``, which disagreed with the SRAM actually
    allocated and with the other GLBs, whose stats match their SRAM
    dimensions exactly.

    Args:
        wr_chn, rd_chn: channels stored for later use.
        arr_y: stored on the instance.
        glb_depth: SRAM depth; also kept on the instance.
        block_size: stored on the instance.
        num_nonzero: determines the SRAM word width and the initial
            ``data_idx``.
    """
    self.wr_chn = wr_chn
    self.rd_chn = rd_chn
    self.arr_y = arr_y
    self.block_size = block_size
    self.num_nonzero = num_nonzero
    self.name = 'ifmap_glb'

    # One word holds three entries per non-zero value.
    # NOTE(review): presumably value plus two pieces of metadata - confirm.
    word_width = num_nonzero * 3

    self.stat_type = 'show'
    self.raw_stats = {
        'size': (glb_depth, word_width),
        'rd': 0,
        'wr': 0,
    }

    self.sram = SRAM(glb_depth, word_width)
    self.last_read = Channel(3)
    self.glb_depth = glb_depth

    # Layer shape placeholders.
    self.image_size = (0, 0)
    self.filter_size = (0, 0)

    # Progress counters, all starting at zero.
    self.fmap_sets = 0
    self.fmap_per_iteration = 0
    self.curr_set = 0
    self.fmap_idx = 0
    self.iteration = 0
    self.wr_done = False

    # For managing convolution.
    self.curr_x = 0
    self.curr_y = 0
    self.curr_chn = 0
    self.request_idx = 0
    self.send_idx = 0
    self.ifmap_done = False

    self.needed_addr = 0
    # Ready to output a filter_size block of inputs.
    self.ready_to_output = False
    self.curr_data = [0] * word_width
    # Block other operations while actively working through data.
    self.data_idx = num_nonzero
def instantiate(self, arr_x, arr_y, chn_per_word, layers, batch_size):
    """Set up the 'meta' testbench that sequences conv and FC layers.

    Walks ``layers`` once to (a) check that each layer's input shape
    matches the previous layer's output shape, (b) track the largest GLB
    depths any layer requires, and (c) count conv and FC layers. It then
    builds a WSArchTB if any conv layer exists and an OSArchTB if any FC
    layer exists, both sized with the maximum depths.

    Args:
        arr_x, arr_y: array dimensions shared by both sub-testbenches.
        chn_per_word: forwarded to depth calculation and sub-testbenches.
        layers: iterable of Conv / FC layer objects.
        batch_size: batch dimension used for shape propagation.

    Raises:
        Exception: on a shape mismatch between consecutive layers, or when
            a layer is neither Conv nor FC.
    """
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word
    self.layers = layers
    self.batch_size = batch_size
    self.name = 'meta'
    self.started = False
    self.done_chn = Channel()

    # Running maxima over all layers.
    self.ifmap_glb_depth = 0
    self.psum_glb_depth = 0
    self.weights_glb_depth = 0

    self.conv_tb = None
    self.fc_tb = None

    # Shape produced by the previous layer (None before the first layer).
    prev_size = None
    prev_chn = None
    prev_was_conv = False
    num_convs = 0
    num_fc = 0

    for layer in self.layers:
        if isinstance(layer, Conv):
            if prev_size is not None and (
                    prev_size != layer.image_size
                    or prev_chn != layer.in_chn):
                raise Exception(
                    'Invalid conv image size for %s: %s %s' %
                    (layer.name, (prev_size, prev_chn),
                     (layer.image_size, layer.in_chn)))

            depths = WSArchTB.required_glb_depth(
                self.arr_x, self.arr_y, self.chn_per_word,
                layer.image_size, layer.filter_size,
                layer.in_chn, layer.out_chn)
            ifmap_glb_depth, psum_glb_depth, weights_glb_depth = depths

            in_shape = ((self.batch_size, ) + layer.image_size
                        + (layer.out_chn, ))
            output_shape = layer.new_shape(in_shape)
            prev_size = output_shape[1:3]
            prev_chn = output_shape[3]
            prev_was_conv = True
            num_convs += 1

        elif isinstance(layer, FC):
            if prev_size is not None:
                if prev_was_conv:
                    # Conv output is flattened before feeding the FC layer.
                    flat = prev_size[0] * prev_size[1] * prev_chn
                    if flat != layer.input_size:
                        raise Exception(
                            'Invalid conv to fc dimension transition to '
                            + layer.name)
                elif prev_size != layer.input_size:
                    raise Exception(
                        'Invalid fc dimension transition for '
                        + layer.name)

            depths = OSArchTB.required_glb_depth(
                self.arr_x, self.arr_y, self.chn_per_word,
                self.batch_size, layer.input_size, layer.output_size)
            ifmap_glb_depth, psum_glb_depth, weights_glb_depth = depths

            _, prev_size = layer.new_shape(
                (self.batch_size, layer.output_size))
            prev_was_conv = False
            num_fc += 1

        else:
            raise Exception('layer not valid')

        self.ifmap_glb_depth = max(self.ifmap_glb_depth, ifmap_glb_depth)
        self.psum_glb_depth = max(self.psum_glb_depth, psum_glb_depth)
        self.weights_glb_depth = max(self.weights_glb_depth,
                                     weights_glb_depth)

    # A sub-testbench is only built for layer kinds actually present.
    if num_convs:
        self.conv_tb = WSArchTB(
            self.arr_x, self.arr_y, self.chn_per_word, self.done_chn,
            self.ifmap_glb_depth, self.psum_glb_depth,
            self.weights_glb_depth)
    if num_fc:
        self.fc_tb = OSArchTB(
            self.arr_x, self.arr_y, self.chn_per_word, self.done_chn,
            self.ifmap_glb_depth, self.psum_glb_depth,
            self.weights_glb_depth)

    # Execution cursors.
    self.layer_step = 0
    self.batch_step = 0

    # Per-batch inputs and per-layer parameters, filled in later.
    self.conv_inputs = [None] * self.batch_size
    self.fc_input = None
    self.conv_weights = [None] * num_convs
    self.conv_bias = [None] * num_convs
    self.fc_weights = [None] * num_fc
    self.fc_bias = [None] * num_fc

    self.cur_conv = 0
    self.cur_fc = 0
def instantiate(self):
    """Set up a channel test: one Channel built with argument 4, plus counters."""
    self.channel = Channel(4)

    # Counters start at zero; test_size is fixed at 100.
    self.push_count = 0
    self.free_count = 0
    self.test_size = 100
def instantiate(self, arr_y, input_chn, output_chn, block_size,
                num_nonzero, ifmap_glb_depth, psum_glb_depth,
                weight_glb_depth):
    """Assemble the 'chip': IO, GLBs, a one-column PE array and the NoCs.

    Args:
        arr_y: number of PE rows (the array has a single column).
        input_chn, output_chn: DRAM-side IO channels.
        block_size, num_nonzero: forwarded to the deserializer, GLBs and
            NoCs as shown below.
        ifmap_glb_depth, weight_glb_depth: depths of the combined
            ifmap/weights GLB.
        psum_glb_depth: depth of the psum GLB.
    """
    # PE static configuration (immutable).
    self.name = 'chip'
    self.arr_y = arr_y
    self.block_size = block_size
    self.num_nonzero = num_nonzero

    self.stat_type = 'show'

    # DRAM IO channels.
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer.
    self.ifmap_wr_chn = Channel()
    self.psum_wr_chn = Channel()
    self.weights_wr_chn = Channel()
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.psum_wr_chn, arr_y, block_size, num_nonzero)
    self.psum_output_chn = Channel()
    self.serializer = OutputSerializer(self.output_chn,
                                       self.psum_output_chn)

    # GLBs and GLB channels. Ifmaps and weights share one combined GLB
    # (the separate IFMapGLB / WeightsGLB are not instantiated).
    self.ifmap_rd_chn = Channel(3)
    self.psum_rd_chn = Channel(3)
    self.psum_noc_wr_chn = Channel()
    self.psum_glb = PSumGLB(
        self.psum_wr_chn, self.psum_noc_wr_chn, self.psum_rd_chn,
        psum_glb_depth, block_size, num_nonzero)
    self.weights_rd_chn = Channel()
    self.ifmap_weights_glb = IFMapWeightsGLB(
        self.ifmap_wr_chn, self.ifmap_rd_chn,
        self.weights_wr_chn, self.weights_rd_chn,
        arr_y, ifmap_glb_depth, weight_glb_depth,
        block_size, num_nonzero)

    # PE array and local channel declaration.
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_in_chns = ModuleList()
    self.pe_psum_out_chns = ModuleList()

    # Actual array instantiation: arr_y rows of a single PE each.
    num_cols = 1
    for row in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_in_chns.append(ModuleList())
        self.pe_psum_out_chns.append(ModuleList())
        for col in range(num_cols):
            self.pe_ifmap_chns[row].append(Channel(32))
            self.pe_filter_chns[row].append(Channel(32))
            self.pe_psum_in_chns[row].append(Channel(32))
            self.pe_psum_out_chns[row].append(Channel(32))
            self.pe_array[row].append(
                PE(col, row,
                   self.pe_ifmap_chns[row][col],
                   self.pe_filter_chns[row][col],
                   self.pe_psum_in_chns[row][col],
                   self.pe_psum_out_chns[row][col]))

    # NoCs delivering weights, ifmaps and psums.
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 block_size)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns)
    self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_in_chns,
                                 self.arr_y, block_size)
    self.psum_wr_noc = PSumWrNoC(self.pe_psum_out_chns,
                                 self.psum_noc_wr_chn,
                                 self.psum_output_chn,
                                 self.arr_y, block_size)
def instantiate(self, ifmap_wr_chn, ifmap_rd_chn, weights_wr_chn,
                weights_rd_chn, arr_y, ifmap_glb_depth, weights_glb_depth,
                block_size, num_nonzero):
    """Set up the combined 'ifmap_weights_glb': two float16 SRAMs and state.

    Args:
        ifmap_wr_chn, ifmap_rd_chn: ifmap-side channels.
        weights_wr_chn, weights_rd_chn: weight-side channels.
        arr_y: stored on the instance.
        ifmap_glb_depth: depth of the ifmap SRAM (words are
            ``3 * num_nonzero`` wide).
        weights_glb_depth: depth of the weights SRAM (words are
            ``block_size`` wide).
        block_size, num_nonzero: word-width parameters, also stored.
    """
    self.ifmap_wr_chn = ifmap_wr_chn
    self.ifmap_rd_chn = ifmap_rd_chn
    self.weights_wr_chn = weights_wr_chn
    self.weights_rd_chn = weights_rd_chn
    self.arr_y = arr_y
    self.block_size = block_size
    self.num_nonzero = num_nonzero
    self.name = 'ifmap_weights_glb'

    self.in_chn = 0
    self.out_chn = 0

    # Statistics report the ifmap SRAM dimensions only.
    self.stat_type = 'show'
    self.raw_stats = {
        'size': (ifmap_glb_depth, num_nonzero * 3),
        'rd': 0,
        'wr': 0,
    }

    # Ifmap storage.
    self.isram = SRAM(ifmap_glb_depth, num_nonzero * 3, dtype=np.float16)
    self.ilast_read = Channel(3)
    self.ifmap_glb_depth = ifmap_glb_depth

    # Weight storage. A channel depth of one prevents SRAM reads from
    # colliding: a later read was 'replacing' an earlier one and returning
    # the wrong data, and allowing only one outstanding access at a time
    # avoids that.
    self.wsram = SRAM(weights_glb_depth, block_size, dtype=np.float16)
    self.wlast_read = Channel(1)
    self.weights_glb_depth = weights_glb_depth

    # Holds indices of weights that need to be sent to the NoC.
    self.weights_to_send = Channel(3)

    # Layer shape placeholders.
    self.image_size = (0, 0)
    self.filter_size = (0, 0)

    # Progress counters, all starting at zero.
    self.fmap_sets = 0
    self.fmap_per_iteration = 0
    self.curr_set = 0
    self.fmap_idx = 0
    self.iteration = 0

    # Separate write-done flags for ifmaps and weights.
    self.iwr_done = False
    self.wwr_done = False

    # For managing convolution.
    self.curr_x = 0
    self.curr_y = 0
    self.curr_chn = 0
    self.request_idx = 0
    self.send_idx = 0
    self.ifmap_done = False

    # For weights.
    self.addr = 0
    self.base_addr = 0  # to store values from self.weights_to_send
    # To keep track of current position within 3x3 filter.
    self.base_addr_wo_chn = -1

    # Invalid weights and inputs to use at the end to flush out the last
    # outputs.
    self.weights_to_flush = 0
    self.inputs_to_flush = 0

    self.needed_addr = 0
    # Ready to output a filter_size block of inputs.
    self.ready_to_output = False
    self.curr_data = [0] * (3 * num_nonzero)
    self.curr_weights = [0] * block_size
    # Block other operations while actively working through data.
    self.data_idx = num_nonzero
def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                ifmap_glb_depth):
    """Assemble the 'chip' with pre/post transform arrays around the PEs.

    Builds, in this order: IO (de)serializers, the ifmap / weights / bias
    GLBs, the arr_y x arr_x PE array with chained psum channels, the
    pre-transform ifmap and weight arrays, the post-transform array, and
    finally the NoCs and the ifmap tiler that connect them.

    Args:
        arr_x, arr_y: PE array width and height.
        input_chn, output_chn: DRAM-side IO channels.
        chn_per_word: forwarded to the deserializer, ifmap GLB and NoCs.
        ifmap_glb_depth: depth of the ifmap GLB.
    """
    # PE static configuration (immutable).
    self.name = 'chip'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word

    # Transform array dimensions. The tile count is fixed at 4.
    self.post_tr_x = arr_x          # num output channels = 8
    self.post_tr_y = 4              # num tiles = 4
    self.pre_tr_ifmap_x = arr_y     # num input channels = 4
    self.pre_tr_ifmap_y = 4         # num tiles = 4
    self.pre_tr_weights_x = arr_y   # num input channels = 4
    self.pre_tr_weights_y = arr_x   # num output channels = 8

    self.stat_type = 'show'

    # DRAM IO channels.
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer.
    self.ifmap_wr_chn = Channel()
    self.weights_wr_chn = Channel()
    self.bias_wr_chn = Channel()
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.bias_wr_chn, arr_x, arr_y, chn_per_word)
    self.psum_output_chn = Channel()
    self.serializer = OutputSerializer(self.output_chn,
                                       self.psum_output_chn)

    # GLBs and GLB channels.
    self.ifmap_glb_wr_chn = Channel(3)
    self.ifmap_rd_chn = Channel(3)
    self.ifmap_glb = IFMapGLB(self.ifmap_glb_wr_chn, self.ifmap_rd_chn,
                              ifmap_glb_depth, chn_per_word)

    # These psum channels are created although no PSumGLB is instantiated
    # in this variant.
    self.psum_rd_chn = Channel(3)
    self.psum_noc_wr_chn = Channel()

    self.weights_glb_wr_chn = Channel(3)
    self.weights_rd_chn = Channel()
    self.weights_glb = WeightsGLB(self.weights_glb_wr_chn,
                                  self.weights_rd_chn)

    self.bias_rd_chn = Channel()
    self.bias_glb = BiasGLB(self.bias_wr_chn, self.bias_rd_chn)

    # PE array and local channel declaration.
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_chns = ModuleList()

    # Row 0 of the psum channels feeds the top PE row; each PE row then
    # adds one more row of psum channels as its output.
    self.pe_psum_chns.append(ModuleList())
    for col in range(self.arr_x):
        self.pe_psum_chns[0].append(Channel(32))

    # Actual PE array instantiation.
    for row in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_chns.append(ModuleList())
        for col in range(self.arr_x):
            self.pe_ifmap_chns[row].append(Channel(32))
            self.pe_filter_chns[row].append(Channel(32))
            self.pe_psum_chns[row + 1].append(Channel(32))
            self.pe_array[row].append(
                PE(col, row,
                   self.pe_ifmap_chns[row][col],
                   self.pe_filter_chns[row][col],
                   self.pe_psum_chns[row][col],
                   self.pe_psum_chns[row + 1][col]))

    # Pre-transform ifmap array and local channel declaration.
    self.pre_tr_ifmap_array = ModuleList()
    self.pre_tr_ifmap_in_chns = ModuleList()
    self.pre_tr_ifmap_out_chns = ModuleList()

    # Actual pre-transform ifmap array instantiation.
    for row in range(self.pre_tr_ifmap_y):
        self.pre_tr_ifmap_array.append(ModuleList())
        self.pre_tr_ifmap_in_chns.append(ModuleList())
        self.pre_tr_ifmap_out_chns.append(ModuleList())
        for col in range(self.pre_tr_ifmap_x):
            self.pre_tr_ifmap_in_chns[row].append(Channel(32))
            self.pre_tr_ifmap_out_chns[row].append(Channel(32))
            self.pre_tr_ifmap_array[row].append(
                PreTransformIFMap(col, row,
                                  self.pre_tr_ifmap_in_chns[row][col],
                                  self.pre_tr_ifmap_out_chns[row][col]))

    # Pre-transform weight array and local channel declaration.
    self.pre_tr_weights_array = ModuleList()
    self.pre_tr_weights_in_chns = ModuleList()
    self.pre_tr_weights_out_chns = ModuleList()

    # Actual pre-transform weight array instantiation.
    for row in range(self.pre_tr_weights_y):
        self.pre_tr_weights_array.append(ModuleList())
        self.pre_tr_weights_in_chns.append(ModuleList())
        self.pre_tr_weights_out_chns.append(ModuleList())
        for col in range(self.pre_tr_weights_x):
            self.pre_tr_weights_in_chns[row].append(Channel(32))
            self.pre_tr_weights_out_chns[row].append(Channel(32))
            self.pre_tr_weights_array[row].append(
                PreTransformWeights(col, row,
                                    self.pre_tr_weights_in_chns[row][col],
                                    self.pre_tr_weights_out_chns[row][col]))

    # Post-transform array and local channel declaration.
    self.post_tr_array = ModuleList()
    self.post_tr_bias_chns = ModuleList()
    self.post_tr_ofmap_in_chns = ModuleList()
    self.post_tr_ofmap_out_chns = ModuleList()

    # Actual post-transform array instantiation.
    for row in range(self.post_tr_y):
        self.post_tr_array.append(ModuleList())
        self.post_tr_bias_chns.append(ModuleList())
        self.post_tr_ofmap_in_chns.append(ModuleList())
        self.post_tr_ofmap_out_chns.append(ModuleList())
        for col in range(self.post_tr_x):
            self.post_tr_bias_chns[row].append(Channel(32))
            self.post_tr_ofmap_in_chns[row].append(Channel(32))
            self.post_tr_ofmap_out_chns[row].append(Channel(32))
            self.post_tr_array[row].append(
                PostTransform(col, row,
                              self.post_tr_bias_chns[row][col],
                              self.post_tr_ofmap_in_chns[row][col],
                              self.post_tr_ofmap_out_chns[row][col]))

    # NoCs delivering weights, ifmaps, psums and biases.
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 self.chn_per_word)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                              self.arr_x, self.chn_per_word)
    self.psum_rd_noc = PSumRdNoC(self.pe_psum_chns[0], self.chn_per_word)
    self.bias_noc = BiasNoC(self.bias_rd_chn, self.post_tr_bias_chns,
                            self.chn_per_word)

    # NoCs for the post-transform blocks: the last psum row feeds them,
    # and their outputs go to the serializer's channel.
    self.post_tr_wr_noc = PostTrWrNoC(self.pe_psum_chns[-1],
                                      self.post_tr_ofmap_in_chns,
                                      self.chn_per_word)
    self.post_tr_rd_noc = PostTrRdNoC(self.post_tr_ofmap_out_chns,
                                      self.psum_output_chn,
                                      self.chn_per_word)

    # Tiler that feeds deserialized ifmaps into the pre-transform array.
    self.ifmap_tiler = IFMapTiler(self.ifmap_wr_chn,
                                  self.pre_tr_ifmap_in_chns,
                                  self.chn_per_word)

    # NoCs for the pre-transform blocks.
    self.pre_tr_ifmap_rd_noc = PreTrIFMapRdNoC(
        self.pre_tr_ifmap_out_chns, self.ifmap_glb_wr_chn,
        self.chn_per_word)
    self.pre_tr_weights_wr_noc = PreTrWeightsWrNoC(
        self.weights_wr_chn, self.pre_tr_weights_in_chns,
        self.chn_per_word)
    self.pre_tr_weights_rd_noc = PreTrWeightsRdNoC(
        self.pre_tr_weights_out_chns, self.weights_glb_wr_chn,
        self.chn_per_word)
def instantiate(self, arr_x, arr_y, input_chn, output_chn, chn_per_word,
                ifmap_glb_depth, psum_glb_depth):
    """Assemble the 'chip': IO, three GLBs, the PE array and the NoCs.

    Every internally created channel is given an explicit name.

    Args:
        arr_x, arr_y: PE array width and height.
        input_chn, output_chn: DRAM-side IO channels.
        chn_per_word: forwarded to the deserializer, GLBs and NoCs.
        ifmap_glb_depth, psum_glb_depth: depths of the ifmap and psum GLBs.
    """
    # PE static configuration (immutable).
    self.name = 'chip'
    self.arr_x = arr_x
    self.arr_y = arr_y
    self.chn_per_word = chn_per_word

    self.stat_type = 'show'

    # DRAM IO channels.
    self.input_chn = input_chn
    self.output_chn = output_chn

    # Input deserializer and output serializer.
    self.ifmap_wr_chn = Channel(name='ifmap_wr_chn')
    self.psum_wr_chn = Channel(name='psum_wr_chn')
    self.weights_wr_chn = Channel(name='weights_wr_chn')
    self.deserializer = InputDeserializer(
        self.input_chn, self.ifmap_wr_chn, self.weights_wr_chn,
        self.psum_wr_chn, arr_x, arr_y, chn_per_word)
    self.psum_output_chn = Channel(name='psum_output_chn')
    self.serializer = OutputSerializer(self.output_chn,
                                       self.psum_output_chn)

    # GLBs and GLB channels.
    self.ifmap_rd_chn = Channel(3, name='ifmap_rd_chn')
    self.ifmap_glb = IFMapGLB(self.ifmap_wr_chn, self.ifmap_rd_chn,
                              ifmap_glb_depth, chn_per_word)

    self.psum_rd_chn = Channel(3, name='psum_rd_chn')
    self.psum_noc_wr_chn = Channel(name='psum_noc_wr_chn')
    self.psum_glb = PSumGLB(self.psum_wr_chn, self.psum_noc_wr_chn,
                            self.psum_rd_chn, psum_glb_depth,
                            chn_per_word)

    self.weights_rd_chn = Channel(name='weights_rd_chn')
    self.weights_glb = WeightsGLB(self.weights_wr_chn,
                                  self.weights_rd_chn)

    # PE array and local channel declaration.
    self.pe_array = ModuleList()
    self.pe_ifmap_chns = ModuleList()
    self.pe_filter_chns = ModuleList()
    self.pe_psum_chns = ModuleList()

    # Row 0 of the psum channels feeds the top PE row.
    self.pe_psum_chns.append(ModuleList())
    for col in range(self.arr_x):
        psum_name = 'pe_psum_chns_{}_{}'.format(col, 0)
        self.pe_psum_chns[0].append(Channel(32, name=psum_name))

    # Actual array instantiation.
    for row in range(self.arr_y):
        self.pe_array.append(ModuleList())
        self.pe_ifmap_chns.append(ModuleList())
        self.pe_filter_chns.append(ModuleList())
        self.pe_psum_chns.append(ModuleList())
        for col in range(self.arr_x):
            ifmap_name = 'pe_ifmap_chns_{}_{}'.format(col, row)
            filter_name = 'pe_filter_chns_{}_{}'.format(col, row)
            # NOTE(review): the channel stored in psum row `row + 1` is
            # named with `row`, so psum rows 0 and 1 share names. Kept
            # as-is; confirm whether unique names are expected.
            psum_name = 'pe_psum_chns_{}_{}'.format(col, row)

            self.pe_ifmap_chns[row].append(Channel(32, name=ifmap_name))
            self.pe_filter_chns[row].append(Channel(32, name=filter_name))
            self.pe_psum_chns[row + 1].append(Channel(32, name=psum_name))
            self.pe_array[row].append(
                PE(col, row,
                   self.pe_ifmap_chns[row][col],
                   self.pe_filter_chns[row][col],
                   self.pe_psum_chns[row][col],
                   self.pe_psum_chns[row + 1][col]))

    # NoCs delivering weights, ifmaps and psums.
    self.filter_noc = WeightsNoC(self.weights_rd_chn, self.pe_filter_chns,
                                 self.chn_per_word)
    self.ifmap_noc = IFMapNoC(self.ifmap_rd_chn, self.pe_ifmap_chns,
                              self.arr_x, self.chn_per_word)
    self.psum_rd_noc = PSumRdNoC(self.psum_rd_chn, self.pe_psum_chns[0],
                                 self.chn_per_word)
    self.psum_wr_noc = PSumWrNoC(self.pe_psum_chns[-1],
                                 self.psum_noc_wr_chn,
                                 self.psum_output_chn,
                                 self.chn_per_word)