def _impl(self):
    """
    Describe the node storage and the insert/pop port logic.

    Two bitmap registers track the state of every slot:

    * ``item_valid`` - bit is set when a node was inserted into the slot
      and cleared when the slot is popped
    * ``item_last`` - bit is set for a freshly inserted node and cleared
      for the parent node (``insert.addr``) once a node is appended
      behind it

    The ``values`` and ``next_ptrs`` arrays are written on the rising edge
    of ``clk`` when an insert is accepted and are read combinationally on
    the pop port.
    """
    n_items = self.ITEMS
    ptr_t = self.addr_t
    data_t = self.value_t
    slot_mask_t = Bits(n_items)

    # per-slot flags, used to quickly find a slot without a node
    occupied = self._reg("item_valid", slot_mask_t, def_val=0)
    is_last = self._reg("item_last", slot_mask_t, def_val=0)

    # address of the slot where the next node is going to be stored;
    # derived from the inverted next value of the occupancy bitmap
    # (original author's note: index of the first non valid slot)
    next_free = self._reg("insert_addr_next", ptr_t, def_val=0)
    free_slots = rename_signal(self, ~occupied.next, "item_become_invalid")
    next_free(oneHotToBin(self, free_slots))

    pop_port = self.pop
    ins_port = self.insert

    # the insert port reports where the node will land and is ready
    # as long as not every slot is occupied
    ins_port.addr_ret(next_free)
    ins_port.rd(occupied != mask(n_items))

    # one-hot selections of the slots touched by the current transactions
    pop_ack = pop_port.vld & pop_port.rd
    popped_oh = rename_signal(
        self,
        binToOneHot(pop_port.addr, en=pop_ack),
        "pop_one_hot")
    ins_ack = ins_port.vld & ins_port.rd
    inserted_oh = rename_signal(
        self,
        binToOneHot(next_free, en=ins_ack),
        "insert_one_hot")
    parent_oh = rename_signal(
        self,
        binToOneHot(ins_port.addr, en=ins_ack & ins_port.append),
        "insert_parent_one_hot")

    occupied((occupied & ~popped_oh) | inserted_oh)
    is_last((is_last & ~parent_oh) | inserted_oh)

    node_values = self._sig("values", data_t[n_items])
    node_next = self._sig("next_ptrs", ptr_t[n_items])
    If(self.clk._onRisingEdge(),
        If(ins_ack,
            # link the new node behind the parent node given by insert.addr
            node_next[ins_port.addr](next_free),
            node_values[next_free](ins_port.data),
        )
    )

    # pop port reads the node selected by pop.addr
    pop_port.data(node_values[pop_port.addr])
    pop_port.addr_next(node_next[pop_port.addr])
    pop_port.last(is_last[pop_port.addr] & occupied[pop_port.addr])
    pop_port.vld(occupied != 0)
def _impl(self):
    """
    Connect the victim request, increment and set ports to the ports of ``self.lru_mem``.

    * The first two memory ports are used as read/write ports for the victim
      request; the write port is shared with the ``set`` port, which takes
      priority whenever ``set.vld`` is asserted.
    * The remaining memory ports are taken in pairs (read, write), one pair per
      item of ``self.incr``.
    * Every request is first stored in a register (``victim_req_tmp``,
      ``incr_tmp<i>``) and the write to the memory is driven from that register.
    * Writes which target the same index are merged using
      ``merge_successor_writes_into_incr_one_hot``; a port whose index collides
      with the victim request or with an ``incr`` port of a lower index
      does not write at all.
    """
    m = self.lru_mem
    victim_req_r, victim_req_w = m.port[:2]

    # victim selection ports
    victim_req = self.victim_req
    victim_req_r.en(victim_req.vld)
    victim_req_r.addr(victim_req.addr)
    # register which keeps the index of the victim request for the write-back stage
    victim_req_tmp = self._reg(
        "victim_req_tmp",
        HStruct(
            (victim_req.addr._dtype, "index"),
            (BIT, "vld")
        ),
        def_val={"vld": 0}
    )
    set_ = self.set
    victim_data = self.victim_data
    # a new victim request is accepted only if "set" is not active and
    # the register is empty or its content is being consumed on victim_data
    victim_req.rd(~set_.vld & (~victim_req_tmp.vld | victim_data.rd))
    If((~victim_req_tmp.vld | victim_data.rd) & ~set_.vld,
        victim_req_tmp.index(victim_req.addr),
        victim_req_tmp.vld(victim_req.vld),
    )
    # remaining memory ports grouped by two, later unpacked as (incr_r, incr_w)
    incr_rw = list(grouper(2, m.port[2:]))

    # in the first step we have to collect all pending addresses
    # because we need them in order to resolve any potential access merging
    incr_tmp_mask_oh = []
    for i, (incr_in, (incr_r, _)) in enumerate(zip(self.incr, incr_rw)):
        incr_tmp = self._reg(
            f"incr_tmp{i:d}",
            HStruct(
                (incr_in.index._dtype, "index"),
                (incr_in.way._dtype, "way"),
                (BIT, "vld")
            ),
            def_val={"vld": 0}
        )
        incr_tmp.index(incr_in.index)
        incr_tmp.way(incr_in.way)
        # the request is dropped if the "set" port is active
        incr_tmp.vld(incr_in.vld & ~set_.vld)
        # NOTE(review): the one-hot mask is built from the input incr_in.way while
        # the index used for the write comes from the register incr_tmp.index;
        # incr_tmp.way is stored but never read in this method - confirm
        # that incr_in.way (and not incr_tmp.way) is intended here
        incr_val_oh = rename_signal(self, binToOneHot(incr_in.way), f"incr_val{i:d}_oh")
        incr_tmp_mask_oh.append((incr_tmp, incr_val_oh))

    lru = PseudoLru(victim_req_r.dout)
    victim = rename_signal(self, lru.get_lru(), "victim")
    victim_oh = rename_signal(self, binToOneHot(victim), "victim_oh")

    victim_data.data(victim)
    victim_data.vld(victim_req_tmp.vld & ~set_.vld)

    # every pending incr request to the same index is merged into the victim write
    succ_writes = [
        (incr2_tmp.vld & incr2_tmp.index._eq(victim_req_tmp.index), incr2_val_oh)
        for incr2_tmp, incr2_val_oh in incr_tmp_mask_oh
    ]
    _victim_oh = self.merge_successor_writes_into_incr_one_hot(succ_writes, victim_oh)
    # NOTE(review): "i" used in the signal name is the last value of the loop
    # variable from the loop above; it is undefined (NameError) if self.incr
    # or incr_rw is empty - confirm that at least one incr port is always present
    _victim_oh = rename_signal(self, _victim_oh, f"victim_val{i:d}_oh_final")

    # the "set" port is always ready
    set_.rd(1)
    If(set_.vld,
        # repurpose the victim_req_w port for an initialization of the LRU array using the "set" port
        victim_req_w.en(1),
        victim_req_w.addr(set_.addr),
        victim_req_w.din(set_.data),
    ).Else(
        # use the victim_req_w port for a victim request write back as usual
        victim_req_w.en(victim_req_tmp.vld),
        victim_req_w.addr(victim_req_tmp.index),
        victim_req_w.din(lru.mark_use_many(_victim_oh)),
    )

    for i, (incr_in, (incr_r, incr_w), (incr_tmp, incr_val_oh)) in enumerate(zip(self.incr, incr_rw, incr_tmp_mask_oh)):
        # drive incr_r port
        incr_r.addr(incr_in.index)
        incr_r.en(incr_in.vld)
        incr_in.rd(~set_.vld)

        # resolve the final mask of LRU incrementation for this port and drive incr_w
        prev_writes = [
            victim_req_tmp.vld & victim_req_tmp.index._eq(incr_tmp.index)
        ] + [
            incr2_tmp.vld & incr2_tmp.index._eq(incr_tmp.index)
            for incr2_tmp, _ in incr_tmp_mask_oh[:i]
        ]
        # if any of the previous ports writes to the same index we need to omit this write
        # as the record is written by that previous port
        incr_w.addr(incr_tmp.index)
        incr_w.en(incr_tmp.vld & ~Or(*prev_writes) & ~set_.vld)

        succ_writes = [
            (incr2_tmp.vld & incr2_tmp.index._eq(incr_tmp.index), incr2_val_oh)
            for incr2_tmp, incr2_val_oh in incr_tmp_mask_oh[i + 1:]
        ]
        # if it collides with the others merge their masks into incr_val_oh
        incr_val_oh = self.merge_successor_writes_into_incr_one_hot(succ_writes, incr_val_oh)
        incr_val_oh = rename_signal(self, incr_val_oh, f"incr_val{i:d}_oh_final")
        incr_w.din(PseudoLru(incr_r.dout).mark_use_many(incr_val_oh))

    propagateClkRstn(self)
def flush_or_read_node(
        self,
        d_arr_r: RamHsR,
        d_arr_w: AddrDataHs,
        st2_out: HsStructIntf,
        data_arr_read: Axi4_r,
        tag_update: AxiCacheTagArrayUpdateIntf,  # out
        ):
    """
    Build the stage which executes the data array operation selected by
    ``st2_out.data.data_array_op`` (victim flush, read forwarding, write).

    :param d_arr_r: read port of the data array, its ``data`` channel is consumed here
    :param d_arr_w: write port of the data array, fed by the buffered ``s.w`` data
    :param st2_out: output of the previous stage which describes the operation
    :param data_arr_read: read data channel which receives data read from the data array
    :param tag_update: (output) tag update port, asserted for operations which contain a write

    Every channel takes part in the synchronization only for the operations
    which need it; otherwise it is skipped:

    * data array read data: ``read``, ``write_and_flush``, ``read_and_write``
    * ``s.w`` data, data array write, ``s.b``: ``write``, ``write_and_flush``, ``read_and_write``
    * ``data_arr_read``: ``read``, ``read_and_write``
    * ``m.aw``, ``m.w``: ``write_and_flush``
    """
    slave = self.s
    master = self.m

    # write data from the slave port, buffered so it can wait for the tag lookup
    w_buffered = (
        AxiSBuilder(self, slave.w)
        .buff(self.tag_array.LOOKUP_LATENCY + 4)
        .end
    )

    op = st2_out.data
    replacement_index = self.parse_addr(op.replacement_addr)[1]
    d_arr_w.addr(self.addr_in_data_array(op.victim_way, replacement_index))
    # the read data of the data array is used directly, without additional buffering
    arr_rd_data = d_arr_r.data
    d_arr_w.data(w_buffered.data)
    d_arr_w.mask(w_buffered.strb)

    # write response for the slave port
    slave.b.id(op.write_id)
    slave.b.resp(RESP_OKAY)

    # single word read response built from the data array output
    data_arr_read.id(op.read_id)
    data_arr_read.data(arr_rd_data.data)
    data_arr_read.resp(RESP_OKAY)
    data_arr_read.last(1)

    # single word write of the victim on the master port
    master.aw.addr(op.victim_addr)
    master.aw.id(op.write_id)
    master.aw.len(0)
    self.axiAddrDefaults(master.aw)
    master.w.data(arr_rd_data.data)
    master.w.strb(mask(master.w.data._dtype.bit_length() // 8))
    master.w.last(1)

    # flushing needs to have higher priority than read in order to prevent deadlock;
    # the replacement is written after the victim load with a higher priority,
    # else if found the data is just written to the data array
    op_kind = op.data_array_op
    needs_flush = op_kind._eq(data_trans_t.write_and_flush)
    has_write = rename_signal(
        self,
        In(op_kind, [
            data_trans_t.write,
            data_trans_t.write_and_flush,
            data_trans_t.read_and_write,
        ]),
        "contains_write")
    has_array_read = rename_signal(
        self,
        In(op_kind, [
            data_trans_t.read,
            data_trans_t.write_and_flush,
            data_trans_t.read_and_write,
        ]),
        "contains_read")
    has_read_response = rename_signal(
        self,
        In(op_kind, [data_trans_t.read, data_trans_t.read_and_write]),
        "contains_read_data")

    # condition under which each channel participates in the transfer
    participates = {
        arr_rd_data: has_array_read,
        w_buffered: has_write,
        data_arr_read: has_read_response,
        master.aw: needs_flush,
        master.w: needs_flush,
        d_arr_w: has_write,
        slave.b: has_write,
    }
    sync_node = StreamNode(
        # collect read data from the data array, collect write data
        [st2_out, arr_rd_data, w_buffered],
        # to the read block or to the slave connected on the "m" interface,
        # write data to the data array and send the write acknowledge
        [data_arr_read, master.aw, master.w, d_arr_w, slave.b],
        extraConds=participates,
        # a channel which does not participate is skipped
        skipWhen={intf: ~cond for intf, cond in participates.items()},
    )
    sync_node.sync()

    # write responses from the master port are always accepted
    master.b.ready(1)

    tag_update.vld(st2_out.vld & has_write)
    tag_update.delete(0)
    tag_update.way_en(binToOneHot(op.victim_way))
    tag_update.addr(op.replacement_addr)

    # [TODO] initial clean
    lru_set = self.lru_array.set
    lru_set.addr(None)
    lru_set.data(None)
    lru_set.vld(0)
def read_request_section(self, read_ack: RtlSignal,
                         item_vld: RtlSignal,
                         waiting_transaction_id: RtlSignal,
                         waiting_transaction_vld: RtlSignal,
                         data_copy_override: VldSynced):
    """
    Build the logic which accepts read requests on ``s.ar``, looks up their address
    in ``self.addr_cam`` and forwards them to ``m.ar`` only if no match was found.

    :param read_ack: signal used together with ``s.r.id`` and ``s.r.last``
        to detect the end of a transaction
    :param item_vld: bit vector indexed by the transaction id, a bit is set when
        the transaction starts and cleared when it ends
    :param waiting_transaction_id: array indexed by the id of the parent transaction,
        written (on the rising edge of clk) with the id of the transaction which
        matched the parent
    :param waiting_transaction_vld: bit vector indexed by the transaction id, a bit is set
        when some transaction matched this one as a parent and cleared when this
        transaction ends
    :param data_copy_override: (output) asserted when the matched parent transaction
        is ending in this clock cycle, carries the id of the transaction
        stored in ``s_ar_tmp``
    """
    s = self.s
    m = self.m
    addr_cam = self.addr_cam
    ITEMS = addr_cam.ITEMS
    addr_cam_out = self.add_addr_cam_out_reg(item_vld)

    # register for the request accepted from s.ar
    with self._paramsShared():
        s_ar_tmp = self.s_ar_tmp = AxiSReg(s.AR_CLS)

    last_cam_insert_match = self._reg("last_cam_insert_match", Bits(ITEMS), def_val=0)
    # valid items which match (in the CAM output or in last_cam_insert_match)
    # and which do not already have a waiting transaction
    match_res = rename_signal(
        self,
        item_vld &
        (addr_cam_out.data | last_cam_insert_match) &
        ~waiting_transaction_vld,
        "match_res")
    # the request on s.ar uses an id which is still marked valid
    # or which is the same as the id of the request held in s_ar_tmp
    blocking_access = rename_signal(
        self,
        s.ar.valid &
        (
            item_vld[s.ar.id] |
            (s_ar_tmp.dataOut.valid & (s.ar.id._eq(s_ar_tmp.dataOut.id)))
        ),
        "blocking_access")
    s_ar_node = StreamNode(
        [s.ar],
        [addr_cam.match[0], s_ar_tmp.dataIn],
    )
    s_ar_node.sync(~blocking_access)
    # s_ar_node_ack = s_ar_node.ack() & ~blocking_access
    s_ar_tmp.dataIn(s.ar, exclude={s.ar.valid, s.ar.ready})

    parent_transaction_id = oneHotToBin(self, match_res, "parent_transaction_id")

    # m.ar takes part in the transfer only if nothing matched, otherwise it is skipped
    m_ar_node = StreamNode(
        [s_ar_tmp.dataOut, addr_cam_out],
        [m.ar],
        extraConds={m.ar: match_res._eq(0)},
        skipWhen={m.ar: match_res != 0},
    )
    m_ar_node.sync()
    m.ar(s_ar_tmp.dataOut, exclude={m.ar.valid, m.ar.ready})
    addr_cam.match[0].data(s.ar.addr[:self.CACHE_LINE_OFFSET_BITS])
    ar_ack = rename_signal(self, m_ar_node.ack(), "ar_ack")

    # insert into cam on empty position specified by id of this transaction
    acw = addr_cam.write
    acw.addr(s_ar_tmp.dataOut.id)
    acw.data(s_ar_tmp.dataOut.addr[:self.CACHE_LINE_OFFSET_BITS])
    acw.vld(addr_cam_out.vld)
    # one-hot id of the request in s_ar_tmp if the (non blocked) request on s.ar
    # has the same address bits as the request in s_ar_tmp
    # If(s_ar_node_ack,
    last_cam_insert_match(
        binToOneHot(
            s_ar_tmp.dataOut.id,
            en=~blocking_access &
               s.ar.valid &
               s_ar_tmp.dataOut.valid &
               s_ar_tmp.dataOut.addr[:self.CACHE_LINE_OFFSET_BITS]._eq(
                   s.ar.addr[:self.CACHE_LINE_OFFSET_BITS])))
    # )

    for trans_id in range(ITEMS):
        # it becomes ready if we are requested for it on "s" interface
        this_trans_start = s_ar_tmp.dataOut.id._eq(trans_id) & \
            (data_copy_override.vld | ar_ack)
        # item becomes invalid if we read last data word
        this_trans_end = read_ack & s.r.id._eq(trans_id) & s.r.last
        this_trans_end = rename_signal(self, this_trans_end, f"this_trans_end{trans_id:d}")
        item_vld[trans_id](apply_set_and_clear(item_vld[trans_id], this_trans_start, this_trans_end))

        waiting_transaction_start = (
            ar_ack &
            (match_res != 0) &
            parent_transaction_id._eq(trans_id) &
            ~this_trans_end
        )
        # note: this_trans_end in this context is for the parent transaction
        # which was not started just now, so it may be ending just now
        waiting_transaction_start = rename_signal(
            self, waiting_transaction_start, f"waiting_transaction_start{trans_id:d}")
        _waiting_transaction_vld = apply_set_and_clear(
            waiting_transaction_vld[trans_id],
            waiting_transaction_start,
            this_trans_end)
        waiting_transaction_vld[trans_id](rename_signal(
            self,
            _waiting_transaction_vld,
            f"waiting_transaction_vld{trans_id:d}"))

    # remember which transaction waits for the data of the parent transaction
    If(self.clk._onRisingEdge(),
        If((match_res != 0) & ar_ack,
           waiting_transaction_id[parent_transaction_id](s_ar_tmp.dataOut.id)
        )
    )

    # parent transaction is finishing just now
    # we need to quickly grab the data in data buffer and copy it also
    # for this transaction
    data_copy_override.vld(
        s_ar_tmp.dataOut.valid &
        read_ack &
        (match_res != 0) &
        s.r.id._eq(parent_transaction_id) &
        s.r.last)
    data_copy_override.data(s_ar_tmp.dataOut.id)