def construct( s, nports, mem_ifc_dtypes=[mk_mem_msg(8,32,32), mk_mem_msg(8,32,32)], latency=1, mem_nbytes=2**20 ):
  """Build a multi-port cycle-level memory around one MemoryFL.

  nports         -- number of minion interfaces (and queue pairs) to create.
  mem_ifc_dtypes -- sequence of (request class, response class) pairs,
                    indexed by port number.
  latency        -- total delay; at most one cycle of it is given to the
                    request queue and the remainder to the response queue.
  mem_nbytes     -- size handed to MemoryFL.
  """

  # Local constants
  s.nports = nports
  req_classes  = [ req_cls  for (req_cls, _)  in mem_ifc_dtypes ]
  resp_classes = [ resp_cls for (_, resp_cls) in mem_ifc_dtypes ]

  s.mem = MemoryFL( mem_nbytes )

  # Interface
  s.ifc = [ MemMinionIfcCL( req_classes[port], resp_classes[port] )
            for port in range(nports) ]

  # Queues: split the total latency between the request and response side
  req_latency  = min(1, latency)
  resp_latency = latency - req_latency

  s.req_qs  = [ DelayPipeDeqCL( req_latency )( enq = s.ifc[port].req )
                for port in range(nports) ]
  s.resp_qs = [ DelayPipeSendCL( resp_latency )( send = s.ifc[port].resp )
                for port in range(nports) ]

  @s.update
  def up_mem():
    for i in range(s.nports):
      # Only serve a port when a request is available AND the response
      # queue can take the answer.
      if not ( s.req_qs[i].deq.rdy() and s.resp_qs[i].enq.rdy() ):
        continue

      # Dequeue memory request message
      request = s.req_qs[i].deq()

      # A zero len field is replaced by the full data width in bytes
      nbytes = int(request.len)
      if nbytes == 0:
        nbytes = req_classes[i].data_nbits >> 3

      if request.type_ == MemMsgType.READ:
        response = resp_classes[i]( request.type_, request.opaque, 0, request.len,
                                    s.mem.read( request.addr, nbytes ) )

      elif request.type_ == MemMsgType.WRITE:
        s.mem.write( request.addr, nbytes, request.data )
        # FIXME do we really set len=0 in response when doing subword wr?
        response = resp_classes[i]( request.type_, request.opaque, 0, 0, 0 )

      else: # AMOS
        response = resp_classes[i]( request.type_, request.opaque, 0, request.len,
                                    s.mem.amo( request.type_, request.addr, nbytes, request.data ) )

      s.resp_qs[i].enq( response )
def construct( s, proc_cls, xcel_cls=NullXcelRTL, dump_vcd=False, src_delay=0, sink_delay=0, mem_stall_prob=0, mem_latency=1 ):
  """Wire a processor, an accelerator, a test source/sink and a 2-port memory.

  proc_cls / xcel_cls     -- component classes, instantiated with no arguments.
  src_delay / sink_delay  -- each is passed twice (as the third and fourth
                             argument) to TestSrcCL / TestSinkCL.
  mem_latency             -- forwarded to MemoryCL as `latency`.
  dump_vcd, mem_stall_prob are accepted but not used in this method.
  """

  s.commit_inst = OutPort( Bits1 )
  req, resp = mk_mem_msg( 8, 32, 32 )  # not referenced below

  s.src  = TestSrcCL ( Bits32, [], src_delay,  src_delay  )
  s.sink = TestSinkCL( Bits32, [], sink_delay, sink_delay )
  s.proc = proc_cls()
  s.xcel = xcel_cls()
  s.mem  = MemoryCL( 2, latency = mem_latency )

  # Each row is one point-to-point connection
  links = [
    ( s.proc.commit_inst, s.commit_inst    ),
    # Processor <-> Proc/Mngr
    ( s.src.send,         s.proc.mngr2proc ),
    ( s.proc.proc2mngr,   s.sink.recv      ),
    # Processor <-> Memory
    ( s.proc.imem,        s.mem.ifc[0]     ),
    ( s.proc.dmem,        s.mem.ifc[1]     ),
  ]
  connect_pairs( *[ endpoint for link in links for endpoint in link ] )

  # Processor <-> Accelerator
  connect( s.proc.xcel, s.xcel.xcel )
def construct( s, cls, nports, src_msgs, sink_msgs, stall_prob, mem_latency, src_initial, src_interval, sink_initial, sink_interval, arrival_time=None ):
  """Test harness: one source and one sink per port around a memory `cls`.

  cls          -- memory class, instantiated as
                  cls( nports, [(ReqType, RespType)]*nports, mem_latency ).
  src_msgs     -- per-port message lists for the sources.
  sink_msgs    -- per-port message lists for the sinks.
  arrival_time -- forwarded to every sink.
  stall_prob is accepted but not used in this method.
  """

  ReqType, RespType = mk_mem_msg(8,32,32)
  port_dtypes = [(ReqType, RespType)] * nports

  s.srcs = [ TestSrcCL( ReqType, src_msgs[port], src_initial, src_interval )
             for port in range(nports) ]

  s.mem = cls( nports, port_dtypes, mem_latency )

  s.sinks = [ TestSinkCL( RespType, sink_msgs[port], sink_initial, sink_interval,
                          arrival_time )
              for port in range(nports) ]

  # Connections: source -> memory request, memory response -> sink
  for port, src in enumerate( s.srcs ):
    connect( src.send,              s.mem.ifc[port].req )
    connect( s.mem.ifc[port].resp,  s.sinks[port].recv  )
def construct(s, ProcClass, XcelClass):
  """Compose a processor with an accelerator and re-export its interfaces.

  ProcClass -- processor component class; instantiated with no arguments
               and its commit_inst connected to this component's port.
  XcelClass -- accelerator component class; instantiated with no arguments
               and its `xcel` interface connected to the processor's.

  The level of the processor (RTL / CL / FL) is detected from the type of
  its `imem` interface, and the wrapper's mngr2proc / proc2mngr / imem /
  dmem interfaces are created at the same level.

  Raises TypeError if `imem` is none of the three known interface types.
  """
  req_class, resp_class = mk_mem_msg(8, 32, 32)

  s.commit_inst = OutPort(Bits1)

  s.proc = ProcClass()(commit_inst=s.commit_inst)
  s.xcel = XcelClass()(xcel=s.proc.xcel)

  if isinstance(s.proc.imem, MemMasterIfcRTL):  # RTL proc
    s.mngr2proc = RecvIfcRTL(Bits32)
    s.proc2mngr = SendIfcRTL(Bits32)
    s.imem = MemMasterIfcRTL(req_class, resp_class)
    s.dmem = MemMasterIfcRTL(req_class, resp_class)

  elif isinstance(s.proc.imem, MemMasterIfcCL):  # CL proc
    s.mngr2proc = NonBlockingCalleeIfc(Bits32)
    s.proc2mngr = NonBlockingCallerIfc(Bits32)
    s.imem = MemMasterIfcCL(req_class, resp_class)
    s.dmem = MemMasterIfcCL(req_class, resp_class)

  elif isinstance(s.proc.imem, MemMasterIfcFL):  # FL proc
    s.mngr2proc = GetIfcFL()
    s.proc2mngr = SendIfcFL()
    s.imem = MemMasterIfcFL()
    s.dmem = MemMasterIfcFL()

  else:
    # Without this branch the wrapper interfaces are never created and the
    # connections below fail with an unrelated-looking attribute error.
    raise TypeError(
        "unsupported processor imem interface type: {}".format(
            type(s.proc.imem).__name__))

  s.connect_pairs(
      s.mngr2proc, s.proc.mngr2proc,
      s.proc2mngr, s.proc.proc2mngr,
      s.imem, s.proc.imem,
      s.dmem, s.proc.dmem,
  )
def construct( s, proc_cls, xcel_cls, dump_vcd, src_delay, sink_delay, mem_stall_prob, mem_latency ):
  """Test harness around a ProcXcel composition with a 2-port memory.

  proc_cls / xcel_cls     -- forwarded to ProcXcel.
  src_delay / sink_delay  -- each is passed twice (as the third and fourth
                             argument) to TestSrcCL / TestSinkCL.
  mem_latency             -- forwarded to MemoryCL as `latency`.
  dump_vcd, mem_stall_prob are accepted but not used in this method.
  """

  s.commit_inst = OutPort( Bits1 )

  s.src  = TestSrcCL ( Bits32, [], src_delay, src_delay )
  s.sink = TestSinkCL( Bits32, [], sink_delay, sink_delay )

  s.dut = ProcXcel( proc_cls, xcel_cls )( commit_inst = s.commit_inst )

  s.mem = MemoryCL(2, latency = mem_latency)

  # The only component holding the processor here is s.dut; this harness
  # never creates s.proc, so the memory ports must be reached through s.dut
  # just like the manager ports are.
  connect_pairs(
    # Processor <-> Proc/Mngr
    s.src.send,      s.dut.mngr2proc,
    s.dut.proc2mngr, s.sink.recv,

    # Processor <-> Memory
    s.dut.imem, s.mem.ifc[0],
    s.dut.dmem, s.mem.ifc[1],
  )
def construct(s):
  """Create two FL masters and two CL minions, pairing them by index.

  Master i is built as SomeMasterFL(i * 0x222, i); minion i is built from
  the message types returned by mk_mem_msg(8, 32, 32) and has its `mem`
  interface connected to master i's `mem`.
  """
  masters = []
  for idx in range(2):
    masters.append(SomeMasterFL(idx * 0x222, idx))
  s.master = masters

  minions = []
  for idx in range(2):
    msg_types = mk_mem_msg(8, 32, 32)
    minions.append(SomeMinionCL(*msg_types)(mem=s.master[idx].mem))
  s.minion = minions
'or': MemMsgType.AMO_OR, 'xg': MemMsgType.AMO_SWAP, 'mn': MemMsgType.AMO_MIN, }

# Two-letter mnemonic -> MemMsgType used when building response messages.
resp_type_dict = {
  'rd': MemMsgType.READ,
  'wr': MemMsgType.WRITE,
  'ad': MemMsgType.AMO_ADD,
  'an': MemMsgType.AMO_AND,
  'or': MemMsgType.AMO_OR,
  'xg': MemMsgType.AMO_SWAP,
  'mn': MemMsgType.AMO_MIN,
}

# Request / response message classes shared by the helpers below.
req_cls, resp_cls = mk_mem_msg(8, 32, 32)
b32 = Bits32


def req(type_, opaque, addr, len, data):
  """Build a request message from a mnemonic and plain field values.

  `type_` is looked up in req_type_dict (defined above this excerpt) and
  `data` is wrapped in Bits32; the other fields are passed through as-is.
  """
  return req_cls(req_type_dict[type_], opaque, addr, len, b32(data))


def resp(type_, opaque, len, data):
  """Build a response message from a mnemonic and plain field values.

  `type_` is looked up in resp_type_dict, the third constructor argument is
  fixed to 0, and `data` is wrapped in Bits32.
  """
  return resp_cls(resp_type_dict[type_], opaque, 0, len, b32(data))

#----------------------------------------------------------------------
# Test Case: basic
#----------------------------------------------------------------------
# FooPRTL_test #========================================================================= from __future__ import print_function import pytest import random from pymtl3 import * from pymtl3.stdlib.test import mk_test_case_table, run_sim, config_model from pymtl3.stdlib.test import TestSrcCL, TestSinkCL from pymtl3.stdlib.ifcs import mk_mem_msg, MemMsgType from tut8_sram.SramMinionRTL import SramMinionRTL MemReqType, MemRespType = mk_mem_msg( 8, 32, 64 ) #------------------------------------------------------------------------- # TestHarness #------------------------------------------------------------------------- class TestHarness( Component ): def construct( s, dut ): # Instantiate models s.src = TestSrcCL( MemReqType ) s.sram = dut s.sink = TestSinkCL( MemRespType )
def construct(s):
  """Build the cycle-level processor model.

  Creates the CL interfaces (imem, dmem, xcel, proc2mngr, mngr2proc),
  delay queues that buffer incoming messages, the architectural state
  (s.pc, register file s.R) and three update blocks:

    F   -- sends an instruction read and records the fetch PC,
    DXM -- decodes the fetched instruction, computes results, issues data
           memory / accelerator requests and branch redirects,
    W   -- collects responses, writes the register file and signals commit.

  F hands PCs to DXM through s.F_DXM_queue; DXM hands W either a
  (rd, data, kind) tuple or None (no writeback) through s.DXM_W_queue.
  """
  memreq_cls, memresp_cls = mk_mem_msg(8, 32, 32)
  xreq_class, xresp_class = mk_xcel_msg(5, 32)

  # Interface

  s.commit_inst = OutPort(Bits1)

  s.imem = MemMasterIfcCL(memreq_cls, memresp_cls)
  s.dmem = MemMasterIfcCL(memreq_cls, memresp_cls)

  s.xcel = XcelMasterIfcCL(xreq_class, xresp_class)

  s.proc2mngr = NonBlockingCallerIfc()
  s.mngr2proc = NonBlockingCalleeIfc()

  # Buffers to hold input messages

  s.imemresp_q = DelayPipeDeqCL(0)(enq=s.imem.resp)
  s.dmemresp_q = DelayPipeDeqCL(1)(enq=s.dmem.resp)
  s.mngr2proc_q = DelayPipeDeqCL(1)(enq=s.mngr2proc)
  s.xcelresp_q = DelayPipeDeqCL(0)(enq=s.xcel.resp)

  # Architectural state: program counter (starts at 0x200) and registers
  s.pc = b32(0x200)
  s.R = RegisterFile(32)

  # Queues between the stages
  s.F_DXM_queue = PipeQueueCL(1)
  s.DXM_W_queue = PipeQueueCL(1)

  # Per-stage status, rewritten by each update block on every call
  s.F_status = PipelineStatus.idle
  s.DXM_status = PipelineStatus.idle
  s.W_status = PipelineStatus.idle

  @s.update
  def F():
    # Fetch stage: request the instruction at the current (or redirected)
    # PC and pass that PC on to DXM.
    s.F_status = PipelineStatus.idle

    if s.reset:
      s.pc = b32(0x200)
      return

    if s.imem.req.rdy() and s.F_DXM_queue.enq.rdy():
      # A non-negative redirected_pc_DXM is a pending branch target set
      # by DXM; -1 means "no redirect".
      if s.redirected_pc_DXM >= 0:
        s.imem.req(
            memreq_cls(MemMsgType.READ, 0, s.redirected_pc_DXM))
        s.pc = s.redirected_pc_DXM
      else:
        s.imem.req(memreq_cls(MemMsgType.READ, 0, s.pc))

      s.F_DXM_queue.enq(s.pc)
      s.F_status = PipelineStatus.work
      s.pc += 4

    else:
      s.F_status = PipelineStatus.stall

  s.redirected_pc_DXM = -1
  s.raw_inst = b32(0)

  @s.update
  def DXM():
    # Decode / execute / memory stage. Works on the head of F_DXM_queue
    # and imemresp_q, which are only popped at the end if the instruction
    # was actually executed (status == work).
    s.redirected_pc_DXM = -1
    s.DXM_status = PipelineStatus.idle

    if s.F_DXM_queue.deq.rdy() and s.imemresp_q.deq.rdy():

      if not s.DXM_W_queue.enq.rdy():
        s.DXM_status = PipelineStatus.stall
      else:
        pc = s.F_DXM_queue.peek()

        s.raw_inst = s.imemresp_q.peek().data
        inst = TinyRV0Inst(s.raw_inst)
        inst_name = inst.name

        # Assume the instruction executes; branches below that cannot
        # issue a request override this with stall.
        s.DXM_status = PipelineStatus.work

        if inst_name == "nop":
          pass

        # Register-register arithmetic: enqueue (rd, result, arith)
        elif inst_name == "add":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] + s.R[inst.rs2], DXM_W.arith))
        elif inst_name == "sub":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] - s.R[inst.rs2], DXM_W.arith))
        elif inst_name == "mul":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] * s.R[inst.rs2], DXM_W.arith))
        elif inst_name == "sll":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] << (s.R[inst.rs2] & 0x1F), DXM_W.arith))
        elif inst_name == "slt":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1].int() < s.R[inst.rs2].int(), DXM_W.arith))
        elif inst_name == "sltu":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] < s.R[inst.rs2], DXM_W.arith))
        elif inst_name == "sra":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1].int() >> (s.R[inst.rs2].uint() & 0x1F), DXM_W.arith))
        elif inst_name == "srl":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] >> (s.R[inst.rs2].uint() & 0x1F), DXM_W.arith))

        # ''' TUTORIAL TASK ''''''''''''''''''''''''''''''''''''''''''''
        # Implement instruction AND in CL processor
        # ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''\/
        # ; Make an "elif" statement here to implement instruction AND
        # ; that applies bit-wise "and" operator to rs1 and rs2 and
        # ; pass the result to the pipeline.

        elif inst_name == "and":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] & s.R[inst.rs2], DXM_W.arith))
        elif inst_name == "or":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] | s.R[inst.rs2], DXM_W.arith))
        elif inst_name == "xor":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] ^ s.R[inst.rs2], DXM_W.arith))

        # ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''/\

        # Register-immediate arithmetic
        elif inst_name == "addi":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] + inst.i_imm.int(), DXM_W.arith))
        elif inst_name == "andi":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] & inst.i_imm.int(), DXM_W.arith))
        elif inst_name == "ori":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] | inst.i_imm.int(), DXM_W.arith))
        elif inst_name == "xori":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] ^ inst.i_imm.int(), DXM_W.arith))
        # NOTE(review): unlike srai, slli/srli use the immediate without
        # masking it to 5 bits -- confirm this is intended.
        elif inst_name == "slli":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] << inst.i_imm.int(), DXM_W.arith))
        elif inst_name == "srli":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] >> inst.i_imm.int(), DXM_W.arith))
        elif inst_name == "srai":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1].int() >> (inst.i_imm.uint() & 0x1F), DXM_W.arith))
        elif inst_name == "slti":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1].int() < inst.i_imm.int(), DXM_W.arith))
        elif inst_name == "sltiu":
          s.DXM_W_queue.enq(
              (inst.rd, s.R[inst.rs1] < sext(inst.i_imm, 32), DXM_W.arith))

        # s.pc was already advanced by F, hence the "- 4"
        elif inst_name == "auipc":
          s.DXM_W_queue.enq((inst.rd, (s.pc - 4) + inst.u_imm, DXM_W.arith))  #not elegant

        elif inst_name == "lui":
          s.DXM_W_queue.enq((inst.rd, inst.u_imm, DXM_W.arith))

        # Stores: send a WRITE with len 2 / 1 / 0 and enqueue (0, 0, mem)
        # so that W consumes the response; stall if dmem is not ready.
        elif inst_name == "sh":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.WRITE, 0,
                           s.R[inst.rs1] + inst.s_imm.int(), 2,
                           s.R[inst.rs2][0:16]))
            s.DXM_W_queue.enq((0, 0, DXM_W.mem))
          else:
            s.DXM_status = PipelineStatus.stall

        elif inst_name == "sb":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.WRITE, 0,
                           s.R[inst.rs1] + inst.s_imm.int(), 1,
                           s.R[inst.rs2][0:8]))
            s.DXM_W_queue.enq((0, 0, DXM_W.mem))
          else:
            s.DXM_status = PipelineStatus.stall

        elif inst_name == "sw":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.WRITE, 0,
                           s.R[inst.rs1] + inst.s_imm.int(), 0,
                           s.R[inst.rs2]))
            s.DXM_W_queue.enq((0, 0, DXM_W.mem))
          else:
            s.DXM_status = PipelineStatus.stall

        # Loads: send a READ and enqueue (rd, 0, mem) or, for the signed
        # variants lb / lh, (rd, 0, mem_signed); stall if dmem is not ready.
        elif inst_name == "lw":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.READ, 0,
                           s.R[inst.rs1] + inst.i_imm.int(), 0))
            s.DXM_W_queue.enq((inst.rd, 0, DXM_W.mem))
          else:
            s.DXM_status = PipelineStatus.stall

        elif inst_name == "lb":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.READ, 0,
                           s.R[inst.rs1] + inst.i_imm.int(), 1))
            s.DXM_W_queue.enq((inst.rd, 0, DXM_W.mem_signed))
          else:
            s.DXM_status = PipelineStatus.stall

        elif inst_name == "lh":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.READ, 0,
                           s.R[inst.rs1] + inst.i_imm.int(), 2))
            s.DXM_W_queue.enq((inst.rd, 0, DXM_W.mem_signed))
          else:
            s.DXM_status = PipelineStatus.stall

        elif inst_name == "lbu":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.READ, 0,
                           s.R[inst.rs1] + inst.i_imm.int(), 1))
            s.DXM_W_queue.enq((inst.rd, 0, DXM_W.mem))
          else:
            s.DXM_status = PipelineStatus.stall

        elif inst_name == "lhu":
          if s.dmem.req.rdy():
            s.dmem.req(
                memreq_cls(MemMsgType.READ, 0,
                           s.R[inst.rs1] + inst.i_imm.int(), 2))
            s.DXM_W_queue.enq((inst.rd, 0, DXM_W.mem))
          else:
            s.DXM_status = PipelineStatus.stall

        # Jumps write s.pc directly instead of using redirected_pc_DXM
        elif inst_name == "jal":
          s.DXM_W_queue.enq((inst.rd, s.pc, DXM_W.arith))
          s.pc = s.pc + sext(inst.j_imm, 32) - 4

        elif inst_name == "jalr":
          s.DXM_W_queue.enq((inst.rd, s.pc, DXM_W.arith))
          s.pc = (s.R[inst.rs1] + sext(inst.i_imm, 32)) & 0xFFFFFFFE

        # Branches: a taken branch sets the redirect target for F; taken
        # or not, None is enqueued so W treats it as a non-writeback inst.
        elif inst_name == "bne":
          if s.R[inst.rs1] != s.R[inst.rs2]:
            s.redirected_pc_DXM = pc + inst.b_imm.int()
          s.DXM_W_queue.enq(None)
        elif inst_name == "beq":
          if s.R[inst.rs1] == s.R[inst.rs2]:
            s.redirected_pc_DXM = pc + inst.b_imm.int()
          s.DXM_W_queue.enq(None)
        elif inst_name == "blt":
          if s.R[inst.rs1].int() < s.R[inst.rs2].int():
            s.redirected_pc_DXM = pc + inst.b_imm.int()
          s.DXM_W_queue.enq(None)
        elif inst_name == "bge":
          if s.R[inst.rs1].int() >= s.R[inst.rs2].int():
            s.redirected_pc_DXM = pc + inst.b_imm.int()
          s.DXM_W_queue.enq(None)
        elif inst_name == "bltu":
          if s.R[inst.rs1] < s.R[inst.rs2]:
            s.redirected_pc_DXM = pc + inst.b_imm.int()
          s.DXM_W_queue.enq(None)
        elif inst_name == "bgeu":
          if s.R[inst.rs1] >= s.R[inst.rs2]:
            s.redirected_pc_DXM = pc + inst.b_imm.int()
          s.DXM_W_queue.enq(None)

        elif inst_name == "csrw":
          if inst.csrnum == 0x7C0:  # CSR: proc2mngr
            # We execute csrw in W stage
            s.DXM_W_queue.enq((0, s.R[inst.rs1], DXM_W.mngr))

          # CSR numbers 0x7E0-0x7FF go to the accelerator; the low five
          # bits of the CSR number are sent as the second message field.
          elif 0x7E0 <= inst.csrnum <= 0x7FF:
            if s.xcel.req.rdy():
              s.xcel.req(
                  xreq_class(XcelMsgType.WRITE, inst.csrnum[0:5],
                             s.R[inst.rs1]))
              s.DXM_W_queue.enq((0, 0, DXM_W.xcel))
            else:
              s.DXM_status = PipelineStatus.stall

        elif inst_name == "csrr":
          if inst.csrnum == 0xFC0:  # CSR: mngr2proc
            if s.mngr2proc_q.deq.rdy():
              s.DXM_W_queue.enq(
                  (inst.rd, s.mngr2proc_q.deq(), DXM_W.arith))
            else:
              s.DXM_status = PipelineStatus.stall
          elif 0x7E0 <= inst.csrnum <= 0x7FF:
            if s.xcel.req.rdy():
              s.xcel.req(
                  xreq_class(XcelMsgType.READ, inst.csrnum[0:5],
                             s.R[inst.rs1]))
              s.DXM_W_queue.enq((inst.rd, 0, DXM_W.xcel))
            else:
              s.DXM_status = PipelineStatus.stall

        # If we execute any instruction, we pop from queues
        if s.DXM_status == PipelineStatus.work:
          s.F_DXM_queue.deq()
          s.imemresp_q.deq()

  s.rd = b5(0)

  @s.update
  def W():
    # Writeback stage: looks at the head of DXM_W_queue and only pops it
    # (and raises commit_inst) once the entry has been fully handled.
    s.commit_inst = Bits1(0)

    s.W_status = PipelineStatus.idle

    if s.DXM_W_queue.deq.rdy():
      entry = s.DXM_W_queue.peek()
      if entry is not None:
        rd, data, entry_type = entry
        s.rd = rd

        # Memory entries wait for the data memory response; rd == 0 is
        # used by stores, which only need to drain the response.
        if entry_type == DXM_W.mem:
          if s.dmemresp_q.deq.rdy():
            if rd > 0:  # load
              s.R[rd] = Bits32(s.dmemresp_q.deq().data)
            else:  # store
              s.dmemresp_q.deq()

            s.W_status = PipelineStatus.work
          else:
            s.W_status = PipelineStatus.stall

        # NOTE(review): if the response data field is already 32 bits
        # wide, sext(..., 32) would not sign-extend a byte/halfword load
        # -- confirm against the message type and sext semantics.
        elif entry_type == DXM_W.mem_signed:
          if s.dmemresp_q.deq.rdy():
            if rd > 0:  # load
              s.R[rd] = Bits32(
                  sext(s.dmemresp_q.deq().data, 32))
            else:  # store
              s.dmemresp_q.deq()

            s.W_status = PipelineStatus.work
          else:
            s.W_status = PipelineStatus.stall

        elif entry_type == DXM_W.xcel:
          if s.xcelresp_q.deq.rdy():
            if rd > 0:  # csrr
              s.R[rd] = Bits32(s.xcelresp_q.deq().data)
            else:  # csrw
              s.xcelresp_q.deq()

            s.W_status = PipelineStatus.work
          else:
            s.W_status = PipelineStatus.stall

        elif entry_type == DXM_W.mngr:
          if s.proc2mngr.rdy():
            s.proc2mngr(data)
            s.W_status = PipelineStatus.work
          else:
            s.W_status = PipelineStatus.stall

        else:  # other WB insts
          assert entry_type == DXM_W.arith
          # Register 0 is never written
          if rd > 0:
            s.R[rd] = Bits32(data)
          s.W_status = PipelineStatus.work

      else:  # non-WB insts
        s.W_status = PipelineStatus.work

    if s.W_status == PipelineStatus.work:
      s.DXM_W_queue.deq()
      s.commit_inst = Bits1(1)
def test_basic_1x2(dump_vcd, test_verilog):
  """Cycle-by-cycle directed test of a 1-input, 2-output Router.

  Each test vector drives in_.en / in_.msg / out[0].rdy / out[1].rdy and
  checks in_.rdy and the en / msg pair of both outputs after combinational
  evaluation, then advances the simulation by one cycle.
  """

  # Instantiate and elaborate the model
  model = Router(mk_mem_msg(8, 32, 32)[1], 2)
  config_model(model, dump_vcd, test_verilog)
  model.elaborate()

  # We can call apply if we are 100% sure the top level is not tagged
  model.apply(TranslationImportPass())

  # Create a simulator
  model.apply(SimulationPass())

  # Reset test harness
  model.sim_reset(print_line_trace=True)

  # Helper function: apply one vector of inputs and check the outputs
  def step(inputs, expected):
    in_en, in_msg, out0_rdy, out1_rdy = inputs
    exp_in_rdy, exp_out0_en, exp_out0_msg, exp_out1_en, exp_out1_msg = expected

    # Write the input value to the input ports
    model.in_.en = in_en
    model.in_.msg = in_msg
    model.out[0].rdy = out0_rdy
    model.out[1].rdy = out1_rdy

    # Ensure that all combinational concurrent blocks are called
    model.eval_combinational()

    # Display line trace
    model.print_line_trace()

    # Verify reference output port
    assert model.in_.rdy == exp_in_rdy
    assert model.out[0].en == exp_out0_en
    assert model.out[0].msg == exp_out0_msg
    assert model.out[1].en == exp_out1_en
    assert model.out[1].msg == exp_out1_msg

    # Tick simulator by one cycle
    model.tick()

  # Helper function to make messages
  def msg(type_, opaque, test, len_, data):
    return mk_mem_msg(8, 32, 32)[1](type_, opaque, test, len_, data)

  # Cycle-by-cycle tests; message columns hold the arguments for msg().
  #
  #   in_  in_               out[0] out[1] | in_   out[0] out[0]           out[1] out[1]
  #   .en  .msg              .rdy   .rdy   | .rdy  .en    .msg             .en    .msg
  vectors = [
    ( 1,   (0, 0, 0, 0, 4),  1,     1,       1,    1,     (0, 0, 0, 0, 4), 0,     (0, 0, 0, 0, 0) ),
    ( 1,   (0, 1, 0, 0, 5),  1,     1,       1,    0,     (0, 0, 0, 0, 0), 1,     (0, 1, 0, 0, 5) ),
    ( 0,   (0, 1, 0, 0, 5),  1,     1,       1,    0,     (0, 0, 0, 0, 0), 0,     (0, 0, 0, 0, 0) ),
    ( 1,   (0, 1, 0, 0, 6),  1,     1,       1,    0,     (0, 0, 0, 0, 0), 1,     (0, 1, 0, 0, 6) ),
    ( 1,   (0, 0, 0, 0, 7),  0,     1,       1,    0,     (0, 0, 0, 0, 0), 0,     (0, 0, 0, 0, 0) ),
    ( 0,   (0, 1, 0, 0, 8),  0,     1,       0,    0,     (0, 0, 0, 0, 0), 0,     (0, 0, 0, 0, 0) ),
    ( 0,   (0, 1, 0, 0, 8),  1,     1,       0,    1,     (0, 0, 0, 0, 7), 0,     (0, 0, 0, 0, 0) ),
    ( 1,   (0, 1, 0, 0, 9),  0,     1,       1,    0,     (0, 0, 0, 0, 0), 1,     (0, 1, 0, 0, 9) ),
  ]

  for (in_en, in_args, rdy0, rdy1,
       exp_rdy, exp_en0, exp_args0, exp_en1, exp_args1) in vectors:
    step([in_en, msg(*in_args), rdy0, rdy1],
         [exp_rdy, exp_en0, msg(*exp_args0), exp_en1, msg(*exp_args1)])

  # Let the simulation run three more cycles
  for _ in range(3):
    model.tick()
def msg(type_, opaque, test, len_, data):
  """Build a memory response message (second type from mk_mem_msg(8, 32, 32))."""
  _, resp_type = mk_mem_msg(8, 32, 32)
  return resp_type(type_, opaque, test, len_, data)
def construct(s, num_banks=0):
  """Top level of the blocking cache: a control unit plus a datapath.

  num_banks -- when positive, clog2(num_banks) is passed to both the
               control unit and the datapath as the index shift amount;
               otherwise the shift amount is 0.

  Exposes a minion interface `cache` (processor side) and a master
  interface `mem` (memory side). The en/rdy signals of both interfaces go
  to the control unit, the msg signals go to the datapath.
  """
  CacheReqType, CacheRespType = mk_mem_msg(8, 32, 32)
  MemReqType, MemRespType = mk_mem_msg(8, 32, 128)

  idx_shamt = clog2(num_banks) if num_banks > 0 else 0

  # Proc <-> Cache
  s.cache = MemMinionIfcRTL(CacheReqType, CacheRespType)

  # Cache <-> Mem
  s.mem = MemMasterIfcRTL(MemReqType, MemRespType)

  s.ctrl = BlockingCacheCtrlPRTL(idx_shamt)(
      # Cache request
      cachereq_en=s.cache.req.en,
      cachereq_rdy=s.cache.req.rdy,
      # Cache response
      cacheresp_en=s.cache.resp.en,
      cacheresp_rdy=s.cache.resp.rdy,
      # Memory request
      memreq_en=s.mem.req.en,
      memreq_rdy=s.mem.req.rdy,
      # Memory response
      memresp_en=s.mem.resp.en,
      memresp_rdy=s.mem.resp.rdy,
  )

  s.dpath = BlockingCacheDpathPRTL(idx_shamt)(
      # Cache request
      cachereq_msg=s.cache.req.msg,
      # Cache response
      cacheresp_msg=s.cache.resp.msg,
      # Memory request
      memreq_msg=s.mem.req.msg,
      # Memory response
      memresp_msg=s.mem.resp.msg,
  )

  # Short handles for the two sub-components used in the wiring below
  ctrl, dpath = s.ctrl, s.dpath

  # control signals (ctrl->dpath)
  dpath.amo_sel            //= ctrl.amo_sel
  dpath.cachereq_enable    //= ctrl.cachereq_enable
  dpath.memresp_enable     //= ctrl.memresp_enable
  dpath.is_refill          //= ctrl.is_refill
  dpath.tag_array_0_wen    //= ctrl.tag_array_0_wen
  dpath.tag_array_0_ren    //= ctrl.tag_array_0_ren
  dpath.tag_array_1_wen    //= ctrl.tag_array_1_wen
  dpath.tag_array_1_ren    //= ctrl.tag_array_1_ren
  dpath.way_sel            //= ctrl.way_sel
  dpath.way_sel_current    //= ctrl.way_sel_current
  dpath.data_array_wen     //= ctrl.data_array_wen
  dpath.data_array_ren     //= ctrl.data_array_ren
  dpath.skip_read_data_reg //= ctrl.skip_read_data_reg

  # width of cacheline divided by number of bits per byte
  dpath.data_array_wben    //= ctrl.data_array_wben
  dpath.read_data_reg_en   //= ctrl.read_data_reg_en
  dpath.read_tag_reg_en    //= ctrl.read_tag_reg_en
  dpath.read_byte_sel      //= ctrl.read_byte_sel
  dpath.memreq_type        //= ctrl.memreq_type
  dpath.cacheresp_type     //= ctrl.cacheresp_type
  dpath.cacheresp_hit      //= ctrl.cacheresp_hit

  # status signals (dpath->ctrl)
  ctrl.cachereq_type //= dpath.cachereq_type
  ctrl.cachereq_addr //= dpath.cachereq_addr
  ctrl.tag_match_0   //= dpath.tag_match_0
  ctrl.tag_match_1   //= dpath.tag_match_1
def construct(s, num_cores=1):
  """Declare the processor's ports and bind them to a Verilog module.

  num_cores -- passed to the Verilog module as parameter `p_num_cores`.

  No behaviour is described here: the method only creates the interface
  and attaches a placeholder configuration (source file, top module name,
  parameters, port name map) and a Verilator import configuration.
  """

  # Configurations

  MemReqMsg, MemRespMsg = mk_mem_msg(8, 32, 32)

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Starting F16 we turn core_id into input ports to
  # enable module reusability. In the past it was passed as arguments.
  s.core_id = InPort(Bits32)

  # Proc/Mngr Interface
  s.mngr2proc = RecvIfcRTL(Bits32)
  s.proc2mngr = SendIfcRTL(Bits32)

  # Instruction Memory Request/Response Interface
  s.imem = MemMasterIfcRTL(MemReqMsg, MemRespMsg)

  # Data Memory Request/Response Interface
  s.dmem = MemMasterIfcRTL(MemReqMsg, MemRespMsg)

  # Accelerator Request/Response Interface
  s.xcel = XcelMasterIfcRTL(XcelReqMsg, XcelRespMsg)

  # val_W port used for counting commited insts.
  s.commit_inst = OutPort()

  # stats_en
  s.stats_en = OutPort()

  #---------------------------------------------------------------------
  # Verilog import configuration
  #---------------------------------------------------------------------

  from os import path

  # The Verilog source lives next to this Python file
  verilog_src = path.dirname(__file__) + '/ProcVRTL.v'

  # PyMTL port name -> Verilog port name
  verilog_ports = {
      'core_id': 'core_id',
      'commit_inst': 'commit_inst',
      'stats_en': 'stats_en',

      'imem.req.en': 'imemreq_en',
      'imem.req.rdy': 'imemreq_rdy',
      'imem.req.msg': 'imemreq_msg',
      'imem.resp.en': 'imemresp_en',
      'imem.resp.rdy': 'imemresp_rdy',
      'imem.resp.msg': 'imemresp_msg',

      'dmem.req.en': 'dmemreq_en',
      'dmem.req.rdy': 'dmemreq_rdy',
      'dmem.req.msg': 'dmemreq_msg',
      'dmem.resp.en': 'dmemresp_en',
      'dmem.resp.rdy': 'dmemresp_rdy',
      'dmem.resp.msg': 'dmemresp_msg',

      'xcel.req.en': 'xcelreq_en',
      'xcel.req.rdy': 'xcelreq_rdy',
      'xcel.req.msg': 'xcelreq_msg',
      'xcel.resp.en': 'xcelresp_en',
      'xcel.resp.rdy': 'xcelresp_rdy',
      'xcel.resp.msg': 'xcelresp_msg',

      'proc2mngr.en': 'proc2mngr_en',
      'proc2mngr.rdy': 'proc2mngr_rdy',
      'proc2mngr.msg': 'proc2mngr_msg',

      'mngr2proc.en': 'mngr2proc_en',
      'mngr2proc.rdy': 'mngr2proc_rdy',
      'mngr2proc.msg': 'mngr2proc_msg',
  }

  verilog_params = {'p_num_cores': num_cores}

  s.config_placeholder = VerilogPlaceholderConfigs(
      src_file=verilog_src,
      top_module='proc_ProcVRTL',
      params=verilog_params,
      port_map=verilog_ports,
  )

  # Enable native Verilog line trace through Verilator
  s.config_verilog_import = VerilatorImportConfigs(vl_line_trace=True)
def construct(s, num_cores=1):
  """Structurally compose the RTL processor from a control unit and a datapath.

  num_cores -- forwarded to the datapath constructor.

  Incoming responses (imem, dmem, mngr2proc, xcel) are buffered in bypass
  queues; instruction memory requests go out through a two-entry-argument
  bypass queue (BypassQueueRTL(MemReqMsg, 2)) and instruction memory
  responses additionally pass through a drop unit controlled by the
  control unit.
  """
  MemReqMsg, MemRespMsg = mk_mem_msg(8, 32, 32)

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Starting F16 we turn core_id into input ports to
  # enable module reusability. In the past it was passed as arguments.
  s.core_id = InPort(Bits32)

  # Proc/Mngr Interface
  s.mngr2proc = RecvIfcRTL(Bits32)
  s.proc2mngr = SendIfcRTL(Bits32)

  # Instruction Memory Request/Response Interface
  s.imem = MemMasterIfcRTL(MemReqMsg, MemRespMsg)

  # Data Memory Request/Response Interface
  s.dmem = MemMasterIfcRTL(MemReqMsg, MemRespMsg)

  # Accelerator Request/Response Interface
  s.xcel = XcelMasterIfcRTL(XcelReqMsg, XcelRespMsg)

  # val_W port used for counting commited insts.
  s.commit_inst = OutPort()

  # stats_en
  s.stats_en = OutPort()

  #---------------------------------------------------------------------
  # Structural composition
  #---------------------------------------------------------------------

  # Bypass queues
  s.imemreq_q = BypassQueueRTL(MemReqMsg, 2)

  # The head of the request queue is what goes out on imem.req
  s.imemreq_q.deq.ret //= s.imem.req.msg

  @s.update
  def send_imemreq():
    # Dequeue and send in the same cycle, only when the queue has a
    # request and the memory can accept it.
    both_rdy = s.imem.req.rdy & s.imemreq_q.deq.rdy
    s.imemreq_q.deq.en = both_rdy
    s.imem.req.en = both_rdy

  # We have to turn input receive interface into get interface
  s.imemresp_q = BypassQueueRTL(MemRespMsg, 1)(enq=s.imem.resp)
  s.dmemresp_q = BypassQueueRTL(MemRespMsg, 1)(enq=s.dmem.resp)
  s.mngr2proc_q = BypassQueueRTL(Bits32, 1)(enq=s.mngr2proc)
  s.xcelresp_q = BypassQueueRTL(XcelRespMsg, 1)(enq=s.xcel.resp)

  # imem drop unit
  s.imemresp_drop_unit = DropUnitPRTL(MemRespMsg)(in_=s.imemresp_q.deq, )

  # control logic
  s.ctrl = ProcCtrlPRTL()(
      # imem port
      imemresp_drop=s.imemresp_drop_unit.drop,
      imemreq_en=s.imemreq_q.enq.en,
      imemreq_rdy=s.imemreq_q.enq.rdy,
      imemresp_en=s.imemresp_drop_unit.out.en,
      imemresp_rdy=s.imemresp_drop_unit.out.rdy,

      # dmem port
      dmemreq_en=s.dmem.req.en,
      dmemreq_rdy=s.dmem.req.rdy,
      dmemresp_en=s.dmemresp_q.deq.en,
      dmemresp_rdy=s.dmemresp_q.deq.rdy,

      # xcel port
      xcelreq_en=s.xcel.req.en,
      xcelreq_rdy=s.xcel.req.rdy,
      xcelresp_en=s.xcelresp_q.deq.en,
      xcelresp_rdy=s.xcelresp_q.deq.rdy,

      # proc2mngr and mngr2proc
      proc2mngr_en=s.proc2mngr.en,
      proc2mngr_rdy=s.proc2mngr.rdy,
      mngr2proc_en=s.mngr2proc_q.deq.en,
      mngr2proc_rdy=s.mngr2proc_q.deq.rdy,

      # commit inst for counting
      commit_inst=s.commit_inst,
  )

  # data path
  s.dpath = ProcDpathPRTL(num_cores)(
      core_id=s.core_id,
      stats_en=s.stats_en,

      # imem ports
      imemreq_msg=s.imemreq_q.enq.msg,
      imemresp_msg=s.imemresp_drop_unit.out.ret,

      # dmem ports
      dmemresp_msg=s.dmemresp_q.deq.ret,

      # xcel ports
      xcelresp_msg=s.xcelresp_q.deq.ret,

      # mngr
      mngr2proc_data=s.mngr2proc_q.deq.ret,
      proc2mngr_data=s.proc2mngr.msg,
  )

  # Connect parameters
  # The outgoing xcel / dmem request messages are assembled from fields
  # produced separately by the control unit (type) and the datapath
  # (address, data); the opaque and len fields of dmem requests are 0.
  s.xcel.req.msg //= lambda: XcelReqMsg(
      s.ctrl.xcelreq_type, s.dpath.xcelreq_addr, s.dpath.xcelreq_data)

  s.dmem.req.msg //= lambda: MemReqMsg(s.ctrl.dmemreq_type, b8(
      0), s.dpath.dmemreq_addr, b2(0), s.dpath.dmemreq_data)

  # Ctrl <-> Dpath
  s.ctrl.reg_en_F //= s.dpath.reg_en_F
  s.ctrl.pc_sel_F //= s.dpath.pc_sel_F

  s.ctrl.reg_en_D //= s.dpath.reg_en_D
  s.ctrl.csrr_sel_D //= s.dpath.csrr_sel_D
  s.ctrl.op1_byp_sel_D //= s.dpath.op1_byp_sel_D
  s.ctrl.op2_byp_sel_D //= s.dpath.op2_byp_sel_D
  s.ctrl.op1_sel_D //= s.dpath.op1_sel_D
  s.ctrl.op2_sel_D //= s.dpath.op2_sel_D
  s.ctrl.imm_type_D //= s.dpath.imm_type_D
  s.ctrl.imul_req_en_D //= s.dpath.imul_req_en_D
  s.ctrl.imul_req_rdy_D //= s.dpath.imul_req_rdy_D

  s.ctrl.reg_en_X //= s.dpath.reg_en_X
  s.ctrl.alu_fn_X //= s.dpath.alu_fn_X
  s.ctrl.ex_result_sel_X //= s.dpath.ex_result_sel_X
  s.ctrl.imul_resp_en_X //= s.dpath.imul_resp_en_X
  s.ctrl.imul_resp_rdy_X //= s.dpath.imul_resp_rdy_X

  s.ctrl.reg_en_M //= s.dpath.reg_en_M
  s.ctrl.wb_result_sel_M //= s.dpath.wb_result_sel_M

  s.ctrl.reg_en_W //= s.dpath.reg_en_W
  s.ctrl.rf_waddr_W //= s.dpath.rf_waddr_W
  s.ctrl.rf_wen_W //= s.dpath.rf_wen_W
  s.ctrl.stats_en_wen_W //= s.dpath.stats_en_wen_W

  s.dpath.inst_D //= s.ctrl.inst_D
  s.dpath.br_cond_eq_X //= s.ctrl.br_cond_eq_X
  s.dpath.br_cond_lt_X //= s.ctrl.br_cond_lt_X
  s.dpath.br_cond_ltu_X //= s.ctrl.br_cond_ltu_X
def construct( s, idx_shamt=0 ):
  """Datapath of the two-way blocking cache.

  idx_shamt -- accepted for interface compatibility with the cache top
               level; it is not referenced in this method.

  The widths abw, dbw, clw, o, idw and the slice bound idw_off are read
  from the enclosing scope; they are not defined in this method.

  The datapath registers the incoming cache request, holds two tag SRAMs
  and two data SRAMs (one per way), compares the request tag against both
  ways, and packs the cache response and memory request messages from
  control signals and stored data.
  """

  CacheReqType, CacheRespType = mk_mem_msg( 8, 32, 32 )
  MemReqType, MemRespType = mk_mem_msg( 8, 32, 128 )

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Cache request
  s.cachereq_msg = InPort ( CacheReqType )

  # Cache response
  s.cacheresp_msg = OutPort( CacheRespType )

  # Memory request
  s.memreq_msg = OutPort( MemReqType )

  # Memory response
  s.memresp_msg = InPort ( MemRespType )

  # control signals (ctrl->dpath)
  s.amo_sel            = InPort( Bits2 )
  s.cachereq_enable    = InPort()
  s.memresp_enable     = InPort()
  s.is_refill          = InPort()
  s.tag_array_0_wen    = InPort()
  s.tag_array_0_ren    = InPort()
  s.tag_array_1_wen    = InPort()
  s.tag_array_1_ren    = InPort()
  s.way_sel            = InPort()
  s.way_sel_current    = InPort()
  s.data_array_wen     = InPort()
  s.data_array_ren     = InPort()
  s.skip_read_data_reg = InPort()

  # width of cacheline divided by number of bits per byte
  # (floor division keeps the width an int; true division would hand a
  # float to mk_bits under Python 3)
  s.data_array_wben    = InPort( mk_bits(clw//8//4) )
  s.read_data_reg_en   = InPort()
  s.read_tag_reg_en    = InPort()
  s.read_byte_sel      = InPort( mk_bits(clog2(clw//dbw)) )
  s.memreq_type        = InPort( Bits4 )
  s.cacheresp_type     = InPort( Bits4 )
  s.cacheresp_hit      = InPort()

  # status signals (dpath->ctrl)
  s.cachereq_type = OutPort ( Bits4 )
  s.cachereq_addr = OutPort ( mk_bits(abw) )
  s.tag_match_0   = OutPort ()
  s.tag_match_1   = OutPort ()

  # Register the unpacked cachereq_msg
  s.cachereq_type_reg = RegEnRst( Bits4, reset_value=0 )(
    en  = s.cachereq_enable,
    in_ = s.cachereq_msg.type_,
    out = s.cachereq_type
  )

  s.cachereq_addr_reg = RegEnRst( mk_bits(abw), reset_value=0 )(
    en  = s.cachereq_enable,
    in_ = s.cachereq_msg.addr,
    out = s.cachereq_addr
  )

  # The request's opaque field is returned unchanged in the response
  s.cachereq_opaque_reg = RegEnRst( mk_bits(o), reset_value=0 )(
    en  = s.cachereq_enable,
    in_ = s.cachereq_msg.opaque,
    out = s.cacheresp_msg.opaque,
  )

  s.cachereq_data_reg = RegEnRst( mk_bits(dbw), reset_value=0 )(
    en  = s.cachereq_enable,
    in_ = s.cachereq_msg.data,
  )

  # Register the unpacked data from memresp_msg
  s.memresp_data_reg = RegEnRst( mk_bits(clw), reset_value=0 )(
    en  = s.memresp_enable,
    in_ = s.memresp_msg.data,
  )

  # Generate cachereq write data which will be the data field or some
  # calculation with the read data for amos
  s.cachereq_data_reg_out_add = Wire( mk_bits(dbw) )
  s.cachereq_data_reg_out_and = Wire( mk_bits(dbw) )
  s.cachereq_data_reg_out_or  = Wire( mk_bits(dbw) )

  @s.update
  def comb_connect_wires():
    s.cachereq_data_reg_out_add = s.cachereq_data_reg.out + s.read_byte_sel_mux.out
    s.cachereq_data_reg_out_and = s.cachereq_data_reg.out & s.read_byte_sel_mux.out
    s.cachereq_data_reg_out_or  = s.cachereq_data_reg.out | s.read_byte_sel_mux.out

  # amo_sel: 0 = plain request data, 1 = add, 2 = and, 3 = or
  s.amo_sel_mux = Mux( mk_bits(dbw), ninputs=4 )(
    in_ = {
      0: s.cachereq_data_reg.out,
      1: s.cachereq_data_reg_out_add,
      2: s.cachereq_data_reg_out_and,
      3: s.cachereq_data_reg_out_or,
    },
    sel = s.amo_sel,
  )

  # Replicate cachereq_write_data: the same word is driven onto every
  # dbw-wide slice of the cache line
  s.cachereq_write_data_replicated = Wire( mk_bits(dbw*clw//dbw) )
  for i in range(0, clw, dbw):
    s.cachereq_write_data_replicated[i:i+dbw] //= s.amo_sel_mux.out

  # Refill mux
  s.refill_mux = Mux( mk_bits(clw), ninputs=2 )(
    in_ = {
      0: s.cachereq_write_data_replicated,
      1: s.memresp_msg.data,
    },
    sel = s.is_refill,
  )

  # Taking slices of the cache request address
  #   byte offset: 2 bits wide
  #   word offset: 2 bits wide
  #   index: $clog2(nblocks) bits wide - 1 bits wide
  #   nbits: width of tag = width of addr - $clog2(nblocks) - 4
  #   entries: 256*8/128 = 16
  s.cachereq_tag = Wire( mk_bits(abw-4) )
  s.cachereq_idx = Wire( mk_bits(idw) )

  s.cachereq_tag //= s.cachereq_addr_reg.out[4:abw]
  s.cachereq_idx //= s.cachereq_addr_reg.out[4:idw_off]

  # Concat
  s.temp_cachereq_tag = Wire( mk_bits(abw) )
  s.cachereq_msg_addr = Wire( mk_bits(abw) )
  s.cur_cachereq_idx  = Wire( mk_bits(idw) )

  s.data_array_0_wen = Wire()
  s.data_array_1_wen = Wire()
  s.sram_tag_0_en    = Wire()
  s.sram_tag_1_en    = Wire()
  s.sram_data_0_en   = Wire()
  s.sram_data_1_en   = Wire()

  @s.update
  def comb_tag():
    s.cachereq_msg_addr = s.cachereq_msg.addr
    s.temp_cachereq_tag = concat( b4(0), s.cachereq_tag )

    # While a new request is being latched, index the SRAMs with the
    # incoming address; otherwise use the registered one.
    if s.cachereq_enable:
      s.cur_cachereq_idx = s.cachereq_msg_addr[4:idw_off]
    else:
      s.cur_cachereq_idx = s.cachereq_idx

    # Per-way write enables and SRAM enables
    s.data_array_0_wen = s.data_array_wen & (s.way_sel_current == b1(0))
    s.data_array_1_wen = s.data_array_wen & (s.way_sel_current == b1(1))
    s.sram_tag_0_en    = s.tag_array_0_wen | s.tag_array_0_ren
    s.sram_tag_1_en    = s.tag_array_1_wen | s.tag_array_1_ren
    s.sram_data_0_en   = (s.data_array_wen & (s.way_sel_current == b1(0))) | s.data_array_ren
    s.sram_data_1_en   = (s.data_array_wen & (s.way_sel_current == b1(1))) | s.data_array_ren

  # Tag array 0
  s.tag_array_0_read_out = Wire( mk_bits(abw) )
  s.tag_array_0 = SramRTL( 32, 256 )(
    port0_val   = s.sram_tag_0_en,
    port0_type  = s.tag_array_0_wen,
    port0_idx   = s.cur_cachereq_idx,
    port0_rdata = s.tag_array_0_read_out,
    port0_wdata = s.temp_cachereq_tag,
  )

  # Tag array 1
  s.tag_array_1_read_out = Wire( mk_bits(abw) )
  s.tag_array_1 = SramRTL( 32, 256 )(
    port0_val   = s.sram_tag_1_en,
    port0_type  = s.tag_array_1_wen,
    port0_idx   = s.cur_cachereq_idx,
    port0_rdata = s.tag_array_1_read_out,
    port0_wdata = s.temp_cachereq_tag,
  )

  # Data array 0
  s.data_array_0_read_out = Wire( mk_bits(clw) )
  s.data_array_0 = SramRTL( 128, 256, mask_size=4 )(
    port0_val   = s.sram_data_0_en,
    port0_type  = s.data_array_0_wen,
    port0_idx   = s.cur_cachereq_idx,
    port0_rdata = s.data_array_0_read_out,
    port0_wben  = s.data_array_wben,
    port0_wdata = s.refill_mux.out,
  )

  # Data array 1
  s.data_array_1_read_out = Wire( mk_bits(clw) )
  s.data_array_1 = SramRTL( 128, 256, mask_size=4 )(
    port0_val   = s.sram_data_1_en,
    port0_type  = s.data_array_1_wen,
    port0_idx   = s.cur_cachereq_idx,
    port0_rdata = s.data_array_1_read_out,
    port0_wben  = s.data_array_wben,
    port0_wdata = s.refill_mux.out,
  )

  # Data read mux
  s.data_read_mux = Mux( mk_bits(clw), ninputs=2 )(
    in_ = {
      0: s.data_array_0_read_out,
      1: s.data_array_1_read_out,
    },
    sel = s.way_sel_current
  )

  # Eq comparator to check for tag matching (tag_compare_0)
  s.tag_compare_0 = EqComparator( mk_bits(abw - 4) )(
    in0 = s.cachereq_tag,
    in1 = s.tag_array_0_read_out[0:abw-4],
    out = s.tag_match_0,
  )

  # Eq comparator to check for tag matching (tag_compare_1)
  s.tag_compare_1 = EqComparator( mk_bits(abw - 4) )(
    in0 = s.cachereq_tag,
    in1 = s.tag_array_1_read_out[0:abw-4],
    out = s.tag_match_1,
  )

  # Mux that selects between the ways for requesting from memory
  s.way_sel_mux = Mux( mk_bits(abw - 4), ninputs = 2 )(
    in_ = {
      0: s.tag_array_0_read_out[0:abw-4],
      1: s.tag_array_1_read_out[0:abw-4],
    },
    sel = s.way_sel_current
  )

  # Read data register
  s.read_data_reg = RegEnRst( mk_bits(clw), reset_value=0 )(
    en  = s.read_data_reg_en,
    in_ = s.data_read_mux.out,
    out = s.memreq_msg.data,
  )

  # Read tag register
  s.read_tag_reg = RegEnRst( mk_bits(abw - 4), reset_value=0 )(
    en  = s.read_tag_reg_en,
    in_ = s.way_sel_mux.out,
  )

  # Memreq Type Mux: bit 0 of the memory request type chooses between the
  # request's own tag and the tag read from the selected way
  s.memreq_type_mux_out = Wire( mk_bits(abw - 4) )

  s.tag_mux = Mux( mk_bits(abw - 4), ninputs = 2 )(
    in_ = {
      0: s.cachereq_tag,
      1: s.read_tag_reg.out,
    },
    sel = s.memreq_type[0],
    out = s.memreq_type_mux_out,
  )

  # Pack address for memory request: tag followed by four zero bits
  s.memreq_addr = Wire( mk_bits(abw) )

  @s.update
  def comb_addr_evict():
    s.memreq_addr = concat(s.memreq_type_mux_out, b4(0))

  # Skip read data reg mux
  s.read_data = Wire( mk_bits(clw) )

  s.skip_read_data_mux = Mux( mk_bits(clw), ninputs=2 )(
    in_ = {
      0: s.read_data_reg.out,
      1: s.data_read_mux.out,
    },
    sel = s.skip_read_data_reg,
    out = s.read_data,
  )

  # Select byte for cache response
  s.read_byte_sel_mux = Mux( mk_bits(dbw), ninputs=4 )(
    in_ = {
      0: s.read_data[0:       dbw],
      1: s.read_data[1*dbw: 2*dbw],
      2: s.read_data[2*dbw: 3*dbw],
      3: s.read_data[3*dbw: 4*dbw],
    },
    sel = s.read_byte_sel,
  )

  @s.update
  def comb_addr_refill():
    # Only responses of type 0 carry data; all others return zero
    if s.cacheresp_type == b4(0):
      s.cacheresp_msg.data = s.read_byte_sel_mux.out
    else:
      s.cacheresp_msg.data = b32(0)

  @s.update
  def comb_cacherespmsgpack():
    s.cacheresp_msg.type_ = s.cacheresp_type
    # The hit flag is reported in the low bit of the test field
    s.cacheresp_msg.test  = concat( b1(0), s.cacheresp_hit )
    s.cacheresp_msg.len   = b2(0)

  @s.update
  def comb_memrespmsgpack():
    s.memreq_msg.type_  = s.memreq_type
    s.memreq_msg.opaque = b8(0)
    s.memreq_msg.addr   = s.memreq_addr
    s.memreq_msg.len    = b4(0)
def msg(type_, opaque, addr, len_, data):
    """Build a memory request message from its five field values.

    The request class is the first element returned by
    ``mk_mem_msg(8, 32, 32)``. The arguments are forwarded to that class
    positionally, in the order they are received here.
    """
    req_type = mk_mem_msg(8, 32, 32)[0]
    return req_type(type_, opaque, addr, len_, data)
def construct(s):
    """Wrap a fixed 64x64 SRAM behind a memory minion interface.

    The design has two stages:

    * M0 decodes the incoming request (word index, write enable, enable,
      write data) and drives port 0 of the SRAM.
    * M1 registers the request, builds the response message (SRAM read
      data for READ requests, zero otherwise) and pushes it into a
      two-entry bypass queue that feeds the response side of the
      interface.

    New requests are accepted only while the response queue is empty.
    """
    # size is fixed as 64x64
    num_bits = 64
    num_words = 64
    addr_width = clog2(num_words)
    # Floor division keeps the bytes-per-word count an integer; the
    # resulting value is the number of low address bits skipped when
    # selecting the SRAM word index.
    addr_start = clog2(num_bits // 8)
    addr_end = addr_start + addr_width

    BitsAddr = mk_bits(addr_width)
    BitsData = mk_bits(num_bits)

    # Default memory message has 8 bits opaque field and 32 bits address.
    MemReqType, MemRespType = mk_mem_msg(8, 32, num_bits)

    # Interface
    s.minion = MemMinionIfcRTL(MemReqType, MemRespType)

    #-----------------------------------------------------------------
    # M0 stage
    #-----------------------------------------------------------------

    s.sram_addr_M0 = Wire(BitsAddr)
    s.sram_wen_M0 = Wire(Bits1)
    s.sram_en_M0 = Wire(Bits1)
    s.sram_wdata_M0 = Wire(BitsData)

    # translation work around: keep the constant in a local so the
    # update block refers to a plain name
    MEM_MSG_TYPE_WRITE = b4(MemMsgType.WRITE)

    @s.update
    def comb_M0():
        # Word index is the address slice just above the skipped low bits.
        s.sram_addr_M0 = s.minion.req.msg.addr[addr_start:addr_end]
        # Write only when a request is taken and it is a WRITE.
        s.sram_wen_M0 = s.minion.req.en & (s.minion.req.msg.type_ == MEM_MSG_TYPE_WRITE)
        s.sram_en_M0 = s.minion.req.en
        s.sram_wdata_M0 = s.minion.req.msg.data

    # SRAM
    s.sram = SramRTL(num_bits, num_words)(
        port0_idx=s.sram_addr_M0,
        port0_type=s.sram_wen_M0,
        port0_val=s.sram_en_M0,
        port0_wdata=s.sram_wdata_M0,
    )

    #-----------------------------------------------------------------
    # M1 stage
    #-----------------------------------------------------------------

    # Pipeline registers: request-taken bit and the request message
    s.memreq_val_reg_M1 = RegRst(Bits1)(in_=s.minion.req.en)
    s.memreq_msg_reg_M1 = Reg(MemReqType)(in_=s.minion.req.msg)

    # Create the memory response message with data from SRAM if read
    s.memresp_msg_M1 = Wire(MemRespType)

    # translation work around
    MEM_MSG_TYPE_READ = b4(MemMsgType.READ)

    @s.update
    def comb_M1a():
        # type, opaque and len are echoed from the registered request.
        s.memresp_msg_M1.type_ = s.memreq_msg_reg_M1.out.type_
        s.memresp_msg_M1.opaque = s.memreq_msg_reg_M1.out.opaque
        s.memresp_msg_M1.test = b2(0)
        s.memresp_msg_M1.len = s.memreq_msg_reg_M1.out.len

        if s.memreq_msg_reg_M1.out.type_ == MEM_MSG_TYPE_READ:
            s.memresp_msg_M1.data = s.sram.port0_rdata
        else:
            s.memresp_msg_M1.data = BitsData(0)

    # Bypass queue
    s.memresp_q = BypassQueueRTL(MemRespType, num_entries=2)

    @s.update
    def comb_M1b():
        # enqueue messages into the bypass queue
        s.memresp_q.enq.en = s.memresp_q.enq.rdy & s.memreq_val_reg_M1.out
        s.memresp_q.enq.msg = s.memresp_msg_M1

        # dequeue messages from the bypass queue
        s.memresp_q.deq.en = s.memresp_q.deq.rdy & s.minion.resp.rdy
        s.minion.resp.en = s.memresp_q.deq.rdy & s.minion.resp.rdy
        s.minion.resp.msg = s.memresp_q.deq.ret

        # stop the minion interface if not enough skid buffering
        s.minion.req.rdy = (s.memresp_q.count == b2(0))
def construct(s, num_cores=1):
    """Assemble the processor datapath across the F, D, X, M and W stages.

    Declares the datapath ports (memory, manager, accelerator, control
    and status signals), then instantiates and connects the registers,
    muxes, register file, immediate generator, multiplier, and ALU of
    each stage.

    Args:
        num_cores: constant wired to input 1 of the csrr select mux.
    """
    WordType = mk_bits(32)
    MemReqType, MemRespType = mk_mem_msg(8, 32, 32)

    #=================================================================
    # Interface
    #=================================================================

    # Parameters
    s.core_id = InPort(WordType)

    # Instruction memory
    s.imemreq_msg = OutPort(MemReqType)
    s.imemresp_msg = InPort(MemRespType)

    # Data memory
    s.dmemreq_data = OutPort(WordType)
    s.dmemreq_addr = OutPort(WordType)
    s.dmemresp_msg = InPort(MemRespType)

    # Manager
    s.mngr2proc_data = InPort(WordType)
    s.proc2mngr_data = OutPort(WordType)

    # Accelerator
    s.xcelreq_addr = OutPort(Bits5)
    s.xcelreq_data = OutPort(Bits32)
    s.xcelresp_msg = InPort(XcelRespMsg)

    # Control signals (ctrl -> dpath)
    s.reg_en_F = InPort()
    s.pc_sel_F = InPort(Bits2)

    s.reg_en_D = InPort()
    s.op1_byp_sel_D = InPort(Bits2)
    s.op2_byp_sel_D = InPort(Bits2)
    s.op1_sel_D = InPort()
    s.op2_sel_D = InPort(Bits2)
    s.csrr_sel_D = InPort(Bits2)
    s.imm_type_D = InPort(Bits3)
    s.imul_req_en_D = InPort()
    s.imul_req_rdy_D = OutPort()

    s.reg_en_X = InPort()
    s.alu_fn_X = InPort(Bits4)
    s.ex_result_sel_X = InPort(Bits2)
    s.imul_resp_en_X = InPort()
    s.imul_resp_rdy_X = OutPort()

    s.reg_en_M = InPort()
    s.wb_result_sel_M = InPort(Bits2)

    s.reg_en_W = InPort()
    s.rf_waddr_W = InPort(Bits5)
    s.rf_wen_W = InPort(Bits1)
    s.stats_en_wen_W = InPort(Bits1)

    # Status signals (dpath -> ctrl)
    s.inst_D = OutPort(WordType)
    s.br_cond_eq_X = OutPort()
    s.br_cond_lt_X = OutPort()
    s.br_cond_ltu_X = OutPort()

    # stats_en output
    s.stats_en = OutPort()

    #=================================================================
    # F stage
    #=================================================================

    s.pc_F = Wire(WordType)
    s.pc_plus4_F = Wire(WordType)

    # PC + 4
    s.pc_incr_F = Incrementer(WordType, amount=4)(
        in_=s.pc_F,
        out=s.pc_plus4_F,
    )

    # Targets produced by later stages, declared up front so the PC
    # select mux below can be connected to them.
    s.br_target_X = Wire(WordType)
    s.jal_target_D = Wire(WordType)
    s.jalr_target_X = Wire(WordType)

    # Next-PC selection
    s.pc_sel_mux_F = Mux(WordType, ninputs=4)(
        in_={
            0: s.pc_plus4_F,
            1: s.br_target_X,
            2: s.jal_target_D,
            3: s.jalr_target_X,
        },
        sel=s.pc_sel_F,
    )

    # The fetch request carries the selected next PC as its address;
    # every other field is zero.
    s.imemreq_msg //= lambda: MemReqType(b4(0), b8(0), s.pc_sel_mux_F.out, b2(0), b32(0))

    # PC register; its reset value is one word below c_reset_vector.
    s.pc_reg_F = RegEnRst(WordType, reset_value=c_reset_vector - 4)(
        en=s.reg_en_F,
        in_=s.pc_sel_mux_F.out,
        out=s.pc_F,
    )

    #=================================================================
    # D stage
    #=================================================================

    # PC carried along from F so the instruction in D can use it,
    # e.g. for computing PC + imm.
    s.pc_reg_D = RegEnRst(WordType)(
        en=s.reg_en_D,
        in_=s.pc_F,
    )

    # Instruction register; its output also goes to the control unit.
    s.inst_D_reg = RegEnRst(WordType, reset_value=c_reset_inst)(
        en=s.reg_en_D,
        in_=s.imemresp_msg.data,
        out=s.inst_D,
    )

    # Register file. The rf_rdata*_D wires exist mainly to make the
    # stage the data belongs to explicit.
    s.rf_rdata0_D = Wire(WordType)
    s.rf_rdata1_D = Wire(WordType)
    s.rf_wdata_W = Wire(WordType)

    s.rf = RegisterFile(WordType, nregs=32, rd_ports=2, wr_ports=1, const_zero=True)(
        raddr={
            0: s.inst_D[RS1],
            1: s.inst_D[RS2],
        },
        rdata={
            0: s.rf_rdata0_D,
            1: s.rf_rdata1_D,
        },
        wen={0: s.rf_wen_W},
        waddr={0: s.rf_waddr_W},
        wdata={0: s.rf_wdata_W},
    )

    # Immediate generator
    s.imm_gen_D = ImmGenPRTL()(imm_type=s.imm_type_D, inst=s.inst_D)

    # Bypass sources from the X, M and W stages
    s.byp_data_X = Wire(Bits32)
    s.byp_data_M = Wire(Bits32)
    s.byp_data_W = Wire(Bits32)

    # Operand 1 bypass mux
    s.op1_byp_mux_D = Mux(WordType, ninputs=4)(
        in_={
            0: s.rf_rdata0_D,
            1: s.byp_data_X,
            2: s.byp_data_M,
            3: s.byp_data_W,
        },
        sel=s.op1_byp_sel_D,
    )

    # Operand 2 bypass mux
    s.op2_byp_mux_D = Mux(WordType, ninputs=4)(
        in_={
            0: s.rf_rdata1_D,
            1: s.byp_data_X,
            2: s.byp_data_M,
            3: s.byp_data_W,
        },
        sel=s.op2_byp_sel_D,
    )

    # Operand 1 select: bypassed register value or the D-stage PC
    s.op1_sel_mux_D = Mux(WordType, ninputs=2)(
        in_={
            0: s.op1_byp_mux_D.out,
            1: s.pc_reg_D.out,
        },
        sel=s.op1_sel_D,
    )

    # csrr select: manager data, core count, or core id
    s.csrr_sel_mux_D = Mux(WordType, ninputs=3)(
        in_={
            0: s.mngr2proc_data,
            1: num_cores,
            2: s.core_id,
        },
        sel=s.csrr_sel_D,
    )

    # Operand 2 select: bypassed register value, immediate, or the
    # csrr mux output. Two separate muxes are kept for pedagogy.
    s.op2_sel_mux_D = Mux(WordType, ninputs=3)(
        in_={
            0: s.op2_byp_mux_D.out,
            1: s.imm_gen_D.imm,
            2: s.csrr_sel_mux_D.out,
        },
        sel=s.op2_sel_D,
    )

    # PC + imm; drives the jal target and is registered below as the
    # branch target.
    s.pc_plus_imm_D = Adder(WordType)(
        in0=s.pc_reg_D.out,
        in1=s.imm_gen_D.imm,
        out=s.jal_target_D,
    )

    #=================================================================
    # X stage
    #=================================================================

    # Multiplier. It sits slightly left of the X registers on the
    # datapath diagram, hence it is placed first in this stage.
    s.imul = IntMulScycleRTL()
    s.imulresp_q = BypassQueueRTL(Bits32, 1)(enq=s.imul.minion.resp)

    s.imul.minion.req.en //= s.imul_req_en_D
    s.imul.minion.req.rdy //= s.imul_req_rdy_D
    s.imul.minion.req.msg.a //= s.op1_sel_mux_D.out
    s.imul.minion.req.msg.b //= s.op2_sel_mux_D.out

    s.imulresp_q.deq.en //= s.imul_resp_en_X
    s.imulresp_q.deq.rdy //= s.imul_resp_rdy_X

    # Branch target register: the target computed in D is held here
    # because branches are resolved in X.
    s.br_target_reg_X = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_X,
        in_=s.pc_plus_imm_D.out,
        out=s.br_target_X,
    )

    # PC register
    s.pc_reg_X = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_X,
        in_=s.pc_reg_D.out,
    )

    # Operand registers
    s.op1_reg_X = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op1_sel_mux_D.out,
    )

    s.op2_reg_X = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op2_sel_mux_D.out,
    )

    # Accelerator request: address from the low five bits of op2,
    # data from op1.
    s.xcelreq_addr //= s.op2_reg_X.out[0:5]
    s.xcelreq_data //= s.op1_reg_X.out

    # Store data register. op1/op2 carry the base address and the
    # immediate so that the ALU can compute the address; R[rs2] needs
    # its own register on the way to memory.
    s.dmem_write_data_reg_X = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op2_byp_mux_D.out,  # R[rs2]
        out=s.dmemreq_data,
    )

    # ALU; its result also serves as the jalr target.
    s.alu_X = AluPRTL()(
        in0=s.op1_reg_X.out,
        in1=s.op2_reg_X.out,
        fn=s.alu_fn_X,
        ops_eq=s.br_cond_eq_X,
        ops_lt=s.br_cond_lt_X,
        ops_ltu=s.br_cond_ltu_X,
        out=s.jalr_target_X,
    )

    # PC + 4 of the instruction in X
    s.pc_incr_X = Incrementer(WordType, amount=4)(in_=s.pc_reg_X.out)

    # X result select; the output fans out to the X bypass path and
    # the data memory request address.
    s.ex_result_sel_mux_X = Mux(WordType, ninputs=3)(
        in_={
            0: s.alu_X.out,
            1: s.imul.minion.resp.msg,
            2: s.pc_incr_X.out,
        },
        sel=s.ex_result_sel_X,
        out=(s.byp_data_X, s.dmemreq_addr),
    )

    #=================================================================
    # M stage
    #=================================================================

    # Execution result register
    s.ex_result_reg_M = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_M,
        in_=s.ex_result_sel_mux_X.out,
    )

    # Writeback result select: X result, load data, or accelerator data
    s.wb_result_sel_mux_M = Mux(WordType, ninputs=3)(
        in_={
            0: s.ex_result_reg_M.out,
            1: s.dmemresp_msg.data,
            2: s.xcelresp_msg.data,
        },
        sel=s.wb_result_sel_M,
        out=s.byp_data_M,
    )

    #=================================================================
    # W stage
    #=================================================================

    # Writeback result register; feeds the W bypass path, the register
    # file write port and the proc2mngr output.
    s.wb_result_reg_W = RegEnRst(WordType, reset_value=0)(
        en=s.reg_en_W,
        in_=s.wb_result_sel_mux_M.out,
        out=(s.byp_data_W, s.rf_wdata_W, s.proc2mngr_data),
    )

    # stats_en register; only bit 0 is exported.
    s.stats_en_reg_W = RegEnRst(WordType, reset_value=0)(
        en=s.stats_en_wen_W,
        in_=s.wb_result_reg_W.out,
    )

    s.stats_en //= s.stats_en_reg_W.out[0]
def construct(s):
    """Compose the processor from its control unit, datapath and queues.

    Declares the manager, instruction memory, data memory and
    accelerator interfaces plus the ``commit_inst`` output, places a
    drop unit and bypass queues on the interface paths, instantiates
    ``ProcCtrl`` and ``ProcDpath`` with their interface-side
    connections, and finally wires the control/status signals between
    the two.
    """
    MemReq, MemResp = mk_mem_msg(8, 32, 32)

    # Proc/Mngr interface
    s.mngr2proc = RecvIfcRTL(Bits32)
    s.proc2mngr = SendIfcRTL(Bits32)

    # Instruction memory request/response interface
    s.imem = MemMasterIfcRTL(MemReq, MemResp)

    # Data memory request/response interface
    s.dmem = MemMasterIfcRTL(MemReq, MemResp)

    # Accelerator request/response interface
    xreq_class, xresp_class = mk_xcel_msg(5, 32)
    s.xcel = XcelMasterIfcRTL(xreq_class, xresp_class)

    # val_W port used for counting committed instructions
    s.commit_inst = OutPort(Bits1)

    # Drop unit on the instruction memory response path; only the data
    # field of the response message is passed into it.
    s.imemresp_drop = DropUnitRTL(Bits32)

    connect_pairs(
        s.imemresp_drop.in_.en, s.imem.resp.en,
        s.imemresp_drop.in_.rdy, s.imem.resp.rdy,
        s.imemresp_drop.in_.msg, s.imem.resp.msg.data,
    )

    # Bypass queues
    s.imemreq_q = BypassQueue2RTL(MemReq, 2)(deq=s.imem.req)

    # The incoming receive interfaces are turned into get (deq)
    # interfaces by these queues.
    s.imemresp_q = BypassQueueRTL(Bits32, 1)(enq=s.imemresp_drop.out)
    s.dmemresp_q = BypassQueueRTL(MemResp, 1)(enq=s.dmem.resp)
    s.mngr2proc_q = BypassQueueRTL(Bits32, 1)(enq=s.mngr2proc)
    s.xcelresp_q = BypassQueueRTL(xresp_class, 1)(enq=s.xcel.resp)

    # Control unit
    s.ctrl = ProcCtrl()(
        # imem port
        imemresp_drop=s.imemresp_drop.drop,
        imemreq_en=s.imemreq_q.enq.en,
        imemreq_rdy=s.imemreq_q.enq.rdy,
        imemresp_en=s.imemresp_q.deq.en,
        imemresp_rdy=s.imemresp_q.deq.rdy,

        # dmem port
        dmemreq_en=s.dmem.req.en,
        dmemreq_rdy=s.dmem.req.rdy,
        dmemreq_type=s.dmem.req.msg.type_,
        dmemresp_en=s.dmemresp_q.deq.en,
        dmemresp_rdy=s.dmemresp_q.deq.rdy,

        # xcel port
        xcelreq_en=s.xcel.req.en,
        xcelreq_rdy=s.xcel.req.rdy,
        xcelresp_en=s.xcelresp_q.deq.en,
        xcelresp_rdy=s.xcelresp_q.deq.rdy,

        # proc2mngr and mngr2proc
        proc2mngr_en=s.proc2mngr.en,
        proc2mngr_rdy=s.proc2mngr.rdy,
        mngr2proc_en=s.mngr2proc_q.deq.en,
        mngr2proc_rdy=s.mngr2proc_q.deq.rdy,

        # commit inst for counting
        commit_inst=s.commit_inst,
    )

    # Datapath
    s.dpath = ProcDpath()(
        # imem ports
        imemreq_addr=s.imemreq_q.enq.msg.addr,
        imemresp_data=s.imemresp_q.deq.msg,

        # dmem ports
        dmemreq_addr=s.dmem.req.msg.addr,
        dmemreq_data=s.dmem.req.msg.data,
        dmemresp_data=s.dmemresp_q.deq.msg.data,

        # xcel ports
        xcelresp_data=s.xcelresp_q.deq.msg.data,

        # mngr
        mngr2proc_data=s.mngr2proc_q.deq.msg,
        proc2mngr_data=s.proc2mngr.msg,
    )

    # The accelerator request message combines the type chosen by the
    # control unit with the address and data from the datapath.
    @s.update
    def up_xcelreq():
        s.xcel.req.msg = xreq_class(
            s.ctrl.xcelreq_type,
            s.dpath.xcelreq_addr,
            s.dpath.xcelreq_data,
        )

    # Ctrl <-> Dpath
    connect_pairs(
        # F stage
        s.ctrl.reg_en_F, s.dpath.reg_en_F,
        s.ctrl.pc_sel_F, s.dpath.pc_sel_F,

        # D stage
        s.ctrl.reg_en_D, s.dpath.reg_en_D,
        s.ctrl.op1_byp_sel_D, s.dpath.op1_byp_sel_D,
        s.ctrl.op2_byp_sel_D, s.dpath.op2_byp_sel_D,
        s.ctrl.op1_sel_D, s.dpath.op1_sel_D,
        s.ctrl.op2_sel_D, s.dpath.op2_sel_D,
        s.ctrl.imm_type_D, s.dpath.imm_type_D,

        # X stage
        s.ctrl.reg_en_X, s.dpath.reg_en_X,
        s.ctrl.alu_fn_X, s.dpath.alu_fn_X,

        # M stage
        s.ctrl.reg_en_M, s.dpath.reg_en_M,
        s.ctrl.wb_result_sel_M, s.dpath.wb_result_sel_M,

        # W stage
        s.ctrl.mask_sel_W, s.dpath.mask_sel_W,
        s.ctrl.reg_en_W, s.dpath.reg_en_W,
        s.ctrl.rf_waddr_W, s.dpath.rf_waddr_W,
        s.ctrl.rf_wen_W, s.dpath.rf_wen_W,

        # Status signals (dpath -> ctrl)
        s.dpath.inst_D, s.ctrl.inst_D,
        s.dpath.ne_X, s.ctrl.ne_X,
    )
# RTL models. # # Notice the difference between the TestHarness instances in FL and RTL. # # class TestHarness( Model ): # def __init__( s, src_msgs, sink_msgs, stall_prob, latency, # src_delay, sink_delay, CacheModel, check_test, dump_vcd ) # # The last parameter of TestHarness, check_test is whether or not we # check the test field in the cacheresp. In FL model we don't care about # test field and we set check_test to be False because FL model is just # passing through cachereq to mem, so all cachereq sent to the FL model # will be misses, whereas in RTL model we must set check_test to be True # so that the test sink will know if we hit the cache properly. CacheReqType, CacheRespType = mk_mem_msg( 8, 32, 32 ) MemReqType, MemRespType = mk_mem_msg( 8, 32, 128 ) #------------------------------------------------------------------------- # TestHarness #------------------------------------------------------------------------- class TestHarness( Component ): def construct( s, dut, check_test ): # Instantiate models s.src = TestSrcCL( CacheReqType ) s.cache = dut s.mem = MemoryCL( 1, [ (MemReqType, MemRespType) ] )