def construct(s, Type):
    """Build a single-entry bypass queue with val/rdy interfaces.

    When the queue is empty the enqueued message is forwarded straight to
    the dequeue side through ``byp_mux``; when the consumer is not ready
    the message is captured in ``buffer`` and ``full`` is set.

    Args:
        s: The component being constructed.
        Type: Message type carried by the enq/deq interfaces and the
            buffer register.
    """
    s.enq = InValRdyIfc(Type)
    s.deq = OutValRdyIfc(Type)

    # Storage for the one message that can be held back.
    s.buffer = RegEn(Type)(in_=s.enq.msg)

    s.next_full = Wire(Bits1)
    s.full = Wire(Bits1)

    s.byp_mux = Mux(Type, 2)(
        out=s.deq.msg,
        in_={
            0: s.enq.msg,
            1: s.buffer.out,
        },
        sel=s.full,  # full -- buffer.out, empty -- bypass
    )

    @s.update_ff
    def up_full():
        # Clear the occupancy bit on reset so the queue never starts out
        # claiming to hold a message that was never enqueued.
        if s.reset:
            s.full <<= Bits1(0)
        else:
            s.full <<= s.next_full

    @s.update
    def up_bypq_set_enq_rdy():
        # A new message can be accepted only while the buffer is empty.
        s.enq.rdy = ~s.full

    @s.update
    def up_bypq_internal():
        # Capture the incoming message only when it is accepted and the
        # consumer cannot take it in the same cycle.
        s.buffer.en = (~s.deq.rdy) & (s.enq.val & s.enq.rdy)
        # The queue is (or stays) full whenever a valid message is
        # offered downstream but not consumed.
        s.next_full = (~s.deq.rdy) & s.deq.val

    # this enables the sender to make enq.val depend on enq.rdy
    @s.update
    def up_bypq_set_deq_val():
        s.deq.val = s.full | s.enq.val
def construct(s, Type):
    """Build a single-entry bypass queue with en/rdy interfaces.

    Args:
        s: The component being constructed.
        Type: Message type carried by the enq/deq interfaces and the
            buffer register.
    """
    # Interfaces
    s.enq = RecvIfcRTL(Type)
    s.deq = SendIfcRTL(Type)

    # State: one buffered message plus an occupancy bit that resets to 0.
    s.buffer = RegEn(Type)(in_=s.enq.msg)
    s.full = RegRst(Bits1, reset_value=0)

    # Output selection: buffered message when full, incoming message
    # (bypass) when empty.
    s.byp_mux = Mux(Type, 2)(
        out=s.deq.msg,
        in_={0: s.enq.msg, 1: s.buffer.out},
        sel=s.full.out,
    )

    @s.update
    def up_bypq_set_enq_rdy():
        # Accept a new message only while nothing is buffered.
        s.enq.rdy = ~s.full.out

    @s.update
    def up_bypq_use_enq_en():
        # Send whenever a message is available (incoming or buffered) and
        # the receiver is ready.
        s.deq.en = (s.enq.en | s.full.out) & s.deq.rdy
        # Buffer the incoming message only if it is not sent this cycle.
        s.buffer.en = s.enq.en & ~s.deq.en
        # Occupied next cycle iff a message is available and was not sent.
        s.full.in_ = (s.enq.en | s.full.out) & ~s.deq.en
def construct(s):
    """Build one combinational step of a shift-and-add multiplier.

    Per step: ``a`` is shifted left by one, ``b`` is shifted right by
    one, and ``a`` is added to the running result only when bit 0 of
    ``b`` is set. The enable bit is passed through unchanged.

    Args:
        s: The component being constructed.
    """
    dtype = Bits32

    # ------------------------------------------------------------------
    # Ports
    # ------------------------------------------------------------------

    s.in_en = InPort()
    s.in_a = InPort(dtype)
    s.in_b = InPort(dtype)
    s.in_result = InPort(dtype)

    s.out_en = OutPort()
    s.out_a = OutPort(dtype)
    s.out_b = OutPort(dtype)
    s.out_result = OutPort(dtype)

    # ------------------------------------------------------------------
    # Datapath
    # ------------------------------------------------------------------

    # b >> 1 for the next step
    s.rshifter = RightLogicalShifter(dtype)(
        in_=s.in_b,
        shamt=1,
        out=s.out_b,
    )

    # a << 1 for the next step
    s.lshifter = LeftLogicalShifter(dtype)(
        in_=s.in_a,
        shamt=1,
        out=s.out_a,
    )

    # Candidate partial sum: a + result
    s.add = Adder(dtype)(
        in0=s.in_a,
        in1=s.in_result,
    )

    # Keep the old result when b[0] is 0, take the sum when b[0] is 1.
    s.result_mux = Mux(dtype, 2)(
        sel=s.in_b[0],
        in_={
            0: s.in_result,
            1: s.add.out,
        },
        out=s.out_result,
    )

    # Forward the enable bit
    s.in_en //= s.out_en
def construct(s, EntryType, num_entries=2):
    """Build the datapath of a queue whose output can bypass the storage.

    Entries are stored in a register file; an output mux selects between
    the entry read from storage and the message currently being enqueued.

    Args:
        s: The component being constructed.
        EntryType: Type of each queue entry.
        num_entries: Number of entries in the register file; also sets
            the read/write address width to ``clog2(num_entries)`` bits.
    """
    addr_type = mk_bits(clog2(num_entries))

    # ---- Ports ----
    s.enq_msg = InPort(EntryType)
    s.deq_msg = OutPort(EntryType)

    s.wen = InPort(Bits1)
    s.waddr = InPort(addr_type)
    s.raddr = InPort(addr_type)
    s.mux_sel = InPort(Bits1)

    # ---- Storage ----
    s.queue = RegisterFile(EntryType, num_entries)(
        raddr={0: s.raddr},
        wen={0: s.wen},
        waddr={0: s.waddr},
        wdata={0: s.enq_msg},
    )

    # ---- Output select: 0 -> stored entry, 1 -> incoming message ----
    s.mux = Mux(EntryType, 2)(
        sel=s.mux_sel,
        in_={
            0: s.queue.rdata[0],
            1: s.enq_msg,
        },
        out=s.deq_msg,
    )
def construct(s):
    """Build the datapath of a variable-latency shift-and-add multiplier.

    Operand registers ``a`` and ``b`` are shifted left/right by an amount
    computed from the low 8 bits of ``b``; the result register
    accumulates ``a`` under control of the mux-select inputs.

    Args:
        s: The component being constructed.
    """
    dtype = Bits32

    # ------------------------------------------------------------------
    # Ports
    # ------------------------------------------------------------------

    s.req_msg_a = InPort(dtype)
    s.req_msg_b = InPort(dtype)
    s.resp_msg = OutPort(dtype)

    # Control (ctrl -> dpath)
    s.a_mux_sel = InPort(mk_bits(A_MUX_SEL_NBITS))
    s.b_mux_sel = InPort(mk_bits(B_MUX_SEL_NBITS))
    s.result_mux_sel = InPort(mk_bits(RESULT_MUX_SEL_NBITS))
    s.result_reg_en = InPort()
    s.add_mux_sel = InPort(mk_bits(ADD_MUX_SEL_NBITS))

    # Status (dpath -> ctrl)
    s.b_lsb = OutPort()
    s.is_b_zero = OutPort()

    # ------------------------------------------------------------------
    # Structural composition
    # ------------------------------------------------------------------

    # B path: mux -> register -> zero check / shift amount / right shift.
    # The shifter output is declared up front because it feeds back into
    # the mux.
    s.rshifter_out = Wire(dtype)

    s.b_mux = Mux(dtype, 2)(
        sel=s.b_mux_sel,
        in_={
            B_MUX_SEL_RSH: s.rshifter_out,
            B_MUX_SEL_LD: s.req_msg_b,
        },
    )

    s.b_reg = Reg(dtype)(in_=s.b_mux.out)

    s.b_zero_cmp = ZeroComparator(dtype)(
        in_=s.b_reg.out,
        out=s.is_b_zero,
    )

    # Shift amount is derived from the low 8 bits of b
    s.calc_shamt = IntMulVarLatCalcShamtRTL()(in_=s.b_reg.out[0:8])

    s.rshifter = RightLogicalShifter(dtype, 4)(
        in_=s.b_reg.out,
        shamt=s.calc_shamt.out,
        out=s.rshifter_out,
    )

    # A path: mux -> register -> left shift (same shift amount as b).
    s.lshifter_out = Wire(dtype)

    s.a_mux = Mux(dtype, 2)(
        sel=s.a_mux_sel,
        in_={
            A_MUX_SEL_LSH: s.lshifter_out,
            A_MUX_SEL_LD: s.req_msg_a,
        },
    )

    s.a_reg = Reg(dtype)(in_=s.a_mux.out)

    s.lshifter = LeftLogicalShifter(dtype, 4)(
        in_=s.a_reg.out,
        shamt=s.calc_shamt.out,
        out=s.lshifter_out,
    )

    # Result path: mux (accumulate or clear) -> enabled register ->
    # adder -> add mux, which feeds back into the result mux.
    s.add_mux_out = Wire(dtype)

    s.result_mux = Mux(dtype, 2)(
        sel=s.result_mux_sel,
        in_={
            RESULT_MUX_SEL_ADD: s.add_mux_out,
            RESULT_MUX_SEL_0: 0,
        },
    )

    s.result_reg = RegEn(dtype)(
        en=s.result_reg_en,
        in_=s.result_mux.out,
    )

    s.add = Adder(dtype)(
        in0=s.a_reg.out,
        in1=s.result_reg.out,
    )

    # Either take a + result or keep the result unchanged
    s.add_mux = Mux(dtype, 2)(
        sel=s.add_mux_sel,
        in_={
            ADD_MUX_SEL_ADD: s.add.out,
            ADD_MUX_SEL_RESULT: s.result_reg.out,
        },
        out=s.add_mux_out,
    )

    # Status output: least-significant bit of b
    s.b_lsb //= s.b_reg.out[0]

    # Response is the accumulated result
    s.resp_msg //= s.result_reg.out
def construct(s, idx_shamt=0):
    """Build the datapath of a two-way cache backed by SRAM arrays.

    The datapath registers the incoming cache request, looks up two tag
    arrays and two data arrays, compares tags, and packs the cache
    response and memory request messages.

    The names ``clw``, ``dbw``, ``abw``, ``o``, ``idw`` and ``idw_off``
    are not defined in this block and are resolved from the enclosing
    scope.

    Args:
        s: The component being constructed.
        idx_shamt: Accepted for interface compatibility; not referenced
            by this datapath.
    """
    CacheReqType, CacheRespType = mk_mem_msg(8, 32, 32)
    MemReqType, MemRespType = mk_mem_msg(8, 32, 128)

    # ------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------

    # Cache request
    s.cachereq_msg = InPort(CacheReqType)

    # Cache response
    s.cacheresp_msg = OutPort(CacheRespType)

    # Memory request
    s.memreq_msg = OutPort(MemReqType)

    # Memory response
    s.memresp_msg = InPort(MemRespType)

    # control signals (ctrl->dpath)
    s.amo_sel = InPort(Bits2)
    s.cachereq_enable = InPort()
    s.memresp_enable = InPort()
    s.is_refill = InPort()
    s.tag_array_0_wen = InPort()
    s.tag_array_0_ren = InPort()
    s.tag_array_1_wen = InPort()
    s.tag_array_1_ren = InPort()
    s.way_sel = InPort()
    s.way_sel_current = InPort()
    s.data_array_wen = InPort()
    s.data_array_ren = InPort()
    s.skip_read_data_reg = InPort()

    # Cacheline width in bytes, divided by 4. Floor division keeps the
    # width an int; true division would hand a float to mk_bits.
    s.data_array_wben = InPort(mk_bits(clw // 8 // 4))
    s.read_data_reg_en = InPort()
    s.read_tag_reg_en = InPort()
    s.read_byte_sel = InPort(mk_bits(clog2(clw // dbw)))
    s.memreq_type = InPort(Bits4)
    s.cacheresp_type = InPort(Bits4)
    s.cacheresp_hit = InPort()

    # status signals (dpath->ctrl)
    s.cachereq_type = OutPort(Bits4)
    s.cachereq_addr = OutPort(mk_bits(abw))
    s.tag_match_0 = OutPort()
    s.tag_match_1 = OutPort()

    # ------------------------------------------------------------------
    # Request / response registers
    # ------------------------------------------------------------------

    # Register the unpacked cachereq_msg
    s.cachereq_type_reg = RegEnRst(Bits4, reset_value=0)(
        en=s.cachereq_enable,
        in_=s.cachereq_msg.type_,
        out=s.cachereq_type,
    )

    s.cachereq_addr_reg = RegEnRst(mk_bits(abw), reset_value=0)(
        en=s.cachereq_enable,
        in_=s.cachereq_msg.addr,
        out=s.cachereq_addr,
    )

    s.cachereq_opaque_reg = RegEnRst(mk_bits(o), reset_value=0)(
        en=s.cachereq_enable,
        in_=s.cachereq_msg.opaque,
        out=s.cacheresp_msg.opaque,
    )

    s.cachereq_data_reg = RegEnRst(mk_bits(dbw), reset_value=0)(
        en=s.cachereq_enable,
        in_=s.cachereq_msg.data,
    )

    # Register the unpacked data from memresp_msg
    # NOTE(review): this register's output is not read anywhere in this
    # block; the refill mux below takes s.memresp_msg.data directly.
    # Confirm that is intended.
    s.memresp_data_reg = RegEnRst(mk_bits(clw), reset_value=0)(
        en=s.memresp_enable,
        in_=s.memresp_msg.data,
    )

    # ------------------------------------------------------------------
    # Write data generation
    # ------------------------------------------------------------------

    # Generate cachereq write data which will be the data field or some
    # calculation with the read data for amos
    s.cachereq_data_reg_out_add = Wire(mk_bits(dbw))
    s.cachereq_data_reg_out_and = Wire(mk_bits(dbw))
    s.cachereq_data_reg_out_or = Wire(mk_bits(dbw))

    @s.update
    def comb_connect_wires():
        s.cachereq_data_reg_out_add = s.cachereq_data_reg.out + s.read_byte_sel_mux.out
        s.cachereq_data_reg_out_and = s.cachereq_data_reg.out & s.read_byte_sel_mux.out
        s.cachereq_data_reg_out_or = s.cachereq_data_reg.out | s.read_byte_sel_mux.out

    s.amo_sel_mux = Mux(mk_bits(dbw), ninputs=4)(
        in_={
            0: s.cachereq_data_reg.out,
            1: s.cachereq_data_reg_out_add,
            2: s.cachereq_data_reg_out_and,
            3: s.cachereq_data_reg_out_or,
        },
        sel=s.amo_sel,
    )

    # Replicate cachereq_write_data across every dbw-wide slot of the
    # cacheline.
    s.cachereq_write_data_replicated = Wire(mk_bits((dbw * clw) // dbw))

    for i in range(0, clw, dbw):
        s.cachereq_write_data_replicated[i:i + dbw] //= s.amo_sel_mux.out

    # Refill mux
    s.refill_mux = Mux(mk_bits(clw), ninputs=2)(
        in_={
            0: s.cachereq_write_data_replicated,
            1: s.memresp_msg.data,
        },
        sel=s.is_refill,
    )

    # ------------------------------------------------------------------
    # Address slicing
    # ------------------------------------------------------------------

    # Taking slices of the cache request address
    #   byte offset: 2 bits wide
    #   word offset: 2 bits wide
    #   index: $clog2(nblocks) bits wide - 1 bits wide
    #   nbits: width of tag = width of addr - $clog2(nblocks) - 4
    #   entries: 256*8/128 = 16
    s.cachereq_tag = Wire(mk_bits(abw - 4))
    s.cachereq_idx = Wire(mk_bits(idw))

    s.cachereq_tag //= s.cachereq_addr_reg.out[4:abw]
    s.cachereq_idx //= s.cachereq_addr_reg.out[4:idw_off]

    # Concat
    s.temp_cachereq_tag = Wire(mk_bits(abw))
    s.cachereq_msg_addr = Wire(mk_bits(abw))
    s.cur_cachereq_idx = Wire(mk_bits(idw))

    s.data_array_0_wen = Wire()
    s.data_array_1_wen = Wire()
    s.sram_tag_0_en = Wire()
    s.sram_tag_1_en = Wire()
    s.sram_data_0_en = Wire()
    s.sram_data_1_en = Wire()

    @s.update
    def comb_tag():
        s.cachereq_msg_addr = s.cachereq_msg.addr
        # Zero-extend the tag to the full address width for the tag SRAM
        s.temp_cachereq_tag = concat(b4(0), s.cachereq_tag)

        # While a request is being registered, index the arrays with the
        # incoming address; otherwise use the registered index.
        if s.cachereq_enable:
            s.cur_cachereq_idx = s.cachereq_msg_addr[4:idw_off]
        else:
            s.cur_cachereq_idx = s.cachereq_idx

        # Per-way write enables for the data arrays
        s.data_array_0_wen = s.data_array_wen & (s.way_sel_current == b1(0))
        s.data_array_1_wen = s.data_array_wen & (s.way_sel_current == b1(1))

        # An SRAM is accessed on either a write or a read
        s.sram_tag_0_en = s.tag_array_0_wen | s.tag_array_0_ren
        s.sram_tag_1_en = s.tag_array_1_wen | s.tag_array_1_ren
        s.sram_data_0_en = (s.data_array_wen & (s.way_sel_current == b1(0))) | s.data_array_ren
        s.sram_data_1_en = (s.data_array_wen & (s.way_sel_current == b1(1))) | s.data_array_ren

    # ------------------------------------------------------------------
    # Tag and data arrays
    # ------------------------------------------------------------------

    # Tag array 0
    s.tag_array_0_read_out = Wire(mk_bits(abw))
    s.tag_array_0 = SramRTL(32, 256)(
        port0_val=s.sram_tag_0_en,
        port0_type=s.tag_array_0_wen,
        port0_idx=s.cur_cachereq_idx,
        port0_rdata=s.tag_array_0_read_out,
        port0_wdata=s.temp_cachereq_tag,
    )

    # Tag array 1
    s.tag_array_1_read_out = Wire(mk_bits(abw))
    s.tag_array_1 = SramRTL(32, 256)(
        port0_val=s.sram_tag_1_en,
        port0_type=s.tag_array_1_wen,
        port0_idx=s.cur_cachereq_idx,
        port0_rdata=s.tag_array_1_read_out,
        port0_wdata=s.temp_cachereq_tag,
    )

    # Data array 0
    s.data_array_0_read_out = Wire(mk_bits(clw))
    s.data_array_0 = SramRTL(128, 256, mask_size=4)(
        port0_val=s.sram_data_0_en,
        port0_type=s.data_array_0_wen,
        port0_idx=s.cur_cachereq_idx,
        port0_rdata=s.data_array_0_read_out,
        port0_wben=s.data_array_wben,
        port0_wdata=s.refill_mux.out,
    )

    # Data array 1
    s.data_array_1_read_out = Wire(mk_bits(clw))
    s.data_array_1 = SramRTL(128, 256, mask_size=4)(
        port0_val=s.sram_data_1_en,
        port0_type=s.data_array_1_wen,
        port0_idx=s.cur_cachereq_idx,
        port0_rdata=s.data_array_1_read_out,
        port0_wben=s.data_array_wben,
        port0_wdata=s.refill_mux.out,
    )

    # Data read mux
    s.data_read_mux = Mux(mk_bits(clw), ninputs=2)(
        in_={
            0: s.data_array_0_read_out,
            1: s.data_array_1_read_out,
        },
        sel=s.way_sel_current,
    )

    # ------------------------------------------------------------------
    # Tag check
    # ------------------------------------------------------------------

    # Eq comparator to check for tag matching (tag_compare_0)
    s.tag_compare_0 = EqComparator(mk_bits(abw - 4))(
        in0=s.cachereq_tag,
        in1=s.tag_array_0_read_out[0:abw - 4],
        out=s.tag_match_0,
    )

    # Eq comparator to check for tag matching (tag_compare_1)
    s.tag_compare_1 = EqComparator(mk_bits(abw - 4))(
        in0=s.cachereq_tag,
        in1=s.tag_array_1_read_out[0:abw - 4],
        out=s.tag_match_1,
    )

    # Mux that selects between the ways for requesting from memory
    s.way_sel_mux = Mux(mk_bits(abw - 4), ninputs=2)(
        in_={
            0: s.tag_array_0_read_out[0:abw - 4],
            1: s.tag_array_1_read_out[0:abw - 4],
        },
        sel=s.way_sel_current,
    )

    # ------------------------------------------------------------------
    # Memory request path
    # ------------------------------------------------------------------

    # Read data register
    s.read_data_reg = RegEnRst(mk_bits(clw), reset_value=0)(
        en=s.read_data_reg_en,
        in_=s.data_read_mux.out,
        out=s.memreq_msg.data,
    )

    # Read tag register
    s.read_tag_reg = RegEnRst(mk_bits(abw - 4), reset_value=0)(
        en=s.read_tag_reg_en,
        in_=s.way_sel_mux.out,
    )

    # Memreq Type Mux: bit 0 of memreq_type picks between the request's
    # own tag and the tag read from the selected way.
    s.memreq_type_mux_out = Wire(mk_bits(abw - 4))

    s.tag_mux = Mux(mk_bits(abw - 4), ninputs=2)(
        in_={
            0: s.cachereq_tag,
            1: s.read_tag_reg.out,
        },
        sel=s.memreq_type[0],
        out=s.memreq_type_mux_out,
    )

    # Pack address for memory request
    s.memreq_addr = Wire(mk_bits(abw))

    @s.update
    def comb_addr_evict():
        # Low 4 address bits are zeroed in the memory request address
        s.memreq_addr = concat(s.memreq_type_mux_out, b4(0))

    # ------------------------------------------------------------------
    # Cache response path
    # ------------------------------------------------------------------

    # Skip read data reg mux
    s.read_data = Wire(mk_bits(clw))

    s.skip_read_data_mux = Mux(mk_bits(clw), ninputs=2)(
        in_={
            0: s.read_data_reg.out,
            1: s.data_read_mux.out,
        },
        sel=s.skip_read_data_reg,
        out=s.read_data,
    )

    # Select byte for cache response
    s.read_byte_sel_mux = Mux(mk_bits(dbw), ninputs=4)(
        in_={
            0: s.read_data[0:dbw],
            1: s.read_data[1 * dbw:2 * dbw],
            2: s.read_data[2 * dbw:3 * dbw],
            3: s.read_data[3 * dbw:4 * dbw],
        },
        sel=s.read_byte_sel,
    )

    @s.update
    def comb_addr_refill():
        # Only response type 0 carries read data; all others return 0
        if s.cacheresp_type == b4(0):
            s.cacheresp_msg.data = s.read_byte_sel_mux.out
        else:
            s.cacheresp_msg.data = b32(0)

    @s.update
    def comb_cacherespmsgpack():
        s.cacheresp_msg.type_ = s.cacheresp_type
        s.cacheresp_msg.test = concat(b1(0), s.cacheresp_hit)
        s.cacheresp_msg.len = b2(0)

    @s.update
    def comb_memrespmsgpack():
        s.memreq_msg.type_ = s.memreq_type
        s.memreq_msg.opaque = b8(0)
        s.memreq_msg.addr = s.memreq_addr
        s.memreq_msg.len = b4(0)
def construct(s):
    """Build the 16-bit datapath of a subtract-and-swap GCD unit.

    Two enabled registers hold ``a`` and ``b``. ``a`` can be loaded from
    the request, from ``a - b``, or from ``b``; ``b`` can be loaded from
    the request or from ``a``. Zero and less-than comparisons are
    reported as status outputs.

    Args:
        s: The component being constructed.
    """
    dtype = Bits16

    # ------------------------------------------------------------------
    # Ports
    # ------------------------------------------------------------------

    s.req_msg_a = InPort(dtype)
    s.req_msg_b = InPort(dtype)
    s.resp_msg = OutPort(dtype)

    # Control (ctrl -> dpath)
    s.a_mux_sel = InPort(mk_bits(A_MUX_SEL_NBITS))
    s.a_reg_en = InPort()
    s.b_mux_sel = InPort(mk_bits(B_MUX_SEL_NBITS))
    s.b_reg_en = InPort()

    # Status (dpath -> ctrl)
    s.is_b_zero = OutPort()
    s.is_a_lt_b = OutPort()

    # ------------------------------------------------------------------
    # Structural composition
    # ------------------------------------------------------------------

    # Wires declared ahead of their drivers because the A mux consumes
    # values produced further down.
    s.sub_out = Wire(dtype)
    s.b_reg_out = Wire(dtype)

    # A operand: mux followed by enabled register
    s.a_mux = Mux(dtype, 3)(
        sel=s.a_mux_sel,
        in_={
            A_MUX_SEL_IN: s.req_msg_a,
            A_MUX_SEL_SUB: s.sub_out,
            A_MUX_SEL_B: s.b_reg_out,
        },
    )

    s.a_reg = RegEn(dtype)(
        en=s.a_reg_en,
        in_=s.a_mux.out,
    )

    # B operand: mux followed by enabled register
    s.b_mux = Mux(dtype, 2)(
        sel=s.b_mux_sel,
        in_={
            B_MUX_SEL_A: s.a_reg.out,
            B_MUX_SEL_IN: s.req_msg_b,
        },
    )

    s.b_reg = RegEn(dtype)(
        en=s.b_reg_en,
        in_=s.b_mux.out,
        out=s.b_reg_out,
    )

    # b == 0 ?
    s.b_zero = ZeroComparator(dtype)(
        in_=s.b_reg.out,
        out=s.is_b_zero,
    )

    # a < b ?
    s.a_lt_b = LTComparator(dtype)(
        in0=s.a_reg.out,
        in1=s.b_reg.out,
        out=s.is_a_lt_b,
    )

    # a - b
    s.sub = Subtractor(dtype)(
        in0=s.a_reg.out,
        in1=s.b_reg.out,
        out=s.sub_out,
    )

    # The response is taken from the subtractor output
    s.resp_msg //= s.sub.out
def construct(s, num_cores=1):
    """Build a five-stage (F/D/X/M/W) processor datapath with a multiplier.

    Args:
        s: The component being constructed.
        num_cores: Constant wired into input 1 of the csrr select mux.
    """
    dtype = mk_bits(32)
    MemReqType, MemRespType = mk_mem_msg(8, 32, 32)

    # ------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------

    # Parameters
    s.core_id = InPort(dtype)

    # Instruction memory
    s.imemreq_msg = OutPort(MemReqType)
    s.imemresp_msg = InPort(MemRespType)

    # Data memory
    s.dmemreq_data = OutPort(dtype)
    s.dmemreq_addr = OutPort(dtype)
    s.dmemresp_msg = InPort(MemRespType)

    # Manager
    s.mngr2proc_data = InPort(dtype)
    s.proc2mngr_data = OutPort(dtype)

    # Accelerator
    s.xcelreq_addr = OutPort(Bits5)
    s.xcelreq_data = OutPort(Bits32)
    s.xcelresp_msg = InPort(XcelRespMsg)

    # Control signals (ctrl -> dpath), grouped by pipeline stage
    s.reg_en_F = InPort()
    s.pc_sel_F = InPort(Bits2)

    s.reg_en_D = InPort()
    s.op1_byp_sel_D = InPort(Bits2)
    s.op2_byp_sel_D = InPort(Bits2)
    s.op1_sel_D = InPort()
    s.op2_sel_D = InPort(Bits2)
    s.csrr_sel_D = InPort(Bits2)
    s.imm_type_D = InPort(Bits3)
    s.imul_req_en_D = InPort()
    s.imul_req_rdy_D = OutPort()

    s.reg_en_X = InPort()
    s.alu_fn_X = InPort(Bits4)
    s.ex_result_sel_X = InPort(Bits2)
    s.imul_resp_en_X = InPort()
    s.imul_resp_rdy_X = OutPort()

    s.reg_en_M = InPort()
    s.wb_result_sel_M = InPort(Bits2)

    s.reg_en_W = InPort()
    s.rf_waddr_W = InPort(Bits5)
    s.rf_wen_W = InPort(Bits1)
    s.stats_en_wen_W = InPort(Bits1)

    # Status signals (dpath -> ctrl)
    s.inst_D = OutPort(dtype)
    s.br_cond_eq_X = OutPort()
    s.br_cond_lt_X = OutPort()
    s.br_cond_ltu_X = OutPort()

    # stats_en output
    s.stats_en = OutPort()

    # ------------------------------------------------------------------
    # F stage
    # ------------------------------------------------------------------

    s.pc_F = Wire(dtype)
    s.pc_plus4_F = Wire(dtype)

    # PC + 4
    s.pc_incr_F = Incrementer(dtype, amount=4)(
        in_=s.pc_F,
        out=s.pc_plus4_F,
    )

    # Forward declarations for the redirect targets produced by later
    # stages
    s.br_target_X = Wire(dtype)
    s.jal_target_D = Wire(dtype)
    s.jalr_target_X = Wire(dtype)

    # Next-PC selection
    s.pc_sel_mux_F = Mux(dtype, ninputs=4)(
        in_={
            0: s.pc_plus4_F,
            1: s.br_target_X,
            2: s.jal_target_D,
            3: s.jalr_target_X,
        },
        sel=s.pc_sel_F,
    )

    # The instruction fetch request carries the selected next PC as its
    # address; all other fields are zero.
    s.imemreq_msg //= lambda: MemReqType(b4(0), b8(0), s.pc_sel_mux_F.out, b2(0), b32(0))

    # PC register; resets to (reset vector - 4)
    s.pc_reg_F = RegEnRst(dtype, reset_value=c_reset_vector - 4)(
        en=s.reg_en_F,
        in_=s.pc_sel_mux_F.out,
        out=s.pc_F,
    )

    # ------------------------------------------------------------------
    # D stage
    # ------------------------------------------------------------------

    # PC carried along with the instruction so that D-stage logic can
    # form PC + imm.
    s.pc_reg_D = RegEnRst(dtype)(
        en=s.reg_en_D,
        in_=s.pc_F,
    )

    # Instruction register, also exposed to the control unit
    s.inst_D_reg = RegEnRst(dtype, reset_value=c_reset_inst)(
        en=s.reg_en_D,
        in_=s.imemresp_msg.data,
        out=s.inst_D,
    )

    # Register file. The *_D / *_W wires make the owning stage of each
    # value explicit.
    s.rf_rdata0_D = Wire(dtype)
    s.rf_rdata1_D = Wire(dtype)
    s.rf_wdata_W = Wire(dtype)

    s.rf = RegisterFile(dtype, nregs=32, rd_ports=2, wr_ports=1, const_zero=True)(
        raddr={
            0: s.inst_D[RS1],
            1: s.inst_D[RS2],
        },
        rdata={
            0: s.rf_rdata0_D,
            1: s.rf_rdata1_D,
        },
        wen={0: s.rf_wen_W},
        waddr={0: s.rf_waddr_W},
        wdata={0: s.rf_wdata_W},
    )

    # Immediate generator
    s.imm_gen_D = ImmGenPRTL()(imm_type=s.imm_type_D, inst=s.inst_D)

    # Bypass values coming back from X, M and W
    s.byp_data_X = Wire(Bits32)
    s.byp_data_M = Wire(Bits32)
    s.byp_data_W = Wire(Bits32)

    # Operand 1 bypass: register file or a bypassed value
    s.op1_byp_mux_D = Mux(dtype, ninputs=4)(
        in_={
            0: s.rf_rdata0_D,
            1: s.byp_data_X,
            2: s.byp_data_M,
            3: s.byp_data_W,
        },
        sel=s.op1_byp_sel_D,
    )

    # Operand 2 bypass: register file or a bypassed value
    s.op2_byp_mux_D = Mux(dtype, ninputs=4)(
        in_={
            0: s.rf_rdata1_D,
            1: s.byp_data_X,
            2: s.byp_data_M,
            3: s.byp_data_W,
        },
        sel=s.op2_byp_sel_D,
    )

    # Operand 1 select: bypassed rs1 or the D-stage PC
    s.op1_sel_mux_D = Mux(dtype, ninputs=2)(
        in_={
            0: s.op1_byp_mux_D.out,
            1: s.pc_reg_D.out,
        },
        sel=s.op1_sel_D,
    )

    # csrr select: manager data, number of cores, or core id
    s.csrr_sel_mux_D = Mux(dtype, ninputs=3)(
        in_={
            0: s.mngr2proc_data,
            1: num_cores,
            2: s.core_id,
        },
        sel=s.csrr_sel_D,
    )

    # Operand 2 select: bypassed rs2, immediate, or the csrr mux output.
    # Two muxes are kept separate for pedagogy.
    s.op2_sel_mux_D = Mux(dtype, ninputs=3)(
        in_={
            0: s.op2_byp_mux_D.out,
            1: s.imm_gen_D.imm,
            2: s.csrr_sel_mux_D.out,
        },
        sel=s.op2_sel_D,
    )

    # Branch/jal target = PC + imm
    s.pc_plus_imm_D = Adder(dtype)(
        in0=s.pc_reg_D.out,
        in1=s.imm_gen_D.imm,
        out=s.jal_target_D,
    )

    # ------------------------------------------------------------------
    # X stage
    # ------------------------------------------------------------------

    # Multiplier: requests are issued from D, responses are consumed in X
    # through a one-entry bypass queue.
    s.imul = IntMulScycleRTL()
    s.imulresp_q = BypassQueueRTL(Bits32, 1)(enq=s.imul.minion.resp)

    s.imul.minion.req.en //= s.imul_req_en_D
    s.imul.minion.req.rdy //= s.imul_req_rdy_D
    s.imul.minion.req.msg.a //= s.op1_sel_mux_D.out
    s.imul.minion.req.msg.b //= s.op2_sel_mux_D.out

    s.imulresp_q.deq.en //= s.imul_resp_en_X
    s.imulresp_q.deq.rdy //= s.imul_resp_rdy_X

    # Branch target computed in D, registered into X where branches
    # resolve
    s.br_target_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.pc_plus_imm_D.out,
        out=s.br_target_X,
    )

    # PC in X
    s.pc_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.pc_reg_D.out,
    )

    # Operand registers
    s.op1_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op1_sel_mux_D.out,
    )

    s.op2_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op2_sel_mux_D.out,
    )

    # Accelerator request: address from the low 5 bits of op2, data
    # from op1
    s.xcelreq_addr //= s.op2_reg_X.out[0:5]
    s.xcelreq_data //= s.op1_reg_X.out

    # Store data register. The ALU is busy computing base + imm, so the
    # R[rs2] value to be stored needs its own register.
    s.dmem_write_data_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op2_byp_mux_D.out,  # R[rs2]
        out=s.dmemreq_data,
    )

    # ALU; its output also serves as the jalr target
    s.alu_X = AluPRTL()(
        in0=s.op1_reg_X.out,
        in1=s.op2_reg_X.out,
        fn=s.alu_fn_X,
        ops_eq=s.br_cond_eq_X,
        ops_lt=s.br_cond_lt_X,
        ops_ltu=s.br_cond_ltu_X,
        out=s.jalr_target_X,
    )

    # PC + 4 in X
    s.pc_incr_X = Incrementer(dtype, amount=4)(in_=s.pc_reg_X.out)

    # X result select; drives both the X bypass and the data memory
    # address.
    # NOTE(review): input 1 reads s.imul.minion.resp.msg rather than the
    # dequeue side of s.imulresp_q -- confirm this is intended when a
    # response is held in the queue.
    s.ex_result_sel_mux_X = Mux(dtype, ninputs=3)(
        in_={
            0: s.alu_X.out,
            1: s.imul.minion.resp.msg,
            2: s.pc_incr_X.out,
        },
        sel=s.ex_result_sel_X,
        out=(s.byp_data_X, s.dmemreq_addr),
    )

    # ------------------------------------------------------------------
    # M stage
    # ------------------------------------------------------------------

    # Execution result register
    s.ex_result_reg_M = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_M,
        in_=s.ex_result_sel_mux_X.out,
    )

    # Writeback result select: X result, load data, or accelerator data
    s.wb_result_sel_mux_M = Mux(dtype, ninputs=3)(
        in_={
            0: s.ex_result_reg_M.out,
            1: s.dmemresp_msg.data,
            2: s.xcelresp_msg.data,
        },
        sel=s.wb_result_sel_M,
        out=s.byp_data_M,
    )

    # ------------------------------------------------------------------
    # W stage
    # ------------------------------------------------------------------

    # Writeback result register; fans out to the W bypass, the register
    # file write port and the manager.
    s.wb_result_reg_W = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_W,
        in_=s.wb_result_sel_mux_M.out,
        out=(s.byp_data_W, s.rf_wdata_W, s.proc2mngr_data),
    )

    # stats_en register; bit 0 is exported
    s.stats_en_reg_W = RegEnRst(dtype, reset_value=0)(
        en=s.stats_en_wen_W,
        in_=s.wb_result_reg_W.out,
    )

    s.stats_en //= s.stats_en_reg_W.out[0]
def construct(s):
    """Build a five-stage (F/D/X/M/W) processor datapath.

    Args:
        s: The component being constructed.
    """
    dtype = Bits32

    # ------------------------------------------------------------------
    # Interface
    # ------------------------------------------------------------------

    # Instruction memory
    s.imemreq_addr = OutPort(dtype)
    s.imemresp_data = InPort(dtype)

    # Data memory
    s.dmemreq_addr = OutPort(dtype)
    s.dmemreq_data = OutPort(dtype)
    s.dmemresp_data = InPort(dtype)

    # Manager
    s.mngr2proc_data = InPort(dtype)
    s.proc2mngr_data = OutPort(dtype)

    # Accelerator
    s.xcelreq_addr = OutPort(Bits5)
    s.xcelreq_data = OutPort(dtype)
    s.xcelresp_data = InPort(dtype)

    # Control signals (ctrl -> dpath), grouped by pipeline stage
    s.reg_en_F = InPort(Bits1)
    s.pc_sel_F = InPort(Bits1)

    s.reg_en_D = InPort(Bits1)
    s.op1_byp_sel_D = InPort(Bits2)
    s.op2_byp_sel_D = InPort(Bits2)
    s.op2_sel_D = InPort(Bits2)
    s.imm_type_D = InPort(Bits3)

    s.reg_en_X = InPort(Bits1)
    s.alu_fn_X = InPort(Bits4)

    s.reg_en_M = InPort(Bits1)
    s.wb_result_sel_M = InPort(Bits2)

    s.reg_en_W = InPort(Bits1)
    s.rf_waddr_W = InPort(Bits5)
    s.rf_wen_W = InPort(Bits1)

    # Status signals (dpath -> ctrl)
    s.inst_D = OutPort(dtype)
    s.ne_X = OutPort(Bits1)

    # ------------------------------------------------------------------
    # F stage
    # ------------------------------------------------------------------

    s.pc_F = Wire(dtype)
    s.pc_plus4_F = Wire(dtype)

    # PC + 4
    s.pc_incr_F = Incrementer(dtype, amount=4)(
        in_=s.pc_F,
        out=s.pc_plus4_F,
    )

    # Forward declaration for the branch target produced in X
    s.br_target_X = Wire(dtype)

    # Next-PC selection; the selected value is also the fetch address
    s.pc_sel_mux_F = Mux(dtype, 2)(
        in_={
            0: s.pc_plus4_F,
            1: s.br_target_X,
        },
        sel=s.pc_sel_F,
        out=s.imemreq_addr,
    )

    # PC register; resets to (reset vector - 4)
    s.pc_reg_F = RegEnRst(dtype, reset_value=c_reset_vector - 4)(
        en=s.reg_en_F,
        in_=s.pc_sel_mux_F.out,
        out=s.pc_F,
    )

    # ------------------------------------------------------------------
    # D stage
    # ------------------------------------------------------------------

    # PC carried along with the instruction so that D-stage logic can
    # form PC + imm.
    s.pc_reg_D = RegEnRst(dtype)(
        en=s.reg_en_D,
        in_=s.pc_F,
    )

    # Instruction register, also exposed to the control unit
    s.inst_D_reg = RegEnRst(dtype, reset_value=c_reset_inst)(
        en=s.reg_en_D,
        in_=s.imemresp_data,
        out=s.inst_D,
    )

    # Register file. The *_D / *_W wires make the owning stage of each
    # value explicit.
    s.rf_rdata0_D = Wire(dtype)
    s.rf_rdata1_D = Wire(dtype)
    s.rf_wdata_W = Wire(dtype)

    s.rf = RegisterFile(dtype, nregs=32, rd_ports=2, wr_ports=1, const_zero=True)(
        raddr={
            0: s.inst_D[RS1],
            1: s.inst_D[RS2],
        },
        rdata={
            0: s.rf_rdata0_D,
            1: s.rf_rdata1_D,
        },
        wen={0: s.rf_wen_W},
        waddr={0: s.rf_waddr_W},
        wdata={0: s.rf_wdata_W},
    )

    # Immediate generator
    s.immgen_D = ImmGenRTL()(
        imm_type=s.imm_type_D,
        inst=s.inst_D,
    )

    # Bypass values coming back from X, M and W
    s.bypass_X = Wire(dtype)
    s.bypass_M = Wire(dtype)
    s.bypass_W = Wire(dtype)

    # Operand 1 bypass: register file or a bypassed value
    s.op1_byp_mux_D = Mux(dtype, 4)(
        in_={
            0: s.rf_rdata0_D,
            1: s.bypass_X,
            2: s.bypass_M,
            3: s.bypass_W,
        },
        sel=s.op1_byp_sel_D,
    )

    # Operand 2 bypass: register file or a bypassed value
    s.op2_byp_mux_D = Mux(dtype, 4)(
        in_={
            0: s.rf_rdata1_D,
            1: s.bypass_X,
            2: s.bypass_M,
            3: s.bypass_W,
        },
        sel=s.op2_byp_sel_D,
    )

    # Operand 2 select: bypassed rs2, immediate, or manager data.
    # Two muxes are kept separate for pedagogy.
    s.op2_sel_mux_D = Mux(dtype, 3)(
        in_={
            0: s.op2_byp_mux_D.out,
            1: s.immgen_D.imm,
            2: s.mngr2proc_data,
        },
        sel=s.op2_sel_D,
    )

    # Branch target = PC + imm
    s.pc_plus_imm_D = Adder(dtype)(
        in0=s.pc_reg_D.out,
        in1=s.immgen_D.imm,
    )

    # ------------------------------------------------------------------
    # X stage
    # ------------------------------------------------------------------

    # Branch target computed in D, registered into X where branches
    # resolve
    s.br_target_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.pc_plus_imm_D.out,
        out=s.br_target_X,
    )

    # Operand registers
    s.op1_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op1_byp_mux_D.out,
    )

    s.op2_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op2_sel_mux_D.out,
    )

    # Accelerator request: data from op1, address from the low 5 bits
    # of op2
    s.xcelreq_data //= s.op1_reg_X.out
    s.xcelreq_addr //= s.op2_reg_X.out[0:5]

    # Store data register. The ALU is busy computing base + imm, so the
    # R[rs2] value to be stored needs its own register.
    s.store_reg_X = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_X,
        in_=s.op2_byp_mux_D.out,  # R[rs2]
        out=s.dmemreq_data,
    )

    # ALU; its output drives both the X bypass and the data memory
    # address
    s.alu_X = AluRTL()(
        in0=s.op1_reg_X.out,
        in1=s.op2_reg_X.out,
        fn=s.alu_fn_X,
        ops_ne=s.ne_X,
        out=(s.bypass_X, s.dmemreq_addr),
    )

    # ------------------------------------------------------------------
    # M stage
    # ------------------------------------------------------------------

    # Execution result register
    s.ex_result_reg_M = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_M,
        in_=s.alu_X.out,
    )

    # Writeback result select: ALU result, load data, or accelerator
    # data
    s.wb_result_sel_mux_M = Mux(dtype, 3)(
        in_={
            0: s.ex_result_reg_M.out,
            1: s.dmemresp_data,
            2: s.xcelresp_data,
        },
        sel=s.wb_result_sel_M,
        out=s.bypass_M,
    )

    # ------------------------------------------------------------------
    # W stage
    # ------------------------------------------------------------------

    # Writeback result register; fans out to the W bypass, the register
    # file write port and the manager.
    s.wb_result_reg_W = RegEnRst(dtype, reset_value=0)(
        en=s.reg_en_W,
        in_=s.wb_result_sel_mux_M.out,
        out=(s.bypass_W, s.rf_wdata_W, s.proc2mngr_data),
    )