def __init__(s):
    """Declare the PE's interfaces, memory queues, register file,
    approximate multiplier and scratch signals.

    All widths come from names defined outside this method (nWid, nReg,
    nMemInst, nDesField).
    """

    # ---------------------------------------------------------------
    # Interface
    # ---------------------------------------------------------------
    s.in_control = InValRdyBundle(inst_msg())  # control word
    s.memresp = InValRdyBundle(MemMsg(32, nWid).resp)

    # Neighbor port indices:
    #   0,1: right/left neighbors
    #   2,3: length-2 right/left PEs
    #   4,5: length-4 right/left PEs
    s.in_neighbor = InValRdyBundle[6](nWid)

    s.memreq = OutValRdyBundle(MemMsg(32, nWid).req)
    s.out_fsm = OutValRdyBundle(1)  # response to FSM
    s.out_neighbor = OutValRdyBundle[6](nWid)

    # ---------------------------------------------------------------
    # Queues
    # ---------------------------------------------------------------
    # Request queue: its deq side is tied to the outgoing memreq port.
    s.memreq_q = SingleElementBypassQueue(MemReqMsg(32, nWid))
    s.connect(s.memreq, s.memreq_q.deq)

    # Response queue: the incoming memresp port is tied to its enq side.
    s.memresp_q = SingleElementBypassQueue(MemRespMsg(nWid))
    s.connect(s.memresp, s.memresp_q.enq)

    # temporarily store destination for non-blocking loads
    s.memdes_q = NormalQueue(nMemInst, nDesField)

    # ---------------------------------------------------------------
    # PE local register file
    # ---------------------------------------------------------------
    s.rf = RegisterFile(nWid, nReg, 2)  # 2 read ports

    # ---------------------------------------------------------------
    # approx_mul
    # input is done by reqMsg(src0, src1) done in control plane.
    # ---------------------------------------------------------------
    # Floor division keeps the operand width an integer even when true
    # division is in effect (Python 3 or `from __future__ import division`).
    half_width = nWid // 2
    s.approx_mul = CombinationalApproxMult(half_width, 4)
    s.mul_msg = MymultReqMsg(half_width)

    # ---------------------------------------------------------------
    # temporary variables
    # ---------------------------------------------------------------
    s.src0_tmp = Wire(nWid)
    s.src1_tmp = Wire(nWid)
    s.des_tmp = Wire(nWid)
    s.go = Wire(1)
    s.go_req = Wire(1)
    s.go_resp = Wire(1)
    s.go_mul = Wire(1)
    s.reqsent = RegRst(1, 0)
def __init__(s, num_entries, dtype):
    """Declare the queue datapath: data ports, control ports and storage.

    num_entries -- entry count passed to the backing RegisterFile; the
                   address-port width is clog2(num_entries).
    dtype       -- type given to the enq/deq data ports and to the storage.
    """
    # Data path in / out
    s.enq_bits = InPort(dtype)
    s.deq_bits = OutPort(dtype)

    # Control signals (ctrl -> dpath); both address ports share one width.
    ptr_nbits = clog2(num_entries)
    s.wen = InPort(1)
    s.waddr = InPort(ptr_nbits)
    s.raddr = InPort(ptr_nbits)

    # Queue storage
    s.queue = RegisterFile(dtype, num_entries)

    # Hook the storage up to this model's ports, one pair at a time:
    # read port 0 serves the dequeue side, the write port the enqueue side.
    storage_wiring = (
        (s.queue.rd_addr[0], s.raddr),
        (s.queue.rd_data[0], s.deq_bits),
        (s.queue.wr_en, s.wen),
        (s.queue.wr_addr, s.waddr),
        (s.queue.wr_data, s.enq_bits),
    )
    for storage_port, own_port in storage_wiring:
        s.connect(storage_port, own_port)
def __init__(s, num_entries, Type):
    """Declare the queue datapath: data ports, control ports and storage.

    num_entries -- entry count passed to the backing RegisterFile; the
                   address ports are mk_bits(clog2(num_entries)) wide.
    Type        -- type given to the enq/deq data ports and to the storage.
    """
    # Data path in / out
    s.enq_bits = InVPort(Type)
    s.deq_bits = OutVPort(Type)

    # Control signals (ctrl -> dpath)
    ptr_nbits = clog2(num_entries)
    s.wen = InVPort(Bits1)
    s.waddr = InVPort(mk_bits(ptr_nbits))
    s.raddr = InVPort(mk_bits(ptr_nbits))

    # Queue storage
    s.queue = RegisterFile(Type, num_entries)

    # Hook the storage up to this model's ports, one pair at a time.
    # Index 0 is used on every register-file port array.
    storage_wiring = (
        (s.queue.raddr[0], s.raddr),
        (s.queue.rdata[0], s.deq_bits),
        (s.queue.wen[0], s.wen),
        (s.queue.waddr[0], s.waddr),
        (s.queue.wdata[0], s.enq_bits),
    )
    for storage_port, own_port in storage_wiring:
        s.connect(storage_port, own_port)
def __init__(s):
    """Declare the PE's val/rdy interfaces, register file and scratch wires.

    Widths and sizes come from names defined outside this method
    (nWid, nReg).
    """

    # ---------------------------------------------------------------
    # interface
    # ---------------------------------------------------------------
    control_word = inst_msg()
    s.in_control = InValRdyBundle(control_word)  # control word
    s.in_neighbor = InValRdyBundle[2](nWid)
    s.out_fsm = OutValRdyBundle(1)  # response to FSM
    s.out_neighbor = OutValRdyBundle[2](nWid)

    ocm_req_type = MemMsg(8, 32, nWid)
    s.ocmreqs = OutValRdyBundle(ocm_req_type)

    # NOTE(review): `MempMsg` is not defined in the visible source;
    # confirm it is a real message type and not a misspelling.
    ocm_resp_type = MempMsg(8, nWid)
    s.ocmresps = InValRdyBundle(ocm_resp_type)

    # ---------------------------------------------------------------
    # PE local register file
    # ---------------------------------------------------------------
    s.rf = RegisterFile(nWid, nReg, 2, 1)

    # ---------------------------------------------------------------
    # temporary variables
    # ---------------------------------------------------------------
    # Three nWid-wide scratch wires, then a one-bit `go` wire.
    for scratch_name in ("src0_tmp", "src1_tmp", "des_tmp"):
        setattr(s, scratch_name, Wire(nWid))
    s.go = Wire(1)
def __init__(s, num_cores=1):
    """Declare the processor datapath: ports, then the F/D/X/M/W stages.

    num_cores -- constant wired to input 1 of the csrr select mux
                 (default 1).
    """

    #-----------------------------------------------------------------
    # Interface
    #-----------------------------------------------------------------

    # Parameters
    s.core_id = InPort(32)

    # imem ports
    s.imemreq_msg = OutPort(MemReqMsg4B)
    s.imemresp_msg_data = InPort(32)

    # dmem ports
    s.dmemreq_msg_addr = OutPort(32)
    s.dmemreq_msg_data = OutPort(32)
    s.dmemresp_msg_data = InPort(32)

    # mngr ports
    s.mngr2proc_data = InPort(32)
    s.proc2mngr_data = OutPort(32)

    # Control signals (ctrl->dpath), grouped by pipeline stage suffix
    s.reg_en_F = InPort(1)
    s.pc_sel_F = InPort(2)

    s.reg_en_D = InPort(1)
    s.op1_sel_D = InPort(1)
    s.op2_sel_D = InPort(2)
    s.csrr_sel_D = InPort(2)
    s.imm_type_D = InPort(3)
    s.imul_req_val_D = InPort(1)

    s.reg_en_X = InPort(1)
    s.alu_fn_X = InPort(4)
    s.ex_result_sel_X = InPort(2)
    s.imul_resp_rdy_X = InPort(1)

    s.reg_en_M = InPort(1)
    s.wb_result_sel_M = InPort(1)

    s.reg_en_W = InPort(1)
    s.rf_waddr_W = InPort(5)
    s.rf_wen_W = InPort(1)
    s.stats_en_wen_W = InPort(1)

    # Status signals (dpath->ctrl)
    s.inst_D = OutPort(32)
    s.imul_req_rdy_D = OutPort(1)
    s.br_cond_eq_X = OutPort(1)
    s.br_cond_ltu_X = OutPort(1)
    s.br_cond_lt_X = OutPort(1)
    s.imul_resp_val_X = OutPort(1)

    # stats_en output
    s.stats_en = OutPort(1)

    #-----------------------------------------------------------------
    # F stage
    #-----------------------------------------------------------------

    s.pc_F = Wire(32)
    s.pc_plus4_F = Wire(32)

    # PC+4 incrementer
    pc_incr = s.pc_incr_F = Incrementer(nbits=32, increment_amount=4)
    s.connect_pairs(
        pc_incr.in_, s.pc_F,
        pc_incr.out, s.pc_plus4_F,
    )

    # Forward declaration of the branch / jal / jalr targets, which are
    # driven by later stages but consumed by the PC select mux below.
    s.br_target_X = Wire(32)
    s.jal_target_D = Wire(32)
    s.jalr_target_X = Wire(32)

    # PC sel mux
    pc_mux = s.pc_sel_mux_F = Mux(dtype=32, nports=4)
    s.connect_pairs(
        pc_mux.in_[0], s.pc_plus4_F,
        pc_mux.in_[1], s.br_target_X,
        pc_mux.in_[2], s.jal_target_D,
        pc_mux.in_[3], s.jalr_target_X,
        pc_mux.sel, s.pc_sel_F,
    )

    # The instruction fetch address is the selected next PC.
    @s.combinational
    def imem_req_F():
        s.imemreq_msg.addr.value = s.pc_sel_mux_F.out

    # PC register (resets to c_reset_vector - 4)
    pc_reg = s.pc_reg_F = RegEnRst(dtype=32,
                                   reset_value=c_reset_vector - 4)
    s.connect_pairs(
        pc_reg.en, s.reg_en_F,
        pc_reg.in_, s.pc_sel_mux_F.out,
        pc_reg.out, s.pc_F,
    )

    #-----------------------------------------------------------------
    # D stage
    #-----------------------------------------------------------------

    # PC reg in D stage
    # Carries the F-stage PC along with its instruction so that the
    # instruction can use it, e.g. branch to (PC+imm).
    pc_reg_d = s.pc_reg_D = RegEnRst(dtype=32)
    s.connect_pairs(
        pc_reg_d.en, s.reg_en_D,
        pc_reg_d.in_, s.pc_F,
    )

    # Instruction reg
    inst_reg = s.inst_D_reg = RegEnRst(dtype=32, reset_value=c_reset_inst)
    s.connect_pairs(
        inst_reg.en, s.reg_en_D,
        inst_reg.in_, s.imemresp_msg_data,
        inst_reg.out, s.inst_D,  # to ctrl
    )

    # Register File
    # The rf_rdata_D wires are somewhat redundant; they exist to make it
    # obvious that this data belongs to the D stage.
    s.rf_rdata0_D = Wire(32)
    s.rf_rdata1_D = Wire(32)
    s.rf_wdata_W = Wire(32)

    regfile = s.rf = RegisterFile(dtype=32, nregs=32, rd_ports=2,
                                  const_zero=True)
    s.connect_pairs(
        regfile.rd_addr[0], s.inst_D[RS1],
        regfile.rd_addr[1], s.inst_D[RS2],
        regfile.rd_data[0], s.rf_rdata0_D,
        regfile.rd_data[1], s.rf_rdata1_D,
        regfile.wr_en, s.rf_wen_W,
        regfile.wr_addr, s.rf_waddr_W,
        regfile.wr_data, s.rf_wdata_W,
    )

    # Immediate generator
    imm_gen = s.imm_gen_D = ImmGenPRTL()
    s.connect_pairs(
        imm_gen.imm_type, s.imm_type_D,
        imm_gen.inst, s.inst_D,
    )

    # csrr sel mux
    csrr_mux = s.csrr_sel_mux_D = Mux(dtype=32, nports=3)
    s.connect_pairs(
        csrr_mux.in_[0], s.mngr2proc_data,
        csrr_mux.in_[1], num_cores,
        csrr_mux.in_[2], s.core_id,
        csrr_mux.sel, s.csrr_sel_D,
    )

    # op1 sel mux
    op1_mux = s.op1_sel_mux_D = Mux(dtype=32, nports=2)
    s.connect_pairs(
        op1_mux.in_[0], s.rf_rdata0_D,
        op1_mux.in_[1], s.pc_reg_D.out,
        op1_mux.sel, s.op1_sel_D,
    )

    # op2 sel mux
    # Chooses among RS2, the immediate, and the output of the csrr sel
    # mux above. Two muxes are used here for pedagogy.
    op2_mux = s.op2_sel_mux_D = Mux(dtype=32, nports=3)
    s.connect_pairs(
        op2_mux.in_[0], s.rf_rdata1_D,
        op2_mux.in_[1], s.imm_gen_D.imm,
        op2_mux.in_[2], s.csrr_sel_mux_D.out,
        op2_mux.sel, s.op2_sel_D,
    )

    # RISC-V always computes the branch/jal target by adding the
    # immediate (generated above) to the PC.
    target_adder = s.pc_plus_imm_D = Adder(32)
    s.connect_pairs(
        target_adder.in0, s.pc_reg_D.out,
        target_adder.in1, s.imm_gen_D.imm,
        target_adder.out, s.jal_target_D,
    )

    #-----------------------------------------------------------------
    # X stage
    #-----------------------------------------------------------------

    # br_target_reg_X
    # Branches are resolved in the X stage, so the target already
    # computed in the D stage is registered into X.
    br_target_reg = s.br_target_reg_X = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        br_target_reg.en, s.reg_en_X,
        br_target_reg.in_, s.pc_plus_imm_D.out,
        br_target_reg.out, s.br_target_X,
    )

    # op1 reg
    op1_reg = s.op1_reg_X = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        op1_reg.en, s.reg_en_X,
        op1_reg.in_, s.op1_sel_mux_D.out,
    )

    # op2 reg
    op2_reg = s.op2_reg_X = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        op2_reg.en, s.reg_en_X,
        op2_reg.in_, s.op2_sel_mux_D.out,
    )

    # dmemreq data reg
    store_data_reg = s.dmem_write_data_reg_X = RegEnRst(dtype=32,
                                                        reset_value=0)
    s.connect_pairs(
        store_data_reg.en, s.reg_en_X,
        store_data_reg.in_, s.rf_rdata1_D,
    )

    # pc reg
    pc_reg_x = s.pc_reg_X = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        pc_reg_x.en, s.reg_en_X,
        pc_reg_x.in_, s.pc_reg_D.out,
    )

    # ALU (its output also serves as the jalr target)
    alu = s.alu_X = AluPRTL()
    s.connect_pairs(
        alu.in0, s.op1_reg_X.out,
        alu.in1, s.op2_reg_X.out,
        alu.fn, s.alu_fn_X,
        alu.ops_eq, s.br_cond_eq_X,
        alu.ops_ltu, s.br_cond_ltu_X,
        alu.ops_lt, s.br_cond_lt_X,
        alu.out, s.jalr_target_X,
    )

    # Multiplier: operands are taken from the D-stage operand muxes,
    # packed into the low and high halves of the request message.
    imul = s.imul_X = IntMulAltRTL()
    s.connect_pairs(
        imul.req.msg[0:32], s.op1_sel_mux_D.out,
        imul.req.msg[32:64], s.op2_sel_mux_D.out,
        imul.req.val, s.imul_req_val_D,
        imul.req.rdy, s.imul_req_rdy_D,
        imul.resp.val, s.imul_resp_val_X,
        imul.resp.rdy, s.imul_resp_rdy_X,
    )

    # PC+4 Incrementer
    pc_incr_x = s.pc_incr_X = Incrementer(nbits=32, increment_amount=4)
    s.connect_pairs(
        pc_incr_x.in_, s.pc_reg_X.out,
    )

    # ex result Mux
    ex_mux = s.ex_result_sel_mux_X = Mux(dtype=32, nports=3)
    s.connect_pairs(
        ex_mux.in_[0], s.pc_incr_X.out,
        ex_mux.in_[1], s.alu_X.out,
        ex_mux.in_[2], s.imul_X.resp.msg,
        ex_mux.sel, s.ex_result_sel_X,
    )

    # dmemreq address and write data
    s.connect(s.dmemreq_msg_addr, s.alu_X.out)
    s.connect(s.dmemreq_msg_data, s.dmem_write_data_reg_X.out)

    #-----------------------------------------------------------------
    # M stage
    #-----------------------------------------------------------------

    # Alu execution result reg
    ex_result_reg = s.ex_result_reg_M = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        ex_result_reg.en, s.reg_en_M,
        ex_result_reg.in_, s.ex_result_sel_mux_X.out,
    )

    # Writeback result selection mux
    wb_mux = s.wb_result_sel_mux_M = Mux(dtype=32, nports=2)
    s.connect_pairs(
        wb_mux.in_[0], s.ex_result_reg_M.out,
        wb_mux.in_[1], s.dmemresp_msg_data,
        wb_mux.sel, s.wb_result_sel_M,
    )

    #-----------------------------------------------------------------
    # W stage
    #-----------------------------------------------------------------

    # Writeback result reg
    wb_reg = s.wb_result_reg_W = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        wb_reg.en, s.reg_en_W,
        wb_reg.in_, s.wb_result_sel_mux_M.out,
    )

    # The writeback value feeds both the manager port and the
    # register-file write data.
    s.connect(s.proc2mngr_data, s.wb_result_reg_W.out)
    s.connect(s.rf_wdata_W, s.wb_result_reg_W.out)

    # stats_en logic
    stats_reg = s.stats_en_reg_W = RegEnRst(dtype=32, reset_value=0)
    s.connect_pairs(
        stats_reg.en, s.stats_en_wen_W,
        stats_reg.in_, s.wb_result_reg_W.out,
    )

    @s.combinational
    def stats_en_logic_W():
        s.stats_en.value = any(
            s.stats_en_reg_W.out)  # reduction with bitwise OR
def __init__(s, k=3):
    """Declare the kNN datapath: ports, wires, tables and arithmetic units.

    k -- sizes the knn table (k * DIGIT entries), the FindMax unit and the
         vote-sum width (default 3).

    RE_DATA_SIZE and DIGIT are defined outside this method.
    """
    # Bit widths used repeatedly below.
    sum_nbits = int(math.ceil(math.log(50 * k, 2)))
    table_addr_nbits = int(math.ceil(math.log(k * DIGIT, 2)))
    digit_nbits = int(math.ceil(math.log(DIGIT, 2)))
    k_idx_nbits = int(math.ceil(math.log(k, 2)))

    # ---------------------------------------------------------------
    # Message ports
    # ---------------------------------------------------------------
    s.req_msg_data = InPort(RE_DATA_SIZE)
    s.resp_msg_digit = OutPort(4)

    # ---------------------------------------------------------------
    # ctrl->dpath
    # ---------------------------------------------------------------
    s.knn_wr_data_mux_sel = InPort(1)
    s.knn_wr_addr = InPort(table_addr_nbits)  # max 30
    s.knn_rd_addr = InPort(table_addr_nbits)  # max 30
    s.knn_wr_en = InPort(1)

    s.vote_wr_data_mux_sel = InPort(1)
    s.vote_wr_addr = InPort(digit_nbits)  # max 10
    s.vote_rd_addr = InPort(digit_nbits)  # max 10
    s.vote_wr_en = InPort(1)

    s.FindMax_req_val = InPort(1)
    s.FindMax_resp_rdy = InPort(1)
    s.FindMin_req_val = InPort(1)
    s.FindMin_resp_rdy = InPort(1)

    s.msg_data_reg_en = InPort(1)
    s.msg_idx_reg_en = InPort(1)

    # ---------------------------------------------------------------
    # dpath->ctrl
    # ---------------------------------------------------------------
    s.FindMax_req_rdy = OutPort(1)
    s.FindMax_resp_val = OutPort(1)
    s.FindMax_resp_idx = OutPort(k_idx_nbits)  # max 3
    s.FindMin_req_rdy = OutPort(1)
    s.FindMin_resp_val = OutPort(1)
    s.isSmaller = OutPort(1)

    # ---------------------------------------------------------------
    # internal wires
    # ---------------------------------------------------------------
    # Each wire gets its own Bits instance, as in the original layout.
    s.knn_rd_data = Wire(Bits(RE_DATA_SIZE))
    s.knn_wr_data = Wire(Bits(RE_DATA_SIZE))
    s.subtractor_out = Wire(Bits(sum_nbits))
    s.adder_out = Wire(Bits(sum_nbits))
    s.vote_rd_data = Wire(Bits(sum_nbits))
    s.vote_wr_data = Wire(Bits(sum_nbits))

    s.FindMax_req_data = Wire(Bits(RE_DATA_SIZE))
    s.FindMax_resp_data = Wire(Bits(RE_DATA_SIZE))
    s.FindMin_req_data = Wire(Bits(sum_nbits))
    s.FindMin_resp_data = Wire(Bits(sum_nbits))
    s.FindMin_resp_idx = Wire(Bits(digit_nbits))  # max 10

    # ---------------------------------------------------------------
    # Req msg data Register
    # ---------------------------------------------------------------
    s.req_msg_data_q = Wire(Bits(RE_DATA_SIZE))
    data_reg = s.req_msg_data_reg = RegEnRst(RE_DATA_SIZE)
    s.connect_dict({
        data_reg.en: s.msg_data_reg_en,
        data_reg.in_: s.req_msg_data,
        data_reg.out: s.req_msg_data_q,
    })

    # ---------------------------------------------------------------
    # knn_wr_data Mux: constant 50 or the registered request data
    # ---------------------------------------------------------------
    knn_mux = s.knn_wr_data_mux = Mux(RE_DATA_SIZE, 2)
    s.connect_dict({
        knn_mux.sel: s.knn_wr_data_mux_sel,
        knn_mux.in_[0]: 50,
        knn_mux.in_[1]: s.req_msg_data_q,
        knn_mux.out: s.knn_wr_data,
    })

    # ---------------------------------------------------------------
    # register file knn_table
    # ---------------------------------------------------------------
    knn_table = s.knn_table = RegisterFile(
        dtype=Bits(RE_DATA_SIZE),
        nregs=k * DIGIT,
        rd_ports=1,
        wr_ports=1,
        const_zero=False,
    )
    s.connect_dict({
        knn_table.rd_addr[0]: s.knn_rd_addr,
        knn_table.rd_data[0]: s.knn_rd_data,
        knn_table.wr_addr: s.knn_wr_addr,
        knn_table.wr_data: s.knn_wr_data,
        knn_table.wr_en: s.knn_wr_en,
    })

    # ---------------------------------------------------------------
    # vote_wr_data Mux: constant 50 * k or the adder output
    # ---------------------------------------------------------------
    vote_mux = s.vote_wr_data_mux = Mux(sum_nbits, 2)
    s.connect_dict({
        vote_mux.sel: s.vote_wr_data_mux_sel,
        vote_mux.in_[0]: 50 * k,
        vote_mux.in_[1]: s.adder_out,
        vote_mux.out: s.vote_wr_data,
    })

    # ---------------------------------------------------------------
    # register file knn_vote
    # ---------------------------------------------------------------
    vote_table = s.knn_vote = RegisterFile(
        dtype=Bits(sum_nbits),
        nregs=DIGIT,
        rd_ports=1,
        wr_ports=1,
        const_zero=False,
    )
    s.connect_dict({
        vote_table.rd_addr[0]: s.vote_rd_addr,
        vote_table.rd_data[0]: s.vote_rd_data,
        vote_table.wr_addr: s.vote_wr_addr,
        vote_table.wr_data: s.vote_wr_data,
        vote_table.wr_en: s.vote_wr_en,
    })

    # ---------------------------------------------------------------
    # Find max value of knn_table for a given digit
    # ---------------------------------------------------------------
    # NOTE(review): confirm the connect_wire argument order matches the
    # intended driver/sink direction between these two wires.
    s.connect_wire(s.knn_rd_data, s.FindMax_req_data)

    find_max = s.findmax = FindMaxPRTL(RE_DATA_SIZE, k)
    s.connect_dict({
        find_max.req.val: s.FindMax_req_val,
        find_max.req.rdy: s.FindMax_req_rdy,
        find_max.req.msg.data: s.FindMax_req_data,
        find_max.resp.val: s.FindMax_resp_val,
        find_max.resp.rdy: s.FindMax_resp_rdy,
        find_max.resp.msg.data: s.FindMax_resp_data,
        find_max.resp.msg.idx: s.FindMax_resp_idx,
    })

    # ---------------------------------------------------------------
    # Less than comparator: request data vs. FindMax response data
    # ---------------------------------------------------------------
    lt_cmp = s.knn_LtComparator = LtComparator(RE_DATA_SIZE)
    s.connect_dict({
        lt_cmp.in0: s.req_msg_data_q,
        lt_cmp.in1: s.FindMax_resp_data,
        lt_cmp.out: s.isSmaller,
    })

    # ---------------------------------------------------------------
    # Zero extender for the FindMax response data
    # ---------------------------------------------------------------
    s.FindMax_resp_data_zext = Wire(Bits(sum_nbits))
    max_zext = s.FindMax_resp_data_zexter = ZeroExtender(RE_DATA_SIZE,
                                                         sum_nbits)
    s.connect_dict({
        max_zext.in_: s.FindMax_resp_data,
        max_zext.out: s.FindMax_resp_data_zext,
    })

    # ---------------------------------------------------------------
    # Subtractor: vote read data minus extended FindMax response data
    # ---------------------------------------------------------------
    sub = s.subtractor = Subtractor(sum_nbits)
    s.connect_dict({
        sub.in0: s.vote_rd_data,
        sub.in1: s.FindMax_resp_data_zext,
        sub.out: s.subtractor_out,
    })

    # ---------------------------------------------------------------
    # Zero extender for the registered request data
    # ---------------------------------------------------------------
    s.req_msg_data_zext = Wire(Bits(sum_nbits))
    req_zext = s.req_msg_data_zexter = ZeroExtender(RE_DATA_SIZE,
                                                    sum_nbits)
    s.connect_dict({
        req_zext.in_: s.req_msg_data_q,
        req_zext.out: s.req_msg_data_zext,
    })

    # ---------------------------------------------------------------
    # Adder: subtractor output plus extended request data, carry-in 0
    # ---------------------------------------------------------------
    add = s.adder = Adder(sum_nbits)
    s.connect_dict({
        add.in0: s.subtractor_out,
        add.in1: s.req_msg_data_zext,
        add.cin: 0,
        add.out: s.adder_out,
    })

    # ---------------------------------------------------------------
    # Find min value of knn_vote, return digit
    # ---------------------------------------------------------------
    # NOTE(review): same connect_wire direction question as above.
    s.connect_wire(s.vote_rd_data, s.FindMin_req_data)

    find_min = s.findmin = FindMinPRTL(sum_nbits, DIGIT)
    s.connect_dict({
        find_min.req.val: s.FindMin_req_val,
        find_min.req.rdy: s.FindMin_req_rdy,
        find_min.req.msg.data: s.FindMin_req_data,
        find_min.resp.val: s.FindMin_resp_val,
        find_min.resp.rdy: s.FindMin_resp_rdy,
        find_min.resp.msg.data: s.FindMin_resp_data,
        find_min.resp.msg.digit: s.FindMin_resp_idx,
    })

    # ---------------------------------------------------------------
    # Resp idx Register
    # ---------------------------------------------------------------
    s.resp_msg_idx_q = Wire(Bits(digit_nbits))
    idx_reg = s.req_msg_idx_reg = RegEnRst(digit_nbits)
    s.connect_dict({
        idx_reg.en: s.msg_idx_reg_en,
        idx_reg.in_: s.FindMin_resp_idx,
        idx_reg.out: s.resp_msg_idx_q,
    })

    # connect output idx
    s.connect(s.resp_msg_idx_q, s.resp_msg_digit)