Exemplo n.º 1
        def logic():
            # Combinational logic of the cache controller state machine.
            #
            # Every driven signal gets a default value first; the per-state
            # branches below then override those defaults.
            # NOTE(review): this relies on the enclosing decorator (not visible
            # here) giving the last `@=` on a path priority -- confirm.
            self.io.resp.valid @= False
            self.io.req.ready @= False
            self.io.nasti.ar.valid @= False
            self.io.nasti.aw.valid @= False

            d_wen @= False

            data_wen @= False
            data_wdata @= m.UInt[b_bits](0)
            # Stay in the current state unless a branch below says otherwise.
            state.I @= state.O

            tags_wen @= False
            v_wen @= False

            if state.O == State.IDLE:
                if self.io.req.valid & self.io.resp.ready:
                    if v_rdata & (tags_rdata == tag):
                        # Valid entry with a matching tag: answer at once.
                        if req.mask.reduce_or():
                            # At least one mask bit is set: store the merged
                            # data and set the `d` bit.
                            d_wen @= True
                            data_wdata @= write
                            data_wen @= True
                        self.io.req.ready @= True
                        self.io.resp.valid @= True
                    else:
                        # No match.  The write and read requests are
                        # alternatives: as straight-line code the WRITE
                        # transition was always overwritten by READ and both
                        # address channels were raised together.
                        if d_rdata:
                            self.io.nasti.aw.valid @= True
                            state.I @= State.WRITE
                        else:
                            # Same sequence the WRITE_ACK state performs once
                            # the write response has arrived.
                            data_wdata @= 0
                            data_wen @= True
                            self.io.nasti.ar.valid @= True
                            state.I @= State.READ
            elif state.O == State.WRITE:
                if w_done:
                    state.I @= State.WRITE_ACK
            elif state.O == State.WRITE_ACK:
                if self.io.nasti.b.valid:
                    # Write acknowledged: clear the data entry and issue the
                    # read request.
                    data_wdata @= 0
                    data_wen @= True
                    self.io.nasti.ar.valid @= True
                    state.I @= State.READ
            elif state.O == State.READ:
                if self.io.nasti.r.valid:
                    # OR the incoming beat into the line at the offset selected
                    # by the beat counter (r_cnt * x_data_bits).
                    data_wdata @= read | (
                        m.zext_to(self.io.nasti.r.data.data, b_bits) <<
                        (m.zext_to(r_cnt, b_bits) * nasti_params.x_data_bits))
                    data_wen @= True
                if r_done:
                    # Last beat: write the tag and valid entries, back to IDLE.
                    tags_wen @= True
                    v_wen @= True
                    state.I @= State.IDLE
Exemplo n.º 2
    def __init__(self, x_len):
        # Decode every immediate format from the instruction word and drive
        # `self.io.O` with the one selected by `self.io.sel`.
        #
        # x_len: width the immediates are extended to.
        # NOTE(review): `self.io` has to exist before this point; where it is
        # created is not visible in this view.
        inst = self.io.inst
        Iimm = m.sext_to(m.sint(inst[20:32]), x_len)
        Simm = m.sext_to(m.sint(m.concat(inst[7:12], inst[25:32])), x_len)
        # Bimm/Jimm get the same signed wrapper as Simm; each has a constant
        # zero as its first (lowest) concat operand.
        Bimm = m.sext_to(
            m.sint(
                m.concat(m.bits(0, 1), inst[8:12], inst[25:31], inst[7],
                         inst[31])), x_len)
        Uimm = m.concat(m.bits(0, 12), inst[12:32])
        Jimm = m.sext_to(
            m.sint(
                m.concat(m.bits(0, 1), inst[21:25], inst[25:31], inst[20],
                         inst[12:20], inst[31])), x_len)
        Zimm = m.sint(m.zext_to(inst[15:20], x_len))

        # Select by `sel`; the third argument (Iimm with bit 0 cleared) is
        # passed as the fallback for selectors not listed in the table.
        self.io.O @= m.uint(
            m.dict_lookup(
                {
                    IMM_I: Iimm,
                    IMM_S: Simm,
                    IMM_B: Bimm,
                    IMM_U: Uimm,
                    IMM_J: Jimm,
                    IMM_Z: Zimm
                }, self.io.sel, Iimm & -2))
Exemplo n.º 3
 def logic():
   # Combinational next-value logic for the top-level state machine, the
   # output state machine and the input/output counters.
   # Outputs and register inputs get defaults first; the state branches
   # below override them.
   io.inputMemAddr @= inputStartAddr
   io.inputMemAddrLen @= 0
   # default values required
   state.I @= state.O
   inputAddrLineCount.I @= inputAddrLineCount.O
   inputDataLineCount.I @= inputDataLineCount.O
   outputState.I @= outputState.O
   outputWordCounter.I @= outputWordCounter.O
   # NOTE(review): inputLength.I has no default here, unlike the other
   # registers; it is only driven in the loadInputLength state -- confirm.
   if state.O == TopState.inputLengthAddr:
     # Wait for the address request (defaults above: inputStartAddr, len 0)
     # to be accepted.
     if io.inputMemAddrReady:
       state.I @= TopState.loadInputLength
   elif state.O == TopState.loadInputLength:
     if io.inputMemBlockValid:
       # The length is taken from the [:32] slice of the returned block.
       inputLength.I @= io.inputMemBlock[:32]
       state.I @= TopState.mainLoop
   elif state.O == TopState.mainLoop:
     # Address = (lines already requested, shifted by clog2(bytesInLine))
     # plus the start offset of the main stream.
     io.inputMemAddr @= m.zext_to(sl(inputAddrLineCount.O, m.bitutils.clog2(bytesInLine)), 64) + \
       (inputStartAddr + bytesInLine) # final term is start offset of main data stream
     remainingAddrLen = inputLength.O - inputAddrLineCount.O - 1
     # Request length is capped at 63.
     io.inputMemAddrLen @= 63 if remainingAddrLen > 63 else remainingAddrLen[:8]
     if io.inputMemAddrReady:
       # Advance by 64 lines, or jump straight to the total on the last request.
       inputAddrLineCount.I @= inputAddrLineCount.O + 64 if remainingAddrLen > 63 else inputLength.O
     if io.inputMemBlockValid:
       inputDataLineCount.I @= inputDataLineCount.O + 1
       # Leave the main loop when the last line arrives.
       if inputDataLineCount.O == inputLength.O - 1:
         state.I @= TopState.pause
   elif state.O == TopState.pause:
     # required to flush FeaturePair pipeline before shiftMode is set
     state.I @= TopState.writeOutput
   elif state.O == TopState.writeOutput:
     if outputState.O == OutputState.sendingAddr:
       if io.outputMemAddrReady:
         outputState.I @= OutputState.fillingLine
     elif outputState.O == OutputState.fillingLine:
       # Position of the current word within the output line.
       # NOTE(review): this slices `outputWordCounter` itself rather than
       # `.O` as everywhere else in this function -- confirm.
       wordInLine = 0 if m.bit(outputWordsInLine == 1) else \
         outputWordCounter[:max(1, m.bitutils.clog2(outputWordsInLine))]
       if m.bit(wordInLine == outputWordsInLine - 1): # TODO figure out why m.bit is needed here
         outputState.I @= OutputState.sendingLine
       outputWordCounter.I @= outputWordCounter.O + 1
     else: # outputState is sendingLine
       if io.outputMemBlockReady:
         if outputWordCounter.O == numOutputWords:
           state.I @= TopState.finished
           outputState.I @= OutputState.sendingAddr
Exemplo n.º 4
 def logic():
     # Combinational logic for the output counter and the BRAM ports.
     #
     # The counter advances only while `io.doShift` is set; otherwise it
     # holds.  Written as two unconditional assignments the hold value always
     # replaced the increment.
     if io.doShift:
         outputCounter.I @= outputCounter.O + 1  # wraps around
     else:
         outputCounter.I @= outputCounter.O  # default required
     # The BRAM ports have two alternative drivers.  They must be exclusive
     # branches: as straight-line code the second set of assignments always
     # replaced the shift-mode wiring.
     if io.shiftMode:
         # Shift mode: read at the counter position (one ahead while
         # shifting) and write the neighbour's output at the counter address.
         bram.RADDR @= outputCounter.O + 1 if io.doShift else outputCounter.O
         bram.WDATA @= io.neighborOutputIn
         bram.WADDR @= outputCounter.O
         bram.WE @= io.doShift
     else:
         # Otherwise: address the entry by the concatenated feature pair and
         # write back the updated entry for the previous pair -- [:32] slice
         # plus the metric, [32:] slice plus one.
         bram.RADDR @= io.inputFeatureTwo.concat(io.inputFeatureOne)
         bram.WDATA @= (
             readData[:32] +
             m.zext_to(lastMetric, 32)).concat(readData[32:] + 1)
         bram.WADDR @= lastFeatureTwo.concat(lastFeatureOne)
         bram.WE @= lastInputValid
Exemplo n.º 5
    class CSR_DUT(m.Circuit):
        # Test-bench circuit: instantiates CSRGen and Control, applies one
        # test vector per counter step, and computes the expected CSR read
        # value, epc, evec and exception flag next to the device outputs.
        # NOTE(review): several statements below are syntactically incomplete
        # in this view (unbalanced brackets, call heads missing); compare with
        # the upstream file before editing the logic.
        io = m.IO(done=m.Out(m.Bit),
        io += m.ClockIO(has_reset=True)

        # Reference-model state: one register per CSR address, each with its
        # own reset value.
        regs = {}
        for reg in CSR.regs:
            if reg == CSR.mcpuid:
                init = (1 << (ord('I') - ord('A')) | 1 <<
                        (ord('U') - ord('A')))
            elif reg == CSR.mstatus:
                init = (CSR.PRV_M.ext(30) << 4) | (CSR.PRV_M.ext(30) << 1)
            elif reg == CSR.mtvec:
                init = Const.PC_EVEC
                # NOTE(review): as written this overwrites PC_EVEC with 0;
                # looks like an `else:` is missing above -- confirm.
                init = 0
            regs[reg] = m.Register(init=BV[32](init), reset_type=m.Reset)()

        csr = CSRGen(x_len)()
        ctrl = Control.Control(x_len)()

        # The counter value selects which test vector is applied.
        counter = CounterModM(n, n.bit_length())
        inst = m.mux(insts, counter.O)
        ctrl.inst @= inst
        csr.inst @= inst
        csr_cmd = ctrl.csr_cmd
        csr.cmd @= csr_cmd
        csr.illegal @= ctrl.illegal
        csr.st_type @= ctrl.st_type
        csr.ld_type @= ctrl.ld_type
        csr.pc_check @= ctrl.pc_sel == Control.PC_ALU
        csr.pc @= m.mux(pc, counter.O)
        csr.addr @= m.mux(addr, counter.O)
        csr.I @= m.mux(data, counter.O)
        csr.stall @= False
        csr.host.fromhost.valid @= False
        csr.host.fromhost.data @= 0

        # values known statically
        _csr_addr = [csr(inst) for inst in insts]
        _rs1_addr = [rs1(inst) for inst in insts]
        _csr_ro = [((((x >> 11) & 0x1) > 0x0) & (((x >> 10) & 0x1) > 0x0)) |
                   (x == CSR.mtvec) | (x == CSR.mtdeleg) for x in _csr_addr]
        _csr_valid = [x in CSR.regs for x in _csr_addr]
        # should be <= prv in runtime
        _prv_level = [(x >> 8) & 0x3 for x in _csr_addr]
        # should consider prv in runtime
        _is_ecall = [((x & 0x1) == 0x0) & (((x >> 8) & 0x1) == 0x0)
                     for x in _csr_addr]
        _is_ebreak = [((x & 0x1) > 0x0) & (((x >> 8) & 0x1) == 0x0)
                      for x in _csr_addr]
        _is_eret = [((x & 0x1) == 0x0) & (((x >> 8) & 0x1) > 0x0)
                    for x in _csr_addr]
        # should consider pc_check in runtime
        _iaddr_invalid = [((x >> 1) & 0x1) > 0 for x in addr]
        # should consider ld_type & sd_type
        _waddr_invalid = [(((x >> 1) & 0x1) > 0) | ((x & 0x1) > 0)
                          for x in addr]
        _haddr_invalid = [(x & 0x1) > 0 for x in addr]

        # values known at runtime
        csr_addr = m.mux(_csr_addr, counter.O)
        rs1_addr = m.mux(_rs1_addr, counter.O)
        csr_ro = m.mux(_csr_ro, counter.O)
        csr_valid = m.mux(_csr_valid, counter.O)

        # Reference decode of the write enable and of the fields kept in the
        # mstatus model register.
        wen = (csr_cmd == CSR.W) | (csr_cmd[1] & (rs1_addr != 0))
        prv1 = (regs[CSR.mstatus].O >> 4) & 0x3
        ie1 = (regs[CSR.mstatus].O >> 3) & 0x1
        prv = (regs[CSR.mstatus].O >> 1) & 0x3
        ie = regs[CSR.mstatus].O & 0x1
        prv_inst = csr_cmd == CSR.P
        prv_valid = (m.uint(m.zext_to(m.mux(_prv_level, counter.O), 32)) <=
        iaddr_invalid = m.mux(_iaddr_invalid, counter.O) & csr.pc_check.value()
        laddr_invalid = (m.mux(_haddr_invalid, counter.O) &
                         ((ctrl.ld_type == Control.LD_LH) |
                          (ctrl.ld_type == Control.LD_LHU))
                         | m.mux(_waddr_invalid, counter.O) &
                         (ctrl.ld_type == Control.LD_LW))
        saddr_invalid = (m.mux(_haddr_invalid, counter.O) &
                         (ctrl.st_type == Control.ST_SH)
                         | m.mux(_waddr_invalid, counter.O) &
                         (ctrl.st_type == Control.ST_SW))
        is_ecall = prv_inst & m.mux(_is_ecall, counter.O)
        is_ebreak = prv_inst & m.mux(_is_ebreak, counter.O)
        is_eret = prv_inst & m.mux(_is_eret, counter.O)
        exception = (ctrl.illegal | iaddr_invalid | laddr_invalid
                     | saddr_invalid | (((csr_cmd & 0x3) > 0) &
                                        (~csr_valid | ~prv_valid)) |
                     (csr_ro & wen) | (prv_inst & ~prv_valid) | is_ecall
                     | is_ebreak)
        instret = (inst != nop) & (~exception | is_ecall | is_ebreak)

        # Expected read data: the model register selected by the CSR address.
        rdata = m.dict_lookup({key: value.O
                               for key, value in regs.items()}, csr_addr)
        wdata = m.dict_lookup(
                CSR.W: csr.I.value(),
                CSR.S: (csr.I.value() | rdata),
                CSR.C: (~csr.I.value() & rdata)
            }, csr_cmd)

        # compute state
        regs[CSR.time].I @= regs[CSR.time].O + 1
        regs[CSR.timew].I @= regs[CSR.timew].O + 1
        regs[CSR.mtime].I @= regs[CSR.mtime].O + 1
        regs[CSR.cycle].I @= regs[CSR.cycle].O + 1
        regs[CSR.cyclew].I @= regs[CSR.cyclew].O + 1

        time_max = regs[CSR.time].O.reduce_and()
        # TODO: mtime has same default value as this case (from chisel code)
        # https://github.com/ucb-bar/riscv-mini/blob/release/src/test/scala/CSRTests.scala#L140
        # mtime_reg = regs[CSR.mtime]
        # mtime_reg.I @= m.mux([mtime_reg.O, mtime_reg.O + 1], time_max)

        incr_when(regs[CSR.timeh], time_max)
        incr_when(regs[CSR.timehw], time_max)

        cycle_max = regs[CSR.cycle].O.reduce_and()

        incr_when(regs[CSR.cycleh], cycle_max)
        incr_when(regs[CSR.cyclehw], cycle_max)

        incr_when(regs[CSR.instret], instret)
        incr_when(regs[CSR.instretw], instret)

        instret_max = regs[CSR.instret].O.reduce_and()
        incr_when(regs[CSR.instreth], instret & instret_max)
        incr_when(regs[CSR.instrethw], instret & instret_max)

        cond = ~exception & ~is_eret & wen
        # Assuming these are mutually exclusive, so we don't need chained
        # elsewhen
        update_when(regs[CSR.mstatus], m.zext_to(wdata[0:6], 32),
                    cond & (csr_addr == CSR.mstatus))
                    (m.bits(wdata[7], 32) << 7) | (m.bits(wdata[3], 32) << 3),
                    cond & (csr_addr == CSR.mip))
                    (m.bits(wdata[7], 32) << 7) | (m.bits(wdata[3], 32) << 3),
                    cond & (csr_addr == CSR.mie))
        update_when(regs[CSR.mepc], (wdata >> 2) << 2,
                    cond & (csr_addr == CSR.mepc))
        update_when(regs[CSR.mcause], wdata & (1 << 31 | 0xf),
                    cond & (csr_addr == CSR.mcause))
        update_when(regs[CSR.time], wdata,
                    cond & ((csr_addr == CSR.timew) | (csr_addr == CSR.mtime)))
        update_when(regs[CSR.timew], wdata,
                    cond & ((csr_addr == CSR.timew) | (csr_addr == CSR.mtime)))
        update_when(regs[CSR.mtime], wdata,
                    cond & ((csr_addr == CSR.timew) | (csr_addr == CSR.mtime)))
            regs[CSR.timeh], wdata,
            cond & ((csr_addr == CSR.timehw) | (csr_addr == CSR.mtimeh)))
            regs[CSR.timehw], wdata,
            cond & ((csr_addr == CSR.timehw) | (csr_addr == CSR.mtimeh)))
            regs[CSR.mtimeh], wdata,
            cond & ((csr_addr == CSR.timehw) | (csr_addr == CSR.mtimeh)))
        update_when(regs[CSR.cycle], wdata, cond & (csr_addr == CSR.cyclew))
        update_when(regs[CSR.cyclew], wdata, cond & (csr_addr == CSR.cyclew))
        update_when(regs[CSR.cycleh], wdata, cond & (csr_addr == CSR.cyclehw))
        update_when(regs[CSR.cyclehw], wdata, cond & (csr_addr == CSR.cyclehw))
        update_when(regs[CSR.instret], wdata,
                    cond & (csr_addr == CSR.instretw))
        update_when(regs[CSR.instretw], wdata,
                    cond & (csr_addr == CSR.instretw))
        update_when(regs[CSR.instreth], wdata,
                    cond & (csr_addr == CSR.instrethw))
        update_when(regs[CSR.instrethw], wdata,
                    cond & (csr_addr == CSR.instrethw))
        update_when(regs[CSR.mtimecmp], wdata,
                    cond & (csr_addr == CSR.mtimecmp))
        update_when(regs[CSR.mscratch], wdata,
                    cond & (csr_addr == CSR.mscratch))
        update_when(regs[CSR.mbadaddr], wdata,
                    cond & (csr_addr == CSR.mbadaddr))
        update_when(regs[CSR.mtohost], wdata, cond & (csr_addr == CSR.mtohost))
        update_when(regs[CSR.mfromhost], wdata,
                    cond & (csr_addr == CSR.mfromhost))

        # eret
                    (CSR.PRV_U.zext(30) << 4) | (1 << 3) | (prv1 << 1) | ie1,
                    ~exception & is_eret)

        # TODO: exception logic comes after since it has priority
        Cause = make_Cause(x_len)
        mcause = m.mux([
                        m.mux([Cause.IllegalInst, Cause.Breakpoint],
                        Cause.Ecall + prv,
                    ], is_ecall),
                ], saddr_invalid),
            ], laddr_invalid),
        ], iaddr_invalid)
        update_when(regs[CSR.mcause], mcause, exception)

        update_when(regs[CSR.mepc], (csr.pc.value() >> 2) << 2, exception)
                    (prv << 4) | (ie << 3) | (CSR.PRV_M.zext(30) << 1),
            regs[CSR.mbadaddr], csr.addr.value(),
            exception & (iaddr_invalid | laddr_invalid | saddr_invalid))

        # Expected epc/evec derived from the model registers.
        epc = regs[CSR.mepc].O
        evec = regs[CSR.mtvec].O + (prv << 6)

        m.display("*** Counter: %d ***", counter.O)
        m.display("[in] inst: 0x%x, pc: 0x%x, addr: 0x%x, in: 0x%x", csr.inst,
                  csr.pc, csr.addr, csr.I)

            "     cmd: 0x%x, st_type: 0x%x, ld_type: 0x%x, illegal: %d, "
            "pc_check: %d", csr.cmd, csr.st_type, csr.ld_type, csr.illegal,

        m.display("[state] csr addr: %x", csr_addr)

        for reg_addr, reg in regs.items():
            m.display(f" {hex(int(reg_addr))} -> 0x%x", reg.O)

            "[out] read: 0x%x =? 0x%x, epc: 0x%x =? 0x%x, evec: 0x%x ?= "
            "0x%x, expt: %d ?= %d", csr.O, rdata, csr.epc, epc, csr.evec, evec,
            csr.expt, exception)
        # `check` is high whenever the counter is non-zero.
        io.check @= counter.O.reduce_or()

        # Export each device output next to its expected value.
        io.rdata @= csr.O
        io.expected_rdata @= rdata

        io.epc @= csr.epc
        io.expected_epc @= epc

        io.evec @= csr.evec
        io.expected_evec @= evec

        io.expt @= csr.expt
        io.expected_expt @= exception

        # io.failed @= counter.O.reduce_or() & (
        #     (csr.O != rdata) |
        #     (csr.epc != epc) |
        #     (csr.evec != evec) |
        #     (csr.expt != exception)
        # )
        io.done @= counter.COUT
        # Model registers that were not driven above hold their value.
        for key, reg in regs.items():
            if not reg.I.driven():
                reg.I @= reg.O
Exemplo n.º 6
    def __init__(self, x_len):
        # Build the CSR file: declare the IO, create the state registers,
        # decode the CSR address and command from the instruction, and drive
        # the read value (io.O), exception flag, evec and epc outputs.
        # NOTE(review): several multi-line expressions in this method are
        # incomplete in this view (IO declaration, mcpuid, PRV/PRV1, csr_file
        # and the dict_lookup calls have unbalanced brackets); compare with
        # the upstream file before relying on them.
        Cause = make_Cause(x_len)

        self.io = io = m.IO(
            # Exception
                m.UInt[x_len])) + HostIO(x_len) + m.ClockIO(has_reset=True)

        # CSR address is instruction slice [20:32]; rs1 index is slice [15:20].
        csr_addr = io.inst[20:32]
        rs1_addr = io.inst[15:20]

        # user counters
        time = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        timeh = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        cycle = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        cycleh = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        instret = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        instreth = m.Register(m.UInt[x_len], reset_type=m.Reset)()

        mcpuid = m.concat(
                1 << (ord('I') - ord('A')) |  # Base ISA
                1 << (ord('U') - ord('A'))),  # User Mode
            BV[x_len - 28](0),
            BV[2](0),  # RV32I
        mimpid = BV[x_len](0)
        mhartid = BV[x_len](0)

        # interrupt enable stack
        # Only PRV/PRV1 and IE/IE1 are registers; levels 2 and 3 are constants.
        PRV = m.Register(m.UInt[len(CSR.PRV_M)],
        PRV1 = m.Register(m.UInt[len(CSR.PRV_M)],
        PRV2 = BV[2](0)
        PRV3 = BV[2](0)
        IE = m.Register(m.Bit, init=False, reset_type=m.Reset)()
        IE1 = m.Register(m.Bit, init=False, reset_type=m.Reset)()
        IE2 = False
        IE3 = False

        # virtualization management field
        VM = BV[5](0)

        # memory privilege
        MPRV = False

        # Extension context status
        XS = BV[2](0)
        FS = BV[2](0)
        SD = BV[1](0)
        # mstatus is assembled from the fields above rather than stored as
        # one register.
        mstatus = m.concat(IE.O, PRV.O, IE1.O, PRV1.O, IE2, PRV2, IE3, PRV3,
                           FS, XS, MPRV, VM, BV[x_len - 23](0), SD)
        mtvec = BV[x_len](Const.PC_EVEC)
        mtdeleg = BV[x_len](0)

        # interrupt registers
        # Only the M-level pending/enable bits are registers; the H and S
        # bits are constant False.
        MTIP = m.Register(m.Bit, init=False, reset_type=m.Reset)()
        HTIP = False
        STIP = False
        MTIE = m.Register(m.Bit, init=False, reset_type=m.Reset)()
        HTIE = False
        STIE = False
        MSIP = m.Register(m.Bit, init=False, reset_type=m.Reset)()
        HSIP = False
        SSIP = False
        MSIE = m.Register(m.Bit, init=False, reset_type=m.Reset)()
        HSIE = False
        SSIE = False

        mip = m.concat(Bit(False), SSIP, HSIP, MSIP.O, Bit(False), STIP, HTIP,
                       MTIP.O, BV[x_len - 8](0))
        mie = m.concat(Bit(False), SSIE, HSIE, MSIE.O, Bit(False), STIE, HTIE,
                       MTIE.O, BV[x_len - 8](0))

        mtimecmp = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        mscratch = m.Register(m.UInt[x_len], reset_type=m.Reset)()

        mepc = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        mcause = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        mbadaddr = m.Register(m.UInt[x_len], reset_type=m.Reset)()

        mtohost = m.Register(m.UInt[x_len], reset_type=m.Reset)()
        mfromhost = m.Register(m.UInt[x_len], reset_type=m.Reset)()

        io.host.tohost @= mtohost.O
        # Read map from CSR address to value.  The `*w` addresses and
        # mtime/mtimeh map to the same registers as their plain counterparts.
        csr_file = {
            CSR.cycle: cycle.O,
            CSR.time: time.O,
            CSR.instret: instret.O,
            CSR.cycleh: cycleh.O,
            CSR.timeh: timeh.O,
            CSR.instreth: instreth.O,
            CSR.cyclew: cycle.O,
            CSR.timew: time.O,
            CSR.instretw: instret.O,
            CSR.cyclehw: cycleh.O,
            CSR.timehw: timeh.O,
            CSR.instrethw: instreth.O,
            CSR.mcpuid: mcpuid,
            CSR.mimpid: mimpid,
            CSR.mhartid: mhartid,
            CSR.mtvec: mtvec,
            CSR.mtdeleg: mtdeleg,
            CSR.mie: mie,
            CSR.mtimecmp: mtimecmp.O,
            CSR.mtime: time.O,
            CSR.mtimeh: timeh.O,
            CSR.mscratch: mscratch.O,
            CSR.mepc: mepc.O,
            CSR.mcause: mcause.O,
            CSR.mbadaddr: mbadaddr.O,
            CSR.mip: mip,
            CSR.mtohost: mtohost.O,
            CSR.mfromhost: mfromhost.O,
            CSR.mstatus: mstatus,
        out = m.dict_lookup(csr_file, csr_addr)
        io.O @= out

        # Access is allowed when address slice [8:10] does not exceed PRV.
        priv_valid = csr_addr[8:10] <= PRV.O
        priv_inst = io.cmd == CSR.P
        # Privileged instructions are told apart by address bits 0 and 8.
        is_E_call = priv_inst & ~csr_addr[0] & ~csr_addr[8]
        is_E_break = priv_inst & csr_addr[0] & ~csr_addr[8]
        is_E_ret = priv_inst & ~csr_addr[0] & csr_addr[8]
        # Valid when the address equals any key of csr_file.
        csr_valid = m.reduce(operator.or_,
                             m.bits([csr_addr == key for key in csr_file]))
        # Read-only: both of address bits 10 and 11 set, or mtvec/mtdeleg.
        csr_RO = (csr_addr[10:12].reduce_and() | (csr_addr == CSR.mtvec) |
                  (csr_addr == CSR.mtdeleg))
        # Write enable: a W command, or cmd bit 1 with a non-zero rs1 index.
        wen = (io.cmd == CSR.W) | io.cmd[1] & rs1_addr.reduce_or()
        wdata = m.dict_lookup(
                CSR.W: io.I,
                CSR.S: out | io.I,
                CSR.C: out & ~io.I
            }, io.cmd)

        iaddr_invalid = io.pc_check & io.addr[1]

        laddr_invalid = m.dict_lookup(
                Control.LD_LW: io.addr[0:2].reduce_or(),
                Control.LD_LH: io.addr[0],
                Control.LD_LHU: io.addr[0]
            }, io.ld_type)

        saddr_invalid = m.dict_lookup(
                Control.ST_SW: io.addr[0:2].reduce_or(),
                Control.ST_SH: io.addr[0]
            }, io.st_type)

        expt = (io.illegal | iaddr_invalid | laddr_invalid | saddr_invalid
                | io.cmd[0:2].reduce_or() & (~csr_valid | ~priv_valid)
                | wen & csr_RO | (priv_inst & ~priv_valid) | is_E_call
                | is_E_break)
        io.expt @= expt

        # Exception vector: mtvec plus PRV shifted left by 6.
        io.evec @= mtvec + (m.zext_to(PRV.O, x_len) << 6)
        io.epc @= mepc.O

        def logic():
            # Next-value logic for every CSR state register.
            #
            # Each register first gets its default (hold, or +1 for the free
            # running counters); the branches below then override it.
            # Priority inside `~io.stall`: exception, then eret, then a CSR
            # write.

            # Counters
            time.I @= time.O + 1
            timeh.I @= timeh.O
            # Carry into the high word when the low word is all ones.
            if time.O.reduce_and():
                timeh.I @= timeh.O + 1

            cycle.I @= cycle.O + 1
            cycleh.I @= cycleh.O
            if cycle.O.reduce_and():
                cycleh.I @= cycleh.O + 1
            instret.I @= instret.O
            # Counts when the instruction is not a NOP, there is no stall, and
            # either no exception is raised or it is an ecall/ebreak.
            is_inst_ret = ((io.inst != Instructions.NOP) &
                           (~expt | is_E_call | is_E_break) & ~io.stall)
            if is_inst_ret:
                instret.I @= instret.O + 1
            instreth.I @= instreth.O
            if is_inst_ret & instret.O.reduce_and():
                instreth.I @= instreth.O + 1

            # Hold defaults for the remaining registers.
            mbadaddr.I @= mbadaddr.O
            mepc.I @= mepc.O
            mcause.I @= mcause.O
            PRV.I @= PRV.O
            IE.I @= IE.O
            IE1.I @= IE1.O
            PRV1.I @= PRV1.O
            MTIP.I @= MTIP.O
            MSIP.I @= MSIP.O
            MTIE.I @= MTIE.O
            MSIE.I @= MSIE.O
            mtimecmp.I @= mtimecmp.O
            mscratch.I @= mscratch.O
            mtohost.I @= mtohost.O
            mfromhost.I @= mfromhost.O
            if io.host.fromhost.valid:
                mfromhost.I @= io.host.fromhost.data

            if ~io.stall:
                if expt:
                    # Save the pc with its two low bits cleared.
                    mepc.I @= io.pc >> 2 << 2
                    if iaddr_invalid:
                        mcause.I @= Cause.InstAddrMisaligned
                    elif laddr_invalid:
                        mcause.I @= Cause.LoadAddrMisaligned
                    elif saddr_invalid:
                        mcause.I @= Cause.StoreAddrMisaligned
                    elif is_E_call:
                        mcause.I @= Cause.Ecall + m.zext_to(PRV.O, x_len)
                    elif is_E_break:
                        mcause.I @= Cause.Breakpoint
                    else:
                        # Fallback only: as an unconditional statement in the
                        # ebreak branch it replaced the Breakpoint cause.
                        mcause.I @= Cause.IllegalInst
                    # Push the privilege / interrupt-enable stack.
                    PRV.I @= CSR.PRV_M
                    IE.I @= False
                    PRV1.I @= PRV.O
                    IE1.I @= IE.O
                    if iaddr_invalid | laddr_invalid | saddr_invalid:
                        mbadaddr.I @= io.addr
                elif is_E_ret:
                    # Pop the privilege / interrupt-enable stack.
                    PRV.I @= PRV1.O
                    IE.I @= IE1.O
                    PRV1.I @= CSR.PRV_U
                    IE1.I @= True
                elif wen:
                    # CSR write: only the addressed register is updated.
                    if csr_addr == CSR.mstatus:
                        PRV1.I @= wdata[4:6]
                        IE1.I @= wdata[3]
                        PRV.I @= wdata[1:3]
                        IE.I @= wdata[0]
                    elif csr_addr == CSR.mip:
                        MTIP.I @= wdata[7]
                        MSIP.I @= wdata[3]
                    elif csr_addr == CSR.mie:
                        MTIE.I @= wdata[7]
                        MSIE.I @= wdata[3]
                    elif csr_addr == CSR.mtime:
                        time.I @= wdata
                    elif csr_addr == CSR.mtimeh:
                        timeh.I @= wdata
                    elif csr_addr == CSR.mtimecmp:
                        mtimecmp.I @= wdata
                    elif csr_addr == CSR.mscratch:
                        mscratch.I @= wdata
                    elif csr_addr == CSR.mepc:
                        mepc.I @= wdata >> 2 << 2
                    elif csr_addr == CSR.mcause:
                        # Keep only the top bit and the low four bits.
                        mcause.I @= wdata & (1 << (x_len - 1) | 0xf)
                    elif csr_addr == CSR.mbadaddr:
                        mbadaddr.I @= wdata
                    elif csr_addr == CSR.mtohost:
                        mtohost.I @= wdata
                    elif csr_addr == CSR.mfromhost:
                        mfromhost.I @= wdata
                    elif csr_addr == CSR.cyclew:
                        cycle.I @= wdata
                    elif csr_addr == CSR.timew:
                        time.I @= wdata
                    elif csr_addr == CSR.instretw:
                        instret.I @= wdata
                    elif csr_addr == CSR.cyclehw:
                        cycleh.I @= wdata
                    elif csr_addr == CSR.timehw:
                        timeh.I @= wdata
                    elif csr_addr == CSR.instrethw:
                        instreth.I @= wdata
Exemplo n.º 7
        def logic():
            # Next-value logic for every CSR state register.
            #
            # Defaults come first (hold, or +1 for the free-running counters)
            # and are overridden by the branches further down.  While not
            # stalled, an exception takes priority over eret, which takes
            # priority over a CSR write.

            # Counters
            time.I @= time.O + 1
            timeh.I @= timeh.O
            # The high word increments when the low word is all ones.
            if time.O.reduce_and():
                timeh.I @= timeh.O + 1

            cycle.I @= cycle.O + 1
            cycleh.I @= cycleh.O
            if cycle.O.reduce_and():
                cycleh.I @= cycleh.O + 1
            instret.I @= instret.O
            # An instruction counts when it is not a NOP, nothing stalls, and
            # any raised exception is an ecall or ebreak.
            is_inst_ret = ((io.inst != Instructions.NOP) &
                           (~expt | is_E_call | is_E_break) & ~io.stall)
            if is_inst_ret:
                instret.I @= instret.O + 1
            instreth.I @= instreth.O
            if is_inst_ret & instret.O.reduce_and():
                instreth.I @= instreth.O + 1

            # Remaining registers hold by default.
            mbadaddr.I @= mbadaddr.O
            mepc.I @= mepc.O
            mcause.I @= mcause.O
            PRV.I @= PRV.O
            IE.I @= IE.O
            IE1.I @= IE1.O
            PRV1.I @= PRV1.O
            MTIP.I @= MTIP.O
            MSIP.I @= MSIP.O
            MTIE.I @= MTIE.O
            MSIE.I @= MSIE.O
            mtimecmp.I @= mtimecmp.O
            mscratch.I @= mscratch.O
            mtohost.I @= mtohost.O
            mfromhost.I @= mfromhost.O
            if io.host.fromhost.valid:
                mfromhost.I @= io.host.fromhost.data

            if ~io.stall:
                if expt:
                    # Record the pc with its two low bits cleared.
                    mepc.I @= io.pc >> 2 << 2
                    if iaddr_invalid:
                        mcause.I @= Cause.InstAddrMisaligned
                    elif laddr_invalid:
                        mcause.I @= Cause.LoadAddrMisaligned
                    elif saddr_invalid:
                        mcause.I @= Cause.StoreAddrMisaligned
                    elif is_E_call:
                        mcause.I @= Cause.Ecall + m.zext_to(PRV.O, x_len)
                    elif is_E_break:
                        mcause.I @= Cause.Breakpoint
                    else:
                        # IllegalInst is the fallback cause.  Placed inside
                        # the ebreak branch it always replaced Breakpoint.
                        mcause.I @= Cause.IllegalInst
                    # Enter machine mode and save the previous PRV/IE.
                    PRV.I @= CSR.PRV_M
                    IE.I @= False
                    PRV1.I @= PRV.O
                    IE1.I @= IE.O
                    if iaddr_invalid | laddr_invalid | saddr_invalid:
                        mbadaddr.I @= io.addr
                elif is_E_ret:
                    # Restore the saved PRV/IE.
                    PRV.I @= PRV1.O
                    IE.I @= IE1.O
                    PRV1.I @= CSR.PRV_U
                    IE1.I @= True
                elif wen:
                    # CSR write: update only the addressed register.
                    if csr_addr == CSR.mstatus:
                        PRV1.I @= wdata[4:6]
                        IE1.I @= wdata[3]
                        PRV.I @= wdata[1:3]
                        IE.I @= wdata[0]
                    elif csr_addr == CSR.mip:
                        MTIP.I @= wdata[7]
                        MSIP.I @= wdata[3]
                    elif csr_addr == CSR.mie:
                        MTIE.I @= wdata[7]
                        MSIE.I @= wdata[3]
                    elif csr_addr == CSR.mtime:
                        time.I @= wdata
                    elif csr_addr == CSR.mtimeh:
                        timeh.I @= wdata
                    elif csr_addr == CSR.mtimecmp:
                        mtimecmp.I @= wdata
                    elif csr_addr == CSR.mscratch:
                        mscratch.I @= wdata
                    elif csr_addr == CSR.mepc:
                        mepc.I @= wdata >> 2 << 2
                    elif csr_addr == CSR.mcause:
                        # Only the top bit and the low four bits are writable.
                        mcause.I @= wdata & (1 << (x_len - 1) | 0xf)
                    elif csr_addr == CSR.mbadaddr:
                        mbadaddr.I @= wdata
                    elif csr_addr == CSR.mtohost:
                        mtohost.I @= wdata
                    elif csr_addr == CSR.mfromhost:
                        mfromhost.I @= wdata
                    elif csr_addr == CSR.cyclew:
                        cycle.I @= wdata
                    elif csr_addr == CSR.timew:
                        time.I @= wdata
                    elif csr_addr == CSR.instretw:
                        instret.I @= wdata
                    elif csr_addr == CSR.cyclehw:
                        cycleh.I @= wdata
                    elif csr_addr == CSR.timehw:
                        timeh.I @= wdata
                    elif csr_addr == CSR.instrethw:
                        instreth.I @= wdata
Exemplo n.º 8
        class DUT(m.Circuit):
            # Test-bench circuit around Core: while in INIT the program image
            # is written into the memories and the core is held in reset; in
            # RUN the core executes and `done` is raised from host.tohost.
            # NOTE(review): several statements below are syntactically
            # incomplete in this view (RegFileBuilder calls, the mux select,
            # `m.Register(` heads, display/assert heads); compare with the
            # upstream file before editing the logic.
            io = m.IO(done=m.Out(m.Bit)) + m.ClockIO(has_reset=True)
            core = Core(
                x_len, data_path_kwargs=m.generator.ParamDict(ImmGen=ImmGen))()
            core.host.fromhost.valid @= False

            # reverse concat because we're using utils with chisel ordering
            _hex = [concat(*reversed(x)) for x in loadmem]
            imem = RegFileBuilder("imem",
                                  1 << 20,
            dmem = RegFileBuilder("dmem",
                                  1 << 20,

            # Two-state controller encoded as a single bit.
            INIT, RUN = False, True

            state = m.Register(init=INIT)()
            cycle = m.Register(m.UInt[32])()

            # The counter steps through the image chunks and only counts in
            # INIT.
            n = len(_hex)
            counter = CounterModM(n, n.bit_length(), has_ce=True)
            counter.CE @= m.enable(state.O == INIT)
            cntr, done = counter.O, counter.COUT

            # Byte addresses from the core are turned into word indices
            # (divide by bytes per word) and cut to 20 bits.
            iaddr = (core.icache.req.data.addr // (x_len // 8))[:20]
            daddr = (core.dcache.req.data.addr // (x_len // 8))[:20]

            dmem_data = dmem[daddr]
            imem_data = imem[iaddr]
            # Build the store word byte by byte; each byte is chosen between
            # the current memory word and the request data (the select
            # expression involves the per-byte mask bit but is incomplete in
            # this view).
            write = 0
            for i in range(x_len // 8):
                write |= m.zext_to(
                    m.mux([dmem_data, core.dcache.req.data.data],
                          & core.dcache.req.data.mask[i])[8 * i:8 * (i + 1)],
                    32) << (8 * i)

            core.RESET @= m.reset(state.O == INIT)

            core.icache.resp.valid @= state.O == RUN
            core.dcache.resp.valid @= state.O == RUN

            core.icache.resp.data.data @= m.Register(
            core.dcache.resp.data.data @= m.Register(

            chunk = m.mux(_hex, cntr)

            imem.write(m.zext_to(cntr, 20), chunk, m.enable(state.O == INIT))

                m.mux([m.zext_to(cntr, 20), daddr], state.O == INIT),
                m.mux([chunk, write], state.O == INIT),
                m.enable((state.O == INIT)
                         | (core.dcache.req.valid
                            & core.dcache.req.data.mask.reduce_or())))

            def logic():
                # INIT -> RUN once the load counter reports completion; the
                # cycle counter advances only while in RUN.
                state.I @= state.O
                cycle.I @= cycle.O
                if state.O == INIT:
                    if done:
                        state.I @= RUN
                if state.O == RUN:
                    cycle.I @= cycle.O + 1

            # Optional tracing, disabled by default.
            debug = False
            if debug:
                m.display("LOADMEM[%x] <= %x", cntr * (x_len // 8),
                          chunk).when(m.posedge(io.CLK)).if_(state.O == INIT)

                m.display("INST[%x] => %x",
                          iaddr * (x_len // 8), dmem_data).when(
                              m.posedge(io.CLK)).if_((state.O == RUN)
                                                     & core.icache.req.valid)

                m.display("MEM[%x] <= %x", daddr * (x_len // 8), write).when(
                        io.CLK)).if_((state.O == RUN) & core.dcache.req.valid
                                     & core.dcache.req.data.mask.reduce_or())

                    "MEM[%x] => %x", daddr * (x_len // 8),
                        io.CLK)).if_((state.O == RUN) & core.dcache.req.valid
                                     & ~core.dcache.req.data.mask.reduce_or())

                m.display("cycles: %d", cycle.O).when(m.posedge(
                    io.CLK)).if_(io.done.value() == 1)
            # The run must finish within the test's cycle budget.
            f.assert_immediate(cycle.O < test.maxcycles)
            # Done as soon as the core writes a non-zero tohost value.
            io.done @= core.host.tohost != 0
                (core.host.tohost >> 1) == 0,
                failure_msg=("* tohost: %d *", core.host.tohost))
Exemplo n.º 9
    def __init__(self, x_len, n_ways: int, n_sets: int, b_bytes: int):
        # Builds a cache circuit holding n_sets lines of b_bytes bytes each
        # (one data/tag/valid/dirty entry per set), with a CPU-side request /
        # response interface and a NASTI memory-side interface.
        # n_ways is not referenced anywhere in the visible code.
        # NOTE(review): this excerpt has lost lines -- the NastiParameters(...)
        # calls, the m.IO(...) field list, both CounterModM(...) calls, the
        # W-channel payload and the TRACE display heads are all cut off, and
        # nasti_params is assigned twice. Comments describe what is visible.
        nasti_params = NastiParameters(data_bits=64,

        self.io = m.IO(req=m.Consumer(m.Decoupled[make_CacheReq(x_len)]),
                       nasti=make_NastiIO(nasti_params)) + m.ClockIO()
        # NOTE(review): the cache testbench elsewhere in this file computes
        # size as clog2(x_data_bits // 8); the `// 8` is absent here --
        # confirm which one is intended.
        size = m.bitutils.clog2(nasti_params.x_data_bits)
        # Line geometry: bits per line, offset/index/tag field widths, and
        # the number of NASTI beats needed to move one line.
        b_bits = b_bytes << 3
        b_len = m.bitutils.clog2(b_bytes)
        s_len = m.bitutils.clog2(n_sets)
        t_len = x_len - (s_len + b_len)
        nasti_params = NastiParameters(data_bits=64,
        data_beats = b_bits // nasti_params.x_data_bits
        length = data_beats - 1

        # Per-set storage: line data, tag, valid bit and dirty bit.
        data = m.Memory(n_sets, m.UInt[b_bits])()
        tags = m.Memory(n_sets, m.UInt[t_len])()
        v = m.Memory(n_sets, m.Bit)()
        d = m.Memory(n_sets, m.Bit)()

        # Request address fields: off = addr[:b_len], idx = the next s_len
        # bits, tag = the t_len bits above those.
        req = self.io.req.data
        tag = (req.addr >> (b_len + s_len))[:t_len]
        idx = req.addr[b_len:b_len + s_len]
        off = req.addr[:b_len]
        read = data.read(idx)
        # Merge the store into the line that was read. For byte i of the
        # line, the request byte (i & 3) replaces the old byte when the
        # request's word (off // 4) equals i // 4 and request mask bit (i & 3)
        # is set; otherwise the old byte is kept.
        write = m.bits(0, b_bits)
        for i in range(b_bytes):
            write |= m.mux([(read & (0xff << (8 * i))),
                            ((m.zext_to(req.data, b_bits) >>
                              ((8 * (i & 0x3)))) & 0xff) << (8 * i)],
                           ((off // 4) == (i // 4)) & (req.mask >>
                                                       (i & 0x3))[0])[:b_bits]

        # Controller states used by logic() below.
        class State(m.Enum):
            IDLE = 0
            WRITE = 1
            WRITE_ACK = 2
            READ = 3

        state = m.Register(init=State.IDLE)()

        # Beat counters. write_counter advances on every WRITE cycle;
        # read_counter advances on each valid R beat while in READ. Their
        # COUT outputs are used as the w_done / r_done flags.
        write_counter = mantle.CounterModM(data_beats,
                                           max(data_beats.bit_length(), 1),
        write_counter.CE @= m.enable(state.O == State.WRITE)
        w_cnt, w_done = write_counter.O, write_counter.COUT

        read_counter = mantle.CounterModM(data_beats,
                                          max(data_beats.bit_length(), 1),
        read_counter.CE @= m.enable((state.O == State.READ)
                                    & self.io.nasti.r.valid)
        r_cnt, r_done = read_counter.O, read_counter.COUT

        # Response word: the line shifted right by (off // 4) * x_len bits,
        # truncated to x_len bits.
        self.io.resp.data.data @= (read >> (m.zext_to(
            (off // 4), b_bits) * x_len))[:x_len]
        # Read address: the request address with its low b_len bits cleared.
        self.io.nasti.ar.data @= NastiReadAddressChannel(
            nasti_params, 0, (req.addr >> b_len) << b_len, size, length)
        tags_rdata = tags.read(idx)
        # Write address: concat(idx, stored tag) shifted left by b_len.
        self.io.nasti.aw.data @= NastiWriteAddressChannel(
            nasti_params, 0,
            m.bits(m.concat(idx, tags_rdata), nasti_params.x_addr_bits) <<
            b_len, size, length)
        # NOTE(review): the W payload expression is truncated in this
        # excerpt; only the shift by w_cnt and the trailing (None, w_done)
        # arguments are visible.
        self.io.nasti.w.data @= NastiWriteDataChannel(
            (read >> (m.zext_to(w_cnt, b_bits) *
            None, w_done)
        # W valid, B ready and R ready are each tied to one FSM state.
        self.io.nasti.w.valid @= state.O == State.WRITE
        self.io.nasti.b.ready @= state.O == State.WRITE_ACK
        self.io.nasti.r.ready @= state.O == State.READ

        # Named wires for the memory write ports; they are driven inside
        # logic() below. The dirty bit written is always True.
        d_wen = m.Bit(name="d_wen")
        d.write(True, idx, m.enable(d_wen))

        data_wen = m.Bit(name="data_wen")
        data_wdata = m.UInt[b_bits](name="data_wdata")
        data.write(data_wdata, idx, m.enable(data_wen))
        # m.display("data_wdata=%x", data_wdata).when(m.posedge(self.io.CLK))

        # The valid bit written is always True.
        v_wen = m.Bit(name="v_wen")
        v.write(True, idx, m.enable(v_wen))
        v_rdata = v.read(idx)

        tags_wen = m.Bit(name="tags_wen")
        tags.write(tag, idx, m.enable(tags_wen))

        d_rdata = d.read(idx)

        # m.display("gold_state=%x", state.O).when(m.posedge(self.io.CLK))
        # m.display("gold_w_done=%x", w_done).when(m.posedge(self.io.CLK))
        # m.display("gold_b_valid=%x",
        #           self.io.nasti.b.valid).when(m.posedge(self.io.CLK))

        # Optional trace of write hits and read hits.
        # NOTE(review): both display calls are missing their head line and
        # part of their argument list in this excerpt.
        if TRACE:
                "[%0t] [cache] data[%x] <= %x, off: %x, req: %x, mask: %b",
                m.time(), idx, write, off, self.io.req.data.data,
                .if_((state.O == State.IDLE) &
                     (self.io.req.valid & self.io.resp.ready) &
                     (v_rdata & (tags_rdata == tag)) & req.mask.reduce_or())

                "[%0t] [cache] data[%x] => %x, off: %x, resp: %x", m.time(),
                idx, write, off, self.io.resp.data.data.value())\
                .if_((state.O == State.IDLE) &
                     (self.io.req.valid & self.io.resp.ready) &
                     (v_rdata & (tags_rdata == tag)) & ~req.mask.reduce_or())

        # Controller FSM. Every driven signal gets a default first; later
        # assignments in the taken branch override it.
        # NOTE(review): no decorator or call of logic() is visible here;
        # presumably a combinational-block decorator line was lost.
        def logic():
            self.io.resp.valid @= False
            self.io.req.ready @= False
            self.io.nasti.ar.valid @= False
            self.io.nasti.aw.valid @= False

            d_wen @= False

            data_wen @= False
            data_wdata @= m.UInt[b_bits](0)
            state.I @= state.O

            tags_wen @= False
            v_wen @= False

            if state.O == State.IDLE:
                # A request is only considered when the response side is
                # ready as well.
                if self.io.req.valid & self.io.resp.ready:
                    # Hit: the set is valid and the stored tag matches.
                    if v_rdata & (tags_rdata == tag):
                        # Non-zero mask means a store: write the merged line
                        # and mark the set dirty.
                        if req.mask.reduce_or():
                            d_wen @= True
                            data_wdata @= write
                            data_wen @= True
                        self.io.req.ready @= True
                        self.io.resp.valid @= True
                        # NOTE(review): as shown, the write-back (aw) and
                        # refill (ar) requests sit inside the hit branch and
                        # the WRITE transition is immediately overridden by
                        # READ. `else:` lines appear to have been lost from
                        # this excerpt -- confirm against the original file.
                        if d_rdata:
                            self.io.nasti.aw.valid @= True
                            state.I @= State.WRITE
                            data_wdata @= 0
                            data_wen @= True
                            self.io.nasti.ar.valid @= True
                            state.I @= State.READ
            elif state.O == State.WRITE:
                # Stay until the write beat counter reports completion.
                if w_done:
                    state.I @= State.WRITE_ACK
            elif state.O == State.WRITE_ACK:
                # On the write response: zero the line, issue the read
                # address and start the refill.
                if self.io.nasti.b.valid:
                    data_wdata @= 0
                    data_wen @= True
                    self.io.nasti.ar.valid @= True
                    state.I @= State.READ
            elif state.O == State.READ:
                # OR each incoming beat into the line at bit position
                # r_cnt * x_data_bits.
                if self.io.nasti.r.valid:
                    data_wdata @= read | (
                        m.zext_to(self.io.nasti.r.data.data, b_bits) <<
                        (m.zext_to(r_cnt, b_bits) * nasti_params.x_data_bits))
                    data_wen @= True
                # Refill finished: record the tag, mark valid, go idle.
                if r_done:
                    tags_wen @= True
                    v_wen @= True
                    state.I @= State.IDLE
Exemplo n.º 10
    class DUT(m.Circuit):
        # Cache testbench: drives a Cache instance (dut) and a GoldCache
        # instance (gold) from one shared memory model and checks that their
        # memory-side traffic agrees.
        # NOTE(review): this excerpt has lost lines -- NastiParameters(...),
        # both CounterModM(...) calls, the R-channel payload, the mem write
        # call head, the heads of three checks and parts of the TRACE
        # displays are cut off. Comments describe only what is visible.
        io = m.IO(done=m.Out(m.Bit)) + m.ClockIO()
        # Fixed geometry: 32-bit words, 256 sets, 4 words per line.
        x_len = 32
        n_sets = 256
        b_bytes = 4 * (x_len >> 3)
        b_len = m.bitutils.clog2(b_bytes)
        s_len = m.bitutils.clog2(n_sets)
        t_len = x_len - (s_len + b_len)
        nasti_params = NastiParameters(data_bits=64,

        # Device under test. Each of its NASTI channels goes through
        # make_Queue(..., 32) on the way to or from dut_mem.
        dut = Cache(x_len, 1, n_sets, b_bytes)()
        dut_mem = make_NastiIO(nasti_params).undirected_t(name="dut_mem")
        dut_mem.ar @= make_Queue(dut.nasti.ar, 32)
        dut_mem.aw @= make_Queue(dut.nasti.aw, 32)
        dut_mem.w @= make_Queue(dut.nasti.w, 32)
        dut.nasti.b @= make_Queue(dut_mem.b, 32)
        dut.nasti.r @= make_Queue(dut_mem.r, 32)

        # Reference model. Its CPU-side req/resp are queued too, in addition
        # to the NASTI channels.
        gold = GoldCache(x_len, 1, n_sets, b_bytes)()
        gold_req = type(gold.req).undirected_t(name="gold_req")
        gold_resp = type(gold.resp).undirected_t(name="gold_resp")
        gold_mem = make_NastiIO(nasti_params).undirected_t(name="gold_mem")
        gold.req @= make_Queue(gold_req, 32)
        gold_resp @= make_Queue(gold.resp, 32)
        gold_mem.ar @= make_Queue(gold.nasti.ar, 32)
        gold_mem.aw @= make_Queue(gold.nasti.aw, 32)
        gold_mem.w @= make_Queue(gold.nasti.w, 32)
        gold.nasti.b @= make_Queue(gold_mem.b, 32)
        gold.nasti.r @= make_Queue(gold_mem.r, 32)

        size = m.bitutils.clog2(nasti_params.x_data_bits // 8)
        b_bits = b_bytes << 3
        data_beats = b_bits // nasti_params.x_data_bits

        # Backing store: 1 << 20 entries, each one NASTI data word wide.
        mem = m.Memory(1 << 20, m.UInt[nasti_params.x_data_bits])()

        # States of the memory model FSM (mem_fsm below).
        class MemState(m.Enum):
            IDLE = 0
            WRITE = 1
            WRITE_ACK = 2
            READ = 3

        mem_state = m.Register(init=MemState.IDLE)()

        # Beat counters: the write counter advances when both caches present
        # a valid W beat in WRITE; the read counter advances when both are
        # ready for an R beat in READ.
        write_counter = mantle.CounterModM(data_beats,
        write_counter.CE @= m.enable((mem_state.O == MemState.WRITE)
                                     & dut_mem.w.valid & gold_mem.w.valid)
        read_counter = mantle.CounterModM(data_beats,
        read_counter.CE @= m.enable((mem_state.O == MemState.READ)
                                    & dut_mem.r.ready & gold_mem.r.ready)

        # B and R valid come straight from the memory model state.
        dut_mem.b.valid @= mem_state.O == MemState.WRITE_ACK
        dut_mem.b.data @= NastiWriteResponseChannel(nasti_params, 0)
        dut_mem.r.valid @= mem_state.O == MemState.READ
        # NOTE(review): the data argument is truncated; the visible part
        # addresses gold_mem.ar.data.addr + read_counter.O, sliced [:20].
        dut_mem.r.data @= NastiReadDataChannel(
            nasti_params, 0,
                ((gold_mem.ar.data.addr) +
                 m.zext_to(read_counter.O, nasti_params.x_addr_bits))[:20]),
        # gold_mem mirrors the dut_mem handshake and response signals, so
        # both caches observe the same memory behaviour.
        gold_mem.ar.ready @= dut_mem.ar.ready
        gold_mem.aw.ready @= dut_mem.aw.ready
        gold_mem.w.ready @= dut_mem.w.ready
        gold_mem.b.valid @= dut_mem.b.valid
        gold_mem.b.data @= dut_mem.b.data
        gold_mem.r.valid @= dut_mem.r.valid
        gold_mem.r.data @= dut_mem.r.data

        # Two write sources share the memory write port. When mem_wen1 is
        # set, mem_wdata1/mem_waddr1 are used; otherwise the dut W-channel
        # data and aw.addr + write_counter.O.
        # mem_wdata0 is declared but not used in the visible code.
        # NOTE(review): the head of the write call is missing here.
        mem_wen0 = m.Bit(name="mem_wen0")
        mem_wdata0 = m.UInt[nasti_params.x_data_bits](name="mem_wdata0")
        mem_wen1 = m.Bit(name="mem_wen1")
        mem_wdata1 = m.UInt[nasti_params.x_data_bits](name="mem_wdata1")
        mem_waddr1 = m.UInt[20](name="mem_waddr1")
            m.mux([dut_mem.w.data.data, mem_wdata1], mem_wen1),
            m.mux([((dut_mem.aw.data.addr) +
                    m.zext_to(write_counter.O, nasti_params.x_addr_bits))[:20],
                   mem_waddr1], mem_wen1), m.enable(mem_wen0 | mem_wen1))
        # m.display("mem_wen0 = %x, mem_wen1 = %x", mem_wen0,
        #           mem_wen1).when(m.posedge(io.CLK))
        # m.display("dut_mem.w.valid = %x",
        #           dut_mem.w.valid).when(m.posedge(io.CLK))
        # m.display("gold_mem.w.valid = %x",
        #           gold_mem.w.valid).when(m.posedge(io.CLK))

        # Fragments of three dut-vs-gold checks (aw.addr, ar.addr, w.data).
        # Each condition is an implication written as a chain of ORs; the
        # call heads and `failure_msg=(` lines are missing from this excerpt.
            (mem_state.O != MemState.IDLE)
            | ~(gold_mem.aw.valid & dut_mem.aw.valid) |
            (dut_mem.aw.data.addr == gold_mem.aw.data.addr),
                "[dut_mem.aw.data.addr] %x != [gold_mem.aw.data.addr] %x",
                dut_mem.aw.data.addr, gold_mem.aw.data.addr))

            (mem_state.O != MemState.IDLE)
            | ~(gold_mem.aw.valid & dut_mem.aw.valid)
            | ~(gold_mem.ar.valid & dut_mem.ar.valid) |
            (dut_mem.ar.data.addr == gold_mem.ar.data.addr),
                "[dut_mem.ar.data.addr] %x != [gold_mem.ar.data.addr] %x",
                dut_mem.ar.data.addr, gold_mem.ar.data.addr))

            (mem_state.O != MemState.WRITE)
            | ~(gold_mem.w.valid & dut_mem.w.valid) |
            (dut_mem.w.data.data == gold_mem.w.data.data),
                "[dut_mem.w.data.data] %x != [gold_mem.w.data.data] %x",
                dut_mem.w.data.data, gold_mem.w.data.data))

        # Memory model FSM. Ready signals and mem_wen0 default to False and
        # the state holds unless a branch below overrides it.
        # NOTE(review): no decorator or call of mem_fsm() is visible here;
        # presumably a combinational-block decorator line was lost.
        def mem_fsm():
            dut_mem.w.ready @= False
            dut_mem.aw.ready @= False
            dut_mem.ar.ready @= False

            mem_wen0 @= False

            mem_state.I @= mem_state.O

            if mem_state.O == MemState.IDLE:
                # A transaction starts only when BOTH caches present the
                # request; a write is checked before a read.
                if gold_mem.aw.valid & dut_mem.aw.valid:
                    mem_state.I @= MemState.WRITE
                elif gold_mem.ar.valid & dut_mem.ar.valid:
                    mem_state.I @= MemState.READ
            elif mem_state.O == MemState.WRITE:
                # Accept and store a beat when both W channels are valid.
                if gold_mem.w.valid & dut_mem.w.valid:
                    mem_wen0 @= True
                    dut_mem.w.ready @= True
                # After the final beat the AW request is accepted too.
                if write_counter.COUT:
                    dut_mem.aw.ready @= True
                    mem_state.I @= MemState.WRITE_ACK
            elif mem_state.O == MemState.WRITE_ACK:
                # Wait until both caches accept the write response.
                if gold_mem.b.ready & dut_mem.b.ready:
                    mem_state.I @= MemState.IDLE
            elif mem_state.O == MemState.READ:
                # After the final beat the AR request is accepted.
                if read_counter.COUT:
                    dut_mem.ar.ready @= True
                    mem_state.I @= MemState.IDLE

        # Optional trace of memory writes and reads.
        # NOTE(review): both display calls are truncated in this excerpt.
        if TRACE:
            m.display("[%0t]: [write] mem[%x] <= %x", m.time(),
                      mem.WADDR.value(), dut_mem.w.data.data).when(
            m.display("[%0t]: [read] mem[%x] => %x", m.time(),
                          io.CLK)).if_((mem_state.O == MemState.READ)
                                       & dut_mem.r.ready & gold_mem.r.ready)

        def rand_data(nasti_params):
            # Random BitVector that is nasti_params.x_data_bits wide, built
            # one byte at a time starting from the least-significant byte.
            width = nasti_params.x_data_bits
            word_t = BitVector[width]
            value = word_t(0)
            for shift in range(0, (width // 8) * 8, 8):
                value |= word_t(random.randint(0, 0xff) << shift)
            return value

        def rand_mask(x_len):
            # Random byte mask with one bit per byte of an x_len-bit word.
            # The draw spans 1 .. 2**n_bytes - 2, so neither the all-zeros
            # nor the all-ones mask is ever produced.
            n_bytes = x_len // 8
            mask_t = BitVector[n_bytes]
            upper = (1 << n_bytes) - 2
            return mask_t(random.randint(1, upper))

        def make_test(rand_data, nasti_params, x_len):
            # Factory instead of a plain nested def: a function defined
            # directly in the class body cannot see the class-level names, so
            # they are passed in here and captured by the closure.
            no_mask = BitVector[x_len // 8](0)

            def test(b_bits, tag, idx, off, mask=no_mask):
                # Random line data: b_bits // x_data_bits rand_data() words in
                # total, each further word appended with .concat().
                line = rand_data(nasti_params)
                extra_words = (b_bits // nasti_params.x_data_bits) - 1
                for _ in range(extra_words):
                    line = line.concat(rand_data(nasti_params))
                # Pack the fields in the order off, idx, tag, data, mask.
                packed = m.concat(off, idx, tag, line, mask)
                return m.uint(packed)

            return test

        # Stimulus generation and the test sequencer for the cache testbench.
        # NOTE(review): this excerpt has lost lines -- the bodies of the
        # tags/idxs loops, the init_data fill, the closing bracket of
        # test_vec, both CounterModM(...) calls and the head of the final
        # check are missing. Comments describe only what is visible.
        test = make_test(rand_data, nasti_params, x_len)

        tags = []
        for _ in range(3):
        idxs = []
        for _ in range(2):
        # Six random offsets; `& -4` clears the two low bits of each.
        offs = []
        for _ in range(6):
            offs.append(BitVector.random(b_len) & -4)

        # Memory preload: one address per (tag, idx, beat) combination.
        # init_data is never appended to in the visible code.
        init_addr = []
        init_data = []
        _iter = itertools.product(tags, idxs, range(0, data_beats))
        for tag, idx, off in _iter:
            init_addr.append(m.uint(m.concat(BitVector[b_len](off), idx, tag)))

        # Directed sequence; the trailing comment on each entry names the
        # scenario it is meant to exercise. An entry built without a mask
        # uses the all-zero default mask.
        test_vec = [
            test(b_bits, tags[0], idxs[0], offs[0]),  # 0: read miss
            test(b_bits, tags[0], idxs[0], offs[1]),  # 1: read hit
            test(b_bits, tags[1], idxs[0], offs[0]),  # 2: read miss
            test(b_bits, tags[1], idxs[0], offs[2]),  # 3: read hit
            test(b_bits, tags[1], idxs[0], offs[3]),  # 4: read hit
            test(b_bits, tags[1], idxs[0], offs[4],
                 rand_mask(x_len)),  # 5: write hit  # noqa
            test(b_bits, tags[1], idxs[0], offs[4]),  # 6: read hit
            test(b_bits, tags[2], idxs[0],
                 offs[5]),  # 7: read miss & write back  # noqa
            test(b_bits, tags[0], idxs[1], offs[0],
                 rand_mask(x_len)),  # 8: write miss  # noqa
            test(b_bits, tags[0], idxs[1], offs[0]),  # 9: read hit
            test(b_bits, tags[0], idxs[1], offs[1]),  # 10: read hit
            test(b_bits, tags[1], idxs[1], offs[2],
                 rand_mask(x_len)),  # 11: write miss & write back  # noqa
            test(b_bits, tags[1], idxs[1], offs[3]),  # 12: read hit
            test(b_bits, tags[2], idxs[1], offs[4]),  # 13: read write back
            test(b_bits, tags[2], idxs[1], offs[5])  # 14: read hit

        # Sequencer states used by state_fsm below.
        class TestState(m.Enum):
            INIT = 0
            START = 1
            WAIT = 2
            DONE = 3

        state = m.Register(init=TestState.INIT)()
        timeout = m.Register(m.UInt[32])()
        # init_counter steps through the preload entries during INIT.
        init_m = len(init_addr) - 1
        init_counter = mantle.CounterModM(init_m,
        init_counter.CE @= m.enable(state.O == TestState.INIT)

        # test_counter advances to the next vector on each DONE cycle.
        test_m = len(test_vec) - 1
        test_counter = mantle.CounterModM(test_m,
        test_counter.CE @= m.enable(state.O == TestState.DONE)
        # Unpack the current vector. The field order matches the concat in
        # make_test's test(): off, idx, tag, data, mask.
        curr_vec = m.mux(test_vec, test_counter.O)
        mask = (curr_vec >> (b_len + s_len + t_len + b_bits))[:x_len // 8]
        data = (curr_vec >> (b_len + s_len + t_len))[:b_bits]
        tag = (curr_vec >> (b_len + s_len))[:t_len]
        idx = (curr_vec >> b_len)[:s_len]
        off = curr_vec[:b_len]

        dut.cpu.req.data.addr @= m.concat(off, idx, tag)
        # TODO: Is truncating this fine?
        req_data = data[:x_len]
        dut.cpu.req.data.data @= req_data
        dut.cpu.req.data.mask @= mask
        # The dut sees the request during WAIT; the gold model receives the
        # same payload during START and its response is consumed in DONE.
        dut.cpu.req.valid @= state.O == TestState.WAIT
        dut.cpu.abort @= 0
        gold_req.data @= dut.cpu.req.data.value()
        gold_req.valid @= state.O == TestState.START
        gold_resp.ready @= state.O == TestState.DONE

        # Preload address/data pair selected by the init counter.
        mem_waddr1 @= m.mux(init_addr, init_counter.O)[:20]
        mem_wdata1 @= m.mux(init_data, init_counter.O)

        check_resp_data = m.Bit()
        if TRACE:
            m.display("[%0t]: [init] mem[%x] <= %x", m.time(),
                      mem_waddr1, mem_wdata1)\
                .if_(state.O == TestState.INIT)

        # Test sequencer. Defaults: timeout and state hold, no preload write,
        # no response check.
        # NOTE(review): no decorator or call of state_fsm() is visible here;
        # presumably a combinational-block decorator line was lost.
        def state_fsm():
            timeout.I @= timeout.O
            mem_wen1 @= m.bit(False)
            check_resp_data @= m.bit(False)
            state.I @= state.O
            if state.O == TestState.INIT:
                # Write one preload entry per cycle until the counter wraps.
                mem_wen1 @= m.bit(True)
                if init_counter.COUT:
                    state.I @= TestState.START
            elif state.O == TestState.START:
                # Once the gold request is accepted, clear the timeout and
                # present the same request to the dut.
                if gold_req.ready:
                    timeout.I @= m.bits(0, 32)
                    state.I @= TestState.WAIT
            elif state.O == TestState.WAIT:
                timeout.I @= timeout.O + 1
                # Both responses present: compare data only when the mask is
                # all zero.
                if dut.cpu.resp.valid & gold_resp.valid:
                    if ~mask.reduce_or():
                        check_resp_data @= m.bit(True)
                    state.I @= TestState.DONE
            elif state.O == TestState.DONE:
                state.I @= TestState.START

        # While in WAIT the timeout counter must stay below 100.
        f.assert_immediate((state.O != TestState.WAIT) | (timeout.O < 100))
        # Tail of a dut-vs-gold response data check gated by check_resp_data;
        # its call head is missing from this excerpt.
            ~check_resp_data | (dut.cpu.resp.data.data == gold_resp.data.data),
            failure_msg=("dut.cpu.resp.data.data => %x != %x",
                         dut.cpu.resp.data.data, gold_resp.data.data))
        # m.display("mem_state=%x", mem_state.O).when(m.posedge(io.CLK))
        # m.display("test_state=%x", state.O).when(m.posedge(io.CLK))
        # m.display("dut req valid = %x",
        #           dut.cpu.req.valid).when(m.posedge(io.CLK))
        # m.display("gold req valid = %x, ready = %x", gold_req.valid,
        #           gold_req.ready).when(m.posedge(io.CLK))
        # m.display("[%0t]: dut resp data = %x, gold resp data = %x", m.time(),
        #           dut.cpu.resp.data.data, gold_resp.data.data)\
        #     .when(m.posedge(io.CLK))
        # The test is done when the vector counter reports COUT.
        io.done @= test_counter.COUT
Exemplo n.º 11
    def __init__(self,
        # Builds the processor datapath: fetch, execute and write-back logic
        # around the CSRGen, RegFile, ALU, ImmGen and BrCondArea instances.
        # NOTE(review): this excerpt has lost lines -- the rest of the
        # signature, the pc register initialiser, parts of the npc mux nest,
        # the dcache.req.valid expression and the opening `{` / first entries
        # of the dict_lookup tables are missing. Comments describe only what
        # is visible.
        self.io = make_DatapathIO(x_len) + m.ClockIO(has_reset=True)
        csr = CSRGen(x_len)()
        reg_file = RegFile(x_len)()
        alu = ALU(x_len)()
        imm_gen = ImmGen(x_len)()
        br_cond = BrCondArea(x_len)()

        # Fetch / Execute Registers
        fe_inst = m.Register(init=Instructions.NOP, has_enable=True)()
        fe_pc = m.Register(m.UInt[x_len], has_enable=True)()

        # Execute / Write Back Registers
        ew_inst = m.Register(init=Instructions.NOP)()
        ew_pc = m.Register(m.UInt[x_len])()
        ew_alu = m.Register(m.UInt[x_len])()
        csr_in = m.Register(m.UInt[x_len])()

        # Control signals
        st_type = m.Register(type(self.io.ctrl.st_type).undirected_t)()
        ld_type = m.Register(type(self.io.ctrl.ld_type).undirected_t)()
        wb_sel = m.Register(type(self.io.ctrl.wb_sel).undirected_t)()
        wb_en = m.Register(m.Bit)()
        csr_cmd = m.Register(type(self.io.ctrl.csr_cmd).undirected_t)()
        illegal = m.Register(m.Bit)()
        pc_check = m.Register(m.Bit)()

        # Fetch
        # `started` is RESET passed through one register.
        started = m.Register(m.Bit)()(m.bit(self.io.RESET))
        # The pipeline stalls whenever either cache response is not valid.
        stall = ~self.io.icache.resp.valid | ~self.io.dcache.resp.valid
        pc = m.Register(init=UIntVector[x_len](Const.PC_START) -
        # Next-PC mux nest, outermost selector first: `stall` picks pc.O;
        # csr.expt picks csr.evec; then pc_sel == PC_EPC (its selected input
        # is not visible here); then PC_ALU or a taken branch picks alu.sum_
        # with bit 0 cleared; finally pc_sel == PC_0 picks pc.O, else pc + 4.
        npc = m.mux([
                        m.mux([pc.O + m.uint(4, x_len), pc.O],
                              self.io.ctrl.pc_sel == PC_0), alu.sum_ >> 1 << 1
                    ], (self.io.ctrl.pc_sel == PC_ALU) | br_cond.taken),
                ], self.io.ctrl.pc_sel == PC_EPC), csr.evec
            ], csr.expt), pc.O
        ], stall)

        # The fetched instruction is replaced by NOP when `started` is set,
        # on inst_kill, on a taken branch, or on an exception.
        inst = m.mux([self.io.icache.resp.data.data, Instructions.NOP], started
                     | self.io.ctrl.inst_kill | br_cond.taken | csr.expt)

        # The I$ is addressed with the NEXT pc and is never written
        # (data and mask are tied to 0).
        pc.I @= npc
        self.io.icache.req.data.addr @= npc
        self.io.icache.req.data.data @= 0
        self.io.icache.req.data.mask @= 0
        self.io.icache.req.valid @= ~stall
        self.io.icache.abort @= False

        # Fetch/execute registers only advance when not stalled.
        fe_pc.I @= pc.O
        fe_pc.CE @= m.enable(~stall)
        fe_inst.I @= inst
        fe_inst.CE @= m.enable(~stall)

        # Execute
        # Decode
        self.io.ctrl.inst @= fe_inst.O

        # reg_file read
        rs1_addr = fe_inst.O[15:20]
        rs2_addr = fe_inst.O[20:25]
        reg_file.raddr1 @= rs1_addr
        reg_file.raddr2 @= rs2_addr

        # gen immediates
        imm_gen.inst @= fe_inst.O
        imm_gen.sel @= self.io.ctrl.imm_sel

        # bypass
        # Forward ew_alu.O instead of the register file value when the
        # write-back stage is enabled, the source register is non-zero and
        # equals the write-back destination, and wb_sel is WB_ALU.
        wb_rd_addr = ew_inst.O[7:12]
        rs1_hazard = wb_en.O & rs1_addr.reduce_or() & (rs1_addr == wb_rd_addr)
        rs2_hazard = wb_en.O & rs2_addr.reduce_or() & (rs2_addr == wb_rd_addr)
        rs1 = m.mux([reg_file.rdata1, ew_alu.O],
                    (wb_sel.O == WB_ALU) & rs1_hazard)
        rs2 = m.mux([reg_file.rdata2, ew_alu.O],
                    (wb_sel.O == WB_ALU) & rs2_hazard)

        # ALU operations
        alu.A @= m.mux([fe_pc.O, rs1], self.io.ctrl.A_sel == A_RS1)
        alu.B @= m.mux([imm_gen.O, rs2], self.io.ctrl.B_sel == B_RS2)
        alu.op @= self.io.ctrl.alu_op

        # Branch condition calc
        br_cond.rs1 @= rs1
        br_cond.rs2 @= rs2
        br_cond.br_type @= self.io.ctrl.br_type

        # D$ access
        # The address has its two low bits cleared; while stalled it comes
        # from ew_alu.O rather than the live ALU sum.
        daddr = m.mux([alu.sum_, ew_alu.O], stall) >> 2 << 2
        # Store shift amount: sum bit 1 contributes 16, sum bit 0 contributes
        # 8.
        w_offset = ((m.bits(alu.sum_[1], x_len) << 4) |
                    (m.bits(alu.sum_[0], x_len) << 3))
        # NOTE(review): the rest of this expression is missing here.
        self.io.dcache.req.valid @= ~stall & (self.io.ctrl.st_type.reduce_or()
        self.io.dcache.req.data.addr @= daddr
        self.io.dcache.req.data.data @= rs2 << w_offset
        # Byte mask by store type: SW -> 0b1111, SH -> 0b11 and SB -> 0b1,
        # the latter two shifted by alu.sum_[0:2]; default 0. While stalled
        # the registered st_type is the lookup key.
        self.io.dcache.req.data.mask @= m.dict_lookup(
                ST_SW: m.bits(0b1111, 4),
                ST_SH: m.bits(0b11, 4) << m.zext(alu.sum_[0:2], 2),
                ST_SB: m.bits(0b1, 4) << m.zext(alu.sum_[0:2], 2),
            }, m.mux([self.io.ctrl.st_type, st_type.O], stall), m.bits(0, 4))

        # Pipelining
        # Execute -> write-back register update. Every register holds its
        # value by default.
        # NOTE(review): no decorator or call of pipeline_logic() is visible
        # here; presumably a combinational-block decorator line was lost.
        def pipeline_logic():
            ew_pc.I @= ew_pc.O
            ew_inst.I @= ew_inst.O
            ew_alu.I @= ew_alu.O
            csr_in.I @= csr_in.O
            st_type.I @= st_type.O
            ld_type.I @= ld_type.O
            wb_sel.I @= wb_sel.O
            wb_en.I @= wb_en.O
            csr_cmd.I @= csr_cmd.O
            illegal.I @= illegal.O
            pc_check.I @= pc_check.O
            # `&` binds tighter than `|`: RESET | (~stall & csr.expt).
            # On reset or an un-stalled exception the control registers are
            # cleared (wb_sel and the data registers are left holding).
            if m.bit(self.io.RESET) | ~stall & csr.expt:
                st_type.I @= 0
                ld_type.I @= 0
                wb_en.I @= 0
                csr_cmd.I @= 0
                illegal.I @= False
                pc_check.I @= False
            # Normal advance: latch the execute-stage values.
            elif ~stall & ~csr.expt:
                ew_pc.I @= fe_pc.O
                ew_inst.I @= fe_inst.O
                ew_alu.I @= alu.O
                csr_in.I @= m.mux([rs1, imm_gen.O],
                                  self.io.ctrl.imm_sel == IMM_Z)
                st_type.I @= self.io.ctrl.st_type
                ld_type.I @= self.io.ctrl.ld_type
                wb_sel.I @= self.io.ctrl.wb_sel
                wb_en.I @= self.io.ctrl.wb_en
                csr_cmd.I @= self.io.ctrl.csr_cmd
                illegal.I @= self.io.ctrl.illegal
                pc_check.I @= self.io.ctrl.pc_sel == PC_ALU

        # Load
        # Shift the D$ response right by the amount encoded in the low two
        # address bits held in ew_alu, then extend according to ld_type; the
        # default is the unshifted response word.
        l_offset = ((m.uint(ew_alu.O[1], x_len) << 4) |
                    (m.uint(ew_alu.O[0], x_len) << 3))
        l_shift = self.io.dcache.resp.data.data >> l_offset
        load = m.dict_lookup(
                LD_LH: m.sext_to(m.sint(l_shift[0:16]), x_len),
                LD_LHU: m.sint(m.zext_to(l_shift[0:16], x_len)),
                LD_LB: m.sext_to(m.sint(l_shift[0:8]), x_len),
                LD_LBU: m.sint(m.zext_to(l_shift[0:8], x_len))
            }, ld_type.O, m.sint(self.io.dcache.resp.data.data))

        # CSR access
        csr.stall @= stall
        csr.I @= csr_in.O
        csr.cmd @= csr_cmd.O
        csr.inst @= ew_inst.O
        csr.pc @= ew_pc.O
        csr.addr @= ew_alu.O
        csr.illegal @= illegal.O
        csr.pc_check @= pc_check.O
        csr.ld_type @= ld_type.O
        csr.st_type @= st_type.O
        self.io.host @= csr.host

        # Regfile write
        # Write-back value selected by wb_sel; the default is ew_alu.O.
        reg_write = m.dict_lookup(
                WB_MEM: m.uint(load),
                WB_PC4: (ew_pc.O + 4),
                WB_CSR: csr.O
            }, wb_sel.O, ew_alu.O)

        # A write only happens when enabled, not stalled and no exception.
        reg_file.wen @= m.enable(wb_en.O & ~stall & ~csr.expt)
        reg_file.waddr @= wb_rd_addr
        reg_file.wdata @= reg_write

        # Abort store when there's an exception
        self.io.dcache.abort @= csr.expt
Exemplo n.º 12
  def __init__(self, inputStartAddr: int, outputStartAddr: int, busWidth: int,
                     wordWidth: int, numWordsPerGroup: int, metricWidth: int):
    # Top level of an accelerator that streams busWidth-wide input lines from
    # memory into a numWordsPerGroup x numWordsPerGroup grid of FeaturePair
    # units, then shifts their results out and writes them back as 64-bit
    # words packed into busWidth-wide lines.
    # NOTE(review): this excerpt has lost lines -- the continuation after the
    # trailing backslash in the round-up expression, the statement that fills
    # `featurePairs`, an `else:` in the neighbour chain, and the tail of the
    # sendingLine branch. Comments describe only what is visible.
    self.inputStartAddr = inputStartAddr
    self.outputStartAddr = outputStartAddr
    self.busWidth = busWidth
    self.io = io = m.IO(
      inputMemAddr = m.Out(m.UInt[64]),
      inputMemAddrValid = m.Out(m.Bit),
      inputMemAddrLen = m.Out(m.UInt[8]),
      inputMemAddrReady = m.In(m.Bit),
      inputMemBlock = m.In(m.UInt[busWidth]),
      inputMemBlockValid = m.In(m.Bit),
      inputMemBlockReady = m.Out(m.Bit),
      outputMemAddr = m.Out(m.UInt[64]),
      outputMemAddrValid = m.Out(m.Bit),
      outputMemAddrLen = m.Out(m.UInt[8]),
      outputMemAddrId = m.Out(m.UInt[16]),
      outputMemAddrReady = m.In(m.Bit),
      outputMemBlock = m.Out(m.UInt[busWidth]),
      outputMemBlockValid = m.Out(m.Bit),
      outputMemBlockLast = m.Out(m.Bit),
      outputMemBlockReady = m.In(m.Bit),
      finished = m.Out(m.Bit)
    ) + m.ClockIO(has_reset = True)

    # Output words are 64 bits, so a line must hold at least one of them.
    assert(busWidth >= 64)
    numFeaturePairs = numWordsPerGroup * numWordsPerGroup
    outputWordsInLine = busWidth // 64
    numOutputWords = numFeaturePairs * (1 << (2 * wordWidth))
    # round up to nearest full line
    # NOTE(review): the line continued by the backslash below is missing, so
    # as shown the continuation runs into the bytesInLine assignment.
    numOutputWords = (numOutputWords + outputWordsInLine - 1) // outputWordsInLine * \
    bytesInLine = busWidth // 8

    # Top-level phases, in the order the FSM below walks through them.
    class TopState(m.Enum):
      inputLengthAddr = 0
      loadInputLength = 1
      mainLoop = 2
      pause = 3
      writeOutput = 4
      finished = 5

    # Sub-phases used while in TopState.writeOutput.
    class OutputState(m.Enum):
      sendingAddr = 0
      fillingLine = 1
      sendingLine = 2

    # State and counters (reg / reg_init are helpers defined elsewhere).
    state = reg_init(TopState, TopState.inputLengthAddr)
    inputLength = reg(m.UInt[32])
    inputAddrLineCount = reg_init(m.UInt[32], 0)
    inputDataLineCount = reg_init(m.UInt[32], 0)
    outputState = reg_init(OutputState, OutputState.sendingAddr)
    outputWordCounter = reg_init(m.UInt[m.bitutils.clog2(numOutputWords + 1)], 0)
    outputLine = reg(m.Array[outputWordsInLine, m.UInt[64]])

    # One FeaturePair per (i, j). Within an input line, feature one is word
    # i, feature two is word j + numWordsPerGroup, and the metric occupies
    # metricWidth bits starting at bit 2 * numWordsPerGroup * wordWidth.
    # NOTE(review): nothing is appended to featurePairs in the visible code,
    # yet it is indexed further down -- an append line appears to be missing.
    featurePairs = []
    for i in range(numWordsPerGroup):
      for j in range(numWordsPerGroup):
        idx = i * numWordsPerGroup + j
        featurePair = FeaturePair(wordWidth, metricWidth, idx)()
        featurePair.inputMetric @= io.inputMemBlock[2 * numWordsPerGroup * wordWidth:
          2 * numWordsPerGroup * wordWidth + metricWidth]
        featurePair.inputFeatureOne @= io.inputMemBlock[i * wordWidth:(i + 1) * wordWidth]
        featurePair.inputFeatureTwo @= io.inputMemBlock[(j + numWordsPerGroup) * wordWidth:
          (j + 1 + numWordsPerGroup) * wordWidth]
        featurePair.inputValid @= io.inputMemBlockValid & (state.O == TopState.mainLoop)
        featurePair.shiftMode @= state.O == TopState.writeOutput
        featurePair.doShift @= (state.O == TopState.writeOutput) & (outputState.O == OutputState.fillingLine)
    # Mark the input bits above the fields used here as intentionally unused.
    io.inputMemBlock[max(32, 2 * numWordsPerGroup * wordWidth + metricWidth):].unused()
    # Chain the pairs: each takes its neighbour input from the next pair's
    # `out`, and the last one is fed 0.
    # NOTE(review): an `else:` line appears to be missing between the two
    # assignments below.
    for i in range(numFeaturePairs):
      if i == numFeaturePairs - 1:
        featurePairs[i].neighborOutputIn @= 0
        featurePairs[i].neighborOutputIn @= featurePairs[i + 1].out

    # An input address is requested in inputLengthAddr, and in mainLoop
    # while fewer lines have been requested than inputLength.
    io.inputMemAddrValid @= (state.O == TopState.inputLengthAddr) | \
      ((state.O == TopState.mainLoop) & (inputAddrLineCount.O != inputLength.O))
    io.inputMemBlockReady @= (state.O == TopState.loadInputLength) | (state.O == TopState.mainLoop)
    # Output byte address: the word counter times 8, plus outputStartAddr.
    io.outputMemAddr @= m.zext_to(sl(outputWordCounter.O, 3), 64) + outputStartAddr
    io.outputMemAddrValid @= (state.O == TopState.writeOutput) & (outputState.O == OutputState.sendingAddr)
    io.outputMemAddrLen @= 0
    io.outputMemAddrId @= 0
    io.outputMemBlock @= m.as_bits(outputLine.O)
    io.outputMemBlockValid @= (state.O == TopState.writeOutput) & (outputState.O == OutputState.sendingLine)
    io.outputMemBlockLast @= True
    io.finished @= state.O == TopState.finished

    # hard to put this inside the inline comb
    # Output line shift register: while filling, every word takes the value
    # of the next-higher word and the top word takes featurePairs[0].out;
    # otherwise each word holds its value.
    cond = (state.O == TopState.writeOutput) & (outputState.O == OutputState.fillingLine)
    outputLine.I[outputWordsInLine - 1] @= \
      m.mux([outputLine.O[outputWordsInLine - 1], featurePairs[0].out], cond)
    for i in range(len(outputLine.I) - 1):
      outputLine.I[i] @= m.mux([outputLine.O[i], outputLine.O[i + 1]], cond)

    # Main FSM.
    # NOTE(review): no decorator or call of logic() is visible here;
    # presumably a combinational-block decorator line was lost.
    def logic():
      io.inputMemAddr @= inputStartAddr
      io.inputMemAddrLen @= 0
      # default values required
      state.I @= state.O
      inputAddrLineCount.I @= inputAddrLineCount.O
      inputDataLineCount.I @= inputDataLineCount.O
      outputState.I @= outputState.O
      outputWordCounter.I @= outputWordCounter.O
      if state.O == TopState.inputLengthAddr:
        # Request the line at inputStartAddr (the defaults above).
        if io.inputMemAddrReady:
          state.I @= TopState.loadInputLength
      elif state.O == TopState.loadInputLength:
        # The input length is the low 32 bits of the first line.
        if io.inputMemBlockValid:
          inputLength.I @= io.inputMemBlock[:32]
          state.I @= TopState.mainLoop
      elif state.O == TopState.mainLoop:
        io.inputMemAddr @= m.zext_to(sl(inputAddrLineCount.O, m.bitutils.clog2(bytesInLine)), 64) + \
          (inputStartAddr + bytesInLine) # final term is start offset of main data stream
        # Length field: 63 when remainingAddrLen exceeds 63, otherwise the
        # low 8 bits of remainingAddrLen.
        remainingAddrLen = inputLength.O - inputAddrLineCount.O - 1
        io.inputMemAddrLen @= 63 if remainingAddrLen > 63 else remainingAddrLen[:8]
        # On acceptance the requested-line count advances by 64, or jumps
        # straight to inputLength for the final request.
        if io.inputMemAddrReady:
          inputAddrLineCount.I @= inputAddrLineCount.O + 64 if remainingAddrLen > 63 else inputLength.O
        # Count received lines; leave after the last one.
        if io.inputMemBlockValid:
          inputDataLineCount.I @= inputDataLineCount.O + 1
          if inputDataLineCount.O == inputLength.O - 1:
            state.I @= TopState.pause
      elif state.O == TopState.pause:
        # required to flush FeaturePair pipeline before shiftMode is set
        state.I @= TopState.writeOutput
      elif state.O == TopState.writeOutput:
        if outputState.O == OutputState.sendingAddr:
          if io.outputMemAddrReady:
            outputState.I @= OutputState.fillingLine
        elif outputState.O == OutputState.fillingLine:
          # One word is shifted in per cycle; the line is sent once the
          # word-in-line index reaches the last slot.
          # NOTE(review): every other use reads outputWordCounter.O, but the
          # slice below indexes outputWordCounter directly -- confirm.
          wordInLine = 0 if m.bit(outputWordsInLine == 1) else \
            outputWordCounter[:max(1, m.bitutils.clog2(outputWordsInLine))]
          if m.bit(wordInLine == outputWordsInLine - 1): # TODO figure out why m.bit is needed here
            outputState.I @= OutputState.sendingLine
          outputWordCounter.I @= outputWordCounter.O + 1
        else: # outputState is sendingLine
          # NOTE(review): as shown, outputState returns to sendingAddr only
          # together with the transition to finished; an `else:` line
          # appears to be missing -- confirm against the original file.
          if io.outputMemBlockReady:
            if outputWordCounter.O == numOutputWords:
              state.I @= TopState.finished
              outputState.I @= OutputState.sendingAddr
Exemplo n.º 13
    def __init__(self, x_len, n_ways: int, n_sets: int, b_bytes: int):
        # Build the cache: derive the address-field widths, declare the IO,
        # the valid/dirty/tag/data storage, and the state machine that
        # drives refills and write-backs over the nasti interface.
        #
        # x_len:   word/address width in bits (splits the address into
        #          tag/index/offset and sizes the data words).
        # n_ways:  not referenced anywhere in the visible body.
        # n_sets:  number of sets; sizes `v`, `d` and `meta_mem`.
        # b_bytes: bytes per cache block.
        #
        # NOTE(review): several statements below end inside an open call or
        # are missing an `else:` line; this excerpt appears truncated and
        # does not parse as-is. Compare with the upstream file before use.

        # Block size in bits, then the widths of the byte-in-block field
        # (b_len), the set-index field (s_len) and the remaining tag (t_len).
        b_bits = b_bytes << 3
        b_len = m.bitutils.clog2(b_bytes)
        s_len = m.bitutils.clog2(n_sets)
        t_len = x_len - (s_len + b_len)
        # Words per block, bytes per word, and the byte-in-word address bits.
        n_words = b_bits // x_len
        w_bytes = x_len // 8
        byte_offset_bits = m.bitutils.clog2(w_bytes)
        # NOTE(review): the call below is not closed in this excerpt.
        nasti_params = NastiParameters(data_bits=64,
        # Number of nasti data transfers needed to move one block.
        data_beats = b_bits // nasti_params.x_data_bits

        # Per-set metadata record: only the tag is stored here.
        class MetaData(m.Product):
            tag = m.UInt[t_len]

        self.io = m.IO(**make_cache_ports(x_len, nasti_params))
        self.io += m.ClockIO()

        # Controller states; transitions are defined in `logic` below.
        class State(m.Enum):
            IDLE = 0
            READ_CACHE = 1
            WRITE_CACHE = 2
            WRITE_BACK = 3
            WRITE_ACK = 4
            REFILL_READY = 5
            REFILL = 6

        state = m.Register(init=State.IDLE)()

        # memory
        # `v` and `d` hold one valid bit and one dirty bit per set.
        v = m.Register(m.UInt[n_sets], has_enable=True)()
        d = m.Register(m.UInt[n_sets], has_enable=True)()
        # NOTE(review): the meta_mem call and the data_mem list are both
        # incomplete in this excerpt (missing arguments / element expression).
        meta_mem = m.Memory(n_sets,
        data_mem = [
                         has_read_enable=True)() for _ in range(n_words)

        # Registers for the request fields; loaded further down whenever
        # cpu.resp.valid is high.
        # NOTE(review): all three calls are unclosed in this excerpt.
        addr_reg = m.Register(type(self.io.cpu.req.data.addr).undirected_t,
        cpu_data = m.Register(type(self.io.cpu.req.data.data).undirected_t,
        cpu_mask = m.Register(type(self.io.cpu.req.data.mask).undirected_t,

        # Read-data beats are accepted only while refilling.
        self.io.nasti.r.ready @= state.O == State.REFILL
        # Counters
        assert data_beats > 0
        if data_beats > 1:
            # NOTE(review): both CounterModM calls are unclosed, and the
            # constant assignments at the end of this block presumably sat
            # under a lost `else:` (single-beat case) -- confirm.
            read_counter = mantle.CounterModM(data_beats,
                                              max(data_beats.bit_length(), 1),
            read_counter.CE @= m.enable(self.io.nasti.r.fired())
            read_count, read_wrap_out = read_counter.O, read_counter.COUT

            write_counter = mantle.CounterModM(data_beats,
                                               max(data_beats.bit_length(), 1),
            write_count, write_wrap_out = write_counter.O, write_counter.COUT
            read_count, read_wrap_out = 0, 1
            write_count, write_wrap_out = 0, 1

        # Buffer collecting refill beats; enabled on each accepted read beat.
        # NOTE(review): the Register call is unclosed and the multi-beat
        # branch below has lost its `else:` and the end of the set_index call.
        refill_buf = m.Register(m.Array[data_beats,
        if data_beats == 1:
            refill_buf.I[0] @= self.io.nasti.r.data.data
            refill_buf.I @= m.set_index(refill_buf.O,
        refill_buf.CE @= m.enable(self.io.nasti.r.fired())

        is_idle = state.O == State.IDLE
        is_read = state.O == State.READ_CACHE
        is_write = state.O == State.WRITE_CACHE
        # High in REFILL when the read counter wraps (or always in REFILL if
        # read_wrap_out is the constant 1).
        is_alloc = (state.O == State.REFILL) & read_wrap_out
        # m.display("[%0t]: is_alloc = %x", m.time(), is_alloc)\
        #     .when(m.posedge(self.io.CLK))
        # is_alloc delayed by one register stage.
        is_alloc_reg = m.Register(m.Bit)()(is_alloc)

        # `hit` is declared here and driven further down, after rmeta exists.
        hit = m.Bit(name="hit")
        # `&` binds tighter than `|`: write when a non-aborted CPU write
        # hits (or follows an allocation), or whenever is_alloc is high.
        wen = is_write & (hit | is_alloc_reg) & ~self.io.cpu.abort | is_alloc
        # m.display("[%0t]: wen = %x", m.time(), wen)\
        #     .when(m.posedge(self.io.CLK))
        # Read only when not writing, in IDLE/READ_CACHE, with a valid request.
        ren = m.enable(~wen & (is_idle | is_read) & self.io.cpu.req.valid)
        ren_reg = m.enable(m.Register(m.Bit)()(ren))

        # Address fields: `idx` comes from the live request, the *_reg
        # versions from the registered address (tag, set index, word offset).
        addr = self.io.cpu.req.data.addr
        idx = addr[b_len:s_len + b_len]
        tag_reg = addr_reg.O[s_len + b_len:x_len]
        idx_reg = addr_reg.O[b_len:s_len + b_len]
        off_reg = addr_reg.O[byte_offset_bits:b_len]

        rmeta = meta_mem.read(idx, ren)
        rdata = m.concat(*(mem.read(idx, ren) for mem in data_mem))
        # NOTE(review): the rdata_buf call is unclosed, and the outer mux
        # below lists a single input -- a second entry appears to be missing.
        rdata_buf = m.Register(type(rdata), has_enable=True)()(rdata,

        read = m.mux([
            m.as_bits(m.mux([rdata_buf, rdata], ren_reg)),
        ], is_alloc_reg)
        # m.display("is_alloc_reg=%x", is_alloc_reg)\
        #     .when(m.posedge(self.io.CLK))

        # Hit: the indexed set is valid and its stored tag matches.
        hit @= v.O[idx_reg] & (rmeta.tag == tag_reg)

        # read mux
        # Split `read` into x_len-bit words and select one with off_reg.
        self.io.cpu.resp.data.data @= m.array(
            [read[i * x_len:(i + 1) * x_len] for i in range(n_words)])[off_reg]
        self.io.cpu.resp.valid @= (is_idle | (is_read & hit) |
                                   (is_alloc_reg & ~cpu_mask.O.reduce_or()))
        # Debug prints.
        # NOTE(review): the first three m.display statements end in a line
        # continuation or open call whose tail is missing from this excerpt.
        m.display("resp.valid=%x", self.io.cpu.resp.valid.value())\
        m.display("[%0t]: valid = %x", m.time(),
        m.display("[%0t]: is_idle = %x, is_read = %x, hit = %x, is_alloc_reg = "
                  "%x, ~cpu_mask.O.reduce_or() = %x", m.time(), is_idle,
                  is_read, hit, is_alloc_reg, ~cpu_mask.O.reduce_or())\
        m.display("[%0t]: refill_buf.O=%x, %x", m.time(), *refill_buf.O)\
            .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)
        m.display("[%0t]: read=%x", m.time(), read)\
            .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)

        # Load address, data and mask registers from the request whenever
        # the response is valid.
        addr_reg.I @= addr
        addr_reg.CE @= m.enable(self.io.cpu.resp.valid.value())

        cpu_data.I @= self.io.cpu.req.data.data
        cpu_data.CE @= m.enable(self.io.cpu.resp.valid.value())

        cpu_mask.I @= self.io.cpu.req.data.mask
        cpu_mask.CE @= m.enable(self.io.cpu.resp.valid.value())

        # Metadata written on allocation: the registered tag.
        wmeta = MetaData(name="wmeta")
        wmeta.tag @= tag_reg

        # Byte mask shifted by a zero-padded off_reg; the mux below is
        # selected by ~is_alloc, with an all-ones (-1) entry as the other
        # input.
        offset_mask = (m.zext_to(cpu_mask.O, w_bytes * 8) << m.concat(
            m.bits(0, byte_offset_bits), off_reg))
        wmask = m.mux([m.SInt[w_bytes * 8](-1),
                       m.sint(offset_mask)], ~is_alloc)

        # NOTE(review): the multi-beat branch has lost its `else:` and the
        # body of the concat call; as shown this does not parse.
        if len(refill_buf.O) == 1:
            wdata_alloc = self.io.nasti.r.data.data
            wdata_alloc = m.concat(
                # TODO: not sure why they use `init.reverse`
                # https://github.com/ucb-bar/riscv-mini/blob/release/src/main/scala/Cache.scala#L116
        # Write data: the refill data or cpu_data repeated across every
        # word, selected by ~is_alloc.
        wdata = m.mux([wdata_alloc,
                       m.as_bits(m.repeat(cpu_data.O, n_words))], ~is_alloc)

        # On any write, set the valid bit for the set and set its dirty bit
        # to ~is_alloc.
        v.I @= m.set_index(v.O, m.bit(True), idx_reg)
        v.CE @= m.enable(wen)
        d.I @= m.set_index(d.O, ~is_alloc, idx_reg)
        d.CE @= m.enable(wen)
        # m.display("[%0t]: refill_buf.O = %x", m.time(),
        #           m.concat(*refill_buf.O)).when(m.posedge(self.io.CLK)).if_(wen)
        # m.display("[%0t]: nasti.r.data.data = %x", m.time(),
        #           self.io.nasti.r.data.data).when(m.posedge(self.io.CLK)).if_(wen)

        # The tag is only written when allocating.
        meta_mem.write(wmeta, idx_reg, m.enable(wen & is_alloc))
        # Each data memory receives its word of wdata split into bytes,
        # with the matching slice of wmask.
        # NOTE(review): the closing `]` of the list below is missing.
        for i, mem in enumerate(data_mem):
            data = [
                wdata[i * x_len + j * 8:i * x_len + (j + 1) * 8]
                for j in range(w_bytes)
            mem.write(m.array(data), idx_reg,
                      wmask[i * w_bytes:(i + 1) * w_bytes], m.enable(wen))
            # m.display("[%0t]: wdata = %x, %x, %x, %x", m.time(),
            #           *mem.WDATA.value()).when(m.posedge(self.io.CLK)).if_(wen)
            # m.display("[%0t]: wmask = %x, %x, %x, %x", m.time(),
            #           *mem.WMASK.value()).when(m.posedge(self.io.CLK)).if_(wen)

        # Read-address request: the registered tag and index shifted left
        # by b_len, with data_beats - 1 as the final argument.
        # NOTE(review): the zext_to call is unclosed in this excerpt.
        tag_and_idx = m.zext_to(m.concat(idx_reg, tag_reg),
        self.io.nasti.ar.data @= NastiReadAddressChannel(
            nasti_params, 0, tag_and_idx << m.Bits[len(tag_and_idx)](b_len),
            m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

        # Write-address request: same shape, but built from the tag read
        # out of meta_mem rather than the requested tag.
        # NOTE(review): the zext_to call is unclosed in this excerpt.
        rmeta_and_idx = m.zext_to(m.concat(idx_reg, rmeta.tag),
        self.io.nasti.aw.data @= NastiWriteAddressChannel(
            nasti_params, 0,
            rmeta_and_idx << m.Bits[len(rmeta_and_idx)](b_len),
            m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

        # Write data: the slice of `read` selected by write_count, with
        # write_wrap_out as the final argument.
        # NOTE(review): the opening of the array expression is missing.
        self.io.nasti.w.data @= NastiWriteDataChannel(
                read[i * nasti_params.x_data_bits:(i + 1) *
                     nasti_params.x_data_bits] for i in range(data_beats)
            ])[write_count[:-1]], None, write_wrap_out)

        # The indexed set is both valid and dirty.
        is_dirty = v.O[idx_reg] & d.O[idx_reg]

        # TODO: Have to use temporary so we can invoke `fired()`
        aw_valid = m.Bit(name="aw_valid")
        self.io.nasti.aw.valid @= aw_valid

        ar_valid = m.Bit(name="ar_valid")
        self.io.nasti.ar.valid @= ar_valid

        b_ready = m.Bit(name="b_ready")
        self.io.nasti.b.ready @= b_ready

        # Next-state and handshake logic for the controller.
        # NOTE(review): presumably decorated as inline combinational logic
        # upstream; several `else:` lines look lost (flagged per branch).
        def logic():
            # Defaults: hold state, deassert every handshake output.
            state.I @= state.O
            aw_valid @= False
            ar_valid @= False
            self.io.nasti.w.valid @= False
            b_ready @= False
            if state.O == State.IDLE:
                if self.io.cpu.req.valid:
                    # A non-zero mask marks a write request.
                    # NOTE(review): the READ_CACHE assignment presumably
                    # belonged to a lost `else:` -- confirm.
                    if self.io.cpu.req.data.mask.reduce_or():
                        state.I @= State.WRITE_CACHE
                        state.I @= State.READ_CACHE
            elif state.O == State.READ_CACHE:
                if hit:
                    # NOTE(review): `else:` lines appear to be missing
                    # before the READ_CACHE, IDLE and aw_valid assignments
                    # in this branch -- confirm against upstream.
                    if self.io.cpu.req.valid:
                        if self.io.cpu.req.data.mask.reduce_or():
                            state.I @= State.WRITE_CACHE
                            state.I @= State.READ_CACHE
                        state.I @= State.IDLE
                    # Request a write-back if the set is dirty, otherwise a
                    # refill; advance on whichever address handshake fires.
                    aw_valid @= is_dirty
                    ar_valid @= ~is_dirty
                    if self.io.nasti.aw.fired():
                        state.I @= State.WRITE_BACK
                    elif self.io.nasti.ar.fired():
                        state.I @= State.REFILL
            elif state.O == State.WRITE_CACHE:
                # NOTE(review): an `else:` appears to be missing before the
                # aw_valid assignment -- confirm.
                if hit | is_alloc_reg | self.io.cpu.abort:
                    state.I @= State.IDLE
                    aw_valid @= is_dirty
                    ar_valid @= ~is_dirty
                    if self.io.nasti.aw.fired():
                        state.I @= State.WRITE_BACK
                    elif self.io.nasti.ar.fired():
                        state.I @= State.REFILL
            elif state.O == State.WRITE_BACK:
                # Drive write data until the write counter wraps.
                self.io.nasti.w.valid @= True
                if write_wrap_out:
                    state.I @= State.WRITE_ACK
            elif state.O == State.WRITE_ACK:
                # Wait for the write response before requesting the refill.
                b_ready @= True
                if self.io.nasti.b.fired():
                    state.I @= State.REFILL_READY
            elif state.O == State.REFILL_READY:
                ar_valid @= True
                if self.io.nasti.ar.fired():
                    state.I @= State.REFILL
            elif state.O == State.REFILL:
                if read_wrap_out:
                    # NOTE(review): the IDLE assignment presumably belonged
                    # to a lost `else:` (read request) -- confirm.
                    if cpu_mask.O.reduce_or():
                        state.I @= State.WRITE_CACHE
                        state.I @= State.IDLE

        if data_beats > 1:
            # TODO: Have to do this at the end since the inline comb logic
            # wires up nasti.w
            write_counter.CE @= m.enable(self.io.nasti.w.fired())