class Register(m.Circuit):
    # Register built around a CoreIR register primitive.  Optional enable and
    # reset muxes are placed in front of the primitive's data input.  Every
    # parameter used here (n, init, T, _type, has_*, reset_priority) comes
    # from the enclosing scope.
    name = (
        f"Register_has_ce_{has_ce}_has_reset_{has_reset}_"
        f"has_async_reset_{has_async_reset}_"
        f"has_async_resetn_{has_async_resetn}_"
        f"type_{_type.__name__}_n_{n}"
    )

    io = m.IO(I=m.In(T), O=m.Out(T))
    io += m.ClockIO(
        has_ce=has_ce,
        has_reset=has_reset,
        has_async_reset=has_async_reset,
        has_async_resetn=has_async_resetn,
    )

    reg = DefineCoreirReg(
        n, init, has_async_reset, has_async_resetn, _type
    )(name="value")

    I = io.I
    # With n=None the primitive's ports are indexed at bit 0.
    O = reg.O[0] if n is None else reg.O

    # The mux applied last sits closest to the register input.  When both
    # enable and reset are present, `reset_priority` puts the reset mux last;
    # otherwise the enable mux goes last.
    if has_ce and (reset_priority or not has_reset):
        I = mantle.mux([O, I], io.CE, name="enable_mux")
    if has_reset:
        I = mantle.mux([I, m.bits(init, n)], io.RESET)
    if has_ce and has_reset and not reset_priority:
        I = mantle.mux([O, I], io.CE, name="enable_mux")

    m.wire(I, reg.I[0] if n is None else reg.I)
    m.wire(io.O, O)
class Monitor(m.Circuit):
    # Passive checker: both ready/valid interfaces are declared as inputs, so
    # this circuit only observes the enq and deq sides and drives nothing.
    io = m.IO(enq=m.In(m.ReadyValid[T]), deq=m.In(
        m.ReadyValid[T])) + m.ClockIO()
    # The inline Verilog keeps a 4-entry, 8-bit reference buffer with 3-bit
    # read/write pointers.  On each clock edge it asserts that a write does
    # not hit a full buffer, that a read does not hit an empty buffer, and
    # that the dequeued data equals the entry stored at the read pointer.
    # The `{io...}` placeholders are presumably substituted with the matching
    # port names by m.inline_verilog -- confirm against the magma docs.
    m.inline_verilog("""\
reg [7:0] data [0:3];
reg [2:0] write_pointer;
reg [2:0] read_pointer;
wire wen;
wire ren;
wire full;
wire empty;
assign wen = {io.enq.valid} & {io.enq.ready};
assign ren = {io.deq.ready} & {io.deq.valid};
assign empty = write_pointer == read_pointer;
assign full = ((write_pointer[1:0] == read_pointer[1:0]) &
               (write_pointer[2] == ~read_pointer[2]));
always @(posedge {io.CLK}) begin
    if (wen) begin
        assert (!full) else $error("Trying to write to full buffer");
        data[write_pointer[1:0]] <= {io.enq.data};
        write_pointer <= write_pointer + 1;
    end
    if (ren) begin
        assert (!empty) else $error("Trying to read from empty buffer");
        assert ({io.deq.data} == data[read_pointer[1:0]]) else
            $error("Got wrong read data: io.deq.data %x != %x",
                   {io.deq.data}, data[read_pointer[1:0]]);
        read_pointer <= read_pointer + 1;
    end
end""")
def __init__(self, n: int, T: m.Kind = m.Bit, init=None,
             has_enable: bool = False,
             reset_type: Optional[m.AbstractReset] = None):
    """
    Build a chain of `n` registers named ``SIPO{n}``.

    Each register's output feeds the next register's input (via ``m.scan``),
    and all register outputs together drive ``O``.

    Args:
        n: number of registers in the chain.
        T: element type held by each register.
        init: per-register initial values, indexed 0..n-1.  When None, each
            entry is built as ``T(*_zero_init_args(T))``.
        has_enable: forwarded to the clock interface and to every register.
        reset_type: reset kind (or None); selects the matching reset port
            on the clock interface and is forwarded to every register.
    """
    if init is None:
        init = [T(*_zero_init_args(T)) for _ in range(n)]

    self.name = f"SIPO{n}"

    # A vector of plain bits is exposed as Bits[n] rather than Array[n, Bit].
    out_type = m.Bits[n] if T is m.Bit else m.Array[n, T]
    self.io = m.IO(I=m.In(T), O=m.Out(out_type))

    # TODO: Add magma helper func for this
    reset_flags = {
        "has_async_reset": reset_type == m.AsyncReset,
        "has_async_resetn": reset_type == m.AsyncResetN,
        "has_reset": reset_type == m.Reset,
        "has_resetn": reset_type == m.ResetN,
    }
    self.io += m.ClockIO(has_enable=has_enable, **reset_flags)

    # Lazily instantiated: the registers are created as m.scan consumes them.
    regs = (
        m.Register(T, init=init[i], has_enable=has_enable,
                   reset_type=reset_type)()
        for i in range(n)
    )

    # TODO: Default clock wiring logic raises warning inside scan
    chain = m.scan(regs, scanargs={"I": "O"})
    self.io.O @= chain(self.io.I)
class DUT(m.Circuit):
    # Test harness for ImmGen: a counter steps through `insts` (from the
    # enclosing scope), and for each instruction the generator's output is
    # compared with a reference immediate chosen by the control unit's
    # imm_sel.
    io = m.IO(done=m.Out(m.Bit)) + m.ClockIO()

    imm = ImmGen(32)()
    ctrl = Control(32)()
    counter = mantle.CounterModM(len(insts), len(insts).bit_length())

    # Reference immediates for every encoding, indexed by the counter.
    i = m.mux([iimm(i) for i in insts], counter.O)
    s = m.mux([simm(i) for i in insts], counter.O)
    b = m.mux([bimm(i) for i in insts], counter.O)
    u = m.mux([uimm(i) for i in insts], counter.O)
    j = m.mux([jimm(i) for i in insts], counter.O)
    z = m.mux([zimm(i) for i in insts], counter.O)
    x = m.mux([iimm(i) & -2 for i in insts], counter.O)

    # Select chain: start from the fallback `x` and wrap one mux per
    # immediate kind, innermost (IMM_Z) first and outermost (IMM_I) last.
    O = x
    for _sel, _val in [
        (IMM_Z, z),
        (IMM_J, j),
        (IMM_U, u),
        (IMM_B, b),
        (IMM_S, s),
        (IMM_I, i),
    ]:
        O = m.mux([O, _val], ctrl.imm_sel == _sel)
    # Keep the loop temporaries out of the class namespace.
    del _sel, _val

    inst = m.mux(insts, counter.O)
    ctrl.inst @= inst
    imm.inst @= inst
    imm.sel @= ctrl.imm_sel
    io.done @= counter.COUT

    f.assert_immediate(
        imm.O == O,
        failure_msg=("Counter: %d, Type: 0x%x, O: %x ?= %x",
                     counter.O, imm.sel, imm.O, O),
    )
    m.display("Counter: %d, Type: 0x%x, O: %x ?= %x",
              counter.O, imm.sel, imm.O, O)
def __init__(self, name, height: int, width: int, backend: str = "magma",
             write_forward=True):
    """
    Set up an empty register-file builder and declare its clock ports.

    Args:
        name: passed to the parent builder's constructor.
        height: number of entries; the address width is clog2(height).
        width: data width of each entry.
        backend: stored on the instance as ``self.backend``.
        write_forward: (bool, default True) when True, reading an address
            that is being written returns the new value (combinational
            forward from the write port); when False, it returns the old
            value (the current register output).
    """
    super().__init__(name)

    self._data_width = width
    self._height = height
    self._addr_width = m.bitutils.clog2(height)

    self._read_ports = []
    self._write_ports = []
    self._enable_ports = {}

    self._readT = _make_read_type(self._data_width, self._addr_width)
    self._writeT = _make_write_type(self._data_width, self._addr_width)

    # decl() is a flat (name, type, name, type, ...) sequence; pulling two
    # items at a time from one iterator yields the (name, type) pairs.
    flat_decl = iter(m.ClockIO(has_async_reset=True).decl())
    for port_name, port_type in zip(flat_decl, flat_decl):
        self._add_port(port_name, port_type)

    self.backend = backend
    self.write_forward = write_forward
class _Circuit(m.Circuit):
    # Pass-through circuit: input I is wired straight to output O.  A clock
    # interface is added only when `has_clk` (enclosing scope) is set.
    name = circ_name
    __test__ = False  # Disable pytest discovery

    io = m.IO(I=m.In(T), O=m.Out(T))
    if has_clk:
        io += m.ClockIO()

    m.wire(io.I, io.O)
def __init__(self, T, entries, pipe=False, flow=False):
    """
    Build a ready/valid FIFO holding up to `entries` items of type `T`.

    Storage is an ``m.Memory``.  Enqueue and dequeue pointers are
    modulo-`entries` counters, and a `maybe_full` register tells a full
    queue from an empty one when the two pointers are equal.

    Args:
        T: element type carried by the enq/deq interfaces.
        entries: queue depth; must be non-negative.
        pipe: not implemented.
        flow: not implemented.

    Raises:
        AssertionError: if `entries` is negative.
        NotImplementedError: if `pipe` or `flow` is requested.
    """
    assert entries >= 0
    # Reject unsupported modes before any hardware is constructed.
    if flow:
        raise NotImplementedError()
    if pipe:
        raise NotImplementedError()

    self.io = m.IO(
        # Flipped since enq/deq is from perspective of the client
        enq=m.DeqIO[T],
        deq=m.EnqIO[T],
        count=m.Out(m.UInt[m.bitutils.clog2(entries + 1)])
    ) + m.ClockIO()

    ram = m.Memory(entries, T)()
    enq_ptr = mantle.CounterModM(entries, entries.bit_length(),
                                 has_ce=True, cout=False)
    deq_ptr = mantle.CounterModM(entries, entries.bit_length(),
                                 has_ce=True, cout=False)
    maybe_full = m.Register(init=False, has_enable=True)()

    ptr_match = enq_ptr.O == deq_ptr.O
    empty = ptr_match & ~maybe_full.O
    full = ptr_match & maybe_full.O

    self.io.deq.valid @= ~empty
    self.io.enq.ready @= ~full

    do_enq = self.io.enq.fired()
    do_deq = self.io.deq.fired()

    # The pointers carry one bit more than the memory address needs, so the
    # top bit is dropped when addressing the RAM.
    ram.write(self.io.enq.data, enq_ptr.O[:-1], m.enable(do_enq))

    enq_ptr.CE @= m.enable(do_enq)
    deq_ptr.CE @= m.enable(do_deq)

    # maybe_full only changes when exactly one of enq/deq fires, and it then
    # records whether that event was an enqueue.
    maybe_full.I @= m.enable(do_enq)
    maybe_full.CE @= m.enable(do_enq != do_deq)
    self.io.deq.data @= ram[deq_ptr.O[:-1]]

    def ispow2(n):
        return n != 0 and (n & (n - 1)) == 0

    # m.mux([a, b], sel) yields `a` for sel == 0 and `b` for sel == 1.
    # All mux inputs below are UInt[count_len] so they match io.count.
    count_len = len(self.io.count)
    zero = m.uint(0, count_len)
    depth = m.uint(entries, count_len)
    # Difference modulo 2**count_len; count_len equals the pointer width.
    ptr_diff = m.uint(enq_ptr.O) - m.uint(deq_ptr.O)
    if ispow2(entries):
        # The pointers wrap at `entries`, so the occupancy of a non-full
        # queue is the difference modulo `entries`, i.e. its low bits.
        occupancy = ptr_diff & m.uint(entries - 1, count_len)
        self.io.count @= m.mux([occupancy, depth], full)
    else:
        # Pointers differ: the plain difference, corrected by +entries when
        # the enqueue pointer has wrapped past the dequeue pointer.
        # Pointers match: the queue is either completely full or empty.
        self.io.count @= m.mux([
            m.mux([ptr_diff, ptr_diff + depth],
                  m.uint(deq_ptr.O) > m.uint(enq_ptr.O)),
            m.mux([zero, depth], maybe_full.O),
        ], ptr_match)
class BrCond_DUT(m.Circuit):
    # Test harness for BrCond: a counter steps through a list of branch
    # instructions with random operands.  `taken` is the unit's answer and
    # `out` is the reference answer computed from the same operands.
    _IGNORE_UNUSED_ = True
    io = m.IO(
        done=m.Out(m.Bit),
        out=m.Out(m.Bit),
        taken=m.Out(m.Bit),
    ) + m.ClockIO()

    br_cond = BrCond(x_len)()
    io.taken @= br_cond.taken

    control = Control(x_len)()
    br_cond.br_type @= control.br_type

    # One instruction per branch kind, with the whole group repeated 10 times.
    insts = [
        B(funct3, 0, 0, 0)
        for funct3 in (
            Funct3.BEQ,
            Funct3.BNE,
            Funct3.BLT,
            Funct3.BGE,
            Funct3.BLTU,
            Funct3.BGEU,
        )
    ] * 10

    n = len(insts)
    counter = CounterModM(n, n.bit_length())
    control.inst @= m.mux(insts, counter.O)
    io.done @= counter.COUT

    # Random operand pairs, one per instruction slot.
    rs1 = [BV.random(x_len) for _ in range(n)]
    rs2 = [BV.random(x_len) for _ in range(n)]
    br_cond.rs1 @= m.mux(rs1, counter.O)
    br_cond.rs2 @= m.mux(rs2, counter.O)

    # Reference results for every comparison kind, one entry per slot.
    eq = [a == b for a, b in zip(rs1, rs2)]
    ne = [a != b for a, b in zip(rs1, rs2)]
    lt = [m.sint(a) < m.sint(b) for a, b in zip(rs1, rs2)]
    ge = [m.sint(a) >= m.sint(b) for a, b in zip(rs1, rs2)]
    ltu = [a < b for a, b in zip(rs1, rs2)]
    geu = [a >= b for a, b in zip(rs1, rs2)]

    @m.inline_combinational()
    def logic():
        # Pick the reference result that matches the decoded branch type.
        if control.br_type == BR_EQ:
            io.out @= m.mux(eq, counter.O)
        elif control.br_type == BR_NE:
            io.out @= m.mux(ne, counter.O)
        elif control.br_type == BR_LT:
            io.out @= m.mux(lt, counter.O)
        elif control.br_type == BR_GE:
            io.out @= m.mux(ge, counter.O)
        elif control.br_type == BR_LTU:
            io.out @= m.mux(ltu, counter.O)
        elif control.br_type == BR_GEU:
            io.out @= m.mux(geu, counter.O)
        else:
            io.out @= False
class ALUTile(m.Circuit):
    # ALU core whose third operand comes from a 2-bit configuration register.
    # The register loads `config_data` whenever `config_en` is asserted.
    io = m.IO(
        a=m.In(m.UInt[16]),
        b=m.In(m.UInt[16]),
        config_data=m.In(m.UInt[2]),
        config_en=m.In(m.Enable),
        c=m.Out(m.UInt[16]),
    ) + m.ClockIO()

    config_reg = m.Register(m.Bits[2], has_enable=True)()
    config_reg.I @= io.config_data
    config_reg.CE @= io.config_en

    alu = ALUCore()
    io.c @= alu(io.a, io.b, config_reg.O)
class _Main(m.Circuit):
    # Wrapper exposing one write port and one read port of a register file
    # built with mantle.RegFileBuilder.  `_name`, `addr_width`, `data_width`,
    # `height`, `backend` and `write_forward` come from the enclosing scope.
    name = _name
    io = m.IO(write_addr=m.In(m.Bits[addr_width]),
              write_data=m.In(m.Bits[data_width]),
              read_addr=m.In(m.Bits[addr_width]),
              read_data=m.Out(
                  m.Bits[data_width])) + m.ClockIO(has_async_reset=True)
    reg_file = mantle.RegFileBuilder("my_regfile", height, data_width,
                                     backend=backend,
                                     write_forward=write_forward)
    # Item assignment goes through the builder; presumably it adds a write
    # port driven by (write_addr, write_data) -- confirm against the builder.
    reg_file[io.write_addr] = io.write_data
    # Indexing the builder yields the value that drives read_data.
    io.read_data @= reg_file[io.read_addr]
class _Main(m.Circuit):
    # Wrapper exposing an enable-gated write port and one read port of a
    # register file built with mantle.RegFileBuilder.  `backend`, `height`,
    # `addr_width` and `data_width` come from the enclosing scope.
    name = f"test_regfile_enable_{backend}"
    io = m.IO(write_addr=m.In(m.Bits[addr_width]),
              write_data=m.In(m.Bits[data_width]),
              write_enable=m.In(m.Enable),
              read_addr=m.In(m.Bits[addr_width]),
              read_data=m.Out(
                  m.Bits[data_width])) + m.ClockIO(has_async_reset=True)
    reg_file = mantle.RegFileBuilder("my_regfile", height, data_width,
                                     backend=backend)
    # Explicit write call so that the enable signal can be passed along.
    reg_file.write(io.write_addr, io.write_data, enable=io.write_enable)
    # Indexing the builder yields the value that drives read_data.
    io.read_data @= reg_file[io.read_addr]
def __init__(self, wordWidth: int, metricWidth: int, idx: int):
    """
    Build an accumulator around a PairMem addressed by a feature pair.

    In normal mode the memory is read at the concatenation of the two input
    features, and one cycle later the word at that pair's address is written
    back with one part increased by the registered metric and the other part
    incremented by one.  In shift mode the memory is read out sequentially
    via `outputCounter` while `neighborOutputIn` is written in.

    Args:
        wordWidth: width of each input feature; the memory has
            ``1 << (2 * wordWidth)`` entries.
        metricWidth: width of the `inputMetric` port.
        idx: instance index; values in [800, 2479) select the variant that
            forwards the previous write to the read data on an address
            collision (marked "BRAM" below).
    """
    self.io = io = m.IO(
        inputFeatureOne=m.In(m.UInt[wordWidth]),
        inputFeatureTwo=m.In(m.UInt[wordWidth]),
        inputMetric=m.In(m.UInt[metricWidth]),
        inputValid=m.In(m.Bit),
        # one cycle pause required between last inputValid and start of
        # shiftMode
        shiftMode=m.In(m.Bit),
        doShift=m.In(m.Bit),
        neighborOutputIn=m.In(m.UInt[64]),
        out=m.Out(m.UInt[64]),
    ) + m.ClockIO(has_reset=True)

    bram = PairMem(1 << (2 * wordWidth))()

    # Inputs delayed by one cycle so the write-back lines up with the read.
    lastFeatureOne = reg_next(io.inputFeatureOne)
    lastFeatureTwo = reg_next(io.inputFeatureTwo)
    lastMetric = reg_next(io.inputMetric)
    lastInputValid = reg_next_init(io.inputValid, False)

    if 800 <= idx < 2479:
        # BRAM
        # NOTE(review): this reads `bram.WEN`, but the logic below drives
        # `bram.WE` -- confirm which port name PairMem actually declares.
        lastWrite = reg_next(bram.WDATA)
        collision = reg_next((bram.RADDR == bram.WADDR) & bram.WEN)
        readData = m.mux([bram.RDATA, lastWrite], collision)
    else:
        readData = bram.RDATA

    outputCounter = reg_init(m.UInt[2 * wordWidth], 0)
    io.out @= bram.RDATA

    @m.inline_combinational()
    def logic():
        if io.doShift:
            outputCounter.I @= outputCounter.O + 1  # wraps around
        else:
            outputCounter.I @= outputCounter.O  # default required

        if io.shiftMode:
            bram.RADDR @= outputCounter.O + 1 if io.doShift else outputCounter.O
            bram.WDATA @= io.neighborOutputIn
            bram.WADDR @= outputCounter.O
            bram.WE @= io.doShift
        else:
            bram.RADDR @= io.inputFeatureTwo.concat(io.inputFeatureOne)
            bram.WDATA @= (
                readData[:32] + m.zext_to(lastMetric, 32)).concat(readData[32:] + 1)
            bram.WADDR @= lastFeatureTwo.concat(lastFeatureOne)
            bram.WE @= lastInputValid
def __init__(self, x_len: int):
    """
    Build a 32-entry register file with two read ports and one write port.

    Reads of address 0 return 0, and writes to address 0 are suppressed.

    Args:
        x_len: width of each register and of the data ports.
    """
    self.io = io = m.IO(
        raddr1=m.In(m.UInt[5]),
        raddr2=m.In(m.UInt[5]),
        rdata1=m.Out(m.UInt[x_len]),
        rdata2=m.Out(m.UInt[x_len]),
        wen=m.In(m.Enable),
        waddr=m.In(m.UInt[5]),
        wdata=m.In(m.UInt[x_len]),
    ) + m.ClockIO(has_reset=True)

    regs = RegFileBuilder(
        "reg_file", 32, x_len,
        write_forward=False,
        reset_type=m.Reset,
        backend="verilog",
    )

    # reduce_or() is 0 only for address 0, which selects the constant 0.
    for raddr, rdata in ((io.raddr1, io.rdata1), (io.raddr2, io.rdata2)):
        rdata @= m.mux([0, regs[raddr]], raddr.reduce_or())

    # Gate the write enable so that address 0 is never written.
    write_nonzero = m.bit(io.wen) & io.waddr.reduce_or()
    regs.write(io.waddr, io.wdata, enable=m.enable(write_nonzero))
class SimpleALU(m.Circuit):
    # Four-operation ALU; the operation is chosen by a 2-bit opcode held in
    # a configuration register that loads when `config_en` is asserted.
    io = m.IO(
        a=m.In(m.UInt[16]),
        b=m.In(m.UInt[16]),
        c=m.Out(m.UInt[16]),
        config_data=m.In(m.Bits[2]),
        config_en=m.In(m.Enable),
    ) + m.ClockIO()

    opcode = ConfigReg(name="config_reg")(io.config_data, CE=io.config_en)

    # udiv is not implemented, so the fourth slot uses an arbitrary op
    # (b - a) in place of a / b.
    io.c @= mantle.mux(
        [
            io.a + io.b,
            io.a - io.b,
            io.a * io.b,
            io.b - io.a,
        ],
        opcode,
    )