def __init__(self, x_len):
    """Build the immediate generator for an ``x_len``-bit datapath.

    One candidate immediate per format is assembled from slices of
    ``self.io.inst``; ``self.io.sel`` then chooses which one is driven onto
    ``self.io.O`` (as an unsigned value).

    Args:
        x_len: Width the immediates are extended to; also forwarded to the
            parent constructor.
    """
    super().__init__(x_len)
    word = self.io.inst

    def signed(raw):
        # Reinterpret the gathered bits as signed, then sign-extend to x_len.
        return m.sext_to(m.sint(raw), x_len)

    # I format: instruction bits 20..31.
    i_imm = signed(word[20:32])
    # S format: bits 7..11 (low part) followed by bits 25..31.
    s_imm = signed(m.concat(word[7:12], word[25:32]))
    # B format: constant-zero LSB, then bits 8..11, 25..30, 7 and 31.
    b_imm = signed(
        m.concat(m.bits(0, 1), word[8:12], word[25:31], word[7], word[31]))
    # U format: twelve zero bits below instruction bits 12..31 (no extension).
    u_imm = m.concat(m.bits(0, 12), word[12:32])
    # J format: constant-zero LSB, then bits 21..24, 25..30, 20, 12..19, 31.
    j_imm = signed(
        m.concat(m.bits(0, 1), word[21:25], word[25:31], word[20],
                 word[12:20], word[31]))
    # Z format: bits 15..19 zero-extended, then viewed as signed.
    z_imm = m.sint(m.zext_to(word[15:20], x_len))

    by_format = {
        IMM_I: i_imm,
        IMM_S: s_imm,
        IMM_B: b_imm,
        IMM_U: u_imm,
        IMM_J: j_imm,
        IMM_Z: z_imm,
    }
    # Default for any other selector value: the I immediate with bit 0 cleared.
    fallback = i_imm & -2
    self.io.O @= m.uint(m.dict_lookup(by_format, self.io.sel, fallback))
def definition(io):
    """Capture serial pixel data on SCK and forward a reduced value to a UART.

    Reads ``io.SCK``, ``io.DATA``, ``io.VALID`` and ``io.CLK``; drives
    ``io.PXV`` (summed colour value), ``io.UART`` and ``io.LOAD``.

    Uses names defined outside this function: ``rising``, ``falling``,
    ``zeros``, ``RMASK``, ``GMASK``, ``BMASK``, ``UART`` and the LUT
    operands ``I0``/``I1``/``I2``.
    """
    edge_r = rising(io.SCK)
    edge_f = falling(io.SCK)

    # pixels come 16 bits (high and low byte) at a time
    # The 4-bit counter advances on every rising SCK edge.
    bit_counter = mantle.Counter(4, has_ce=True, has_reset=True)
    m.wire(edge_r, bit_counter.CE)

    # find when the high and low byte are valid
    # `low` is asserted at count 15, `high` at count 7.
    low = mantle.Decode(15, 4)(bit_counter.O)
    high = mantle.Decode(7, 4)(bit_counter.O)

    # shift registers to store high and low byte
    low_byte = mantle.PIPO(8, has_ce=True)
    high_byte = mantle.PIPO(8, has_ce=True)

    # NOTE(review): argument meaning of the PIPO call (serial in, parallel in,
    # load) is not visible here - confirm against mantle.PIPO.
    low_byte(0, io.DATA, low)
    high_byte(0, io.DATA, high)

    m.wire(low, low_byte.CE)
    m.wire(high, high_byte.CE)

    # assemble the 16-bit RGB565 value
    # The high byte is widened with `zeros`, shifted left by 8 and added to
    # the widened low byte.
    px_bits = (m.uint(mantle.LSL(16)((m.uint(m.concat(high_byte.O, zeros))),
                                     m.bits(8, 4))) +
               m.uint(m.concat(low_byte.O, zeros)))

    # extract the values for each color
    r_val = m.uint(mantle.LSR(16)((px_bits & RMASK), m.bits(11, 4)))
    g_val = m.uint(mantle.LSR(16)((px_bits & GMASK), m.bits(5, 4)))
    b_val = m.uint(px_bits & BMASK)

    # sum them to get grayscale (0 to 125)
    px_val = (r_val + g_val + b_val)

    # --------------------------UART OUTPUT---------------------------- #

    # run 16-bit UART at 2x speed
    # `baud` ticks on both SCK edges.
    baud = edge_r | edge_f

    # reset at start of pixel transfer
    # u_reset = VALID & ~(VALID as sampled by ff1), i.e. VALID just went high.
    ff1 = mantle.FF(has_ce=True)
    m.wire(baud, ff1.CE)
    u_reset = mantle.LUT2(I0 & ~I1)(io.VALID, ff1(io.VALID))
    m.wire(u_reset, bit_counter.RESET)

    # generate load signal
    # load = VALID & high & ~(high as sampled by ff2).
    ff2 = mantle.FF(has_ce=True)
    m.wire(baud, ff2.CE)
    load = mantle.LUT3(I0 & I1 & ~I2)(io.VALID, high, ff2(high))

    uart = UART(16)
    uart(CLK=io.CLK, BAUD=baud, DATA=px_val, LOAD=load)

    m.wire(px_val, io.PXV)
    m.wire(uart, io.UART)
    m.wire(load, io.LOAD)
def get_hash_mask(sample_size: SampleSize) -> m.Bits(4):
    # Pick the 4-bit hash mask that goes with `sample_size`.
    # Each step from ONE_PIXEL to HALF_PIXEL to QUARTER_PIXEL to the remaining
    # case puts one more 1 bit at the front of the concatenation.
    # The if/elif shape is kept on purpose: the function may be rewritten by a
    # decorator that is not visible from here.
    if sample_size == SampleSize.ONE_PIXEL:
        mask = m.repeat(m.bit(0), 4)
    elif sample_size == SampleSize.HALF_PIXEL:
        mask = m.concat(
            m.bit(1),
            m.repeat(m.bit(0), 3),
        )
    elif sample_size == SampleSize.QUARTER_PIXEL:
        mask = m.concat(
            m.repeat(m.bit(1), 2),
            m.repeat(m.bit(0), 2),
        )
    else:
        # Every other sample size.
        mask = m.concat(
            m.repeat(m.bit(1), 3),
            m.bit(0),
        )
    return mask
def definition(io):
    """XOR-fold the first five bytes of ``io.data_in`` into one masked byte.

    Neighbouring bytes are XORed into a 32-bit stage, that stage is folded
    to 16 bits and then to 8 bits, and the result is ANDed with the low
    byte of ``io.mask`` before being wired to ``io.data_out``.
    """

    def byte(word, k):
        # k-th byte of `word`, as Bits.
        return m.bits(word[8 * k:8 * (k + 1)])

    src = io.data_in

    # Stage 1: byte k XOR byte k + 1, for k = 0..3.
    stage32 = m.concat(
        byte(src, 0) ^ byte(src, 1),
        byte(src, 1) ^ byte(src, 2),
        byte(src, 2) ^ byte(src, 3),
        byte(src, 3) ^ byte(src, 4),
    )

    # Stage 2: fold bytes 0/2 together and bytes 1/3 together.
    stage16 = m.concat(
        byte(stage32, 0) ^ byte(stage32, 2),
        byte(stage32, 1) ^ byte(stage32, 3),
    )

    # Stage 3: final fold to one byte, then apply the mask.
    folded = byte(stage16, 0) ^ byte(stage16, 1)
    m.wire(io.data_out, folded & m.bits(io.mask[0:8]))
def get_hash_mask(sample_size: SampleSize) -> m.Bits(8):
    # Pick the 8-bit hash mask that goes with `sample_size`.
    # ONE_PIXEL is all ones; each following case replaces one more bit at the
    # end of the concatenation with 0.
    # The if/elif shape is kept on purpose: the function may be rewritten by a
    # decorator that is not visible from here.
    if sample_size == SampleSize.ONE_PIXEL:
        mask = m.repeat(m.bit(1), 8)
    elif sample_size == SampleSize.HALF_PIXEL:
        mask = m.concat(
            m.repeat(m.bit(1), 7),
            m.repeat(m.bit(0), 1),
        )
    elif sample_size == SampleSize.QUARTER_PIXEL:
        mask = m.concat(
            m.repeat(m.bit(1), 6),
            m.repeat(m.bit(0), 2),
        )
    else:
        # Every other sample size (the original author noted
        # SampleSize.EIGHTH_PIXEL for this branch).
        mask = m.concat(
            m.repeat(m.bit(1), 5),
            m.repeat(m.bit(0), 3),
        )
    return mask
def _get_coreir_op_drivers(ckt, name, sel):
    """Return the inputs of primitive ``ckt`` reported as drivers of ``out``.

    Args:
        ckt: A ``corebit`` or ``coreir`` primitive instance.
        name: Output port name; must be ``"out"``.
        sel: Selector on the output. A plain ``Selector`` for single-bit
            outputs, an ``ArraySelector`` (its ``index`` is the output bit)
            for array outputs. Nested selectors are rejected.

    Returns:
        For ``corebit``: ``[I]`` for ``not``, otherwise ``[I0, I1]``.
        For ``coreir``: the input bits for which ``test_op`` is truthy.
        NOTE(review): presumably ``test_op`` checks whether an input bit can
        influence the selected output bit - confirm against its definition.
    """
    assert name == "out"
    primitive = ckt.coreir_name

    if ckt.coreir_lib == "corebit":
        assert type(sel) is m.value_utils.Selector
        assert sel.child is None
        return [ckt.I] if primitive == "not" else [ckt.I0, ckt.I1]

    assert ckt.coreir_lib == "coreir"
    if isinstance(sel, m.value_utils.ArraySelector):
        assert sel.child is None
        out_width, out_index = len(ckt.O), sel.index
    else:  # cmp op
        assert type(sel) is m.value_utils.Selector
        assert isinstance(ckt.O, m.Bit)
        out_width, out_index = 1, 0

    # Reductions fold one operator over all input bits; everything else maps
    # straight onto a function from the `operator` module.
    reducers = {"orr": operator.or_, "andr": operator.and_}
    if primitive in reducers:
        fold = reducers[primitive]
        op = lambda x: functools.reduce(fold, x)
    else:
        op = getattr(operator, m.primitive_to_python(primitive))

    if primitive in ("not", "neg", "orr", "andr"):
        inputs = list(ckt.I)
    else:
        # Two-operand primitive: present both operands as one flat input list
        # and adapt the operator to match.
        inputs = list(m.concat(ckt.I0, ckt.I1))
        op = binop_to_unop(op)
    op = WrappedOp(primitive, op)

    in_width = len(inputs)
    return [
        inputs[k]
        for k in range(in_width)
        if test_op(op, in_width, out_width, k, out_index)
    ]
def __init__(self, x_len: int):
    """Build the ALU datapath for ``x_len``-bit operands.

    An adder, a comparison bit and a single right shifter are built once
    and shared; ``io.op`` selects which result reaches ``io.O``.
    ``io.sum_`` always carries the adder output.
    """
    super().__init__(x_len)
    io = self.io

    # Adder: the second operand is -B when op[0] is set, B otherwise.
    addend = m.mux([io.B, -io.B], io.op[0])
    total = io.A + addend

    # Comparison bit: when the top bits of A and B agree, the top bit of the
    # adder output decides; otherwise op[1] picks B's top bit (set) or A's
    # top bit (clear). The bit is then widened to x_len.
    top_when_differ = m.mux([io.A[-1], io.B[-1]], io.op[1])
    tops_agree = io.A[-1] == io.B[-1]
    less_than = m.uint(m.mux([top_when_differ, total[-1]], tops_agree), x_len)

    # Shifter: op[3] feeds A as is (set) or bit-reversed (clear). One extra
    # top bit, op[0] & MSB of the shifter input, is appended before the
    # signed right shift.
    shift_in = m.mux([io.A[::-1], io.A], io.op[3])
    fill = io.op[0] & shift_in[x_len - 1]
    widened = m.sint(m.concat(shift_in, fill))
    amount = m.sint(m.zext(io.B, 1))
    shifted_right = m.uint(widened >> amount)[:x_len]
    # Left shift result: the right-shift result with its bits reversed.
    shifted_left = shifted_right[::-1]

    @m.inline_combinational()
    def alu():
        if (io.op == ALUOP.ADD) | (io.op == ALUOP.SUB):
            io.O @= total
        elif (io.op == ALUOP.SLT) | (io.op == ALUOP.SLTU):
            io.O @= less_than
        elif (io.op == ALUOP.SRA) | (io.op == ALUOP.SRL):
            io.O @= shifted_right
        elif io.op == ALUOP.SLL:
            io.O @= shifted_left
        elif io.op == ALUOP.AND:
            io.O @= io.A & io.B
        elif io.op == ALUOP.OR:
            io.O @= io.A | io.B
        elif io.op == ALUOP.XOR:
            io.O @= io.A ^ io.B
        elif io.op == ALUOP.COPY_A:
            io.O @= io.A
        else:
            io.O @= io.B

    io.sum_ @= total
# Next-state logic of a serial transmitter.
#
# Inputs:
#   data        byte to send
#   writing     1 while a frame is being shifted out
#   valid       1 when `data` should be accepted
#   dataStore   11-bit frame buffer
#   writeClock  14-bit down-counter between bits
#   writeBit    index of the frame bit currently on the line
#
# Returns, in this order:
#   (writing_out, dataStore_out, writeClock_out, writeBit_out, TXReg_out)
#
# NOTE(review): the annotations suggest this function is compiled by a
# decorator that is not visible here; statement order is left untouched.
def txmod_logic(
        data: m.Bits(8),
        writing: m.Bit,
        valid: m.Bit,
        dataStore: m.Bits(11),
        writeClock: m.Bits(14),
        writeBit: m.Bits(4),
) -> (
        m.Bit,
        m.Bits(11),
        m.Bits(14),
        m.Bits(4),
        m.Bit,
):
    if (writing == m.bit(0)) & (valid == m.bit(1)):
        # Idle and a byte is offered: start a frame. `data` replaces bits
        # 1..8 of the buffer, bit 0 and bits 9.. are kept, and the bit timer
        # is loaded with 100.
        writing_out = m.bit(1)
        dataStore_out = m.concat(dataStore[0:1], data, dataStore[9:])
        writeClock_out = m.bits(100, 14)
        writeBit_out = m.bits(0, 4)
        TXReg_out = dataStore[0]
    elif (writing == m.bit(1)) & \
            (writeClock == m.bits(0, 14)) & \
            (writeBit == m.bits(9, 4)):
        # Timer expired on bit index 9: the frame is finished. Drive the
        # line to 1 and leave the writing state.
        dataStore_out = dataStore
        writeClock_out = writeClock
        writeBit_out = writeBit
        TXReg_out = m.bit(1)
        writing_out = m.bit(0)
    elif (writing == m.bit(1)) & (writeClock == m.bits(0, 14)):
        # Timer expired mid-frame: advance to the next bit and reload the
        # timer with 100.
        writing_out = writing
        dataStore_out = dataStore
        TXReg_out = dataStore[writeBit]
        writeBit_out = m.bits(m.uint(writeBit) + m.bits(1, 4))
        writeClock_out = m.bits(100, 14)
    elif writing == m.bit(1):
        # Mid-bit: keep driving the current bit and count the timer down.
        writing_out = writing
        dataStore_out = dataStore
        writeBit_out = writeBit
        TXReg_out = dataStore[writeBit]
        writeClock_out = m.bits(m.uint(writeClock) - m.bits(1, 14))
    else:
        # Idle with nothing to send: hold all state, line stays at 1.
        writing_out = writing
        dataStore_out = dataStore
        writeClock_out = writeClock
        writeBit_out = writeBit
        TXReg_out = m.bit(1)

    return (
        writing_out,
        dataStore_out,
        writeClock_out,
        writeBit_out,
        TXReg_out,
    )
def test_compile_coreir():
    """Wrap a compiled magma circuit in aetherlinglib's ``mapParallel``.

    The resulting CoreIR module is saved as ``mapParallel_test.json`` next to
    this file, compared byte-for-byte with ``mapParallel_test_gold.json`` and
    finally loaded back from disk.
    """
    width = 16
    numInputs = 4
    shift_amount = 2

    # Operator circuit: bits [shift_amount, width) of I followed by
    # shift_amount zero bits.
    doubleT = magma.Bits[width]
    double = magma.DefineCircuit("double", "I", magma.In(doubleT),
                                 "O", magma.Out(doubleT))
    shifted = magma.concat(double.I[shift_amount:width],
                           magma.bits(0, shift_amount))
    magma.wire(shifted, double.O)

    coreir_double = magma.backend.coreir.coreir_.compile(double)
    ctx = coreir_double.context

    # The result is not used below; the call is kept as in the original.
    _ = ctx.new_values({
        "numInputs": numInputs,
        "operator": coreir_double
    })

    wrapper_type = ctx.Record({
        "in": ctx.Array(numInputs, ctx.Array(width, ctx.BitIn())),
        "out": ctx.Array(numInputs, ctx.Array(width, ctx.Bit()))
    })
    wrapper = ctx.global_namespace.new_module("test_module", wrapper_type)
    wrapper_def = wrapper.new_definition()

    # "aetherlinglib" is not a built-in namespace, so it is loaded on demand.
    map_parallel = ctx.load_library("aetherlinglib").generators["mapParallel"]
    map_module = map_parallel(numInputs=numInputs, operator=coreir_double)
    map_inst = wrapper_def.add_module_instance("mapDouble", map_module)

    interface = wrapper_def.interface
    wrapper_def.connect(interface.select("in"), map_inst.select("I"))
    wrapper_def.connect(map_inst.select("O"), interface.select("out"))
    wrapper_def.print_()
    wrapper.definition = wrapper_def
    wrapper.print_()

    here = os.path.dirname(os.path.realpath(__file__))
    actual_path = os.path.join(here, "mapParallel_test.json")
    gold_path = os.path.join(here, "mapParallel_test_gold.json")

    wrapper.save_to_file(actual_path)
    with open(actual_path, "r") as actual, open(gold_path, "r") as gold:
        assert actual.read() == gold.read()

    # Round trip: the saved file must load again.
    reloaded = ctx.load_from_file(actual_path)
    reloaded.print_()
def flatten_fields_to_bits(tuple_):
    """Concatenate the fields of a magma tuple into one Bits value.

    This lets a tuple be stored in a RAM. Fields that are a single ``Bit``
    are first promoted to 1-bit ``Bits`` so they can take part in the
    concatenation; all other fields are used unchanged.
    """
    assert isinstance(tuple_, m.TupleType)
    widened = [
        m.bits(field, 1) if isinstance(field, m.BitType) else field
        for field in tuple_
    ]
    return m.concat(*widened)
def __init__(self, x_len, n_ways: int, n_sets: int, b_bytes: int):
    """Build the cache: storage, CPU response path, NASTI channels and FSM.

    Args:
        x_len: CPU word / address width in bits.
        n_ways: Not referenced anywhere in this method.
        n_sets: Number of sets (entries in the valid/dirty/tag/data stores).
        b_bytes: Cache block size in bytes.

    The NASTI data width is fixed at 64 bits here. A block is moved over
    ``data_beats = b_bits // 64`` beats.
    """
    # Derived geometry.
    b_bits = b_bytes << 3                         # block size in bits
    b_len = m.bitutils.clog2(b_bytes)             # block-offset address bits
    s_len = m.bitutils.clog2(n_sets)              # set-index address bits
    t_len = x_len - (s_len + b_len)               # tag bits
    n_words = b_bits // x_len                     # CPU words per block
    w_bytes = x_len // 8                          # bytes per CPU word
    byte_offset_bits = m.bitutils.clog2(w_bytes)  # byte-in-word address bits
    nasti_params = NastiParameters(data_bits=64, addr_bits=x_len, id_bits=5)
    data_beats = b_bits // nasti_params.x_data_bits

    class MetaData(m.Product):
        tag = m.UInt[t_len]

    self.io = m.IO(**make_cache_ports(x_len, nasti_params))
    self.io += m.ClockIO()

    class State(m.Enum):
        IDLE = 0
        READ_CACHE = 1
        WRITE_CACHE = 2
        WRITE_BACK = 3
        WRITE_ACK = 4
        REFILL_READY = 5
        REFILL = 6

    state = m.Register(init=State.IDLE)()

    # memory
    # v / d: one valid bit and one dirty bit per set.
    v = m.Register(m.UInt[n_sets], has_enable=True)()
    d = m.Register(m.UInt[n_sets], has_enable=True)()
    meta_mem = m.Memory(n_sets, MetaData, read_latency=1,
                        has_read_enable=True)()
    # One byte-maskable memory per CPU word of the block.
    data_mem = [
        ArrayMaskMem(n_sets, w_bytes, m.UInt[8], read_latency=1,
                     has_read_enable=True)()
        for _ in range(n_words)
    ]

    # Registered copy of the CPU request (address, data, byte mask).
    addr_reg = m.Register(type(self.io.cpu.req.data.addr).undirected_t,
                          has_enable=True)()
    cpu_data = m.Register(type(self.io.cpu.req.data.data).undirected_t,
                          has_enable=True)()
    cpu_mask = m.Register(type(self.io.cpu.req.data.mask).undirected_t,
                          has_enable=True)()

    self.io.nasti.r.ready @= state.O == State.REFILL

    # Counters
    assert data_beats > 0
    if data_beats > 1:
        read_counter = mantle.CounterModM(data_beats,
                                          max(data_beats.bit_length(), 1),
                                          has_ce=True)
        read_counter.CE @= m.enable(self.io.nasti.r.fired())
        read_count, read_wrap_out = read_counter.O, read_counter.COUT

        # write_counter.CE is wired at the very end of this method.
        write_counter = mantle.CounterModM(data_beats,
                                           max(data_beats.bit_length(), 1),
                                           has_ce=True)
        write_count, write_wrap_out = write_counter.O, write_counter.COUT
    else:
        # Single-beat blocks: no counters, every beat is also the last one.
        # NOTE(review): `write_count[:-1]` further down is evaluated
        # unconditionally; with this plain int 0 it would raise TypeError.
        # Confirm whether data_beats == 1 is meant to be supported.
        read_count, read_wrap_out = 0, 1
        write_count, write_wrap_out = 0, 1

    # Collects the beats of a refill as they arrive on the NASTI R channel.
    refill_buf = m.Register(m.Array[data_beats,
                                    m.UInt[nasti_params.x_data_bits]],
                            has_enable=True)()
    if data_beats == 1:
        refill_buf.I[0] @= self.io.nasti.r.data.data
    else:
        refill_buf.I @= m.set_index(refill_buf.O,
                                    self.io.nasti.r.data.data,
                                    read_count[:-1])
    refill_buf.CE @= m.enable(self.io.nasti.r.fired())

    is_idle = state.O == State.IDLE
    is_read = state.O == State.READ_CACHE
    is_write = state.O == State.WRITE_CACHE
    # Asserted when the REFILL state sees the read counter wrap.
    is_alloc = (state.O == State.REFILL) & read_wrap_out
    # m.display("[%0t]: is_alloc = %x", m.time(), is_alloc)\
    #     .when(m.posedge(self.io.CLK))
    is_alloc_reg = m.Register(m.Bit)()(is_alloc)

    # `hit` is declared here and driven further down, once rmeta exists.
    hit = m.Bit(name="hit")
    # Python precedence: (is_write & (hit | is_alloc_reg) & ~abort) | is_alloc
    wen = is_write & (hit | is_alloc_reg) & ~self.io.cpu.abort | is_alloc
    # m.display("[%0t]: wen = %x", m.time(), wen)\
    #     .when(m.posedge(self.io.CLK))
    ren = m.enable(~wen & (is_idle | is_read) & self.io.cpu.req.valid)
    ren_reg = m.enable(m.Register(m.Bit)()(ren))

    # Address fields: the live request supplies the read index, the
    # registered request supplies tag / index / word offset.
    addr = self.io.cpu.req.data.addr
    idx = addr[b_len:s_len + b_len]
    tag_reg = addr_reg.O[s_len + b_len:x_len]
    idx_reg = addr_reg.O[b_len:s_len + b_len]
    off_reg = addr_reg.O[byte_offset_bits:b_len]

    rmeta = meta_mem.read(idx, ren)
    rdata = m.concat(*(mem.read(idx, ren) for mem in data_mem))
    # Holds the last read data; it is updated when ren_reg is set.
    rdata_buf = m.Register(type(rdata), has_enable=True)()(rdata, CE=ren_reg)

    # Block presented to the CPU / write-back path: the refill buffer when
    # is_alloc_reg is set, otherwise fresh or held memory read data.
    read = m.mux([
        m.as_bits(m.mux([rdata_buf, rdata], ren_reg)),
        m.as_bits(refill_buf.O)
    ], is_alloc_reg)
    # m.display("is_alloc_reg=%x", is_alloc_reg)\
    #     .when(m.posedge(self.io.CLK))

    hit @= v.O[idx_reg] & (rmeta.tag == tag_reg)

    # read mux
    self.io.cpu.resp.data.data @= m.array(
        [read[i * x_len:(i + 1) * x_len] for i in range(n_words)])[off_reg]
    self.io.cpu.resp.valid @= (is_idle | (is_read & hit) |
                               (is_alloc_reg & ~cpu_mask.O.reduce_or()))
    m.display("resp.valid=%x", self.io.cpu.resp.valid.value())\
        .when(m.posedge(self.io.CLK))

    m.display("[%0t]: valid = %x", m.time(),
              self.io.cpu.resp.valid.value())\
        .when(m.posedge(self.io.CLK))
    m.display("[%0t]: is_idle = %x, is_read = %x, hit = %x, is_alloc_reg = "
              "%x, ~cpu_mask.O.reduce_or() = %x", m.time(), is_idle, is_read,
              hit, is_alloc_reg, ~cpu_mask.O.reduce_or())\
        .when(m.posedge(self.io.CLK))
    m.display("[%0t]: refill_buf.O=%x, %x", m.time(), *refill_buf.O)\
        .when(m.posedge(self.io.CLK))\
        .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)
    m.display("[%0t]: read=%x", m.time(), read)\
        .when(m.posedge(self.io.CLK))\
        .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)

    # The request registers update whenever a response is valid.
    addr_reg.I @= addr
    addr_reg.CE @= m.enable(self.io.cpu.resp.valid.value())

    cpu_data.I @= self.io.cpu.req.data.data
    cpu_data.CE @= m.enable(self.io.cpu.resp.valid.value())

    cpu_mask.I @= self.io.cpu.req.data.mask
    cpu_mask.CE @= m.enable(self.io.cpu.resp.valid.value())

    wmeta = MetaData(name="wmeta")
    wmeta.tag @= tag_reg

    # The byte mask is widened to w_bytes * 8 bits and shifted left by the
    # word offset scaled by the word size in bytes.
    offset_mask = (m.zext_to(cpu_mask.O, w_bytes * 8) << m.concat(
        m.bits(0, byte_offset_bits), off_reg))
    # All ones while allocating, otherwise the shifted CPU byte mask.
    wmask = m.mux([m.SInt[w_bytes * 8](-1), m.sint(offset_mask)], ~is_alloc)

    if len(refill_buf.O) == 1:
        wdata_alloc = self.io.nasti.r.data.data
    else:
        wdata_alloc = m.concat(
            # TODO: not sure why they use `init.reverse`
            # https://github.com/ucb-bar/riscv-mini/blob/release/src/main/scala/Cache.scala#L116
            m.concat(*refill_buf.O[:-1]),
            self.io.nasti.r.data.data)
    # Refill data while allocating, otherwise the CPU word repeated n_words
    # times (wmask then picks the bytes that are actually written).
    wdata = m.mux([wdata_alloc,
                   m.as_bits(m.repeat(cpu_data.O, n_words))], ~is_alloc)

    # On a write the set becomes valid; it becomes dirty unless the write is
    # an allocation.
    v.I @= m.set_index(v.O, m.bit(True), idx_reg)
    v.CE @= m.enable(wen)
    d.I @= m.set_index(d.O, ~is_alloc, idx_reg)
    d.CE @= m.enable(wen)
    # m.display("[%0t]: refill_buf.O = %x", m.time(),
    #           m.concat(*refill_buf.O)).when(m.posedge(self.io.CLK)).if_(wen)
    # m.display("[%0t]: nasti.r.data.data = %x", m.time(),
    #           self.io.nasti.r.data.data).when(m.posedge(self.io.CLK)).if_(wen)

    # The tag is only rewritten on allocation.
    meta_mem.write(wmeta, idx_reg, m.enable(wen & is_alloc))
    for i, mem in enumerate(data_mem):
        # Word i of the block, split into bytes.
        data = [
            wdata[i * x_len + j * 8:i * x_len + (j + 1) * 8]
            for j in range(w_bytes)
        ]
        mem.write(m.array(data), idx_reg,
                  wmask[i * w_bytes:(i + 1) * w_bytes], m.enable(wen))
        # m.display("[%0t]: wdata = %x, %x, %x, %x", m.time(),
        #           *mem.WDATA.value()).when(m.posedge(self.io.CLK)).if_(wen)
        # m.display("[%0t]: wmask = %x, %x, %x, %x", m.time(),
        #           *mem.WMASK.value()).when(m.posedge(self.io.CLK)).if_(wen)

    # Refill address: {tag_reg, idx_reg} shifted left by b_len.
    tag_and_idx = m.zext_to(m.concat(idx_reg, tag_reg),
                            nasti_params.x_addr_bits)
    self.io.nasti.ar.data @= NastiReadAddressChannel(
        nasti_params, 0, tag_and_idx << m.Bits[len(tag_and_idx)](b_len),
        m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

    # Write-back address: same construction, but with the stored tag.
    rmeta_and_idx = m.zext_to(m.concat(idx_reg, rmeta.tag),
                              nasti_params.x_addr_bits)
    self.io.nasti.aw.data @= NastiWriteAddressChannel(
        nasti_params, 0, rmeta_and_idx << m.Bits[len(rmeta_and_idx)](b_len),
        m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

    # Write-back data: the beat of `read` selected by the write counter.
    self.io.nasti.w.data @= NastiWriteDataChannel(
        nasti_params,
        m.array([
            read[i * nasti_params.x_data_bits:(i + 1) *
                 nasti_params.x_data_bits] for i in range(data_beats)
        ])[write_count[:-1]], None, write_wrap_out)

    is_dirty = v.O[idx_reg] & d.O[idx_reg]

    # TODO: Have to use temporary so we can invoke `fired()`
    aw_valid = m.Bit(name="aw_valid")
    self.io.nasti.aw.valid @= aw_valid

    ar_valid = m.Bit(name="ar_valid")
    self.io.nasti.ar.valid @= ar_valid

    b_ready = m.Bit(name="b_ready")
    self.io.nasti.b.ready @= b_ready

    @m.inline_combinational()
    def logic():
        # Defaults: hold the state, all handshake outputs deasserted.
        state.I @= state.O
        aw_valid @= False
        ar_valid @= False
        self.io.nasti.w.valid @= False
        b_ready @= False
        if state.O == State.IDLE:
            # A non-zero byte mask makes the request a write.
            if self.io.cpu.req.valid:
                if self.io.cpu.req.data.mask.reduce_or():
                    state.I @= State.WRITE_CACHE
                else:
                    state.I @= State.READ_CACHE
        elif state.O == State.READ_CACHE:
            if hit:
                # Hit: accept the next request directly, or go idle.
                if self.io.cpu.req.valid:
                    if self.io.cpu.req.data.mask.reduce_or():
                        state.I @= State.WRITE_CACHE
                    else:
                        state.I @= State.READ_CACHE
                else:
                    state.I @= State.IDLE
            else:
                # Miss: write back first if the set is dirty, else refill.
                aw_valid @= is_dirty
                ar_valid @= ~is_dirty
                if self.io.nasti.aw.fired():
                    state.I @= State.WRITE_BACK
                elif self.io.nasti.ar.fired():
                    state.I @= State.REFILL
        elif state.O == State.WRITE_CACHE:
            if hit | is_alloc_reg | self.io.cpu.abort:
                state.I @= State.IDLE
            else:
                aw_valid @= is_dirty
                ar_valid @= ~is_dirty
                if self.io.nasti.aw.fired():
                    state.I @= State.WRITE_BACK
                elif self.io.nasti.ar.fired():
                    state.I @= State.REFILL
        elif state.O == State.WRITE_BACK:
            self.io.nasti.w.valid @= True
            if write_wrap_out:
                state.I @= State.WRITE_ACK
        elif state.O == State.WRITE_ACK:
            b_ready @= True
            if self.io.nasti.b.fired():
                state.I @= State.REFILL_READY
        elif state.O == State.REFILL_READY:
            ar_valid @= True
            if self.io.nasti.ar.fired():
                state.I @= State.REFILL
        elif state.O == State.REFILL:
            # After the last beat, finish a pending write or go idle.
            if read_wrap_out:
                if cpu_mask.O.reduce_or():
                    state.I @= State.WRITE_CACHE
                else:
                    state.I @= State.IDLE

    if data_beats > 1:
        # TODO: Have to do this at the end since the inline comb logic
        # wires up nasti.w
        write_counter.CE @= m.enable(self.io.nasti.w.fired())
# IceStick demo: compare two 2-bit values read from header J1 with an 8-bit
# unsigned greater-than and show the result on LED D1.
from magma import uint, bits, concat, wire, compile, EndCircuit
from mantle import UGT
from loam.boards.icestick import IceStick


def _pad_to_byte(pins):
    # Extend a 2-pin slice with six constant-zero bits and view it as UInt.
    return uint(concat(pins, bits(0, 6)))


icestick = IceStick()
# J1[0..3] are the inputs, D1 is the output.
for i in range(4):
    icestick.J1[i].input().on()
icestick.D1.on()

main = icestick.main()

A = _pad_to_byte(main.J1[0:2])
B = _pad_to_byte(main.J1[2:4])
O = main.D1

ugt = UGT(8)
wire(ugt(A, B), O)

EndCircuit()
def definition(io):
    """Bounding-box stage: compute, round, clamp and register a polygon's box.

    Reads ``io.poly_in``, ``io.color_in``, ``io.is_quad_in``,
    ``io.valid_in``, ``io.sample_size``, ``io.screen_max`` and ``io.halt``;
    drives ``io.poly_out``, ``io.color_out``, ``io.box``, ``io.valid_out``
    and ``io.is_quad_out`` through pipeline registers.

    Uses names defined outside this function: ``Polygon``, ``mux``, ``dff``,
    ``bits``, ``fractional_bits``, ``axes``, ``vertices``,
    ``color_channels`` and ``pipe_depth``.
    """
    # -------------------
    # Your code goes here
    # -------------------
    # You may define any combinational functions you may need
    # Finally, assign values to
    #   box_clamped
    #   box_valid
    # These signals feed into the pipeline registers

    # Pairwise <= comparisons of vertices 0, 1 and 2 on each axis, in the
    # order (v0 <= v1, v1 <= v2, v0 <= v2).
    x_comp = m.concat(\
        m.bits(io.poly_in[0][0]) <= m.bits(io.poly_in[1][0]), \
        m.bits(io.poly_in[1][0]) <= m.bits(io.poly_in[2][0]), \
        m.bits(io.poly_in[0][0]) <= m.bits(io.poly_in[2][0]))
    y_comp = m.concat(\
        m.bits(io.poly_in[0][1]) <= m.bits(io.poly_in[1][1]), \
        m.bits(io.poly_in[1][1]) <= m.bits(io.poly_in[2][1]), \
        m.bits(io.poly_in[0][1]) <= m.bits(io.poly_in[2][1]))

    # Second argument selects the axis: bit 0 for x, bit 1 for y.
    (ll_x, ur_x) = io.return_ll_ur(x_comp, m.bit(0))
    (ll_y, ur_y) = io.return_ll_ur(y_comp, m.bit(1))
    (hash_mask) = io.get_hash_mask(io.sample_size)

    # Boxes are indexed [corner][axis]; corner 0 is lower-left, corner 1 is
    # upper-right.
    box_init = Polygon(2, 2, bits)
    rounded_box = Polygon(2, 2, bits)
    box_clamped = Polygon(2, 2, bits)

    box_init[0][0] = ll_x
    box_init[1][0] = ur_x
    box_init[0][1] = ll_y
    box_init[1][1] = ur_y

    # Upper part of each coordinate is passed through unchanged.
    # NOTE(review): slice end indices are exclusive, so `bits - 1` leaves the
    # top bit out of these wires - confirm this is intended.
    m.wire(box_init[0][0][fractional_bits:bits - 1],
           rounded_box[0][0][fractional_bits:bits - 1])
    m.wire(box_init[0][1][fractional_bits:bits - 1],
           rounded_box[0][1][fractional_bits:bits - 1])
    m.wire(box_init[1][0][fractional_bits:bits - 1],
           rounded_box[1][0][fractional_bits:bits - 1])
    m.wire(box_init[1][1][fractional_bits:bits - 1],
           rounded_box[1][1][fractional_bits:bits - 1])

    # Lower part: six zero bits followed by masked bits.
    # NOTE(review): these wires connect box_init to an expression built from
    # rounded_box rather than the other way round - verify the direction.
    m.wire(
        box_init[0][0][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[0][0][6:fractional_bits - 1] & hash_mask)))
    m.wire(
        box_init[0][1][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[0][1][6:fractional_bits - 1] & hash_mask)))
    m.wire(
        box_init[1][0][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[1][0][6:fractional_bits - 1] & hash_mask)))
    m.wire(
        box_init[1][1][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[1][1][6:fractional_bits - 1] & hash_mask)))

    # Clamp: lower-left against 0, upper-right against io.screen_max.
    # NOTE(review): argument order of the external `mux` helper is not
    # visible here - confirm which operand the condition selects.
    box_clamped[0][0] = mux(rounded_box[0][0], m.repeat(m.bit(0), bits),
                            (rounded_box[0][0] < 0))
    box_clamped[1][0] = mux(rounded_box[1][0], io.screen_max[0],
                            (rounded_box[1][0] > io.screen_max[0]))
    box_clamped[0][1] = mux(rounded_box[0][1], m.repeat(m.bit(0), bits),
                            (rounded_box[0][1] < 0))
    box_clamped[1][1] = mux(rounded_box[1][1], io.screen_max[1],
                            (rounded_box[1][1] > io.screen_max[1]))

    # The box is valid only if the input is valid and none of the four clamp
    # conditions fired.
    box_valid = io.valid_in & ~(
        (rounded_box[0][0] < 0) | (rounded_box[1][0] > io.screen_max[0]) |
        (rounded_box[0][1] < 0) | (rounded_box[1][1] > io.screen_max[1]))
    # -------------------
    # Your code goes here
    # -------------------

    # Put values into pipeline registers
    def wire_reg(reg, reg_input, reg_output=None):
        # Connect one pipeline register: data in, clock, reset, and enable
        # (taken from io.halt[0]); the output is wired only when a
        # destination is given.
        m.wire(reg_input, reg.data_in)
        m.wire(reg.clk, io.CLK)
        m.wire(reg.reset, io.RESET)
        m.wire(reg.en, io.halt[0])
        if reg_output is not None:
            m.wire(reg.data_out, reg_output)

    # Each signal passes through a "retime" register followed by a final
    # register that drives the output port.
    poly_retime_r = dff.DefineDFF3(axes, vertices, bits, pipe_depth - 1, 1)()
    wire_reg(poly_retime_r, io.poly_in)
    poly_r = dff.DefineDFF3(axes, vertices, bits, 1, 0)()
    wire_reg(poly_r, poly_retime_r.data_out, io.poly_out)

    color_retime_r = dff.DefineDFF2(color_channels, bits, pipe_depth - 1, 1)()
    wire_reg(color_retime_r, io.color_in)
    color_r = dff.DefineDFF2(color_channels, bits, 1, 0)()
    wire_reg(color_r, color_retime_r.data_out, io.color_out)

    box_retime_r = dff.DefineDFF3(2, 2, bits, pipe_depth - 1, 1)()
    wire_reg(box_retime_r, box_clamped)
    box_r = dff.DefineDFF3(2, 2, bits, 1, 0)()
    wire_reg(box_r, box_retime_r.data_out, io.box)

    valid_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
    wire_reg(valid_retime_r, box_valid)
    valid_r = dff.DefineDFF(1, 1, 0)()
    wire_reg(valid_r, valid_retime_r.data_out, io.valid_out)

    is_quad_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
    wire_reg(is_quad_retime_r, m.bits(io.is_quad_in))
    is_quad_r = dff.DefineDFF(1, 1, 0)()
    wire_reg(is_quad_r, is_quad_retime_r.data_out, m.bits(io.is_quad_out))
def definition(io):
    """Jitter stage: OR a hash-derived offset into each sample coordinate.

    Reads ``io.sample_in``, ``io.sample_size``, ``io.poly_in``,
    ``io.color_in``, ``io.is_quad_in`` and ``io.valid_sample_in``; drives
    the matching ``*_out`` ports through pipeline registers.

    Uses names defined outside this function: ``define_tree_hash``, ``dff``,
    ``bits``, ``fractional_bits``, ``integer_bits``, ``axes``, ``vertices``,
    ``color_channels`` and ``pipe_depth``.
    """
    # Hash input: both coordinates with their low 4 bits dropped.
    hash_in_width = (bits - 4) * 2
    hash_out_width = fractional_bits - 2

    (hash_mask) = io.get_hash_mask(io.sample_size)

    # One hash instance per axis.
    jitter = [
        define_tree_hash(hash_in_width, hash_out_width)() for _ in range(2)
    ]

    subsample1 = io.sample_in[1][4:bits]
    subsample0 = io.sample_in[0][4:bits]

    # The two hashes see the same bits in swapped order.
    m.wire(jitter[0].data_in, m.bits(m.concat(subsample0, subsample1)))
    m.wire(jitter[0].mask, hash_mask)

    m.wire(jitter[1].data_in, m.bits(m.concat(subsample1, subsample0)))
    m.wire(jitter[1].mask, hash_mask)

    # Jitter the sample coordinates
    # The hash output is placed above (fractional_bits - hash_out_width) zero
    # bits and below integer_bits zero bits, then ORed into the coordinate.
    sample_jittered = m.array([
        m.bits(io.sample_in[i][0:bits])
        | m.concat(m.bits(0, fractional_bits - hash_out_width),
                   m.bits(jitter[i].data_out[0:hash_out_width]),
                   m.bits(0, integer_bits)) for i in range(2)
    ])

    # Put values into pipeline registers
    def wire_reg(reg, reg_input, reg_output=None):
        # Connect one pipeline register: data in, clock, reset, and an
        # always-on enable; the output is wired only when a destination is
        # given.
        m.wire(reg_input, reg.data_in)
        m.wire(reg.clk, io.CLK)
        m.wire(reg.reset, io.RESET)
        m.wire(reg.en, m.bit(1))
        if reg_output is not None:
            m.wire(reg.data_out, reg_output)

    # Each signal passes through a "retime" register followed by a final
    # register that drives the output port.
    poly_retime_r = dff.DefineDFF3(axes, vertices, bits, pipe_depth - 1, 1)()
    wire_reg(poly_retime_r, io.poly_in)
    poly_r = dff.DefineDFF3(axes, vertices, bits, 1, 0)()
    wire_reg(poly_r, poly_retime_r.data_out, io.poly_out)

    color_retime_r = dff.DefineDFF2(color_channels, bits, pipe_depth - 1, 1)()
    wire_reg(color_retime_r, io.color_in)
    color_r = dff.DefineDFF2(color_channels, bits, 1, 0)()
    wire_reg(color_r, color_retime_r.data_out, io.color_out)

    is_quad_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
    wire_reg(is_quad_retime_r, m.bits(io.is_quad_in))
    is_quad_r = dff.DefineDFF(1, 1, 0)()
    wire_reg(is_quad_r, is_quad_retime_r.data_out, m.bits(io.is_quad_out))

    valid_sample_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
    wire_reg(valid_sample_retime_r, io.valid_sample_in)
    valid_sample_r = dff.DefineDFF(1, 1, 0)()
    wire_reg(valid_sample_r, valid_sample_retime_r.data_out,
             io.valid_sample_out)

    sample_retime_r = dff.DefineDFF2(2, bits, pipe_depth - 1, 1)()
    wire_reg(sample_retime_r, sample_jittered)
    sample_r = dff.DefineDFF2(2, bits, 1, 0)()
    wire_reg(sample_r, sample_retime_r.data_out, io.sample_out)
# Popcount demo for a Papilio board with a MegaWing shield: the eight
# switches feed an N-bit population counter (the remaining inputs are tied
# to 0) and the low LOGN bits of the count are shown on the LEDs.
import magma as m
from magma.bitutils import clog2
from mantle.util.compressor import PopCount
from loam.shields.megawing import MegaWing

N = 128
# Number of LEDs used for the count, capped at 8.
LOGN = min(clog2(N) + 1, 8)

if m.mantle_target == 'spartan3':
    from loam.boards.papilioone import PapilioOne as Papilio
elif m.mantle_target == 'spartan6':
    from loam.boards.papiliopro import PapilioPro as Papilio
else:
    # Without this branch `Papilio` stays unbound and the MegaWing(...) call
    # below fails with an unrelated-looking NameError.
    raise RuntimeError(
        "unsupported mantle_target %r: expected 'spartan3' or 'spartan6'"
        % (m.mantle_target,))

megawing = MegaWing(Papilio)
megawing.Switch.on(8)
megawing.LED.on(LOGN)

main = megawing.main()

pop8 = PopCount(N)
# Only 8 switches exist, so the other N - 8 inputs are constant zero.
O = pop8(m.concat(main.SWITCH, m.bits(0, N - 8)))
m.wire(O[0:LOGN], main.LED)

m.EndCircuit()
class DUT(m.Circuit):
    # Test harness: drives the same request stream into `Cache` (dut) and
    # `GoldCache` (gold), backs both with one shared memory model, checks that
    # their memory traffic and read responses agree, and raises `io.done`
    # from the test counter's carry-out.
    #
    # NOTE(review): this class body was reconstructed from whitespace-free
    # text; the nesting of a few statements (flagged below) should be
    # confirmed against the original file.
    io = m.IO(done=m.Out(m.Bit)) + m.ClockIO()

    # Cache geometry under test.
    x_len = 32
    n_sets = 256
    b_bytes = 4 * (x_len >> 3)         # four words per block
    b_len = m.bitutils.clog2(b_bytes)  # block-offset address bits
    s_len = m.bitutils.clog2(n_sets)   # set-index address bits
    t_len = x_len - (s_len + b_len)    # tag bits
    nasti_params = NastiParameters(data_bits=64, addr_bits=x_len, id_bits=5)

    # Device under test; every NASTI channel goes through a depth-32 queue.
    dut = Cache(x_len, 1, n_sets, b_bytes)()
    dut_mem = make_NastiIO(nasti_params).undirected_t(name="dut_mem")
    dut_mem.ar @= make_Queue(dut.nasti.ar, 32)
    dut_mem.aw @= make_Queue(dut.nasti.aw, 32)
    dut_mem.w @= make_Queue(dut.nasti.w, 32)
    dut.nasti.b @= make_Queue(dut_mem.b, 32)
    dut.nasti.r @= make_Queue(dut_mem.r, 32)

    # Reference model, with queued request/response and NASTI channels.
    gold = GoldCache(x_len, 1, n_sets, b_bytes)()
    gold_req = type(gold.req).undirected_t(name="gold_req")
    gold_resp = type(gold.resp).undirected_t(name="gold_resp")
    gold_mem = make_NastiIO(nasti_params).undirected_t(name="gold_mem")
    gold.req @= make_Queue(gold_req, 32)
    gold_resp @= make_Queue(gold.resp, 32)
    gold_mem.ar @= make_Queue(gold.nasti.ar, 32)
    gold_mem.aw @= make_Queue(gold.nasti.aw, 32)
    gold_mem.w @= make_Queue(gold.nasti.w, 32)
    gold.nasti.b @= make_Queue(gold_mem.b, 32)
    gold.nasti.r @= make_Queue(gold_mem.r, 32)

    size = m.bitutils.clog2(nasti_params.x_data_bits // 8)
    b_bits = b_bytes << 3
    data_beats = b_bits // nasti_params.x_data_bits

    # Shared backing memory: 2**20 entries of one NASTI data word each.
    mem = m.Memory(1 << 20, m.UInt[nasti_params.x_data_bits])()

    class MemState(m.Enum):
        IDLE = 0
        WRITE = 1
        WRITE_ACK = 2
        READ = 3

    mem_state = m.Register(init=MemState.IDLE)()

    # Beat counters; each advances only when both caches take part.
    write_counter = mantle.CounterModM(data_beats,
                                       data_beats.bit_length(),
                                       has_ce=True)
    write_counter.CE @= m.enable((mem_state.O == MemState.WRITE)
                                 & dut_mem.w.valid & gold_mem.w.valid)
    read_counter = mantle.CounterModM(data_beats,
                                      data_beats.bit_length(),
                                      has_ce=True)
    read_counter.CE @= m.enable((mem_state.O == MemState.READ)
                                & dut_mem.r.ready & gold_mem.r.ready)

    dut_mem.b.valid @= mem_state.O == MemState.WRITE_ACK
    dut_mem.b.data @= NastiWriteResponseChannel(nasti_params, 0)
    dut_mem.r.valid @= mem_state.O == MemState.READ
    # Read data comes from the gold cache's AR address plus the beat count,
    # truncated to the 20-bit memory address.
    dut_mem.r.data @= NastiReadDataChannel(
        nasti_params, 0,
        mem.read(
            ((gold_mem.ar.data.addr) +
             m.zext_to(read_counter.O, nasti_params.x_addr_bits))[:20]),
        read_counter.COUT)

    # The gold model sees exactly the handshakes and data given to the dut.
    gold_mem.ar.ready @= dut_mem.ar.ready
    gold_mem.aw.ready @= dut_mem.aw.ready
    gold_mem.w.ready @= dut_mem.w.ready
    gold_mem.b.valid @= dut_mem.b.valid
    gold_mem.b.data @= dut_mem.b.data
    gold_mem.r.valid @= dut_mem.r.valid
    gold_mem.r.data @= dut_mem.r.data

    # Two write sources for `mem`: port 0 is cache write-back traffic, port 1
    # is the initialisation performed by the test FSM.
    # NOTE(review): mem_wdata0 is declared but not referenced again in this
    # class body.
    mem_wen0 = m.Bit(name="mem_wen0")
    mem_wdata0 = m.UInt[nasti_params.x_data_bits](name="mem_wdata0")
    mem_wen1 = m.Bit(name="mem_wen1")
    mem_wdata1 = m.UInt[nasti_params.x_data_bits](name="mem_wdata1")
    mem_waddr1 = m.UInt[20](name="mem_waddr1")

    # Port 1 wins whenever mem_wen1 is set.
    mem.write(
        m.mux([dut_mem.w.data.data, mem_wdata1], mem_wen1),
        m.mux([((dut_mem.aw.data.addr) +
                m.zext_to(write_counter.O,
                          nasti_params.x_addr_bits))[:20], mem_waddr1],
              mem_wen1),
        m.enable(mem_wen0 | mem_wen1))
    # m.display("mem_wen0 = %x, mem_wen1 = %x", mem_wen0,
    #           mem_wen1).when(m.posedge(io.CLK))
    # m.display("dut_mem.w.valid = %x",
    #           dut_mem.w.valid).when(m.posedge(io.CLK))
    # m.display("gold_mem.w.valid = %x",
    #           gold_mem.w.valid).when(m.posedge(io.CLK))

    # In IDLE, when both caches request a write, the addresses must match.
    f.assert_immediate(
        (mem_state.O != MemState.IDLE)
        | ~(gold_mem.aw.valid & dut_mem.aw.valid)
        | (dut_mem.aw.data.addr == gold_mem.aw.data.addr),
        failure_msg=(
            "[dut_mem.aw.data.addr] %x != [gold_mem.aw.data.addr] %x",
            dut_mem.aw.data.addr, gold_mem.aw.data.addr))
    # Same check for read addresses.
    # NOTE(review): this condition also includes the `aw.valid` term, so the
    # read-address check is skipped unless both aw.valid are high - confirm
    # that this is intended.
    f.assert_immediate(
        (mem_state.O != MemState.IDLE)
        | ~(gold_mem.aw.valid & dut_mem.aw.valid)
        | ~(gold_mem.ar.valid & dut_mem.ar.valid)
        | (dut_mem.ar.data.addr == gold_mem.ar.data.addr),
        failure_msg=(
            "[dut_mem.ar.data.addr] %x != [gold_mem.ar.data.addr] %x",
            dut_mem.ar.data.addr, gold_mem.ar.data.addr))
    # In WRITE, when both caches present data, the data must match.
    f.assert_immediate(
        (mem_state.O != MemState.WRITE)
        | ~(gold_mem.w.valid & dut_mem.w.valid)
        | (dut_mem.w.data.data == gold_mem.w.data.data),
        failure_msg=(
            "[dut_mem.w.data.data] %x != [gold_mem.w.data.data] %x",
            dut_mem.w.data.data, gold_mem.w.data.data))

    @m.inline_combinational()
    def mem_fsm():
        # Defaults: nothing accepted, no write, hold state.
        dut_mem.w.ready @= False
        dut_mem.aw.ready @= False
        dut_mem.ar.ready @= False
        mem_wen0 @= False
        mem_state.I @= mem_state.O
        if mem_state.O == MemState.IDLE:
            # Writes take priority over reads.
            if gold_mem.aw.valid & dut_mem.aw.valid:
                mem_state.I @= MemState.WRITE
            elif gold_mem.ar.valid & dut_mem.ar.valid:
                mem_state.I @= MemState.READ
        elif mem_state.O == MemState.WRITE:
            if gold_mem.w.valid & dut_mem.w.valid:
                mem_wen0 @= True
                dut_mem.w.ready @= True
            # NOTE(review): reconstructed as a sibling of the `if` above, not
            # nested inside it - confirm.
            if write_counter.COUT:
                dut_mem.aw.ready @= True
                mem_state.I @= MemState.WRITE_ACK
        elif mem_state.O == MemState.WRITE_ACK:
            if gold_mem.b.ready & dut_mem.b.ready:
                mem_state.I @= MemState.IDLE
        elif mem_state.O == MemState.READ:
            if read_counter.COUT:
                dut_mem.ar.ready @= True
                mem_state.I @= MemState.IDLE

    # NOTE(review): both displays are reconstructed as guarded by TRACE.
    if TRACE:
        m.display("[%0t]: [write] mem[%x] <= %x", m.time(),
                  mem.WADDR.value(), dut_mem.w.data.data).when(
                      m.posedge(io.CLK)).if_(mem_wen0)

        m.display("[%0t]: [read] mem[%x] => %x", m.time(),
                  mem.RADDR.value(),
                  dut_mem.r.data.data).when(m.posedge(
                      io.CLK)).if_((mem_state.O == MemState.READ)
                                   & dut_mem.r.ready & gold_mem.r.ready)

    def rand_data(nasti_params):
        # One random NASTI data word, built a byte at a time.
        rand_data = BitVector[nasti_params.x_data_bits](0)
        for i in range(nasti_params.x_data_bits // 8):
            rand_data |= BitVector[nasti_params.x_data_bits](
                random.randint(0, 0xff) << (8 * i))
        return rand_data

    def rand_mask(x_len):
        # Random byte mask that is neither all zeros nor all ones.
        return BitVector[x_len // 8](random.randint(
            1, (1 << (x_len // 8)) - 2))

    def make_test(rand_data, nasti_params, x_len):
        # Wrapper because function definition in side class namespace
        # doesn't inherit class variables
        def test(b_bits, tag, idx, off, mask=BitVector[x_len // 8](0)):
            # Pack one test vector as {off, idx, tag, data, mask}, with one
            # block of random data.
            test_data = rand_data(nasti_params)
            for i in range((b_bits // nasti_params.x_data_bits) - 1):
                test_data = test_data.concat(rand_data(nasti_params))
            return m.uint(m.concat(off, idx, tag, test_data, mask))

        return test

    test = make_test(rand_data, nasti_params, x_len)

    # Random address components: 3 tags, 2 set indices, 6 block offsets
    # (offsets have their two low bits cleared).
    tags = []
    for _ in range(3):
        tags.append(BitVector.random(t_len))
    idxs = []
    for _ in range(2):
        idxs.append(BitVector.random(s_len))
    offs = []
    for _ in range(6):
        offs.append(BitVector.random(b_len) & -4)

    # Initial memory contents: one random word per (tag, idx, beat).
    init_addr = []
    init_data = []
    _iter = itertools.product(tags, idxs, range(0, data_beats))
    for tag, idx, off in _iter:
        init_addr.append(m.uint(m.concat(BitVector[b_len](off), idx, tag)))
        init_data.append(rand_data(nasti_params))

    test_vec = [
        test(b_bits, tags[0], idxs[0], offs[0]),  # 0: read miss
        test(b_bits, tags[0], idxs[0], offs[1]),  # 1: read hit
        test(b_bits, tags[1], idxs[0], offs[0]),  # 2: read miss
        test(b_bits, tags[1], idxs[0], offs[2]),  # 3: read hit
        test(b_bits, tags[1], idxs[0], offs[3]),  # 4: read hit
        test(b_bits, tags[1], idxs[0], offs[4], rand_mask(x_len)),  # 5: write hit  # noqa
        test(b_bits, tags[1], idxs[0], offs[4]),  # 6: read hit
        test(b_bits, tags[2], idxs[0], offs[5]),  # 7: read miss & write back  # noqa
        test(b_bits, tags[0], idxs[1], offs[0], rand_mask(x_len)),  # 8: write miss  # noqa
        test(b_bits, tags[0], idxs[1], offs[0]),  # 9: read hit
        test(b_bits, tags[0], idxs[1], offs[1]),  # 10: read hit
        test(b_bits, tags[1], idxs[1], offs[2], rand_mask(x_len)),  # 11: write miss & write back  # noqa
        test(b_bits, tags[1], idxs[1], offs[3]),  # 12: read hit
        test(b_bits, tags[2], idxs[1], offs[4]),  # 13: read write back
        test(b_bits, tags[2], idxs[1], offs[5])  # 14: read hit
    ]

    class TestState(m.Enum):
        INIT = 0
        START = 1
        WAIT = 2
        DONE = 3

    state = m.Register(init=TestState.INIT)()
    timeout = m.Register(m.UInt[32])()

    # Counts through the initialisation writes.
    init_m = len(init_addr) - 1
    init_counter = mantle.CounterModM(init_m,
                                      init_m.bit_length(),
                                      has_ce=True)
    init_counter.CE @= m.enable(state.O == TestState.INIT)

    # Counts through the test vectors; advances once per DONE state.
    test_m = len(test_vec) - 1
    test_counter = mantle.CounterModM(test_m,
                                      test_m.bit_length(),
                                      has_ce=True)
    test_counter.CE @= m.enable(state.O == TestState.DONE)

    # Unpack the current vector (layout as packed by `test`).
    curr_vec = m.mux(test_vec, test_counter.O)
    mask = (curr_vec >> (b_len + s_len + t_len + b_bits))[:x_len // 8]
    data = (curr_vec >> (b_len + s_len + t_len))[:b_bits]
    tag = (curr_vec >> (b_len + s_len))[:t_len]
    idx = (curr_vec >> b_len)[:s_len]
    off = curr_vec[:b_len]

    dut.cpu.req.data.addr @= m.concat(off, idx, tag)
    # TODO: Is truncating this fine?
    req_data = data[:x_len]
    dut.cpu.req.data.data @= req_data
    dut.cpu.req.data.mask @= mask
    dut.cpu.req.valid @= state.O == TestState.WAIT
    dut.cpu.abort @= 0
    # The gold model gets the same request payload; its request is valid in
    # START, the dut's in WAIT.
    gold_req.data @= dut.cpu.req.data.value()
    gold_req.valid @= state.O == TestState.START
    gold_resp.ready @= state.O == TestState.DONE

    mem_waddr1 @= m.mux(init_addr, init_counter.O)[:20]
    mem_wdata1 @= m.mux(init_data, init_counter.O)

    check_resp_data = m.Bit()
    if TRACE:
        m.display("[%0t]: [init] mem[%x] <= %x", m.time(), mem_waddr1,
                  mem_wdata1)\
            .when(m.posedge(io.CLK))\
            .if_(state.O == TestState.INIT)

    @m.inline_combinational()
    def state_fsm():
        # Defaults: hold timeout and state, no init write, no data check.
        timeout.I @= timeout.O
        mem_wen1 @= m.bit(False)
        check_resp_data @= m.bit(False)
        state.I @= state.O
        if state.O == TestState.INIT:
            # Write one initial memory word per cycle until the counter wraps.
            mem_wen1 @= m.bit(True)
            if init_counter.COUT:
                state.I @= TestState.START
        elif state.O == TestState.START:
            if gold_req.ready:
                timeout.I @= m.bits(0, 32)
                state.I @= TestState.WAIT
        elif state.O == TestState.WAIT:
            timeout.I @= timeout.O + 1
            if dut.cpu.resp.valid & gold_resp.valid:
                # Response data is only compared for reads (zero byte mask).
                if ~mask.reduce_or():
                    check_resp_data @= m.bit(True)
                # NOTE(review): reconstructed as belonging to the outer `if`
                # - confirm.
                state.I @= TestState.DONE
        elif state.O == TestState.DONE:
            state.I @= TestState.START

    # A response must arrive before the WAIT timer reaches 100.
    f.assert_immediate((state.O != TestState.WAIT) | (timeout.O < 100))
    f.assert_immediate(
        ~check_resp_data | (dut.cpu.resp.data.data == gold_resp.data.data),
        failure_msg=("dut.cpu.resp.data.data => %x != %x",
                     dut.cpu.resp.data.data, gold_resp.data.data))
    # m.display("mem_state=%x", mem_state.O).when(m.posedge(io.CLK))
    # m.display("test_state=%x", state.O).when(m.posedge(io.CLK))
    # m.display("dut req valid = %x",
    #           dut.cpu.req.valid).when(m.posedge(io.CLK))
    # m.display("gold req valid = %x, ready = %x", gold_req.valid,
    #           gold_req.ready).when(m.posedge(io.CLK))
    # m.display("[%0t]: dut resp data = %x, gold resp data = %x", m.time(),
    #           dut.cpu.resp.data.data, gold_resp.data.data)\
    #     .when(m.posedge(io.CLK))
    io.done @= test_counter.COUT
def test(b_bits, tag, idx, off, mask=BitVector[x_len // 8](0)):
    """Pack one cache test vector into a single unsigned value.

    Random data is drawn one NASTI beat at a time until ``b_bits`` worth of
    beats have been gathered, then the fields are concatenated in the order
    ``off, idx, tag, data, mask``.  ``mask`` defaults to an all-zero byte
    mask of ``x_len // 8`` bits.
    """
    n_beats = b_bits // nasti_params.x_data_bits
    # The first beat seeds the payload; each further beat is appended to it.
    payload = rand_data(nasti_params)
    for _ in range(n_beats - 1):
        payload = payload.concat(rand_data(nasti_params))
    fields = (off, idx, tag, payload, mask)
    return m.uint(m.concat(*fields))
# NOTE(review): the next three statements are the tail of a circuit
# generator whose header lies before this chunk; the nesting depth shown
# here is presumed and should be confirmed against the full file.
            # Accumulator: the register is fed its own output plus io.I,
            # and the register output drives io.O.
            reg = Register(n)
            m.wire(reg(m.uint(reg.O) + io.I), io.O)
    return _DDS


def DDS(n):
    """Return a new instance of the circuit class built by DefineDDS(n)."""
    return DefineDDS(n)()


# Board setup: turn on the clock and enable eight J1 pins as inputs and
# eight J3 pins as outputs.
icestick = IceStick()
icestick.Clock.on()
for i in range(8):
    icestick.J1[i].input().on()
    icestick.J3[i].output().on()

main = icestick.main()

dds = DDS(16)

# `wavetable` is defined outside this chunk; it is converted to integers and
# used as the initial contents of a 256-entry, 16-bit-wide read-only memory.
wavetable = wavetable.astype(int)
rom = Memory(height=256, width=16, rom=list(wavetable), readonly=True)

# The eight J1 inputs are concatenated with eight constant zero bits to form
# the value fed to the DDS.
phase = m.concat(main.J1, m.bits(0, 8))
addr = dds(phase)
# Bits 8 and up of the DDS output address the ROM.
O = rom(addr[8:])
# Read enable is tied to constant 1.
m.wire(1, rom.RE)

# Bits 8..15 of the ROM output drive the J3 pins.
m.wire(O[8:16], main.J3)

m.EndCircuit()
def __init__(self, x_len, n_ways: int, n_sets: int, b_bytes: int):
    """Build the cache model: IO, storage, NASTI channel wiring and the FSM.

    Args:
        x_len: width of the request address and of the response data word.
        n_ways: not referenced anywhere in this body.
        n_sets: number of entries in each of the data/tag/valid/dirty
            memories.
        b_bytes: block size in bytes (the data memory is ``b_bytes * 8``
            bits wide).
    """
    nasti_params = NastiParameters(data_bits=64, addr_bits=x_len, id_bits=5)
    self.io = m.IO(req=m.Consumer(m.Decoupled[make_CacheReq(x_len)]),
                   resp=m.Producer(m.Decoupled[make_CacheResp(x_len)]),
                   nasti=make_NastiIO(nasti_params)) + m.ClockIO()
    # NOTE(review): `size` is clog2 of x_data_bits and is passed to the
    # address-channel constructors below; if that field is meant to encode
    # log2 of *bytes* per beat, the unit here should be confirmed.
    size = m.bitutils.clog2(nasti_params.x_data_bits)
    b_bits = b_bytes << 3  # block size in bits
    b_len = m.bitutils.clog2(b_bytes)  # number of offset bits
    s_len = m.bitutils.clog2(n_sets)  # number of index bits
    t_len = x_len - (s_len + b_len)  # remaining address bits form the tag
    # NOTE(review): this rebuilds nasti_params with the same arguments as
    # the first statement of the method; it looks redundant.
    nasti_params = NastiParameters(data_bits=64, addr_bits=x_len, id_bits=5)
    data_beats = b_bits // nasti_params.x_data_bits
    length = data_beats - 1

    # Per-set storage: block data, tag, valid bit and dirty bit.
    data = m.Memory(n_sets, m.UInt[b_bits])()
    tags = m.Memory(n_sets, m.UInt[t_len])()
    v = m.Memory(n_sets, m.Bit)()
    d = m.Memory(n_sets, m.Bit)()

    # Split the request address into tag / index / offset.
    req = self.io.req.data
    tag = (req.addr >> (b_len + s_len))[:t_len]
    idx = req.addr[b_len:b_len + s_len]
    off = req.addr[:b_len]

    read = data.read(idx)
    # Build the merged block byte by byte.  For byte i the mux keeps the
    # stored byte (entry 0) unless byte i lies in the 4-byte word selected
    # by `off` and mask bit (i & 3) is set, in which case request byte
    # (i & 3) is moved into position i (entry 1).
    write = m.bits(0, b_bits)
    for i in range(b_bytes):
        write |= m.mux([(read & (0xff << (8 * i))),
                        ((m.zext_to(req.data, b_bits) >>
                          ((8 * (i & 0x3)))) & 0xff) << (8 * i)],
                       ((off // 4) == (i // 4)) &
                       (req.mask >> (i & 0x3))[0])[:b_bits]

    class State(m.Enum):
        IDLE = 0
        WRITE = 1
        WRITE_ACK = 2
        READ = 3

    state = m.Register(init=State.IDLE)()

    # Beat counter for the write channel; it advances whenever the FSM is
    # in WRITE.
    write_counter = mantle.CounterModM(data_beats,
                                       max(data_beats.bit_length(), 1),
                                       has_ce=True)
    write_counter.CE @= m.enable(state.O == State.WRITE)
    w_cnt, w_done = write_counter.O, write_counter.COUT

    # Beat counter for the read channel; it advances in READ only while
    # r.valid is high.
    read_counter = mantle.CounterModM(data_beats,
                                      max(data_beats.bit_length(), 1),
                                      has_ce=True)
    read_counter.CE @= m.enable((state.O == State.READ) &
                                self.io.nasti.r.valid)
    r_cnt, r_done = read_counter.O, read_counter.COUT

    # Response data: the x_len-bit word at word index (off // 4) of the
    # stored block.
    self.io.resp.data.data @= (read >> (m.zext_to(
        (off // 4), b_bits) * x_len))[:x_len]
    # Read address: the request address with its offset bits cleared.
    self.io.nasti.ar.data @= NastiReadAddressChannel(
        nasti_params, 0, (req.addr >> b_len) << b_len, size, length)
    tags_rdata = tags.read(idx)
    # Write address: built from the index and the *stored* tag, shifted
    # left past the offset bits.
    self.io.nasti.aw.data @= NastiWriteAddressChannel(
        nasti_params, 0,
        m.bits(m.concat(idx, tags_rdata),
               nasti_params.x_addr_bits) << b_len, size, length)
    # Write data: the beat of the stored block selected by w_cnt; w_done is
    # passed as the final argument.
    self.io.nasti.w.data @= NastiWriteDataChannel(
        nasti_params,
        (read >> (m.zext_to(w_cnt, b_bits) *
                  nasti_params.x_data_bits))[:nasti_params.x_data_bits],
        None, w_done)
    self.io.nasti.w.valid @= state.O == State.WRITE
    self.io.nasti.b.ready @= state.O == State.WRITE_ACK
    self.io.nasti.r.ready @= state.O == State.READ

    # Write ports of the four memories.  Each enable (and the data-memory
    # write value) is a named wire that `logic` below drives.
    d_wen = m.Bit(name="d_wen")
    d.write(True, idx, m.enable(d_wen))

    data_wen = m.Bit(name="data_wen")
    data_wdata = m.UInt[b_bits](name="data_wdata")
    data.write(data_wdata, idx, m.enable(data_wen))
    # m.display("data_wdata=%x", data_wdata).when(m.posedge(self.io.CLK))

    v_wen = m.Bit(name="v_wen")
    v.write(True, idx, m.enable(v_wen))
    v_rdata = v.read(idx)

    tags_wen = m.Bit(name="tags_wen")
    tags.write(tag, idx, m.enable(tags_wen))

    d_rdata = d.read(idx)
    # m.display("gold_state=%x", state.O).when(m.posedge(self.io.CLK))
    # m.display("gold_w_done=%x", w_done).when(m.posedge(self.io.CLK))
    # m.display("gold_b_valid=%x",
    #           self.io.nasti.b.valid).when(m.posedge(self.io.CLK))
    if TRACE:
        # Trace a hit whose mask is non-zero (a write).
        m.display(
            "[%0t] [cache] data[%x] <= %x, off: %x, req: %x, mask: %b",
            m.time(), idx, write, off, self.io.req.data.data,
            self.io.req.data.mask)\
            .when(m.posedge(self.io.CLK))\
            .if_((state.O == State.IDLE) &
                 (self.io.req.valid & self.io.resp.ready) &
                 (v_rdata & (tags_rdata == tag)) &
                 req.mask.reduce_or())
        # Trace a hit whose mask is all zero (a read).
        m.display(
            "[%0t] [cache] data[%x] => %x, off: %x, resp: %x",
            m.time(), idx, write, off,
            self.io.resp.data.data.value())\
            .when(m.posedge(self.io.CLK))\
            .if_((state.O == State.IDLE) &
                 (self.io.req.valid & self.io.resp.ready) &
                 (v_rdata & (tags_rdata == tag)) &
                 ~req.mask.reduce_or())

    @m.inline_combinational()
    def logic():
        # Defaults: no handshakes, no memory writes, state held.
        self.io.resp.valid @= False
        self.io.req.ready @= False
        self.io.nasti.ar.valid @= False
        self.io.nasti.aw.valid @= False
        d_wen @= False
        data_wen @= False
        data_wdata @= m.UInt[b_bits](0)
        state.I @= state.O
        tags_wen @= False
        v_wen @= False
        if state.O == State.IDLE:
            if self.io.req.valid & self.io.resp.ready:
                if v_rdata & (tags_rdata == tag):
                    # Hit: a non-zero mask marks the line dirty and stores
                    # the merged block; either way the request is accepted
                    # and a response is produced.
                    if req.mask.reduce_or():
                        d_wen @= True
                        data_wdata @= write
                        data_wen @= True
                    self.io.req.ready @= True
                    self.io.resp.valid @= True
                else:
                    # Miss: a dirty line is written back first; otherwise
                    # the line is zeroed and a read is started directly.
                    if d_rdata:
                        self.io.nasti.aw.valid @= True
                        state.I @= State.WRITE
                    else:
                        data_wdata @= 0
                        data_wen @= True
                        self.io.nasti.ar.valid @= True
                        state.I @= State.READ
        elif state.O == State.WRITE:
            if w_done:
                state.I @= State.WRITE_ACK
        elif state.O == State.WRITE_ACK:
            if self.io.nasti.b.valid:
                # Write-back acknowledged: zero the line and start the read.
                data_wdata @= 0
                data_wen @= True
                self.io.nasti.ar.valid @= True
                state.I @= State.READ
        elif state.O == State.READ:
            if self.io.nasti.r.valid:
                # OR the incoming beat into the line at the position
                # selected by r_cnt.
                data_wdata @= read | (
                    m.zext_to(self.io.nasti.r.data.data, b_bits) <<
                    (m.zext_to(r_cnt, b_bits) *
                     nasti_params.x_data_bits))
                data_wen @= True
            # NOTE(review): nesting reconstructed from whitespace-collapsed
            # text; `if r_done` is taken to be a sibling of the r.valid
            # check, not nested inside it -- confirm.
            if r_done:
                tags_wen @= True
                v_wen @= True
                state.I @= State.IDLE
def __init__(self, x_len):
    """Build the CSR circuit: IO, state registers, read mux and update logic.

    Args:
        x_len: width of the data ports and of the full-width CSR registers.
    """
    Cause = make_Cause(x_len)
    self.io = io = m.IO(
        stall=m.In(m.Bit),
        cmd=m.In(m.UInt[3]),
        I=m.In(m.UInt[x_len]),
        O=m.Out(m.UInt[x_len]),
        # Exception
        pc=m.In(m.UInt[x_len]),
        addr=m.In(m.UInt[x_len]),
        inst=m.In(m.UInt[x_len]),
        illegal=m.In(m.Bit),
        st_type=m.In(m.UInt[2]),
        ld_type=m.In(m.UInt[3]),
        pc_check=m.In(m.Bit),
        expt=m.Out(m.Bit),
        evec=m.Out(m.UInt[x_len]),
        epc=m.Out(
            m.UInt[x_len])) + HostIO(x_len) + m.ClockIO(has_reset=True)

    # Instruction bits 20..31 give the CSR address, bits 15..19 the rs1
    # field.
    csr_addr = io.inst[20:32]
    rs1_addr = io.inst[15:20]

    # user counters
    time = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    timeh = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    cycle = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    cycleh = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    instret = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    instreth = m.Register(m.UInt[x_len], reset_type=m.Reset)()

    # Constant: 26-bit extension field, zero padding, 2-bit base field
    # (26 + (x_len - 28) + 2 = x_len bits).
    mcpuid = m.concat(
        BV[26](
            1 << (ord('I') - ord('A')) |  # Base ISA
            1 << (ord('U') - ord('A'))),  # User Mode
        BV[x_len - 28](0),
        BV[2](0),  # RV32I
    )
    mimpid = BV[x_len](0)
    mhartid = BV[x_len](0)

    # interrupt enable stack
    # Only levels 0 and 1 are registers; levels 2 and 3 are constant zero.
    PRV = m.Register(m.UInt[len(CSR.PRV_M)],
                     init=CSR.PRV_M,
                     reset_type=m.Reset)()
    PRV1 = m.Register(m.UInt[len(CSR.PRV_M)],
                      init=CSR.PRV_M,
                      reset_type=m.Reset)()
    PRV2 = BV[2](0)
    PRV3 = BV[2](0)
    IE = m.Register(m.Bit, init=False, reset_type=m.Reset)()
    IE1 = m.Register(m.Bit, init=False, reset_type=m.Reset)()
    IE2 = False
    IE3 = False

    # virtualization management field
    VM = BV[5](0)

    # memory privilege
    MPRV = False

    # Extension context status
    XS = BV[2](0)
    FS = BV[2](0)
    SD = BV[1](0)
    # Field order here matches the mstatus write path in `logic`, which
    # takes IE from bit 0, PRV from bits 1..2, IE1 from bit 3 and PRV1 from
    # bits 4..5.
    mstatus = m.concat(IE.O, PRV.O, IE1.O, PRV1.O, IE2, PRV2, IE3, PRV3, FS,
                       XS, MPRV, VM, BV[x_len - 23](0), SD)
    mtvec = BV[x_len](Const.PC_EVEC)
    mtdeleg = BV[x_len](0)

    # interrupt registers
    # Only the M* bits are registers; the H* and S* bits are constant False.
    MTIP = m.Register(m.Bit, init=False, reset_type=m.Reset)()
    HTIP = False
    STIP = False
    MTIE = m.Register(m.Bit, init=False, reset_type=m.Reset)()
    HTIE = False
    STIE = False
    MSIP = m.Register(m.Bit, init=False, reset_type=m.Reset)()
    HSIP = False
    SSIP = False
    MSIE = m.Register(m.Bit, init=False, reset_type=m.Reset)()
    HSIE = False
    SSIE = False
    # The software bit sits at position 3 and the timer bit at position 7,
    # matching the mip/mie write paths in `logic`.
    mip = m.concat(Bit(False), SSIP, HSIP, MSIP.O, Bit(False), STIP, HTIP,
                   MTIP.O, BV[x_len - 8](0))
    mie = m.concat(Bit(False), SSIE, HSIE, MSIE.O, Bit(False), STIE, HTIE,
                   MTIE.O, BV[x_len - 8](0))

    mtimecmp = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    mscratch = m.Register(m.UInt[x_len], reset_type=m.Reset)()

    mepc = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    mcause = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    mbadaddr = m.Register(m.UInt[x_len], reset_type=m.Reset)()

    mtohost = m.Register(m.UInt[x_len], reset_type=m.Reset)()
    mfromhost = m.Register(m.UInt[x_len], reset_type=m.Reset)()

    io.host.tohost @= mtohost.O

    # Read map from CSR address to current value.  The *w / *hw addresses
    # read the same registers as their non-w counterparts.
    csr_file = {
        CSR.cycle: cycle.O,
        CSR.time: time.O,
        CSR.instret: instret.O,
        CSR.cycleh: cycleh.O,
        CSR.timeh: timeh.O,
        CSR.instreth: instreth.O,
        CSR.cyclew: cycle.O,
        CSR.timew: time.O,
        CSR.instretw: instret.O,
        CSR.cyclehw: cycleh.O,
        CSR.timehw: timeh.O,
        CSR.instrethw: instreth.O,
        CSR.mcpuid: mcpuid,
        CSR.mimpid: mimpid,
        CSR.mhartid: mhartid,
        CSR.mtvec: mtvec,
        CSR.mtdeleg: mtdeleg,
        CSR.mie: mie,
        CSR.mtimecmp: mtimecmp.O,
        CSR.mtime: time.O,
        CSR.mtimeh: timeh.O,
        CSR.mscratch: mscratch.O,
        CSR.mepc: mepc.O,
        CSR.mcause: mcause.O,
        CSR.mbadaddr: mbadaddr.O,
        CSR.mip: mip,
        CSR.mtohost: mtohost.O,
        CSR.mfromhost: mfromhost.O,
        CSR.mstatus: mstatus,
    }
    out = m.dict_lookup(csr_file, csr_addr)
    io.O @= out

    # Address bits 8..9 are compared against the current privilege level.
    priv_valid = csr_addr[8:10] <= PRV.O
    priv_inst = io.cmd == CSR.P
    # With cmd == CSR.P, address bits 0 and 8 separate call / break / ret.
    is_E_call = priv_inst & ~csr_addr[0] & ~csr_addr[8]
    is_E_break = priv_inst & csr_addr[0] & ~csr_addr[8]
    is_E_ret = priv_inst & ~csr_addr[0] & csr_addr[8]
    # True when csr_addr equals any key of csr_file.
    csr_valid = m.reduce(operator.or_,
                         m.bits([csr_addr == key for key in csr_file]))
    # Read-only when address bits 10 and 11 are both set, or for
    # mtvec / mtdeleg.
    csr_RO = (csr_addr[10:12].reduce_and() | (csr_addr == CSR.mtvec) |
              (csr_addr == CSR.mtdeleg))
    # `&` binds tighter than `|`: (cmd == W) | (cmd[1] & (rs1_addr != 0)).
    wen = (io.cmd == CSR.W) | io.cmd[1] & rs1_addr.reduce_or()
    # Write value per command: replace, OR in io.I, or clear the io.I bits.
    wdata = m.dict_lookup(
        {
            CSR.W: io.I,
            CSR.S: out | io.I,
            CSR.C: out & ~io.I
        }, io.cmd)

    # Misalignment checks on io.addr: bit 1 for the PC check, the low two
    # bits for word accesses, bit 0 for halfword accesses.
    iaddr_invalid = io.pc_check & io.addr[1]
    laddr_invalid = m.dict_lookup(
        {
            Control.LD_LW: io.addr[0:2].reduce_or(),
            Control.LD_LH: io.addr[0],
            Control.LD_LHU: io.addr[0]
        }, io.ld_type)
    saddr_invalid = m.dict_lookup(
        {
            Control.ST_SW: io.addr[0:2].reduce_or(),
            Control.ST_SH: io.addr[0]
        }, io.st_type)
    # `&` binds tighter than `|`, so each `a & b` below is a single term of
    # the overall OR.
    expt = (io.illegal | iaddr_invalid | laddr_invalid | saddr_invalid |
            io.cmd[0:2].reduce_or() & (~csr_valid | ~priv_valid) |
            wen & csr_RO | (priv_inst & ~priv_valid) | is_E_call |
            is_E_break)
    io.expt @= expt
    io.evec @= mtvec + (m.zext_to(PRV.O, x_len) << 6)
    io.epc @= mepc.O

    @m.inline_combinational()
    def logic():
        # Counters
        # Each *h register increments when its low register is all ones.
        time.I @= time.O + 1
        timeh.I @= timeh.O
        if time.O.reduce_and():
            timeh.I @= timeh.O + 1

        cycle.I @= cycle.O + 1
        cycleh.I @= cycleh.O
        if cycle.O.reduce_and():
            cycleh.I @= cycleh.O + 1
        instret.I @= instret.O
        # Counted as retired: not a NOP, not stalled, and either no
        # exception or the exception is a call / break.
        is_inst_ret = ((io.inst != Instructions.NOP) &
                       (~expt | is_E_call | is_E_break) & ~io.stall)
        if is_inst_ret:
            instret.I @= instret.O + 1
        instreth.I @= instreth.O
        if is_inst_ret & instret.O.reduce_and():
            instreth.I @= instreth.O + 1

        # Default: every remaining register holds its value.
        mbadaddr.I @= mbadaddr.O
        mepc.I @= mepc.O
        mcause.I @= mcause.O
        PRV.I @= PRV.O
        IE.I @= IE.O
        IE1.I @= IE1.O
        PRV1.I @= PRV1.O
        MTIP.I @= MTIP.O
        MSIP.I @= MSIP.O
        MTIE.I @= MTIE.O
        MSIE.I @= MSIE.O
        mtimecmp.I @= mtimecmp.O
        mscratch.I @= mscratch.O
        mtohost.I @= mtohost.O
        mfromhost.I @= mfromhost.O
        if io.host.fromhost.valid:
            mfromhost.I @= io.host.fromhost.data

        if ~io.stall:
            if expt:
                # Exception entry: save the PC with its low two bits
                # cleared, record the cause (first matching condition in
                # the chain), and push the PRV/IE stack.
                mepc.I @= io.pc >> 2 << 2
                if iaddr_invalid:
                    mcause.I @= Cause.InstAddrMisaligned
                elif laddr_invalid:
                    mcause.I @= Cause.LoadAddrMisaligned
                elif saddr_invalid:
                    mcause.I @= Cause.StoreAddrMisaligned
                elif is_E_call:
                    mcause.I @= Cause.Ecall + m.zext_to(PRV.O, x_len)
                elif is_E_break:
                    mcause.I @= Cause.Breakpoint
                else:
                    mcause.I @= Cause.IllegalInst
                PRV.I @= CSR.PRV_M
                IE.I @= False
                PRV1.I @= PRV.O
                IE1.I @= IE.O
                if iaddr_invalid | laddr_invalid | saddr_invalid:
                    mbadaddr.I @= io.addr
            elif is_E_ret:
                # Return: pop the PRV/IE stack and refill level 1 with
                # PRV_U / True.
                PRV.I @= PRV1.O
                IE.I @= IE1.O
                PRV1.I @= CSR.PRV_U
                IE1.I @= True
            elif wen:
                # CSR write: route wdata to the register selected by
                # csr_addr.
                if csr_addr == CSR.mstatus:
                    PRV1.I @= wdata[4:6]
                    IE1.I @= wdata[3]
                    PRV.I @= wdata[1:3]
                    IE.I @= wdata[0]
                elif csr_addr == CSR.mip:
                    MTIP.I @= wdata[7]
                    MSIP.I @= wdata[3]
                elif csr_addr == CSR.mie:
                    MTIE.I @= wdata[7]
                    MSIE.I @= wdata[3]
                elif csr_addr == CSR.mtime:
                    time.I @= wdata
                elif csr_addr == CSR.mtimeh:
                    timeh.I @= wdata
                elif csr_addr == CSR.mtimecmp:
                    mtimecmp.I @= wdata
                elif csr_addr == CSR.mscratch:
                    mscratch.I @= wdata
                elif csr_addr == CSR.mepc:
                    # Low two bits are cleared on write.
                    mepc.I @= wdata >> 2 << 2
                elif csr_addr == CSR.mcause:
                    # Only the top bit and the low four bits are kept.
                    mcause.I @= wdata & (1 << (x_len - 1) | 0xf)
                elif csr_addr == CSR.mbadaddr:
                    mbadaddr.I @= wdata
                elif csr_addr == CSR.mtohost:
                    mtohost.I @= wdata
                elif csr_addr == CSR.mfromhost:
                    mfromhost.I @= wdata
                elif csr_addr == CSR.cyclew:
                    cycle.I @= wdata
                elif csr_addr == CSR.timew:
                    time.I @= wdata
                elif csr_addr == CSR.instretw:
                    instret.I @= wdata
                elif csr_addr == CSR.cyclehw:
                    cycleh.I @= wdata
                elif csr_addr == CSR.timehw:
                    timeh.I @= wdata
                elif csr_addr == CSR.instrethw:
                    instreth.I @= wdata