def __init__(
        self,
        addr: int,  # CSR's address
        name: str,  # CSR's name
        layout: Layout  # CSR's layout
) -> None:
    """Build the read/write records and the access mask of one CSR.

    ``layout`` is iterated as ``(field name, bit width, access type)``
    triples. Every width must be a plain ``int``; otherwise ``TypeError``
    is raised before any attribute is set on the instance.
    """
    fields = []
    mask = 0
    offset = 0  # bit position of the field currently being processed

    for _name, _shape, _access in layout:
        if not isinstance(_shape, int):
            raise TypeError('Shape must be a flat int: {}'.format(_shape))
        fields.append((_name, _shape))
        # Only WLRL/WARL fields contribute ones to the mask.
        if _access in (CSRAccess.WLRL, CSRAccess.WARL):
            mask |= ((1 << _shape) - 1) << offset
        offset += _shape

    self.addr = addr
    self.name = name
    # using the same mask for read and write operations
    self.mask = Const(mask)

    # IO
    self.read = Record(fields, name=self.name + '_r')
    self.write = Record(fields, name=self.name + '_w')
    self.we = Signal()
def __init__(self):
    """Declare the ULPI bus record, the USB ACM device and the stream/clock signals."""
    # ULPI PHY bus; every field carries an explicit fan-in/fan-out direction.
    self.ulpi = Record([
        ('data', [
            ('i', 8, DIR_FANIN),
            ('o', 8, DIR_FANOUT),
            ('oe', 1, DIR_FANOUT),
        ]),
        ('clk', [('o', 1, DIR_FANOUT)]),
        ('stp', 1, DIR_FANOUT),
        ('nxt', [('i', 1, DIR_FANIN)]),
        ('dir', [('i', 1, DIR_FANIN)]),
        ('rst', 1, DIR_FANOUT),
    ])

    # The ACM device is built on top of the ULPI record declared above.
    self.usb0 = usb = LunaDeviceACM(
        bus=self.ulpi,
        idVendor=0x16d0,
        idProduct=0x0f3b,
        manufacturer_string="GsD",
        product_string="ButterStick r1.0",
    )

    # Local records with the same layout as the device's rx/tx streams.
    self.rx = Record(usb.rx.layout)
    self.tx = Record(usb.tx.layout)

    self.clk_sync = Signal()
    self.clk_usb = Signal()
    self.rst_sync = Signal()

    self.usb_holdoff = Signal()
    # NOTE(review): the trailing Ellipsis is kept from the original; it
    # presumably marks code elided from this view.
    ...
def __init__(self, configuration):
    """Store the core configuration and declare the bus and interrupt ports.

    Raises ``TypeError`` when ``configuration`` is not a
    ``cfg.Configuration`` instance.
    """
    if not isinstance(configuration, cfg.Configuration):
        raise TypeError(
            'Invalid data type for configuration. Must be a "Configuration" type'
        )

    self.configuration = configuration

    # Wishbone ports: one named iport, one named dport.
    self.iport = Record(wishbone_layout)
    self.dport = Record(wishbone_layout)

    # Interrupt request inputs.
    self.external_interrupt = Signal()
    self.timer_interrupt = Signal()
    self.software_interrupt = Signal()
def __init__(self, nlines, nwords, nways, start_addr=0, end_addr=2**32, enable_write=True):
    """Validate the cache geometry and declare the cache ports.

    Raises ``ValueError`` when ``nlines`` is not a power of two, when
    ``nwords`` is not 4, 8 or 16, or when ``nways`` is not 1 or 2.
    """
    # A power of two has a single bit set, so n & (n - 1) must be zero.
    if nlines == 0 or (nlines & (nlines - 1)):
        raise ValueError(f'nlines must be a power of 2: {nlines}')
    if nwords not in (4, 8, 16):
        raise ValueError(f'nwords must be 4, 8 or 16: {nwords}')
    if nways not in (1, 2):
        raise ValueError(f'nways must be 1 or 2: {nways}')

    self.enable_write = enable_write
    self.nlines = nlines
    self.nwords = nwords
    self.nways = nways

    # Address split: | unused | tag | line | offset | byte(2) |
    offset_bits = log2_int(nwords)
    line_bits = log2_int(nlines)
    addr_bits = log2_int(end_addr - start_addr, need_pow2=False)
    low_bits = line_bits + offset_bits + 2  # +2: byte field of the layout
    tag_bits = addr_bits - low_bits
    extra_bits = 32 - tag_bits - low_bits

    pc_layout = [
        ('byte', 2),
        ('offset', offset_bits),
        ('line', line_bits),
        ('tag', tag_bits),
    ]
    # Pad the layout up to 32 bits when the region is smaller than 4 GiB.
    if extra_bits != 0:
        pc_layout.append(('unused', extra_bits))

    # Stage 1 inputs.
    self.s1_address = Record(pc_layout)
    self.s1_flush = Signal()
    self.s1_valid = Signal()
    self.s1_stall = Signal()

    # Stage 2 inputs/outputs.
    self.s2_address = Record(pc_layout)
    self.s2_evict = Signal()
    self.s2_valid = Signal()
    self.s2_miss = Signal()
    self.s2_rdata = Signal(32)
    self.s2_re = Signal()
    # The write-side ports only exist when writes are enabled.
    if enable_write:
        self.s2_wdata = Signal(32)
        self.s2_sel = Signal(4)
        self.s2_we = Signal()

    # Refill bus.
    self.bus_addr = Record(pc_layout)
    self.bus_valid = Signal()
    self.bus_last = Signal()
    self.bus_data = Signal(32)
    self.bus_ack = Signal()
    self.bus_err = Signal()
def __init__(self, width, name, last=False):
    """Create a data/valid/ready record, optionally with a ``last`` field.

    ``width`` is the width of the ``data`` field and ``name`` is passed on
    to ``Record.__init__``.
    """
    self._last = last

    fields = [
        ('data', width),
        ('valid', 1),
        ('ready', 1),
    ]
    if last:
        fields.append(('last', 1))
    self.layout = fields

    Record.__init__(self, self.layout, name=name)

    # For simulation
    self._sent = []
    self._received = []
def instantiate_dut(self):
    """Create the ULPI record used by the test and return the decoder under test."""
    # Only the fields listed here are declared on the test bus.
    self.ulpi = Record([
        ("dir", 1),
        ("nxt", 1),
        ("data", [
            ("i", 8),
        ]),
    ])

    return ULPIRxEventDecoder(ulpi_bus=self.ulpi)
def test_directional_record(self):
    """Run ``synchronize`` on a flat record that has one fan-in and one fan-out field."""
    m = Module()

    record = Record([
        ('sig_in', 1, DIR_FANIN),
        ('sig_out', 1, DIR_FANOUT),
    ])

    # The call itself is the test: no result is inspected.
    synchronize(m, record)
def create_read_port(self):
    """Create an addr/data record, remember it in ``_read_ports`` and return it."""
    layout = [
        ('addr', self.addr_w),
        ('data', self.width),
    ]

    port = Record(layout)
    self._read_ports.append(port)
    return port
def instantiate_dut(self):
    """Build a stand-in record for the RAM signals and return the HyperRAM interface."""
    # Create a record that recreates the layout of our RAM signals.
    self.ram_signals = Record([
        ("clk", 1),
        ("clkN", 1),
        ("dq", [("i", 8), ("o", 8), ("oe", 1)]),
        ("rwds", [("i", 1), ("o", 1), ("oe", 1)]),
        ("cs", 1),
        ("reset", 1),
    ])

    # Create our HyperRAM interface...
    return HyperRAMInterface(bus=self.ram_signals)
def __init__(self):
    """Instantiate the NCO, the two sigma-delta DACs and the PicoRV32 with its register map."""
    # self.pll = ICE40_PLL(
    #     50, # Mhz
    #     "pll",
    # )
    # self.cordic = DomainRenamer("pll")(CORDIC(width=12))
    # self.nco = DomainRenamer("pll")(NCO(width=12, samples=1024))
    # self.dac = DomainRenamer("pll")(SigmaDeltaDAC(width=12))
    self.nco = NCO(width=12, samples=1024)
    self.sine_dac = SigmaDeltaDAC(width=12)
    self.cosine_dac = SigmaDeltaDAC(width=12)
    # self.uart = UART(clk_freq=16e6, baud_rate=9600)
    # self.blinky = Blinky()

    # Control register of the NCO: a single enable bit.
    self.nco_ctrl = Record([
        ("enable", 1),
    ])
    # self.uart_data = Signal(8)
    self.led = Signal()

    # def uart_write(m: Module, mem_wdata: Signal):
    #     m.d.comb += self.uart_data.eq(mem_wdata)
    #     m.d.sync += [
    #         self.uart.tx_rdy.eq(1),
    #         self.uart
    #     ]

    # Memory-mapped signals handed to the CPU wrapper.
    self.picorv32 = PicoRV32([
        Mapping(
            addr=0xcafebab0,
            signal=self.led,
            write=True,
            read=False,
        ),
        # NCO
        Mapping(
            addr=0xf000_0000,
            signal=self.nco_ctrl,
            read=True,
            write=True,
        ),
        Mapping(
            addr=0xf000_0004,  # offset by 1 word
            signal=self.nco.phase_step,
            read=True,
            write=True,
        ),
        # UART
        # Mapping(
        #     addr=0xf000_0008, # offset by 1 word,
        #     # signal=self.uart_data,
        #     read=False,
        #     write=uart_write,
        # )
    ])
def test_nested_record(self):
    """Run ``synchronize`` on a record that contains a nested directional sub-record."""
    m = Module()

    record = Record([
        ('sig_in', 1, DIR_FANIN),
        ('sig_out', 1, DIR_FANOUT),
        ('nested', [
            ('subsig_in', 1, DIR_FANIN),
            ('subsig_out', 1, DIR_FANOUT),
        ]),
    ])

    # The call itself is the test: no result is inspected.
    synchronize(m, record)
def add_port(self, priority):
    """Create a Wishbone port registered under ``priority`` and return it.

    ``priority`` must be an ``int`` greater than or equal to zero and must
    not already be registered in ``self._ports``.

    Raises:
        TypeError: ``priority`` is not an ``int`` or is negative.
        ValueError: a port with the same priority already exists.
    """
    # Zero is accepted, so the requirement is "non-negative", not "positive".
    if not isinstance(priority, int) or priority < 0:
        raise TypeError(f'Priority must be a non-negative integer: {priority}')
    # check for duplicates
    if priority in self._ports:
        raise ValueError(f'Duplicated priority: {priority}')

    port = self._ports[priority] = Record(wishbone_layout)
    return port
def get_axi(self, axi):
    """Return a Record named ``axi`` whose fields are the matching entries of ``self._ports``.

    ``axi`` must be listed in ``self.MAXI`` or ``self.SAXI``; the layout
    comes from ``get_axi_layout('master')`` or ``get_axi_layout('slave')``
    accordingly.
    """
    assert axi in self.MAXI + self.SAXI

    if axi in self.MAXI:
        axi_layout = get_axi_layout('master')
    elif axi in self.SAXI:
        axi_layout = get_axi_layout('slave')

    # Existing port objects are looked up as "<AXI name upper-cased><field name>".
    prefix = axi.upper()
    fields = {f: self._ports[prefix + f] for f, _w, _d in axi_layout}

    # Drop the third element of every layout entry before building the record.
    flat_layout = [(f, w) for f, w, _ in axi_layout]
    return Record(flat_layout, fields=fields, name=axi)
def instantiate_dut(self):
    """Create the interface record used by the test and return the analyzer under test."""
    self.umti = Record([
        ('data_in', 8),
        ('data_out', 8),
        ('rx_valid', 1),
        ('rx_active', 1),
        ('rx_error', 1),
        ('rx_complete', 1),
    ])

    return USBAnalyzer(umti_interface=self.umti, mem_depth=128)
def __init__(self, clk_freq):
    """Declare the I2S pins, the two signed 16-bit sample signals and the strobe/ack pair."""
    self.clk_freq = clk_freq

    self.i2s = Record([
        ('mclk', 1),
        ('lrck', 1),
        ('sck', 1),
        ('sd', 1),
    ])

    # Two signed 16-bit samples, addressable by index.
    self.samples = Array((Signal(signed(16)), Signal(signed(16))))
    self.stb = Signal()
    self.ack = Signal()

    # Flat list of every signal declared above: I2S pins first, then samples and handshake.
    self.ports = [
        self.i2s.mclk,
        self.i2s.lrck,
        self.i2s.sck,
        self.i2s.sd,
        self.samples[0],
        self.samples[1],
        self.stb,
        self.ack,
    ]
def __init__(self, addr, name, layout):
    """Build the read/write records and the access mask of one CSR.

    ``layout`` is iterated as ``(name, shape/size, access type)`` triples.
    Every shape must be a plain ``int``; otherwise ``TypeError`` is raised
    (``addr`` and ``name`` have already been stored at that point).
    """
    self.addr = addr
    self.name = name

    fields = []
    mask = 0
    offset = 0  # bit position of the field currently being processed

    # layout for CSRs:
    # (name, shape/size, access type)
    for _name, _shape, _access in layout:
        if not isinstance(_shape, int):
            raise TypeError('Shape must be a flat int: {}'.format(_shape))
        fields.append((_name, _shape))
        # Only WLRL/WARL fields contribute ones to the mask.
        if _access in (CSRAccess.WLRL, CSRAccess.WARL):
            mask |= ((1 << _shape) - 1) << offset
        offset += _shape

    # using the same mask for read and write operations
    self.mask = Const(mask)

    # IO
    self.read = Record(fields, name=self.name)
    self.write = Record(fields, name=self.name)
    self.we = Signal()
def __init__(self, width):
    """Declare the register inputs of the pixel pipeline and ``width`` pipe records."""
    self.width = width

    # ALPHA - Alpha Blending
    self.i_blend_a = Signal(2)  # Blending Parameter A; see BlendRGB
    self.i_blend_b = Signal(2)  # Blending Parameter B; see BlendRGB
    self.i_blend_c = Signal(2)  # Blending Parameter C; see BlendAlpha
    self.i_blend_d = Signal(2)  # Blending Parameter D; see BlendRGB
    self.i_blend_fix = Signal(8)  # Q8.0; Fixed alpha value

    # PABE - Per-Pixel Alpha Blending Enable
    self.i_pabe_pabe = Signal()  # Whether to perform per-pixel alpha blending

    # PRIM/PRMODE - Primitive Settings
    self.i_prim_abe = Signal()  # Whether to perform alpha blending

    # TEST - Pixel Test Settings
    self.i_test_ate = Signal()  # Whether to perform alpha testing
    self.i_test_atst = Signal(3)  # Alpha test to perform
    self.i_test_aref = Signal(8)  # Reference alpha value
    self.i_test_afail = Signal(2)  # Action to perform on test failure
    self.i_test_date = Signal()  # Whether to perform destination alpha testing
    self.i_test_datm = Signal()  # Destination alpha test comparison value
    self.i_test_zte = Signal()  # Z test enable (buggy)
    self.i_test_ztst = Signal(2)  # Z test type

    # DIMX - Dither Matrix (4x4 grid of signed 3-bit entries)
    self.i_dimx_dm = [[Signal((3, True)) for _ in range(4)] for _ in range(4)]

    # DTHE - Dither Enable
    self.i_dthe_dthe = Signal()  # Whether to perform dithering

    # COLCLAMP - Colour Clamping Enable
    self.i_colclamp = Signal()  # Whether to saturate or overflow colour channels

    # FBA - Framebuffer Alpha Correction value
    self.i_fba_fba = Signal()  # Value ORed with most significant bit of alpha channel.

    # FRAME - Framebuffer Settings
    self.i_frame_psm = Signal(6)  # Framebuffer pixel storage format

    # ZBUF - Z Buffer Settings
    self.i_zbuf_psm = Signal(4)  # Z buffer pixel storage format

    # One PIPE record per pipeline lane.
    self.pipes = [Record(PIPE) for _ in range(width)]

    self.i_address = Signal(9)  # 8-bit address, plus "privilege" bit
    self.i_data = Signal(64)
def __init__(self):
    """Declare the instruction bus and the per-stage signals of the fetch unit."""
    self.iport = Record(wishbone_layout)

    # from address stage
    self.a_pc = Signal(32)

    # control signals
    # a_stall / a_valid are needed because the unit uses the pc@address stage
    self.a_stall = Signal()
    self.a_valid = Signal()
    self.f_stall = Signal()
    self.f_valid = Signal()
    self.f_busy = Signal()

    # to decode stage
    self.f_instruction = Signal(32)
    self.f_bus_error = Signal()
    self.f_badaddr = Signal(32)
def __init__(self):
    """Declare the data bus and the X/M stage signals of the load/store unit."""
    # exceptions. Misaligned exception detected in X stage
    self.dport = Record(wishbone_layout)

    # X stage signals.
    self.x_addr = Signal(32)
    self.x_data_w = Signal(32)
    self.x_store = Signal()
    self.x_load = Signal()
    self.x_byte_sel = Signal(4)
    self.x_valid = Signal()
    self.x_stall = Signal()

    # M stage signals.
    self.m_valid = Signal()
    self.m_stall = Signal()
    self.m_load_data = Signal(32)
    self.m_busy = Signal()
    self.m_load_error = Signal()
    self.m_store_error = Signal()
    self.m_badaddr = Signal(32)
def instantiate_dut(self):
    """Return a module that stacks a UMTITranslator and a USBAnalyzer on a test ULPI record."""
    from ..interface.ulpi import UMTITranslator

    # NOTE(review): 'oe' is declared 8 bits wide here; other ULPI records
    # in this code base use 1 bit. Left unchanged, confirm this is intended.
    self.ulpi = Record([
        ('data', [
            ('i', 8),
            ('o', 8),
            ('oe', 8),
        ]),
        ('nxt', 1),
        ('stp', 1),
        ('dir', 1),
        ('clk', 1),
        ('rst', 1),
    ])

    # Create a stack of our UMTITranslator and our USBAnalyzer.
    # We'll wrap the both in a module to establish a synthetic hierarchy.
    m = Module()

    m.submodules.translator = self.translator = UMTITranslator(
        ulpi=self.ulpi)
    m.submodules.analyzer = self.analyzer = USBAnalyzer(
        umti_interface=self.translator, mem_depth=128)

    return m
def request_optional(self, name, number=0, *, dir=None, xdr=None):
    """ Specialized version of .request() for "optional" I/O.

    If the platform has a resource with the given name, it is requested
    and returned. Otherwise, this method returns a Signal() or Record() that
    will cause the relevant logic to be optimized out.

    This is useful for designs that support multiple platforms; and allows for
    resources such as e.g. LEDs to be omitted on platforms that lack them.

    Parameters:
        name, number -- forwarded positionally to .request().
        dir, xdr     -- forwarded as keyword arguments to .request(); ``dir``
                        also selects the stand-in type when the resource is
                        missing.

    Returns the requested resource, or a stand-in when .request() raises
    ResourceError: a bare Signal() for ``dir`` "i" or "o", otherwise a
    Record with 1-bit ``i``, ``o`` and ``oe`` fields.
    """

    # Attempt to request the relevant I/O...
    try:
        return self.request(name, number, dir=dir, xdr=xdr)

    # ... and if it does not exist, create an empty stand-in signal. This signal isn't used
    # anywhere else; and thus should typically be optimized away.
    except ResourceError:
        if dir in ("i", "o"):
            return Signal()

        # Record layouts must be (name, shape) tuples; bare field names are
        # rejected with a TypeError. The stand-in fields are 1 bit wide,
        # whatever the width of the real resource would have been.
        return Record([("i", 1), ("o", 1), ("oe", 1)])
def __init__(self):
    """Declare the shared Wishbone bus and the empty priority -> port table."""
    self.bus = Record(wishbone_layout)
    # Filled by add_port(); keys are the port priorities.
    self._ports = {}
def elaborate(self, platform):
    """Build the cached load/store datapath.

    Three requesters share ``self.dport`` through an ``Arbiter``:

    * priority 0 - the write buffer, a FIFO that queues stores hitting the
      cacheable region and replays them as classic Wishbone writes;
    * priority 1 - the data cache refill port;
    * priority 2 - the "bare" port used for accesses outside the
      cacheable region.

    An access is treated as cacheable when the address bits above
    ``bits_range`` equal the same bits of ``self.start_addr``.
    """
    m = Module()

    # One write-buffer entry: address, data word and byte select.
    wbuffer_layout = [("addr", 32), ("data", 32), ("sel", 4)]

    wbuffer_din = Record(wbuffer_layout)
    wbuffer_dout = Record(wbuffer_layout)

    dcache = m.submodules.dcache = Cache(nlines=self.nlines,
                                         nwords=self.nwords,
                                         nways=self.nways,
                                         start_addr=self.start_addr,
                                         end_addr=self.end_addr,
                                         enable_write=True)
    arbiter = m.submodules.arbiter = Arbiter()
    wbuffer = m.submodules.wbuffer = SyncFIFOBuffered(
        width=len(wbuffer_din), depth=self.nwords)

    wbuffer_port = arbiter.add_port(priority=0)
    cache_port = arbiter.add_port(priority=1)
    bare_port = arbiter.add_port(priority=2)

    x_use_cache = Signal()
    m_use_cache = Signal()
    m_data_w = Signal(32)
    m_byte_sel = Signal(4)

    # Address bits at and above bits_range select the cacheable region.
    bits_range = log2_int(self.end_addr - self.start_addr, need_pow2=False)
    m.d.comb += x_use_cache.eq(
        (self.x_addr[bits_range:] == (self.start_addr >> bits_range)))

    # Register the X-stage values so they are available one stage later.
    with m.If(~self.x_stall):
        m.d.sync += [
            m_use_cache.eq(x_use_cache),
            m_data_w.eq(self.x_data_w),
            m_byte_sel.eq(self.x_byte_sel)
        ]
    m.d.comb += arbiter.bus.connect(self.dport)

    # --------------------------------------------------
    # write buffer IO
    m.d.comb += [
        # input
        wbuffer.w_data.eq(wbuffer_din),
        wbuffer.w_en.eq(x_use_cache & self.x_store & self.x_valid
                        & ~self.x_stall),
        wbuffer_din.addr.eq(self.x_addr),
        wbuffer_din.data.eq(self.x_data_w),
        wbuffer_din.sel.eq(self.x_byte_sel),
        # output
        wbuffer_dout.eq(wbuffer.r_data),
    ]

    # drive the arbiter port
    with m.If(wbuffer_port.cyc):
        # A transfer finished: pop the entry and drop the strobe.
        with m.If(wbuffer_port.ack | wbuffer_port.err):
            m.d.comb += wbuffer.r_en.eq(1)
            m.d.sync += wbuffer_port.stb.eq(0)
            # level == 1 while popping: the buffer becomes empty, so the cycle ends.
            with m.If(wbuffer.level == 1):  # Buffer is empty
                m.d.sync += [wbuffer_port.cyc.eq(0), wbuffer_port.we.eq(0)]
        # Cycle still open and strobe low: present the next queued entry.
        with m.Elif(~wbuffer_port.stb):
            m.d.sync += [
                wbuffer_port.stb.eq(1),
                wbuffer_port.addr.eq(wbuffer_dout.addr),
                wbuffer_port.dat_w.eq(wbuffer_dout.data),
                wbuffer_port.sel.eq(wbuffer_dout.sel)
            ]
    # Idle with data pending: open a new write cycle with the head entry.
    with m.Elif(wbuffer.r_rdy):
        m.d.sync += [
            wbuffer_port.cyc.eq(1),
            wbuffer_port.stb.eq(1),
            wbuffer_port.we.eq(1),
            wbuffer_port.addr.eq(wbuffer_dout.addr),
            wbuffer_port.dat_w.eq(wbuffer_dout.data),
            wbuffer_port.sel.eq(wbuffer_dout.sel)
        ]
        m.d.comb += wbuffer.r_en.eq(0)
    m.d.comb += [
        wbuffer_port.cti.eq(CycleType.CLASSIC),
        wbuffer_port.bte.eq(0)
    ]

    # --------------------------------------------------
    # connect IO: cache
    m.d.comb += [
        dcache.s1_address.eq(self.x_addr),
        dcache.s1_flush.eq(0),
        dcache.s1_valid.eq(self.x_valid),
        dcache.s1_stall.eq(self.x_stall),
        dcache.s2_address.eq(self.m_addr),
        dcache.s2_evict.eq(0),  # Evict is not used. Remove maybe?
        dcache.s2_valid.eq(self.m_valid),
        dcache.s2_re.eq(self.m_load),
        dcache.s2_wdata.eq(m_data_w),
        dcache.s2_sel.eq(m_byte_sel),
        dcache.s2_we.eq(self.m_store)
    ]

    # connect cache to arbiter
    # The cache port only reads: we/sel/dat_w are tied to zero.
    m.d.comb += [
        cache_port.addr.eq(dcache.bus_addr),
        cache_port.dat_w.eq(0),
        cache_port.sel.eq(0),
        cache_port.we.eq(0),
        cache_port.cyc.eq(dcache.bus_valid),
        cache_port.stb.eq(dcache.bus_valid),
        cache_port.cti.eq(
            Mux(dcache.bus_last, CycleType.END, CycleType.INCREMENT)),
        cache_port.bte.eq(log2_int(self.nwords) - 1),
        dcache.bus_data.eq(cache_port.dat_r),
        dcache.bus_ack.eq(cache_port.ack),
        dcache.bus_err.eq(cache_port.err)
    ]

    # --------------------------------------------------
    # bare port
    rdata = Signal.like(bare_port.dat_r)
    op = Signal()

    m.d.comb += op.eq(self.x_load | self.x_store)

    # transaction logic
    with m.If(bare_port.cyc):
        # End the cycle on ack/err, or when the M stage is no longer valid.
        with m.If(bare_port.ack | bare_port.err | ~self.m_valid):
            m.d.sync += [
                rdata.eq(bare_port.dat_r),
                bare_port.we.eq(0),
                bare_port.cyc.eq(0),
                bare_port.stb.eq(0)
            ]
    # Start a cycle for a valid, non-stalled, non-cacheable load or store.
    with m.Elif(op & self.x_valid & ~self.x_stall & ~x_use_cache):
        m.d.sync += [
            bare_port.addr.eq(self.x_addr),
            bare_port.dat_w.eq(self.x_data_w),
            bare_port.sel.eq(self.x_byte_sel),
            bare_port.we.eq(self.x_store),
            bare_port.cyc.eq(1),
            bare_port.stb.eq(1)
        ]
    m.d.comb += [bare_port.cti.eq(CycleType.CLASSIC), bare_port.bte.eq(0)]

    # --------------------------------------------------
    # extra logic
    # X stage busy: fence.i waits while the write buffer has data; a cached
    # store waits while the buffer cannot accept it; otherwise follow the bare port.
    with m.If(self.x_fence_i):
        m.d.comb += self.x_busy.eq(wbuffer.r_rdy)
    with m.Elif(x_use_cache):
        m.d.comb += self.x_busy.eq(self.x_store & ~wbuffer.w_rdy)
    with m.Else():
        m.d.comb += self.x_busy.eq(bare_port.cyc)

    # M stage busy/load data: from the cache, zeroed on a bus error, or from the bare port.
    with m.If(m_use_cache):
        m.d.comb += [
            self.m_busy.eq(dcache.s2_re & dcache.s2_miss),
            self.m_load_data.eq(dcache.s2_rdata)
        ]
    with m.Elif(self.m_load_error | self.m_store_error):
        m.d.comb += [self.m_busy.eq(0), self.m_load_data.eq(0)]
    with m.Else():
        m.d.comb += [
            self.m_busy.eq(bare_port.cyc),
            self.m_load_data.eq(rdata)
        ]

    # --------------------------------------------------
    # exceptions
    # Latch the error kind and faulting address; clear once the M stage advances.
    with m.If(self.dport.cyc & self.dport.err):
        m.d.sync += [
            self.m_load_error.eq(~self.dport.we),
            self.m_store_error.eq(self.dport.we),
            self.m_badaddr.eq(self.dport.addr)
        ]
    with m.Elif(~self.m_stall):
        m.d.sync += [self.m_load_error.eq(0), self.m_store_error.eq(0)]

    return m
def elaborate(self, platform: Platform) -> Module:
    """Build the trigger module logic.

    Creates ``self.ntriggers`` trigger/data record pairs, exposes the pair
    selected by ``tselect`` through ``tdata1``/``tdata2``, handles writes
    to those registers, and evaluates every trigger against the X-stage
    signals to drive ``self.haltreq`` or ``self.trap``.
    """
    m = Module()

    # One storage record per trigger, shaped like the tdata1/tdata2 read records.
    triggers = [Record.like(self.tdata1.read) for _ in range(self.ntriggers)]
    triggers_data = [Record.like(self.tdata2.read) for _ in range(self.ntriggers)]

    for t in triggers:
        m.d.comb += t.type.eq(TriggerType.MATCH)  # support only address/data match

    # handle writes to tselect
    with m.If(self.tselect.we):
        with m.If(self.tselect.write < self.ntriggers):  # no more than ntriggers
            m.d.sync += self.tselect.read.eq(self.tselect.write)

    # select the trigger
    with m.Switch(self.tselect.read):
        for idx, (trigger, trigger_data) in enumerate(zip(triggers, triggers_data)):
            with m.Case(idx):
                m.d.comb += [
                    self.tdata1.read.eq(trigger),  # trigger visible @tdata1
                    self.tdata2.read.eq(trigger_data)  # data visible @tdata2
                ]
                # handle writes to tdata1
                with m.If(self.tdata1.we):
                    # 'i' reinterprets the written data; only the fields copied
                    # to 'o' below reach the stored trigger.
                    mcontrol = Record([('i', mcontrol_layout), ('o', mcontrol_layout)])
                    m.d.comb += [
                        mcontrol.i.eq(self.tdata1.write.data),  # casting
                        mcontrol.o.execute.eq(mcontrol.i.execute),
                        mcontrol.o.store.eq(mcontrol.i.store),
                        mcontrol.o.load.eq(mcontrol.i.load),
                        mcontrol.o.m.eq(mcontrol.i.m),
                        mcontrol.o.u.eq(mcontrol.i.u),
                        mcontrol.o.action.eq(mcontrol.i.action)
                    ]
                    m.d.sync += [
                        trigger.dmode.eq(self.tdata1.write.dmode),
                        trigger.data.eq(mcontrol.o)
                    ]
                # handle writes to tdata2
                with m.If(self.tdata2.we):
                    m.d.sync += trigger_data.data.eq(self.tdata2.write)

    # trigger logic
    hit = Signal()
    halt = Signal()
    # One bit per trigger; OR-reduced below.
    hit_v = Signal(self.ntriggers)
    halt_v = Signal(self.ntriggers)

    for idx, (trigger, trigger_data) in enumerate(zip(triggers, triggers_data)):
        with m.Switch(trigger.type):
            with m.Case(TriggerType.MATCH):
                match = Signal()
                mcontrol = Record(mcontrol_layout)
                m.d.comb += mcontrol.eq(trigger)  # casting, lol

                # Priority: execute (compare with PC), then store, then load
                # (both compare with the bus address).
                with m.If(mcontrol.execute):
                    m.d.comb += match.eq(self.x_valid & (trigger_data == self.x_pc))
                with m.Elif(mcontrol.store):
                    m.d.comb += match.eq(self.x_valid & self.x_store & (trigger_data == self.x_bus_addr))
                with m.Elif(mcontrol.load):
                    m.d.comb += match.eq(self.x_valid & self.x_load & (trigger_data == self.x_bus_addr))

                if self.enable_user_mode:
                    # check the current priv mode, and check the priv enable mode
                    priv_m = self.privmode == PrivMode.Machine
                    priv_u = self.privmode == PrivMode.User
                    hit_tmp = match & ((mcontrol.m & priv_m) | (mcontrol.u & priv_u))
                else:
                    hit_tmp = match & mcontrol.m

                m.d.comb += [
                    hit_v[idx].eq(hit_tmp),
                    halt_v[idx].eq(mcontrol.action)
                ]

    # request signals: halt/exception
    m.d.comb += [
        hit.eq(reduce(or_, hit_v, 0)),
        halt.eq(reduce(or_, halt_v, 0))
    ]

    with m.If(hit):
        with m.If(halt):  # halt = mcontrol.action
            m.d.comb += self.haltreq.eq(self.tdata1.read.dmode)  # enter debug mode only if dmode = 1
        with m.Else():
            m.d.comb += self.trap.eq(1)  # generate exception

    return m
def create_port(self) -> Record:
    """Create an addr/data_r/data_w/en record, remember it in ``_ports`` and return it."""
    layout = [
        ('addr', self.addr_w),
        ('data_r', self.width),
        ('data_w', self.width),
        ('en', 1),
    ]

    port = Record(layout)
    self._ports.append(port)
    return port
def data_width(self):
    """Return the bit width of a record whose single field ``d`` has shape ``self.dsol``."""
    layout = (('d', self.dsol), )
    # The first element of the record's shape is its width.
    return Record(layout).shape()[0]
def elaborate(self, platform):
    """Build the branch predictor: a tagged target buffer plus a 2-bit state table.

    The table size comes from the ``predictor``/``size`` configuration
    option; ``ValueError`` is raised when it is not a power of two.
    """
    m = Module()

    size = self.configuration.getOption('predictor', 'size')
    # A power of two has a single bit set, so size & (size - 1) must be zero.
    if size == 0 or (size & (size - 1)):
        raise ValueError(f'size must be a power of 2: {size}')

    index_bits = log2_int(size)
    tag_bits = 32 - index_bits
    entry_width = 1 + 32 + tag_bits  # valid + data + tag
    depth = 1 << index_bits

    entry_layout = [('target', 32), ('tag', tag_bits), ('valid', 1)]
    pc_layout = [('index', index_bits), ('tag', tag_bits)]

    # Branch target buffer and its ports.
    btb = Memory(width=entry_width, depth=depth)
    btb_rp = btb.read_port()
    btb_wp = btb.write_port()

    # Per-entry 2-bit prediction state and its ports.
    bht = Memory(width=2, depth=depth)
    bht_rp = bht.read_port()
    bht_wp = bht.write_port()

    m.submodules += btb_rp, btb_wp
    m.submodules += bht_rp, bht_wp

    btb_r = Record(entry_layout)
    a_pc = Record(pc_layout)
    f_pc = Record(pc_layout)
    m_pc = Record(pc_layout)
    hit = Signal()
    pstate_next = Signal(2)

    # Lookup: while the A stage is stalled, keep addressing with the F-stage PC.
    m.d.comb += [
        btb_rp.addr.eq(Mux(self.a_stall, f_pc.index, a_pc.index)),
        bht_rp.addr.eq(Mux(self.a_stall, f_pc.index, a_pc.index)),
        btb_r.eq(btb_rp.data),
        #
        a_pc.eq(self.a_pc),
        f_pc.eq(self.f_pc),
        hit.eq(btb_r.valid & (btb_r.tag == f_pc.tag)),
        #
        self.f_prediction.eq(hit & bht_rp.data[1]),
        self.f_prediction_state.eq(bht_rp.data),
        self.f_prediction_pc.eq(btb_r.target),
    ]

    # update
    m.d.comb += [
        btb_wp.addr.eq(m_pc.index),
        btb_wp.data.eq(Cat(self.m_target_pc, m_pc.tag, 1)),
        btb_wp.en.eq(self.m_update),
        bht_wp.addr.eq(m_pc.index),
        bht_wp.data.eq(pstate_next),
        bht_wp.en.eq(self.m_update),
        m_pc.eq(self.m_pc),
        pstate_next.eq(0),
    ]

    # Next state, keyed on Cat(m_prediction_state, m_take_jmp_branch).
    with m.Switch(Cat(self.m_prediction_state, self.m_take_jmp_branch)):
        with m.Case(0b000, 0b001):
            m.d.comb += pstate_next.eq(0b00)
        with m.Case(0b010, 0b100):
            m.d.comb += pstate_next.eq(0b01)
        with m.Case(0b011, 0b101):
            m.d.comb += pstate_next.eq(0b10)
        with m.Case(0b110, 0b111):
            m.d.comb += pstate_next.eq(0b11)

    return m
def elaborate(self, platform: Platform) -> Module:
    """Build the cache logic.

    Covers, in order: the access/miss counters, hit detection over
    ``self.nways`` ways, the LRU bit vector (two-way configuration only),
    the READ/REFILL state machine driving the ``bus_*`` ports, and, per
    way, the valid bits plus tag and data memories. When ``enable_write``
    is false the cache also watches ``self.dcache_snoop`` and invalidates
    lines hit by a snooped write.
    """
    m = Module()

    snoop_addr = Record(self.pc_layout)
    snoop_valid = Signal()

    # -------------------------------------------------------------------------
    # Performance counter
    # TODO: connect to CSR's performance counter
    with m.If(~self.s1_stall & self.s1_valid & self.s1_access):
        m.d.sync += self.access_cnt.eq(self.access_cnt + 1)
    with m.If(self.s2_valid & self.s2_miss & ~self.bus_valid & self.s2_access):
        m.d.sync += self.miss_cnt.eq(self.miss_cnt + 1)
    # -------------------------------------------------------------------------

    # Per-way view: the line read from memory plus its control/status flags.
    way_layout = [('data', 32 * self.nwords),
                  ('tag', self.s1_address.tag.shape()),
                  ('valid', 1),
                  ('sel_lru', 1),
                  ('snoop_hit', 1)]
    if self.enable_write:
        way_layout.append(('sel_we', 1))

    ways = Array(
        Record(way_layout, name='way_idx{}'.format(_way))
        for _way in range(self.nways))
    fill_cnt = Signal.like(self.s1_address.offset)

    # Check hit/miss
    way_hit = m.submodules.way_hit = Encoder(self.nways)
    for idx, way in enumerate(ways):
        m.d.comb += way_hit.i[idx].eq((way.tag == self.s2_address.tag) & way.valid)

    m.d.comb += self.s2_miss.eq(way_hit.n)
    if self.enable_write:
        # Assuming there is a HIT, flag the way that hit as the one to be written.
        m.d.comb += ways[way_hit.o].sel_we.eq(self.s2_we & self.s2_valid)

    # set the LRU
    if self.nways == 1:
        # One way: LRU is useless
        lru = Const(0)  # self.nlines
    else:
        # LRU is a vector with one bit per line, each bit indicating the way to replace.
        # Since NWAY is at most 2, each LRU entry is a single bit.
        lru = Signal(self.nlines)
        _lru = lru.bit_select(self.s2_address.line, 1)
        write_ended = self.bus_valid & self.bus_ack & self.bus_last  # err ^ ack = = 1
        access_hit = ~self.s2_miss & self.s2_valid & (way_hit.o == _lru)
        # Flip the bit after a completed refill, or after a hit on the way it currently selects.
        with m.If(write_ended | access_hit):
            m.d.sync += _lru.eq(~_lru)

    # read data from the cache
    m.d.comb += self.s2_rdata.eq(ways[way_hit.o].data.word_select(
        self.s2_address.offset, 32))

    # Internal Snoop
    snoop_use_cache = Signal()
    snoop_tag_match = Signal()
    snoop_line_match = Signal()
    snoop_cancel_refill = Signal()
    if not self.enable_write:
        bits_range = log2_int(self.end_addr - self.start_addr, need_pow2=False)

        m.d.comb += [
            snoop_addr.eq(self.dcache_snoop.addr),  # aux
            snoop_valid.eq(self.dcache_snoop.we & self.dcache_snoop.valid
                           & self.dcache_snoop.ack),
            snoop_use_cache.eq(snoop_addr[bits_range:] == (
                self.start_addr >> bits_range)),
            snoop_tag_match.eq(snoop_addr.tag == self.s2_address.tag),
            snoop_line_match.eq(snoop_addr.line == self.s2_address.line),
            snoop_cancel_refill.eq(snoop_use_cache & snoop_valid
                                   & snoop_line_match & snoop_tag_match),
        ]
    else:
        m.d.comb += snoop_cancel_refill.eq(0)

    with m.FSM():
        with m.State('READ'):
            # A valid read that misses starts a refill.
            with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                m.d.sync += [
                    self.bus_addr.eq(self.s2_address),
                    self.bus_valid.eq(1),
                    # The refill ends when the running offset reaches this value.
                    fill_cnt.eq(self.s2_address.offset - 1)
                ]
                m.next = 'REFILL'
        with m.State('REFILL'):
            m.d.comb += self.bus_last.eq(fill_cnt == self.bus_addr.offset)
            with m.If(self.bus_ack):
                m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset + 1)
            with m.If(self.bus_ack & self.bus_last | self.bus_err):
                m.d.sync += self.bus_valid.eq(0)
            # Leave once the request is dropped, or on a flush / snoop hit on the refilling line.
            with m.If(~self.bus_valid | self.s1_flush | snoop_cancel_refill):
                m.next = 'READ'
                m.d.sync += self.bus_valid.eq(0)

    # mark the way to use (replace)
    m.d.comb += ways[lru.bit_select(self.s2_address.line, 1)].sel_lru.eq(self.bus_valid)

    # generate for N ways
    for way in ways:
        # create the memory structures for valid, tag and data.
        valid = Signal(self.nlines)  # Valid bits

        tag_m = Memory(width=len(way.tag), depth=self.nlines)  # tag memory
        tag_rp = tag_m.read_port()
        snoop_rp = tag_m.read_port()
        tag_wp = tag_m.write_port()
        m.submodules += tag_rp, tag_wp, snoop_rp

        data_m = Memory(width=len(way.data), depth=self.nlines)  # data memory
        data_rp = data_m.read_port()
        data_wp = data_m.write_port(
            granularity=32
        )  # implies that only whole 32-bit words can be written.
        m.submodules += data_rp, data_wp

        # handle valid
        with m.If(self.s1_flush & self.s1_valid):  # flush
            m.d.sync += valid.eq(0)
        with m.Elif(way.sel_lru & self.bus_last & self.bus_ack):  # refill ok
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(1)
        with m.Elif(way.sel_lru & self.bus_err):  # refill error
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(0)
        with m.Elif(self.s2_evict & self.s2_valid & (way.tag == self.s2_address.tag)):  # evict
            m.d.sync += valid.bit_select(self.s2_address.line, 1).eq(0)

        # assignments
        # While stage 1 is stalled the memories keep being addressed with the stage 2 line.
        m.d.comb += [
            tag_rp.addr.eq(
                Mux(self.s1_stall, self.s2_address.line,
                    self.s1_address.line)),
            tag_wp.addr.eq(self.bus_addr.line),
            tag_wp.data.eq(self.bus_addr.tag),
            tag_wp.en.eq(way.sel_lru & self.bus_ack & self.bus_last),
            data_rp.addr.eq(
                Mux(self.s1_stall, self.s2_address.line,
                    self.s1_address.line)),
            way.data.eq(data_rp.data),
            way.tag.eq(tag_rp.data),
            way.valid.eq(valid.bit_select(self.s2_address.line, 1))
        ]

        # update cache: CPU or Refill
        # The write port is multiplexed because the memory can only have one
        # write port.
        if self.enable_write:
            update_addr = Signal(len(data_wp.addr))
            update_data = Signal(len(data_wp.data))
            update_we = Signal(len(data_wp.en))
            aux_wdata = Signal(32)

            with m.If(self.bus_valid):
                # Refill: write the incoming bus word at the running offset.
                m.d.comb += [
                    update_addr.eq(self.bus_addr.line),
                    update_data.eq(Repl(self.bus_data, self.nwords)),
                    update_we.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
                ]
            with m.Else():
                # CPU write: only on a hit, into the way flagged by sel_we.
                m.d.comb += [
                    update_addr.eq(self.s2_address.line),
                    update_data.eq(Repl(aux_wdata, self.nwords)),
                    update_we.bit_select(self.s2_address.offset, 1).eq(way.sel_we & ~self.s2_miss)
                ]
            m.d.comb += [
                # Aux data: the write port has no byte granularity, so when the
                # CPU writes, the replacement word has to be built byte by byte
                # from the new data and the word currently read.
                aux_wdata.eq(
                    Cat(
                        Mux(self.s2_sel[0], self.s2_wdata.word_select(0, 8),
                            self.s2_rdata.word_select(0, 8)),
                        Mux(self.s2_sel[1], self.s2_wdata.word_select(1, 8),
                            self.s2_rdata.word_select(1, 8)),
                        Mux(self.s2_sel[2], self.s2_wdata.word_select(2, 8),
                            self.s2_rdata.word_select(2, 8)),
                        Mux(self.s2_sel[3], self.s2_wdata.word_select(3, 8),
                            self.s2_rdata.word_select(3, 8)))),
                #
                data_wp.addr.eq(update_addr),
                data_wp.data.eq(update_data),
                data_wp.en.eq(update_we),
            ]
        else:
            # Read-only cache: the write port is used for refills only.
            m.d.comb += [
                data_wp.addr.eq(self.bus_addr.line),
                data_wp.data.eq(Repl(self.bus_data, self.nwords)),
                data_wp.en.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
            ]

        # --------------------------------------------------------------
        # internal snoop
        # for FENCE.i instruction
        _match_snoop = Signal()
        m.d.comb += [
            snoop_rp.addr.eq(snoop_addr.line),  # read tag memory
            _match_snoop.eq(snoop_rp.data == snoop_addr.tag),
            way.snoop_hit.eq(snoop_use_cache & snoop_valid & _match_snoop
                             & valid.bit_select(snoop_addr.line, 1)),
        ]
        # check if the snoop matches a write from this core
        with m.If(way.snoop_hit):
            m.d.sync += valid.bit_select(snoop_addr.line, 1).eq(0)
        # --------------------------------------------------------------

    return m
def __init__(
        self,
        nlines: int,  # number of lines
        nwords: int,  # number of words x line x way
        nways: int,  # number of ways
        start_addr: int = 0,  # start of cacheable region
        end_addr: int = 2**32,  # end of cacheable region
        enable_write: bool = True  # enable writes to cache
) -> None:
    """Validate the cache geometry and declare the cache ports and counters.

    Raises ``ValueError`` when ``nlines`` is not a power of two, when
    ``nwords`` is not 4, 8 or 16, or when ``nways`` is not 1 or 2.
    """
    # enable write -> data cache
    # A power of two has a single bit set, so n & (n - 1) must be zero.
    if nlines == 0 or (nlines & (nlines - 1)):
        raise ValueError(f'nlines must be a power of 2: {nlines}')
    if nwords not in (4, 8, 16):
        raise ValueError(f'nwords must be 4, 8 or 16: {nwords}')
    if nways not in (1, 2):
        raise ValueError(f'nways must be 1 or 2: {nways}')

    self.enable_write = enable_write
    self.nlines = nlines
    self.nwords = nwords
    self.nways = nways
    self.start_addr = start_addr
    self.end_addr = end_addr

    # Address split: | unused | tag | line | offset | byte(2) |
    offset_bits = log2_int(nwords)
    line_bits = log2_int(nlines)
    addr_bits = log2_int(end_addr - start_addr, need_pow2=False)
    low_bits = line_bits + offset_bits + 2  # +2: byte field of the layout
    tag_bits = addr_bits - low_bits
    extra_bits = 32 - tag_bits - low_bits

    self.pc_layout = [
        ('byte', 2),
        ('offset', offset_bits),
        ('line', line_bits),
        ('tag', tag_bits),
    ]
    # Pad the layout up to 32 bits when the region is smaller than 4 GiB.
    if extra_bits != 0:
        self.pc_layout.append(('unused', extra_bits))

    # -------------------------------------------------------------------------
    # IO
    # Stage 1.
    self.s1_address = Record(self.pc_layout)
    self.s1_flush = Signal()
    self.s1_valid = Signal()
    self.s1_stall = Signal()
    self.s1_access = Signal()

    # Stage 2.
    self.s2_address = Record(self.pc_layout)
    self.s2_evict = Signal()
    self.s2_valid = Signal()
    self.s2_stall = Signal()
    self.s2_access = Signal()
    self.s2_miss = Signal()
    self.s2_rdata = Signal(32)
    self.s2_re = Signal()
    # The write-side ports only exist when writes are enabled.
    if enable_write:
        self.s2_wdata = Signal(32)
        self.s2_sel = Signal(4)
        self.s2_we = Signal()

    # Refill bus.
    self.bus_addr = Record(self.pc_layout)
    self.bus_valid = Signal()
    self.bus_last = Signal()
    self.bus_data = Signal(32)
    self.bus_ack = Signal()
    self.bus_err = Signal()

    # 40-bit access and miss counters.
    self.access_cnt = Signal(40)
    self.miss_cnt = Signal(40)

    # snoop bus
    if not enable_write:
        # RO cache. Implement the Internal snooping port
        self.dcache_snoop = InternalSnoopPort(name='cache_snoop')
def instantiate_dut(self):
    """Create the receive-side UTMI record and return the fragment under test bound to it."""
    self.utmi = Record([
        ("rx_data", 8),
        ("rx_active", 1),
        ("rx_valid", 1),
    ])

    return self.FRAGMENT_UNDER_TEST(utmi=self.utmi)