def __init__(self, nways, nlines, nwords, base, limit):
    """Check the cache geometry and declare the cache interface signals.

    Parameters
    ----------
    nways : int
        Number of ways; must be 1 or 2.
    nlines : int
        Number of lines; must be a non-zero power of 2.
    nwords : int
        Number of words per line; must be 4, 8 or 16.
    base, limit : int
        Bounds of the cached region. ``limit - base`` is handed to
        ``log2_int`` to size the tag field.

    Raises
    ------
    ValueError
        If ``nlines``, ``nwords`` or ``nways`` is not an accepted value.
    """
    # A power of two has a single bit set, so ANDing it with itself minus one
    # gives zero. Zero (or any falsy value) is rejected explicitly.
    nlines_is_pow2 = nlines and not (nlines & (nlines - 1))
    if not nlines_is_pow2:
        raise ValueError(
            "nlines must be a power of 2, not {!r}".format(nlines))
    if nwords not in {4, 8, 16}:
        raise ValueError(
            "nwords must be 4, 8 or 16, not {!r}".format(nwords))
    if nways not in {1, 2}:
        raise ValueError(
            "nways must be 1 or 2, not {!r}".format(nways))

    self.nways = nways
    self.nlines = nlines
    self.nwords = nwords
    self.base = base
    self.limit = limit

    # Address fields, least significant first: word offset within a line,
    # line index, then the tag.
    offsetbits = log2_int(nwords)
    linebits = log2_int(nlines)
    # The extra 2 bits are not part of the record; presumably they are the
    # byte offset inside a 32-bit word -- TODO confirm.
    tagbits = log2_int(limit - base) - linebits - offsetbits - 2

    self.s1_addr = Record([("offset", offsetbits),
                           ("line", linebits),
                           ("tag", tagbits)])
    self.s1_flush = Signal()
    self.s1_stall = Signal()
    self.s1_valid = Signal()
    self.s2_addr = Record.like(self.s1_addr)
    self.s2_re = Signal()
    self.s2_evict = Signal()
    self.s2_valid = Signal()
    self.bus_valid = Signal()
    self.bus_error = Signal()
    self.bus_rdata = Signal(32)

    self.s2_miss = Signal()
    self.s2_rdata = Signal(32)
    self.bus_re = Signal()
    self.bus_addr = Record.like(self.s1_addr)
    self.bus_last = Signal()
def __init__(self, *, size, data_width=32, granularity=8, writable=True):
    """Create the backing memory and the Wishbone interface that exposes it.

    Parameters
    ----------
    size : int
        Memory size, counted in units of ``granularity`` bits. Must be a
        positive power of two and at least ``data_width // granularity``.
    data_width : int
        Width of the Wishbone data bus.
    granularity : int
        Width of the smallest addressable unit.
    writable : bool
        Stored on the instance as ``self.writable``.

    Raises
    ------
    ValueError
        If ``size`` is not a positive integer power of two, or is smaller
        than the data width / granularity ratio.
    """
    super().__init__()

    size_is_pow2 = (isinstance(size, int) and size > 0
                    and not (size & (size - 1)))
    if not size_is_pow2:
        raise ValueError(
            "Size must be an integer power of two, not {!r}".format(size))

    # Number of granules that make up one bus word.
    ratio = data_width // granularity
    if size < ratio:
        raise ValueError(
            "Size {} cannot be lesser than the data width/granularity ratio "
            "of {} ({} / {})".format(size, ratio, data_width, granularity))

    self._mem = Memory(depth=(size * granularity) // data_width,
                       width=data_width,
                       name=self.name)

    self.bus = wishbone.Interface(addr_width=log2_int(self._mem.depth),
                                  data_width=self._mem.width,
                                  granularity=granularity,
                                  features={"cti", "bte"})

    # The memory map is addressed in granules, hence log2(size) address bits.
    memory_map = MemoryMap(addr_width=log2_int(size), data_width=granularity)
    memory_map.add_resource(self._mem, size=size)
    self.bus.memory_map = memory_map

    self.size = size
    self.granularity = granularity
    self.writable = writable
def __init__(self, nways, nlines, nwords, base, limit):
    """Check the cache parameters and declare the cache interface signals.

    Parameters
    ----------
    nways : int
        Number of ways; must be 1 or 2.
    nlines : int
        Number of lines; must be a non-zero power of 2.
    nwords : int
        Number of words per line; must be 4, 8 or 16.
    base : int
        Start of the cached region; 0 or a power of 2 below ``2**32``.
    limit : int
        End of the cached region; a power of 2 no greater than ``2**32``
        and strictly greater than ``base``.

    Raises
    ------
    TypeError
        If ``nlines``, ``base`` or ``limit`` is not an integer.
    ValueError
        If any parameter is outside its accepted range.
    """
    # --- geometry ---------------------------------------------------------
    if not isinstance(nlines, int):
        raise TypeError(
            "nlines must be an integer, not {!r}".format(nlines))
    if nlines == 0 or nlines & (nlines - 1):
        raise ValueError(
            "nlines must be a power of 2, not {}".format(nlines))
    if nwords not in {4, 8, 16}:
        raise ValueError(
            "nwords must be 4, 8 or 16, not {!r}".format(nwords))
    if nways not in {1, 2}:
        raise ValueError(
            "nways must be 1 or 2, not {!r}".format(nways))

    # --- cached region ----------------------------------------------------
    if not isinstance(base, int):
        raise TypeError(
            "base must be an integer, not {!r}".format(base))
    if not (0 <= base < 2**32) or base & (base - 1):
        raise ValueError(
            "base must be 0 or a power of 2 (< 2**32), not {:#x}".format(
                base))
    if not isinstance(limit, int):
        raise TypeError(
            "limit must be an integer, not {!r}".format(limit))
    if not (1 <= limit <= 2**32) or limit & (limit - 1):
        raise ValueError(
            "limit must be a power of 2 (<= 2**32), not {:#x}".format(
                limit))
    if base >= limit:
        raise ValueError(
            "limit {:#x} must be greater than base {:#x}".format(
                limit, base))

    self.nways = nways
    self.nlines = nlines
    self.nwords = nwords
    self.base = base
    self.limit = limit

    # Address fields, least significant first: word offset within a line,
    # line index, then the tag.
    offsetbits = log2_int(nwords)
    linebits = log2_int(nlines)
    # The extra 2 bits are not part of the record; presumably they are the
    # byte offset inside a 32-bit word -- TODO confirm.
    tagbits = log2_int(limit) - linebits - offsetbits - 2

    self.s1_addr = Record([("offset", offsetbits),
                           ("line", linebits),
                           ("tag", tagbits)])
    self.s1_stall = Signal()
    self.s1_valid = Signal()
    self.s2_addr = Record.like(self.s1_addr)
    self.s2_re = Signal()
    self.s2_flush = Signal()
    self.s2_evict = Signal()
    self.s2_valid = Signal()
    self.bus_valid = Signal()
    self.bus_error = Signal()
    self.bus_rdata = Signal(32)

    self.s2_miss = Signal()
    self.s2_flush_ack = Signal()
    self.s2_rdata = Signal(32)
    self.bus_re = Signal()
    self.bus_addr = Record.like(self.s1_addr)
    self.bus_last = Signal()
def elaborate(self, platform):
    """Build the address decoder between ``self.bus`` and its subordinates.

    Every subordinate bus receives the initiator's address, write data,
    select, ``we`` and ``stb`` unconditionally. ``cyc`` and the read data
    path are only connected inside the ``Case`` that matches the
    subordinate's window pattern. The ``ack``/``err``/``rty``/``stall``
    responses of all subordinates are ORed together.

    Returns the populated ``Module``.
    """
    m = Module()

    # Number of low address bits of the memory map that select a granule
    # inside one bus word; they are dropped from each window pattern.
    granularity_bits = log2_int(self.bus.data_width // self.bus.granularity)
    pattern_end = -granularity_bits if granularity_bits > 0 else None

    # OR-reductions of the subordinate responses, built up as plain
    # expressions while the subordinates are visited.
    ack_any = 0
    err_any = 0
    rty_any = 0
    stall_any = 0

    with m.Switch(self.bus.adr):
        for sub_map, (sub_pat, sub_ratio) in self._map.window_patterns():
            sub_bus = self._subs[sub_map]

            m.d.comb += [
                sub_bus.adr.eq(self.bus.adr << log2_int(sub_ratio)),
                sub_bus.dat_w.eq(self.bus.dat_w),
                sub_bus.sel.eq(
                    Cat(Repl(sel, sub_ratio) for sel in self.bus.sel)),
                sub_bus.we.eq(self.bus.we),
                sub_bus.stb.eq(self.bus.stb),
            ]
            # Optional signals: forward the initiator's value when it has
            # the signal, otherwise drive the listed fallback value.
            if hasattr(sub_bus, "lock"):
                m.d.comb += sub_bus.lock.eq(getattr(self.bus, "lock", 0))
            if hasattr(sub_bus, "cti"):
                m.d.comb += sub_bus.cti.eq(
                    getattr(self.bus, "cti", CycleType.CLASSIC))
            if hasattr(sub_bus, "bte"):
                m.d.comb += sub_bus.bte.eq(
                    getattr(self.bus, "bte", BurstTypeExt.LINEAR))

            with m.Case(sub_pat[:pattern_end]):
                m.d.comb += [
                    sub_bus.cyc.eq(self.bus.cyc),
                    self.bus.dat_r.eq(sub_bus.dat_r),
                ]

            ack_any |= sub_bus.ack
            if hasattr(sub_bus, "err"):
                err_any |= sub_bus.err
            if hasattr(sub_bus, "rty"):
                rty_any |= sub_bus.rty
            if hasattr(sub_bus, "stall"):
                stall_any |= sub_bus.stall

    m.d.comb += self.bus.ack.eq(ack_any)
    if hasattr(self.bus, "err"):
        m.d.comb += self.bus.err.eq(err_any)
    if hasattr(self.bus, "rty"):
        m.d.comb += self.bus.rty.eq(rty_any)
    if hasattr(self.bus, "stall"):
        m.d.comb += self.bus.stall.eq(stall_any)

    return m
def __init__(self, nlines, nwords, nways, start_addr=0, end_addr=2**32,
             enable_write=True):
    """Check the cache geometry and declare the cache interface signals.

    Parameters
    ----------
    nlines : int
        Number of lines; must be a non-zero power of 2.
    nwords : int
        Number of words per line; must be 4, 8 or 16.
    nways : int
        Number of ways; must be 1 or 2.
    start_addr, end_addr : int
        Bounds of the cacheable region; their difference sizes the tag.
    enable_write : bool
        When true, the write-side signals (``s2_wdata``, ``s2_sel``,
        ``s2_we``) are created as well.

    Raises
    ------
    ValueError
        If ``nlines``, ``nwords`` or ``nways`` is not an accepted value.
    """
    if nlines == 0 or (nlines & (nlines - 1)):
        raise ValueError(f'nlines must be a power of 2: {nlines}')
    if nwords not in (4, 8, 16):
        raise ValueError(f'nwords must be 4, 8 or 16: {nwords}')
    if nways not in (1, 2):
        raise ValueError(f'nways must be 1 or 2: {nways}')

    self.enable_write = enable_write
    self.nlines = nlines
    self.nwords = nwords
    self.nways = nways

    offset_bits = log2_int(nwords)
    line_bits = log2_int(nlines)
    addr_bits = log2_int(end_addr - start_addr, need_pow2=False)
    # The 2 subtracted bits are the 'byte' field of the layout below.
    tag_bits = addr_bits - line_bits - offset_bits - 2
    # Whatever is left of a 32-bit address after the named fields.
    extra_bits = 32 - tag_bits - line_bits - offset_bits - 2

    pc_layout = [
        ('byte', 2),
        ('offset', offset_bits),
        ('line', line_bits),
        ('tag', tag_bits),
    ]
    if extra_bits:
        pc_layout.append(('unused', extra_bits))

    # Stage 1 inputs.
    self.s1_address = Record(pc_layout)
    self.s1_flush = Signal()
    self.s1_valid = Signal()
    self.s1_stall = Signal()

    # Stage 2 signals.
    self.s2_address = Record(pc_layout)
    self.s2_evict = Signal()
    self.s2_valid = Signal()
    self.s2_miss = Signal()
    self.s2_rdata = Signal(32)
    self.s2_re = Signal()
    if enable_write:
        self.s2_wdata = Signal(32)
        self.s2_sel = Signal(4)
        self.s2_we = Signal()

    # Bus-side signals.
    self.bus_addr = Record(pc_layout)
    self.bus_valid = Signal()
    self.bus_last = Signal()
    self.bus_data = Signal(32)
    self.bus_ack = Signal()
    self.bus_err = Signal()
def __init__(self, csr_bus, *, data_width=None):
    """Create the Wishbone interface that fronts ``csr_bus``.

    Parameters
    ----------
    csr_bus : CSRInterface
        The CSR bus to bridge to. Its data width must be 8, 16, 32 or 64.
    data_width : int or None
        Wishbone data width; defaults to the CSR bus data width.

    Raises
    ------
    ValueError
        If ``csr_bus`` is not a ``CSRInterface`` or has an unsupported
        data width.
    """
    if not isinstance(csr_bus, CSRInterface):
        raise ValueError(
            "CSR bus must be an instance of CSRInterface, not {!r}".format(
                csr_bus))

    csr_width = csr_bus.data_width
    if csr_width not in (8, 16, 32, 64):
        raise ValueError(
            "CSR bus data width must be one of 8, 16, 32, 64, not {!r}".
            format(csr_bus.data_width))

    if data_width is None:
        data_width = csr_bus.data_width

    self.csr_bus = csr_bus

    # One Wishbone word covers data_width // csr_width CSR words, so that
    # many low CSR address bits are dropped from the Wishbone address.
    word_select_bits = log2_int(data_width // csr_bus.data_width)
    self.wb_bus = WishboneInterface(
        addr_width=max(0, csr_bus.addr_width - word_select_bits),
        data_width=data_width,
        granularity=csr_bus.data_width,
        name="wb")

    self.wb_bus.memory_map = MemoryMap(addr_width=csr_bus.addr_width,
                                       data_width=csr_bus.data_width)
    # Since granularity of the Wishbone interface matches the data width of
    # the CSR bus, no width conversion is performed, even if the Wishbone
    # data width is greater.
    self.wb_bus.memory_map.add_window(self.csr_bus.memory_map)
def elaborate(self, platform):
    """Build a chain of toggling clock domains that divides ``cd_in``.

    ``log2_int(self.divisor)`` local clock domains are created. Each one's
    clock toggles in the previous domain, and the clock of ``self.cd_out``
    is driven from the last domain of the chain.

    Returns the populated ``Module``.
    """
    divisor = self.divisor
    cd_out = self.cd_out
    cd_in = self.cd_in
    clk = self.clk  # read as in the original; the value is not used here
    divisor_bits = log2_int(divisor)

    def stage_domain(index):
        # Name of the local helper domain for a given stage of the chain.
        return "_{}_{}".format(cd_out, index)

    stages = [stage_domain(index) for index in range(divisor_bits + 1)]

    m = Module()
    m.domains += ClockDomain(cd_out)

    # Stage 0 simply mirrors the input clock.
    m.domains += ClockDomain(stages[0], local=True)
    m.d.comb += ClockSignal(stages[0]).eq(ClockSignal(cd_in))

    # Every further stage toggles once per clock of the stage before it.
    for cd_cur, cd_next in zip(stages, stages[1:]):
        m.domains += ClockDomain(cd_next, local=True)
        m.d[cd_cur] += ClockSignal(cd_next).eq(~ClockSignal(cd_next))

    m.d.comb += ClockSignal(cd_out).eq(ClockSignal(stages[-1]))
    return m
def window(self, *, addr_width, data_width, granularity=None,
           features=frozenset(), alignment=0, addr=None, sparse=None):
    """Request a window to a subordinate bus.

    A new Wishbone interface is created with a fresh memory map attached,
    and it is recorded together with ``addr`` and ``sparse`` in
    ``self._windows``.

    See :meth:`nmigen_soc.wishbone.Decoder.add` for details.

    Return value
    ------------
    An instance of :class:`nmigen_soc.wishbone.Interface`.
    """
    window = wishbone.Interface(addr_width=addr_width,
                                data_width=data_width,
                                granularity=granularity,
                                features=features)

    # The memory map is addressed in granules, so it needs extra address
    # bits to select a granule within one bus word.
    granule_select_bits = log2_int(data_width // window.granularity)
    window.memory_map = MemoryMap(
        addr_width=addr_width + granule_select_bits,
        data_width=window.granularity,
        alignment=alignment)

    self._windows.append((window, addr, sparse))
    return window
def bus(self):
    """Return the bus interface, creating it on first access.

    The first call freezes ``self._map``, builds the interface sized from
    the frozen map, attaches the map to it and caches the result in
    ``self._bus``. Later calls return the cached interface.
    """
    if self._bus is not None:
        return self._bus

    self._map.freeze()
    # The map is addressed in granules; the bus address drops the bits that
    # select a granule within one bus word.
    granularity_bits = log2_int(self.data_width // self.granularity)
    self._bus = Interface(
        addr_width=self._map.addr_width - granularity_bits,
        data_width=self.data_width,
        granularity=self.granularity,
        features=self.features)
    self._bus.memory_map = self._map
    return self._bus
def __init__(self, settings, clk_freq, zqcs_freq=1e0, postponing=1):
    """Store the refresher configuration and create its command endpoint.

    Parameters
    ----------
    settings
        Controller settings; ``settings.geom`` and ``settings.phy`` are
        read to size the command endpoint.
    clk_freq
        Stored as ``self._clk_freq``.
    zqcs_freq
        Stored as ``self._zqcs_freq``.
    postponing : int
        Stored as ``self._postponing``; asserted to be at most 8.
    """
    assert postponing <= 8

    geom = settings.geom
    self._abits = geom.addressbits
    # Bank address width includes the bits needed to select a rank.
    self._babits = geom.bankbits + log2_int(settings.phy.nranks)

    self.cmd = stream.Endpoint(
        cmd_request_rw_layout(a=self._abits, ba=self._babits))

    self._postponing = postponing
    self._settings = settings
    self._clk_freq = clk_freq
    self._zqcs_freq = zqcs_freq
def __init__(self, core, data_width=32, granularity=8):
    """Create a Wishbone interface in front of a native port of ``core``.

    Parameters
    ----------
    core
        Object providing ``crossbar.get_native_port()`` and ``size``.
    data_width : int
        Wishbone data width.
    granularity : int
        Wishbone granularity.
    """
    super().__init__(name="wishbone")

    self.native_port = core.crossbar.get_native_port()
    # How many Wishbone words fit in one native-port word.
    self.ratio = self.native_port.data_width // data_width

    addr_width = log2_int(core.size // self.ratio)
    ratio_bits = log2_int(self.ratio)
    bus_addr_width = addr_width + ratio_bits

    self.bus = wishbone.Interface(addr_width=bus_addr_width,
                                  data_width=data_width,
                                  granularity=granularity)

    # The memory map is addressed in granules, hence the extra bits that
    # select a granule within one Wishbone word.
    granule_select_bits = log2_int(data_width // granularity)
    memory_map = MemoryMap(addr_width=bus_addr_width + granule_select_bits,
                           data_width=granularity)
    self.bus.memory_map = memory_map
def __init__(self, pads, sys_clk_freq=100e6):
    """Set up the PHY's CSRs, bus bridge, DFI interface and PHY settings.

    Parameters
    ----------
    pads
        Pad object; the widths of ``dq.io``, ``a.o0``, ``ba.o0`` and, when
        present, ``cs.o0`` are read from it.
    sys_clk_freq : float
        System clock frequency, used to derive ``tck``.

    Raises
    ------
    ValueError
        If the number of DQ pads is not a multiple of 8.
    """
    super().__init__(name="phy")

    self.pads = pads
    self._sys_clk_freq = sys_clk_freq

    databits = len(self.pads.dq.io)
    if databits % 8:
        raise ValueError("DQ pads should come in a multiple of 8")

    # CSR
    bank = self.csr_bank()

    self.burstdet = bank.csr(databits // 8, "rw")

    self.rdly = [
        bank.csr(3, "rw", name="rdly_p0"),
        bank.csr(3, "rw", name="rdly_p1"),
    ]

    self._bridge = self.bridge(data_width=32, granularity=8, alignment=2)
    self.bus = self._bridge.bus

    # DFI interface, sized from the pads.
    addressbits = len(self.pads.a.o0)
    bankbits = len(self.pads.ba.o0)
    nranks = len(self.pads.cs.o0) if hasattr(self.pads, "cs") else 1
    self.dfi = Interface(addressbits, bankbits, nranks, 4 * databits, 4)

    # PHY settings ---------------------------------------------------------
    tck = 1 / (2 * self._sys_clk_freq)
    nphases = 2
    cl, cwl = get_cl_cw("DDR3", tck)
    cl_sys_latency = get_sys_latency(nphases, cl)
    cwl_sys_latency = get_sys_latency(nphases, cwl)
    rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
    wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
    self.settings = PhySettings(
        phytype="ECP5DDRPHY",
        memtype="DDR3",
        databits=databits,
        dfi_databits=4 * databits,
        nranks=nranks,
        nphases=nphases,
        rdphase=rdphase,
        wrphase=wrphase,
        rdcmdphase=rdcmdphase,
        wrcmdphase=wrcmdphase,
        cl=cl,
        cwl=cwl,
        read_latency=2 + cl_sys_latency + 2 + log2_int(4 // nphases) + 4,
        write_latency=cwl_sys_latency)
def NMux(select: Signal, signals: List[Signal]) -> Value:
    """Multiplex arbitrarily many signals.

    The list is split at half of the next power of two at or above its
    length. The corresponding bit of ``select`` picks between the two
    halves, and each half is multiplexed recursively with the lower bits of
    ``select``.

    Raises ``MultiplexError`` when ``signals`` is empty; a single signal is
    returned unchanged.
    """
    count = len(signals)
    if count == 0:
        raise MultiplexError('Cannot mux zero signals')
    if count == 1:
        return signals[0]

    # count >= 2 here, so at least one select bit is needed.
    nbits = log2_int(count, need_pow2=False)
    top = nbits - 1
    midpoint = (1 << nbits) // 2

    lower_half = signals[:midpoint]
    upper_half = signals[midpoint:]

    top_bit = select[top]
    when_set = NMux(select[:top], upper_half)
    when_clear = NMux(select[:top], lower_half)
    return Mux(top_bit, when_set, when_clear)
def __init__(self, clk_freq, rate, speedgrade=None, fine_refresh_mode=None):
    """Derive the geometry and timing settings of the memory module.

    Parameters
    ----------
    clk_freq
        Stored as ``self.clk_freq``.
    rate
        Stored as ``self.rate``.
    speedgrade
        Stored as ``self.speedgrade``.
    fine_refresh_mode : None, "1x", "2x" or "4x"
        Only accepted for DDR4, where ``None`` is replaced by ``"1x"``.
    """
    self.clk_freq = clk_freq
    self.rate = rate
    self.speedgrade = speedgrade

    self.geom_settings = GeomSettings(
        bankbits=log2_int(self.nbanks),
        rowbits=log2_int(self.nrows),
        colbits=log2_int(self.ncols),
    )

    # A fine refresh mode may only be given for DDR4 parts.
    assert self.memtype == "DDR4" or fine_refresh_mode is None
    assert fine_refresh_mode in [None, "1x", "2x", "4x"]
    if fine_refresh_mode is None and self.memtype == "DDR4":
        fine_refresh_mode = "1x"

    def optional_ck_ns(name):
        # Timings stored as an unpackable pair; None when the module does
        # not define the timing.
        if self.get(name) is None:
            return None
        return self.ck_ns_to_cycles(*self.get(name))

    def optional_ns(name):
        # Timings stored as a single value; None when not defined.
        if self.get(name) is None:
            return None
        return self.ns_to_cycles(self.get(name))

    def row_cycle():
        # tRC is computed as tRP + tRAS and is only defined when tRAS is.
        if self.get("tRAS") is None:
            return None
        return self.ns_to_cycles(self.get("tRP") + self.get("tRAS"))

    self.timing_settings = TimingSettings(
        tRP=self.ns_to_cycles(self.get("tRP")),
        tRCD=self.ns_to_cycles(self.get("tRCD")),
        tWR=self.ns_to_cycles(self.get("tWR")),
        tREFI=self.ns_to_cycles(self.get("tREFI", fine_refresh_mode), False),
        tRFC=self.ck_ns_to_cycles(*self.get("tRFC", fine_refresh_mode)),
        tWTR=self.ck_ns_to_cycles(*self.get("tWTR")),
        tFAW=optional_ck_ns("tFAW"),
        tCCD=optional_ck_ns("tCCD"),
        tRRD=optional_ck_ns("tRRD"),
        tRC=row_cycle(),
        tRAS=optional_ns("tRAS"),
        tZQCS=optional_ck_ns("tZQCS"))
    self.timing_settings.fine_refresh_mode = fine_refresh_mode
def __init__(self, address_align, settings):
    """Build the record layout: one command sub-record per bank plus data.

    Parameters
    ----------
    address_align : int
        Number of low address bits removed from the address width.
    settings
        Settings object; ``settings.phy`` and ``settings.geom`` are read to
        size the interface.
    """
    phy = settings.phy
    geom = settings.geom

    rankbits = log2_int(phy.nranks)

    self.address_align = address_align
    self.address_width = (geom.rowbits + geom.colbits + rankbits
                          - address_align)
    self.data_width = phy.dfi_databits * phy.nphases
    # Banks are counted across all ranks.
    self.nbanks = phy.nranks * (2**geom.bankbits)
    self.nranks = phy.nranks
    self.settings = settings

    layout = [("bank{}".format(index), cmd_layout(self.address_width))
              for index in range(self.nbanks)]
    layout += data_layout(self.data_width)
    Record.__init__(self, layout)
def read_incr(self, dut, *, addr, count, wrap=0):  # FIXME clean
    """Drive an incrementing-burst read on ``dut.bus`` and collect the data.

    This is a generator: it yields bus assignments and signal reads, and
    returns the list of ``dat_r`` values read, one per beat. It is
    presumably meant to be run as a simulator process -- confirm against
    the caller.

    Parameters
    ----------
    dut
        Object exposing a Wishbone ``bus`` with ``cti`` and ``bte`` signals.
    addr
        Address of the first beat.
    count
        Number of beats to read.
    wrap
        ``0`` increments the whole address after each beat. Otherwise only
        the low ``log2_int(wrap)`` address bits are incremented. The value
        is also passed to ``_burst_type`` to choose ``bte``.
    """
    data = []
    # Start the bus cycle.
    yield dut.bus.cyc.eq(1)
    yield dut.bus.stb.eq(1)
    yield dut.bus.adr.eq(addr)
    yield dut.bus.bte.eq(_burst_type(wrap))
    yield dut.bus.cti.eq(CycleType.END_OF_BURST if count == 0 else CycleType.INCR_BURST)
    # NOTE(review): the whitespace-collapsed source reads
    # "yield self.assertFalse(...)"; it is laid out here as a bare ``yield``
    # followed by the assertion (same for assertTrue below) -- confirm
    # against the upstream file.
    yield
    self.assertFalse((yield dut.bus.ack))
    for i in range(count):
        yield
        self.assertTrue((yield dut.bus.ack))
        data.append((yield dut.bus.dat_r))
        if wrap == 0:
            # Linear burst: bump the full address.
            yield dut.bus.adr.eq((yield dut.bus.adr) + 1)
        else:
            # Wrapping burst: only the low address bits count up, so the
            # address wraps inside the burst window.
            yield dut.bus.adr[:log2_int(wrap)].eq(
                (yield dut.bus.adr[:log2_int(wrap)]) + 1)
        # Flag the last beat of the burst.
        yield dut.bus.cti.eq(CycleType.END_OF_BURST if i == count - 1 else CycleType.INCR_BURST)
    # End the bus cycle.
    yield dut.bus.cyc.eq(0)
    yield dut.bus.stb.eq(0)
    return data
def __init__(self, *, addr_width, data_width, granularity=None,
             features=frozenset(), alignment=0):
    """Record the bus parameters and create the memory map.

    Parameters
    ----------
    addr_width : int
        Address width of the bus.
    data_width : int
        Data width of the bus.
    granularity : int or None
        Granularity of the bus; defaults to ``data_width``.
    features : iterable
        Optional bus features, stored as a set.
    alignment : int
        Passed to the memory map and stored on the instance.

    The arguments are checked by ``_check_interface`` before anything is
    stored.
    """
    if granularity is None:
        granularity = data_width
    _check_interface(addr_width, data_width, granularity, features)

    self.data_width = data_width
    self.granularity = granularity
    self.features = set(features)
    self.alignment = alignment

    # The memory map is addressed in granules, so it is wider than the bus
    # address by the bits selecting a granule within one bus word. The map
    # is never created narrower than 1 bit.
    granule_select_bits = log2_int(data_width // granularity)
    map_addr_width = max(1, addr_width + granule_select_bits)
    self._map = MemoryMap(addr_width=map_addr_width,
                          data_width=granularity,
                          alignment=alignment)

    self._subs = dict()
    self._bus = None
def memory_map(self, memory_map):
    """Attach ``memory_map`` to this interface after checking it fits.

    The map's data width must equal the interface granularity, and its
    address width must equal the interface address width plus the bits
    that select a granule within one bus word (at least 1). The map is
    frozen before it is stored.

    Raises
    ------
    TypeError
        If ``memory_map`` is not a ``MemoryMap``.
    ValueError
        If the data width or address width does not match.
    """
    if not isinstance(memory_map, MemoryMap):
        raise TypeError(
            "Memory map must be an instance of MemoryMap, not {!r}"
            .format(memory_map))

    if memory_map.data_width != self.granularity:
        raise ValueError(
            "Memory map has data width {}, which is not the same as bus "
            "interface granularity {}"
            .format(memory_map.data_width, self.granularity))

    granularity_bits = log2_int(self.data_width // self.granularity)
    byte_addr_width = self.addr_width + granularity_bits
    if memory_map.addr_width != max(1, byte_addr_width):
        raise ValueError(
            "Memory map has address width {}, which is not the same as bus "
            "interface address width {} ({} address bits + {} granularity bits)"
            .format(memory_map.addr_width, byte_addr_width,
                    self.addr_width, granularity_bits))

    memory_map.freeze()
    self._map = memory_map
def elaborate(self, platform):
    """Build the Wishbone-to-CSR bridge logic.

    A Wishbone access is split into one CSR access per ``sel`` bit. The
    ``cycle`` counter steps through the granules of the Wishbone word, and
    once it has passed the last granule the final read data is captured
    and ``ack`` is raised.

    Returns the populated ``Module``.
    """
    csr_bus = self.csr_bus
    wb_bus = self.wb_bus

    m = Module()

    # Counts from 0 to len(sel); the value len(sel) falls into Default below.
    cycle = Signal(range(len(wb_bus.sel) + 1))
    # The CSR address is the Wishbone address with the granule index in the
    # low bits.
    m.d.comb += csr_bus.addr.eq(
        Cat(cycle[:log2_int(len(wb_bus.sel))], wb_bus.adr))

    with m.If(wb_bus.cyc & wb_bus.stb):
        with m.Switch(cycle):
            def segment(index):
                # Bit range of granule `index` inside the Wishbone data word.
                return slice(index * wb_bus.granularity,
                             (index + 1) * wb_bus.granularity)

            for index, sel_index in enumerate(wb_bus.sel):
                with m.Case(index):
                    if index > 0:
                        # CSR reads are registered, and we need to re-register them.
                        m.d.sync += wb_bus.dat_r[segment(index - 1)].eq(
                            csr_bus.r_data)
                    m.d.comb += csr_bus.r_stb.eq(sel_index & ~wb_bus.we)
                    m.d.comb += csr_bus.w_data.eq(
                        wb_bus.dat_w[segment(index)])
                    m.d.comb += csr_bus.w_stb.eq(sel_index & wb_bus.we)
                    m.d.sync += cycle.eq(index + 1)

            with m.Default():
                # `index` still holds the last value from the loop above, so
                # this captures the read data of the final granule.
                m.d.sync += wb_bus.dat_r[segment(index)].eq(csr_bus.r_data)
                m.d.sync += wb_bus.ack.eq(1)

    with m.Else():
        m.d.sync += wb_bus.ack.eq(0)

    # NOTE(review): in the whitespace-collapsed source the nesting of this
    # block is ambiguous; it is placed at the top level here -- confirm
    # against the upstream file.
    with m.If(wb_bus.ack):
        m.d.sync += cycle.eq(0)

    return m
def __init__(self, phy_settings, geom_settings, timing_settings, clk_freq,
             controller_settings=None):
    """Store the controller settings and create the user and DFI interfaces.

    Parameters
    ----------
    phy_settings
        PHY settings; ``memtype``, ``nranks``, ``dfi_databits`` and
        ``nphases`` are read here.
    geom_settings
        Geometry settings; ``addressbits`` and ``bankbits`` are read here.
    timing_settings
        Timing settings, attached to the controller settings.
    clk_freq
        Stored as ``self._clk_freq``.
    controller_settings : ControllerSettings or None
        Settings object to use. It is modified in place: its ``phy``,
        ``geom`` and ``timing`` attributes are set. When ``None`` (the
        default) a fresh ``ControllerSettings()`` is created for this
        instance.
    """
    # The default used to be a single ControllerSettings() created when the
    # function was defined. Because the object is mutated below, every core
    # built with the default shared it and overwrote the others'
    # phy/geom/timing. Create one per instance instead.
    if controller_settings is None:
        controller_settings = ControllerSettings()

    self._address_align = log2_int(burst_lengths[phy_settings.memtype])

    # Settings -------------------------------------------------------------
    self.settings = controller_settings
    self.settings.phy = phy_settings
    self.settings.geom = geom_settings
    self.settings.timing = timing_settings

    # LiteDRAM Interface (User) --------------------------------------------
    self.interface = gramInterface(self._address_align, self.settings)

    # DFI Interface (Memory) -----------------------------------------------
    self.dfi = dfi.Interface(
        addressbits=geom_settings.addressbits,
        bankbits=geom_settings.bankbits,
        nranks=phy_settings.nranks,
        databits=phy_settings.dfi_databits,
        nphases=phy_settings.nphases)

    self._clk_freq = clk_freq
def elaborate(self, platform):
    """Build the load/store unit: L1 data cache, write buffer and bare path.

    Three ports of a ``WishboneArbiter`` connected to ``self.dbus`` are
    used: priority 0 drains the write buffer, priority 1 serves the cache's
    bus requests, and priority 2 carries loads and stores whose address is
    outside the cached region.

    Returns the populated ``Module``.
    """
    m = Module()

    dcache = m.submodules.dcache = L1Cache(*self.dcache_args)

    x_dcache_select = Signal()

    # Test whether the target address is inside the L1 cache region. We use bit masks in order
    # to avoid carry chains from arithmetic comparisons. This restricts the region boundaries
    # to powers of 2.
    with m.Switch(self.x_addr[2:]):
        def addr_below(limit):
            # Build a 30-character pattern matching every word address below
            # `limit`: leading zeros, then don't-care bits.
            assert limit in range(1, 2**30 + 1)
            range_bits = log2_int(limit)
            const_bits = 30 - range_bits
            return "{}{}".format("0" * const_bits, "-" * range_bits)

        # Cases are tried in order, so addresses below the base are excluded
        # before the "below limit" case is considered.
        if dcache.base >= 4:
            with m.Case(addr_below(dcache.base >> 2)):
                m.d.comb += x_dcache_select.eq(0)
        with m.Case(addr_below(dcache.limit >> 2)):
            m.d.comb += x_dcache_select.eq(1)
        with m.Default():
            m.d.comb += x_dcache_select.eq(0)

    # Carry the select decision and the address from the X stage to the M
    # stage, unless the X stage is stalled.
    m_dcache_select = Signal()
    m_addr = Signal.like(self.x_addr)

    with m.If(~self.x_stall):
        m.d.sync += [
            m_dcache_select.eq(x_dcache_select),
            m_addr.eq(self.x_addr),
        ]

    # Cache inputs use word addresses (the two low address bits are dropped).
    m.d.comb += [
        dcache.s1_addr.eq(self.x_addr[2:]),
        dcache.s1_stall.eq(self.x_stall),
        dcache.s1_valid.eq(self.x_valid),
        dcache.s2_addr.eq(m_addr[2:]),
        dcache.s2_re.eq(self.m_load & m_dcache_select),
        dcache.s2_evict.eq(self.m_store & m_dcache_select),
        dcache.s2_flush.eq(self.m_flush),
        dcache.s2_valid.eq(self.m_valid),
    ]

    # Write buffer: stores to the cached region are queued in a FIFO and
    # written to the bus from there.
    wrbuf_w_data = Record([("addr", 30), ("mask", 4), ("data", 32)])
    wrbuf_r_data = Record.like(wrbuf_w_data)
    wrbuf = m.submodules.wrbuf = SyncFIFO(width=len(wrbuf_w_data), depth=dcache.nwords)
    m.d.comb += [
        wrbuf.w_data.eq(wrbuf_w_data),
        wrbuf_w_data.addr.eq(self.x_addr[2:]),
        wrbuf_w_data.mask.eq(self.x_mask),
        wrbuf_w_data.data.eq(self.x_store_data),
        wrbuf.w_en.eq(self.x_store & self.x_valid & x_dcache_select & ~self.x_stall),
        wrbuf_r_data.eq(wrbuf.r_data),
    ]

    dbus_arbiter = m.submodules.dbus_arbiter = WishboneArbiter()
    m.d.comb += dbus_arbiter.bus.connect(self.dbus)

    # Port 0: drain the write buffer, one entry per bus transaction.
    wrbuf_port = dbus_arbiter.port(priority=0)
    m.d.comb += [
        wrbuf_port.cyc.eq(wrbuf.r_rdy),
        wrbuf_port.we.eq(Const(1)),
    ]
    with m.If(wrbuf_port.stb):
        # Transaction in flight: pop the entry once it is acknowledged or
        # ends with an error.
        with m.If(wrbuf_port.ack | wrbuf_port.err):
            m.d.sync += wrbuf_port.stb.eq(0)
            m.d.comb += wrbuf.r_en.eq(1)
    with m.Elif(wrbuf.r_rdy):
        # Idle and an entry is available: start a new transaction.
        m.d.sync += [
            wrbuf_port.stb.eq(1),
            wrbuf_port.adr.eq(wrbuf_r_data.addr),
            wrbuf_port.sel.eq(wrbuf_r_data.mask),
            wrbuf_port.dat_w.eq(wrbuf_r_data.data)
        ]

    # Port 1: bus requests issued by the cache.
    dcache_port = dbus_arbiter.port(priority=1)
    m.d.comb += [
        dcache_port.cyc.eq(dcache.bus_re),
        dcache_port.stb.eq(dcache.bus_re),
        dcache_port.adr.eq(dcache.bus_addr),
        dcache_port.cti.eq(Mux(dcache.bus_last, Cycle.END, Cycle.INCREMENT)),
        dcache_port.bte.eq(Const(log2_int(dcache.nwords) - 1)),
        dcache.bus_valid.eq(dcache_port.ack),
        dcache.bus_error.eq(dcache_port.err),
        dcache.bus_rdata.eq(dcache_port.dat_r)
    ]

    # Port 2: loads and stores outside the cached region go straight to the
    # bus, one transaction at a time.
    bare_port = dbus_arbiter.port(priority=2)
    bare_rdata = Signal.like(bare_port.dat_r)
    with m.If(bare_port.cyc):
        # End the transaction on ack/err, or when the M stage is no longer
        # valid; latch the read data for the M stage.
        with m.If(bare_port.ack | bare_port.err | ~self.m_valid):
            m.d.sync += [
                bare_port.cyc.eq(0),
                bare_port.stb.eq(0),
                bare_rdata.eq(bare_port.dat_r)
            ]
    with m.Elif((self.x_load | self.x_store) & ~x_dcache_select & self.x_valid & ~self.x_stall):
        m.d.sync += [
            bare_port.cyc.eq(1),
            bare_port.stb.eq(1),
            bare_port.adr.eq(self.x_addr[2:]),
            bare_port.sel.eq(self.x_mask),
            bare_port.we.eq(self.x_store),
            bare_port.dat_w.eq(self.x_store_data)
        ]

    # Record bus errors together with the failing address; the flags are
    # cleared when the M stage is not stalled.
    with m.If(self.dbus.cyc & self.dbus.err):
        m.d.sync += [
            self.m_load_error.eq(~self.dbus.we),
            self.m_store_error.eq(self.dbus.we),
            self.m_badaddr.eq(self.dbus.adr)
        ]
    with m.Elif(~self.m_stall):
        m.d.sync += [self.m_load_error.eq(0), self.m_store_error.eq(0)]

    # X stage busy: a fence.i waits for the write buffer to drain, a cached
    # store waits for room in the write buffer, and a bare access waits for
    # the previous bare transaction.
    with m.If(self.x_fence_i):
        m.d.comb += self.x_busy.eq(wrbuf.r_rdy)
    with m.Elif(x_dcache_select):
        m.d.comb += self.x_busy.eq(self.x_store & ~wrbuf.w_rdy)
    with m.Else():
        m.d.comb += self.x_busy.eq(bare_port.cyc)

    # NOTE(review): the If/Elif/Else chain that follows assigns m_busy on
    # every path and comes later, so it appears to override this assignment
    # -- confirm whether `m.Elif` was intended for the next block.
    with m.If(self.m_flush):
        m.d.comb += self.m_busy.eq(~dcache.s2_flush_ack)
    with m.If(self.m_load_error | self.m_store_error):
        m.d.comb += self.m_busy.eq(0)
    with m.Elif(m_dcache_select):
        m.d.comb += [
            self.m_busy.eq(self.m_load & dcache.s2_miss),
            self.m_load_data.eq(dcache.s2_rdata)
        ]
    with m.Else():
        m.d.comb += [
            self.m_busy.eq(bare_port.cyc),
            self.m_load_data.eq(bare_rdata)
        ]

    return m
def elaborate(self, platform):
    """Build the fetch unit: an L1 instruction cache plus a bare bus path.

    Two ports of a ``WishboneArbiter`` connected to ``self.ibus`` are used:
    priority 0 serves the cache's bus requests and priority 1 fetches
    instructions whose address is outside the cached region.

    Returns the populated ``Module``.
    """
    m = Module()

    icache = m.submodules.icache = L1Cache(*self.icache_args)

    # Whether the PC of the A stage / F stage falls inside the cached region.
    a_icache_select = Signal()
    f_icache_select = Signal()

    m.d.comb += a_icache_select.eq((self.a_pc >= icache.base) & (self.a_pc < icache.limit))
    with m.If(~self.a_stall):
        m.d.sync += f_icache_select.eq(a_icache_select)

    # Cache inputs use word addresses (the two low PC bits are dropped).
    m.d.comb += [
        icache.s1_addr.eq(self.a_pc[2:]),
        icache.s1_flush.eq(self.a_flush),
        icache.s1_stall.eq(self.a_stall),
        icache.s1_valid.eq(self.a_valid & a_icache_select),
        icache.s2_addr.eq(self.f_pc[2:]),
        icache.s2_re.eq(Const(1)),
        icache.s2_evict.eq(Const(0)),
        icache.s2_valid.eq(self.f_valid & f_icache_select)
    ]

    ibus_arbiter = m.submodules.ibus_arbiter = WishboneArbiter()
    m.d.comb += ibus_arbiter.bus.connect(self.ibus)

    # Port 0: bus requests issued by the cache.
    icache_port = ibus_arbiter.port(priority=0)
    m.d.comb += [
        icache_port.cyc.eq(icache.bus_re),
        icache_port.stb.eq(icache.bus_re),
        icache_port.adr.eq(icache.bus_addr),
        icache_port.cti.eq(Mux(icache.bus_last, Cycle.END, Cycle.INCREMENT)),
        icache_port.bte.eq(Const(log2_int(icache.nwords) - 1)),
        icache.bus_valid.eq(icache_port.ack),
        icache.bus_error.eq(icache_port.err),
        icache.bus_rdata.eq(icache_port.dat_r)
    ]

    # Port 1: fetches outside the cached region go straight to the bus, one
    # transaction at a time.
    bare_port = ibus_arbiter.port(priority=1)
    bare_rdata = Signal.like(bare_port.dat_r)
    with m.If(bare_port.cyc):
        # End the transaction on ack/err, or when the F stage is no longer
        # valid; latch the fetched word.
        with m.If(bare_port.ack | bare_port.err | ~self.f_valid):
            m.d.sync += [
                bare_port.cyc.eq(0),
                bare_port.stb.eq(0),
                bare_rdata.eq(bare_port.dat_r)
            ]
    with m.Elif(~a_icache_select & self.a_valid & ~self.a_stall):
        m.d.sync += [
            bare_port.cyc.eq(1),
            bare_port.stb.eq(1),
            bare_port.adr.eq(self.a_pc[2:])
        ]

    # Record bus errors together with the failing address; the flag is
    # cleared when the F stage is not stalled.
    with m.If(self.ibus.cyc & self.ibus.err):
        m.d.sync += [
            self.f_fetch_error.eq(1),
            self.f_badaddr.eq(self.ibus.adr)
        ]
    with m.Elif(~self.f_stall):
        m.d.sync += self.f_fetch_error.eq(0)

    # A stage is only busy while a bare transaction is still in flight.
    with m.If(a_icache_select):
        m.d.comb += self.a_busy.eq(0)
    with m.Else():
        m.d.comb += self.a_busy.eq(bare_port.cyc)

    # F stage output: a nop after a fetch error, otherwise the word from the
    # cache or from the bare path.
    with m.If(self.f_fetch_error):
        m.d.comb += [
            self.f_busy.eq(0),
            self.f_instruction.eq(0x00000013)  # nop (addi x0, x0, 0)
        ]
    with m.Elif(f_icache_select):
        m.d.comb += [
            self.f_busy.eq(icache.s2_re & icache.s2_miss),
            self.f_instruction.eq(icache.s2_rdata)
        ]
    with m.Else():
        m.d.comb += [
            self.f_busy.eq(bare_port.cyc),
            self.f_instruction.eq(bare_rdata)
        ]

    return m
def elaborate(self, platform: Platform) -> Module:
    """Build the cache logic.

    The method creates, in order: the access/miss counters, the per-way
    records and hit detection, the LRU bookkeeping (two-way case only),
    the internal snoop comparison (read-only cache only), the READ/REFILL
    state machine, and for every way the valid bits, tag memory and data
    memory with their ports.

    Returns the populated ``Module``.
    """
    m = Module()

    snoop_addr = Record(self.pc_layout)
    snoop_valid = Signal()

    # -------------------------------------------------------------------------
    # Performance counter
    # TODO: connect to CSR's performance counter
    with m.If(~self.s1_stall & self.s1_valid & self.s1_access):
        m.d.sync += self.access_cnt.eq(self.access_cnt + 1)
    with m.If(self.s2_valid & self.s2_miss & ~self.bus_valid & self.s2_access):
        m.d.sync += self.miss_cnt.eq(self.miss_cnt + 1)
    # -------------------------------------------------------------------------

    # Per-way view: the line read from the data memory, its tag and valid
    # bit, plus the selection flags driven below.
    way_layout = [('data', 32 * self.nwords),
                  ('tag', self.s1_address.tag.shape()),
                  ('valid', 1),
                  ('sel_lru', 1),
                  ('snoop_hit', 1)]
    if self.enable_write:
        way_layout.append(('sel_we', 1))

    ways = Array(
        Record(way_layout, name='way_idx{}'.format(_way))
        for _way in range(self.nways))
    fill_cnt = Signal.like(self.s1_address.offset)

    # Check hit/miss
    way_hit = m.submodules.way_hit = Encoder(self.nways)
    for idx, way in enumerate(ways):
        m.d.comb += way_hit.i[idx].eq((way.tag == self.s2_address.tag) & way.valid)

    m.d.comb += self.s2_miss.eq(way_hit.n)
    if self.enable_write:
        # Assuming there is a HIT, flag the way that hit as the one to be written.
        m.d.comb += ways[way_hit.o].sel_we.eq(self.s2_we & self.s2_valid)

    # set the LRU
    if self.nways == 1:
        # One way: LRU is useless
        lru = Const(0)  # self.nlines
    else:
        # LRU is an N-bit vector, each bit indicating the set to replace.
        # Since NWAY is at most 2, each LRU entry is a single bit.
        lru = Signal(self.nlines)
        _lru = lru.bit_select(self.s2_address.line, 1)
        write_ended = self.bus_valid & self.bus_ack & self.bus_last  # err ^ ack = = 1
        access_hit = ~self.s2_miss & self.s2_valid & (way_hit.o == _lru)
        with m.If(write_ended | access_hit):
            m.d.sync += _lru.eq(~_lru)

    # read data from the cache
    m.d.comb += self.s2_rdata.eq(ways[way_hit.o].data.word_select(
        self.s2_address.offset, 32))

    # Internal Snoop
    snoop_use_cache = Signal()
    snoop_tag_match = Signal()
    snoop_line_match = Signal()
    snoop_cancel_refill = Signal()
    if not self.enable_write:
        bits_range = log2_int(self.end_addr - self.start_addr, need_pow2=False)

        m.d.comb += [
            snoop_addr.eq(self.dcache_snoop.addr),  # aux
            snoop_valid.eq(self.dcache_snoop.we & self.dcache_snoop.valid & self.dcache_snoop.ack),
            snoop_use_cache.eq(snoop_addr[bits_range:] == (
                self.start_addr >> bits_range)),
            snoop_tag_match.eq(snoop_addr.tag == self.s2_address.tag),
            snoop_line_match.eq(snoop_addr.line == self.s2_address.line),
            snoop_cancel_refill.eq(snoop_use_cache & snoop_valid & snoop_line_match & snoop_tag_match),
        ]
    else:
        m.d.comb += snoop_cancel_refill.eq(0)

    with m.FSM():
        with m.State('READ'):
            # A valid read that misses starts a refill. fill_cnt is set to
            # the offset just before the requested one, which is where the
            # refill ends (see bus_last below).
            with m.If(self.s2_re & self.s2_miss & self.s2_valid):
                m.d.sync += [
                    self.bus_addr.eq(self.s2_address),
                    self.bus_valid.eq(1),
                    fill_cnt.eq(self.s2_address.offset - 1)
                ]
                m.next = 'REFILL'
        with m.State('REFILL'):
            m.d.comb += self.bus_last.eq(fill_cnt == self.bus_addr.offset)
            with m.If(self.bus_ack):
                m.d.sync += self.bus_addr.offset.eq(self.bus_addr.offset + 1)
            with m.If(self.bus_ack & self.bus_last | self.bus_err):
                m.d.sync += self.bus_valid.eq(0)
            with m.If(~self.bus_valid | self.s1_flush | snoop_cancel_refill):
                m.next = 'READ'
                m.d.sync += self.bus_valid.eq(0)

    # mark the way to use (replace)
    m.d.comb += ways[lru.bit_select(self.s2_address.line, 1)].sel_lru.eq(self.bus_valid)

    # generate for N ways
    for way in ways:
        # create the memory structures for valid, tag and data.
        valid = Signal(self.nlines)  # Valid bits

        tag_m = Memory(width=len(way.tag), depth=self.nlines)  # tag memory
        tag_rp = tag_m.read_port()
        snoop_rp = tag_m.read_port()
        tag_wp = tag_m.write_port()
        m.submodules += tag_rp, tag_wp, snoop_rp

        data_m = Memory(width=len(way.data), depth=self.nlines)  # data memory
        data_rp = data_m.read_port()
        data_wp = data_m.write_port(
            granularity=32
        )  # this means only whole 32-bit words can be written.
        m.submodules += data_rp, data_wp

        # handle valid
        with m.If(self.s1_flush & self.s1_valid):  # flush
            m.d.sync += valid.eq(0)
        with m.Elif(way.sel_lru & self.bus_last & self.bus_ack):  # refill ok
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(1)
        with m.Elif(way.sel_lru & self.bus_err):  # refill error
            m.d.sync += valid.bit_select(self.bus_addr.line, 1).eq(0)
        with m.Elif(self.s2_evict & self.s2_valid & (way.tag == self.s2_address.tag)):  # evict
            m.d.sync += valid.bit_select(self.s2_address.line, 1).eq(0)

        # assignments
        m.d.comb += [
            # While stage 1 is stalled, the read ports keep addressing the
            # stage 2 line.
            tag_rp.addr.eq(
                Mux(self.s1_stall, self.s2_address.line, self.s1_address.line)),
            tag_wp.addr.eq(self.bus_addr.line),
            tag_wp.data.eq(self.bus_addr.tag),
            tag_wp.en.eq(way.sel_lru & self.bus_ack & self.bus_last),
            data_rp.addr.eq(
                Mux(self.s1_stall, self.s2_address.line, self.s1_address.line)),
            way.data.eq(data_rp.data),
            way.tag.eq(tag_rp.data),
            way.valid.eq(valid.bit_select(self.s2_address.line, 1))
        ]

        # update cache: CPU or Refill
        # The write port is multiplexed because the memory can only have one
        # write port.
        if self.enable_write:
            update_addr = Signal(len(data_wp.addr))
            update_data = Signal(len(data_wp.data))
            update_we = Signal(len(data_wp.en))
            aux_wdata = Signal(32)

            with m.If(self.bus_valid):
                # Refill in progress: write the word arriving from the bus.
                m.d.comb += [
                    update_addr.eq(self.bus_addr.line),
                    update_data.eq(Repl(self.bus_data, self.nwords)),
                    update_we.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
                ]
            with m.Else():
                # Otherwise: write the word built from the CPU store.
                m.d.comb += [
                    update_addr.eq(self.s2_address.line),
                    update_data.eq(Repl(aux_wdata, self.nwords)),
                    update_we.bit_select(self.s2_address.offset, 1).eq(way.sel_we & ~self.s2_miss)
                ]
            m.d.comb += [
                # Aux data: the write port has no byte granularity. So for the
                # case where the CPU has to write, the replacement word has to
                # be assembled here.
                aux_wdata.eq(
                    Cat(
                        Mux(self.s2_sel[0], self.s2_wdata.word_select(0, 8), self.s2_rdata.word_select(0, 8)),
                        Mux(self.s2_sel[1], self.s2_wdata.word_select(1, 8), self.s2_rdata.word_select(1, 8)),
                        Mux(self.s2_sel[2], self.s2_wdata.word_select(2, 8), self.s2_rdata.word_select(2, 8)),
                        Mux(self.s2_sel[3], self.s2_wdata.word_select(3, 8), self.s2_rdata.word_select(3, 8)))),
                #
                data_wp.addr.eq(update_addr),
                data_wp.data.eq(update_data),
                data_wp.en.eq(update_we),
            ]
        else:
            m.d.comb += [
                data_wp.addr.eq(self.bus_addr.line),
                data_wp.data.eq(Repl(self.bus_data, self.nwords)),
                data_wp.en.bit_select(self.bus_addr.offset, 1).eq(way.sel_lru & self.bus_ack),
            ]

        # --------------------------------------------------------------
        # internal snoop
        # for FENCE.i instruction
        _match_snoop = Signal()

        m.d.comb += [
            snoop_rp.addr.eq(snoop_addr.line),  # read tag memory
            _match_snoop.eq(snoop_rp.data == snoop_addr.tag),
            way.snoop_hit.eq(snoop_use_cache & snoop_valid & _match_snoop & valid.bit_select(snoop_addr.line, 1)),
        ]
        # check whether the snoop matches a write from this core
        with m.If(way.snoop_hit):
            m.d.sync += valid.bit_select(snoop_addr.line, 1).eq(0)
        # --------------------------------------------------------------

    return m
def __init__(
    self,
    nlines: int,
    nwords: int,
    nways: int,
    start_addr: int = 0,
    end_addr: int = 2**32,
    enable_write: bool = True
) -> None:
    """Check the cache geometry and declare the cache interface signals.

    Args:
        nlines: Number of lines; must be a non-zero power of 2.
        nwords: Number of words per line, per way; must be 4, 8 or 16.
        nways: Number of ways; must be 1 or 2.
        start_addr: Start of the cacheable region.
        end_addr: End of the cacheable region.
        enable_write: Enable writes to the cache (data-cache configuration).
            When false, an internal snoop port is created instead.

    Raises:
        ValueError: If ``nlines``, ``nwords`` or ``nways`` is not an
            accepted value.
    """
    # Geometry checks.
    nlines_is_pow2 = nlines != 0 and not (nlines & (nlines - 1))
    if not nlines_is_pow2:
        raise ValueError(f'nlines must be a power of 2: {nlines}')
    if nwords not in (4, 8, 16):
        raise ValueError(f'nwords must be 4, 8 or 16: {nwords}')
    if nways not in (1, 2):
        raise ValueError(f'nways must be 1 or 2: {nways}')

    self.nlines = nlines
    self.nwords = nwords
    self.nways = nways
    self.start_addr = start_addr
    self.end_addr = end_addr
    self.enable_write = enable_write

    # Address split, LSB first: byte | offset | line | tag | (unused).
    n_offset = log2_int(nwords)
    n_line = log2_int(nlines)
    n_addr = log2_int(end_addr - start_addr, need_pow2=False)
    # The 2 low bits address a byte inside a word, so they are not part of
    # the tag.
    n_tag = n_addr - n_line - n_offset - 2
    # Whatever is left of the 32-bit address above the cacheable region.
    n_unused = 32 - n_addr

    layout = [
        ('byte', 2),
        ('offset', n_offset),
        ('line', n_line),
        ('tag', n_tag),
    ]
    if n_unused != 0:
        layout.append(('unused', n_unused))
    self.pc_layout = layout

    # -------------------------------------------------------------------------
    # IO
    # Stage 1.
    self.s1_address = Record(self.pc_layout)
    self.s1_flush = Signal()
    self.s1_valid = Signal()
    self.s1_stall = Signal()
    self.s1_access = Signal()

    # Stage 2.
    self.s2_address = Record(self.pc_layout)
    self.s2_evict = Signal()
    self.s2_valid = Signal()
    self.s2_stall = Signal()
    self.s2_access = Signal()
    self.s2_miss = Signal()
    self.s2_rdata = Signal(32)
    self.s2_re = Signal()
    # NOTE(review): the CPU write-path signals are taken to exist only on a
    # write-enabled cache -- confirm the extent of this conditional.
    if enable_write:
        self.s2_wdata = Signal(32)
        self.s2_sel = Signal(4)
        self.s2_we = Signal()

    # Refill bus.
    self.bus_addr = Record(self.pc_layout)
    self.bus_valid = Signal()
    self.bus_last = Signal()
    self.bus_data = Signal(32)
    self.bus_ack = Signal()
    self.bus_err = Signal()

    # 40-bit access / miss counters.
    self.access_cnt = Signal(40)
    self.miss_cnt = Signal(40)

    # Snoop bus: a read-only cache implements the internal snooping port.
    if not enable_write:
        self.dcache_snoop = InternalSnoopPort(name='cache_snoop')
def elaborate(self, platform):
    """Elaborate the cached load/store unit.

    Accesses whose X-stage address lies in ``[dcache.base, dcache.limit)``
    use the L1 data cache: the cache refills itself over the bus and stores
    are queued in a write buffer that drains to the bus. Every other access
    is issued directly on a "bare" bus port. The write buffer, the cache and
    the bare port share ``self.dbus`` through a Wishbone arbiter.

    Returns:
        The populated ``Module``.
    """
    m = Module()

    dcache = m.submodules.dcache = L1Cache(*self.dcache_args)

    # Cacheable-region decode in X, registered into M unless X is stalled.
    x_dcache_select = Signal()
    m_dcache_select = Signal()

    in_cached_region = (self.x_addr >= dcache.base) & (self.x_addr < dcache.limit)
    m.d.comb += x_dcache_select.eq(in_cached_region)
    with m.If(~self.x_stall):
        m.d.sync += m_dcache_select.eq(x_dcache_select)

    # Cache stage 1 is fed from X (word address: the 2 byte bits dropped).
    m.d.comb += [
        dcache.s1_addr.eq(self.x_addr[2:]),
        dcache.s1_flush.eq(self.x_flush),
        dcache.s1_stall.eq(self.x_stall),
        dcache.s1_valid.eq(self.x_valid & x_dcache_select),
    ]
    # Cache stage 2 is fed from M; a store requests eviction of the line.
    m.d.comb += [
        dcache.s2_addr.eq(self.m_addr[2:]),
        dcache.s2_re.eq(self.m_load),
        dcache.s2_evict.eq(self.m_store),
        dcache.s2_valid.eq(self.m_valid & m_dcache_select),
    ]

    # Write buffer: a FIFO of (addr, mask, data) entries.
    wrbuf_w_data = Record([("addr", 30), ("mask", 4), ("data", 32)])
    wrbuf_r_data = Record.like(wrbuf_w_data)
    wrbuf = m.submodules.wrbuf = SyncFIFO(width=len(wrbuf_w_data), depth=dcache.nwords)

    push_store = self.x_store & self.x_valid & x_dcache_select & ~self.x_stall
    m.d.comb += [
        wrbuf.w_data.eq(wrbuf_w_data),
        wrbuf_w_data.addr.eq(self.x_addr[2:]),
        wrbuf_w_data.mask.eq(self.x_mask),
        wrbuf_w_data.data.eq(self.x_store_data),
        wrbuf.w_en.eq(push_store),
        wrbuf_r_data.eq(wrbuf.r_data),
    ]

    dbus_arbiter = m.submodules.dbus_arbiter = WishboneArbiter()
    m.d.comb += dbus_arbiter.bus.connect(self.dbus)

    # Arbiter port 0: drain the write buffer, one bus write per entry.
    wrbuf_port = dbus_arbiter.port(priority=0)
    with m.If(wrbuf_port.cyc):
        with m.If(wrbuf_port.ack | wrbuf_port.err):
            # Transfer finished: release the bus and pop the entry.
            m.d.sync += wrbuf_port.cyc.eq(0)
            m.d.sync += wrbuf_port.stb.eq(0)
            m.d.comb += wrbuf.r_en.eq(1)
    with m.Elif(wrbuf.r_rdy):
        # Idle with a pending entry: start a write from the FIFO head.
        m.d.sync += [
            wrbuf_port.cyc.eq(1),
            wrbuf_port.stb.eq(1),
            wrbuf_port.adr.eq(wrbuf_r_data.addr),
            wrbuf_port.sel.eq(wrbuf_r_data.mask),
            wrbuf_port.dat_w.eq(wrbuf_r_data.data),
        ]
    m.d.comb += wrbuf_port.we.eq(Const(1))

    # Arbiter port 1: cache refill bursts.
    dcache_port = dbus_arbiter.port(priority=1)
    refill_cti = Mux(dcache.bus_last, Cycle.END, Cycle.INCREMENT)
    m.d.comb += [
        dcache_port.cyc.eq(dcache.bus_re),
        dcache_port.stb.eq(dcache.bus_re),
        dcache_port.adr.eq(dcache.bus_addr),
        dcache_port.cti.eq(refill_cti),
        dcache_port.bte.eq(Const(log2_int(dcache.nwords) - 1)),
    ]
    m.d.comb += [
        dcache.bus_valid.eq(dcache_port.ack),
        dcache.bus_error.eq(dcache_port.err),
        dcache.bus_rdata.eq(dcache_port.dat_r),
    ]

    # Arbiter port 2: accesses outside the cached region.
    bare_port = dbus_arbiter.port(priority=2)
    bare_rdata = Signal.like(bare_port.dat_r)
    start_bare = (self.x_load | self.x_store) & ~x_dcache_select & self.x_valid & ~self.x_stall
    with m.If(bare_port.cyc):
        # End the cycle on ack/err, or when the M stage is no longer valid;
        # the read data is captured either way.
        with m.If(bare_port.ack | bare_port.err | ~self.m_valid):
            m.d.sync += [
                bare_port.cyc.eq(0),
                bare_port.stb.eq(0),
                bare_rdata.eq(bare_port.dat_r),
            ]
    with m.Elif(start_bare):
        m.d.sync += [
            bare_port.cyc.eq(1),
            bare_port.stb.eq(1),
            bare_port.adr.eq(self.x_addr[2:]),
            bare_port.sel.eq(self.x_mask),
            bare_port.we.eq(self.x_store),
            bare_port.dat_w.eq(self.x_store_data),
        ]

    # Bus errors are latched as load/store errors, together with the faulting
    # address, and cleared once the M stage is not stalled.
    with m.If(self.dbus.cyc & self.dbus.err):
        m.d.sync += [
            self.m_load_error.eq(~self.dbus.we),
            self.m_store_error.eq(self.dbus.we),
            self.m_badaddr.eq(self.dbus.adr),
        ]
    with m.Elif(~self.m_stall):
        m.d.sync += self.m_load_error.eq(0)
        m.d.sync += self.m_store_error.eq(0)

    # X-stage busy: FENCE.I waits for the write buffer to empty, a cached
    # store waits for FIFO space, a bare access waits for the bus cycle.
    with m.If(self.x_fence_i):
        m.d.comb += self.x_busy.eq(wrbuf.r_rdy)
    with m.Elif(x_dcache_select):
        m.d.comb += self.x_busy.eq(self.x_store & ~wrbuf.w_rdy)
    with m.Else():
        m.d.comb += self.x_busy.eq(bare_port.cyc)

    # M-stage busy and load data; an error returns zero data without stalling.
    with m.If(self.m_load_error | self.m_store_error):
        m.d.comb += self.m_busy.eq(0)
        m.d.comb += self.m_load_data.eq(0)
    with m.Elif(m_dcache_select):
        m.d.comb += self.m_busy.eq(dcache.s2_re & dcache.s2_miss)
        m.d.comb += self.m_load_data.eq(dcache.s2_rdata)
    with m.Else():
        m.d.comb += self.m_busy.eq(bare_port.cyc)
        m.d.comb += self.m_load_data.eq(bare_rdata)

    return m
def elaborate(self, platform: Platform) -> Module:
    """Elaborate the cached instruction fetch unit.

    Fetches whose PC falls inside the cacheable region are served by a
    read-only ``Cache`` (refilled with bursts through one arbiter port);
    every other fetch is issued as a classic cycle on a second, "bare"
    arbiter port. The arbiter's bus is connected to ``self.iport``.

    Returns:
        The populated ``Module``.
    """
    m = Module()

    arbiter = m.submodules.arbiter = Arbiter(
        addr_width=32,
        data_width=32,
        granularity=32,
        features=['err', 'cti', 'bte'])
    icache = m.submodules.icache = Cache(enable_write=False, **self.cache_kwargs)

    cache_port = arbiter.add_port(priority=0)
    bare_port = arbiter.add_port(priority=1)

    # Cacheable-region decode: compare the PC bits above the region size
    # against the same bits of the region start.
    a_use_cache = Signal()
    f_use_cache = Signal()

    region_bits = log2_int(self.end_addr - self.start_addr, need_pow2=False)
    region_id = self.start_addr >> region_bits
    m.d.comb += a_use_cache.eq(self.a_pc[region_bits:] == region_id)
    with m.If(~self.a_stall):
        m.d.sync += f_use_cache.eq(a_use_cache)

    m.d.comb += arbiter.bus.connect(self.iport)

    # connect IO: cache
    m.d.comb += icache.dcache_snoop.connect(self.dcache_snoop)
    m.d.comb += [
        icache.s1_address.eq(self.a_pc),
        icache.s1_flush.eq(self.flush),
        icache.s1_valid.eq(self.a_valid & a_use_cache),
        icache.s1_stall.eq(self.a_stall),
        icache.s1_access.eq(1),
    ]
    m.d.comb += [
        icache.s2_address.eq(self.f_pc),
        icache.s2_evict.eq(0),
        icache.s2_valid.eq(self.f_valid & f_use_cache),
        icache.s2_access.eq(1),
        icache.s2_stall.eq(self.f_stall),
        icache.s2_re.eq(1),
    ]

    # connect cache to arbiter (read-only master, incrementing burst refill)
    refill_cti = Mux(icache.bus_last, CycleType.END_OF_BURST, CycleType.INCR_BURST)
    m.d.comb += [
        cache_port.adr.eq(icache.bus_addr),
        cache_port.dat_w.eq(0),
        cache_port.sel.eq(0),
        cache_port.we.eq(0),
        cache_port.cyc.eq(icache.bus_valid),
        cache_port.stb.eq(icache.bus_valid),
        cache_port.cti.eq(refill_cti),
        cache_port.bte.eq(log2_int(self.nwords) - 1),
    ]
    m.d.comb += [
        icache.bus_data.eq(cache_port.dat_r),
        icache.bus_ack.eq(cache_port.ack),
        icache.bus_err.eq(cache_port.err),
    ]

    # drive the bare bus IO
    rdata = Signal.like(bare_port.dat_r)
    with m.If(bare_port.cyc):
        # End the cycle on ack/err, or when the F stage is no longer valid;
        # the read data is captured either way.
        with m.If(bare_port.ack | bare_port.err | ~self.f_valid):
            m.d.sync += bare_port.cyc.eq(0)
            m.d.sync += bare_port.stb.eq(0)
            m.d.sync += rdata.eq(bare_port.dat_r)
    with m.Elif(self.a_valid & ~self.a_stall & ~a_use_cache):
        m.d.sync += bare_port.adr.eq(self.a_pc)
        m.d.sync += bare_port.cyc.eq(1)
        m.d.sync += bare_port.stb.eq(1)
    # The bare port only ever reads, using classic cycles.
    m.d.comb += [
        bare_port.dat_w.eq(0),
        bare_port.sel.eq(0),
        bare_port.we.eq(0),
        bare_port.cti.eq(CycleType.CLASSIC),
        bare_port.bte.eq(0),
    ]

    # Instruction / busy selection. On a bus error (outside the cached
    # path) the instruction is replaced by a NOP.
    with m.If(f_use_cache):
        m.d.comb += self.f_instruction.eq(icache.s2_rdata)
        m.d.comb += self.f_busy.eq(icache.s2_miss & self.f_valid)
    with m.Elif(self.f_bus_error):
        m.d.comb += self.f_instruction.eq(0x00000013)  # NOP
        m.d.comb += self.f_busy.eq(0)
    with m.Else():
        m.d.comb += self.f_instruction.eq(rdata)
        m.d.comb += self.f_busy.eq(bare_port.cyc)

    # exception: latch the bus error and the faulting address
    with m.If(self.iport.cyc & self.iport.err):
        m.d.sync += self.f_bus_error.eq(1)
        m.d.sync += self.f_badaddr.eq(self.iport.adr)
    with m.Elif(~self.f_stall):
        # Only clear once the pipe moves, so that an error raised while the
        # pipe is stalled is not lost.
        m.d.sync += self.f_bus_error.eq(0)

    return m
def elaborate(self, platform):
    """Elaborate the endpoint buffer.

    Each endpoint in ``self.endpoint_map`` gets two byte-wide buffers in a
    shared data memory, plus one entry in a state memory holding, per buffer,
    a ``valid`` flag, a ``setup`` flag and a fill ``level``, and an ``lru``
    bit. A write FSM stores incoming ``sink_data`` bytes into a buffer and a
    read FSM streams a valid buffer out on ``source_data``.

    Returns:
        The populated ``Module``.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed listing -- confirm it against the formatted file.
    m = Module()

    # Per-endpoint constants (maximum packet size, transfer type), held in an
    # Array so that they can be indexed by a signal.
    ep_info = Array(
        Record([("max_size", bits_for(512)), ("xfer_type", 2)])
        for _ in self.endpoint_map)

    for i, (ep_addr, (port, max_size, xfer_type)) in enumerate(self.endpoint_map.items()):
        m.d.comb += [
            ep_info[i].max_size.eq(Const(max_size)),
            ep_info[i].xfer_type.eq(Const(xfer_type))
        ]

    # Translate the endpoint address requested by the reader into a dense
    # index; the pattern-less Case flags an address not in endpoint_map.
    rd_index = Signal(range(len(self.endpoint_map)))
    rd_bad_ep = Signal()

    with m.Switch(self.source_read.ep):
        for i, ep_addr in enumerate(self.endpoint_map):
            with m.Case(ep_addr):
                m.d.comb += rd_index.eq(i)
        with m.Case():
            m.d.comb += rd_bad_ep.eq(1)

    # Same translation for the endpoint address requested by the writer.
    wr_index = Signal.like(rd_index)
    wr_bad_ep = Signal()

    with m.Switch(self.sink_write.ep):
        for i, ep_addr in enumerate(self.endpoint_map):
            with m.Case(ep_addr):
                m.d.comb += wr_index.eq(i)
        with m.Case():
            m.d.comb += wr_bad_ep.eq(1)

    m.d.comb += self.write_xfer.eq(ep_info[wr_index].xfer_type)

    # state memory
    # One entry per endpoint, with two read ports (rp1 is used by the read
    # side, rp2 by the write side) and a single write port.

    buf_fields = [("valid", 1), ("setup", 1), ("level", bits_for(512))]

    state_rp1_data = Record([("lru", 1), ("buf1", buf_fields), ("buf2", buf_fields)])
    state_rp2_data = Record.like(state_rp1_data)
    state_wp_data = Record.like(state_rp1_data)

    state_mem = Memory(width=len(state_rp1_data), depth=len(self.endpoint_map))
    state_rp1 = m.submodules.state_rp1 = state_mem.read_port()
    state_rp2 = m.submodules.state_rp2 = state_mem.read_port()
    state_wp = m.submodules.state_wp = state_mem.write_port()

    m.d.comb += [
        state_rp1_data.eq(state_rp1.data),
        state_rp2_data.eq(state_rp2.data),
        state_wp.data.eq(state_wp_data)
    ]

    # data memory
    # Byte-wide; the address is {endpoint index, buffer select, byte offset}.

    data_rp_addr = Record([("index", rd_index.width), ("buf_sel", 1), ("offset", log2_int(512))])
    data_wp_addr = Record.like(data_rp_addr)

    data_mem = Memory(width=8, depth=2**len(data_rp_addr))
    data_rp = m.submodules.data_rp = data_mem.read_port(transparent=False)
    data_wp = m.submodules.data_wp = data_mem.write_port()

    # The read port is disabled unless a read FSM state asserts `en`.
    data_rp.en.reset = 0

    m.d.comb += [
        data_rp.addr.eq(data_rp_addr),
        data_wp.addr.eq(data_wp_addr)
    ]

    # control FSMs

    # One bit per endpoint; `is_empty` resets to all ones, `is_full` to zero.
    is_empty = Array(
        Signal(len(self.endpoint_map),
               reset=2**len(self.endpoint_map) - 1, name="is_empty"))
    is_full = Array(Signal(len(self.endpoint_map), name="is_full"))
    # Two bits per endpoint, one for each buffer.
    is_last = Array(
        Signal(2, name=f"ep{addr}_last") for addr in self.endpoint_map)

    rd_buf_sel = Signal.like(data_rp_addr.buf_sel)
    rd_offset = Signal(range(514))
    rd_buf = Record(buf_fields)
    rd_done = Signal()

    # A request is accepted only for a known endpoint that has data to read
    # (reader) or has room for a packet (writer).
    m.d.comb += [
        self.source_read.ready.eq(~rd_bad_ep & ~is_empty[rd_index]),
        self.sink_write.ready.eq(~wr_bad_ep & ~is_full[wr_index])
    ]

    # While a request is being accepted the state memory is addressed with
    # the requested index; otherwise with the index latched by the FSM.
    with m.If(self.source_read.ready & self.source_read.valid):
        m.d.comb += state_rp1.addr.eq(rd_index)
    with m.Else():
        m.d.comb += state_rp1.addr.eq(data_rp_addr.index)

    with m.If(self.sink_write.ready & self.sink_write.valid):
        m.d.comb += state_rp2.addr.eq(wr_index)
    with m.Else():
        m.d.comb += state_rp2.addr.eq(data_wp_addr.index)

    with m.FSM(name="read_fsm") as read_fsm:
        with m.State("IDLE"):
            with m.If(self.source_read.ready & self.source_read.valid):
                m.d.sync += data_rp_addr.index.eq(rd_index)
                m.next = "READ-0"

        with m.State("READ-0"):
            # Pick the buffer to read: if both are valid follow `lru`,
            # otherwise take buf2 when it is valid and buf1 when it is not.
            with m.If(state_rp1_data.buf1.valid & state_rp1_data.buf2.valid):
                m.d.comb += data_rp_addr.buf_sel.eq(state_rp1_data.lru)
            with m.Else():
                m.d.comb += data_rp_addr.buf_sel.eq(
                    state_rp1_data.buf2.valid)
            # Latch the selection and the buffer state, and fetch byte 0.
            m.d.sync += [
                rd_buf_sel.eq(data_rp_addr.buf_sel),
                rd_offset.eq(1),
                rd_buf.eq(
                    Mux(data_rp_addr.buf_sel, state_rp1_data.buf2,
                        state_rp1_data.buf1))
            ]
            m.d.comb += [data_rp_addr.offset.eq(0), data_rp.en.eq(1)]
            m.next = "READ-1"

        with m.State("READ-1"):
            rd_last = Signal()
            m.d.comb += [
                rd_last.eq(is_last[data_rp_addr.index].bit_select(
                    rd_buf_sel, 1)),
                self.source_data.valid.eq(1),
                self.source_data.setup.eq(rd_buf.setup),
                self.source_data.data.eq(data_rp.data),
                self.source_data.last.eq(rd_last & (rd_offset == rd_buf.level))
            ]
            with m.If(self.source_data.ready):
                with m.If(rd_offset == rd_buf.level):
                    # Every byte up to `level` has been sent.
                    m.d.comb += rd_done.eq(1)
                    m.next = "IDLE"
                with m.Else():
                    # Fetch the next byte and advance the offset.
                    m.d.sync += rd_offset.eq(rd_offset + 1)
                    m.d.comb += [
                        data_rp_addr.buf_sel.eq(rd_buf_sel),
                        data_rp_addr.offset.eq(rd_offset),
                        data_rp.en.eq(1)
                    ]

    with m.FSM(name="write_fsm") as write_fsm:
        with m.State("IDLE"):
            with m.If(self.sink_write.ready & self.sink_write.valid):
                m.d.sync += data_wp_addr.index.eq(wr_index)
                m.next = "WRITE-0"

        with m.State("WRITE-0"):
            # Write into buf2 when buf1 already holds valid data, else buf1.
            m.d.sync += [
                data_wp_addr.buf_sel.eq(state_rp2_data.buf1.valid),
                data_wp_addr.offset.eq(0)
            ]
            m.next = "WRITE-1"

        with m.State("WRITE-1"):
            with m.If(rd_done):
                # Wait because state_wp is being driven.
                m.d.comb += self.sink_data.ready.eq(~self.sink_data.last)
            with m.Else():
                m.d.comb += self.sink_data.ready.eq(1)
            with m.If(self.recv_zlp):
                # The host sent a zero-length packet. These are used to mark the previous
                # packet (sent to this endpoint) as the last of an OUT transfer.
                # The value written is one-hot on the buffer other than the
                # current write buffer.
                m.d.sync += is_last[data_wp_addr.index].eq(
                    1 << ~data_wp_addr.buf_sel)
                m.next = "IDLE"
            with m.Elif(self.sink_data.ready & self.sink_data.valid):
                with m.If(self.sink_data.last):  # TODO drop packet if overflow
                    m.next = "IDLE"
                with m.Else():
                    m.d.sync += data_wp_addr.offset.eq(
                        data_wp_addr.offset + 1)
                # Store the accepted byte at the current offset.
                m.d.comb += [
                    data_wp.data.eq(self.sink_data.data),
                    data_wp.en.eq(1)
                ]

    # state update
    # The state memory has one write port: a finished read takes priority
    # over a finished write (If / Elif).

    with m.If(rd_done):
        m.d.sync += is_full[data_rp_addr.index].eq(0)
        # The endpoint becomes empty if the other buffer holds no valid data.
        with m.If(rd_buf_sel):
            m.d.sync += is_empty[data_rp_addr.index].eq(
                ~state_rp1_data.buf1.valid)
        with m.Else():
            m.d.sync += is_empty[data_rp_addr.index].eq(
                ~state_rp1_data.buf2.valid)
        # Clear the buffer that was just read and keep the other one.
        m.d.comb += [
            state_wp.addr.eq(data_rp_addr.index),
            state_wp.en.eq(1),
            state_wp_data.lru.eq(state_rp1_data.lru),
            state_wp_data.buf1.eq(Mux(rd_buf_sel, state_rp1_data.buf1, 0)),
            state_wp_data.buf2.eq(Mux(rd_buf_sel, 0, state_rp1_data.buf2))
        ]
    with m.Elif(self.sink_data.valid & self.sink_data.last
                & self.sink_data.crc_ok & self.sink_data.ready):
        # A packet with a good CRC was fully received.
        m.d.sync += is_empty[data_wp_addr.index].eq(0)
        # The endpoint is full if the other buffer is valid too. A packet
        # whose length differs from max_size is flagged as the last one.
        with m.If(data_wp_addr.buf_sel):
            m.d.sync += [
                is_full[data_wp_addr.index].eq(state_rp2_data.buf1.valid),
                is_last[data_wp_addr.index][1].eq(
                    data_wp_addr.offset + 1 != ep_info[data_wp_addr.index].max_size)
            ]
        with m.Else():
            m.d.sync += [
                is_full[data_wp_addr.index].eq(state_rp2_data.buf2.valid),
                is_last[data_wp_addr.index][0].eq(
                    data_wp_addr.offset + 1 != ep_info[data_wp_addr.index].max_size)
            ]
        # `lru` is set to the buffer that was not just written.
        m.d.comb += [
            state_wp.addr.eq(data_wp_addr.index),
            state_wp.en.eq(1),
            state_wp_data.lru.eq(~data_wp_addr.buf_sel)
        ]
        # Keep the other buffer's state; mark the written buffer valid with
        # its setup flag and its byte count.
        with m.If(data_wp_addr.buf_sel):
            m.d.comb += state_wp_data.buf1.eq(state_rp2_data.buf1)
            m.d.comb += [
                state_wp_data.buf2.valid.eq(1),
                state_wp_data.buf2.setup.eq(self.sink_data.setup),
                state_wp_data.buf2.level.eq(data_wp_addr.offset + 1)
            ]
        with m.Else():
            m.d.comb += state_wp_data.buf2.eq(state_rp2_data.buf2)
            m.d.comb += [
                state_wp_data.buf1.valid.eq(1),
                state_wp_data.buf1.setup.eq(self.sink_data.setup),
                state_wp_data.buf1.level.eq(data_wp_addr.offset + 1)
            ]

    return m
def addr_below(limit):
    """Return a 30-character match pattern for word addresses below *limit*.

    The pattern consists of leading ``"0"`` characters followed by one ``"-"``
    per address bit covered by ``log2_int(limit)``.

    *limit* must lie in ``1 .. 2**30`` (checked with an assertion); it is then
    passed to ``log2_int`` with that function's default arguments.
    """
    assert limit in range(1, 2**30 + 1)
    wildcard_count = log2_int(limit)
    fixed_zeros = "0" * (30 - wildcard_count)
    wildcards = "-" * wildcard_count
    return "{}{}".format(fixed_zeros, wildcards)
def elaborate(self, platform):
    """Elaborate the cached instruction fetch unit.

    Fetches whose PC falls inside the L1 cache region are served by the
    instruction cache, which refills itself with bursts through one arbiter
    port; every other fetch is issued on a second, "bare" arbiter port. The
    arbiter's bus is connected to ``self.ibus``.

    Returns:
        The populated ``Module``.
    """
    m = Module()

    icache = m.submodules.icache = L1Cache(*self.icache_args)

    def addr_below(limit):
        # Match pattern for a 30-bit word address: leading zeros followed by
        # one don't-care per bit covered by `limit`.
        assert limit in range(1, 2**30 + 1)
        range_bits = log2_int(limit)
        return "{}{}".format("0" * (30 - range_bits), "-" * range_bits)

    a_icache_select = Signal()

    # Test whether the target address is inside the L1 cache region. We use
    # bit masks in order to avoid carry chains from arithmetic comparisons.
    # This restricts the region boundaries to powers of 2.
    with m.Switch(self.a_pc[2:]):
        # The first matching case wins: addresses below the base are
        # excluded before the "below the limit" pattern is tried.
        if icache.base >= 4:
            with m.Case(addr_below(icache.base >> 2)):
                m.d.comb += a_icache_select.eq(0)
        with m.Case(addr_below(icache.limit >> 2)):
            m.d.comb += a_icache_select.eq(1)
        with m.Default():
            m.d.comb += a_icache_select.eq(0)

    # A-stage decisions are registered into F unless A is stalled.
    f_icache_select = Signal()
    f_flush = Signal()

    with m.If(~self.a_stall):
        m.d.sync += f_icache_select.eq(a_icache_select)
        m.d.sync += f_flush.eq(self.a_flush)

    # Cache stage 1 is fed from A, stage 2 from F (word addresses).
    m.d.comb += [
        icache.s1_addr.eq(self.a_pc[2:]),
        icache.s1_stall.eq(self.a_stall),
        icache.s1_valid.eq(self.a_valid),
    ]
    m.d.comb += [
        icache.s2_addr.eq(self.f_pc[2:]),
        icache.s2_re.eq(f_icache_select),
        icache.s2_evict.eq(Const(0)),
        icache.s2_flush.eq(f_flush),
        icache.s2_valid.eq(self.f_valid),
    ]

    ibus_arbiter = m.submodules.ibus_arbiter = WishboneArbiter()
    m.d.comb += ibus_arbiter.bus.connect(self.ibus)

    # Arbiter port 0: cache refill bursts.
    icache_port = ibus_arbiter.port(priority=0)
    refill_cti = Mux(icache.bus_last, Cycle.END, Cycle.INCREMENT)
    m.d.comb += [
        icache_port.cyc.eq(icache.bus_re),
        icache_port.stb.eq(icache.bus_re),
        icache_port.adr.eq(icache.bus_addr),
        icache_port.cti.eq(refill_cti),
        icache_port.bte.eq(Const(log2_int(icache.nwords) - 1)),
    ]
    m.d.comb += [
        icache.bus_valid.eq(icache_port.ack),
        icache.bus_error.eq(icache_port.err),
        icache.bus_rdata.eq(icache_port.dat_r),
    ]

    # Arbiter port 1: fetches outside the cache region.
    bare_port = ibus_arbiter.port(priority=1)
    bare_rdata = Signal.like(bare_port.dat_r)
    with m.If(bare_port.cyc):
        # End the cycle on ack/err, or when the F stage is no longer valid;
        # the read data is captured either way.
        with m.If(bare_port.ack | bare_port.err | ~self.f_valid):
            m.d.sync += bare_port.cyc.eq(0)
            m.d.sync += bare_port.stb.eq(0)
            m.d.sync += bare_rdata.eq(bare_port.dat_r)
    with m.Elif(~a_icache_select & self.a_valid & ~self.a_stall):
        m.d.sync += bare_port.cyc.eq(1)
        m.d.sync += bare_port.stb.eq(1)
        m.d.sync += bare_port.adr.eq(self.a_pc[2:])

    m.d.comb += self.a_busy.eq(bare_port.cyc)

    # Bus errors are latched with the faulting address and cleared once the
    # F stage is not stalled.
    with m.If(self.ibus.cyc & self.ibus.err):
        m.d.sync += self.f_fetch_error.eq(1)
        m.d.sync += self.f_badaddr.eq(self.ibus.adr)
    with m.Elif(~self.f_stall):
        m.d.sync += self.f_fetch_error.eq(0)

    # F-stage busy / instruction selection, in priority order: pending
    # flush, fetch error, cached fetch, bare fetch.
    with m.If(f_flush):
        m.d.comb += self.f_busy.eq(~icache.s2_flush_ack)
    with m.Elif(self.f_fetch_error):
        m.d.comb += self.f_busy.eq(0)
    with m.Elif(f_icache_select):
        m.d.comb += self.f_busy.eq(icache.s2_miss)
        m.d.comb += self.f_instruction.eq(icache.s2_rdata)
    with m.Else():
        m.d.comb += self.f_busy.eq(bare_port.cyc)
        m.d.comb += self.f_instruction.eq(bare_rdata)

    return m
def elaborate(self, platform):
    """Elaborate the branch predictor.

    Two memories are indexed by the low bits of the PC: ``btb`` stores a
    32-bit target, a tag and a valid bit per entry, and ``bht`` stores a
    2-bit prediction state per entry. A prediction is made when the ``btb``
    entry is valid, its tag matches the F-stage PC and the upper state bit is
    set. Both memories are written when ``m_update`` is asserted.

    Returns:
        The populated ``Module``.

    Raises:
        ValueError: If the configured predictor size is not a non-zero power
            of 2.
    """
    m = Module()

    n_entries = self.configuration.getOption('predictor', 'size')
    entries_is_pow2 = n_entries != 0 and not (n_entries & (n_entries - 1))
    if not entries_is_pow2:
        raise ValueError(f'size must be a power of 2: {n_entries}')

    # A 32-bit PC is split into {index, tag}.
    index_bits = log2_int(n_entries)
    tag_bits = 32 - index_bits
    entry_width = 32 + tag_bits + 1  # data + tag + valid
    depth = 1 << index_bits

    entry_layout = [('target', 32), ('tag', tag_bits), ('valid', 1)]
    pc_fields = [('index', index_bits), ('tag', tag_bits)]

    btb = Memory(width=entry_width, depth=depth)
    btb_rp = btb.read_port()
    btb_wp = btb.write_port()

    bht = Memory(width=2, depth=depth)
    bht_rp = bht.read_port()
    bht_wp = bht.write_port()

    m.submodules += btb_rp, btb_wp
    m.submodules += bht_rp, bht_wp

    btb_r = Record(entry_layout)
    a_pc = Record(pc_fields)
    f_pc = Record(pc_fields)
    m_pc = Record(pc_fields)
    hit = Signal()
    pstate_next = Signal(2)

    # Lookup: while A is stalled the F-stage index is re-read.
    lookup_index = Mux(self.a_stall, f_pc.index, a_pc.index)
    m.d.comb += [
        btb_rp.addr.eq(lookup_index),
        bht_rp.addr.eq(Mux(self.a_stall, f_pc.index, a_pc.index)),
        btb_r.eq(btb_rp.data),
    ]
    m.d.comb += [
        a_pc.eq(self.a_pc),
        f_pc.eq(self.f_pc),
        hit.eq(btb_r.valid & (btb_r.tag == f_pc.tag)),
    ]
    m.d.comb += [
        self.f_prediction.eq(hit & bht_rp.data[1]),
        self.f_prediction_state.eq(bht_rp.data),
        self.f_prediction_pc.eq(btb_r.target),
    ]

    # update
    m.d.comb += [
        btb_wp.addr.eq(m_pc.index),
        btb_wp.data.eq(Cat(self.m_target_pc, m_pc.tag, 1)),
        btb_wp.en.eq(self.m_update),
    ]
    m.d.comb += [
        bht_wp.addr.eq(m_pc.index),
        bht_wp.data.eq(pstate_next),
        bht_wp.en.eq(self.m_update),
    ]
    m.d.comb += m_pc.eq(self.m_pc)
    # Default value; overridden by the matching case below.
    m.d.comb += pstate_next.eq(0)

    # Next prediction state, keyed on Cat(state, taken). The table is a
    # 2-bit saturating counter (assuming m_prediction_state is 2 bits wide,
    # so that the taken flag is bit 2): down when not taken, up when taken.
    transitions = (
        ((0b000, 0b001), 0b00),
        ((0b010, 0b100), 0b01),
        ((0b011, 0b101), 0b10),
        ((0b110, 0b111), 0b11),
    )
    with m.Switch(Cat(self.m_prediction_state, self.m_take_jmp_branch)):
        for patterns, next_state in transitions:
            with m.Case(*patterns):
                m.d.comb += pstate_next.eq(next_state)

    return m