Exemplo n.º 1
0
    def __init__(self,
                 pads,
                 *,
                 ser_latency,
                 des_latency,
                 serdes_reset_cnt=0,
                 **kwargs):
        """Wrap the base PHY with an extra sys -> sys2x serialization stage.

        The base class is initialized with its latencies increased by one
        Serializer/Deserializer stage. The output bundle it creates is kept as
        ``self._out`` and ``self.out`` is replaced by a bundle built with half
        the number of phases; every signal of the two bundles is then
        connected through a Serializer, a Deserializer or a plain delay.

        Args:
            pads: Forwarded to the base class.
            ser_latency: Serialization latency of the wrapped PHY; one
                ``Serializer.LATENCY`` (in sys cycles) is added to it.
            des_latency: Deserialization latency of the wrapped PHY; one
                ``Deserializer.LATENCY`` (in sys cycles) is added to it.
            serdes_reset_cnt: ``reset_cnt`` given to every Serializer and
                Deserializer created here.
            **kwargs: Forwarded to the base class.
        """
        total_ser_latency = ser_latency + Latency(sys=Serializer.LATENCY)
        total_des_latency = des_latency + Latency(sys=Deserializer.LATENCY)
        super().__init__(pads,
                         ser_latency=total_ser_latency,
                         des_latency=total_des_latency,
                         **kwargs)

        # Keep the bundle created by the base class and expose a narrower one.
        self._out = self.out
        self.out = LPDDR4Output(nphases=self.nphases // 2,
                                databits=self.databits)

        # Settings shared by every SerDes instantiated below.
        serdes_common = dict(clkdiv="sys",
                             clk="sys2x",
                             reset_cnt=serdes_reset_cnt)

        def ser(i, o):
            # The serializer output is half as wide as its input.
            assert len(o) == len(i) // 2
            self.submodules += Serializer(i=i,
                                          o=o,
                                          i_dw=len(i),
                                          o_dw=len(o),
                                          **serdes_common)

        def des(i, o):
            # The deserializer output is twice as wide as its input.
            assert len(i) == len(o) // 2
            self.submodules += Deserializer(i=i,
                                            o=o,
                                            i_dw=len(i),
                                            o_dw=len(o),
                                            **serdes_common)

        def connect(fn, i, o):
            # Works both for lists of signals (like dq) and for a single
            # Signal (like cs).
            pairs = zip(i, o) if isinstance(i, list) else [(i, o)]
            for src, dst in pairs:
                fn(i=src, o=dst)

        for name in vars(self.out):
            wide = getattr(self._out, name)
            narrow = getattr(self.out, name)
            if name.endswith("_oe"):
                # OE signals are not serialized, only delayed to stay aligned
                # with the serialized data.
                self.comb += narrow.eq(
                    delayed(self, wide, cycles=Serializer.LATENCY))
                continue
            if name.endswith("_i"):
                # Inputs travel from the narrow bundle to the wide one.
                connect(des, i=narrow, o=wide)
            else:
                # Everything else is an output.
                connect(ser, i=wide, o=narrow)
Exemplo n.º 2
0
    def __init__(self,
                 pads,
                 data_cdc,
                 *,
                 clk_freq,
                 log_level,
                 init_delays=False):
        """Build command decoding, initialization timing checks and the init FSM.

        Args:
            pads: Pads object; this method reads ``cs``, ``ca``, ``cke`` and
                ``reset_n`` from it.
            data_cdc: Module stored as ``self.data`` and registered as a
                submodule (the connection to DataSim).
            clk_freq: Multiplied with the timing constants below to obtain
                cycle counts and also passed to ``SimLogger``
                (presumably in Hz, with timings in seconds -- TODO confirm).
            log_level: Passed to ``SimLogger``.
            init_delays: When false, the "RESET" and "EXIT-PD" states are left
                without waiting for tINIT3/tINIT5 and the tZQCAL/tZQLAT
                warnings are never emitted.
        """
        self.submodules.log = log = SimLogger(log_level=log_level,
                                              clk_freq=clk_freq)
        self.log.add_csrs()

        # Mode Registers storage (64 registers, 8 bits each)
        self.mode_regs = Array([Signal(8) for _ in range(64)])
        # Active banks: one flag and one 17-bit row address per bank (8 banks)
        self.active_banks = Array([Signal() for _ in range(8)])
        self.active_rows = Array([Signal(17) for _ in range(8)])
        # Connection to DataSim
        self.data_en = TappedDelayLine(ntaps=20)
        self.data = data_cdc
        self.submodules += self.data, self.data_en

        # CS/CA shift registers: keep two consecutive samples of each
        cs = TappedDelayLine(pads.cs, ntaps=2)
        ca = TappedDelayLine(pads.ca, ntaps=2)
        self.submodules += cs, ca

        # CA values captured from the two taps when a command is detected
        self.cs_low = Signal(6)
        self.cs_high = Signal(6)
        # Asserted when the CS pattern of a command is seen (and commands are enabled)
        self.handle_cmd = Signal()
        self.mpc_op = Signal(7)

        # Driven by the FSM states in which commands are accepted
        cmds_enabled = Signal()
        # Each handler value is OR-ed/AND-ed with other signals below, i.e. it is
        # used as a "this command matched" flag.
        cmd_handlers = OrderedDict(
            MRW=self.mrw_handler(),
            REF=self.refresh_handler(),
            ACT=self.activate_handler(),
            PRE=self.precharge_handler(),
            CAS=self.cas_handler(),
            MPC=self.mpc_handler(),
        )
        self.comb += [
            If(
                cmds_enabled,
                # NOTE(review): 0b10 presumably means CS high on the older
                # sample followed by CS low -- depends on tap ordering, confirm.
                If(
                    Cat(cs.taps) == 0b10,
                    self.handle_cmd.eq(1),
                    self.cs_high.eq(ca.taps[1]),
                    self.cs_low.eq(ca.taps[0]),
                )),
            # A command was detected but none of the handlers recognized it
            If(
                self.handle_cmd & ~reduce(or_, cmd_handlers.values()),
                self.log.error(
                    "Unexpected command: cs_high=0b%06b cs_low=0b%06b",
                    self.cs_high, self.cs_low)),
        ]

        def ck(t):
            # Convert a time into a number of clock cycles, rounding up
            return math.ceil(t * clk_freq)

        # Timers for the initialization sequence and ZQ calibration
        self.submodules.tinit0 = PulseTiming(
            ck(20e-3))  # makes no sense in simulation
        self.submodules.tinit1 = PulseTiming(ck(200e-6))
        self.submodules.tinit2 = PulseTiming(ck(10e-9))
        self.submodules.tinit3 = PulseTiming(ck(2e-3))
        self.submodules.tinit4 = PulseTiming(
            5)  # TODO: would require counting pads.clk_p ticks
        self.submodules.tinit5 = PulseTiming(ck(2e-6))
        self.submodules.tzqcal = PulseTiming(ck(1e-6))
        self.submodules.tzqlat = PulseTiming(max(8, ck(30e-9)))
        self.submodules.tpw_reset = PulseTiming(ck(100e-9))

        self.comb += [
            self.tinit1.trigger.eq(1),
            self.tinit2.trigger.eq(~pads.cke),
            self.tinit3.trigger.eq(pads.reset_n),
            self.tpw_reset.trigger.eq(~pads.reset_n),
            # Rising edge of reset_n: check tINIT1 and tINIT2
            If(
                ~delayed(self, pads.reset_n) & pads.reset_n,
                self.log.info("RESET released"),
                If(~self.tinit1.ready,
                   self.log.warn(
                       "tINIT1 violated: RESET deasserted too fast")),
                If(
                    ~self.tinit2.ready,
                    self.log.warn(
                        "tINIT2 violated: CKE LOW too short before RESET being released"
                    )),
            ),
            # Falling edge of reset_n
            If(
                delayed(self, pads.reset_n) & ~pads.reset_n,
                self.log.info("RESET asserted"),
            ),
            # Falling edge of cke
            If(
                delayed(self, pads.cke) & ~pads.cke,
                self.log.info("CKE falling edge"),
            ),
            # Rising edge of cke: check tINIT3
            If(
                ~delayed(self, pads.cke) & pads.cke,
                self.log.info("CKE rising edge"),
                If(
                    ~self.tinit3.ready,
                    self.log.warn(
                        "tINIT3 violated: CKE set HIGH too fast after RESET being released"
                    )),
            ),
        ]

        # Initialization FSM; it is reset when the tpw_reset timer fires
        self.submodules.fsm = fsm = ResetInserter()(FSM())
        self.comb += [
            If(self.tpw_reset.ready_p, fsm.reset.eq(1),
               self.log.info("FSM reset"))
        ]
        # `not init_delays` is a Python constant: with init_delays=False the
        # condition is always true and the state is left immediately.
        fsm.act(
            "RESET",
            If(
                self.tinit3.ready_p | (not init_delays),
                NextState("EXIT-PD")  # Td
            ))
        fsm.act(
            "EXIT-PD",
            self.tinit5.trigger.eq(1),
            If(
                self.tinit5.ready_p | (not init_delays),
                NextState("MRW")  # Te
            ))
        fsm.act(
            "MRW",
            cmds_enabled.eq(1),
            # Anything other than MRW or MPC is only warned about; the second
            # warning prints the state of every handler flag.
            If(
                self.handle_cmd & ~cmd_handlers["MRW"] & ~cmd_handlers["MPC"],
                self.log.warn(
                    "Only MRW/MRR commands expected before ZQ calibration"),
                self.log.warn(
                    " ".join("{}=%d".format(cmd)
                             for cmd in cmd_handlers.keys()),
                    *cmd_handlers.values()),
            ),
            # Only an MPC with the ZQC_START op moves on to calibration
            If(
                cmd_handlers["MPC"],
                If(
                    self.mpc_op != MPC.ZQC_START,
                    self.log.error("ZQC-START expected, got op=0b%07b",
                                   self.mpc_op)).Else(NextState("ZQC")  # Tf
                                                      )),
        )
        fsm.act(
            "ZQC",
            self.tzqcal.trigger.eq(1),
            cmds_enabled.eq(1),
            # Any command other than MPC ZQC_LATCH is an error; the latch
            # command moves to NORMAL, warning if tZQCAL has not elapsed.
            If(
                self.handle_cmd,
                If(~(cmd_handlers["MPC"] & (self.mpc_op == MPC.ZQC_LATCH)),
                   self.log.error("Expected ZQC-LATCH")).Else(
                       If(init_delays & ~self.tzqcal.ready,
                          self.log.warn("tZQCAL violated")),
                       NextState("NORMAL")  # Tg
                   )),
        )
        fsm.act(
            "NORMAL",
            cmds_enabled.eq(1),
            self.tzqlat.trigger.eq(1),
            # Commands arriving before the tzqlat timer is ready are only warned about
            If(init_delays & self.handle_cmd & ~self.tzqlat.ready,
               self.log.warn("tZQLAT violated")),
        )

        # Log state transitions
        # The FSM is finalized first because fsm.state and fsm.decoding are
        # used right below. One log statement is generated for every
        # (previous state, current state) pair.
        fsm.finalize()
        prev_state = delayed(self, fsm.state)
        self.comb += If(
            prev_state != fsm.state,
            Case(
                prev_state, {
                    state: Case(
                        fsm.state, {
                            next_state: self.log.info(
                                f"FSM: {state_name} -> {next_state_name}")
                            for next_state, next_state_name in
                            fsm.decoding.items()
                        })
                    for state, state_name in fsm.decoding.items()
                }))
Exemplo n.º 3
0
    def __init__(self, aligned_reset_zero=False, **kwargs):
        """Create the simulation PHY together with its own simulation pads.

        Every signal of ``self.out`` is connected to the matching pad through
        ``self.ser`` / ``self.des``; output-enable signals are only delayed by
        ``Serializer.LATENCY`` cycles.

        Args:
            aligned_reset_zero: When true, ``reset_cnt=0`` is passed to the
                serializers/deserializers using the ``sys8x`` and
                ``sys8x_ddr`` clocks.
            **kwargs: Forwarded to the base class.
        """
        pads = LPDDR4SimulationPads()
        self.submodules += pads
        super().__init__(pads,
                         phytype="LPDDR4SimPHY",
                         ser_latency=Latency(sys=Serializer.LATENCY),
                         des_latency=Latency(sys=Deserializer.LATENCY),
                         **kwargs)

        # Fake delays (make no sense in simulation, but sdram.c expects them)
        self.settings.read_leveling = True
        self.settings.delays = 1
        self._rdly_dq_rst = CSR()
        self._rdly_dq_inc = CSR()

        def delay(sig, cycles):
            return delayed(self, sig, cycles=cycles)

        def clocking(fast_clk):
            # Keyword arguments selecting the clocks of a serializer/deserializer
            return dict(clkdiv="sys", clk=fast_clk)

        sdr = clocking("sys8x")
        sdr_90 = clocking("sys8x_90")  # not used by any signal below
        ddr = clocking("sys8x_ddr")
        ddr_90 = clocking("sys8x_90_ddr")

        if aligned_reset_zero:
            for params in (sdr, ddr):
                params["reset_cnt"] = 0

        def tristate(kind, idx, params):
            # Tristate I/O is split for simulation: `<kind>_o` drives the
            # `<kind>_o` pad while `<kind>_i` is read back from the `<kind>` pad.
            out_name = f"{kind}_o"
            in_name = f"{kind}_i"
            self.ser(i=getattr(self.out, out_name)[idx],
                     o=getattr(self.pads, out_name)[idx],
                     name=f"{out_name}{idx}",
                     **params)
            self.des(o=getattr(self.out, in_name)[idx],
                     i=getattr(self.pads, kind)[idx],
                     name=f"{in_name}{idx}",
                     **params)

        # Clock is shifted 180 degrees to get rising edge in the middle of SDR
        # signals. To achieve that we send negated clock on clk (clk_p).
        self.ser(i=~self.out.clk, o=self.pads.clk, name='clk', **ddr)

        # Simple control signals, then command/address
        for sig in ("cke", "odt", "reset_n", "cs"):
            self.ser(i=getattr(self.out, sig),
                     o=getattr(self.pads, sig),
                     name=sig,
                     **sdr)
        for bit in range(6):
            self.ser(i=self.out.ca[bit],
                     o=self.pads.ca[bit],
                     name=f'ca{bit}',
                     **sdr)

        # Per-byte signals: DMI uses the DDR clock, DQS the 90-degree one
        for byte in range(self.databits // 8):
            tristate("dmi", byte, ddr)
            tristate("dqs", byte, ddr_90)
        # Per-bit data signals
        for bit in range(self.databits):
            tristate("dq", bit, ddr)

        # Output enable signals
        self.comb += [
            getattr(self.pads, oe).eq(
                delay(getattr(self.out, oe), cycles=Serializer.LATENCY))
            for oe in ("dmi_oe", "dqs_oe", "dq_oe")
        ]
Exemplo n.º 4
0
    def __init__(self,
                 pads,
                 *,
                 sys_clk_freq,
                 ser_latency,
                 des_latency,
                 phytype,
                 cmd_delay=None,
                 masked_write=True,
                 extended_overlaps_check=False):
        """Build the generic LPDDR4 PHY logic between the DFI interface and ``self.out``.

        The constructor computes the PHY timing parameters, creates the CSRs,
        the ``PhySettings`` and the DFI ``Interface`` and then fills the
        ``LPDDR4Output`` bundle (``self.out``) with clock, command, DQ, DQS and
        DMI data. Connecting ``self.out`` to actual pads is left to subclasses.

        Args:
            pads: Pads object; ``len(pads.dq)`` defines ``databits`` and, when
                present, ``len(pads.cs_n)`` defines ``nranks`` (1 otherwise).
            sys_clk_freq: System clock frequency; ``tck`` is computed as
                ``1 / (nphases * sys_clk_freq)``.
            ser_latency: Serialization latency, read through its ``sys8x``
                attribute.
            des_latency: Deserialization latency, read through its ``sys8x``
                attribute.
            phytype: Stored in ``PhySettings``.
            cmd_delay: Stored in ``PhySettings``.
            masked_write: Passed to every ``DFIPhaseAdapter``. When it is a
                ``Signal`` or truthy, DMI outputs carry the write data mask;
                otherwise DMI is driven to 0 with its output disabled.
            extended_overlaps_check: Passed to ``CommandsPipeline``.

        Raises:
            ValueError: If ``tck`` is shorter than what the CL/CWL table covers.
            AssertionError: If ``databits`` is not a multiple of 8.
        """
        self.pads = pads
        self.memtype = memtype = "LPDDR4"
        self.nranks = nranks = 1 if not hasattr(pads, "cs_n") else len(
            pads.cs_n)
        self.databits = databits = len(pads.dq)
        self.addressbits = addressbits = 17  # for activate row address
        self.bankbits = bankbits = 6  # 3 bankbits, but we use 6 for Mode Register address in MRS
        self.nphases = nphases = 8
        self.tck = tck = 1 / (nphases * sys_clk_freq)
        assert databits % 8 == 0

        # Parameters -------------------------------------------------------------------------------
        def get_cl_cw(memtype, tck):
            # MT53E256M16D1, No DBI, Set A
            # Keys are frequencies in ascending order; the first entry whose
            # limit (tck >= 2 / f) is satisfied gives the (CL, CWL) pair.
            f_to_cl_cwl = OrderedDict()
            f_to_cl_cwl[532e6] = (6, 4)
            f_to_cl_cwl[1066e6] = (10, 6)
            f_to_cl_cwl[1600e6] = (14, 8)
            f_to_cl_cwl[2132e6] = (20, 10)
            f_to_cl_cwl[2666e6] = (24, 12)
            f_to_cl_cwl[3200e6] = (28, 14)
            f_to_cl_cwl[3732e6] = (32, 16)
            f_to_cl_cwl[4266e6] = (36, 18)
            for f, (cl, cwl) in f_to_cl_cwl.items():
                if tck >= 2 / f:
                    return cl, cwl
            # Same exception type as before, now with a diagnostic message.
            raise ValueError(
                f"no CL/CWL table entry for tCK={tck:.4g}s "
                f"(table covers tCK >= {2 / max(f_to_cl_cwl):.4g}s)")

        # Bitslip introduces latency from 1 up to `cycles + 1`
        # FIXME: (check if True) from tests on hardware it seems we need 1 more cycle
        #   of read_latency, probably to have space for manipulating bitslip values
        bitslip_cycles = 1
        bitslip_range = 1
        # Commands are sent over 4 DRAM clocks (sys8x) and we count cl/cwl from last bit
        cmd_latency = 4
        # Commands read from adapters are delayed on ConstBitSlips
        ca_latency = 1

        cl, cwl = get_cl_cw(memtype, tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)
        # For reads we need to account for ser+des latency to make sure we get the data in-phase with sys clock
        rdphase = get_sys_phase(
            nphases, cl_sys_latency,
            cl + cmd_latency + ser_latency.sys8x % 8 + des_latency.sys8x % 8)
        # No need to modify wrphase, because ser_latency applies the same to both CA and DQ
        wrphase = get_sys_phase(nphases, cwl_sys_latency, cwl + cmd_latency)

        # When the calculated phase is negative, it means that we need to increase sys latency
        def updated_latency(phase, sys_latency):
            while phase < 0:
                phase += nphases
                sys_latency += 1
            return phase, sys_latency

        wrphase, cwl_sys_latency = updated_latency(wrphase, cwl_sys_latency)
        rdphase, cl_sys_latency = updated_latency(rdphase, cl_sys_latency)

        # Read latency
        read_data_delay = ca_latency + ser_latency.sys8x // 8 + cl_sys_latency  # DFI cmd -> read data on DQ
        read_des_delay = des_latency.sys8x // 8 + bitslip_cycles + bitslip_range  # data on DQ -> data on DFI rddata
        read_latency = read_data_delay + read_des_delay

        # Write latency
        write_latency = cwl_sys_latency

        # Registers --------------------------------------------------------------------------------
        self._rst = CSRStorage()

        self._wlevel_en = CSRStorage()
        self._wlevel_strobe = CSR()

        # One select bit per data byte
        self._dly_sel = CSRStorage(databits // 8)

        self._rdly_dq_bitslip_rst = CSR()
        self._rdly_dq_bitslip = CSR()

        self._wdly_dq_bitslip_rst = CSR()
        self._wdly_dq_bitslip = CSR()

        # Phases are software-adjustable; the computed values are the reset defaults
        self._rdphase = CSRStorage(log2_int(nphases), reset=rdphase)
        self._wrphase = CSRStorage(log2_int(nphases), reset=wrphase)

        # PHY settings -----------------------------------------------------------------------------
        self.settings = PhySettings(
            phytype=phytype,
            memtype=memtype,
            databits=databits,
            dfi_databits=2 * databits,
            nranks=nranks,
            nphases=nphases,
            rdphase=self._rdphase.storage,
            wrphase=self._wrphase.storage,
            cl=cl,
            cwl=cwl,
            read_latency=read_latency,
            write_latency=write_latency,
            cmd_latency=cmd_latency,
            cmd_delay=cmd_delay,
            bitslips=16,
        )

        # DFI Interface ----------------------------------------------------------------------------
        # Due to the fact that LPDDR4 has 16n prefetch we use 8 phases to be able to read/write a
        # whole burst during a single controller clock cycle. PHY should use sys8x clock.
        self.dfi = dfi = Interface(addressbits,
                                   bankbits,
                                   nranks,
                                   2 * databits,
                                   nphases=nphases)

        # # #

        adapters = [
            DFIPhaseAdapter(phase, masked_write=masked_write)
            for phase in self.dfi.phases
        ]
        self.submodules += adapters

        # Now prepare the data by converting the sequences on adapters into sequences on the pads.
        # We have to ignore overlapping commands, and module timings have to ensure that there are
        # no overlapping commands anyway.
        self.out = LPDDR4Output(nphases, databits)

        # Clocks -----------------------------------------------------------------------------------
        self.comb += self.out.clk.eq(bitpattern("-_-_-_-_" * 2))

        # Simple commands --------------------------------------------------------------------------
        # Each per-phase DFI signal is delayed and the phases are concatenated into one word
        self.comb += [
            self.out.cke.eq(
                Cat(delayed(self, phase.cke) for phase in self.dfi.phases)),
            self.out.odt.eq(
                Cat(delayed(self, phase.odt) for phase in self.dfi.phases)),
            self.out.reset_n.eq(
                Cat(delayed(self, phase.reset_n)
                    for phase in self.dfi.phases)),
        ]

        # LPDDR4 Commands --------------------------------------------------------------------------
        # Each LPDDR4 command can span several phases (2 or 4), so in theory the commands could
        # overlap. No overlap should be guaranteed by the controller based on module timings, but
        # we also include an overlaps check in PHY logic.
        self.submodules.commands = CommandsPipeline(
            adapters,
            cs_ser_width=len(self.out.cs),
            ca_ser_width=len(self.out.ca[0]),
            ca_nbits=len(self.out.ca),
            cmd_nphases_span=4,
            extended_overlaps_check=extended_overlaps_check)

        self.comb += self.out.cs.eq(self.commands.cs)
        for bit in range(6):
            self.comb += self.out.ca[bit].eq(self.commands.ca[bit])

        # DQ ---------------------------------------------------------------------------------------
        dq_oe = Signal()
        self.comb += self.out.dq_oe.eq(delayed(self, dq_oe, cycles=1))

        for bit in range(self.databits):
            # output
            # Each DFI phase carries two beats: beat i lives in phase i // 2, in the lower
            # (i even) or upper (i odd) `databits` half of its wrdata.
            wrdata = [
                self.dfi.phases[i // 2].wrdata[i % 2 * self.databits + bit]
                for i in range(2 * nphases)
            ]
            self.submodules += BitSlip(
                dw=2 * nphases,
                cycles=bitslip_cycles,
                rst=self.get_rst(bit // 8, self._wdly_dq_bitslip_rst.re),
                slp=self.get_inc(bit // 8, self._wdly_dq_bitslip.re),
                i=Cat(*wrdata),
                o=self.out.dq_o[bit],
            )

            # input
            dq_i_bs = Signal(2 * nphases)
            self.submodules += BitSlip(
                dw=2 * nphases,
                cycles=bitslip_cycles,
                rst=self.get_rst(bit // 8, self._rdly_dq_bitslip_rst.re),
                slp=self.get_inc(bit // 8, self._rdly_dq_bitslip.re),
                i=self.out.dq_i[bit],
                o=dq_i_bs,
            )
            # Same beat-to-phase mapping as for wrdata, in the opposite direction
            for i in range(2 * nphases):
                self.comb += self.dfi.phases[i //
                                             2].rddata[i % 2 * self.databits +
                                                       bit].eq(dq_i_bs[i])

        # DQS --------------------------------------------------------------------------------------
        dqs_oe = Signal()
        dqs_preamble = Signal()
        dqs_postamble = Signal()
        dqs_pattern = DQSPattern(preamble=dqs_preamble,
                                 postamble=dqs_postamble,
                                 wlevel_en=self._wlevel_en.storage,
                                 wlevel_strobe=self._wlevel_strobe.re)
        self.submodules += dqs_pattern
        self.comb += [
            self.out.dqs_oe.eq(delayed(self, dqs_oe, cycles=1)),
        ]

        for byte in range(self.databits // 8):
            # output
            # DQS shares the write bitslip controls with the DQ bits of its byte
            self.submodules += BitSlip(
                dw=2 * nphases,
                cycles=bitslip_cycles,
                rst=self.get_rst(byte, self._wdly_dq_bitslip_rst.re),
                slp=self.get_inc(byte, self._wdly_dq_bitslip.re),
                i=dqs_pattern.o,
                o=self.out.dqs_o[byte],
            )

        # DMI --------------------------------------------------------------------------------------
        # DMI signal is used for Data Mask or Data Bus Inversion depending on Mode Registers values.
        # With DM and DBI disabled, this signal is a Don't Care.
        # With DM enabled, masking is performed only when the command used is WRITE-MASKED.
        # We don't support DBI, DM support is configured statically with `masked_write`.
        # The isinstance check comes first so that a Signal is never evaluated for truthiness.
        dmi_enabled = isinstance(masked_write, Signal) or masked_write
        # dmi_oe is a single signal shared by all bytes, so it is driven once here instead of
        # once per byte inside the loop.
        self.comb += self.out.dmi_oe.eq(self.out.dq_oe if dmi_enabled else 0)
        for byte in range(self.databits // 8):
            if dmi_enabled:
                wrdata_mask = [
                    self.dfi.phases[i //
                                    2].wrdata_mask[i % 2 * self.databits // 8 +
                                                   byte]
                    for i in range(2 * nphases)
                ]
                self.submodules += BitSlip(
                    dw=2 * nphases,
                    cycles=bitslip_cycles,
                    rst=self.get_rst(byte, self._wdly_dq_bitslip_rst.re),
                    slp=self.get_inc(byte, self._wdly_dq_bitslip.re),
                    i=Cat(*wrdata_mask),
                    o=self.out.dmi_o[byte],
                )
            else:
                self.comb += self.out.dmi_o[byte].eq(0)

        # Read Control Path ------------------------------------------------------------------------
        # Creates a delay line of read commands coming from the DFI interface. The output is used to
        # signal a valid read data to the DFI interface.
        #
        # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
        # interface, the latency is the sum of the OSERDESE2, CAS, ISERDESE2 and Bitslip latencies.
        rddata_en = TappedDelayLine(signal=reduce(
            or_, [dfi.phases[i].rddata_en for i in range(nphases)]),
                                    ntaps=self.settings.read_latency)
        self.submodules += rddata_en

        # rddata_valid is also forced high while write leveling is enabled
        self.comb += [
            phase.rddata_valid.eq(rddata_en.output | self._wlevel_en.storage)
            for phase in dfi.phases
        ]

        # Write Control Path -----------------------------------------------------------------------
        wrtap = cwl_sys_latency - 1
        assert wrtap >= 0

        # Create a delay line of write commands coming from the DFI interface. This taps are used to
        # control DQ/DQS tristates.
        # Two extra taps so that taps[wrtap + 1] (used for the postamble) exists.
        wrdata_en = TappedDelayLine(signal=reduce(
            or_, [dfi.phases[i].wrdata_en for i in range(nphases)]),
                                    ntaps=wrtap + 2)
        self.submodules += wrdata_en

        self.comb += dq_oe.eq(wrdata_en.taps[wrtap])
        # Always enabled in write leveling mode, else during transfers
        self.comb += dqs_oe.eq(self._wlevel_en.storage
                               | (dqs_preamble | dq_oe | dqs_postamble))

        # Write DQS Postamble/Preamble Control Path ------------------------------------------------
        # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
        # write. During writes, DQS tristate is configured as output for at least 3 sys_clk cycles:
        # 1 for Preamble, 1 for the Write and 1 for the Postamble.
        def wrdata_en_tap(i):  # allows to have wrtap == 0
            return wrdata_en.input if i == -1 else wrdata_en.taps[i]

        self.comb += dqs_preamble.eq(
            wrdata_en_tap(wrtap - 1) & ~wrdata_en_tap(wrtap + 0))
        self.comb += dqs_postamble.eq(
            wrdata_en_tap(wrtap + 1) & ~wrdata_en_tap(wrtap + 0))