示例#1
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_depth=32,
            default_iterator_support=3,
            interconnect_input_ports=2,  # Connection to int
            interconnect_output_ports=2,
            mem_input_ports=1,
            mem_output_ports=1,
            config_data_width=32,
            config_addr_width=8,
            cycle_count_width=16,
            add_clk_enable=True,
            add_flush=True):
        """Build the "pond" generator around a small register file.

        The constructor declares the ports and internal signals, adds one
        ForLoop/AddrGen/SchedGen trio per interconnect write port and per
        interconnect read port, instantiates the configuration sequencer
        and the register file, and finally lifts the config registers.

        Args:
            data_width: Bit width of each data word on ``data_in_pond`` /
                ``data_out_pond`` and of the register file words.
            mem_depth: Depth passed to the register file; addresses are
                ``clog2(mem_depth)`` bits wide.
            default_iterator_support: ``iterator_support`` handed to every
                ForLoop, AddrGen and SchedGen child.
            interconnect_input_ports: Number of entries in ``data_in_pond``
                and number of write controllers created.
            interconnect_output_ports: Number of entries in
                ``data_out_pond`` / bits in ``valid_out_pond`` and number of
                read controllers created.
            mem_input_ports: ``write_ports`` of the register file.
            mem_output_ports: ``read_ports`` of the register file.
            config_data_width: Bit width of ``config_data_in`` and of each
                ``config_data_out`` entry.
            config_addr_width: Bit width of ``config_addr_in``.
            cycle_count_width: Width given to the cycle counter and used as
                ``config_width`` of the ForLoop and SchedGen children.
            add_clk_enable: When true, run the automatic clock-enable
                insertion pass and tag the resulting ``clk_en`` port.
            add_flush: When true, run the automatic sync-reset insertion
                pass and tag the resulting ``flush`` port.
        """
        super().__init__("pond", debug=True)

        self.interconnect_input_ports = interconnect_input_ports
        self.interconnect_output_ports = interconnect_output_ports
        self.mem_input_ports = mem_input_ports
        self.mem_output_ports = mem_output_ports
        self.mem_depth = mem_depth
        self.data_width = data_width
        self.config_data_width = config_data_width
        self.config_addr_width = config_addr_width
        self.add_clk_enable = add_clk_enable
        self.add_flush = add_flush
        self.cycle_count_width = cycle_count_width
        self.default_iterator_support = default_iterator_support
        # Address generators only need enough bits to index the memory.
        self.default_config_width = kts.clog2(self.mem_depth)
        # inputs
        self._clk = self.clock("clk")
        self._clk.add_attribute(
            FormalAttr(f"{self._clk.name}", FormalSignalConstraint.CLK))
        self._rst_n = self.reset("rst_n")
        self._rst_n.add_attribute(
            FormalAttr(f"{self._rst_n.name}", FormalSignalConstraint.RSTN))
        self._clk_en = self.clock_en("clk_en", 1)

        # Enable/Disable tile
        self._tile_en = self.input("tile_en", 1)
        self._tile_en.add_attribute(
            ConfigRegAttr("Tile logic enable manifested as clock gate"))

        # Gated clock: the incoming clock ANDed with the tile enable.
        gclk = self.var("gclk", 1)
        self._gclk = kts.util.clock(gclk)
        self.wire(gclk, kts.util.clock(self._clk & self._tile_en))

        self._cycle_count = add_counter(self, "cycle_count",
                                        self.cycle_count_width)

        # Create write enable + addr, same for read.
        # self._write = self.input("write", self.interconnect_input_ports)
        self._write = self.var("write", self.mem_input_ports)
        # self._write.add_attribute(ControlSignalAttr(is_control=True))

        self._write_addr = self.var("write_addr",
                                    kts.clog2(self.mem_depth),
                                    size=self.interconnect_input_ports,
                                    explicit_array=True,
                                    packed=True)

        # Add "_pond" suffix to avoid error during garnet RTL generation
        self._data_in = self.input("data_in_pond",
                                   self.data_width,
                                   size=self.interconnect_input_ports,
                                   explicit_array=True,
                                   packed=True)
        self._data_in.add_attribute(
            FormalAttr(f"{self._data_in.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._data_in.add_attribute(ControlSignalAttr(is_control=False))

        self._read = self.var("read", self.mem_output_ports)
        # Per-interconnect-port write/read strobes driven by the schedule
        # generators instantiated below.
        self._t_write = self.var("t_write", self.interconnect_input_ports)
        self._t_read = self.var("t_read", self.interconnect_output_ports)
        # self._read.add_attribute(ControlSignalAttr(is_control=True))

        self._read_addr = self.var("read_addr",
                                   kts.clog2(self.mem_depth),
                                   size=self.interconnect_output_ports,
                                   explicit_array=True,
                                   packed=True)

        self._s_read_addr = self.var("s_read_addr",
                                     kts.clog2(self.mem_depth),
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)

        self._data_out = self.output("data_out_pond",
                                     self.data_width,
                                     size=self.interconnect_output_ports,
                                     explicit_array=True,
                                     packed=True)
        self._data_out.add_attribute(
            FormalAttr(f"{self._data_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._data_out.add_attribute(ControlSignalAttr(is_control=False))

        self._valid_out = self.output("valid_out_pond",
                                      self.interconnect_output_ports)
        self._valid_out.add_attribute(
            FormalAttr(f"{self._valid_out.name}",
                       FormalSignalConstraint.SEQUENCE))
        self._valid_out.add_attribute(ControlSignalAttr(is_control=False))

        self._mem_data_out = self.var("mem_data_out",
                                      self.data_width,
                                      size=self.mem_output_ports,
                                      explicit_array=True,
                                      packed=True)

        self._s_mem_data_in = self.var("s_mem_data_in",
                                       self.data_width,
                                       size=self.interconnect_input_ports,
                                       explicit_array=True,
                                       packed=True)

        self._mem_data_in = self.var("mem_data_in",
                                     self.data_width,
                                     size=self.mem_input_ports,
                                     explicit_array=True,
                                     packed=True)

        self._s_mem_write_addr = self.var("s_mem_write_addr",
                                          kts.clog2(self.mem_depth),
                                          size=self.interconnect_input_ports,
                                          explicit_array=True,
                                          packed=True)

        self._s_mem_read_addr = self.var("s_mem_read_addr",
                                         kts.clog2(self.mem_depth),
                                         size=self.interconnect_output_ports,
                                         explicit_array=True,
                                         packed=True)

        self._mem_write_addr = self.var("mem_write_addr",
                                        kts.clog2(self.mem_depth),
                                        size=self.mem_input_ports,
                                        explicit_array=True,
                                        packed=True)

        self._mem_read_addr = self.var("mem_read_addr",
                                       kts.clog2(self.mem_depth),
                                       size=self.mem_output_ports,
                                       explicit_array=True,
                                       packed=True)

        # Every interconnect output sees the single memory read port; the
        # loop also covers the one-port case, so no special branch is needed.
        for i in range(self.interconnect_output_ports):
            self.wire(self._data_out[i], self._mem_data_out[0])

        # Valid out is simply passing the read signal through...
        self.wire(self._valid_out, self._t_read)

        # Create write addressors
        for wr_port in range(self.interconnect_input_ports):

            RF_WRITE_ITER = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            RF_WRITE_ADDR = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            RF_WRITE_SCHED = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_write_iter_{wr_port}",
                           RF_WRITE_ITER,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_write[wr_port])
            # Whatever comes through here should hopefully just pipe through seamlessly
            # addressor modules
            self.add_child(f"rf_write_addr_{wr_port}",
                           RF_WRITE_ADDR,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_write[wr_port],
                           mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                           restart=RF_WRITE_ITER.ports.restart)
            safe_wire(self, self._write_addr[wr_port],
                      RF_WRITE_ADDR.ports.addr_out)

            # The schedule generator produces this port's write strobe,
            # which in turn steps the iterator and address generator above.
            self.add_child(f"rf_write_sched_{wr_port}",
                           RF_WRITE_SCHED,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=RF_WRITE_ITER.ports.mux_sel_out,
                           finished=RF_WRITE_ITER.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._t_write[wr_port])

        # Create read addressors
        for rd_port in range(self.interconnect_output_ports):

            RF_READ_ITER = ForLoop(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width)
            RF_READ_ADDR = AddrGen(
                iterator_support=self.default_iterator_support,
                config_width=self.default_config_width)
            RF_READ_SCHED = SchedGen(
                iterator_support=self.default_iterator_support,
                config_width=self.cycle_count_width,
                use_enable=True)

            self.add_child(f"rf_read_iter_{rd_port}",
                           RF_READ_ITER,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_read[rd_port])

            self.add_child(f"rf_read_addr_{rd_port}",
                           RF_READ_ADDR,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           step=self._t_read[rd_port],
                           mux_sel=RF_READ_ITER.ports.mux_sel_out,
                           restart=RF_READ_ITER.ports.restart)
            # The wiring is the same regardless of how many read ports exist.
            safe_wire(self, self._read_addr[rd_port],
                      RF_READ_ADDR.ports.addr_out)

            self.add_child(f"rf_read_sched_{rd_port}",
                           RF_READ_SCHED,
                           clk=self._gclk,
                           rst_n=self._rst_n,
                           mux_sel=RF_READ_ITER.ports.mux_sel_out,
                           finished=RF_READ_ITER.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self._t_read[rd_port])

        # The memory is written/read whenever any interconnect port strobes.
        # NOTE(review): `decode` is defined elsewhere; presumably it selects
        # the array entry belonging to the active strobe -- confirm.
        self.wire(self._write, self._t_write.r_or())
        self.wire(self._mem_write_addr[0],
                  decode(self, self._t_write, self._s_mem_write_addr))

        self.wire(self._mem_data_in[0],
                  decode(self, self._t_write, self._s_mem_data_in))

        self.wire(self._read, self._t_read.r_or())
        self.wire(self._mem_read_addr[0],
                  decode(self, self._t_read, self._s_mem_read_addr))
        # ===================================
        # Instantiate config hooks...
        # ===================================
        self.fw_int = 1
        self.data_words_per_set = 2**self.config_addr_width
        self.sets = int(
            (self.fw_int * self.mem_depth) / self.data_words_per_set)

        # Always expose at least one set, even when the memory is shallower
        # than one config address space.
        self.sets_per_macro = max(
            1, int(self.mem_depth / self.data_words_per_set))
        self.total_sets = max(1, 1 * self.sets_per_macro)

        self._config_data_in = self.input("config_data_in",
                                          self.config_data_width)
        self._config_data_in.add_attribute(ControlSignalAttr(is_control=False))

        # Only the low data_width bits of the config word reach the memory.
        self._config_data_in_shrt = self.var("config_data_in_shrt",
                                             self.data_width)

        self.wire(self._config_data_in_shrt,
                  self._config_data_in[self.data_width - 1, 0])

        self._config_addr_in = self.input("config_addr_in",
                                          self.config_addr_width)
        self._config_addr_in.add_attribute(ControlSignalAttr(is_control=False))

        self._config_data_out_shrt = self.var("config_data_out_shrt",
                                              self.data_width,
                                              size=self.total_sets,
                                              explicit_array=True,
                                              packed=True)

        self._config_data_out = self.output("config_data_out",
                                            self.config_data_width,
                                            size=self.total_sets,
                                            explicit_array=True,
                                            packed=True)
        self._config_data_out.add_attribute(
            ControlSignalAttr(is_control=False))

        # Widen each data_width read-back word to the config bus width.
        for i in range(self.total_sets):
            self.wire(
                self._config_data_out[i],
                self._config_data_out_shrt[i].extend(self.config_data_width))

        self._config_read = self.input("config_read", 1)
        self._config_read.add_attribute(ControlSignalAttr(is_control=False))

        self._config_write = self.input("config_write", 1)
        self._config_write.add_attribute(ControlSignalAttr(is_control=False))

        self._config_en = self.input("config_en", self.total_sets)
        self._config_en.add_attribute(ControlSignalAttr(is_control=False))

        self._mem_data_cfg = self.var("mem_data_cfg",
                                      self.data_width,
                                      explicit_array=True,
                                      packed=True)

        self._mem_addr_cfg = self.var("mem_addr_cfg",
                                      kts.clog2(self.mem_depth))

        # Add config...
        stg_cfg_seq = StorageConfigSeq(
            data_width=self.data_width,
            config_addr_width=self.config_addr_width,
            addr_width=kts.clog2(self.mem_depth),
            fetch_width=self.data_width,
            total_sets=self.total_sets,
            sets_per_macro=self.sets_per_macro)

        # The clock to config sequencer needs to be the normal clock or
        # if the tile is off, we bring the clock back in based on config_en
        # NOTE(review): as written the sequencer clock is simply the gated
        # clock; config_en only feeds its clk_en below -- confirm intent.
        cfg_seq_clk = self.var("cfg_seq_clk", 1)
        self._cfg_seq_clk = kts.util.clock(cfg_seq_clk)
        self.wire(cfg_seq_clk, kts.util.clock(self._gclk))

        self.add_child("config_seq",
                       stg_cfg_seq,
                       clk=self._cfg_seq_clk,
                       rst_n=self._rst_n,
                       clk_en=self._clk_en | self._config_en.r_or(),
                       config_data_in=self._config_data_in_shrt,
                       config_addr_in=self._config_addr_in,
                       config_wr=self._config_write,
                       config_rd=self._config_read,
                       config_en=self._config_en,
                       wr_data=self._mem_data_cfg,
                       rd_data_out=self._config_data_out_shrt,
                       addr_out=self._mem_addr_cfg)

        if self.interconnect_output_ports == 1:
            self.wire(stg_cfg_seq.ports.rd_data_stg, self._mem_data_out)
        else:
            self.wire(stg_cfg_seq.ports.rd_data_stg[0], self._mem_data_out[0])

        self.RF_GEN = RegisterFile(data_width=self.data_width,
                                   write_ports=self.mem_input_ports,
                                   read_ports=self.mem_output_ports,
                                   width_mult=1,
                                   depth=self.mem_depth,
                                   read_delay=0)

        # Now we can instantiate and wire up the register file
        self.add_child("rf",
                       self.RF_GEN,
                       clk=self._gclk,
                       rst_n=self._rst_n,
                       data_out=self._mem_data_out)

        # Opt in for config_write
        # Port 0 takes the config write strobe while any config_en bit is
        # set; the remaining write ports are forced idle during config.
        self._write_rf = self.var("write_rf", self.mem_input_ports)
        self.wire(
            self._write_rf[0],
            kts.ternary(self._config_en.r_or(), self._config_write,
                        self._write[0]))
        for i in range(1, self.mem_input_ports):
            self.wire(
                self._write_rf[i],
                kts.ternary(self._config_en.r_or(), kts.const(0, 1),
                            self._write[i]))
        self.wire(self.RF_GEN.ports.wen, self._write_rf)

        # Opt in for config_data_in
        for i in range(self.interconnect_input_ports):
            self.wire(
                self._s_mem_data_in[i],
                kts.ternary(self._config_en.r_or(), self._mem_data_cfg,
                            self._data_in[i]))
        self.wire(self.RF_GEN.ports.data_in, self._mem_data_in)

        # Opt in for config_addr
        for i in range(self.interconnect_input_ports):
            self.wire(
                self._s_mem_write_addr[i],
                kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                            self._write_addr[i]))

        self.wire(self.RF_GEN.ports.wr_addr, self._mem_write_addr[0])

        for i in range(self.interconnect_output_ports):
            self.wire(
                self._s_mem_read_addr[i],
                kts.ternary(self._config_en.r_or(), self._mem_addr_cfg,
                            self._read_addr[i]))

        self.wire(self.RF_GEN.ports.rd_addr, self._mem_read_addr[0])

        if self.add_clk_enable:
            # self.clock_en("clk_en")
            kts.passes.auto_insert_clock_enable(self.internal_generator)
            clk_en_port = self.internal_generator.get_port("clk_en")
            clk_en_port.add_attribute(ControlSignalAttr(False))

        if self.add_flush:
            self.add_attribute("sync-reset=flush")
            kts.passes.auto_insert_sync_reset(self.internal_generator)
            flush_port = self.internal_generator.get_port("flush")
            flush_port.add_attribute(ControlSignalAttr(True))

        # Finally, lift the config regs...
        lift_config_reg(self.internal_generator)
示例#2
0
def test_input_addr_basic(banks,
                          interconnect_input_ports,
                          mem_depth=512,
                          data_width=16,
                          fetch_width=32,
                          iterator_support=4,
                          address_width=16,
                          multiwrite=1,
                          num_tiles=1):
    """Check the InputAddrCtrl RTL against InputAddrCtrlModel.

    Both the functional model and the hardware generator are configured
    with the same two address-generator settings, then driven for 1000
    iterations with the same seeded random valid / write-enable / data
    stimulus. Each iteration the model's write enables, addresses, data
    and port selection are registered as expectations on the DUT, and the
    whole recorded trace is finally simulated with verilator.

    Args:
        banks: Number of memory banks in the model and the DUT.
        interconnect_input_ports: Number of input ports to drive.
        mem_depth: Memory depth; also the starting address of generator 1.
        data_width: Width in bits of one data word.
        fetch_width: Width in bits of one memory fetch word.
        iterator_support: Iterator support of the address generators.
        address_width: Address width of the model and the DUT.
        multiwrite: Forwarded to the DUT's ``multiwrite`` argument.
        num_tiles: Forwarded to the model and the DUT.
    """
    # Number of data words packed into one fetch word.
    fw_int = int(fetch_width / data_width)

    # Set up model...
    model_iac = InputAddrCtrlModel(
        interconnect_input_ports=interconnect_input_ports,
        mem_depth=mem_depth,
        banks=banks,
        num_tiles=num_tiles,
        iterator_support=iterator_support,
        max_port_schedule=64,
        address_width=address_width,
        data_width=data_width,
        fetch_width=fetch_width)

    # The two generators use the same 3x3x3 access pattern; generator 1
    # starts at mem_depth instead of 0.
    new_config = {
        'address_gen_0_starting_addr': 0,
        'address_gen_0_dimensionality': 3,
        'address_gen_0_strides_0': 1,
        'address_gen_0_strides_1': 3,
        'address_gen_0_strides_2': 9,
        'address_gen_0_ranges_0': 3,
        'address_gen_0_ranges_1': 3,
        'address_gen_0_ranges_2': 3,
        'address_gen_1_starting_addr': mem_depth,
        'address_gen_1_dimensionality': 3,
        'address_gen_1_strides_0': 1,
        'address_gen_1_strides_1': 3,
        'address_gen_1_strides_2': 9,
        'address_gen_1_ranges_0': 3,
        'address_gen_1_ranges_1': 3,
        'address_gen_1_ranges_2': 3,
    }

    model_iac.set_config(new_config=new_config)
    ###

    # Set up dut...
    # data_width is forwarded (it used to be pinned to 16) so the DUT always
    # matches the model and the stimulus range generated below.
    dut = InputAddrCtrl(interconnect_input_ports=interconnect_input_ports,
                        mem_depth=mem_depth,
                        banks=banks,
                        num_tiles=num_tiles,
                        iterator_support=iterator_support,
                        address_width=address_width,
                        data_width=data_width,
                        fetch_width=fetch_width,
                        multiwrite=multiwrite,
                        strg_wr_ports=1,
                        config_width=16)

    lift_config_reg(dut.internal_generator)
    magma_dut = k.util.to_magma(dut,
                                flatten_array=True,
                                check_multiple_driver=False,
                                check_flip_flop_always_ff=False)

    tester = fault.Tester(magma_dut, magma_dut.clk)

    # Mirror the model configuration onto the lifted config registers.
    for key, value in new_config.items():
        setattr(tester.circuit, key, value)

    valid_in = [0] * interconnect_input_ports
    wen_en = [0] * interconnect_input_ports

    # initial reset
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    for port in range(interconnect_input_ports):
        tester.circuit.wen_en[port] = 1
    tester.step(2)

    # Fixed seed keeps the stimulus reproducible between runs.
    rand.seed(0)

    # Init blank data input
    data_in = [[0] * fw_int for _ in range(interconnect_input_ports)]
    max_word = 2**data_width - 1

    for _ in range(1000):
        # Set valid and wen enable
        for port in range(interconnect_input_ports):
            valid_in[port] = rand.randint(0, 1)
            wen_en[port] = rand.randint(0, 1)

        # Deal with data in
        for port in range(interconnect_input_ports):
            for word in range(fw_int):
                data_in[port][word] = rand.randint(0, max_word)

        (wen, data_out, addrs,
         port_out) = model_iac.interact(valid_in, data_in, wen_en)

        for port in range(interconnect_input_ports):
            tester.circuit.valid_in[port] = valid_in[port]
            tester.circuit.wen_en[port] = wen_en[port]

        for port in range(interconnect_input_ports):
            for word in range(fw_int):
                setattr(tester.circuit, f"data_in_{port}_{word}",
                        data_in[port][word])

        tester.eval()

        # With a single bank the write-enable port carries no bank suffix.
        if banks == 1:
            tester.circuit.addr_out_0_0.expect(addrs[0])
            tester.circuit.wen_to_sram.expect(wen[0])
        else:
            for bank in range(banks):
                getattr(tester.circuit,
                        f"addr_out_{bank}_0").expect(addrs[bank])
                getattr(tester.circuit,
                        f"wen_to_sram_{bank}").expect(wen[bank])

        for bank in range(banks):
            for word in range(fw_int):
                getattr(tester.circuit,
                        f"data_out_{bank}_0_{word}").expect(
                            data_out[bank][word])

        for port in range(interconnect_input_ports):
            tester.circuit.port_out[port].expect(port_out[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#3
0
    def __init__(
            self,
            data_width=16,  # CGRA Params
            mem_width=64,
            mem_depth=512,
            banks=2,
            input_iterator_support=6,  # Addr Controllers
            output_iterator_support=6,
            interconnect_input_ports=1,  # Connection to int
            interconnect_output_ports=3,
            mem_input_ports=1,
            mem_output_ports=1,
            use_sram_stub=1,
            sram_macro_info=SRAMMacroInfo(),
            read_delay=1,  # Cycle delay in read (SRAM vs Register File)
            rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
            agg_height=4,
            max_agg_schedule=32,
            input_max_port_sched=32,
            output_max_port_sched=32,
            align_input=1,
            max_line_length=128,
            max_tb_height=1,
            tb_range_max=128,
            tb_sched_max=64,
            max_tb_stride=15,
            num_tb=1,
            tb_iterator_support=2,
            multiwrite=1,
            max_prefetch=64,
            config_data_width=16,
            config_addr_width=8,
            num_tiles=2,
            remove_tb=False,
            fifo_mode=False,
            add_clk_enable=False,
            add_flush=False):
        """Build a "LakeChain": ``num_tiles`` LakeTop tiles sharing inputs.

        The constructor declares the chain-level ports, instantiates
        ``num_tiles`` identically parameterised LakeTop children that all
        receive the same data, address, enable and configuration inputs,
        registers the output-selection code blocks and lifts the config
        registers.

        Apart from the arguments described below, every argument is
        forwarded unchanged to each LakeTop instance.

        Args:
            data_width: Bit width of data and address input entries and of
                data output entries.
            mem_depth: Used, together with ``config_addr_width``, to derive
                the number of config sets per macro.
            banks: Multiplied by the sets per macro to size ``config_en``
                and ``config_data_out``.
            interconnect_input_ports: Number of ``data_in`` / ``addr_in``
                entries and width of ``wen``.
            interconnect_output_ports: Number of output entries per tile and
                width of ``ren``.
            sram_macro_info: Forwarded to LakeTop. The default object is
                created once, when the method is defined, so every caller
                relying on the default shares the same instance.
            config_data_width: Bit width of ``config_data_in`` and of each
                ``config_data_out`` entry.
            config_addr_width: Bit width of ``config_addr_in``.
            num_tiles: Number of LakeTop tiles in the chain.
        """
        super().__init__("LakeChain", debug=True)

        # Number of words addressable through one config address space.
        data_words_per_set = 2**config_addr_width

        # Always expose at least one set per macro and overall.
        sets_per_macro = max(1, int(mem_depth / data_words_per_set))
        total_sets = max(1, banks * sets_per_macro)

        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._data_in = self.input("data_in",
                                   data_width,
                                   size=interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)
        self._addr_in = self.input("addr_in",
                                   data_width,
                                   size=interconnect_input_ports,
                                   packed=True,
                                   explicit_array=True)

        self._wen = self.input("wen", interconnect_input_ports)
        self._ren = self.input("ren", interconnect_output_ports)

        self._config_data_in = self.input("config_data_in", config_data_width)

        self._config_addr_in = self.input("config_addr_in", config_addr_width)

        # One row of config read-back words per tile.
        self._config_data_out = self.output("config_data_out",
                                            config_data_width,
                                            size=(num_tiles, total_sets),
                                            explicit_array=True,
                                            packed=True)

        self._config_read = self.input("config_read", 1)
        self._config_write = self.input("config_write", 1)
        self._config_en = self.input("config_en", total_sets)

        self._data_out = self.output("data_out",
                                     data_width,
                                     size=(num_tiles,
                                           interconnect_output_ports),
                                     packed=True,
                                     explicit_array=True)

        # Raw per-tile outputs, before the registered code blocks drive the
        # chain-level outputs.
        self._data_out_inter = self.var("data_out_inter",
                                        data_width,
                                        size=(num_tiles,
                                              interconnect_output_ports),
                                        packed=True,
                                        explicit_array=True)

        self._valid_out = self.output("valid_out",
                                      interconnect_output_ports,
                                      size=num_tiles,
                                      packed=True,
                                      explicit_array=True)

        self._valid_out_inter = self.var("valid_out_inter",
                                         interconnect_output_ports,
                                         size=num_tiles,
                                         packed=True,
                                         explicit_array=True)

        self._enable_chain_output = self.input("enable_chain_output", 1)

        self._chain_data_out = self.output("chain_data_out",
                                           data_width,
                                           size=interconnect_output_ports,
                                           packed=True,
                                           explicit_array=True)

        self._chain_valid_out = self.output("chain_valid_out",
                                            interconnect_output_ports)

        self._tile_output_en = self.var("tile_output_en",
                                        1,
                                        size=(num_tiles,
                                              interconnect_output_ports),
                                        packed=True,
                                        explicit_array=True)

        self.is_valid_ = self.var("is_valid",
                                  1,
                                  size=interconnect_output_ports,
                                  packed=True,
                                  explicit_array=True)

        self.valids = self.var("valids",
                               clog2(num_tiles),
                               size=interconnect_output_ports,
                               packed=True,
                               explicit_array=True)

        # Every tile is built from the same parameters, so collect them once
        # instead of re-listing them on each loop iteration.
        tile_params = dict(
            data_width=data_width,
            mem_width=mem_width,
            mem_depth=mem_depth,
            banks=banks,
            input_iterator_support=input_iterator_support,
            output_iterator_support=output_iterator_support,
            interconnect_input_ports=interconnect_input_ports,
            interconnect_output_ports=interconnect_output_ports,
            mem_input_ports=mem_input_ports,
            mem_output_ports=mem_output_ports,
            use_sram_stub=use_sram_stub,
            sram_macro_info=sram_macro_info,
            read_delay=read_delay,
            rw_same_cycle=rw_same_cycle,
            agg_height=agg_height,
            max_agg_schedule=max_agg_schedule,
            input_max_port_sched=input_max_port_sched,
            output_max_port_sched=output_max_port_sched,
            align_input=align_input,
            max_line_length=max_line_length,
            max_tb_height=max_tb_height,
            tb_range_max=tb_range_max,
            tb_sched_max=tb_sched_max,
            max_tb_stride=max_tb_stride,
            num_tb=num_tb,
            tb_iterator_support=tb_iterator_support,
            multiwrite=multiwrite,
            max_prefetch=max_prefetch,
            config_data_width=config_data_width,
            config_addr_width=config_addr_width,
            num_tiles=num_tiles,
            remove_tb=remove_tb,
            fifo_mode=fifo_mode,
            add_clk_enable=add_clk_enable,
            add_flush=add_flush)

        for i in range(num_tiles):
            # A fresh generator instance is still required for every tile.
            tile = LakeTop(**tile_params)

            self.add_child(
                f"tile_{i}",
                tile,
                clk=self._clk,
                rst_n=self._rst_n,
                enable_chain_output=self._enable_chain_output,
                # tile index
                chain_idx_input=i,
                chain_idx_output=0,
                tile_output_en=self._tile_output_en[i],
                # broadcast input data to all tiles
                data_in=self._data_in,
                addr_in=self._addr_in,
                wen=self._wen,
                ren=self._ren,
                config_data_in=self._config_data_in,
                config_addr_in=self._config_addr_in,
                config_data_out=self._config_data_out[i],
                config_read=self._config_read,
                config_write=self._config_write,
                config_en=self._config_en,
                # used if output chaining not enabled
                data_out=self._data_out_inter[i],
                valid_out=self._valid_out_inter[i],
                # unused currently?
                tile_en=1,
                # UB mode
                mode=0)

        # Code blocks (defined elsewhere on this class) that drive the
        # chain-level outputs from the per-tile intermediate signals.
        self.add_code(self.set_data_out)
        self.add_code(self.set_valid_out)
        self.add_code(self.set_chain_outputs)

        # config regs
        lift_config_reg(self.internal_generator)
示例#4
0
def test_agg_formal():
    """Simulate AggFormal in Verilator with a fixed register configuration.

    The DUT is built, its config registers are lifted to the top level, and
    an incrementing value is driven onto ``data_in`` for 500 iterations.
    No outputs are checked with ``expect``; the test passes when the design
    compiles and the simulation runs to completion.
    """
    dut_params = dict(
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=1,  # Connection to int
        interconnect_output_ports=1,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4,
    )
    agg_dut = AggFormal(**dut_params)

    lift_config_reg(agg_dut.internal_generator)

    magma_dut = k.util.to_magma(agg_dut,
                                flatten_array=True,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # Register values, listed in the order they are poked into the circuit.
    config = {
        "agg_write_sched_gen_0_sched_addr_gen_starting_addr": 0,
        "agg_write_addr_gen_0_strides_0": 54657,
        "agg_write_addr_gen_0_strides_1": 0,
        "agg_write_addr_gen_0_strides_2": 0,
        "agg_write_addr_gen_0_strides_3": 0,
        "agg_write_addr_gen_0_strides_4": 0,
        "agg_write_addr_gen_0_strides_5": 0,
        "agg_read_addr_gen_0_strides_0": 65032,
        "agg_read_addr_gen_0_strides_1": 0,
        "agg_read_addr_gen_0_strides_2": 0,
        "agg_read_addr_gen_0_strides_3": 0,
        "agg_read_addr_gen_0_strides_4": 0,
        "agg_read_addr_gen_0_strides_5": 0,
        "agg_write_loops_0_ranges_0": 2,
        "agg_write_loops_0_ranges_1": 0,
        "agg_write_loops_0_ranges_2": 0,
        "agg_write_loops_0_ranges_3": 0,
        "agg_write_loops_0_ranges_4": 0,
        "agg_write_loops_0_ranges_5": 0,
        "agg_write_loops_0_dimensionality": 1,
        "agg_write_addr_gen_0_starting_addr": 0,
        "agg_read_loops_0_ranges_0": 0,
        "agg_read_loops_0_ranges_1": 0,
        "agg_read_loops_0_ranges_2": 0,
        "agg_read_loops_0_ranges_3": 0,
        "agg_read_loops_0_ranges_4": 0,
        "agg_read_loops_0_ranges_5": 0,
        "agg_read_output_sched_gen_sched_addr_gen_starting_addr": 4,
        "agg_read_output_sched_gen_sched_addr_gen_strides_0": 4,
        "agg_read_output_sched_gen_sched_addr_gen_strides_1": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_2": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_3": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_4": 0,
        "agg_read_output_sched_gen_sched_addr_gen_strides_5": 0,
        "agg_read_addr_gen_0_starting_addr": 0,
        "agg_write_sched_gen_0_sched_addr_gen_strides_0": 1,
    }

    # The lifted configuration registers are plain top-level ports.
    for reg_name in config:
        setattr(tester.circuit, reg_name, config[reg_name])

    # Pulse the active-low reset: high, low, then high again.
    tester.circuit.clk = 0
    for reset_level in (1, 0):
        tester.circuit.rst_n = reset_level
        tester.step(2)
    tester.circuit.rst_n = 1

    rand.seed(0)

    # Feed 0, 1, 2, ... into data_in, advancing two clock steps per value.
    num_iters = 500
    for stimulus in range(num_iters):
        tester.circuit.data_in = stimulus
        tester.eval()
        tester.step(2)

    # A temporary directory is created for the duration of the run, but the
    # build output is written to the fixed "dump_agg_formal" directory.
    with tempfile.TemporaryDirectory():
        dump_dir = "dump_agg_formal"
        tester.compile_and_run(target="verilator",
                               directory=dump_dir,
                               magma_output="verilog",
                               flags=["-Wno-fatal", "--trace"],
                               disp_type="realtime")
示例#5
0
        self.add_code(self.tb_ctrl)
        for idx in range(self.interconnect_output_ports):
            self.add_code(self.tb_to_out, idx=idx)

    @always_ff((posedge, "clk"))
    def tb_ctrl(self):
        # Sequential block triggered on the rising edge of clk; there is no
        # reset term in the sensitivity list.
        # While self._read is asserted, self._data_in is stored into the
        # buffer selected by self._output_port_sel_addr, at the entry indexed
        # by the [1, 0] slice of that port's self._tb_write_addr.
        if self._read:
            self._tb[self._output_port_sel_addr][self._tb_write_addr[self._output_port_sel_addr][1, 0]] = \
                self._data_in

    @always_comb
    def tb_to_out(self, idx):
        # Combinational read path for output port `idx`; the constructor
        # registers one copy of this block per interconnect output port.
        # The [3, 2] slice of self._tb_read_addr[idx] picks the first index
        # into self._tb[idx] and the [1, 0] slice picks the second.
        self._data_out[idx] = self._tb[idx][self._tb_read_addr[idx][3, 2]][
            self._tb_read_addr[idx][1, 0]]

    @always_ff((posedge, "clk"), (negedge, "rst_n"))
    def increment_cycle_count(self):
        # Free-running counter: cleared to 0 while rst_n is low (rst_n's
        # falling edge is in the sensitivity list), otherwise incremented by
        # one on every rising edge of clk.
        if ~self._rst_n:
            self._cycle_count = 0
        else:
            self._cycle_count = self._cycle_count + 1


if __name__ == "__main__":
    # Script entry point: build the TB formal module with its default
    # parameters and emit its formal annotation file and SystemVerilog.
    tb_dut = TBFormal()

    # Config registers must be lifted before annotations are extracted and
    # the design is written out.
    tb_generator = tb_dut.internal_generator
    lift_config_reg(tb_generator)

    extract_formal_annotation(tb_dut, 'tb_formal_annotation.txt')
    verilog(tb_dut,
            filename="tb_formal.sv",
            optimize_if=False)
示例#6
0
def test_app_ctrl(sprt_stcl_valid,
                  int_in_ports=1,
                  int_out_ports=3,
                  depth_width=16,
                  stcl_cnt_width=16,
                  stcl_iter_support=4):
    """Compare the AppCtrl hardware against AppCtrlModel under random stimulus.

    The model and the DUT receive the same configuration and the same random
    ``wen_in`` / ``ren_in`` / ``tb_valid`` / ``ren_update`` values for 300
    iterations. ``wen_out``, ``valid_out_data`` and ``valid_out_stencil`` are
    checked against the model each iteration; ``ren_out`` is returned by the
    model but not checked.
    """
    # Reference model.
    model_ac = AppCtrlModel(int_in_ports=int_in_ports,
                            int_out_ports=int_out_ports,
                            sprt_stcl_valid=sprt_stcl_valid,
                            stcl_iter_support=stcl_iter_support)

    new_config = {
        'input_port_0': 0,
        'input_port_1': 0,
        'input_port_2': 0,
        'read_depth_0': 196,
        'read_depth_1': 196,
        'read_depth_2': 196,
        'write_depth_0': 196,
    }
    for level in range(stcl_iter_support):
        new_config[f'ranges_{level}'] = 4
        new_config[f'threshold_{level}'] = 4

    # Seed before the first draw so the prefill values are reproducible.
    rand.seed(0)

    prefill = [rand.randint(0, 1) for _ in range(int_out_ports)]
    for port, amount in enumerate(prefill):
        new_config[f'prefill_{port}'] = amount

    model_ac.set_config(new_config=new_config)

    # Hardware under test.
    dut = AppCtrl(interconnect_input_ports=int_in_ports,
                  interconnect_output_ports=int_out_ports,
                  depth_width=depth_width,
                  sprt_stcl_valid=sprt_stcl_valid,
                  stcl_cnt_width=stcl_cnt_width,
                  stcl_iter_support=stcl_iter_support)

    lift_config_reg(dut.internal_generator)

    magma_dut = kts.util.to_magma(dut,
                                  flatten_array=True,
                                  check_multiple_driver=False,
                                  check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)
    tester.zero_inputs()

    for reg_name in new_config:
        setattr(tester.circuit, reg_name, new_config[reg_name])

    tester.circuit.write_depth = 196

    # Initial reset: hold rst_n low for one step pair, then release it.
    tester.circuit.clk = 0
    for reset_level in (0, 1):
        tester.circuit.rst_n = reset_level
        tester.step(2)

    # Stimulus buffers are allocated once and updated in place every
    # iteration, so the model always receives the same list objects.
    wen_in = [0] * int_in_ports
    ren_in = [0] * int_out_ports
    tb_valid = [0] * int_out_ports
    ren_update = [0] * int_out_ports

    for _ in range(300):
        # Draw this iteration's random inputs; every output port shares one
        # ren_in value.
        wen_in[:] = [rand.randint(0, 1) for _ in range(int_in_ports)]
        shared_ren = rand.randint(0, 1)
        for port in range(int_out_ports):
            tb_valid[port] = rand.randint(0, 1)
            ren_in[port] = shared_ren
            ren_update[port] = rand.randint(0, 1)

        # Drive the DUT.
        for port, bit in enumerate(wen_in):
            tester.circuit.wen_in[port] = bit

        for port in range(int_out_ports):
            tester.circuit.ren_in[port] = ren_in[port]
            tester.circuit.tb_valid[port] = tb_valid[port]
            tester.circuit.ren_update[port] = ren_update[port]
            tester.circuit.prefill[port] = prefill[port]

        # Advance the model with the same inputs.
        model_outputs = model_ac.interact(wen_in=wen_in,
                                          ren_in=ren_in,
                                          tb_valid=tb_valid,
                                          ren_update=ren_update)
        wen_out, ren_out, valid_out_data, valid_out_stencil = model_outputs

        tester.eval()

        for port in range(int_in_ports):
            tester.circuit.wen_out[port].expect(wen_out[port])

        # The ren_out comparison is disabled:
        # tester.circuit.ren_out[port].expect(ren_out[port])
        for port in range(int_out_ports):
            tester.circuit.valid_out_data[port].expect(valid_out_data[port])
            tester.circuit.valid_out_stencil[port].expect(valid_out_stencil[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#7
0
                self._agg[idx][0][self._agg_write_addr[idx][clog2(self.fetch_width) - 1, 0]]\
                    = self._data_in[idx]
            else:
                self._agg[idx][self._agg_write_addr[idx]
                               [self._agg_write_addr[0].width - 1, clog2(self.fetch_width)]]\
                    [self._agg_write_addr[idx][clog2(self.fetch_width) - 1, 0]]\
                    = self._data_in[idx]

    @always_comb
    def agg_to_sram(self):
        # Combinational read of one aggregator entry: for the port selected
        # by self._input_port_sel_addr, the entry addressed by that port's
        # self._agg_read_addr is copied element by element (fetch_width
        # elements) onto self._data_out.
        for i in range(self.fetch_width):
            self._data_out[i] = \
                self._agg[self._input_port_sel_addr][self._agg_read_addr[self._input_port_sel_addr]][i]

    @always_ff((posedge, "clk"), (negedge, "rst_n"))
    def increment_cycle_count(self):
        # Free-running counter: cleared to 0 while rst_n is low (rst_n's
        # falling edge is in the sensitivity list), otherwise incremented by
        # one on every rising edge of clk.
        if ~self._rst_n:
            self._cycle_count = 0
        else:
            self._cycle_count = self._cycle_count + 1


if __name__ == "__main__":
    # Script entry point: build the aggregator formal module with its
    # default parameters and emit its formal annotation file and
    # SystemVerilog.
    lake_dut = AggFormal()

    # Config registers must be lifted before annotations are extracted and
    # the design is written out.
    agg_generator = lake_dut.internal_generator
    lift_config_reg(agg_generator)

    extract_formal_annotation(lake_dut, "agg_formal_annotation.txt")
    verilog(lake_dut, filename="agg_formal.sv", optimize_if=False)
示例#8
0
    def __init__(self,
                 word_width,
                 input_ports,
                 output_ports,
                 memories,
                 edges):
        """Build the "LakeTop" generator from a memory/edge description.

        Every memory in ``memories`` is instantiated with ``mem_inst`` and
        connected to the top-level ``data_in`` / ``data_out`` ports and to the
        other memories through ForLoop, AddrGen and SchedGen children.

        Args:
            word_width: Width of one data word on every data port.
            input_ports: Number of entries in the top-level ``data_in`` array.
            output_ports: Number of entries in the top-level ``data_out``
                array.
            memories: Mapping of memory name to a parameter dict. Keys read
                here: ``"chaining"``, ``"is_input"``, ``"is_output"``,
                ``"num_read_write_ports"``, ``"read_port_width"`` or
                ``"read_write_port_width"``, ``"capacity"``, and
                ``"read_info"`` / ``"read_write_info"`` (entry 0's
                ``"latency"``). Input memories also need ``"input_port"`` and
                ``"input_edge_params"``; output memories need
                ``"output_port"``, ``"output_edge_params"`` and
                ``"rw_same_cycle"``. Both edge-parameter dicts hold ``"dim"``,
                ``"max_range"`` and ``"max_stride"``.
            edges: Iterable of dicts with ``"from_signal"`` and
                ``"to_signal"`` (lists of memory names) and ``"dim"``.
        """

        super().__init__("LakeTop", debug=True)

        # parameters
        self.word_width = word_width
        self.input_ports = input_ports
        self.output_ports = output_ports

        self.default_config_width = 16
        self.cycle_count_width = 16

        self.stencil_valid = False

        # objects
        self.memories = memories
        self.edges = edges

        # tile enable and clock
        self.tile_en = self.input("tile_en", 1)
        self.tile_en.add_attribute(ConfigRegAttr("Tile logic enable manifested as clock gate"))
        self.tile_en.add_attribute(FormalAttr(self.tile_en.name, FormalSignalConstraint.SET1))

        self.clk_mem = self.clock("clk")
        self.clk_mem.add_attribute(FormalAttr(self.clk_mem.name, FormalSignalConstraint.CLK))

        # chaining: exposed as a config register only if at least one memory
        # supports it, otherwise tied low internally
        chain_supported = False
        for mem in self.memories.keys():
            if self.memories[mem]["chaining"]:
                chain_supported = True
                break

        if chain_supported:
            self.chain_en = self.input("chain_en", 1)
            self.chain_en.add_attribute(ConfigRegAttr("Chaining enable"))
            self.chain_en.add_attribute(FormalAttr(self.chain_en.name, FormalSignalConstraint.SET0))
        else:
            self.chain_en = self.var("chain_en", 1)
            self.wire(self.chain_en, 0)

        # gate clock with tile_en
        gclk = self.var("gclk", 1)
        self.gclk = kts.util.clock(gclk)
        self.wire(gclk, self.clk_mem & self.tile_en)

        self.clk_en = self.clock_en("clk_en", 1)

        # active low asynchronous reset
        self.rst_n = self.reset("rst_n", 1)
        self.rst_n.add_attribute(FormalAttr(self.rst_n.name, FormalSignalConstraint.RSTN))

        # data in and out of top level Lake memory object
        self.data_in = self.input("data_in",
                                  width=self.word_width,
                                  size=self.input_ports,
                                  explicit_array=True,
                                  packed=True)
        self.data_in.add_attribute(FormalAttr(self.data_in.name, FormalSignalConstraint.SEQUENCE))

        self.data_out = self.output("data_out",
                                    width=self.word_width,
                                    size=self.output_ports,
                                    explicit_array=True,
                                    packed=True)
        self.data_out.add_attribute(FormalAttr(self.data_out.name, FormalSignalConstraint.SEQUENCE))

        # global cycle count for accessor comparison; its width matches the
        # config width handed to every SchedGen below
        self._cycle_count = self.var("cycle_count", self.cycle_count_width)

        @always_ff((posedge, self.gclk), (negedge, "rst_n"))
        def increment_cycle_count(self):
            # cleared while rst_n is low, otherwise +1 per gated clock edge
            if ~self.rst_n:
                self._cycle_count = 0
            else:
                self._cycle_count = self._cycle_count + 1

        self.add_always(increment_cycle_count)

        # info about memories
        num_mem = len(memories)
        subscript_mems = list(self.memories.keys())

        # list of the data out from each memory
        self.mem_data_outs = [self.var(f"mem_data_out_{subscript_mems[i]}",
                                       width=self.word_width,
                                       size=self.memories[subscript_mems[i]]
                                       ["read_port_width" if "read_port_width" in self.memories[subscript_mems[i]]
                                        else "read_write_port_width"],
                                       explicit_array=True, packed=True) for i in range(num_mem)]

        # keep track of write, read_addr, and write_addr vars for read/write memories
        # to later check whether there is a write and what to use for the shared port
        self.mem_read_write_addrs = {}

        # create memory instance for each memory
        self.mem_insts = {}
        i = 0
        for mem in self.memories.keys():
            m = mem_inst(self.memories[mem], self.word_width)
            self.mem_insts[mem] = m

            self.add_child(mem,
                           m,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           # put data out in memory data out list
                           data_out=self.mem_data_outs[i],
                           chain_en=self.chain_en)
            i += 1

        # get input and output memories
        is_input, is_output = [], []
        for mem_name in self.memories.keys():
            mem = self.memories[mem_name]
            if mem["is_input"]:
                is_input.append(mem_name)
            if mem["is_output"]:
                is_output.append(mem_name)

        # TODO direct connection to write doesn't work (?), so have to do this...
        self.low = self.var("low", 1)
        self.wire(self.low, 0)

        # TODO adding multiple ports to 1 memory after talking about mux with compiler team

        # set up input memories
        for i in range(len(is_input)):
            in_mem = is_input[i]

            # input addressor / accessor parameters
            input_dim = self.memories[in_mem]["input_edge_params"]["dim"]
            input_range = self.memories[in_mem]["input_edge_params"]["max_range"]
            input_stride = self.memories[in_mem]["input_edge_params"]["max_stride"]
            # input port associated with memory
            input_port_index = self.memories[in_mem]["input_port"]

            # accessor valid doubles as the memory's write enable
            self.valid = self.var(
                f"input_port{input_port_index}_2{in_mem}_accessor_valid", 1)
            self.wire(self.mem_insts[in_mem].ports.write, self.valid)

            # hook up data from the specified input port to the memory
            safe_wire(self, self.mem_insts[in_mem].ports.data_in[0],
                      self.data_in[input_port_index])

            if self.memories[in_mem]["num_read_write_ports"] > 0:
                self.mem_read_write_addrs[in_mem] = {"write": self.valid}

            # create IteratorDomain, AddressGenerator, and ScheduleGenerator
            # for writes to this input memory
            forloop = ForLoop(iterator_support=input_dim,
                              config_width=max(1, clog2(input_range)))  # self.default_config_width)
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(f"input_port{input_port_index}_2{in_mem}_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            newAG = AddrGen(iterator_support=input_dim,
                            config_width=max(1, clog2(input_stride)))  # self.default_config_width)
            self.add_child(f"input_port{input_port_index}_2{in_mem}_write_addr_gen",
                           newAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            # dedicated write port: wire the address directly; shared
            # read/write port: remember it for the final address mux
            if self.memories[in_mem]["num_read_write_ports"] == 0:
                safe_wire(self, self.mem_insts[in_mem].ports.write_addr[0], newAG.ports.addr_out)
            else:
                self.mem_read_write_addrs[in_mem]["write_addr"] = newAG.ports.addr_out

            newSG = SchedGen(iterator_support=input_dim,
                             config_width=self.cycle_count_width)
            self.add_child(f"input_port{input_port_index}_2{in_mem}_write_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # set up output memories
        for i in range(len(is_output)):
            out_mem = is_output[i]

            # output addressor / accessor parameters
            output_dim = self.memories[out_mem]["output_edge_params"]["dim"]
            output_range = self.memories[out_mem]["output_edge_params"]["max_range"]
            output_stride = self.memories[out_mem]["output_edge_params"]["max_stride"]
            # output port associated with memory
            output_port_index = self.memories[out_mem]["output_port"]

            # hook up data from the memory to the specified output port
            self.wire(self.data_out[output_port_index],
                      self.mem_insts[out_mem].ports.data_out[0][0])
            # self.mem_data_outs[subscript_mems.index(out_mem)][0])

            self.valid = self.var(f"{out_mem}2output_port{output_port_index}_accessor_valid", 1)
            if self.memories[out_mem]["rw_same_cycle"]:
                self.wire(self.mem_insts[out_mem].ports.read, self.valid)

            # create IteratorDomain, AddressGenerator, and ScheduleGenerator
            # for reads from this output memory
            forloop = ForLoop(iterator_support=output_dim,
                              config_width=max(1, clog2(output_range)))  # self.default_config_width)
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(f"{out_mem}2output_port{output_port_index}_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            newAG = AddrGen(iterator_support=output_dim,
                            config_width=max(1, clog2(output_stride)))  # self.default_config_width)
            self.add_child(f"{out_mem}2output_port{output_port_index}_read_addr_gen",
                           newAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            if self.memories[out_mem]["num_read_write_ports"] == 0:
                safe_wire(self, self.mem_insts[out_mem].ports.read_addr[0], newAG.ports.addr_out)
            else:
                # The read address belongs to this output memory. The entry is
                # created on demand because the memory's writer may only be
                # registered later, in the edge loop below.
                self.mem_read_write_addrs.setdefault(out_mem, {})["read_addr"] = newAG.ports.addr_out

            newSG = SchedGen(iterator_support=output_dim,
                             config_width=self.cycle_count_width)  # self.default_config_width)
            self.add_child(f"{out_mem}2output_port{output_port_index}_read_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # create shared IteratorDomains and accessors as well as
        # read/write addressors for memories connected by each edge
        for edge in self.edges:

            # see how many signals need to be selected between for
            # from and to signals for edge
            num_mux_from = len(edge["from_signal"])
            num_mux_to = len(edge["to_signal"])

            # get unique edge_name identifier for hardware modules
            edge_name = get_edge_name(edge)

            # create forloop and accessor valid output signal
            self.valid = self.var(edge_name + "_accessor_valid", 1)

            forloop = ForLoop(iterator_support=edge["dim"])
            self.forloop = forloop
            loop_itr = forloop.get_iter()
            loop_wth = forloop.get_cfg_width()

            self.add_child(edge_name + "_forloop",
                           forloop,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid)

            # create input addressor
            readAG = AddrGen(iterator_support=edge["dim"],
                             config_width=self.default_config_width)
            self.add_child(f"{edge_name}_read_addr_gen",
                           readAG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           step=self.valid,
                           mux_sel=forloop.ports.mux_sel_out,
                           restart=forloop.ports.restart)

            # assign read address to all from memories
            if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
                # can assign same read addrs to all the memories
                for i in range(len(edge["from_signal"])):
                    safe_wire(self, self.mem_insts[edge["from_signal"][i]].ports.read_addr[0], readAG.ports.addr_out)
            else:
                for i in range(len(edge["from_signal"])):
                    # create the entry on demand: the source memory's writer
                    # may be registered by a later edge
                    self.mem_read_write_addrs.setdefault(edge["from_signal"][i], {})["read_addr"] = \
                        readAG.ports.addr_out

            # if needing to mux, choose which from memory we get data
            # from for to memory data in
            if num_mux_from > 1:
                num_mux_bits = clog2(num_mux_from)
                self.mux_sel = self.var(f"{edge_name}_mux_sel",
                                        width=num_mux_bits)

                read_addr_width = max(1, clog2(self.memories[edge["from_signal"][0]]["capacity"]))
                # decide which memory to get data from for to memory's data in
                # NOTE(review): the slice is num_mux_from bits wide while
                # mux_sel is num_mux_bits wide - confirm that relying on
                # safe_wire to reconcile the widths is intended.
                safe_wire(self, self.mux_sel,
                          readAG.ports.addr_out[read_addr_width + num_mux_from - 1, read_addr_width])

                comb_mux_from = self.combinational()
                # for i in range(num_mux_from):
                # TODO want to use a switch statement here, but get add_fn_ln issue
                # only from memories 0 and 1 are selectable here
                if_mux_sel = IfStmt(self.mux_sel == 0)
                for j in range(len(edge["to_signal"])):
                    # print("TO ", edge["to_signal"][j])
                    # print("FROM ", edge["from_signal"][i])
                    if_mux_sel.then_(self.mem_insts[edge["to_signal"][j]].ports.data_in.assign(self.mem_insts[edge["from_signal"][0]].ports.data_out))
                    if_mux_sel.else_(self.mem_insts[edge["to_signal"][j]].ports.data_in.assign(self.mem_insts[edge["from_signal"][1]].ports.data_out))
                comb_mux_from.add_stmt(if_mux_sel)

            # no muxing from, data_out from the one and only memory
            # goes to all to memories (valid determines whether it is
            # actually written)
            else:
                for j in range(len(edge["to_signal"])):
                    # print("TO ", edge["to_signal"][j])
                    # print("FROM ", edge["from_signal"][0])
                    safe_wire(self,
                              self.mem_insts[edge["to_signal"][j]].ports.data_in,
                              # only one memory to read from
                              self.mem_insts[edge["from_signal"][0]].ports.data_out)

            # create output addressor
            writeAG = AddrGen(iterator_support=edge["dim"],
                              config_width=self.default_config_width)
            # step, mux_sel, restart may need delayed signals (assigned later)
            self.add_child(f"{edge_name}_write_addr_gen",
                           writeAG,
                           clk=self.gclk,
                           rst_n=self.rst_n)

            # set write addr for to memories
            if self.memories[edge["to_signal"][0]]["num_read_write_ports"] == 0:
                for i in range(len(edge["to_signal"])):
                    safe_wire(self, self.mem_insts[edge["to_signal"][i]].ports.write_addr[0], writeAG.ports.addr_out)
            else:
                for i in range(len(edge["to_signal"])):
                    # merge into the existing entry instead of replacing it,
                    # so a read address recorded earlier for this memory is
                    # not discarded
                    self.mem_read_write_addrs.setdefault(edge["to_signal"][i], {}).update(
                        {"write": self.valid, "write_addr": writeAG.ports.addr_out})

            # calculate necessary delay between from_signal to to_signal
            # TODO this may need to be more sophisticated and based on II as well
            # TODO just need to add for loops for all the ports
            if self.memories[edge["from_signal"][0]]["num_read_write_ports"] == 0:
                self.delay = self.memories[edge["from_signal"][0]]["read_info"][0]["latency"]
            else:
                self.delay = self.memories[edge["from_signal"][0]]["read_write_info"][0]["latency"]

            if self.delay > 0:
                # signals that need to be delayed due to edge latency
                self.delayed_writes = self.var(f"{edge_name}_delayed_writes",
                                               width=self.delay)
                self.delayed_mux_sels = self.var(f"{edge_name}_delayed_mux_sels",
                                                 width=self.forloop.ports.mux_sel_out.width,
                                                 size=self.delay,
                                                 explicit_array=True,
                                                 packed=True)
                self.delayed_restarts = self.var(f"{edge_name}_delayed_restarts",
                                                 width=self.delay)

                # delay in valid between read from memory and write to next memory
                @always_ff((posedge, self.gclk), (negedge, "rst_n"))
                def get_delayed_write(self):
                    # shift registers of depth self.delay: entry 0 samples the
                    # live signals, entry i + 1 takes the old entry i
                    if ~self.rst_n:
                        self.delayed_writes = 0
                        self.delayed_mux_sels = 0
                        self.delayed_restarts = 0
                    else:
                        for i in range(self.delay - 1):
                            self.delayed_writes[i + 1] = self.delayed_writes[i]
                            self.delayed_mux_sels[i + 1] = self.delayed_mux_sels[i]
                            self.delayed_restarts[i + 1] = self.delayed_restarts[i]
                        self.delayed_writes[0] = self.valid
                        self.delayed_mux_sels[0] = self.forloop.ports.mux_sel_out
                        self.delayed_restarts[0] = self.forloop.ports.restart

                self.add_always(get_delayed_write)

            # if we have a mux for the destination memories,
            # choose which mux to write to
            if num_mux_to > 1:
                num_mux_bits = clog2(num_mux_to)
                self.mux_sel_to = self.var(f"{edge_name}_mux_sel_to",
                                           width=num_mux_bits)

                write_addr_width = max(1, clog2(self.memories[edge["to_signal"][0]]["capacity"]))
                # decide which destination memory gets written to
                safe_wire(self, self.mux_sel_to,
                          writeAG.ports.addr_out[write_addr_width + num_mux_to - 1, write_addr_width])

                # wire the write (or if needed, delayed write) signal to the selected destination memory
                # and set write enable low for all other destination memories
                comb_mux_to = self.combinational()
                for i in range(num_mux_to):
                    if_mux_sel_to = IfStmt(self.mux_sel_to == i)
                    if self.delay == 0:
                        if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.valid))
                    else:
                        if_mux_sel_to.then_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.delayed_writes[self.delay - 1]))

                    if_mux_sel_to.else_(self.mem_insts[edge["to_signal"][i]].ports.write.assign(self.low))
                    comb_mux_to.add_stmt(if_mux_sel_to)

            # no muxing to, just write to the one destination memory
            else:
                if self.delay == 0:
                    self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.valid)
                else:
                    self.wire(self.mem_insts[edge["to_signal"][0]].ports.write, self.delayed_writes[self.delay - 1])

            # assign delayed signals for write addressor if needed
            if self.delay == 0:
                self.wire(writeAG.ports.step, self.valid)
                self.wire(writeAG.ports.mux_sel, self.forloop.ports.mux_sel_out)
                self.wire(writeAG.ports.restart, self.forloop.ports.restart)
            else:
                self.wire(writeAG.ports.step, self.delayed_writes[self.delay - 1])
                self.wire(writeAG.ports.mux_sel, self.delayed_mux_sels[self.delay - 1])
                self.wire(writeAG.ports.restart, self.delayed_restarts[self.delay - 1])

            # create accessor for edge
            newSG = SchedGen(iterator_support=edge["dim"],
                             config_width=self.cycle_count_width)  # self.default_config_width)

            self.add_child(edge_name + "_sched_gen",
                           newSG,
                           clk=self.gclk,
                           rst_n=self.rst_n,
                           mux_sel=forloop.ports.mux_sel_out,
                           finished=forloop.ports.restart,
                           cycle_count=self._cycle_count,
                           valid_output=self.valid)

        # for read write memories, choose either read or write address based on whether
        # we are writing to the memory (whether write enable is high)
        read_write_addr_comb = self.combinational()
        for mem_name in self.memories:
            if mem_name in self.mem_read_write_addrs:
                mem_info = self.mem_read_write_addrs[mem_name]
                if_write = IfStmt(mem_info["write"] == 1)
                # both addresses are truncated to the shared port's width
                addr_width = self.mem_insts[mem_name].ports.read_write_addr[0].width
                if_write.then_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["write_addr"][addr_width - 1, 0]))
                if_write.else_(self.mem_insts[mem_name].ports.read_write_addr[0].assign(mem_info["read_addr"][addr_width - 1, 0]))
                read_write_addr_comb.add_stmt(if_write)

        # clock enable and flush passes
        kts.passes.auto_insert_clock_enable(self.internal_generator)
        clk_en_port = self.internal_generator.get_port("clk_en")
        clk_en_port.add_attribute(FormalAttr(clk_en_port.name, FormalSignalConstraint.SET1))

        self.add_attribute("sync-reset=flush")
        kts.passes.auto_insert_sync_reset(self.internal_generator)
        flush_port = self.internal_generator.get_port("flush")

        # bring config registers up to top level
        lift_config_reg(self.internal_generator)
示例#9
0
def test_prefetcher_basic(input_latency=10,
                          max_prefetch=64,
                          fetch_width=32,
                          data_width=16):
    """Co-simulate the Prefetcher DUT against PrefetcherModel.

    For 1000 iterations, random input words and handshake bits (seed 0) are
    handed to the software model and poked onto the DUT. The DUT's
    ``prefetch_step`` and ``valid_out`` are expected to equal the model's
    results, and the output words are compared whenever the model reports
    valid data. The recorded actions are finally compiled and run with
    verilator inside a temporary directory.
    """
    assert input_latency < max_prefetch, "Input latency must be smaller than fifo"

    # fetch_width / data_width: how many data_in_* / data_out_* ports to drive.
    fw_int = int(fetch_width / data_width)

    # Software reference model.
    model_pf = PrefetcherModel(fetch_width=fetch_width,
                               data_width=data_width,
                               max_prefetch=max_prefetch)
    new_config = {'input_latency': input_latency}
    model_pf.set_config(new_config=new_config)

    # Hardware under test, converted to a magma circuit for fault.
    dut = Prefetcher(fetch_width=fetch_width,
                     data_width=data_width,
                     max_prefetch=max_prefetch)
    lift_config_reg(dut.internal_generator)
    magma_dut = k.util.to_magma(dut,
                                flatten_array=True,
                                check_multiple_driver=False,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # Apply the same configuration to the DUT ports of the same names.
    for cfg_name, cfg_value in new_config.items():
        setattr(tester.circuit, cfg_name, cfg_value)

    # Initial reset: zero every input while rst_n is low, then release it.
    for port_name in ("clk", "rst_n", "data_in", "valid_read", "tba_rdy_in"):
        setattr(tester.circuit, port_name, 0)
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # Fixed seed keeps the stimulus reproducible.
    rand.seed(0)

    # One list object is reused (overwritten in place) for every iteration.
    data_in = [0] * fw_int

    for cycle in range(1000):
        print(cycle)

        # Random stimulus for this cycle.
        for word_idx in range(fw_int):
            data_in[word_idx] = rand.randint(0, 2**data_width - 1)
        tba_rdy_in = rand.randint(0, 1)
        valid_read = rand.randint(0, 1)
        mem_valid_data = rand.randint(0, 1)

        # Reference results from the model.
        model_d, model_v, model_stp, model_mem_valid = model_pf.interact(
            data_in, valid_read, tba_rdy_in, mem_valid_data)

        # Drive the DUT with the same values.
        for word_idx in range(fw_int):
            setattr(tester.circuit, f"data_in_{word_idx}", data_in[word_idx])
        tester.circuit.valid_read = valid_read
        tester.circuit.tba_rdy_in = tba_rdy_in

        tester.eval()

        # Compare DUT outputs with the model; data only matters when valid.
        tester.circuit.prefetch_step.expect(model_stp)
        tester.circuit.valid_out.expect(model_v)
        if model_v:
            for word_idx in range(fw_int):
                out_port = getattr(tester.circuit, f"data_out_{word_idx}")
                out_port.expect(model_d[word_idx])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#10
0
def test_sync_groups(int_out_ports,
                     fetch_width=32,
                     data_width=16):
    """Co-simulate the SyncGroups DUT against SyncGroupsModel.

    A sync-group configuration is chosen (a single-port configuration, or
    one of two three-entry configurations selected by a seeded random bit),
    applied to both the model and the DUT, and then 1000 cycles of random
    stimulus are compared output by output. The recorded actions are
    compiled and run with verilator in a temporary directory.
    """
    # fetch_width / data_width: number of words per port.
    fw_int = int(fetch_width / data_width)

    # Software reference model.
    model_sg = SyncGroupsModel(fetch_width=fetch_width,
                               data_width=data_width,
                               int_out_ports=int_out_ports)

    # Seed first: the grouping choice below is part of the random sequence.
    rand.seed(0)
    group_choice = rand.randint(0, 1)

    if int_out_ports == 1:
        new_config = {'sync_group': 1, 'sync_group_0': 1}
    else:
        # group_choice only decides the value written to sync_group_2.
        new_config = {'sync_group_0': 1,
                      'sync_group_1': 1,
                      'sync_group_2': 1 if group_choice == 1 else 2}

    model_sg.set_config(new_config=new_config)

    # Hardware under test.
    dut = SyncGroups(fetch_width=fetch_width,
                     data_width=data_width,
                     int_out_ports=int_out_ports)

    lift_config_reg(dut.internal_generator)
    magma_dut = kts.util.to_magma(dut, flatten_array=True,
                                  check_multiple_driver=False,
                                  check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # Apply the same configuration to the DUT ports of the same names.
    for cfg_name, cfg_value in new_config.items():
        setattr(tester.circuit, cfg_name, cfg_value)

    # Initial reset.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # int_out_ports x fw_int grid, overwritten in place every iteration.
    data_in = [[0] * fw_int for _ in range(int_out_ports)]

    for _ in range(1000):
        # Random stimulus for this cycle.
        ack_in = rand.randint(0, 2 ** int_out_ports - 1)
        ren_in, valid_in, mem_valid_data = [], [], []
        for port in range(int_out_ports):
            ren_in.append(rand.randint(0, 1))
            valid_in.append(rand.randint(0, 1))
            mem_valid_data.append(rand.randint(0, 1))
            for word in range(fw_int):
                data_in[port][word] = rand.randint(0, 2 ** data_width - 1)

        # Drive the DUT: control bits first, then the flattened data ports.
        tester.circuit.ack_in = ack_in
        for port in range(int_out_ports):
            tester.circuit.ren_in[port] = ren_in[port]
            tester.circuit.valid_in[port] = valid_in[port]
            tester.circuit.mem_valid_data[port] = mem_valid_data[port]

        for port in range(int_out_ports):
            for word in range(fw_int):
                setattr(tester.circuit,
                        f"data_in_{port}_{word}",
                        data_in[port][word])

        # Reference results from the model.
        model_do, model_vo, model_rd_sync, model_mem_valid = model_sg.interact(
            ack_in, data_in, valid_in, ren_in, mem_valid_data)

        tester.eval()

        # Compare data outputs, then the per-port status bits.
        for port in range(int_out_ports):
            for word in range(fw_int):
                out_port = getattr(tester.circuit, f"data_out_{port}_{word}")
                out_port.expect(model_do[port][word])

        for port in range(int_out_ports):
            tester.circuit.valid_out[port].expect(model_vo[port])
            tester.circuit.rd_sync_gate[port].expect(model_rd_sync[port])
            tester.circuit.mem_valid_data_out[port].expect(model_mem_valid[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#11
0
    def __init__(self,
                 data_width=16,
                 fetch_width=1,
                 mem_depth=512,
                 config_width=16,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6
                 ):
        """Build the "lake_top_test" generator around a single SRAMStub.

        The generator instantiates one SRAMStub, an input and an output
        AddrGen, and an input and an output SchedGen. The schedule
        generators drive the internal ``write`` / ``read`` strobes, which in
        turn step the address generators and control the SRAM. Config
        registers are lifted and ``set_sram_addr`` (defined elsewhere in the
        class) is registered as a code block.
        """
        super().__init__("lake_top_test")

        # Clock, reset and control inputs.
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._clk_en = self.input("clk_en", 1)
        self._flush = self.input("flush", 1)

        # Data ports.
        self._data_in = self.input("data_in", data_width, packed=True)
        self._data_out = self.output("data_out", data_width, packed=True)

        # Internal strobes and addresses.
        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        self._write_addr = self.var("write_addr", config_width)
        self._read_addr = self.var("read_addr", config_width)
        self._addr = self.var("addr", clog2(mem_depth))

        # Memory: enabled whenever either strobe is high.
        sram_stub = SRAMStub(data_width, fetch_width, mem_depth)
        self.add_child("sram",
                       sram_stub,
                       clk=self._clk,
                       wen=self._write,
                       cen=self._write | self._read,
                       addr=self._addr,
                       data_in=self._data_in,
                       data_out=self._data_out)

        # Address generators: (instance name, iterator support, step, addr).
        addr_gen_specs = (
            ("input_addr_gen", input_addr_iterator_support,
             self._write, self._write_addr),
            ("output_addr_gen", output_addr_iterator_support,
             self._read, self._read_addr),
        )
        for inst_name, iter_support, step_sig, addr_sig in addr_gen_specs:
            # The child is constructed right before it is attached, so
            # construction and attachment stay interleaved.
            self.add_child(inst_name,
                           AddrGen(iter_support, config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=step_sig,
                           addr_out=addr_sig,
                           clk_en=self._clk_en,
                           flush=self._flush)

        # Schedule generators: (instance name, iterator support, strobe).
        sched_gen_specs = (
            ("input_sched_gen", input_sched_iterator_support, self._write),
            ("output_sched_gen", output_sched_iterator_support, self._read),
        )
        for inst_name, iter_support, strobe in sched_gen_specs:
            self.add_child(inst_name,
                           SchedGen(iter_support, config_width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           clk_en=self._clk_en,
                           flush=self._flush,
                           valid_output=strobe)

        lift_config_reg(self.internal_generator)

        self.add_code(self.set_sram_addr)
示例#12
0
def test_tba(word_width=16,
             fetch_width=4,
             num_tb=1,
             tb_height=1,
             max_range=5,
             max_range_inner=5):
    """Co-simulate TransposeBufferAggregation against TBAModel.

    Both the model and the DUT get the same fixed configuration. For 100
    iterations random SRAM words and valid bits (seed 0) are applied, and
    the DUT's ``tb_to_interconnect_valid`` is compared with the model; the
    data output is compared only when the model reports valid output. The
    recorded actions are compiled and run with verilator.
    """
    # Software reference model and its configuration.
    model_tba = TBAModel(word_width,
                         fetch_width,
                         num_tb,
                         tb_height,
                         max_range,
                         max_range_inner)

    new_config = {
        "range_outer": 5,
        "range_inner": 3,
        "stride": 2,
        "indices": [0, 1, 2],
        "dimensionality": 2,
        "tb_height": 1,
        "starting_addr": 0,
    }
    model_tba.set_config(new_config=new_config)

    # Hardware under test.
    dut = TransposeBufferAggregation(word_width,
                                     fetch_width,
                                     num_tb,
                                     tb_height,
                                     max_range,
                                     max_range_inner,
                                     max_stride=5,
                                     tb_iterator_support=2)

    lift_config_reg(dut.internal_generator)

    magma_dut = k.util.to_magma(dut, flatten_array=True, check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # Configuration registers on the DUT, written in this exact order.
    dut_config = (
        ("tb_0_indices_0", 0),
        ("tb_0_indices_1", 1),
        ("tb_0_indices_2", 2),
        ("tb_0_range_outer", 5),
        ("tb_0_range_inner", 3),
        ("tb_0_stride", 2),
        ("tb_0_dimensionality", 2),
        ("tb_0_tb_height", 1),
        ("tb_0_starting_addr", 0),
    )
    for port_name, port_value in dut_config:
        setattr(tester.circuit, port_name, port_value)

    # Reset pulse: rst_n goes high, low, then high again with tba_ren set.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 1
    tester.step(2)
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.tba_ren = 1
    tester.circuit.rst_n = 1

    rand.seed(0)

    num_iters = 100
    for _iteration in range(num_iters):
        # Random stimulus, drawn in the order: data words, valid, mem valid.
        data = [rand.randint(0, 2**word_width - 1)
                for _word in range(fetch_width)]
        valid_data = rand.randint(0, 1)
        mem_valid_data = rand.randint(0, 1)
        tb_index_for_data = 0
        # The acknowledge simply follows the valid bit.
        ack_in = valid_data

        # Drive the DUT.
        for word_idx, word in enumerate(data):
            setattr(tester.circuit, f"SRAM_to_tb_data_{word_idx}", word)
        tester.circuit.valid_data = valid_data
        tester.circuit.mem_valid_data = mem_valid_data
        tester.circuit.tb_index_for_data = tb_index_for_data
        tester.circuit.ack_in = ack_in

        # Reference results from the model.
        model_data, model_valid = model_tba.tba_main(
            data, valid_data, ack_in, tb_index_for_data, 1, mem_valid_data)

        tester.eval()
        tester.circuit.tb_to_interconnect_valid.expect(model_valid)
        if model_valid:
            tester.circuit.tb_to_interconnect_data.expect(model_data[0])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#13
0
def test_output_addr_basic(banks,
                           interconnect_output_ports,
                           enable_chain_output,
                           mem_depth=512,
                           num_tiles=1,
                           data_width=16,
                           fetch_width=32,
                           iterator_support=4,
                           address_width=16,
                           config_width=16,
                           chain_idx_output=0):
    """Co-simulate the OutputAddrCtrl DUT against OutputAddrCtrlModel.

    Two address generators are configured identically except for their
    starting address (0 and ``mem_depth``). For 1000 iterations random
    ``valid_in`` / ``step_in`` stimulus (seed 0) is applied to both the model
    and the DUT, and the DUT's read enables and output addresses are
    expected to match the model for every bank and every output port. The
    recorded actions are compiled and run with verilator.

    ``banks``, ``interconnect_output_ports`` and ``enable_chain_output`` have
    no defaults; presumably they are supplied by test parametrization that
    is not visible here.
    """
    # Set up model..
    model_oac = OutputAddrCtrlModel(
        interconnect_output_ports=interconnect_output_ports,
        mem_depth=mem_depth,
        banks=banks,
        num_tiles=num_tiles,
        iterator_support=iterator_support,
        address_width=address_width,
        data_width=data_width,
        fetch_width=fetch_width,
        chain_idx_output=chain_idx_output)

    new_config = {}
    new_config['address_gen_0_starting_addr'] = 0
    new_config['address_gen_0_dimensionality'] = 3
    new_config['address_gen_0_strides_0'] = 1
    new_config['address_gen_0_strides_1'] = 3
    new_config['address_gen_0_strides_2'] = 9
    new_config['address_gen_0_ranges_0'] = 3
    new_config['address_gen_0_ranges_1'] = 3
    new_config['address_gen_0_ranges_2'] = 3

    new_config['address_gen_1_starting_addr'] = mem_depth
    new_config['address_gen_1_dimensionality'] = 3
    new_config['address_gen_1_strides_0'] = 1
    new_config['address_gen_1_strides_1'] = 3
    new_config['address_gen_1_strides_2'] = 9
    new_config['address_gen_1_ranges_0'] = 3
    new_config['address_gen_1_ranges_1'] = 3
    new_config['address_gen_1_ranges_2'] = 3

    model_oac.set_config(new_config=new_config)
    ###

    # Set up dut...
    dut = OutputAddrCtrl(interconnect_output_ports=interconnect_output_ports,
                         mem_depth=mem_depth,
                         num_tiles=num_tiles,
                         banks=banks,
                         iterator_support=iterator_support,
                         address_width=address_width,
                         config_width=config_width)

    lift_config_reg(dut.internal_generator)

    magma_dut = kts.util.to_magma(dut,
                                  flatten_array=True,
                                  check_multiple_driver=False,
                                  check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)
    ###

    # Apply the same configuration to the DUT ports of the same names.
    for key, value in new_config.items():
        setattr(tester.circuit, key, value)

    # Reused (overwritten in place) on every iteration.
    valid_in = [0] * interconnect_output_ports

    # initial reset
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)
    # Seed for posterity
    rand.seed(0)

    for i in range(1000):
        # Random stimulus for this cycle.
        for j in range(interconnect_output_ports):
            valid_in[j] = rand.randint(0, 1)
        step_in = rand.randint(0, 2**interconnect_output_ports - 1)

        for j in range(interconnect_output_ports):
            tester.circuit.valid_in[j] = valid_in[j]
        tester.circuit.step_in = step_in

        # top level config regs passed down
        tester.circuit.enable_chain_output = enable_chain_output
        tester.circuit.chain_idx_output = chain_idx_output

        (ren, addrs) = model_oac.interact(valid_in, step_in,
                                          enable_chain_output)

        tester.eval()

        # With a single bank the port is named `ren`; otherwise `ren_<bank>`.
        if banks == 1:
            for k in range(interconnect_output_ports):
                tester.circuit.ren[k].expect(ren[0][k])
        else:
            for j in range(banks):
                for k in range(interconnect_output_ports):
                    getattr(tester.circuit, f"ren_{j}")[k].expect(ren[j][k])

        # With a single output port the port is named `addr_out`; otherwise
        # `addr_out_<port>`. Index with the loop variable so that EVERY
        # port is checked (a stale index would only re-check the last one).
        if interconnect_output_ports == 1:
            tester.circuit.addr_out.expect(addrs[0])
        else:
            for j in range(interconnect_output_ports):
                getattr(tester.circuit, f"addr_out_{j}").expect(addrs[j])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#14
0
def test_rw_arbiter_basic(int_out_ports,
                          fetch_width,
                          read_delay,
                          data_width=16,
                          memory_depth=256,
                          int_in_ports=1,
                          strg_wr_ports=1,
                          strg_rd_ports=1,
                          rw_same_cycle=False,
                          separate_addresses=False):
    """Co-simulate the RWArbiter DUT against RWArbiterModel.

    For 100 iterations random write/read requests, addresses and memory
    data (seed 0) are applied to both the model and the DUT. The DUT's
    memory-side controls, data, address, acknowledge and valid outputs are
    expected to match the model; ``out_port`` and ``out_data`` are compared
    only when the model reports a valid output. The recorded actions are
    compiled and run with verilator.
    """
    # fetch_width / data_width: number of words per memory access.
    fw_int = int(fetch_width / data_width)

    # Software reference model (no configuration entries).
    model_rwa = RWArbiterModel(fetch_width=fetch_width,
                               data_width=data_width,
                               memory_depth=memory_depth,
                               int_out_ports=int_out_ports,
                               read_delay=read_delay)

    new_config = {}
    model_rwa.set_config(new_config=new_config)

    # Hardware under test.
    dut = RWArbiter(fetch_width=fetch_width,
                    data_width=data_width,
                    memory_depth=memory_depth,
                    int_in_ports=int_in_ports,
                    int_out_ports=int_out_ports,
                    strg_wr_ports=strg_wr_ports,
                    strg_rd_ports=strg_rd_ports,
                    read_delay=read_delay,
                    rw_same_cycle=rw_same_cycle,
                    separate_addresses=separate_addresses)

    lift_config_reg(dut.internal_generator)
    magma_dut = k.util.to_magma(dut, flatten_array=True,
                                check_multiple_driver=False,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    for cfg_name, cfg_value in new_config.items():
        setattr(tester.circuit, cfg_name, cfg_value)

    # Port names lose their index suffix when there is exactly one of them.
    single_out_port = int_out_ports == 1
    single_rd_port = strg_rd_ports == 1

    # Initial reset with the read address(es) held at zero.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 0

    if single_out_port:
        tester.circuit.rd_addr = 0
    else:
        for port in range(int_out_ports):
            setattr(tester.circuit, f"rd_addr_{port}", 0)
    tester.step(2)
    tester.circuit.rst_n = 1
    tester.step(2)

    # Fixed seed keeps the stimulus reproducible.
    rand.seed(0)

    # These lists are reused (overwritten in place) on every iteration.
    ren_in = [0] * int_out_ports
    ren_en = [0] * int_out_ports
    w_data = [0] * fw_int
    data_from_mem = [0] * fw_int

    for _iteration in range(100):
        # Random stimulus; the write enable gate is always open.
        wen_in = rand.randint(0, 1)
        wen_en = 1

        for word in range(fw_int):
            w_data[word] = rand.randint(0, 2 ** data_width - 1)
            data_from_mem[word] = rand.randint(0, 2 ** data_width - 1)
        w_addr = rand.randint(0, 2 ** 9 - 1)

        # All read ports share one randomly chosen enable gate value.
        ren_en_base = rand.randint(0, 1)
        for port in range(int_out_ports):
            ren_in[port] = rand.randint(0, 1)
            ren_en[port] = ren_en_base

        rd_addr = [rand.randint(0, 2 ** 9 - 1)
                   for _port in range(int_out_ports)]
        mem_valid_data = [rand.randint(0, 1)
                          for _port in range(strg_rd_ports)]

        # Drive the DUT.
        tester.circuit.wen_in = wen_in
        tester.circuit.wen_en = wen_en
        tester.circuit.w_addr = w_addr

        for word in range(fw_int):
            setattr(tester.circuit, f"w_data_0_{word}", w_data[word])
            setattr(tester.circuit, f"data_from_mem_0_{word}", data_from_mem[word])

        for port in range(int_out_ports):
            tester.circuit.ren_in[port] = ren_in[port]
            tester.circuit.ren_en[port] = ren_en[port]

        if single_out_port:
            tester.circuit.rd_addr = rd_addr[0]
        else:
            for port in range(int_out_ports):
                setattr(tester.circuit, f"rd_addr_{port}", rd_addr[port])

        if single_rd_port:
            tester.circuit.mem_valid_data = mem_valid_data[0]
        else:
            for port in range(strg_rd_ports):
                setattr(tester.circuit, f"mem_valid_data_{port}", mem_valid_data[port])

        # Reference results from the model.
        model_results = model_rwa.interact(wen_in,
                                           wen_en,
                                           w_data,
                                           w_addr,
                                           data_from_mem,
                                           ren_in,
                                           ren_en,
                                           rd_addr,
                                           mem_valid_data)
        (model_od, model_op, model_ov, model_cen_mem,
         model_wen_mem, model_mem_data, model_mem_addr,
         model_ack, model_out_mem_valid_data) = model_results

        tester.eval()

        # Read-side outputs only matter when the model says they are valid.
        tester.circuit.out_valid.expect(model_ov)
        if model_ov:
            tester.circuit.out_port.expect(model_op)
            for word in range(fw_int):
                out_port = getattr(tester.circuit, f"out_data_0_{word}")
                out_port.expect(model_od[word])

        # Memory-side controls, data and address.
        tester.circuit.cen_mem.expect(model_cen_mem)
        tester.circuit.wen_mem.expect(model_wen_mem)

        for word in range(fw_int):
            mem_port = getattr(tester.circuit, f"data_to_mem_0_{word}")
            mem_port.expect(model_mem_data[word])

        tester.circuit.addr_to_mem.expect(model_mem_addr)
        tester.circuit.out_ack.expect(model_ack)

        print(mem_valid_data)
        print(model_out_mem_valid_data)
        if single_rd_port:
            tester.circuit.out_mem_valid_data.expect(model_out_mem_valid_data[0])
        else:
            for port in range(strg_rd_ports):
                tester.circuit.out_mem_valid_data[port].expect(
                    model_out_mem_valid_data[port])

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])
示例#15
0
    def __init__(self,
                 data_width=16,
                 fetch_width=4,
                 mem_depth=512,
                 config_width=9,
                 input_addr_iterator_support=6,
                 output_addr_iterator_support=6,
                 input_sched_iterator_support=6,
                 output_sched_iterator_support=6):
        """Build the "lake_top_test" generator with agg, SRAM and tb stages.

        Declares the ports and internal signals, then instantiates address
        and schedule generators for the ``agg`` and ``tb`` buffers, an
        SRAMStub, and the SRAM-side input/output address and schedule
        generators. Config registers are lifted and the class's code blocks
        (``set_sram_addr``, ``agg_ctrl``, ``tb_ctrl``, ``agg_to_sram``,
        ``tb_to_out`` -- all defined elsewhere in the class) are registered.
        """
        super().__init__("lake_top_test")

        def attach_addr_gen(inst_name, iterator_support, width, step, addr_out):
            # Instantiate an AddrGen and hook it to the shared clock/reset,
            # clock-enable and flush signals.
            self.add_child(inst_name,
                           AddrGen(iterator_support, width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           step=step,
                           addr_out=addr_out,
                           clk_en=self._clk_en,
                           flush=self._flush)

        def attach_sched_gen(inst_name, iterator_support, width, valid_output):
            # Instantiate a SchedGen whose valid_output drives a strobe.
            self.add_child(inst_name,
                           SchedGen(iterator_support, width),
                           clk=self._clk,
                           rst_n=self._rst_n,
                           clk_en=self._clk_en,
                           flush=self._flush,
                           valid_output=valid_output)

        # Clock, reset and control inputs.
        self._clk = self.clock("clk")
        self._rst_n = self.reset("rst_n")

        self._clk_en = self.input("clk_en", 1)
        self._flush = self.input("flush", 1)

        # Data ports.
        self._data_in = self.input("data_in", data_width, packed=True)
        self._data_out = self.output("data_out", data_width, packed=True)

        # SRAM-side strobes and addresses.
        self._write = self.var("write", 1)
        self._read = self.var("read", 1)
        self._write_addr = self.var("write_addr", config_width)
        self._read_addr = self.var("read_addr", config_width)
        self._addr = self.var("addr", clog2(mem_depth))

        # agg-side strobe and addresses.
        self._agg_write = self.var("agg_write", 1)
        self._agg_write_addr = self.var("agg_write_addr", 2)
        self._agg_read_addr = self.var("agg_read_addr", 2)

        # tb-side strobe and addresses.
        self._tb_read = self.var("tb_read", 1)
        self._tb_write_addr = self.var("tb_write_addr", 2)
        self._tb_read_addr = self.var("tb_read_addr", 2)

        # Wide words moving into and out of the SRAM.
        self._sram_write_data = self.var("sram_write_data",
                                         data_width,
                                         size=fetch_width,
                                         packed=True)
        self._sram_read_data = self.var("sram_read_data",
                                        data_width,
                                        size=fetch_width,
                                        packed=True)

        # Disabled start-address config registers, kept for reference:
        #        self._aggw_start_addr = self.input("aggw_start_addr", 2)
        #        self._aggw_start_addr.add_attribute(ConfigRegAttr("agg write start addr"))
        #        self._agg_start_addr = self.input("agg_start_addr", 2)
        #        self._agg_start_addr.add_attribute(ConfigRegAttr("agg read start addr"))

        self._agg_write_index = self.var("agg_write_index", 2, size=4)

        self._agg = self.var("agg",
                             width=data_width,
                             size=fetch_width,
                             packed=True)

        # agg controllers: written on agg_write, read when the SRAM is written.
        attach_addr_gen("agg_write_addr_gen", 2, 2,
                        step=self._agg_write,
                        addr_out=self._agg_write_addr)
        attach_addr_gen("agg_read_addr_gen", 2, 2,
                        step=self._write,
                        addr_out=self._agg_read_addr)
        attach_sched_gen("agg_write_sched_gen", 2, 2,
                         valid_output=self._agg_write)

        self._tb = self.var("tb", width=data_width, size=fetch_width)

        # tb controllers: written when the SRAM is read, read on tb_read.
        attach_addr_gen("tb_write_addr_gen", 2, 2,
                        step=self._read,
                        addr_out=self._tb_write_addr)
        attach_addr_gen("tb_read_addr_gen", 2, 2,
                        step=self._tb_read,
                        addr_out=self._tb_read_addr)
        attach_sched_gen("tb_read_sched_gen", 2, 2,
                         valid_output=self._tb_read)

        # Memory: enabled whenever either SRAM strobe is high.
        sram_stub = SRAMStub(data_width, fetch_width, mem_depth)
        self.add_child("sram",
                       sram_stub,
                       clk=self._clk,
                       wen=self._write,
                       cen=self._write | self._read,
                       addr=self._addr,
                       data_in=self._sram_write_data,
                       data_out=self._sram_read_data)

        # SRAM-side address generators.
        attach_addr_gen("input_addr_gen",
                        input_addr_iterator_support, config_width,
                        step=self._write,
                        addr_out=self._write_addr)
        attach_addr_gen("output_addr_gen",
                        output_addr_iterator_support, config_width,
                        step=self._read,
                        addr_out=self._read_addr)

        # SRAM-side schedule generators.
        attach_sched_gen("input_sched_gen",
                         input_sched_iterator_support, config_width,
                         valid_output=self._write)
        attach_sched_gen("output_sched_gen",
                         output_sched_iterator_support, config_width,
                         valid_output=self._read)

        lift_config_reg(self.internal_generator)

        for code_block in (self.set_sram_addr,
                           self.agg_ctrl,
                           self.tb_ctrl,
                           self.agg_to_sram,
                           self.tb_to_out):
            self.add_code(code_block)
示例#16
0
def test_sram_formal():
    """Smoke-test ``SRAMFormal`` by simulating it with fault + Verilator.

    The test builds the generator, lifts its configuration registers, converts
    it to a magma circuit, pokes a fixed register configuration, pulses
    ``rst_n`` low, and then streams an incrementing data pattern into the
    ``data_in_0`` .. ``data_in_3`` ports.

    No ``expect`` actions are recorded, so the test only verifies that the
    design can be generated, compiled and simulated without raising.
    """
    sram_dut = SRAMFormal(
        data_width=16,  # CGRA Params
        mem_width=64,
        mem_depth=512,
        banks=1,
        input_addr_iterator_support=6,
        output_addr_iterator_support=6,
        input_sched_iterator_support=6,
        output_sched_iterator_support=6,
        config_width=16,
        interconnect_input_ports=1,  # Connection to int
        interconnect_output_ports=1,
        mem_input_ports=1,
        mem_output_ports=1,
        read_delay=1,  # Cycle delay in read (SRAM vs Register File)
        rw_same_cycle=False,  # Does the memory allow r+w in same cycle?
        agg_height=4)

    lift_config_reg(sram_dut.internal_generator)

    magma_dut = k.util.to_magma(sram_dut,
                                flatten_array=True,
                                check_flip_flop_always_ff=False)
    tester = fault.Tester(magma_dut, magma_dut.clk)

    # NOTE(review): the transformed ranges/strides produced by these calls are
    # not applied to the DUT; the literal `config` below is what gets poked.
    # The calls are kept (results discarded) so the helper is still exercised
    # exactly as before -- confirm whether they were meant to drive `config`.
    dim = 3
    in_ranges = [2, 8, 1]
    in_addr_strides = [1, 2, 0]
    in_sched_strides = [4, 8, 0]
    out_ranges = [2, 8, 1]
    out_addr_strides = [1, 2, 0]
    out_sched_strides = [4, 8, 0]
    transform_strides_and_ranges(ranges=in_ranges,
                                 strides=in_addr_strides,
                                 dimensionality=dim)
    transform_strides_and_ranges(ranges=in_ranges,
                                 strides=in_sched_strides,
                                 dimensionality=dim)
    transform_strides_and_ranges(ranges=out_ranges,
                                 strides=out_addr_strides,
                                 dimensionality=dim)
    transform_strides_and_ranges(ranges=out_ranges,
                                 strides=out_sched_strides,
                                 dimensionality=dim)

    # 65535 is the all-ones value of a 16-bit field (config_width=16 above).
    all_ones = 65535

    # Key order is kept stable because it determines the order in which the
    # poke actions are recorded on the tester.
    config = {
        "sram_read_sched_gen_sched_addr_gen_starting_addr": 5,
        "sram_write_loops_ranges_0": all_ones,
        "sram_write_loops_ranges_1": all_ones,
        "sram_write_loops_ranges_2": all_ones,
        "sram_write_loops_ranges_3": all_ones,
        "sram_write_loops_ranges_4": all_ones,
        "sram_write_loops_ranges_5": all_ones,
        "sram_read_sched_gen_sched_addr_gen_strides_0": 4,
        "sram_read_sched_gen_sched_addr_gen_strides_1": all_ones,
        "sram_read_sched_gen_sched_addr_gen_strides_2": all_ones,
        "sram_read_sched_gen_sched_addr_gen_strides_3": all_ones,
        "sram_read_sched_gen_sched_addr_gen_strides_4": all_ones,
        "sram_read_sched_gen_sched_addr_gen_strides_5": all_ones,
        "sram_write_addr_gen_strides_0": all_ones,
        "sram_write_addr_gen_strides_1": all_ones,
        "sram_write_addr_gen_strides_2": all_ones,
        "sram_write_addr_gen_strides_3": all_ones,
        "sram_write_addr_gen_strides_4": all_ones,
        "sram_write_addr_gen_strides_5": all_ones,
        "sram_write_addr_gen_starting_addr": all_ones,
        "sram_write_sched_gen_sched_addr_gen_starting_addr": 4,
        "sram_read_addr_gen_strides_0": all_ones,
        "sram_read_addr_gen_strides_1": all_ones,
        "sram_read_addr_gen_strides_2": all_ones,
        "sram_read_addr_gen_strides_3": all_ones,
        "sram_read_addr_gen_strides_4": all_ones,
        "sram_read_addr_gen_strides_5": all_ones,
        "sram_write_loops_dimensionality": 0,
        "sram_read_loops_dimensionality": 0,
        "sram_read_loops_ranges_0": 398,
        "sram_read_loops_ranges_1": all_ones,
        "sram_read_loops_ranges_2": all_ones,
        "sram_read_loops_ranges_3": all_ones,
        "sram_read_loops_ranges_4": all_ones,
        "sram_read_loops_ranges_5": all_ones,
        "sram_read_addr_gen_starting_addr": all_ones,
        "sram_write_sched_gen_sched_addr_gen_strides_0": 4,
        "sram_write_sched_gen_sched_addr_gen_strides_1": all_ones,
        "sram_write_sched_gen_sched_addr_gen_strides_2": all_ones,
        "sram_write_sched_gen_sched_addr_gen_strides_3": all_ones,
        "sram_write_sched_gen_sched_addr_gen_strides_4": all_ones,
        "sram_write_sched_gen_sched_addr_gen_strides_5": all_ones,
    }

    # configuration registers passed through from top level
    for key, value in config.items():
        setattr(tester.circuit, key, value)

    # Reset sequence: hold rst_n high, pulse it low, then release it.
    tester.circuit.clk = 0
    tester.circuit.rst_n = 1
    tester.step(2)
    tester.circuit.rst_n = 0
    tester.step(2)
    tester.circuit.rst_n = 1

    # No random values are drawn below; the seed is kept so the module-level
    # RNG state after this test is the same as it was before this cleanup.
    rand.seed(0)

    im_size = 40
    num_iters = im_size * im_size
    # Number of data_in_<n> ports driven per iteration.
    # Presumably mem_width // data_width (64 // 16) -- TODO confirm.
    num_lanes = 4

    data_in = 0
    for _ in range(num_iters):
        # A distinct index name is used here so the lane loop does not
        # clobber the iteration counter of the enclosing loop.
        for lane in range(num_lanes):
            setattr(tester.circuit, f'data_in_{lane}', data_in + lane)

        tester.eval()

        data_in = data_in + num_lanes

        tester.step(2)

    with tempfile.TemporaryDirectory() as tempdir:
        tester.compile_and_run(target="verilator",
                               directory=tempdir,
                               magma_output="verilog",
                               flags=["-Wno-fatal"])